diff --git a/results/tau_agent_B3_30M/run_logs/Player-0.log b/results/tau_agent_B3_30M/run_logs/Player-0.log deleted file mode 100644 index b0bee056162a6ad75882d2042093e3189548cef8..0000000000000000000000000000000000000000 --- a/results/tau_agent_B3_30M/run_logs/Player-0.log +++ /dev/null @@ -1,100368 +0,0 @@ -Mono path[0] = 'C:/Users/3nigma/source/repos/Tau/MLAgentsProject/Build/Tau_Data/Managed' -Mono config path = 'C:/Users/3nigma/source/repos/Tau/MLAgentsProject/Build/MonoBleedingEdge/etc' -Initialize engine version: 6000.0.19f1 (302b264628f9) -[Subsystems] Discovering subsystems at path C:/Users/3nigma/source/repos/Tau/MLAgentsProject/Build/Tau_Data/UnitySubsystems -GfxDevice: creating device client; kGfxThreadingModeSplitJobs -Direct3D: - Version: Direct3D 12 [level 12.1] - Renderer: NVIDIA GeForce RTX 3080 Laptop GPU (ID=0x24dc) - Vendor: NVIDIA - VRAM: 8018 MB - App VRAM Budget: 7250 MB - Driver: 32.0.15.6094 -Begin MonoManager ReloadAssembly -- Loaded All Assemblies, in 5.628 seconds -- Finished resetting the current domain, in 0.003 seconds -[PhysX] Initialized MultithreadedTaskDispatcher with 16 workers. - Initializing input. -New input system (experimental) initialized -Using Windows.Gaming.Input - Input initialized. - Initialized touch support. -UnloadTime: 7.256600 ms -RenderGraph is now enabled. -DatabaseManager initialized. -EmbeddingStorage initialized. -EmbeddingManager initialized. -Loading database from C:/Users/3nigma/source/repos/Tau/MLAgentsProject/Build/Tau_Data\..\Data\database.bin -Custom format loaded successfully. Deserializing... -Starting deserialization of custom format. -Total lines: 113366 -Non-empty lines: 113366 -Extracting tables. -Found table: vocabulary -Found table: training_data -Found table: evaluation_data -Deserialization complete. -Database version: 0.1.0 -Model Name: Tau -Organization: Huggingface -Total Embeddings: 8148 -Clearing existing tables. -Adding table vocabulary with 2881 embeddings. -Adding table training_data with 4534 embeddings. -Adding table evaluation_data with 681 embeddings. -Total embeddings set to 8148 -Vector Database loaded successfully from database.bin (Elapsed time: 1.6917412 seconds) -Registered Communicator in Agent. -Checking if database is loaded. -IsLoaded check: True -Starting training for Agent with agent type Tau using file data.json, 20 agents, and 2 columns -Loading prefabs: TauAgent and AgentTrainer -Removing existing instances of prefabs. -Removed existing instances of TauAgent from the scene. -Removed existing instances of AgentTrainer from the scene. -Instantiating prefabs. -Successfully instantiated 20 pairs of TauAgent and AgentTrainer prefabs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -Setting columns for AgentTrainer. -Initializing AgentTrainer. -Setting up AgentTrainer. maxSteps=10000, rewardThreshold=-100, columns=2 -Setting up BaseAgent. -Retrieving all tables. -Loaded 2881 tokens into agent vocabulary. -Initializing BaseAgent. -Loading training data from file: data.json -Loading training data from file: 'data.json'. -Creating training data list from file: data.json -Loading message list from file: data.json -Training data duplicates removed: 2 -Evaluation data duplicates removed: 0 -Messages removed from evaluation data because they exist in training data: 0 -Created training data list with 4252 embedding pairs. -training >> step=100, episode=1 reward=0.7630398 (41.40 it/sec) -training >> step=200, episode=1 reward=0.7609541 (695.81 it/sec) -training >> step=300, episode=1 reward=0.746841 (677.20 it/sec) -training >> step=400, episode=1 reward=0.7864436 (698.50 it/sec) -training >> step=500, episode=1 reward=0.7528865 (698.20 it/sec) -training >> step=600, episode=1 reward=0.7570807 (540.70 it/sec) -training >> step=700, episode=1 reward=0.7798741 (412.11 it/sec) -training >> step=800, episode=1 reward=0.7838635 (641.71 it/sec) -training >> step=900, episode=1 reward=0.7396356 (665.41 it/sec) -training >> step=1000, episode=1 reward=0.7529334 (698.48 it/sec) -training >> step=1100, episode=1 reward=0.7774001 (635.22 it/sec) -training >> step=1200, episode=1 reward=0.75613 (607.01 it/sec) -training >> step=1300, episode=1 reward=0.7656549 (378.05 it/sec) -training >> step=1400, episode=1 reward=0.7727634 (650.51 it/sec) -training >> step=1500, episode=1 reward=0.7713369 (630.99 it/sec) -training >> step=1600, episode=1 reward=0.7485977 (590.45 it/sec) -training >> step=1700, episode=1 reward=0.7675108 (668.94 it/sec) -training >> step=1800, episode=1 reward=0.7736084 (703.43 it/sec) -training >> step=1900, episode=1 reward=0.7639305 (635.56 it/sec) -training >> step=2000, episode=1 reward=0.7708071 (703.24 it/sec) -training >> step=2100, episode=1 reward=0.7430319 (761.99 it/sec) -training >> step=2200, episode=1 reward=0.7797331 (676.43 it/sec) -training >> step=2300, episode=1 reward=0.7480158 (698.89 it/sec) -training >> step=2400, episode=1 reward=0.7652457 (663.53 it/sec) -training >> step=2500, episode=1 reward=0.7746374 (692.19 it/sec) -training >> step=2600, episode=1 reward=0.7671422 (726.27 it/sec) -training >> step=2700, episode=1 reward=0.7430528 (499.61 it/sec) -training >> step=2800, episode=1 reward=0.7768394 (687.06 it/sec) -training >> step=2900, episode=1 reward=0.7609664 (660.68 it/sec) -training >> step=3000, episode=1 reward=0.7614217 (647.44 it/sec) -training >> step=3100, episode=1 reward=0.7746745 (691.88 it/sec) -training >> step=3200, episode=1 reward=0.7611701 (678.14 it/sec) -training >> step=3300, episode=1 reward=0.7304429 (692.24 it/sec) -training >> step=3400, episode=1 reward=0.7407088 (682.29 it/sec) -training >> step=3500, episode=1 reward=0.7281977 (679.34 it/sec) -training >> step=3600, episode=1 reward=0.7657342 (689.37 it/sec) -training >> step=3700, episode=1 reward=0.7512692 (662.31 it/sec) -training >> step=3800, episode=1 reward=0.7511713 (665.83 it/sec) -training >> step=3900, episode=1 reward=0.7740575 (708.34 it/sec) -training >> step=4000, episode=1 reward=0.7713031 (641.55 it/sec) -training >> step=4100, episode=1 reward=0.7578607 (673.26 it/sec) -training >> step=4200, episode=1 reward=0.7401244 (692.65 it/sec) -training >> step=4300, episode=1 reward=0.7647613 (657.75 it/sec) -training >> step=4400, episode=1 reward=0.7662227 (668.03 it/sec) -training >> step=4500, episode=1 reward=0.7587304 (667.77 it/sec) -training >> step=4600, episode=1 reward=0.7568104 (654.03 it/sec) -training >> step=4700, episode=1 reward=0.7615014 (663.17 it/sec) -training >> step=4800, episode=1 reward=0.735477 (681.92 it/sec) -training >> step=4900, episode=1 reward=0.7596221 (666.65 it/sec) -training >> step=5000, episode=1 reward=0.7687472 (684.24 it/sec) -training >> step=5100, episode=1 reward=0.7491471 (676.86 it/sec) -training >> step=5200, episode=1 reward=0.7635667 (587.42 it/sec) -training >> step=5300, episode=1 reward=0.7305636 (609.83 it/sec) -training >> step=5400, episode=1 reward=0.761795 (611.51 it/sec) -training >> step=5500, episode=1 reward=0.7565798 (638.63 it/sec) -training >> step=5600, episode=1 reward=0.7668394 (582.21 it/sec) -training >> step=5700, episode=1 reward=0.7575145 (573.19 it/sec) -training >> step=5800, episode=1 reward=0.7514942 (553.92 it/sec) -training >> step=5900, episode=1 reward=0.7666745 (450.38 it/sec) -training >> step=6000, episode=2 reward=0.745177 (419.91 it/sec) -training >> step=6100, episode=2 reward=0.7476973 (573.20 it/sec) -training >> step=6200, episode=2 reward=0.7824475 (607.45 it/sec) -training >> step=6300, episode=2 reward=0.7619531 (635.18 it/sec) -training >> step=6400, episode=2 reward=0.7718539 (614.62 it/sec) -training >> step=6500, episode=2 reward=0.7579409 (614.79 it/sec) -training >> step=6600, episode=2 reward=0.7750944 (605.37 it/sec) -training >> step=6700, episode=2 reward=0.7521457 (571.46 it/sec) -training >> step=6800, episode=2 reward=0.7569418 (541.80 it/sec) -training >> step=6900, episode=2 reward=0.7274903 (513.92 it/sec) -training >> step=7000, episode=2 reward=0.7749886 (591.85 it/sec) -training >> step=7100, episode=2 reward=0.7675712 (584.85 it/sec) -training >> step=7200, episode=2 reward=0.7694924 (567.95 it/sec) -training >> step=7300, episode=2 reward=0.7574345 (545.16 it/sec) -training >> step=7400, episode=2 reward=0.7605236 (560.63 it/sec) -training >> step=7500, episode=2 reward=0.7773191 (544.85 it/sec) -training >> step=7600, episode=2 reward=0.7544769 (584.65 it/sec) -training >> step=7700, episode=2 reward=0.7614601 (555.59 it/sec) -training >> step=7800, episode=2 reward=0.7686692 (582.91 it/sec) -training >> step=7900, episode=2 reward=0.7495971 (541.83 it/sec) -training >> step=8000, episode=2 reward=0.7746779 (589.34 it/sec) -training >> step=8100, episode=2 reward=0.7585092 (488.15 it/sec) -training >> step=8200, episode=2 reward=0.7676331 (574.37 it/sec) -training >> step=8300, episode=2 reward=0.7669833 (578.23 it/sec) -training >> step=8400, episode=2 reward=0.7590157 (562.83 it/sec) -training >> step=8500, episode=2 reward=0.7706535 (558.69 it/sec) -training >> step=8600, episode=2 reward=0.7518531 (571.10 it/sec) -training >> step=8700, episode=2 reward=0.7561027 (570.74 it/sec) -training >> step=8800, episode=2 reward=0.7611458 (597.56 it/sec) -training >> step=8900, episode=2 reward=0.7696855 (594.42 it/sec) -training >> step=9000, episode=2 reward=0.763058 (558.34 it/sec) -training >> step=9100, episode=2 reward=0.770947 (564.76 it/sec) -training >> step=9200, episode=2 reward=0.7745354 (572.33 it/sec) -training >> step=9300, episode=2 reward=0.7761258 (581.11 it/sec) -training >> step=9400, episode=2 reward=0.7963482 (604.58 it/sec) -training >> step=9500, episode=2 reward=0.7583481 (630.06 it/sec) -training >> step=9600, episode=2 reward=0.7532795 (636.76 it/sec) -training >> step=9700, episode=2 reward=0.7753441 (563.94 it/sec) -training >> step=9800, episode=2 reward=0.7789963 (460.33 it/sec) -training >> step=9900, episode=2 reward=0.7714137 (565.05 it/sec) -training >> step=10000, episode=2 reward=0.7613019 (610.33 it/sec) -training >> step=10100, episode=2 reward=0.7674817 (645.69 it/sec) -training >> step=10200, episode=2 reward=0.767562 (627.77 it/sec) -training >> step=10300, episode=2 reward=0.7767941 (572.37 it/sec) -training >> step=10400, episode=2 reward=0.7478771 (571.51 it/sec) -training >> step=10500, episode=2 reward=0.7693269 (575.39 it/sec) -training >> step=10600, episode=2 reward=0.766874 (605.27 it/sec) -training >> step=10700, episode=2 reward=0.769649 (614.75 it/sec) -training >> step=10800, episode=2 reward=0.7338232 (594.83 it/sec) -training >> step=10900, episode=2 reward=0.7876658 (582.69 it/sec) -training >> step=11000, episode=2 reward=0.7668941 (598.91 it/sec) -training >> step=11100, episode=2 reward=0.7597018 (578.64 it/sec) -training >> step=11200, episode=2 reward=0.7755251 (622.48 it/sec) -training >> step=11300, episode=2 reward=0.7508457 (600.84 it/sec) -training >> step=11400, episode=2 reward=0.752899 (559.43 it/sec) -training >> step=11500, episode=2 reward=0.7728194 (551.93 it/sec) -training >> step=11600, episode=2 reward=0.730661 (611.97 it/sec) -training >> step=11700, episode=2 reward=0.748502 (547.08 it/sec) -training >> step=11800, episode=2 reward=0.7622154 (603.20 it/sec) -training >> step=11900, episode=2 reward=0.7510911 (591.05 it/sec) -training >> step=12000, episode=3 reward=0.7570533 (73.58 it/sec) -training >> step=12100, episode=3 reward=0.7656437 (618.92 it/sec) -training >> step=12200, episode=3 reward=0.745481 (562.40 it/sec) -training >> step=12300, episode=3 reward=0.7546537 (601.09 it/sec) -training >> step=12400, episode=3 reward=0.7623528 (582.48 it/sec) -training >> step=12500, episode=3 reward=0.7725447 (577.60 it/sec) -training >> step=12600, episode=3 reward=0.7559364 (587.29 it/sec) -training >> step=12700, episode=3 reward=0.76266 (579.18 it/sec) -training >> step=12800, episode=3 reward=0.7790766 (607.15 it/sec) -training >> step=12900, episode=3 reward=0.7582159 (578.13 it/sec) -training >> step=13000, episode=3 reward=0.7587578 (619.97 it/sec) -training >> step=13100, episode=3 reward=0.7599045 (593.22 it/sec) -training >> step=13200, episode=3 reward=0.7597032 (562.83 it/sec) -training >> step=13300, episode=3 reward=0.7700242 (585.38 it/sec) -training >> step=13400, episode=3 reward=0.7702058 (587.99 it/sec) -training >> step=13500, episode=3 reward=0.7448273 (588.74 it/sec) -training >> step=13600, episode=3 reward=0.7593668 (620.56 it/sec) -training >> step=13700, episode=3 reward=0.778592 (604.89 it/sec) -training >> step=13800, episode=3 reward=0.7592036 (591.82 it/sec) -training >> step=13900, episode=3 reward=0.7816717 (588.63 it/sec) -training >> step=14000, episode=3 reward=0.7542288 (569.03 it/sec) -training >> step=14100, episode=3 reward=0.7726169 (605.69 it/sec) -training >> step=14200, episode=3 reward=0.7638325 (600.69 it/sec) -training >> step=14300, episode=3 reward=0.7783384 (622.29 it/sec) -training >> step=14400, episode=3 reward=0.7592765 (576.52 it/sec) -training >> step=14500, episode=3 reward=0.7683129 (620.87 it/sec) -training >> step=14600, episode=3 reward=0.7685825 (570.61 it/sec) -training >> step=14700, episode=3 reward=0.7823049 (546.21 it/sec) -training >> step=14800, episode=3 reward=0.769813 (623.68 it/sec) -training >> step=14900, episode=3 reward=0.7536518 (583.92 it/sec) -training >> step=15000, episode=3 reward=0.7643883 (585.86 it/sec) -training >> step=15100, episode=3 reward=0.7748137 (593.54 it/sec) -training >> step=15200, episode=3 reward=0.7716281 (588.90 it/sec) -training >> step=15300, episode=3 reward=0.7583444 (582.38 it/sec) -training >> step=15400, episode=3 reward=0.7596889 (570.41 it/sec) -training >> step=15500, episode=3 reward=0.7477322 (621.35 it/sec) -training >> step=15600, episode=3 reward=0.756667 (570.83 it/sec) -training >> step=15700, episode=3 reward=0.7487915 (573.57 it/sec) -training >> step=15800, episode=3 reward=0.761925 (569.10 it/sec) -training >> step=15900, episode=3 reward=0.7604163 (427.61 it/sec) -training >> step=16000, episode=3 reward=0.7589059 (569.29 it/sec) -training >> step=16100, episode=3 reward=0.7598281 (603.88 it/sec) -training >> step=16200, episode=3 reward=0.7614579 (574.23 it/sec) -training >> step=16300, episode=3 reward=0.7316546 (582.05 it/sec) -training >> step=16400, episode=3 reward=0.8025081 (556.43 it/sec) -training >> step=16500, episode=3 reward=0.7648765 (549.52 it/sec) -training >> step=16600, episode=3 reward=0.7536571 (510.16 it/sec) -training >> step=16700, episode=3 reward=0.7641429 (564.89 it/sec) -training >> step=16800, episode=3 reward=0.7576976 (543.17 it/sec) -training >> step=16900, episode=3 reward=0.7646593 (587.31 it/sec) -training >> step=17000, episode=3 reward=0.7542835 (574.51 it/sec) -training >> step=17100, episode=3 reward=0.7315239 (571.02 it/sec) -training >> step=17200, episode=3 reward=0.7708604 (573.86 it/sec) -training >> step=17300, episode=3 reward=0.7639007 (586.49 it/sec) -training >> step=17400, episode=3 reward=0.7457371 (570.45 it/sec) -training >> step=17500, episode=3 reward=0.7551311 (584.90 it/sec) -training >> step=17600, episode=3 reward=0.761571 (553.92 it/sec) -training >> step=17700, episode=3 reward=0.7655391 (614.48 it/sec) -training >> step=17800, episode=3 reward=0.7636601 (575.62 it/sec) -training >> step=17900, episode=3 reward=0.7701355 (578.01 it/sec) -training >> step=18000, episode=4 reward=0.7655119 (94.44 it/sec) -training >> step=18100, episode=4 reward=0.7325081 (575.58 it/sec) -training >> step=18200, episode=4 reward=0.7328234 (575.92 it/sec) -training >> step=18300, episode=4 reward=0.7475934 (554.04 it/sec) -training >> step=18400, episode=4 reward=0.7612782 (595.63 it/sec) -training >> step=18500, episode=4 reward=0.7559188 (566.64 it/sec) -training >> step=18600, episode=4 reward=0.7540917 (524.52 it/sec) -training >> step=18700, episode=4 reward=0.7584674 (577.52 it/sec) -training >> step=18800, episode=4 reward=0.7622473 (569.22 it/sec) -training >> step=18900, episode=4 reward=0.7513758 (566.00 it/sec) -training >> step=19000, episode=4 reward=0.7632729 (616.71 it/sec) -training >> step=19100, episode=4 reward=0.7773078 (565.43 it/sec) -training >> step=19200, episode=4 reward=0.7556978 (577.59 it/sec) -training >> step=19300, episode=4 reward=0.7718554 (562.28 it/sec) -training >> step=19400, episode=4 reward=0.7629925 (574.95 it/sec) -training >> step=19500, episode=4 reward=0.7458067 (569.60 it/sec) -training >> step=19600, episode=4 reward=0.746497 (598.05 it/sec) -training >> step=19700, episode=4 reward=0.7591783 (573.49 it/sec) -training >> step=19800, episode=4 reward=0.7529306 (569.64 it/sec) -training >> step=19900, episode=4 reward=0.7600577 (548.16 it/sec) -training >> step=20000, episode=4 reward=0.7593914 (557.32 it/sec) -training >> step=20100, episode=4 reward=0.7536269 (571.60 it/sec) -training >> step=20200, episode=4 reward=0.7456782 (593.07 it/sec) -training >> step=20300, episode=4 reward=0.7525119 (586.77 it/sec) -training >> step=20400, episode=4 reward=0.7774879 (581.73 it/sec) -training >> step=20500, episode=4 reward=0.7385943 (553.05 it/sec) -training >> step=20600, episode=4 reward=0.7671191 (551.61 it/sec) -training >> step=20700, episode=4 reward=0.755758 (577.40 it/sec) -training >> step=20800, episode=4 reward=0.7672887 (590.78 it/sec) -training >> step=20900, episode=4 reward=0.7465327 (579.70 it/sec) -training >> step=21000, episode=4 reward=0.8049704 (585.43 it/sec) -training >> step=21100, episode=4 reward=0.7362263 (569.78 it/sec) -training >> step=21200, episode=4 reward=0.754869 (516.04 it/sec) -training >> step=21300, episode=4 reward=0.7823828 (550.03 it/sec) -training >> step=21400, episode=4 reward=0.7684525 (583.27 it/sec) -training >> step=21500, episode=4 reward=0.7708699 (603.54 it/sec) -training >> step=21600, episode=4 reward=0.7577325 (565.97 it/sec) -training >> step=21700, episode=4 reward=0.766227 (602.70 it/sec) -training >> step=21800, episode=4 reward=0.7744038 (493.53 it/sec) -training >> step=21900, episode=4 reward=0.7931352 (582.52 it/sec) -training >> step=22000, episode=4 reward=0.7571732 (548.29 it/sec) -training >> step=22100, episode=4 reward=0.7478532 (612.58 it/sec) -training >> step=22200, episode=4 reward=0.7836151 (542.23 it/sec) -training >> step=22300, episode=4 reward=0.7376906 (418.48 it/sec) -training >> step=22400, episode=4 reward=0.7658979 (544.14 it/sec) -training >> step=22500, episode=4 reward=0.7487357 (595.61 it/sec) -training >> step=22600, episode=4 reward=0.7519643 (578.34 it/sec) -training >> step=22700, episode=4 reward=0.7582445 (571.59 it/sec) -training >> step=22800, episode=4 reward=0.7662666 (598.60 it/sec) -training >> step=22900, episode=4 reward=0.7732882 (527.27 it/sec) -training >> step=23000, episode=4 reward=0.7447804 (597.81 it/sec) -training >> step=23100, episode=4 reward=0.7473006 (544.45 it/sec) -training >> step=23200, episode=4 reward=0.7497725 (605.57 it/sec) -training >> step=23300, episode=4 reward=0.7654662 (552.07 it/sec) -training >> step=23400, episode=4 reward=0.7530577 (595.96 it/sec) -training >> step=23500, episode=4 reward=0.753858 (489.58 it/sec) -training >> step=23600, episode=4 reward=0.755182 (564.15 it/sec) -training >> step=23700, episode=4 reward=0.766323 (544.90 it/sec) -training >> step=23800, episode=4 reward=0.7698866 (592.99 it/sec) -training >> step=23900, episode=4 reward=0.7653978 (582.85 it/sec) -training >> step=24000, episode=5 reward=0.7496746 (118.68 it/sec) -training >> step=24100, episode=5 reward=0.7672679 (551.87 it/sec) -training >> step=24200, episode=5 reward=0.7615555 (548.39 it/sec) -training >> step=24300, episode=5 reward=0.7368601 (476.01 it/sec) -training >> step=24400, episode=5 reward=0.7692152 (564.08 it/sec) -training >> step=24500, episode=5 reward=0.772753 (590.43 it/sec) -training >> step=24600, episode=5 reward=0.7650125 (585.86 it/sec) -training >> step=24700, episode=5 reward=0.7851151 (578.61 it/sec) -training >> step=24800, episode=5 reward=0.76724 (558.36 it/sec) -training >> step=24900, episode=5 reward=0.7545638 (579.56 it/sec) -training >> step=25000, episode=5 reward=0.7423618 (555.62 it/sec) -training >> step=25100, episode=5 reward=0.7467912 (595.31 it/sec) -training >> step=25200, episode=5 reward=0.7680374 (556.99 it/sec) -training >> step=25300, episode=5 reward=0.7720267 (609.22 it/sec) -training >> step=25400, episode=5 reward=0.7613539 (548.27 it/sec) -training >> step=25500, episode=5 reward=0.7557151 (583.84 it/sec) -training >> step=25600, episode=5 reward=0.7524825 (552.36 it/sec) -training >> step=25700, episode=5 reward=0.7705067 (581.43 it/sec) -training >> step=25800, episode=5 reward=0.7540035 (563.88 it/sec) -training >> step=25900, episode=5 reward=0.775359 (576.91 it/sec) -training >> step=26000, episode=5 reward=0.7773692 (541.94 it/sec) -training >> step=26100, episode=5 reward=0.7496319 (559.57 it/sec) -training >> step=26200, episode=5 reward=0.7492679 (599.05 it/sec) -training >> step=26300, episode=5 reward=0.7531162 (561.43 it/sec) -training >> step=26400, episode=5 reward=0.7607731 (572.25 it/sec) -training >> step=26500, episode=5 reward=0.7531728 (561.48 it/sec) -training >> step=26600, episode=5 reward=0.7703916 (528.64 it/sec) -training >> step=26700, episode=5 reward=0.7593289 (572.15 it/sec) -training >> step=26800, episode=5 reward=0.7514156 (563.49 it/sec) -training >> step=26900, episode=5 reward=0.7492712 (597.60 it/sec) -training >> step=27000, episode=5 reward=0.7518885 (513.09 it/sec) -training >> step=27100, episode=5 reward=0.7688088 (497.85 it/sec) -training >> step=27200, episode=5 reward=0.7690083 (546.68 it/sec) -training >> step=27300, episode=5 reward=0.7653158 (561.78 it/sec) -training >> step=27400, episode=5 reward=0.7695273 (552.66 it/sec) -training >> step=27500, episode=5 reward=0.7563307 (571.02 it/sec) -training >> step=27600, episode=5 reward=0.765369 (604.05 it/sec) -training >> step=27700, episode=5 reward=0.7683178 (551.42 it/sec) -training >> step=27800, episode=5 reward=0.747331 (607.15 it/sec) -training >> step=27900, episode=5 reward=0.7453434 (554.72 it/sec) -training >> step=28000, episode=5 reward=0.7465089 (592.34 it/sec) -training >> step=28100, episode=5 reward=0.7618308 (555.22 it/sec) -training >> step=28200, episode=5 reward=0.7713837 (570.74 it/sec) -training >> step=28300, episode=5 reward=0.7458248 (561.80 it/sec) -training >> step=28400, episode=5 reward=0.7708633 (555.59 it/sec) -training >> step=28500, episode=5 reward=0.7705156 (593.40 it/sec) -training >> step=28600, episode=5 reward=0.7490899 (562.88 it/sec) -training >> step=28700, episode=5 reward=0.7557784 (418.01 it/sec) -training >> step=28800, episode=5 reward=0.7694559 (591.36 it/sec) -training >> step=28900, episode=5 reward=0.7598618 (548.53 it/sec) -training >> step=29000, episode=5 reward=0.7624868 (573.42 it/sec) -training >> step=29100, episode=5 reward=0.7521271 (557.36 it/sec) -training >> step=29200, episode=5 reward=0.7596062 (587.72 it/sec) -training >> step=29300, episode=5 reward=0.7521184 (552.61 it/sec) -training >> step=29400, episode=5 reward=0.7754409 (588.32 it/sec) -training >> step=29500, episode=5 reward=0.7438452 (544.54 it/sec) -training >> step=29600, episode=5 reward=0.7608678 (563.16 it/sec) -training >> step=29700, episode=5 reward=0.7623867 (578.43 it/sec) -training >> step=29800, episode=5 reward=0.7442399 (582.83 it/sec) -training >> step=29900, episode=5 reward=0.7440094 (543.07 it/sec) -training >> step=30000, episode=6 reward=0.7530726 (117.68 it/sec) -training >> step=30100, episode=6 reward=0.7602891 (557.24 it/sec) -training >> step=30200, episode=6 reward=0.7553526 (552.39 it/sec) -training >> step=30300, episode=6 reward=0.7637648 (551.97 it/sec) -training >> step=30400, episode=6 reward=0.772849 (570.97 it/sec) -training >> step=30500, episode=6 reward=0.7668537 (548.39 it/sec) -training >> step=30600, episode=6 reward=0.7554818 (550.59 it/sec) -training >> step=30700, episode=6 reward=0.7786147 (587.10 it/sec) -training >> step=30800, episode=6 reward=0.7649679 (532.92 it/sec) -training >> step=30900, episode=6 reward=0.7635908 (598.70 it/sec) -training >> step=31000, episode=6 reward=0.7523547 (520.42 it/sec) -training >> step=31100, episode=6 reward=0.7688394 (602.84 it/sec) -training >> step=31200, episode=6 reward=0.7619565 (535.40 it/sec) -training >> step=31300, episode=6 reward=0.7491407 (590.50 it/sec) -training >> step=31400, episode=6 reward=0.7492722 (540.01 it/sec) -training >> step=31500, episode=6 reward=0.7552652 (573.03 it/sec) -training >> step=31600, episode=6 reward=0.7669393 (601.50 it/sec) -training >> step=31700, episode=6 reward=0.733812 (557.58 it/sec) -training >> step=31800, episode=6 reward=0.7672042 (568.83 it/sec) -training >> step=31900, episode=6 reward=0.7475746 (528.98 it/sec) -training >> step=32000, episode=6 reward=0.7623026 (542.73 it/sec) -training >> step=32100, episode=6 reward=0.7731941 (578.57 it/sec) -training >> step=32200, episode=6 reward=0.7398178 (567.02 it/sec) -training >> step=32300, episode=6 reward=0.7685305 (523.57 it/sec) -training >> step=32400, episode=6 reward=0.7780319 (532.11 it/sec) -training >> step=32500, episode=6 reward=0.7625167 (557.27 it/sec) -training >> step=32600, episode=6 reward=0.7465993 (546.64 it/sec) -training >> step=32700, episode=6 reward=0.7635517 (571.86 it/sec) -training >> step=32800, episode=6 reward=0.7724277 (581.92 it/sec) -training >> step=32900, episode=6 reward=0.767862 (524.58 it/sec) -training >> step=33000, episode=6 reward=0.7643712 (600.21 it/sec) -training >> step=33100, episode=6 reward=0.7713607 (516.46 it/sec) -training >> step=33200, episode=6 reward=0.7591281 (593.17 it/sec) -training >> step=33300, episode=6 reward=0.743866 (583.64 it/sec) -training >> step=33400, episode=6 reward=0.7685593 (576.92 it/sec) -training >> step=33500, episode=6 reward=0.7610748 (589.76 it/sec) -training >> step=33600, episode=6 reward=0.7760247 (558.78 it/sec) -training >> step=33700, episode=6 reward=0.7759142 (558.52 it/sec) -training >> step=33800, episode=6 reward=0.7712159 (564.20 it/sec) -training >> step=33900, episode=6 reward=0.7537031 (589.25 it/sec) -training >> step=34000, episode=6 reward=0.7682434 (513.02 it/sec) -training >> step=34100, episode=6 reward=0.7840191 (533.11 it/sec) -training >> step=34200, episode=6 reward=0.7735156 (585.69 it/sec) -training >> step=34300, episode=6 reward=0.7513078 (553.43 it/sec) -training >> step=34400, episode=6 reward=0.7613264 (592.64 it/sec) -training >> step=34500, episode=6 reward=0.7632502 (540.68 it/sec) -training >> step=34600, episode=6 reward=0.7656192 (578.15 it/sec) -training >> step=34700, episode=6 reward=0.7733082 (561.40 it/sec) -training >> step=34800, episode=6 reward=0.7592719 (535.78 it/sec) -training >> step=34900, episode=6 reward=0.7500972 (581.23 it/sec) -training >> step=35000, episode=6 reward=0.7457377 (565.73 it/sec) -training >> step=35100, episode=6 reward=0.7749681 (449.80 it/sec) -training >> step=35200, episode=6 reward=0.7684051 (527.97 it/sec) -training >> step=35300, episode=6 reward=0.7553928 (578.75 it/sec) -training >> step=35400, episode=6 reward=0.7592304 (537.92 it/sec) -training >> step=35500, episode=6 reward=0.7604721 (562.40 it/sec) -training >> step=35600, episode=6 reward=0.7638264 (572.63 it/sec) -training >> step=35700, episode=6 reward=0.7552573 (566.23 it/sec) -training >> step=35800, episode=6 reward=0.746586 (586.03 it/sec) -training >> step=35900, episode=7 reward=0.7557543 (118.48 it/sec) -training >> step=36000, episode=7 reward=0.7505803 (528.59 it/sec) -training >> step=36100, episode=7 reward=0.7557814 (537.63 it/sec) -training >> step=36200, episode=7 reward=0.7446139 (562.52 it/sec) -training >> step=36300, episode=7 reward=0.7923923 (520.30 it/sec) -training >> step=36400, episode=7 reward=0.7654393 (592.08 it/sec) -training >> step=36500, episode=7 reward=0.7717556 (514.63 it/sec) -training >> step=36600, episode=7 reward=0.7402154 (564.45 it/sec) -training >> step=36700, episode=7 reward=0.7731817 (573.90 it/sec) -training >> step=36800, episode=7 reward=0.7276261 (539.37 it/sec) -training >> step=36900, episode=7 reward=0.7771773 (569.73 it/sec) -training >> step=37000, episode=7 reward=0.7618096 (575.76 it/sec) -training >> step=37100, episode=7 reward=0.7575434 (530.63 it/sec) -training >> step=37200, episode=7 reward=0.7525197 (554.47 it/sec) -training >> step=37300, episode=7 reward=0.7536041 (556.30 it/sec) -training >> step=37400, episode=7 reward=0.7477233 (574.33 it/sec) -training >> step=37500, episode=7 reward=0.7664695 (508.39 it/sec) -training >> step=37600, episode=7 reward=0.7603899 (550.96 it/sec) -training >> step=37700, episode=7 reward=0.7466519 (542.44 it/sec) -training >> step=37800, episode=7 reward=0.7512773 (554.51 it/sec) -training >> step=37900, episode=7 reward=0.7732665 (548.49 it/sec) -training >> step=38000, episode=7 reward=0.7405071 (565.87 it/sec) -training >> step=38100, episode=7 reward=0.7621319 (580.24 it/sec) -training >> step=38200, episode=7 reward=0.7515126 (542.81 it/sec) -training >> step=38300, episode=7 reward=0.7363599 (556.62 it/sec) -training >> step=38400, episode=7 reward=0.7657608 (572.37 it/sec) -training >> step=38500, episode=7 reward=0.7652903 (534.64 it/sec) -training >> step=38600, episode=7 reward=0.7670174 (577.09 it/sec) -training >> step=38700, episode=7 reward=0.7756384 (561.70 it/sec) -training >> step=38800, episode=7 reward=0.7690869 (565.86 it/sec) -training >> step=38900, episode=7 reward=0.7757062 (563.16 it/sec) -training >> step=39000, episode=7 reward=0.7681011 (556.73 it/sec) -training >> step=39100, episode=7 reward=0.7466327 (548.31 it/sec) -training >> step=39200, episode=7 reward=0.7682036 (581.02 it/sec) -training >> step=39300, episode=7 reward=0.7578956 (556.38 it/sec) -training >> step=39400, episode=7 reward=0.770215 (538.47 it/sec) -training >> step=39500, episode=7 reward=0.7676948 (582.67 it/sec) -training >> step=39600, episode=7 reward=0.7799371 (558.53 it/sec) -training >> step=39700, episode=7 reward=0.7452815 (558.20 it/sec) -training >> step=39800, episode=7 reward=0.7831135 (572.53 it/sec) -training >> step=39900, episode=7 reward=0.7663926 (555.54 it/sec) -training >> step=40000, episode=7 reward=0.7535189 (575.06 it/sec) -training >> step=40100, episode=7 reward=0.7570522 (533.83 it/sec) -training >> step=40200, episode=7 reward=0.7557647 (535.05 it/sec) -training >> step=40300, episode=7 reward=0.7524396 (592.27 it/sec) -training >> step=40400, episode=7 reward=0.7599039 (551.99 it/sec) -training >> step=40500, episode=7 reward=0.7501997 (594.85 it/sec) -training >> step=40600, episode=7 reward=0.7672884 (555.81 it/sec) -training >> step=40700, episode=7 reward=0.7802626 (537.76 it/sec) -training >> step=40800, episode=7 reward=0.7614166 (582.50 it/sec) -training >> step=40900, episode=7 reward=0.7409405 (569.05 it/sec) -training >> step=41000, episode=7 reward=0.7529604 (563.77 it/sec) -training >> step=41100, episode=7 reward=0.7591488 (564.06 it/sec) -training >> step=41200, episode=7 reward=0.7688022 (540.51 it/sec) -training >> step=41300, episode=7 reward=0.7804639 (573.08 it/sec) -training >> step=41400, episode=7 reward=0.7640489 (396.48 it/sec) -training >> step=41500, episode=7 reward=0.7576073 (566.42 it/sec) -training >> step=41600, episode=7 reward=0.7540115 (583.66 it/sec) -training >> step=41700, episode=7 reward=0.7757269 (509.03 it/sec) -training >> step=41800, episode=7 reward=0.7673362 (575.39 it/sec) -training >> step=41900, episode=8 reward=0.7755771 (129.89 it/sec) -training >> step=42000, episode=8 reward=0.7470469 (522.15 it/sec) -training >> step=42100, episode=8 reward=0.7689599 (533.21 it/sec) -training >> step=42200, episode=8 reward=0.7432153 (563.93 it/sec) -training >> step=42300, episode=8 reward=0.7693337 (527.11 it/sec) -training >> step=42400, episode=8 reward=0.759109 (578.65 it/sec) -training >> step=42500, episode=8 reward=0.7805647 (549.02 it/sec) -training >> step=42600, episode=8 reward=0.7715946 (540.29 it/sec) -training >> step=42700, episode=8 reward=0.7693471 (536.22 it/sec) -training >> step=42800, episode=8 reward=0.7796775 (515.06 it/sec) -training >> step=42900, episode=8 reward=0.762098 (563.25 it/sec) -training >> step=43000, episode=8 reward=0.7765296 (565.61 it/sec) -training >> step=43100, episode=8 reward=0.7702465 (514.63 it/sec) -training >> step=43200, episode=8 reward=0.7692708 (584.85 it/sec) -training >> step=43300, episode=8 reward=0.762309 (541.79 it/sec) -training >> step=43400, episode=8 reward=0.7549915 (573.41 it/sec) -training >> step=43500, episode=8 reward=0.7585714 (600.77 it/sec) -training >> step=43600, episode=8 reward=0.7548274 (530.16 it/sec) -training >> step=43700, episode=8 reward=0.7428575 (582.30 it/sec) -training >> step=43800, episode=8 reward=0.7694699 (540.11 it/sec) -training >> step=43900, episode=8 reward=0.7688929 (559.73 it/sec) -training >> step=44000, episode=8 reward=0.7636529 (589.30 it/sec) -training >> step=44100, episode=8 reward=0.7717574 (559.02 it/sec) -training >> step=44200, episode=8 reward=0.7859811 (524.97 it/sec) -training >> step=44300, episode=8 reward=0.7778811 (533.54 it/sec) -training >> step=44400, episode=8 reward=0.7454565 (507.89 it/sec) -training >> step=44500, episode=8 reward=0.7688645 (608.20 it/sec) -training >> step=44600, episode=8 reward=0.7691327 (553.49 it/sec) -training >> step=44700, episode=8 reward=0.7606529 (562.82 it/sec) -training >> step=44800, episode=8 reward=0.768106 (546.31 it/sec) -training >> step=44900, episode=8 reward=0.7797679 (543.15 it/sec) -training >> step=45000, episode=8 reward=0.7924733 (550.21 it/sec) -training >> step=45100, episode=8 reward=0.7504036 (567.22 it/sec) -training >> step=45200, episode=8 reward=0.7749874 (537.30 it/sec) -training >> step=45300, episode=8 reward=0.7563273 (519.97 it/sec) -training >> step=45400, episode=8 reward=0.7736651 (553.38 it/sec) -training >> step=45500, episode=8 reward=0.7772926 (550.64 it/sec) -training >> step=45600, episode=8 reward=0.7831126 (542.89 it/sec) -training >> step=45700, episode=8 reward=0.7429341 (547.98 it/sec) -training >> step=45800, episode=8 reward=0.7414984 (565.04 it/sec) -training >> step=45900, episode=8 reward=0.7706831 (531.97 it/sec) -training >> step=46000, episode=8 reward=0.7902769 (558.40 it/sec) -training >> step=46100, episode=8 reward=0.7726678 (568.79 it/sec) -training >> step=46200, episode=8 reward=0.748221 (526.46 it/sec) -training >> step=46300, episode=8 reward=0.7241631 (577.98 it/sec) -training >> step=46400, episode=8 reward=0.7382689 (550.03 it/sec) -training >> step=46500, episode=8 reward=0.7728905 (508.83 it/sec) -training >> step=46600, episode=8 reward=0.7496408 (576.11 it/sec) -training >> step=46700, episode=8 reward=0.7589714 (531.81 it/sec) -training >> step=46800, episode=8 reward=0.7448423 (582.32 it/sec) -training >> step=46900, episode=8 reward=0.7370875 (576.59 it/sec) -training >> step=47000, episode=8 reward=0.7466053 (543.54 it/sec) -training >> step=47100, episode=8 reward=0.7734217 (534.58 it/sec) -training >> step=47200, episode=8 reward=0.77801 (564.37 it/sec) -training >> step=47300, episode=8 reward=0.7407444 (570.72 it/sec) -training >> step=47400, episode=8 reward=0.7559077 (569.31 it/sec) -training >> step=47500, episode=8 reward=0.7706453 (543.49 it/sec) -training >> step=47600, episode=8 reward=0.7681361 (491.43 it/sec) -training >> step=47700, episode=8 reward=0.7568027 (415.87 it/sec) -training >> step=47800, episode=8 reward=0.7422469 (535.71 it/sec) -training >> step=47900, episode=9 reward=0.7700111 (139.22 it/sec) -training >> step=48000, episode=9 reward=0.758866 (451.18 it/sec) -training >> step=48100, episode=9 reward=0.7750275 (549.03 it/sec) -training >> step=48200, episode=9 reward=0.7630244 (536.48 it/sec) -training >> step=48300, episode=9 reward=0.7564521 (562.74 it/sec) -training >> step=48400, episode=9 reward=0.7734724 (526.34 it/sec) -training >> step=48500, episode=9 reward=0.7601682 (557.91 it/sec) -training >> step=48600, episode=9 reward=0.740137 (515.85 it/sec) -training >> step=48700, episode=9 reward=0.7628065 (583.82 it/sec) -training >> step=48800, episode=9 reward=0.7549378 (555.49 it/sec) -training >> step=48900, episode=9 reward=0.7483482 (545.81 it/sec) -training >> step=49000, episode=9 reward=0.7964258 (538.45 it/sec) -training >> step=49100, episode=9 reward=0.7547176 (521.42 it/sec) -training >> step=49200, episode=9 reward=0.7775724 (581.25 it/sec) -training >> step=49300, episode=9 reward=0.7512217 (568.03 it/sec) -training >> step=49400, episode=9 reward=0.7818958 (571.77 it/sec) -training >> step=49500, episode=9 reward=0.7359545 (529.99 it/sec) -training >> step=49600, episode=9 reward=0.754764 (559.93 it/sec) -training >> step=49700, episode=9 reward=0.7438862 (512.78 it/sec) -training >> step=49800, episode=9 reward=0.7561501 (562.02 it/sec) -training >> step=49900, episode=9 reward=0.750889 (571.51 it/sec) -training >> step=50000, episode=9 reward=0.7405975 (564.70 it/sec) -training >> step=50100, episode=9 reward=0.7713825 (551.88 it/sec) -training >> step=50200, episode=9 reward=0.7562082 (561.05 it/sec) -training >> step=50300, episode=9 reward=0.7378758 (533.30 it/sec) -training >> step=50400, episode=9 reward=0.7489986 (583.00 it/sec) -training >> step=50500, episode=9 reward=0.7653326 (564.11 it/sec) -training >> step=50600, episode=9 reward=0.7800827 (565.40 it/sec) -training >> step=50700, episode=9 reward=0.7890338 (482.28 it/sec) -training >> step=50800, episode=9 reward=0.7572945 (552.11 it/sec) -training >> step=50900, episode=9 reward=0.7662445 (539.61 it/sec) -training >> step=51000, episode=9 reward=0.7705028 (581.07 it/sec) -training >> step=51100, episode=9 reward=0.7541645 (555.33 it/sec) -training >> step=51200, episode=9 reward=0.7509906 (534.92 it/sec) -training >> step=51300, episode=9 reward=0.7310396 (560.83 it/sec) -training >> step=51400, episode=9 reward=0.7693391 (553.75 it/sec) -training >> step=51500, episode=9 reward=0.7431379 (596.63 it/sec) -training >> step=51600, episode=9 reward=0.7625012 (561.35 it/sec) -training >> step=51700, episode=9 reward=0.7616337 (557.76 it/sec) -training >> step=51800, episode=9 reward=0.7884456 (534.73 it/sec) -training >> step=51900, episode=9 reward=0.7627114 (528.86 it/sec) -training >> step=52000, episode=9 reward=0.7725187 (547.35 it/sec) -training >> step=52100, episode=9 reward=0.7365295 (548.46 it/sec) -training >> step=52200, episode=9 reward=0.776517 (571.73 it/sec) -training >> step=52300, episode=9 reward=0.7702463 (588.99 it/sec) -training >> step=52400, episode=9 reward=0.7520227 (510.80 it/sec) -training >> step=52500, episode=9 reward=0.7664065 (539.60 it/sec) -training >> step=52600, episode=9 reward=0.7444422 (569.49 it/sec) -training >> step=52700, episode=9 reward=0.7768833 (573.91 it/sec) -training >> step=52800, episode=9 reward=0.747615 (553.30 it/sec) -training >> step=52900, episode=9 reward=0.7430157 (501.31 it/sec) -training >> step=53000, episode=9 reward=0.755634 (542.08 it/sec) -training >> step=53100, episode=9 reward=0.7987233 (580.89 it/sec) -training >> step=53200, episode=9 reward=0.7597058 (579.87 it/sec) -training >> step=53300, episode=9 reward=0.7531466 (582.72 it/sec) -training >> step=53400, episode=9 reward=0.7519019 (514.29 it/sec) -training >> step=53500, episode=9 reward=0.749948 (453.70 it/sec) -training >> step=53600, episode=9 reward=0.7465997 (520.63 it/sec) -training >> step=53700, episode=9 reward=0.7756712 (574.71 it/sec) -training >> step=53800, episode=9 reward=0.7540203 (569.53 it/sec) -training >> step=53900, episode=10 reward=0.755813 (138.89 it/sec) -training >> step=54000, episode=10 reward=0.7659718 (530.33 it/sec) -training >> step=54100, episode=10 reward=0.7479295 (562.78 it/sec) -training >> step=54200, episode=10 reward=0.7426901 (524.87 it/sec) -training >> step=54300, episode=10 reward=0.7512552 (526.51 it/sec) -training >> step=54400, episode=10 reward=0.7702242 (519.93 it/sec) -training >> step=54500, episode=10 reward=0.7485915 (529.07 it/sec) -training >> step=54600, episode=10 reward=0.7552266 (561.13 it/sec) -training >> step=54700, episode=10 reward=0.7618163 (588.84 it/sec) -training >> step=54800, episode=10 reward=0.7740419 (538.91 it/sec) -training >> step=54900, episode=10 reward=0.7546485 (549.91 it/sec) -training >> step=55000, episode=10 reward=0.7437792 (547.08 it/sec) -training >> step=55100, episode=10 reward=0.7497251 (547.03 it/sec) -training >> step=55200, episode=10 reward=0.7698456 (581.20 it/sec) -training >> step=55300, episode=10 reward=0.7873487 (557.15 it/sec) -training >> step=55400, episode=10 reward=0.7698429 (534.48 it/sec) -training >> step=55500, episode=10 reward=0.7658557 (546.20 it/sec) -training >> step=55600, episode=10 reward=0.759032 (524.47 it/sec) -training >> step=55700, episode=10 reward=0.7684076 (579.18 it/sec) -training >> step=55800, episode=10 reward=0.7465296 (580.05 it/sec) -training >> step=55900, episode=10 reward=0.7797888 (555.63 it/sec) -training >> step=56000, episode=10 reward=0.768802 (517.92 it/sec) -training >> step=56100, episode=10 reward=0.7471151 (560.38 it/sec) -training >> step=56200, episode=10 reward=0.7720887 (555.74 it/sec) -training >> step=56300, episode=10 reward=0.7807527 (573.60 it/sec) -training >> step=56400, episode=10 reward=0.7463521 (569.58 it/sec) -training >> step=56500, episode=10 reward=0.748146 (579.01 it/sec) -training >> step=56600, episode=10 reward=0.777564 (496.20 it/sec) -training >> step=56700, episode=10 reward=0.7576426 (516.83 it/sec) -training >> step=56800, episode=10 reward=0.7555159 (578.41 it/sec) -training >> step=56900, episode=10 reward=0.7708125 (550.68 it/sec) -training >> step=57000, episode=10 reward=0.7622968 (557.05 it/sec) -training >> step=57100, episode=10 reward=0.7712829 (559.45 it/sec) -training >> step=57200, episode=10 reward=0.7850128 (531.90 it/sec) -training >> step=57300, episode=10 reward=0.749144 (541.04 it/sec) -training >> step=57400, episode=10 reward=0.7628657 (566.98 it/sec) -training >> step=57500, episode=10 reward=0.7545581 (566.34 it/sec) -training >> step=57600, episode=10 reward=0.7708722 (588.67 it/sec) -training >> step=57700, episode=10 reward=0.771324 (532.20 it/sec) -training >> step=57800, episode=10 reward=0.7566294 (539.40 it/sec) -training >> step=57900, episode=10 reward=0.7786229 (549.68 it/sec) -training >> step=58000, episode=10 reward=0.7497509 (579.44 it/sec) -training >> step=58100, episode=10 reward=0.7654756 (557.55 it/sec) -training >> step=58200, episode=10 reward=0.7645726 (566.11 it/sec) -training >> step=58300, episode=10 reward=0.7403983 (532.37 it/sec) -training >> step=58400, episode=10 reward=0.7622291 (530.99 it/sec) -training >> step=58500, episode=10 reward=0.7667825 (562.05 it/sec) -training >> step=58600, episode=10 reward=0.7633752 (558.71 it/sec) -training >> step=58700, episode=10 reward=0.7465168 (525.31 it/sec) -training >> step=58800, episode=10 reward=0.7424403 (507.25 it/sec) -training >> step=58900, episode=10 reward=0.7506618 (516.91 it/sec) -training >> step=59000, episode=10 reward=0.7718455 (562.52 it/sec) -training >> step=59100, episode=10 reward=0.7614397 (551.24 it/sec) -training >> step=59200, episode=10 reward=0.7451388 (558.01 it/sec) -training >> step=59300, episode=10 reward=0.7698299 (567.99 it/sec) -training >> step=59400, episode=10 reward=0.7442101 (504.51 it/sec) -training >> step=59500, episode=10 reward=0.7587627 (591.10 it/sec) -training >> step=59600, episode=10 reward=0.7440252 (542.49 it/sec) -training >> step=59700, episode=10 reward=0.757029 (561.01 it/sec) -training >> step=59800, episode=10 reward=0.7780309 (575.74 it/sec) -training >> step=59900, episode=11 reward=0.7584422 (132.66 it/sec) -training >> step=60000, episode=11 reward=0.7595277 (539.21 it/sec) -training >> step=60100, episode=11 reward=0.7433395 (544.36 it/sec) -training >> step=60200, episode=11 reward=0.7566919 (520.74 it/sec) -training >> step=60300, episode=11 reward=0.7416531 (544.54 it/sec) -training >> step=60400, episode=11 reward=0.7522568 (539.52 it/sec) -training >> step=60500, episode=11 reward=0.726175 (534.24 it/sec) -training >> step=60600, episode=11 reward=0.742323 (547.28 it/sec) -training >> step=60700, episode=11 reward=0.7405381 (560.48 it/sec) -training >> step=60800, episode=11 reward=0.7655211 (553.00 it/sec) -training >> step=60900, episode=11 reward=0.7877249 (545.18 it/sec) -training >> step=61000, episode=11 reward=0.741675 (549.08 it/sec) -training >> step=61100, episode=11 reward=0.7700527 (544.36 it/sec) -training >> step=61200, episode=11 reward=0.7596636 (558.44 it/sec) -training >> step=61300, episode=11 reward=0.7493073 (557.21 it/sec) -training >> step=61400, episode=11 reward=0.7315086 (537.17 it/sec) -training >> step=61500, episode=11 reward=0.7755406 (540.71 it/sec) -training >> step=61600, episode=11 reward=0.7829983 (572.89 it/sec) -training >> step=61700, episode=11 reward=0.7848665 (524.31 it/sec) -training >> step=61800, episode=11 reward=0.7623313 (583.26 it/sec) -training >> step=61900, episode=11 reward=0.7708878 (515.81 it/sec) -training >> step=62000, episode=11 reward=0.756608 (558.88 it/sec) -training >> step=62100, episode=11 reward=0.7340962 (547.21 it/sec) -training >> step=62200, episode=11 reward=0.7522664 (557.44 it/sec) -training >> step=62300, episode=11 reward=0.7650993 (534.82 it/sec) -training >> step=62400, episode=11 reward=0.7522184 (548.52 it/sec) -training >> step=62500, episode=11 reward=0.769682 (538.46 it/sec) -training >> step=62600, episode=11 reward=0.7661775 (532.73 it/sec) -training >> step=62700, episode=11 reward=0.7873708 (580.32 it/sec) -training >> step=62800, episode=11 reward=0.750834 (564.87 it/sec) -training >> step=62900, episode=11 reward=0.7793688 (562.33 it/sec) -training >> step=63000, episode=11 reward=0.7623675 (521.11 it/sec) -training >> step=63100, episode=11 reward=0.7611302 (505.78 it/sec) -training >> step=63200, episode=11 reward=0.7748803 (544.79 it/sec) -training >> step=63300, episode=11 reward=0.7705248 (572.10 it/sec) -training >> step=63400, episode=11 reward=0.7664545 (561.63 it/sec) -training >> step=63500, episode=11 reward=0.7687284 (562.63 it/sec) -training >> step=63600, episode=11 reward=0.774645 (518.30 it/sec) -training >> step=63700, episode=11 reward=0.7609801 (570.99 it/sec) -training >> step=63800, episode=11 reward=0.739926 (465.88 it/sec) -training >> step=63900, episode=11 reward=0.765861 (501.24 it/sec) -training >> step=64000, episode=11 reward=0.7575256 (552.37 it/sec) -training >> step=64100, episode=11 reward=0.7592548 (529.05 it/sec) -training >> step=64200, episode=11 reward=0.7555152 (518.36 it/sec) -training >> step=64300, episode=11 reward=0.7581179 (537.87 it/sec) -training >> step=64400, episode=11 reward=0.7731084 (557.34 it/sec) -training >> step=64500, episode=11 reward=0.7228866 (555.64 it/sec) -training >> step=64600, episode=11 reward=0.7633886 (560.25 it/sec) -training >> step=64700, episode=11 reward=0.7688793 (552.70 it/sec) -training >> step=64800, episode=11 reward=0.7580174 (547.17 it/sec) -training >> step=64900, episode=11 reward=0.7525854 (553.85 it/sec) -training >> step=65000, episode=11 reward=0.7417935 (569.03 it/sec) -training >> step=65100, episode=11 reward=0.7480033 (567.49 it/sec) -training >> step=65200, episode=11 reward=0.7687464 (556.05 it/sec) -training >> step=65300, episode=11 reward=0.7549376 (542.87 it/sec) -training >> step=65400, episode=11 reward=0.7422624 (514.57 it/sec) -training >> step=65500, episode=11 reward=0.7889601 (564.57 it/sec) -training >> step=65600, episode=11 reward=0.7633572 (553.60 it/sec) -training >> step=65700, episode=11 reward=0.7574514 (559.15 it/sec) -training >> step=65800, episode=12 reward=0.77613 (131.66 it/sec) -training >> step=65900, episode=12 reward=0.7784333 (532.39 it/sec) -training >> step=66000, episode=12 reward=0.7741461 (499.66 it/sec) -training >> step=66100, episode=12 reward=0.7514967 (507.62 it/sec) -training >> step=66200, episode=12 reward=0.7706798 (497.74 it/sec) -training >> step=66300, episode=12 reward=0.7520825 (567.33 it/sec) -training >> step=66400, episode=12 reward=0.7635258 (547.91 it/sec) -training >> step=66500, episode=12 reward=0.7692829 (562.77 it/sec) -training >> step=66600, episode=12 reward=0.763193 (527.87 it/sec) -training >> step=66700, episode=12 reward=0.7688491 (545.33 it/sec) -training >> step=66800, episode=12 reward=0.7740843 (576.60 it/sec) -training >> step=66900, episode=12 reward=0.7635729 (555.18 it/sec) -training >> step=67000, episode=12 reward=0.7774664 (541.71 it/sec) -training >> step=67100, episode=12 reward=0.7628765 (560.97 it/sec) -training >> step=67200, episode=12 reward=0.7538591 (527.22 it/sec) -training >> step=67300, episode=12 reward=0.7686174 (550.96 it/sec) -training >> step=67400, episode=12 reward=0.7776114 (552.88 it/sec) -training >> step=67500, episode=12 reward=0.7514033 (523.99 it/sec) -training >> step=67600, episode=12 reward=0.7559007 (568.46 it/sec) -training >> step=67700, episode=12 reward=0.7675604 (508.21 it/sec) -training >> step=67800, episode=12 reward=0.7591578 (537.65 it/sec) -training >> step=67900, episode=12 reward=0.7763134 (569.93 it/sec) -training >> step=68000, episode=12 reward=0.749332 (558.09 it/sec) -training >> step=68100, episode=12 reward=0.7602439 (536.24 it/sec) -training >> step=68200, episode=12 reward=0.7608355 (517.52 it/sec) -training >> step=68300, episode=12 reward=0.764973 (532.00 it/sec) -training >> step=68400, episode=12 reward=0.78342 (578.06 it/sec) -training >> step=68500, episode=12 reward=0.7507045 (560.38 it/sec) -training >> step=68600, episode=12 reward=0.7700577 (524.69 it/sec) -training >> step=68700, episode=12 reward=0.7633111 (565.21 it/sec) -training >> step=68800, episode=12 reward=0.7594152 (522.47 it/sec) -training >> step=68900, episode=12 reward=0.7404619 (554.73 it/sec) -training >> step=69000, episode=12 reward=0.7476732 (504.16 it/sec) -training >> step=69100, episode=12 reward=0.7651378 (524.88 it/sec) -training >> step=69200, episode=12 reward=0.767352 (578.50 it/sec) -training >> step=69300, episode=12 reward=0.7743624 (532.16 it/sec) -training >> step=69400, episode=12 reward=0.7444618 (541.63 it/sec) -training >> step=69500, episode=12 reward=0.7566872 (576.59 it/sec) -training >> step=69600, episode=12 reward=0.7671821 (541.42 it/sec) -training >> step=69700, episode=12 reward=0.7661225 (530.04 it/sec) -training >> step=69800, episode=12 reward=0.7461893 (571.73 it/sec) -training >> step=69900, episode=12 reward=0.7798329 (497.12 it/sec) -training >> step=70000, episode=12 reward=0.7828645 (559.28 it/sec) -training >> step=70100, episode=12 reward=0.7722332 (553.97 it/sec) -training >> step=70200, episode=12 reward=0.7500719 (551.41 it/sec) -training >> step=70300, episode=12 reward=0.7681174 (560.67 it/sec) -training >> step=70400, episode=12 reward=0.7592423 (572.56 it/sec) -training >> step=70500, episode=12 reward=0.7657657 (515.75 it/sec) -training >> step=70600, episode=12 reward=0.7623147 (531.00 it/sec) -training >> step=70700, episode=12 reward=0.7708397 (527.96 it/sec) -training >> step=70800, episode=12 reward=0.7343735 (536.87 it/sec) -training >> step=70900, episode=12 reward=0.7443994 (587.81 it/sec) -training >> step=71000, episode=12 reward=0.7548268 (532.65 it/sec) -training >> step=71100, episode=12 reward=0.743157 (538.46 it/sec) -training >> step=71200, episode=12 reward=0.7662209 (585.43 it/sec) -training >> step=71300, episode=12 reward=0.7523357 (521.19 it/sec) -training >> step=71400, episode=12 reward=0.7560546 (583.03 it/sec) -training >> step=71500, episode=12 reward=0.7796604 (562.82 it/sec) -training >> step=71600, episode=12 reward=0.7435218 (521.08 it/sec) -training >> step=71700, episode=12 reward=0.7644943 (563.51 it/sec) -training >> step=71800, episode=13 reward=0.7729602 (136.56 it/sec) -training >> step=71900, episode=13 reward=0.7451292 (472.17 it/sec) -training >> step=72000, episode=13 reward=0.7811654 (568.00 it/sec) -training >> step=72100, episode=13 reward=0.7502828 (523.54 it/sec) -training >> step=72200, episode=13 reward=0.7414531 (545.45 it/sec) -training >> step=72300, episode=13 reward=0.7638684 (561.57 it/sec) -training >> step=72400, episode=13 reward=0.75816 (534.11 it/sec) -training >> step=72500, episode=13 reward=0.7782711 (541.19 it/sec) -training >> step=72600, episode=13 reward=0.7381675 (551.39 it/sec) -training >> step=72700, episode=13 reward=0.7734674 (561.94 it/sec) -training >> step=72800, episode=13 reward=0.7651363 (506.72 it/sec) -training >> step=72900, episode=13 reward=0.7572563 (574.47 it/sec) -training >> step=73000, episode=13 reward=0.7765377 (531.34 it/sec) -training >> step=73100, episode=13 reward=0.749599 (555.37 it/sec) -training >> step=73200, episode=13 reward=0.7704661 (531.90 it/sec) -training >> step=73300, episode=13 reward=0.7618056 (561.86 it/sec) -training >> step=73400, episode=13 reward=0.7721866 (587.83 it/sec) -training >> step=73500, episode=13 reward=0.7671574 (514.31 it/sec) -training >> step=73600, episode=13 reward=0.7708411 (530.55 it/sec) -training >> step=73700, episode=13 reward=0.7647879 (551.75 it/sec) -training >> step=73800, episode=13 reward=0.7765052 (547.48 it/sec) -training >> step=73900, episode=13 reward=0.7568914 (548.20 it/sec) -training >> step=74000, episode=13 reward=0.7562071 (564.55 it/sec) -training >> step=74100, episode=13 reward=0.7672319 (473.94 it/sec) -training >> step=74200, episode=13 reward=0.744291 (470.63 it/sec) -training >> step=74300, episode=13 reward=0.7796859 (548.72 it/sec) -training >> step=74400, episode=13 reward=0.7943214 (543.77 it/sec) -training >> step=74500, episode=13 reward=0.7642424 (539.56 it/sec) -training >> step=74600, episode=13 reward=0.7675673 (511.76 it/sec) -training >> step=74700, episode=13 reward=0.7677274 (537.74 it/sec) -training >> step=74800, episode=13 reward=0.7648415 (521.19 it/sec) -training >> step=74900, episode=13 reward=0.774656 (585.46 it/sec) -training >> step=75000, episode=13 reward=0.7419816 (558.15 it/sec) -training >> step=75100, episode=13 reward=0.7311966 (496.92 it/sec) -training >> step=75200, episode=13 reward=0.7612973 (576.56 it/sec) -training >> step=75300, episode=13 reward=0.7700364 (550.99 it/sec) -training >> step=75400, episode=13 reward=0.7755631 (543.16 it/sec) -training >> step=75500, episode=13 reward=0.7528496 (579.52 it/sec) -training >> step=75600, episode=13 reward=0.748358 (539.86 it/sec) -training >> step=75700, episode=13 reward=0.7581325 (532.09 it/sec) -training >> step=75800, episode=13 reward=0.7538983 (536.32 it/sec) -training >> step=75900, episode=13 reward=0.7771138 (541.30 it/sec) -training >> step=76000, episode=13 reward=0.765372 (574.21 it/sec) -training >> step=76100, episode=13 reward=0.7580665 (573.63 it/sec) -training >> step=76200, episode=13 reward=0.7511638 (515.06 it/sec) -training >> step=76300, episode=13 reward=0.7530798 (552.10 it/sec) -training >> step=76400, episode=13 reward=0.7641065 (527.66 it/sec) -training >> step=76500, episode=13 reward=0.7627228 (559.54 it/sec) -training >> step=76600, episode=13 reward=0.7142763 (564.45 it/sec) -training >> step=76700, episode=13 reward=0.7663992 (549.43 it/sec) -training >> step=76800, episode=13 reward=0.720192 (549.83 it/sec) -training >> step=76900, episode=13 reward=0.7564477 (526.77 it/sec) -training >> step=77000, episode=13 reward=0.7592422 (570.55 it/sec) -training >> step=77100, episode=13 reward=0.7487962 (531.72 it/sec) -training >> step=77200, episode=13 reward=0.7490406 (559.46 it/sec) -training >> step=77300, episode=13 reward=0.7634392 (529.56 it/sec) -training >> step=77400, episode=13 reward=0.7723408 (531.66 it/sec) -training >> step=77500, episode=13 reward=0.7576514 (537.42 it/sec) -training >> step=77600, episode=13 reward=0.7606124 (587.29 it/sec) -training >> step=77700, episode=13 reward=0.7671875 (538.20 it/sec) -training >> step=77800, episode=14 reward=0.7626969 (139.14 it/sec) -training >> step=77900, episode=14 reward=0.7821506 (530.33 it/sec) -training >> step=78000, episode=14 reward=0.7445655 (581.80 it/sec) -training >> step=78100, episode=14 reward=0.7781594 (524.91 it/sec) -training >> step=78200, episode=14 reward=0.7690323 (508.43 it/sec) -training >> step=78300, episode=14 reward=0.7557381 (538.59 it/sec) -training >> step=78400, episode=14 reward=0.7783216 (544.71 it/sec) -training >> step=78500, episode=14 reward=0.765307 (541.28 it/sec) -training >> step=78600, episode=14 reward=0.7530512 (556.54 it/sec) -training >> step=78700, episode=14 reward=0.7628965 (559.96 it/sec) -training >> step=78800, episode=14 reward=0.7665936 (541.66 it/sec) -training >> step=78900, episode=14 reward=0.7748368 (528.18 it/sec) -training >> step=79000, episode=14 reward=0.7444507 (531.78 it/sec) -training >> step=79100, episode=14 reward=0.7721753 (555.06 it/sec) -training >> step=79200, episode=14 reward=0.771652 (531.51 it/sec) -training >> step=79300, episode=14 reward=0.7384581 (519.77 it/sec) -training >> step=79400, episode=14 reward=0.7604744 (502.42 it/sec) -training >> step=79500, episode=14 reward=0.7513033 (586.61 it/sec) -training >> step=79600, episode=14 reward=0.7584803 (539.21 it/sec) -training >> step=79700, episode=14 reward=0.7903147 (555.21 it/sec) -training >> step=79800, episode=14 reward=0.7701727 (555.48 it/sec) -training >> step=79900, episode=14 reward=0.7557907 (531.40 it/sec) -training >> step=80000, episode=14 reward=0.7527802 (538.26 it/sec) -training >> step=80100, episode=14 reward=0.763603 (481.40 it/sec) -training >> step=80200, episode=14 reward=0.7689087 (516.19 it/sec) -training >> step=80300, episode=14 reward=0.7616608 (552.92 it/sec) -training >> step=80400, episode=14 reward=0.7585976 (547.83 it/sec) -training >> step=80500, episode=14 reward=0.7732173 (543.72 it/sec) -training >> step=80600, episode=14 reward=0.7628629 (561.11 it/sec) -training >> step=80700, episode=14 reward=0.7727145 (532.94 it/sec) -training >> step=80800, episode=14 reward=0.7595831 (526.28 it/sec) -training >> step=80900, episode=14 reward=0.7569371 (525.59 it/sec) -training >> step=81000, episode=14 reward=0.7883019 (535.37 it/sec) -training >> step=81100, episode=14 reward=0.7580259 (567.19 it/sec) -training >> step=81200, episode=14 reward=0.7428262 (586.91 it/sec) -training >> step=81300, episode=14 reward=0.7574907 (556.44 it/sec) -training >> step=81400, episode=14 reward=0.7638959 (557.50 it/sec) -training >> step=81500, episode=14 reward=0.7691332 (536.26 it/sec) -training >> step=81600, episode=14 reward=0.7723783 (540.49 it/sec) -training >> step=81700, episode=14 reward=0.7543417 (562.14 it/sec) -training >> step=81800, episode=14 reward=0.741209 (544.26 it/sec) -training >> step=81900, episode=14 reward=0.7640451 (547.72 it/sec) -training >> step=82000, episode=14 reward=0.7429328 (556.01 it/sec) -training >> step=82100, episode=14 reward=0.7823766 (537.40 it/sec) -training >> step=82200, episode=14 reward=0.7405007 (558.78 it/sec) -training >> step=82300, episode=14 reward=0.7678265 (567.09 it/sec) -training >> step=82400, episode=14 reward=0.7458696 (531.73 it/sec) -training >> step=82500, episode=14 reward=0.7730787 (511.69 it/sec) -training >> step=82600, episode=14 reward=0.7611216 (549.30 it/sec) -training >> step=82700, episode=14 reward=0.7600896 (529.43 it/sec) -training >> step=82800, episode=14 reward=0.774594 (415.00 it/sec) -training >> step=82900, episode=14 reward=0.7373209 (531.31 it/sec) -training >> step=83000, episode=14 reward=0.7470345 (539.16 it/sec) -training >> step=83100, episode=14 reward=0.761081 (565.58 it/sec) -training >> step=83200, episode=14 reward=0.7534782 (539.96 it/sec) -training >> step=83300, episode=14 reward=0.757416 (551.21 it/sec) -training >> step=83400, episode=14 reward=0.7545865 (541.96 it/sec) -training >> step=83500, episode=14 reward=0.757248 (537.56 it/sec) -training >> step=83600, episode=14 reward=0.7506964 (575.26 it/sec) -training >> step=83700, episode=14 reward=0.760281 (560.12 it/sec) -training >> step=83800, episode=15 reward=0.7571335 (134.98 it/sec) -training >> step=83900, episode=15 reward=0.7454197 (512.63 it/sec) -training >> step=84000, episode=15 reward=0.762025 (552.83 it/sec) -training >> step=84100, episode=15 reward=0.7526647 (522.58 it/sec) -training >> step=84200, episode=15 reward=0.7424657 (545.74 it/sec) -training >> step=84300, episode=15 reward=0.7357779 (531.70 it/sec) -training >> step=84400, episode=15 reward=0.7573448 (523.76 it/sec) -training >> step=84500, episode=15 reward=0.761309 (478.48 it/sec) -training >> step=84600, episode=15 reward=0.7711729 (564.28 it/sec) -training >> step=84700, episode=15 reward=0.785366 (574.49 it/sec) -training >> step=84800, episode=15 reward=0.7370163 (525.97 it/sec) -training >> step=84900, episode=15 reward=0.7936878 (556.50 it/sec) -training >> step=85000, episode=15 reward=0.7592031 (532.20 it/sec) -training >> step=85100, episode=15 reward=0.7537101 (546.15 it/sec) -training >> step=85200, episode=15 reward=0.7554713 (553.11 it/sec) -training >> step=85300, episode=15 reward=0.7664425 (557.79 it/sec) -training >> step=85400, episode=15 reward=0.7731072 (542.69 it/sec) -training >> step=85500, episode=15 reward=0.7555804 (593.06 it/sec) -training >> step=85600, episode=15 reward=0.7756417 (527.46 it/sec) -training >> step=85700, episode=15 reward=0.7636825 (539.18 it/sec) -training >> step=85800, episode=15 reward=0.7634532 (543.83 it/sec) -training >> step=85900, episode=15 reward=0.7692338 (536.68 it/sec) -training >> step=86000, episode=15 reward=0.7699756 (523.44 it/sec) -training >> step=86100, episode=15 reward=0.7552907 (563.29 it/sec) -training >> step=86200, episode=15 reward=0.7577924 (550.96 it/sec) -training >> step=86300, episode=15 reward=0.7567289 (559.58 it/sec) -training >> step=86400, episode=15 reward=0.7634226 (519.10 it/sec) -training >> step=86500, episode=15 reward=0.7781301 (539.19 it/sec) -training >> step=86600, episode=15 reward=0.745246 (564.45 it/sec) -training >> step=86700, episode=15 reward=0.7729531 (558.92 it/sec) -training >> step=86800, episode=15 reward=0.7466162 (548.54 it/sec) -training >> step=86900, episode=15 reward=0.7587515 (581.81 it/sec) -training >> step=87000, episode=15 reward=0.7572626 (549.08 it/sec) -training >> step=87100, episode=15 reward=0.7684058 (534.32 it/sec) -training >> step=87200, episode=15 reward=0.7739792 (535.54 it/sec) -training >> step=87300, episode=15 reward=0.7598498 (563.41 it/sec) -training >> step=87400, episode=15 reward=0.7939125 (563.68 it/sec) -training >> step=87500, episode=15 reward=0.7580015 (552.80 it/sec) -training >> step=87600, episode=15 reward=0.7621391 (551.76 it/sec) -training >> step=87700, episode=15 reward=0.7722075 (560.33 it/sec) -training >> step=87800, episode=15 reward=0.7625113 (524.78 it/sec) -training >> step=87900, episode=15 reward=0.7488561 (554.41 it/sec) -training >> step=88000, episode=15 reward=0.7625121 (544.98 it/sec) -training >> step=88100, episode=15 reward=0.7553049 (547.08 it/sec) -training >> step=88200, episode=15 reward=0.771789 (525.25 it/sec) -training >> step=88300, episode=15 reward=0.7622191 (568.41 it/sec) -training >> step=88400, episode=15 reward=0.7485942 (526.64 it/sec) -training >> step=88500, episode=15 reward=0.7725234 (574.54 it/sec) -training >> step=88600, episode=15 reward=0.7662289 (535.86 it/sec) -training >> step=88700, episode=15 reward=0.7612329 (571.26 it/sec) -training >> step=88800, episode=15 reward=0.7674047 (558.59 it/sec) -training >> step=88900, episode=15 reward=0.7522647 (566.90 it/sec) -training >> step=89000, episode=15 reward=0.7594988 (549.44 it/sec) -training >> step=89100, episode=15 reward=0.7454644 (543.82 it/sec) -training >> step=89200, episode=15 reward=0.7627488 (550.10 it/sec) -training >> step=89300, episode=15 reward=0.749813 (540.04 it/sec) -training >> step=89400, episode=15 reward=0.7671992 (555.63 it/sec) -training >> step=89500, episode=15 reward=0.7623404 (562.47 it/sec) -training >> step=89600, episode=15 reward=0.7525586 (551.64 it/sec) -training >> step=89700, episode=15 reward=0.7474667 (553.14 it/sec) -training >> step=89800, episode=16 reward=0.7563821 (130.11 it/sec) -training >> step=89900, episode=16 reward=0.7608008 (526.69 it/sec) -training >> step=90000, episode=16 reward=0.7585572 (516.66 it/sec) -training >> step=90100, episode=16 reward=0.7716455 (537.03 it/sec) -training >> step=90200, episode=16 reward=0.7643931 (566.34 it/sec) -training >> step=90300, episode=16 reward=0.7582582 (520.69 it/sec) -training >> step=90400, episode=16 reward=0.7683859 (457.78 it/sec) -training >> step=90500, episode=16 reward=0.7668565 (421.69 it/sec) -training >> step=90600, episode=16 reward=0.7852966 (557.23 it/sec) -training >> step=90700, episode=16 reward=0.7660623 (586.00 it/sec) -training >> step=90800, episode=16 reward=0.748494 (532.91 it/sec) -training >> step=90900, episode=16 reward=0.7686445 (518.97 it/sec) -training >> step=91000, episode=16 reward=0.7458948 (552.88 it/sec) -training >> step=91100, episode=16 reward=0.7485683 (552.26 it/sec) -training >> step=91200, episode=16 reward=0.7695943 (560.60 it/sec) -training >> step=91300, episode=16 reward=0.7717832 (566.94 it/sec) -training >> step=91400, episode=16 reward=0.7517346 (510.59 it/sec) -training >> step=91500, episode=16 reward=0.7578644 (566.17 it/sec) -training >> step=91600, episode=16 reward=0.7804814 (529.53 it/sec) -training >> step=91700, episode=16 reward=0.7933495 (564.77 it/sec) -training >> step=91800, episode=16 reward=0.7685058 (561.95 it/sec) -training >> step=91900, episode=16 reward=0.7579038 (547.85 it/sec) -training >> step=92000, episode=16 reward=0.7549347 (567.26 it/sec) -training >> step=92100, episode=16 reward=0.7662962 (531.36 it/sec) -training >> step=92200, episode=16 reward=0.7617922 (556.30 it/sec) -training >> step=92300, episode=16 reward=0.7413273 (587.83 it/sec) -training >> step=92400, episode=16 reward=0.7681932 (543.33 it/sec) -training >> step=92500, episode=16 reward=0.7669109 (519.73 it/sec) -training >> step=92600, episode=16 reward=0.7465796 (593.22 it/sec) -training >> step=92700, episode=16 reward=0.7670742 (501.53 it/sec) -training >> step=92800, episode=16 reward=0.7790364 (561.81 it/sec) -training >> step=92900, episode=16 reward=0.7475584 (568.20 it/sec) -training >> step=93000, episode=16 reward=0.7639518 (527.58 it/sec) -training >> step=93100, episode=16 reward=0.7770351 (543.75 it/sec) -training >> step=93200, episode=16 reward=0.7587663 (561.17 it/sec) -training >> step=93300, episode=16 reward=0.764057 (520.33 it/sec) -training >> step=93400, episode=16 reward=0.7728433 (562.30 it/sec) -training >> step=93500, episode=16 reward=0.7419876 (535.74 it/sec) -training >> step=93600, episode=16 reward=0.7578677 (526.15 it/sec) -training >> step=93700, episode=16 reward=0.7722871 (549.21 it/sec) -training >> step=93800, episode=16 reward=0.7551808 (507.63 it/sec) -training >> step=93900, episode=16 reward=0.7540283 (568.84 it/sec) -training >> step=94000, episode=16 reward=0.7647131 (576.06 it/sec) -training >> step=94100, episode=16 reward=0.733296 (505.76 it/sec) -training >> step=94200, episode=16 reward=0.7868501 (553.71 it/sec) -training >> step=94300, episode=16 reward=0.7692589 (567.70 it/sec) -training >> step=94400, episode=16 reward=0.7619112 (546.28 it/sec) -training >> step=94500, episode=16 reward=0.76559 (559.84 it/sec) -training >> step=94600, episode=16 reward=0.7616779 (552.97 it/sec) -training >> step=94700, episode=16 reward=0.7649459 (521.50 it/sec) -training >> step=94800, episode=16 reward=0.784479 (586.66 it/sec) -training >> step=94900, episode=16 reward=0.7605076 (527.47 it/sec) -training >> step=95000, episode=16 reward=0.7918299 (556.34 it/sec) -training >> step=95100, episode=16 reward=0.770181 (497.94 it/sec) -training >> step=95200, episode=16 reward=0.7648709 (509.07 it/sec) -training >> step=95300, episode=16 reward=0.7776636 (562.78 it/sec) -training >> step=95400, episode=16 reward=0.7642749 (548.67 it/sec) -training >> step=95500, episode=16 reward=0.7438639 (529.10 it/sec) -training >> step=95600, episode=16 reward=0.7671193 (576.94 it/sec) -training >> step=95700, episode=17 reward=0.7619042 (130.02 it/sec) -training >> step=95800, episode=17 reward=0.7576844 (543.93 it/sec) -training >> step=95900, episode=17 reward=0.7531981 (558.90 it/sec) -training >> step=96000, episode=17 reward=0.7641624 (495.34 it/sec) -training >> step=96100, episode=17 reward=0.7669832 (543.21 it/sec) -training >> step=96200, episode=17 reward=0.7662763 (522.84 it/sec) -training >> step=96300, episode=17 reward=0.7847828 (542.89 it/sec) -training >> step=96400, episode=17 reward=0.752625 (562.73 it/sec) -training >> step=96500, episode=17 reward=0.74444 (520.81 it/sec) -training >> step=96600, episode=17 reward=0.7652763 (563.79 it/sec) -training >> step=96700, episode=17 reward=0.7479092 (550.28 it/sec) -training >> step=96800, episode=17 reward=0.7536504 (541.73 it/sec) -training >> step=96900, episode=17 reward=0.7592678 (580.17 it/sec) -training >> step=97000, episode=17 reward=0.7615121 (558.18 it/sec) -training >> step=97100, episode=17 reward=0.7561117 (535.54 it/sec) -training >> step=97200, episode=17 reward=0.7594813 (541.64 it/sec) -training >> step=97300, episode=17 reward=0.7443398 (554.44 it/sec) -training >> step=97400, episode=17 reward=0.7527398 (516.82 it/sec) -training >> step=97500, episode=17 reward=0.7603169 (544.58 it/sec) -training >> step=97600, episode=17 reward=0.761862 (548.72 it/sec) -training >> step=97700, episode=17 reward=0.7594863 (546.17 it/sec) -training >> step=97800, episode=17 reward=0.7578608 (536.36 it/sec) -training >> step=97900, episode=17 reward=0.7696823 (518.63 it/sec) -training >> step=98000, episode=17 reward=0.7617781 (554.52 it/sec) -training >> step=98100, episode=17 reward=0.7674914 (547.97 it/sec) -training >> step=98200, episode=17 reward=0.7704871 (507.57 it/sec) -training >> step=98300, episode=17 reward=0.7579615 (581.95 it/sec) -training >> step=98400, episode=17 reward=0.7288452 (530.13 it/sec) -training >> step=98500, episode=17 reward=0.7565987 (536.87 it/sec) -training >> step=98600, episode=17 reward=0.7369351 (564.89 it/sec) -training >> step=98700, episode=17 reward=0.7697804 (537.31 it/sec) -training >> step=98800, episode=17 reward=0.7636528 (531.53 it/sec) -training >> step=98900, episode=17 reward=0.7620511 (540.33 it/sec) -training >> step=99000, episode=17 reward=0.7448346 (568.92 it/sec) -training >> step=99100, episode=17 reward=0.7578664 (532.67 it/sec) -training >> step=99200, episode=17 reward=0.7728566 (559.41 it/sec) -training >> step=99300, episode=17 reward=0.7577645 (544.22 it/sec) -training >> step=99400, episode=17 reward=0.7425055 (553.58 it/sec) -training >> step=99500, episode=17 reward=0.7447522 (566.96 it/sec) -training >> step=99600, episode=17 reward=0.7685112 (435.61 it/sec) -training >> step=99700, episode=17 reward=0.7856208 (571.85 it/sec) -training >> step=99800, episode=17 reward=0.7412258 (551.71 it/sec) -training >> step=99900, episode=17 reward=0.7772655 (542.06 it/sec) -training >> step=100000, episode=17 reward=0.7602668 (563.33 it/sec) -training >> step=100100, episode=17 reward=0.7584986 (556.94 it/sec) -training >> step=100200, episode=17 reward=0.7428538 (471.01 it/sec) -training >> step=100300, episode=17 reward=0.7377276 (486.41 it/sec) -training >> step=100400, episode=17 reward=0.743369 (523.91 it/sec) -training >> step=100500, episode=17 reward=0.7505376 (567.04 it/sec) -training >> step=100600, episode=17 reward=0.7480705 (558.28 it/sec) -training >> step=100700, episode=17 reward=0.7451826 (555.73 it/sec) -training >> step=100800, episode=17 reward=0.7357066 (533.75 it/sec) -training >> step=100900, episode=17 reward=0.7549602 (536.67 it/sec) -training >> step=101000, episode=17 reward=0.7706146 (558.72 it/sec) -training >> step=101100, episode=17 reward=0.7591946 (570.61 it/sec) -training >> step=101200, episode=17 reward=0.7453664 (560.83 it/sec) -training >> step=101300, episode=17 reward=0.753583 (561.42 it/sec) -training >> step=101400, episode=17 reward=0.7402443 (547.89 it/sec) -training >> step=101500, episode=17 reward=0.767805 (570.18 it/sec) -training >> step=101600, episode=17 reward=0.7337881 (505.25 it/sec) -training >> step=101700, episode=18 reward=0.7550991 (145.28 it/sec) -training >> step=101800, episode=18 reward=0.7603212 (509.20 it/sec) -training >> step=101900, episode=18 reward=0.74305 (522.72 it/sec) -training >> step=102000, episode=18 reward=0.750761 (538.55 it/sec) -training >> step=102100, episode=18 reward=0.7365846 (507.74 it/sec) -training >> step=102200, episode=18 reward=0.7663389 (593.61 it/sec) -training >> step=102300, episode=18 reward=0.7409439 (529.92 it/sec) -training >> step=102400, episode=18 reward=0.7567595 (539.10 it/sec) -training >> step=102500, episode=18 reward=0.7496043 (583.77 it/sec) -training >> step=102600, episode=18 reward=0.7508392 (548.96 it/sec) -training >> step=102700, episode=18 reward=0.741735 (531.76 it/sec) -training >> step=102800, episode=18 reward=0.7619562 (584.45 it/sec) -training >> step=102900, episode=18 reward=0.7669857 (519.59 it/sec) -training >> step=103000, episode=18 reward=0.7414571 (566.56 it/sec) -training >> step=103100, episode=18 reward=0.7623219 (551.96 it/sec) -training >> step=103200, episode=18 reward=0.761758 (543.88 it/sec) -training >> step=103300, episode=18 reward=0.7547823 (583.99 it/sec) -training >> step=103400, episode=18 reward=0.7752851 (548.42 it/sec) -training >> step=103500, episode=18 reward=0.7607504 (539.09 it/sec) -training >> step=103600, episode=18 reward=0.7622744 (525.48 it/sec) -training >> step=103700, episode=18 reward=0.7556674 (558.16 it/sec) -training >> step=103800, episode=18 reward=0.7745863 (541.95 it/sec) -training >> step=103900, episode=18 reward=0.7881539 (572.76 it/sec) -training >> step=104000, episode=18 reward=0.7624193 (545.33 it/sec) -training >> step=104100, episode=18 reward=0.7583687 (544.69 it/sec) -training >> step=104200, episode=18 reward=0.7564804 (572.91 it/sec) -training >> step=104300, episode=18 reward=0.7560039 (519.93 it/sec) -training >> step=104400, episode=18 reward=0.7646553 (559.99 it/sec) -training >> step=104500, episode=18 reward=0.7695066 (536.91 it/sec) -training >> step=104600, episode=18 reward=0.7908417 (548.06 it/sec) -training >> step=104700, episode=18 reward=0.7503201 (534.94 it/sec) -training >> step=104800, episode=18 reward=0.7772146 (563.10 it/sec) -training >> step=104900, episode=18 reward=0.76247 (512.75 it/sec) -training >> step=105000, episode=18 reward=0.7481058 (578.22 it/sec) -training >> step=105100, episode=18 reward=0.7682135 (541.14 it/sec) -training >> step=105200, episode=18 reward=0.747304 (534.50 it/sec) -training >> step=105300, episode=18 reward=0.7563411 (593.31 it/sec) -training >> step=105400, episode=18 reward=0.7633159 (511.41 it/sec) -training >> step=105500, episode=18 reward=0.7748538 (513.65 it/sec) -training >> step=105600, episode=18 reward=0.7600833 (516.39 it/sec) -training >> step=105700, episode=18 reward=0.7480541 (558.62 it/sec) -training >> step=105800, episode=18 reward=0.7711139 (550.01 it/sec) -training >> step=105900, episode=18 reward=0.7582107 (563.63 it/sec) -training >> step=106000, episode=18 reward=0.7392572 (542.63 it/sec) -training >> step=106100, episode=18 reward=0.7605797 (570.82 it/sec) -training >> step=106200, episode=18 reward=0.7841321 (531.63 it/sec) -training >> step=106300, episode=18 reward=0.7688576 (563.00 it/sec) -training >> step=106400, episode=18 reward=0.7629113 (541.36 it/sec) -training >> step=106500, episode=18 reward=0.7590553 (562.66 it/sec) -training >> step=106600, episode=18 reward=0.7607015 (562.08 it/sec) -training >> step=106700, episode=18 reward=0.7690318 (521.72 it/sec) -training >> step=106800, episode=18 reward=0.7692154 (542.93 it/sec) -training >> step=106900, episode=18 reward=0.7545243 (546.79 it/sec) -training >> step=107000, episode=18 reward=0.7496378 (555.05 it/sec) -training >> step=107100, episode=18 reward=0.7718403 (562.93 it/sec) -training >> step=107200, episode=18 reward=0.7523221 (566.30 it/sec) -training >> step=107300, episode=18 reward=0.7546 (574.07 it/sec) -training >> step=107400, episode=18 reward=0.7353709 (569.03 it/sec) -training >> step=107500, episode=18 reward=0.7594348 (540.70 it/sec) -training >> step=107600, episode=18 reward=0.762036 (565.68 it/sec) -training >> step=107700, episode=19 reward=0.7664624 (118.94 it/sec) -training >> step=107800, episode=19 reward=0.7436655 (563.29 it/sec) -training >> step=107900, episode=19 reward=0.7835079 (521.25 it/sec) -training >> step=108000, episode=19 reward=0.7542318 (478.01 it/sec) -training >> step=108100, episode=19 reward=0.7483051 (556.39 it/sec) -training >> step=108200, episode=19 reward=0.7554244 (518.68 it/sec) -training >> step=108300, episode=19 reward=0.747794 (514.24 it/sec) -training >> step=108400, episode=19 reward=0.766085 (559.21 it/sec) -training >> step=108500, episode=19 reward=0.7717126 (556.43 it/sec) -training >> step=108600, episode=19 reward=0.7512076 (524.22 it/sec) -training >> step=108700, episode=19 reward=0.7791584 (578.81 it/sec) -training >> step=108800, episode=19 reward=0.7449808 (519.58 it/sec) -training >> step=108900, episode=19 reward=0.7813021 (547.55 it/sec) -training >> step=109000, episode=19 reward=0.7710063 (587.26 it/sec) -training >> step=109100, episode=19 reward=0.7638304 (471.07 it/sec) -training >> step=109200, episode=19 reward=0.7646191 (527.72 it/sec) -training >> step=109300, episode=19 reward=0.7608859 (551.28 it/sec) -training >> step=109400, episode=19 reward=0.7517563 (521.07 it/sec) -training >> step=109500, episode=19 reward=0.7616934 (560.53 it/sec) -training >> step=109600, episode=19 reward=0.7577869 (534.11 it/sec) -training >> step=109700, episode=19 reward=0.7462662 (560.92 it/sec) -training >> step=109800, episode=19 reward=0.753126 (549.62 it/sec) -training >> step=109900, episode=19 reward=0.7491758 (513.23 it/sec) -training >> step=110000, episode=19 reward=0.7712776 (569.43 it/sec) -training >> step=110100, episode=19 reward=0.7733154 (574.34 it/sec) -training >> step=110200, episode=19 reward=0.7490476 (528.30 it/sec) -training >> step=110300, episode=19 reward=0.7564726 (562.00 it/sec) -training >> step=110400, episode=19 reward=0.7639393 (566.66 it/sec) -training >> step=110500, episode=19 reward=0.7519243 (478.23 it/sec) -training >> step=110600, episode=19 reward=0.7507455 (516.84 it/sec) -training >> step=110700, episode=19 reward=0.7685565 (515.01 it/sec) -training >> step=110800, episode=19 reward=0.7744383 (547.82 it/sec) -training >> step=110900, episode=19 reward=0.7481299 (542.93 it/sec) -training >> step=111000, episode=19 reward=0.7423458 (549.18 it/sec) -training >> step=111100, episode=19 reward=0.7584258 (535.56 it/sec) -training >> step=111200, episode=19 reward=0.7794974 (579.97 it/sec) -training >> step=111300, episode=19 reward=0.7626731 (523.27 it/sec) -training >> step=111400, episode=19 reward=0.7654403 (557.27 it/sec) -training >> step=111500, episode=19 reward=0.7548599 (596.11 it/sec) -training >> step=111600, episode=19 reward=0.753691 (538.32 it/sec) -training >> step=111700, episode=19 reward=0.7617066 (559.69 it/sec) -training >> step=111800, episode=19 reward=0.7547603 (528.26 it/sec) -training >> step=111900, episode=19 reward=0.7405952 (544.99 it/sec) -training >> step=112000, episode=19 reward=0.7654858 (570.87 it/sec) -training >> step=112100, episode=19 reward=0.7385319 (544.72 it/sec) -training >> step=112200, episode=19 reward=0.7583855 (552.43 it/sec) -training >> step=112300, episode=19 reward=0.7573605 (555.31 it/sec) -training >> step=112400, episode=19 reward=0.7671669 (540.46 it/sec) -training >> step=112500, episode=19 reward=0.761381 (545.39 it/sec) -training >> step=112600, episode=19 reward=0.7527357 (571.07 it/sec) -training >> step=112700, episode=19 reward=0.7702079 (563.18 it/sec) -training >> step=112800, episode=19 reward=0.7448509 (555.18 it/sec) -training >> step=112900, episode=19 reward=0.7519494 (577.53 it/sec) -training >> step=113000, episode=19 reward=0.7574425 (488.24 it/sec) -training >> step=113100, episode=19 reward=0.7587742 (563.98 it/sec) -training >> step=113200, episode=19 reward=0.745175 (566.70 it/sec) -training >> step=113300, episode=19 reward=0.759711 (544.21 it/sec) -training >> step=113400, episode=19 reward=0.7540207 (574.59 it/sec) -training >> step=113500, episode=19 reward=0.7334073 (569.08 it/sec) -training >> step=113600, episode=19 reward=0.7316403 (532.95 it/sec) -training >> step=113700, episode=20 reward=0.7521618 (121.95 it/sec) -training >> step=113800, episode=20 reward=0.7693911 (542.12 it/sec) -training >> step=113900, episode=20 reward=0.7374918 (581.86 it/sec) -training >> step=114000, episode=20 reward=0.756063 (512.23 it/sec) -training >> step=114100, episode=20 reward=0.7606538 (520.05 it/sec) -training >> step=114200, episode=20 reward=0.7547178 (556.02 it/sec) -training >> step=114300, episode=20 reward=0.7771752 (496.32 it/sec) -training >> step=114400, episode=20 reward=0.7867857 (535.05 it/sec) -training >> step=114500, episode=20 reward=0.7744089 (564.29 it/sec) -training >> step=114600, episode=20 reward=0.7526392 (533.88 it/sec) -training >> step=114700, episode=20 reward=0.7543021 (556.68 it/sec) -training >> step=114800, episode=20 reward=0.750117 (577.93 it/sec) -training >> step=114900, episode=20 reward=0.7613554 (533.34 it/sec) -training >> step=115000, episode=20 reward=0.7846912 (540.78 it/sec) -training >> step=115100, episode=20 reward=0.7715765 (547.73 it/sec) -training >> step=115200, episode=20 reward=0.7461713 (524.85 it/sec) -training >> step=115300, episode=20 reward=0.7817927 (553.93 it/sec) -training >> step=115400, episode=20 reward=0.7675283 (552.65 it/sec) -training >> step=115500, episode=20 reward=0.7529392 (528.06 it/sec) -training >> step=115600, episode=20 reward=0.7462493 (538.40 it/sec) -training >> step=115700, episode=20 reward=0.7595313 (470.23 it/sec) -training >> step=115800, episode=20 reward=0.7588901 (546.18 it/sec) -training >> step=115900, episode=20 reward=0.7366799 (577.44 it/sec) -training >> step=116000, episode=20 reward=0.7636824 (531.66 it/sec) -training >> step=116100, episode=20 reward=0.7407035 (518.89 it/sec) -training >> step=116200, episode=20 reward=0.7837464 (560.43 it/sec) -training >> step=116300, episode=20 reward=0.775749 (511.61 it/sec) -training >> step=116400, episode=20 reward=0.7596986 (591.20 it/sec) -training >> step=116500, episode=20 reward=0.7596057 (543.65 it/sec) -training >> step=116600, episode=20 reward=0.7587948 (518.53 it/sec) -training >> step=116700, episode=20 reward=0.7528865 (568.51 it/sec) -training >> step=116800, episode=20 reward=0.7270654 (551.15 it/sec) -training >> step=116900, episode=20 reward=0.7636933 (554.25 it/sec) -training >> step=117000, episode=20 reward=0.7549029 (532.93 it/sec) -training >> step=117100, episode=20 reward=0.7343392 (534.40 it/sec) -training >> step=117200, episode=20 reward=0.7644291 (541.91 it/sec) -training >> step=117300, episode=20 reward=0.7980582 (576.16 it/sec) -training >> step=117400, episode=20 reward=0.7495219 (542.10 it/sec) -training >> step=117500, episode=20 reward=0.7865798 (555.13 it/sec) -training >> step=117600, episode=20 reward=0.7559153 (540.29 it/sec) -training >> step=117700, episode=20 reward=0.763884 (541.43 it/sec) -training >> step=117800, episode=20 reward=0.7611881 (567.08 it/sec) -training >> step=117900, episode=20 reward=0.7544595 (551.61 it/sec) -training >> step=118000, episode=20 reward=0.7583632 (555.89 it/sec) -training >> step=118100, episode=20 reward=0.7448669 (578.67 it/sec) -training >> step=118200, episode=20 reward=0.7710806 (530.63 it/sec) -training >> step=118300, episode=20 reward=0.7605827 (495.41 it/sec) -training >> step=118400, episode=20 reward=0.7565395 (565.53 it/sec) -training >> step=118500, episode=20 reward=0.7457846 (531.47 it/sec) -training >> step=118600, episode=20 reward=0.7434199 (547.83 it/sec) -training >> step=118700, episode=20 reward=0.7617096 (576.78 it/sec) -training >> step=118800, episode=20 reward=0.7383575 (544.78 it/sec) -training >> step=118900, episode=20 reward=0.7394548 (558.89 it/sec) -training >> step=119000, episode=20 reward=0.734879 (547.40 it/sec) -training >> step=119100, episode=20 reward=0.7440091 (559.58 it/sec) -training >> step=119200, episode=20 reward=0.7646962 (557.75 it/sec) -training >> step=119300, episode=20 reward=0.7490981 (532.11 it/sec) -training >> step=119400, episode=20 reward=0.7475463 (524.93 it/sec) -training >> step=119500, episode=20 reward=0.749927 (561.19 it/sec) -training >> step=119600, episode=20 reward=0.7404721 (529.45 it/sec) -training >> step=119700, episode=21 reward=0.7594288 (133.44 it/sec) -training >> step=119800, episode=21 reward=0.7664142 (526.66 it/sec) -training >> step=119900, episode=21 reward=0.7549662 (530.73 it/sec) -training >> step=120000, episode=21 reward=0.7628597 (548.63 it/sec) -training >> step=120100, episode=21 reward=0.751559 (527.35 it/sec) -training >> step=120200, episode=21 reward=0.7604024 (539.08 it/sec) -training >> step=120300, episode=21 reward=0.7515972 (522.51 it/sec) -training >> step=120400, episode=21 reward=0.7810158 (545.30 it/sec) -training >> step=120500, episode=21 reward=0.7646388 (592.11 it/sec) -training >> step=120600, episode=21 reward=0.7549923 (538.62 it/sec) -training >> step=120700, episode=21 reward=0.7564161 (545.93 it/sec) -training >> step=120800, episode=21 reward=0.7461464 (499.46 it/sec) -training >> step=120900, episode=21 reward=0.7738456 (509.73 it/sec) -training >> step=121000, episode=21 reward=0.7433562 (541.38 it/sec) -training >> step=121100, episode=21 reward=0.7522696 (576.80 it/sec) -training >> step=121200, episode=21 reward=0.7540166 (537.59 it/sec) -training >> step=121300, episode=21 reward=0.7514976 (536.35 it/sec) -training >> step=121400, episode=21 reward=0.7466511 (572.83 it/sec) -training >> step=121500, episode=21 reward=0.7548652 (498.63 it/sec) -training >> step=121600, episode=21 reward=0.7577825 (591.38 it/sec) -training >> step=121700, episode=21 reward=0.7454398 (542.38 it/sec) -training >> step=121800, episode=21 reward=0.7372237 (565.34 it/sec) -training >> step=121900, episode=21 reward=0.7565411 (553.76 it/sec) -training >> step=122000, episode=21 reward=0.7708508 (550.78 it/sec) -training >> step=122100, episode=21 reward=0.7587999 (515.88 it/sec) -training >> step=122200, episode=21 reward=0.7669719 (560.87 it/sec) -training >> step=122300, episode=21 reward=0.7471805 (549.32 it/sec) -training >> step=122400, episode=21 reward=0.7546821 (549.64 it/sec) -training >> step=122500, episode=21 reward=0.7532832 (564.72 it/sec) -training >> step=122600, episode=21 reward=0.7485954 (544.09 it/sec) -training >> step=122700, episode=21 reward=0.76351 (545.22 it/sec) -training >> step=122800, episode=21 reward=0.762339 (529.01 it/sec) -training >> step=122900, episode=21 reward=0.7597876 (558.56 it/sec) -training >> step=123000, episode=21 reward=0.7638256 (524.41 it/sec) -training >> step=123100, episode=21 reward=0.7725236 (545.40 it/sec) -training >> step=123200, episode=21 reward=0.7709925 (524.97 it/sec) -training >> step=123300, episode=21 reward=0.7728363 (583.23 it/sec) -training >> step=123400, episode=21 reward=0.762759 (513.74 it/sec) -training >> step=123500, episode=21 reward=0.7746751 (541.01 it/sec) -training >> step=123600, episode=21 reward=0.7519805 (552.31 it/sec) -training >> step=123700, episode=21 reward=0.7381944 (534.21 it/sec) -training >> step=123800, episode=21 reward=0.7407213 (562.23 it/sec) -training >> step=123900, episode=21 reward=0.7530012 (528.18 it/sec) -training >> step=124000, episode=21 reward=0.7398697 (446.46 it/sec) -training >> step=124100, episode=21 reward=0.7652909 (536.48 it/sec) -training >> step=124200, episode=21 reward=0.7581154 (552.47 it/sec) -training >> step=124300, episode=21 reward=0.7563738 (544.56 it/sec) -training >> step=124400, episode=21 reward=0.7449297 (560.35 it/sec) -training >> step=124500, episode=21 reward=0.7609937 (543.70 it/sec) -training >> step=124600, episode=21 reward=0.7692406 (537.17 it/sec) -training >> step=124700, episode=21 reward=0.7569128 (556.91 it/sec) -training >> step=124800, episode=21 reward=0.7351461 (527.56 it/sec) -training >> step=124900, episode=21 reward=0.746512 (572.93 it/sec) -training >> step=125000, episode=21 reward=0.7337719 (558.44 it/sec) -training >> step=125100, episode=21 reward=0.757362 (546.03 it/sec) -training >> step=125200, episode=21 reward=0.761879 (562.38 it/sec) -training >> step=125300, episode=21 reward=0.7538196 (547.36 it/sec) -training >> step=125400, episode=21 reward=0.751714 (544.59 it/sec) -training >> step=125500, episode=21 reward=0.7474607 (556.24 it/sec) -training >> step=125600, episode=22 reward=0.7558743 (129.28 it/sec) -training >> step=125700, episode=22 reward=0.7583428 (517.19 it/sec) -training >> step=125800, episode=22 reward=0.7707966 (532.52 it/sec) -training >> step=125900, episode=22 reward=0.782227 (518.70 it/sec) -training >> step=126000, episode=22 reward=0.7542263 (469.83 it/sec) -training >> step=126100, episode=22 reward=0.7597226 (545.81 it/sec) -training >> step=126200, episode=22 reward=0.7541599 (524.06 it/sec) -training >> step=126300, episode=22 reward=0.7445118 (553.23 it/sec) -training >> step=126400, episode=22 reward=0.7659071 (562.89 it/sec) -training >> step=126500, episode=22 reward=0.7688914 (555.93 it/sec) -training >> step=126600, episode=22 reward=0.7604795 (543.99 it/sec) -training >> step=126700, episode=22 reward=0.7694451 (560.12 it/sec) -training >> step=126800, episode=22 reward=0.7810457 (549.09 it/sec) -training >> step=126900, episode=22 reward=0.7512604 (546.77 it/sec) -training >> step=127000, episode=22 reward=0.7561622 (559.42 it/sec) -training >> step=127100, episode=22 reward=0.7607958 (541.49 it/sec) -training >> step=127200, episode=22 reward=0.7532295 (551.74 it/sec) -training >> step=127300, episode=22 reward=0.7600504 (543.11 it/sec) -training >> step=127400, episode=22 reward=0.7757342 (539.91 it/sec) -training >> step=127500, episode=22 reward=0.7560197 (535.12 it/sec) -training >> step=127600, episode=22 reward=0.7506236 (560.64 it/sec) -training >> step=127700, episode=22 reward=0.75334 (532.77 it/sec) -training >> step=127800, episode=22 reward=0.755506 (552.74 it/sec) -training >> step=127900, episode=22 reward=0.7390187 (566.32 it/sec) -training >> step=128000, episode=22 reward=0.7790197 (529.42 it/sec) -training >> step=128100, episode=22 reward=0.7336823 (581.86 it/sec) -training >> step=128200, episode=22 reward=0.7699155 (524.66 it/sec) -training >> step=128300, episode=22 reward=0.7817028 (545.46 it/sec) -training >> step=128400, episode=22 reward=0.7544605 (543.31 it/sec) -training >> step=128500, episode=22 reward=0.7748605 (555.20 it/sec) -training >> step=128600, episode=22 reward=0.7830126 (540.37 it/sec) -training >> step=128700, episode=22 reward=0.746861 (574.18 it/sec) -training >> step=128800, episode=22 reward=0.7813382 (527.23 it/sec) -training >> step=128900, episode=22 reward=0.7401149 (541.21 it/sec) -training >> step=129000, episode=22 reward=0.757715 (554.64 it/sec) -training >> step=129100, episode=22 reward=0.749753 (548.03 it/sec) -training >> step=129200, episode=22 reward=0.7628162 (546.73 it/sec) -training >> step=129300, episode=22 reward=0.7586218 (537.45 it/sec) -training >> step=129400, episode=22 reward=0.7525604 (543.17 it/sec) -training >> step=129500, episode=22 reward=0.7648767 (529.53 it/sec) -training >> step=129600, episode=22 reward=0.7642589 (585.36 it/sec) -training >> step=129700, episode=22 reward=0.7620525 (558.07 it/sec) -training >> step=129800, episode=22 reward=0.7583076 (544.61 it/sec) -training >> step=129900, episode=22 reward=0.7582411 (524.31 it/sec) -training >> step=130000, episode=22 reward=0.726202 (537.04 it/sec) -training >> step=130100, episode=22 reward=0.7382059 (557.85 it/sec) -training >> step=130200, episode=22 reward=0.7444246 (560.15 it/sec) -training >> step=130300, episode=22 reward=0.757606 (546.37 it/sec) -training >> step=130400, episode=22 reward=0.7679878 (554.21 it/sec) -training >> step=130500, episode=22 reward=0.7534238 (522.63 it/sec) -training >> step=130600, episode=22 reward=0.7544333 (553.24 it/sec) -training >> step=130700, episode=22 reward=0.7614995 (559.70 it/sec) -training >> step=130800, episode=22 reward=0.7440274 (541.37 it/sec) -training >> step=130900, episode=22 reward=0.744054 (580.46 it/sec) -training >> step=131000, episode=22 reward=0.7475675 (520.92 it/sec) -training >> step=131100, episode=22 reward=0.7613508 (547.40 it/sec) -training >> step=131200, episode=22 reward=0.7597865 (560.22 it/sec) -training >> step=131300, episode=22 reward=0.6997526 (569.44 it/sec) -training >> step=131400, episode=22 reward=0.7343213 (513.62 it/sec) -training >> step=131500, episode=22 reward=0.7337767 (513.00 it/sec) -training >> step=131600, episode=23 reward=0.7580803 (130.34 it/sec) -training >> step=131700, episode=23 reward=0.7763962 (496.93 it/sec) -training >> step=131800, episode=23 reward=0.7567437 (502.64 it/sec) -training >> step=131900, episode=23 reward=0.764204 (568.42 it/sec) -training >> step=132000, episode=23 reward=0.7587447 (517.29 it/sec) -training >> step=132100, episode=23 reward=0.7576464 (557.22 it/sec) -training >> step=132200, episode=23 reward=0.7622598 (556.19 it/sec) -training >> step=132300, episode=23 reward=0.7386821 (525.48 it/sec) -training >> step=132400, episode=23 reward=0.7603709 (579.23 it/sec) -training >> step=132500, episode=23 reward=0.7520023 (556.00 it/sec) -training >> step=132600, episode=23 reward=0.7648475 (530.62 it/sec) -training >> step=132700, episode=23 reward=0.7738575 (568.28 it/sec) -training >> step=132800, episode=23 reward=0.7825238 (550.90 it/sec) -training >> step=132900, episode=23 reward=0.7484579 (536.86 it/sec) -training >> step=133000, episode=23 reward=0.7430246 (571.39 it/sec) -training >> step=133100, episode=23 reward=0.7637348 (536.88 it/sec) -training >> step=133200, episode=23 reward=0.7724497 (554.38 it/sec) -training >> step=133300, episode=23 reward=0.7676808 (566.20 it/sec) -training >> step=133400, episode=23 reward=0.7482952 (550.75 it/sec) -training >> step=133500, episode=23 reward=0.7607312 (545.60 it/sec) -training >> step=133600, episode=23 reward=0.7766385 (563.70 it/sec) -training >> step=133700, episode=23 reward=0.7463112 (533.59 it/sec) -training >> step=133800, episode=23 reward=0.7549958 (572.44 it/sec) -training >> step=133900, episode=23 reward=0.7509122 (543.76 it/sec) -training >> step=134000, episode=23 reward=0.785809 (521.59 it/sec) -training >> step=134100, episode=23 reward=0.7476696 (563.48 it/sec) -training >> step=134200, episode=23 reward=0.7608282 (518.92 it/sec) -training >> step=134300, episode=23 reward=0.7673324 (555.87 it/sec) -training >> step=134400, episode=23 reward=0.7642718 (542.29 it/sec) -training >> step=134500, episode=23 reward=0.7743627 (570.92 it/sec) -training >> step=134600, episode=23 reward=0.7560748 (521.14 it/sec) -training >> step=134700, episode=23 reward=0.7547654 (587.71 it/sec) -training >> step=134800, episode=23 reward=0.7496442 (530.01 it/sec) -training >> step=134900, episode=23 reward=0.751407 (552.48 it/sec) -training >> step=135000, episode=23 reward=0.775388 (535.64 it/sec) -training >> step=135100, episode=23 reward=0.7538561 (537.48 it/sec) -training >> step=135200, episode=23 reward=0.7415153 (545.33 it/sec) -training >> step=135300, episode=23 reward=0.7539112 (582.52 it/sec) -training >> step=135400, episode=23 reward=0.7814521 (537.44 it/sec) -training >> step=135500, episode=23 reward=0.7712352 (548.96 it/sec) -training >> step=135600, episode=23 reward=0.7422195 (545.89 it/sec) -training >> step=135700, episode=23 reward=0.7554742 (525.84 it/sec) -training >> step=135800, episode=23 reward=0.7582355 (577.13 it/sec) -training >> step=135900, episode=23 reward=0.7619811 (550.71 it/sec) -training >> step=136000, episode=23 reward=0.7542406 (561.86 it/sec) -training >> step=136100, episode=23 reward=0.7527604 (568.32 it/sec) -training >> step=136200, episode=23 reward=0.7592016 (548.62 it/sec) -training >> step=136300, episode=23 reward=0.7419037 (532.90 it/sec) -training >> step=136400, episode=23 reward=0.7351915 (596.62 it/sec) -training >> step=136500, episode=23 reward=0.7304695 (542.97 it/sec) -training >> step=136600, episode=23 reward=0.7466071 (487.43 it/sec) -training >> step=136700, episode=23 reward=0.7520569 (549.85 it/sec) -training >> step=136800, episode=23 reward=0.7429223 (514.53 it/sec) -training >> step=136900, episode=23 reward=0.7519898 (590.80 it/sec) -training >> step=137000, episode=23 reward=0.7489676 (544.82 it/sec) -training >> step=137100, episode=23 reward=0.7547723 (543.38 it/sec) -training >> step=137200, episode=23 reward=0.7646495 (511.78 it/sec) -training >> step=137300, episode=23 reward=0.733618 (561.44 it/sec) -training >> step=137400, episode=23 reward=0.7407763 (502.23 it/sec) -training >> step=137500, episode=23 reward=0.7481779 (587.59 it/sec) -training >> step=137600, episode=24 reward=0.777356 (115.87 it/sec) -training >> step=137700, episode=24 reward=0.7400615 (482.71 it/sec) -training >> step=137800, episode=24 reward=0.7526454 (517.41 it/sec) -training >> step=137900, episode=24 reward=0.7576185 (530.63 it/sec) -training >> step=138000, episode=24 reward=0.766942 (542.40 it/sec) -training >> step=138100, episode=24 reward=0.7447061 (508.12 it/sec) -training >> step=138200, episode=24 reward=0.7751473 (534.30 it/sec) -training >> step=138300, episode=24 reward=0.7411636 (559.66 it/sec) -training >> step=138400, episode=24 reward=0.7544908 (552.89 it/sec) -training >> step=138500, episode=24 reward=0.7614774 (555.57 it/sec) -training >> step=138600, episode=24 reward=0.7578595 (566.56 it/sec) -training >> step=138700, episode=24 reward=0.7793738 (506.06 it/sec) -training >> step=138800, episode=24 reward=0.7793446 (566.12 it/sec) -training >> step=138900, episode=24 reward=0.7517881 (534.75 it/sec) -training >> step=139000, episode=24 reward=0.7788969 (539.25 it/sec) -training >> step=139100, episode=24 reward=0.7835885 (569.62 it/sec) -training >> step=139200, episode=24 reward=0.7712235 (514.70 it/sec) -training >> step=139300, episode=24 reward=0.7693764 (560.30 it/sec) -training >> step=139400, episode=24 reward=0.7761583 (523.68 it/sec) -training >> step=139500, episode=24 reward=0.7507313 (537.96 it/sec) -training >> step=139600, episode=24 reward=0.7324125 (579.84 it/sec) -training >> step=139700, episode=24 reward=0.7740141 (561.70 it/sec) -training >> step=139800, episode=24 reward=0.7517496 (510.14 it/sec) -training >> step=139900, episode=24 reward=0.7521632 (570.37 it/sec) -training >> step=140000, episode=24 reward=0.7762399 (557.98 it/sec) -training >> step=140100, episode=24 reward=0.7529927 (526.19 it/sec) -training >> step=140200, episode=24 reward=0.7689587 (583.49 it/sec) -training >> step=140300, episode=24 reward=0.7563638 (544.83 it/sec) -training >> step=140400, episode=24 reward=0.7411202 (527.17 it/sec) -training >> step=140500, episode=24 reward=0.7683867 (564.47 it/sec) -training >> step=140600, episode=24 reward=0.7602278 (527.61 it/sec) -training >> step=140700, episode=24 reward=0.7716443 (561.21 it/sec) -training >> step=140800, episode=24 reward=0.7520292 (551.81 it/sec) -training >> step=140900, episode=24 reward=0.7668827 (528.84 it/sec) -training >> step=141000, episode=24 reward=0.7840601 (570.10 it/sec) -training >> step=141100, episode=24 reward=0.7705691 (528.52 it/sec) -training >> step=141200, episode=24 reward=0.7473139 (550.66 it/sec) -training >> step=141300, episode=24 reward=0.7639007 (562.55 it/sec) -training >> step=141400, episode=24 reward=0.7560881 (571.51 it/sec) -training >> step=141500, episode=24 reward=0.760602 (527.66 it/sec) -training >> step=141600, episode=24 reward=0.778755 (568.69 it/sec) -training >> step=141700, episode=24 reward=0.7810744 (478.06 it/sec) -training >> step=141800, episode=24 reward=0.749139 (526.24 it/sec) -training >> step=141900, episode=24 reward=0.7398393 (565.66 it/sec) -training >> step=142000, episode=24 reward=0.7457209 (509.27 it/sec) -training >> step=142100, episode=24 reward=0.7519841 (550.67 it/sec) -training >> step=142200, episode=24 reward=0.7586669 (572.13 it/sec) -training >> step=142300, episode=24 reward=0.7553872 (533.25 it/sec) -training >> step=142400, episode=24 reward=0.7627567 (487.59 it/sec) -training >> step=142500, episode=24 reward=0.7421079 (577.36 it/sec) -training >> step=142600, episode=24 reward=0.7403739 (511.18 it/sec) -training >> step=142700, episode=24 reward=0.755147 (555.93 it/sec) -training >> step=142800, episode=24 reward=0.7507995 (528.09 it/sec) -training >> step=142900, episode=24 reward=0.7550403 (549.82 it/sec) -training >> step=143000, episode=24 reward=0.7534795 (574.08 it/sec) -training >> step=143100, episode=24 reward=0.7648979 (544.32 it/sec) -training >> step=143200, episode=24 reward=0.7511891 (490.15 it/sec) -training >> step=143300, episode=24 reward=0.7574574 (557.60 it/sec) -training >> step=143400, episode=24 reward=0.7525421 (544.38 it/sec) -training >> step=143500, episode=24 reward=0.7316847 (552.50 it/sec) -training >> step=143600, episode=25 reward=0.7561672 (122.35 it/sec) -training >> step=143700, episode=25 reward=0.7305709 (584.11 it/sec) -training >> step=143800, episode=25 reward=0.7531707 (552.72 it/sec) -training >> step=143900, episode=25 reward=0.7480273 (511.35 it/sec) -training >> step=144000, episode=25 reward=0.7439154 (546.38 it/sec) -training >> step=144100, episode=25 reward=0.7596092 (520.39 it/sec) -training >> step=144200, episode=25 reward=0.7657538 (544.66 it/sec) -training >> step=144300, episode=25 reward=0.7491233 (570.64 it/sec) -training >> step=144400, episode=25 reward=0.7702674 (516.22 it/sec) -training >> step=144500, episode=25 reward=0.7851141 (532.83 it/sec) -training >> step=144600, episode=25 reward=0.7596062 (553.49 it/sec) -training >> step=144700, episode=25 reward=0.7419056 (556.15 it/sec) -training >> step=144800, episode=25 reward=0.7567874 (549.84 it/sec) -training >> step=144900, episode=25 reward=0.765161 (557.60 it/sec) -training >> step=145000, episode=25 reward=0.7672667 (516.57 it/sec) -training >> step=145100, episode=25 reward=0.7543572 (554.81 it/sec) -training >> step=145200, episode=25 reward=0.7788113 (533.66 it/sec) -training >> step=145300, episode=25 reward=0.7614616 (553.73 it/sec) -training >> step=145400, episode=25 reward=0.7748588 (522.31 it/sec) -training >> step=145500, episode=25 reward=0.7539873 (553.95 it/sec) -training >> step=145600, episode=25 reward=0.7550613 (529.48 it/sec) -training >> step=145700, episode=25 reward=0.756994 (555.37 it/sec) -training >> step=145800, episode=25 reward=0.7234131 (546.15 it/sec) -training >> step=145900, episode=25 reward=0.7649741 (558.48 it/sec) -training >> step=146000, episode=25 reward=0.7653906 (540.86 it/sec) -training >> step=146100, episode=25 reward=0.7616103 (557.55 it/sec) -training >> step=146200, episode=25 reward=0.7566836 (523.81 it/sec) -training >> step=146300, episode=25 reward=0.7738155 (563.54 it/sec) -training >> step=146400, episode=25 reward=0.7671672 (544.97 it/sec) -training >> step=146500, episode=25 reward=0.7702974 (540.90 it/sec) -training >> step=146600, episode=25 reward=0.7459583 (537.40 it/sec) -training >> step=146700, episode=25 reward=0.7838594 (529.11 it/sec) -training >> step=146800, episode=25 reward=0.7545528 (469.19 it/sec) -training >> step=146900, episode=25 reward=0.7438222 (546.54 it/sec) -training >> step=147000, episode=25 reward=0.751965 (555.28 it/sec) -training >> step=147100, episode=25 reward=0.771953 (554.33 it/sec) -training >> step=147200, episode=25 reward=0.742104 (567.97 it/sec) -training >> step=147300, episode=25 reward=0.7402524 (539.32 it/sec) -training >> step=147400, episode=25 reward=0.753007 (538.77 it/sec) -training >> step=147500, episode=25 reward=0.7412323 (560.98 it/sec) -training >> step=147600, episode=25 reward=0.758082 (555.89 it/sec) -training >> step=147700, episode=25 reward=0.7430902 (569.64 it/sec) -training >> step=147800, episode=25 reward=0.7755878 (556.97 it/sec) -training >> step=147900, episode=25 reward=0.7763386 (522.47 it/sec) -training >> step=148000, episode=25 reward=0.7497213 (564.30 it/sec) -training >> step=148100, episode=25 reward=0.7615052 (542.36 it/sec) -training >> step=148200, episode=25 reward=0.7494596 (535.84 it/sec) -training >> step=148300, episode=25 reward=0.7714653 (577.51 it/sec) -training >> step=148400, episode=25 reward=0.7475156 (532.12 it/sec) -training >> step=148500, episode=25 reward=0.7529343 (513.01 it/sec) -training >> step=148600, episode=25 reward=0.7610894 (554.80 it/sec) -training >> step=148700, episode=25 reward=0.7153481 (571.62 it/sec) -training >> step=148800, episode=25 reward=0.7491555 (580.35 it/sec) -training >> step=148900, episode=25 reward=0.7357791 (557.44 it/sec) -training >> step=149000, episode=25 reward=0.7398705 (531.54 it/sec) -training >> step=149100, episode=25 reward=0.734449 (568.58 it/sec) -training >> step=149200, episode=25 reward=0.7059661 (520.62 it/sec) -training >> step=149300, episode=25 reward=0.7381083 (541.73 it/sec) -training >> step=149400, episode=25 reward=0.7371358 (581.51 it/sec) -training >> step=149500, episode=25 reward=0.7369151 (546.76 it/sec) -training >> step=149600, episode=26 reward=0.7541626 (123.25 it/sec) -training >> step=149700, episode=26 reward=0.7581779 (527.00 it/sec) -training >> step=149800, episode=26 reward=0.7613072 (539.70 it/sec) -training >> step=149900, episode=26 reward=0.7509681 (493.53 it/sec) -training >> step=150000, episode=26 reward=0.7679608 (563.36 it/sec) -training >> step=150100, episode=26 reward=0.7645147 (568.63 it/sec) -training >> step=150200, episode=26 reward=0.7483352 (537.06 it/sec) -training >> step=150300, episode=26 reward=0.7858632 (532.85 it/sec) -training >> step=150400, episode=26 reward=0.7613106 (554.42 it/sec) -training >> step=150500, episode=26 reward=0.75003 (537.51 it/sec) -training >> step=150600, episode=26 reward=0.7670346 (587.92 it/sec) -training >> step=150700, episode=26 reward=0.7745872 (539.86 it/sec) -training >> step=150800, episode=26 reward=0.7463794 (546.80 it/sec) -training >> step=150900, episode=26 reward=0.7631603 (536.02 it/sec) -training >> step=151000, episode=26 reward=0.7524191 (580.29 it/sec) -training >> step=151100, episode=26 reward=0.7726696 (549.25 it/sec) -training >> step=151200, episode=26 reward=0.7829422 (562.53 it/sec) -training >> step=151300, episode=26 reward=0.7627544 (538.33 it/sec) -training >> step=151400, episode=26 reward=0.7822265 (519.59 it/sec) -training >> step=151500, episode=26 reward=0.7568073 (563.65 it/sec) -training >> step=151600, episode=26 reward=0.7591047 (542.11 it/sec) -training >> step=151700, episode=26 reward=0.7304555 (558.88 it/sec) -training >> step=151800, episode=26 reward=0.7633094 (578.96 it/sec) -training >> step=151900, episode=26 reward=0.745576 (489.65 it/sec) -training >> step=152000, episode=26 reward=0.763091 (492.42 it/sec) -training >> step=152100, episode=26 reward=0.7615027 (540.79 it/sec) -training >> step=152200, episode=26 reward=0.7627566 (543.53 it/sec) -training >> step=152300, episode=26 reward=0.7851401 (560.76 it/sec) -training >> step=152400, episode=26 reward=0.7629903 (528.99 it/sec) -training >> step=152500, episode=26 reward=0.7796848 (522.54 it/sec) -training >> step=152600, episode=26 reward=0.7572879 (482.68 it/sec) -training >> step=152700, episode=26 reward=0.7635327 (526.54 it/sec) -training >> step=152800, episode=26 reward=0.7617888 (546.67 it/sec) -training >> step=152900, episode=26 reward=0.767204 (546.27 it/sec) -training >> step=153000, episode=26 reward=0.7679698 (539.59 it/sec) -training >> step=153100, episode=26 reward=0.7823564 (528.81 it/sec) -training >> step=153200, episode=26 reward=0.7691875 (538.79 it/sec) -training >> step=153300, episode=26 reward=0.7624151 (557.88 it/sec) -training >> step=153400, episode=26 reward=0.7679409 (580.48 it/sec) -training >> step=153500, episode=26 reward=0.7559444 (556.59 it/sec) -training >> step=153600, episode=26 reward=0.7351041 (535.52 it/sec) -training >> step=153700, episode=26 reward=0.7720537 (551.32 it/sec) -training >> step=153800, episode=26 reward=0.7601205 (516.68 it/sec) -training >> step=153900, episode=26 reward=0.7594526 (584.27 it/sec) -training >> step=154000, episode=26 reward=0.7570429 (531.41 it/sec) -training >> step=154100, episode=26 reward=0.7399771 (556.62 it/sec) -training >> step=154200, episode=26 reward=0.7329531 (537.47 it/sec) -training >> step=154300, episode=26 reward=0.7557467 (558.79 it/sec) -training >> step=154400, episode=26 reward=0.7344252 (552.79 it/sec) -training >> step=154500, episode=26 reward=0.722195 (551.40 it/sec) -training >> step=154600, episode=26 reward=0.7708074 (556.51 it/sec) -training >> step=154700, episode=26 reward=0.758526 (535.97 it/sec) -training >> step=154800, episode=26 reward=0.7463343 (565.25 it/sec) -training >> step=154900, episode=26 reward=0.7775444 (522.57 it/sec) -training >> step=155000, episode=26 reward=0.7697805 (575.55 it/sec) -training >> step=155100, episode=26 reward=0.7497389 (516.08 it/sec) -training >> step=155200, episode=26 reward=0.7531201 (538.58 it/sec) -training >> step=155300, episode=26 reward=0.7555212 (554.78 it/sec) -training >> step=155400, episode=26 reward=0.760197 (532.56 it/sec) -training >> step=155500, episode=27 reward=0.7380148 (133.92 it/sec) -training >> step=155600, episode=27 reward=0.7482885 (415.53 it/sec) -training >> step=155700, episode=27 reward=0.7534282 (545.94 it/sec) -training >> step=155800, episode=27 reward=0.7655183 (557.57 it/sec) -training >> step=155900, episode=27 reward=0.7663446 (543.74 it/sec) -training >> step=156000, episode=27 reward=0.7587689 (548.28 it/sec) -training >> step=156100, episode=27 reward=0.7901899 (533.32 it/sec) -training >> step=156200, episode=27 reward=0.7392274 (555.48 it/sec) -training >> step=156300, episode=27 reward=0.7555875 (552.91 it/sec) -training >> step=156400, episode=27 reward=0.7418216 (538.87 it/sec) -training >> step=156500, episode=27 reward=0.7318417 (556.84 it/sec) -training >> step=156600, episode=27 reward=0.7734413 (578.76 it/sec) -training >> step=156700, episode=27 reward=0.7733518 (535.90 it/sec) -training >> step=156800, episode=27 reward=0.7513837 (533.72 it/sec) -training >> step=156900, episode=27 reward=0.7806699 (570.86 it/sec) -training >> step=157000, episode=27 reward=0.7739292 (507.47 it/sec) -training >> step=157100, episode=27 reward=0.7404963 (496.09 it/sec) -training >> step=157200, episode=27 reward=0.7695557 (523.54 it/sec) -training >> step=157300, episode=27 reward=0.7745696 (529.79 it/sec) -training >> step=157400, episode=27 reward=0.7825384 (544.17 it/sec) -training >> step=157500, episode=27 reward=0.7358149 (571.47 it/sec) -training >> step=157600, episode=27 reward=0.7614586 (534.52 it/sec) -training >> step=157700, episode=27 reward=0.7681985 (547.15 it/sec) -training >> step=157800, episode=27 reward=0.7684419 (537.84 it/sec) -training >> step=157900, episode=27 reward=0.7521968 (528.72 it/sec) -training >> step=158000, episode=27 reward=0.7553076 (572.32 it/sec) -training >> step=158100, episode=27 reward=0.7583007 (549.29 it/sec) -training >> step=158200, episode=27 reward=0.7469571 (530.66 it/sec) -training >> step=158300, episode=27 reward=0.771838 (539.20 it/sec) -training >> step=158400, episode=27 reward=0.7783819 (509.89 it/sec) -training >> step=158500, episode=27 reward=0.7773767 (535.32 it/sec) -training >> step=158600, episode=27 reward=0.7696481 (527.93 it/sec) -training >> step=158700, episode=27 reward=0.7738631 (543.87 it/sec) -training >> step=158800, episode=27 reward=0.7721581 (512.65 it/sec) -training >> step=158900, episode=27 reward=0.7694311 (498.44 it/sec) -training >> step=159000, episode=27 reward=0.7470442 (422.92 it/sec) -training >> step=159100, episode=27 reward=0.7669529 (524.83 it/sec) -training >> step=159200, episode=27 reward=0.7586108 (404.76 it/sec) -training >> step=159300, episode=27 reward=0.7479367 (480.06 it/sec) -training >> step=159400, episode=27 reward=0.7430059 (493.04 it/sec) -training >> step=159500, episode=27 reward=0.7589929 (539.47 it/sec) -training >> step=159600, episode=27 reward=0.7616067 (544.76 it/sec) -training >> step=159700, episode=27 reward=0.7592661 (553.58 it/sec) -training >> step=159800, episode=27 reward=0.7778613 (523.89 it/sec) -training >> step=159900, episode=27 reward=0.7578911 (539.59 it/sec) -training >> step=160000, episode=27 reward=0.7673001 (565.33 it/sec) -training >> step=160100, episode=27 reward=0.7573716 (562.33 it/sec) -training >> step=160200, episode=27 reward=0.7383138 (559.42 it/sec) -training >> step=160300, episode=27 reward=0.7375168 (554.38 it/sec) -training >> step=160400, episode=27 reward=0.7624876 (535.90 it/sec) -training >> step=160500, episode=27 reward=0.7627265 (558.65 it/sec) -training >> step=160600, episode=27 reward=0.7481392 (560.29 it/sec) -training >> step=160700, episode=27 reward=0.7308511 (562.83 it/sec) -training >> step=160800, episode=27 reward=0.7325432 (585.89 it/sec) -training >> step=160900, episode=27 reward=0.7708744 (483.48 it/sec) -training >> step=161000, episode=27 reward=0.7490357 (537.05 it/sec) -training >> step=161100, episode=27 reward=0.7645931 (549.97 it/sec) -training >> step=161200, episode=27 reward=0.7635953 (543.36 it/sec) -training >> step=161300, episode=27 reward=0.7294394 (556.08 it/sec) -training >> step=161400, episode=27 reward=0.73471 (574.47 it/sec) -training >> step=161500, episode=28 reward=0.7454026 (124.79 it/sec) -training >> step=161600, episode=28 reward=0.7534206 (536.57 it/sec) -training >> step=161700, episode=28 reward=0.7725303 (490.48 it/sec) -training >> step=161800, episode=28 reward=0.757019 (560.96 it/sec) -training >> step=161900, episode=28 reward=0.7551311 (379.61 it/sec) -training >> step=162000, episode=28 reward=0.7495613 (508.12 it/sec) -training >> step=162100, episode=28 reward=0.7590159 (514.20 it/sec) -training >> step=162200, episode=28 reward=0.7533989 (507.14 it/sec) -training >> step=162300, episode=28 reward=0.7346347 (544.60 it/sec) -training >> step=162400, episode=28 reward=0.7620481 (566.70 it/sec) -training >> step=162500, episode=28 reward=0.7642123 (571.98 it/sec) -training >> step=162600, episode=28 reward=0.7447572 (556.65 it/sec) -training >> step=162700, episode=28 reward=0.7513332 (524.90 it/sec) -training >> step=162800, episode=28 reward=0.779286 (514.98 it/sec) -training >> step=162900, episode=28 reward=0.7498881 (556.61 it/sec) -training >> step=163000, episode=28 reward=0.7608821 (535.06 it/sec) -training >> step=163100, episode=28 reward=0.7700084 (561.25 it/sec) -training >> step=163200, episode=28 reward=0.7645586 (588.96 it/sec) -training >> step=163300, episode=28 reward=0.763069 (530.95 it/sec) -training >> step=163400, episode=28 reward=0.7631171 (557.30 it/sec) -training >> step=163500, episode=28 reward=0.7759876 (536.54 it/sec) -training >> step=163600, episode=28 reward=0.7376156 (565.84 it/sec) -training >> step=163700, episode=28 reward=0.7485021 (565.38 it/sec) -training >> step=163800, episode=28 reward=0.7641508 (525.58 it/sec) -training >> step=163900, episode=28 reward=0.7658501 (521.78 it/sec) -training >> step=164000, episode=28 reward=0.7698428 (541.47 it/sec) -training >> step=164100, episode=28 reward=0.7720872 (550.54 it/sec) -training >> step=164200, episode=28 reward=0.763271 (535.26 it/sec) -training >> step=164300, episode=28 reward=0.7690663 (564.97 it/sec) -training >> step=164400, episode=28 reward=0.7495118 (526.22 it/sec) -training >> step=164500, episode=28 reward=0.7550501 (548.52 it/sec) -training >> step=164600, episode=28 reward=0.7526401 (535.46 it/sec) -training >> step=164700, episode=28 reward=0.7724815 (546.49 it/sec) -training >> step=164800, episode=28 reward=0.7435137 (546.93 it/sec) -training >> step=164900, episode=28 reward=0.7668485 (537.77 it/sec) -training >> step=165000, episode=28 reward=0.7586117 (542.41 it/sec) -training >> step=165100, episode=28 reward=0.7644288 (540.29 it/sec) -training >> step=165200, episode=28 reward=0.7691346 (569.43 it/sec) -training >> step=165300, episode=28 reward=0.7584832 (555.69 it/sec) -training >> step=165400, episode=28 reward=0.7522674 (552.25 it/sec) -training >> step=165500, episode=28 reward=0.761991 (530.68 it/sec) -training >> step=165600, episode=28 reward=0.7745468 (558.34 it/sec) -training >> step=165700, episode=28 reward=0.7557067 (558.64 it/sec) -training >> step=165800, episode=28 reward=0.7602382 (573.28 it/sec) -training >> step=165900, episode=28 reward=0.7518284 (558.17 it/sec) -training >> step=166000, episode=28 reward=0.7585224 (549.57 it/sec) -training >> step=166100, episode=28 reward=0.7565541 (542.06 it/sec) -training >> step=166200, episode=28 reward=0.7629278 (553.44 it/sec) -training >> step=166300, episode=28 reward=0.7781312 (582.58 it/sec) -training >> step=166400, episode=28 reward=0.7745059 (555.57 it/sec) -training >> step=166500, episode=28 reward=0.7478496 (547.83 it/sec) -training >> step=166600, episode=28 reward=0.7412603 (546.66 it/sec) -training >> step=166700, episode=28 reward=0.7529171 (513.18 it/sec) -training >> step=166800, episode=28 reward=0.7395872 (557.49 it/sec) -training >> step=166900, episode=28 reward=0.7481008 (559.96 it/sec) -training >> step=167000, episode=28 reward=0.7277608 (557.55 it/sec) -training >> step=167100, episode=28 reward=0.7373379 (529.31 it/sec) -training >> step=167200, episode=28 reward=0.7755454 (542.62 it/sec) -training >> step=167300, episode=28 reward=0.7585676 (516.02 it/sec) -training >> step=167400, episode=28 reward=0.7663705 (560.98 it/sec) -training >> step=167500, episode=29 reward=0.7614614 (116.68 it/sec) -training >> step=167600, episode=29 reward=0.7599653 (527.85 it/sec) -training >> step=167700, episode=29 reward=0.7759748 (536.77 it/sec) -training >> step=167800, episode=29 reward=0.7556901 (549.72 it/sec) -training >> step=167900, episode=29 reward=0.7625722 (507.37 it/sec) -training >> step=168000, episode=29 reward=0.7417784 (552.73 it/sec) -training >> step=168100, episode=29 reward=0.7700415 (511.51 it/sec) -training >> step=168200, episode=29 reward=0.762702 (427.24 it/sec) -training >> step=168300, episode=29 reward=0.7495289 (558.57 it/sec) -training >> step=168400, episode=29 reward=0.7664885 (531.80 it/sec) -training >> step=168500, episode=29 reward=0.7632667 (557.06 it/sec) -training >> step=168600, episode=29 reward=0.7692594 (500.60 it/sec) -training >> step=168700, episode=29 reward=0.7246096 (536.05 it/sec) -training >> step=168800, episode=29 reward=0.7506362 (547.46 it/sec) -training >> step=168900, episode=29 reward=0.748446 (538.92 it/sec) -training >> step=169000, episode=29 reward=0.767981 (538.02 it/sec) -training >> step=169100, episode=29 reward=0.7663518 (549.63 it/sec) -training >> step=169200, episode=29 reward=0.7667621 (541.69 it/sec) -training >> step=169300, episode=29 reward=0.7519081 (540.48 it/sec) -training >> step=169400, episode=29 reward=0.7515737 (568.79 it/sec) -training >> step=169500, episode=29 reward=0.7412795 (566.64 it/sec) -training >> step=169600, episode=29 reward=0.7516745 (513.28 it/sec) -training >> step=169700, episode=29 reward=0.7696467 (512.61 it/sec) -training >> step=169800, episode=29 reward=0.747983 (580.23 it/sec) -training >> step=169900, episode=29 reward=0.756223 (533.95 it/sec) -training >> step=170000, episode=29 reward=0.731779 (530.28 it/sec) -training >> step=170100, episode=29 reward=0.7542559 (529.32 it/sec) -training >> step=170200, episode=29 reward=0.7564934 (550.66 it/sec) -training >> step=170300, episode=29 reward=0.7784992 (541.74 it/sec) -training >> step=170400, episode=29 reward=0.7570712 (558.03 it/sec) -training >> step=170500, episode=29 reward=0.7635537 (540.72 it/sec) -training >> step=170600, episode=29 reward=0.752542 (536.55 it/sec) -training >> step=170700, episode=29 reward=0.7348134 (540.59 it/sec) -training >> step=170800, episode=29 reward=0.7682629 (512.95 it/sec) -training >> step=170900, episode=29 reward=0.770748 (574.72 it/sec) -training >> step=171000, episode=29 reward=0.7717866 (563.39 it/sec) -training >> step=171100, episode=29 reward=0.7614785 (550.56 it/sec) -training >> step=171200, episode=29 reward=0.7684876 (568.28 it/sec) -training >> step=171300, episode=29 reward=0.7515043 (491.14 it/sec) -training >> step=171400, episode=29 reward=0.745224 (506.94 it/sec) -training >> step=171500, episode=29 reward=0.7551219 (566.43 it/sec) -training >> step=171600, episode=29 reward=0.7468827 (565.67 it/sec) -training >> step=171700, episode=29 reward=0.7651506 (579.09 it/sec) -training >> step=171800, episode=29 reward=0.7754168 (538.61 it/sec) -training >> step=171900, episode=29 reward=0.7589369 (522.19 it/sec) -training >> step=172000, episode=29 reward=0.7638628 (556.67 it/sec) -training >> step=172100, episode=29 reward=0.7352825 (543.79 it/sec) -training >> step=172200, episode=29 reward=0.7575043 (529.65 it/sec) -training >> step=172300, episode=29 reward=0.7767933 (561.24 it/sec) -training >> step=172400, episode=29 reward=0.7399256 (548.91 it/sec) -training >> step=172500, episode=29 reward=0.7559332 (518.67 it/sec) -training >> step=172600, episode=29 reward=0.740977 (498.17 it/sec) -training >> step=172700, episode=29 reward=0.7600465 (532.73 it/sec) -training >> step=172800, episode=29 reward=0.7423123 (491.23 it/sec) -training >> step=172900, episode=29 reward=0.7734124 (516.11 it/sec) -training >> step=173000, episode=29 reward=0.743233 (518.54 it/sec) -training >> step=173100, episode=29 reward=0.7603109 (558.73 it/sec) -training >> step=173200, episode=29 reward=0.7506713 (577.81 it/sec) -training >> step=173300, episode=29 reward=0.7434589 (546.45 it/sec) -training >> step=173400, episode=29 reward=0.7656898 (519.28 it/sec) -training >> step=173500, episode=30 reward=0.7509648 (124.26 it/sec) -training >> step=173600, episode=30 reward=0.7670595 (525.48 it/sec) -training >> step=173700, episode=30 reward=0.7417337 (530.27 it/sec) -training >> step=173800, episode=30 reward=0.7634029 (533.78 it/sec) -training >> step=173900, episode=30 reward=0.7558605 (559.28 it/sec) -training >> step=174000, episode=30 reward=0.7528057 (517.37 it/sec) -training >> step=174100, episode=30 reward=0.7424693 (511.89 it/sec) -training >> step=174200, episode=30 reward=0.761843 (540.61 it/sec) -training >> step=174300, episode=30 reward=0.7563226 (549.29 it/sec) -training >> step=174400, episode=30 reward=0.7733526 (524.35 it/sec) -training >> step=174500, episode=30 reward=0.7646734 (437.99 it/sec) -training >> step=174600, episode=30 reward=0.7734023 (521.82 it/sec) -training >> step=174700, episode=30 reward=0.7505772 (553.50 it/sec) -training >> step=174800, episode=30 reward=0.7783381 (540.38 it/sec) -training >> step=174900, episode=30 reward=0.7687857 (556.66 it/sec) -training >> step=175000, episode=30 reward=0.7495767 (538.08 it/sec) -training >> step=175100, episode=30 reward=0.7694682 (576.65 it/sec) -training >> step=175200, episode=30 reward=0.7758567 (527.73 it/sec) -training >> step=175300, episode=30 reward=0.7728401 (538.31 it/sec) -training >> step=175400, episode=30 reward=0.7523895 (552.89 it/sec) -training >> step=175500, episode=30 reward=0.7342836 (532.97 it/sec) -training >> step=175600, episode=30 reward=0.7576764 (577.75 it/sec) -training >> step=175700, episode=30 reward=0.753895 (543.74 it/sec) -training >> step=175800, episode=30 reward=0.7433197 (522.74 it/sec) -training >> step=175900, episode=30 reward=0.7682716 (563.23 it/sec) -training >> step=176000, episode=30 reward=0.7620535 (513.57 it/sec) -training >> step=176100, episode=30 reward=0.7534377 (581.34 it/sec) -training >> step=176200, episode=30 reward=0.7660496 (541.37 it/sec) -training >> step=176300, episode=30 reward=0.7311749 (512.04 it/sec) -training >> step=176400, episode=30 reward=0.765419 (549.54 it/sec) -training >> step=176500, episode=30 reward=0.775043 (522.45 it/sec) -training >> step=176600, episode=30 reward=0.7374039 (523.55 it/sec) -training >> step=176700, episode=30 reward=0.758849 (547.92 it/sec) -training >> step=176800, episode=30 reward=0.7875634 (548.79 it/sec) -training >> step=176900, episode=30 reward=0.7835402 (504.14 it/sec) -training >> step=177000, episode=30 reward=0.7586607 (576.35 it/sec) -training >> step=177100, episode=30 reward=0.7728248 (539.15 it/sec) -training >> step=177200, episode=30 reward=0.7515144 (563.87 it/sec) -training >> step=177300, episode=30 reward=0.7488713 (540.00 it/sec) -training >> step=177400, episode=30 reward=0.7666297 (521.46 it/sec) -training >> step=177500, episode=30 reward=0.7575476 (567.98 it/sec) -training >> step=177600, episode=30 reward=0.7597979 (511.15 it/sec) -training >> step=177700, episode=30 reward=0.7684779 (507.17 it/sec) -training >> step=177800, episode=30 reward=0.7591729 (595.98 it/sec) -training >> step=177900, episode=30 reward=0.7514564 (504.15 it/sec) -training >> step=178000, episode=30 reward=0.7606373 (492.00 it/sec) -training >> step=178100, episode=30 reward=0.7423041 (524.35 it/sec) -training >> step=178200, episode=30 reward=0.7532173 (549.72 it/sec) -training >> step=178300, episode=30 reward=0.7457733 (560.01 it/sec) -training >> step=178400, episode=30 reward=0.7187678 (569.53 it/sec) -training >> step=178500, episode=30 reward=0.7583783 (512.19 it/sec) -training >> step=178600, episode=30 reward=0.7412251 (563.87 it/sec) -training >> step=178700, episode=30 reward=0.7400832 (585.25 it/sec) -training >> step=178800, episode=30 reward=0.7682391 (534.37 it/sec) -training >> step=178900, episode=30 reward=0.7355783 (550.13 it/sec) -training >> step=179000, episode=30 reward=0.7575245 (567.12 it/sec) -training >> step=179100, episode=30 reward=0.7283044 (495.38 it/sec) -training >> step=179200, episode=30 reward=0.7495699 (584.23 it/sec) -training >> step=179300, episode=30 reward=0.7550325 (560.42 it/sec) -training >> step=179400, episode=30 reward=0.7294823 (523.98 it/sec) -training >> step=179500, episode=31 reward=0.7593948 (137.85 it/sec) -training >> step=179600, episode=31 reward=0.7630613 (546.31 it/sec) -training >> step=179700, episode=31 reward=0.7521 (528.42 it/sec) -training >> step=179800, episode=31 reward=0.7573979 (501.59 it/sec) -training >> step=179900, episode=31 reward=0.7749627 (557.37 it/sec) -training >> step=180000, episode=31 reward=0.7665692 (532.50 it/sec) -training >> step=180100, episode=31 reward=0.7674778 (523.49 it/sec) -training >> step=180200, episode=31 reward=0.765318 (535.87 it/sec) -training >> step=180300, episode=31 reward=0.7486525 (544.81 it/sec) -training >> step=180400, episode=31 reward=0.7534192 (543.76 it/sec) -training >> step=180500, episode=31 reward=0.7505103 (534.29 it/sec) -training >> step=180600, episode=31 reward=0.7656819 (559.53 it/sec) -training >> step=180700, episode=31 reward=0.7701954 (380.31 it/sec) -training >> step=180800, episode=31 reward=0.77265 (565.97 it/sec) -training >> step=180900, episode=31 reward=0.7728653 (533.30 it/sec) -training >> step=181000, episode=31 reward=0.7554456 (562.91 it/sec) -training >> step=181100, episode=31 reward=0.7621807 (528.69 it/sec) -training >> step=181200, episode=31 reward=0.765655 (553.02 it/sec) -training >> step=181300, episode=31 reward=0.7693603 (547.69 it/sec) -training >> step=181400, episode=31 reward=0.7685055 (536.78 it/sec) -training >> step=181500, episode=31 reward=0.7518034 (549.98 it/sec) -training >> step=181600, episode=31 reward=0.7605411 (587.84 it/sec) -training >> step=181700, episode=31 reward=0.7668775 (530.48 it/sec) -training >> step=181800, episode=31 reward=0.7497522 (560.70 it/sec) -training >> step=181900, episode=31 reward=0.7722237 (549.07 it/sec) -training >> step=182000, episode=31 reward=0.727484 (536.96 it/sec) -training >> step=182100, episode=31 reward=0.7881442 (552.76 it/sec) -training >> step=182200, episode=31 reward=0.7557364 (558.33 it/sec) -training >> step=182300, episode=31 reward=0.7667062 (540.51 it/sec) -training >> step=182400, episode=31 reward=0.7505113 (548.45 it/sec) -training >> step=182500, episode=31 reward=0.754448 (549.57 it/sec) -training >> step=182600, episode=31 reward=0.7671444 (530.61 it/sec) -training >> step=182700, episode=31 reward=0.762473 (525.37 it/sec) -training >> step=182800, episode=31 reward=0.7624717 (461.87 it/sec) -training >> step=182900, episode=31 reward=0.768158 (555.63 it/sec) -training >> step=183000, episode=31 reward=0.7309809 (505.58 it/sec) -training >> step=183100, episode=31 reward=0.7833815 (548.48 it/sec) -training >> step=183200, episode=31 reward=0.7577798 (566.91 it/sec) -training >> step=183300, episode=31 reward=0.7506999 (537.93 it/sec) -training >> step=183400, episode=31 reward=0.7608238 (561.14 it/sec) -training >> step=183500, episode=31 reward=0.7546338 (561.71 it/sec) -training >> step=183600, episode=31 reward=0.7626927 (518.17 it/sec) -training >> step=183700, episode=31 reward=0.7545195 (541.45 it/sec) -training >> step=183800, episode=31 reward=0.7660352 (559.64 it/sec) -training >> step=183900, episode=31 reward=0.7646039 (554.69 it/sec) -training >> step=184000, episode=31 reward=0.790621 (550.93 it/sec) -training >> step=184100, episode=31 reward=0.74371 (557.09 it/sec) -training >> step=184200, episode=31 reward=0.7642398 (539.40 it/sec) -training >> step=184300, episode=31 reward=0.7635396 (557.91 it/sec) -training >> step=184400, episode=31 reward=0.7514147 (545.66 it/sec) -training >> step=184500, episode=31 reward=0.7766986 (563.72 it/sec) -training >> step=184600, episode=31 reward=0.7543218 (509.86 it/sec) -training >> step=184700, episode=31 reward=0.7287765 (578.63 it/sec) -training >> step=184800, episode=31 reward=0.7693259 (538.80 it/sec) -training >> step=184900, episode=31 reward=0.7406954 (553.15 it/sec) -training >> step=185000, episode=31 reward=0.754257 (551.04 it/sec) -training >> step=185100, episode=31 reward=0.7015053 (566.96 it/sec) -training >> step=185200, episode=31 reward=0.7179654 (558.99 it/sec) -training >> step=185300, episode=31 reward=0.7406347 (559.60 it/sec) -training >> step=185400, episode=32 reward=0.7242263 (137.52 it/sec) -training >> step=185500, episode=32 reward=0.7656379 (501.24 it/sec) -training >> step=185600, episode=32 reward=0.7606714 (581.14 it/sec) -training >> step=185700, episode=32 reward=0.733919 (541.97 it/sec) -training >> step=185800, episode=32 reward=0.7433149 (546.91 it/sec) -training >> step=185900, episode=32 reward=0.7540174 (524.39 it/sec) -training >> step=186000, episode=32 reward=0.7492601 (534.34 it/sec) -training >> step=186100, episode=32 reward=0.7741625 (523.39 it/sec) -training >> step=186200, episode=32 reward=0.7476815 (563.72 it/sec) -training >> step=186300, episode=32 reward=0.7381096 (554.50 it/sec) -training >> step=186400, episode=32 reward=0.7657847 (556.37 it/sec) -training >> step=186500, episode=32 reward=0.7728515 (571.53 it/sec) -training >> step=186600, episode=32 reward=0.7764765 (525.39 it/sec) -training >> step=186700, episode=32 reward=0.7452679 (539.34 it/sec) -training >> step=186800, episode=32 reward=0.7670803 (548.44 it/sec) -training >> step=186900, episode=32 reward=0.7540026 (552.81 it/sec) -training >> step=187000, episode=32 reward=0.7863817 (556.37 it/sec) -training >> step=187100, episode=32 reward=0.7636376 (416.40 it/sec) -training >> step=187200, episode=32 reward=0.7438965 (540.19 it/sec) -training >> step=187300, episode=32 reward=0.7707916 (540.83 it/sec) -training >> step=187400, episode=32 reward=0.7748344 (573.55 it/sec) -training >> step=187500, episode=32 reward=0.731365 (535.01 it/sec) -training >> step=187600, episode=32 reward=0.7701074 (573.83 it/sec) -training >> step=187700, episode=32 reward=0.7541921 (523.35 it/sec) -training >> step=187800, episode=32 reward=0.7484932 (519.31 it/sec) -training >> step=187900, episode=32 reward=0.7803616 (451.22 it/sec) -training >> step=188000, episode=32 reward=0.765845 (560.55 it/sec) -training >> step=188100, episode=32 reward=0.7505511 (529.29 it/sec) -training >> step=188200, episode=32 reward=0.7660154 (565.21 it/sec) -training >> step=188300, episode=32 reward=0.7704121 (549.31 it/sec) -training >> step=188400, episode=32 reward=0.7610762 (543.76 it/sec) -training >> step=188500, episode=32 reward=0.7584851 (563.65 it/sec) -training >> step=188600, episode=32 reward=0.7585033 (561.16 it/sec) -training >> step=188700, episode=32 reward=0.7654454 (551.53 it/sec) -training >> step=188800, episode=32 reward=0.7793217 (531.72 it/sec) -training >> step=188900, episode=32 reward=0.7738276 (549.63 it/sec) -training >> step=189000, episode=32 reward=0.7672982 (538.06 it/sec) -training >> step=189100, episode=32 reward=0.7726668 (591.36 it/sec) -training >> step=189200, episode=32 reward=0.7696184 (554.57 it/sec) -training >> step=189300, episode=32 reward=0.745859 (554.38 it/sec) -training >> step=189400, episode=32 reward=0.7549235 (526.86 it/sec) -training >> step=189500, episode=32 reward=0.755214 (555.33 it/sec) -training >> step=189600, episode=32 reward=0.7737689 (561.30 it/sec) -training >> step=189700, episode=32 reward=0.7303926 (564.08 it/sec) -training >> step=189800, episode=32 reward=0.7508122 (565.59 it/sec) -training >> step=189900, episode=32 reward=0.771366 (592.85 it/sec) -training >> step=190000, episode=32 reward=0.7368261 (521.42 it/sec) -training >> step=190100, episode=32 reward=0.7391855 (525.83 it/sec) -training >> step=190200, episode=32 reward=0.7581254 (583.22 it/sec) -training >> step=190300, episode=32 reward=0.7437816 (562.46 it/sec) -training >> step=190400, episode=32 reward=0.7676066 (557.44 it/sec) -training >> step=190500, episode=32 reward=0.7471728 (546.77 it/sec) -training >> step=190600, episode=32 reward=0.7381029 (567.61 it/sec) -training >> step=190700, episode=32 reward=0.7416832 (544.71 it/sec) -training >> step=190800, episode=32 reward=0.7257992 (568.81 it/sec) -training >> step=190900, episode=32 reward=0.7563232 (568.28 it/sec) -training >> step=191000, episode=32 reward=0.7317492 (564.58 it/sec) -training >> step=191100, episode=32 reward=0.7674267 (539.39 it/sec) -training >> step=191200, episode=32 reward=0.7601238 (539.72 it/sec) -training >> step=191300, episode=32 reward=0.7279609 (563.21 it/sec) -training >> step=191400, episode=33 reward=0.7699674 (137.49 it/sec) -training >> step=191500, episode=33 reward=0.7573643 (531.37 it/sec) -training >> step=191600, episode=33 reward=0.766615 (538.73 it/sec) -training >> step=191700, episode=33 reward=0.7733588 (538.56 it/sec) -training >> step=191800, episode=33 reward=0.7669166 (565.35 it/sec) -training >> step=191900, episode=33 reward=0.7202333 (535.68 it/sec) -training >> step=192000, episode=33 reward=0.7747785 (539.12 it/sec) -training >> step=192100, episode=33 reward=0.7604697 (554.58 it/sec) -training >> step=192200, episode=33 reward=0.7677207 (543.63 it/sec) -training >> step=192300, episode=33 reward=0.7463135 (583.34 it/sec) -training >> step=192400, episode=33 reward=0.7464063 (545.81 it/sec) -training >> step=192500, episode=33 reward=0.7740721 (528.52 it/sec) -training >> step=192600, episode=33 reward=0.7495452 (564.09 it/sec) -training >> step=192700, episode=33 reward=0.7521022 (543.11 it/sec) -training >> step=192800, episode=33 reward=0.7686831 (551.61 it/sec) -training >> step=192900, episode=33 reward=0.7728061 (590.19 it/sec) -training >> step=193000, episode=33 reward=0.7853714 (526.19 it/sec) -training >> step=193100, episode=33 reward=0.7542204 (467.19 it/sec) -training >> step=193200, episode=33 reward=0.7566276 (531.31 it/sec) -training >> step=193300, episode=33 reward=0.7445782 (391.86 it/sec) -training >> step=193400, episode=33 reward=0.7436048 (571.19 it/sec) -training >> step=193500, episode=33 reward=0.7586808 (561.21 it/sec) -training >> step=193600, episode=33 reward=0.7320298 (505.22 it/sec) -training >> step=193700, episode=33 reward=0.7573589 (572.88 it/sec) -training >> step=193800, episode=33 reward=0.7700402 (536.99 it/sec) -training >> step=193900, episode=33 reward=0.748149 (563.21 it/sec) -training >> step=194000, episode=33 reward=0.7459528 (534.44 it/sec) -training >> step=194100, episode=33 reward=0.7693734 (537.85 it/sec) -training >> step=194200, episode=33 reward=0.7721226 (500.45 it/sec) -training >> step=194300, episode=33 reward=0.7553595 (566.18 it/sec) -training >> step=194400, episode=33 reward=0.7599394 (551.74 it/sec) -training >> step=194500, episode=33 reward=0.7725229 (549.49 it/sec) -training >> step=194600, episode=33 reward=0.7625914 (572.77 it/sec) -training >> step=194700, episode=33 reward=0.7875485 (543.68 it/sec) -training >> step=194800, episode=33 reward=0.7670082 (590.86 it/sec) -training >> step=194900, episode=33 reward=0.776988 (541.43 it/sec) -training >> step=195000, episode=33 reward=0.7651478 (560.42 it/sec) -training >> step=195100, episode=33 reward=0.7671603 (571.47 it/sec) -training >> step=195200, episode=33 reward=0.7619267 (559.11 it/sec) -training >> step=195300, episode=33 reward=0.7570445 (482.73 it/sec) -training >> step=195400, episode=33 reward=0.791941 (581.96 it/sec) -training >> step=195500, episode=33 reward=0.7451657 (556.14 it/sec) -training >> step=195600, episode=33 reward=0.7485257 (565.39 it/sec) -training >> step=195700, episode=33 reward=0.7479375 (567.63 it/sec) -training >> step=195800, episode=33 reward=0.7729945 (536.11 it/sec) -training >> step=195900, episode=33 reward=0.7529391 (548.25 it/sec) -training >> step=196000, episode=33 reward=0.759388 (524.75 it/sec) -training >> step=196100, episode=33 reward=0.7481552 (562.76 it/sec) -training >> step=196200, episode=33 reward=0.7520192 (579.60 it/sec) -training >> step=196300, episode=33 reward=0.7439769 (560.70 it/sec) -training >> step=196400, episode=33 reward=0.7490969 (520.44 it/sec) -training >> step=196500, episode=33 reward=0.7387244 (585.98 it/sec) -training >> step=196600, episode=33 reward=0.7540845 (515.81 it/sec) -training >> step=196700, episode=33 reward=0.7583786 (563.18 it/sec) -training >> step=196800, episode=33 reward=0.7288606 (587.05 it/sec) -training >> step=196900, episode=33 reward=0.754745 (559.47 it/sec) -training >> step=197000, episode=33 reward=0.7299407 (537.82 it/sec) -training >> step=197100, episode=33 reward=0.7449714 (577.43 it/sec) -training >> step=197200, episode=33 reward=0.7340263 (536.61 it/sec) -training >> step=197300, episode=33 reward=0.7724162 (541.06 it/sec) -training >> step=197400, episode=34 reward=0.7377738 (154.72 it/sec) -training >> step=197500, episode=34 reward=0.7355259 (524.29 it/sec) -training >> step=197600, episode=34 reward=0.7433208 (529.91 it/sec) -training >> step=197700, episode=34 reward=0.7725158 (544.39 it/sec) -training >> step=197800, episode=34 reward=0.7645691 (587.25 it/sec) -training >> step=197900, episode=34 reward=0.7694094 (534.01 it/sec) -training >> step=198000, episode=34 reward=0.7631328 (517.98 it/sec) -training >> step=198100, episode=34 reward=0.7549558 (570.04 it/sec) -training >> step=198200, episode=34 reward=0.7418151 (556.70 it/sec) -training >> step=198300, episode=34 reward=0.7367293 (547.24 it/sec) -training >> step=198400, episode=34 reward=0.723216 (498.15 it/sec) -training >> step=198500, episode=34 reward=0.7622489 (555.52 it/sec) -training >> step=198600, episode=34 reward=0.7610013 (565.32 it/sec) -training >> step=198700, episode=34 reward=0.7487012 (540.17 it/sec) -training >> step=198800, episode=34 reward=0.758844 (582.16 it/sec) -training >> step=198900, episode=34 reward=0.760792 (563.28 it/sec) -training >> step=199000, episode=34 reward=0.7346967 (558.45 it/sec) -training >> step=199100, episode=34 reward=0.7462553 (523.84 it/sec) -training >> step=199200, episode=34 reward=0.7461686 (575.42 it/sec) -training >> step=199300, episode=34 reward=0.7585288 (527.99 it/sec) -training >> step=199400, episode=34 reward=0.7570152 (571.05 it/sec) -training >> step=199500, episode=34 reward=0.7683258 (540.55 it/sec) -training >> step=199600, episode=34 reward=0.7429096 (387.91 it/sec) -training >> step=199700, episode=34 reward=0.7547396 (571.88 it/sec) -training >> step=199800, episode=34 reward=0.7495638 (543.58 it/sec) -training >> step=199900, episode=34 reward=0.7711653 (567.41 it/sec) -training >> step=200000, episode=34 reward=0.7729671 (531.93 it/sec) -training >> step=200100, episode=34 reward=0.7552414 (529.59 it/sec) -training >> step=200200, episode=34 reward=0.7356769 (526.14 it/sec) -training >> step=200300, episode=34 reward=0.7622228 (575.29 it/sec) -training >> step=200400, episode=34 reward=0.7454282 (554.38 it/sec) -training >> step=200500, episode=34 reward=0.7486048 (570.34 it/sec) -training >> step=200600, episode=34 reward=0.736035 (539.04 it/sec) -training >> step=200700, episode=34 reward=0.7374722 (533.32 it/sec) -training >> step=200800, episode=34 reward=0.7776188 (558.66 it/sec) -training >> step=200900, episode=34 reward=0.7662331 (589.44 it/sec) -training >> step=201000, episode=34 reward=0.7458542 (560.26 it/sec) -training >> step=201100, episode=34 reward=0.7557924 (586.56 it/sec) -training >> step=201200, episode=34 reward=0.762951 (522.54 it/sec) -training >> step=201300, episode=34 reward=0.7845656 (556.55 it/sec) -training >> step=201400, episode=34 reward=0.7456046 (568.25 it/sec) -training >> step=201500, episode=34 reward=0.7574738 (552.54 it/sec) -training >> step=201600, episode=34 reward=0.7543522 (551.65 it/sec) -training >> step=201700, episode=34 reward=0.7536054 (580.47 it/sec) -training >> step=201800, episode=34 reward=0.7607838 (517.52 it/sec) -training >> step=201900, episode=34 reward=0.761824 (560.54 it/sec) -training >> step=202000, episode=34 reward=0.764908 (554.52 it/sec) -training >> step=202100, episode=34 reward=0.748588 (540.39 it/sec) -training >> step=202200, episode=34 reward=0.7643387 (573.37 it/sec) -training >> step=202300, episode=34 reward=0.7826446 (548.47 it/sec) -training >> step=202400, episode=34 reward=0.734373 (520.81 it/sec) -training >> step=202500, episode=34 reward=0.7648349 (531.69 it/sec) -training >> step=202600, episode=34 reward=0.7684893 (582.94 it/sec) -training >> step=202700, episode=34 reward=0.7315575 (549.55 it/sec) -training >> step=202800, episode=34 reward=0.7717379 (585.54 it/sec) -training >> step=202900, episode=34 reward=0.7443005 (551.17 it/sec) -training >> step=203000, episode=34 reward=0.762809 (549.07 it/sec) -training >> step=203100, episode=34 reward=0.7610598 (554.08 it/sec) -training >> step=203200, episode=34 reward=0.7505087 (543.91 it/sec) -training >> step=203300, episode=34 reward=0.7501951 (576.14 it/sec) -training >> step=203400, episode=35 reward=0.7718918 (140.15 it/sec) -training >> step=203500, episode=35 reward=0.7540199 (506.49 it/sec) -training >> step=203600, episode=35 reward=0.7521241 (470.93 it/sec) -training >> step=203700, episode=35 reward=0.7586017 (527.29 it/sec) -training >> step=203800, episode=35 reward=0.7615444 (544.56 it/sec) -training >> step=203900, episode=35 reward=0.7851443 (542.73 it/sec) -training >> step=204000, episode=35 reward=0.7708758 (535.96 it/sec) -training >> step=204100, episode=35 reward=0.7846987 (561.52 it/sec) -training >> step=204200, episode=35 reward=0.7472318 (547.79 it/sec) -training >> step=204300, episode=35 reward=0.7686149 (559.92 it/sec) -training >> step=204400, episode=35 reward=0.7543017 (556.80 it/sec) -training >> step=204500, episode=35 reward=0.7623894 (516.92 it/sec) -training >> step=204600, episode=35 reward=0.7649192 (516.06 it/sec) -training >> step=204700, episode=35 reward=0.7709399 (538.98 it/sec) -training >> step=204800, episode=35 reward=0.786441 (564.71 it/sec) -training >> step=204900, episode=35 reward=0.7658589 (583.47 it/sec) -training >> step=205000, episode=35 reward=0.7659395 (508.70 it/sec) -training >> step=205100, episode=35 reward=0.7588052 (566.96 it/sec) -training >> step=205200, episode=35 reward=0.7764364 (526.25 it/sec) -training >> step=205300, episode=35 reward=0.7508687 (565.98 it/sec) -training >> step=205400, episode=35 reward=0.7581578 (539.81 it/sec) -training >> step=205500, episode=35 reward=0.760807 (554.10 it/sec) -training >> step=205600, episode=35 reward=0.7611642 (508.37 it/sec) -training >> step=205700, episode=35 reward=0.7718202 (586.88 it/sec) -training >> step=205800, episode=35 reward=0.7665967 (391.08 it/sec) -training >> step=205900, episode=35 reward=0.7541763 (534.15 it/sec) -training >> step=206000, episode=35 reward=0.7684078 (562.73 it/sec) -training >> step=206100, episode=35 reward=0.776641 (484.39 it/sec) -training >> step=206200, episode=35 reward=0.7651782 (544.61 it/sec) -training >> step=206300, episode=35 reward=0.756924 (537.46 it/sec) -training >> step=206400, episode=35 reward=0.7644925 (563.81 it/sec) -training >> step=206500, episode=35 reward=0.75361 (556.53 it/sec) -training >> step=206600, episode=35 reward=0.7733219 (570.24 it/sec) -training >> step=206700, episode=35 reward=0.7553117 (535.17 it/sec) -training >> step=206800, episode=35 reward=0.7547966 (577.26 it/sec) -training >> step=206900, episode=35 reward=0.7767386 (542.16 it/sec) -training >> step=207000, episode=35 reward=0.7399355 (550.49 it/sec) -training >> step=207100, episode=35 reward=0.7521284 (550.64 it/sec) -training >> step=207200, episode=35 reward=0.7513985 (522.41 it/sec) -training >> step=207300, episode=35 reward=0.7460852 (563.13 it/sec) -training >> step=207400, episode=35 reward=0.7328636 (572.71 it/sec) -training >> step=207500, episode=35 reward=0.7682017 (543.90 it/sec) -training >> step=207600, episode=35 reward=0.7716703 (558.19 it/sec) -training >> step=207700, episode=35 reward=0.7660765 (546.41 it/sec) -training >> step=207800, episode=35 reward=0.749458 (534.66 it/sec) -training >> step=207900, episode=35 reward=0.7753264 (565.63 it/sec) -training >> step=208000, episode=35 reward=0.7606086 (534.56 it/sec) -training >> step=208100, episode=35 reward=0.7695693 (554.72 it/sec) -training >> step=208200, episode=35 reward=0.7544514 (533.10 it/sec) -training >> step=208300, episode=35 reward=0.758963 (520.17 it/sec) -training >> step=208400, episode=35 reward=0.7608243 (535.11 it/sec) -training >> step=208500, episode=35 reward=0.7719923 (573.41 it/sec) -training >> step=208600, episode=35 reward=0.7362785 (544.31 it/sec) -training >> step=208700, episode=35 reward=0.7566144 (561.17 it/sec) -training >> step=208800, episode=35 reward=0.7644594 (564.87 it/sec) -training >> step=208900, episode=35 reward=0.7263814 (548.73 it/sec) -training >> step=209000, episode=35 reward=0.7348195 (508.47 it/sec) -training >> step=209100, episode=35 reward=0.7652233 (536.09 it/sec) -training >> step=209200, episode=35 reward=0.7294338 (561.51 it/sec) -training >> step=209300, episode=35 reward=0.7462577 (574.94 it/sec) -training >> step=209400, episode=36 reward=0.7781175 (154.92 it/sec) -training >> step=209500, episode=36 reward=0.7584066 (562.10 it/sec) -training >> step=209600, episode=36 reward=0.768586 (547.95 it/sec) -training >> step=209700, episode=36 reward=0.7592769 (538.82 it/sec) -training >> step=209800, episode=36 reward=0.7386928 (524.08 it/sec) -training >> step=209900, episode=36 reward=0.7624858 (532.23 it/sec) -training >> step=210000, episode=36 reward=0.7315053 (584.91 it/sec) -training >> step=210100, episode=36 reward=0.7503987 (538.27 it/sec) -training >> step=210200, episode=36 reward=0.7531253 (537.82 it/sec) -training >> step=210300, episode=36 reward=0.7610126 (580.14 it/sec) -training >> step=210400, episode=36 reward=0.7682801 (533.28 it/sec) -training >> step=210500, episode=36 reward=0.7699544 (542.76 it/sec) -training >> step=210600, episode=36 reward=0.7282158 (576.31 it/sec) -training >> step=210700, episode=36 reward=0.7739213 (518.39 it/sec) -training >> step=210800, episode=36 reward=0.7737461 (535.47 it/sec) -training >> step=210900, episode=36 reward=0.7650305 (560.91 it/sec) -training >> step=211000, episode=36 reward=0.7624911 (550.29 it/sec) -training >> step=211100, episode=36 reward=0.7798094 (580.90 it/sec) -training >> step=211200, episode=36 reward=0.7519039 (526.22 it/sec) -training >> step=211300, episode=36 reward=0.7324674 (545.17 it/sec) -training >> step=211400, episode=36 reward=0.7775128 (598.20 it/sec) -training >> step=211500, episode=36 reward=0.7369455 (525.30 it/sec) -training >> step=211600, episode=36 reward=0.7916527 (537.44 it/sec) -training >> step=211700, episode=36 reward=0.7798114 (571.93 it/sec) -training >> step=211800, episode=36 reward=0.7632719 (540.53 it/sec) -training >> step=211900, episode=36 reward=0.7691686 (530.66 it/sec) -training >> step=212000, episode=36 reward=0.7865987 (548.70 it/sec) -training >> step=212100, episode=36 reward=0.7567348 (538.99 it/sec) -training >> step=212200, episode=36 reward=0.7494435 (550.33 it/sec) -training >> step=212300, episode=36 reward=0.7635551 (450.93 it/sec) -training >> step=212400, episode=36 reward=0.7351201 (526.42 it/sec) -training >> step=212500, episode=36 reward=0.7569714 (565.56 it/sec) -training >> step=212600, episode=36 reward=0.7568654 (427.19 it/sec) -training >> step=212700, episode=36 reward=0.7682729 (570.44 it/sec) -training >> step=212800, episode=36 reward=0.7612919 (565.30 it/sec) -training >> step=212900, episode=36 reward=0.7585546 (552.89 it/sec) -training >> step=213000, episode=36 reward=0.7518886 (534.00 it/sec) -training >> step=213100, episode=36 reward=0.7551492 (553.07 it/sec) -training >> step=213200, episode=36 reward=0.7798893 (518.91 it/sec) -training >> step=213300, episode=36 reward=0.7786964 (598.83 it/sec) -training >> step=213400, episode=36 reward=0.7721854 (545.17 it/sec) -training >> step=213500, episode=36 reward=0.7680096 (537.56 it/sec) -training >> step=213600, episode=36 reward=0.7598315 (576.51 it/sec) -training >> step=213700, episode=36 reward=0.7729812 (529.57 it/sec) -training >> step=213800, episode=36 reward=0.7806347 (559.44 it/sec) -training >> step=213900, episode=36 reward=0.770817 (553.73 it/sec) -training >> step=214000, episode=36 reward=0.7414089 (567.69 it/sec) -training >> step=214100, episode=36 reward=0.7394685 (549.30 it/sec) -training >> step=214200, episode=36 reward=0.75508 (503.97 it/sec) -training >> step=214300, episode=36 reward=0.7564084 (511.57 it/sec) -training >> step=214400, episode=36 reward=0.7765722 (582.79 it/sec) -training >> step=214500, episode=36 reward=0.7507461 (551.08 it/sec) -training >> step=214600, episode=36 reward=0.7598907 (531.25 it/sec) -training >> step=214700, episode=36 reward=0.7713391 (555.32 it/sec) -training >> step=214800, episode=36 reward=0.7482278 (517.95 it/sec) -training >> step=214900, episode=36 reward=0.7282647 (552.89 it/sec) -training >> step=215000, episode=36 reward=0.7420515 (578.09 it/sec) -training >> step=215100, episode=36 reward=0.7310488 (533.65 it/sec) -training >> step=215200, episode=36 reward=0.7361006 (539.43 it/sec) -training >> step=215300, episode=37 reward=0.7549659 (144.34 it/sec) -training >> step=215400, episode=37 reward=0.7426818 (599.45 it/sec) -training >> step=215500, episode=37 reward=0.7576103 (533.21 it/sec) -training >> step=215600, episode=37 reward=0.7566296 (527.44 it/sec) -training >> step=215700, episode=37 reward=0.7526196 (524.04 it/sec) -training >> step=215800, episode=37 reward=0.7400318 (543.47 it/sec) -training >> step=215900, episode=37 reward=0.7638701 (542.31 it/sec) -training >> step=216000, episode=37 reward=0.7720621 (565.78 it/sec) -training >> step=216100, episode=37 reward=0.7481167 (546.67 it/sec) -training >> step=216200, episode=37 reward=0.7551586 (537.02 it/sec) -training >> step=216300, episode=37 reward=0.7626309 (572.94 it/sec) -training >> step=216400, episode=37 reward=0.7636625 (534.02 it/sec) -training >> step=216500, episode=37 reward=0.7536466 (558.34 it/sec) -training >> step=216600, episode=37 reward=0.764219 (546.85 it/sec) -training >> step=216700, episode=37 reward=0.7673548 (555.07 it/sec) -training >> step=216800, episode=37 reward=0.7625015 (564.06 it/sec) -training >> step=216900, episode=37 reward=0.7573977 (558.44 it/sec) -training >> step=217000, episode=37 reward=0.764792 (566.44 it/sec) -training >> step=217100, episode=37 reward=0.7637171 (538.83 it/sec) -training >> step=217200, episode=37 reward=0.7564775 (536.34 it/sec) -training >> step=217300, episode=37 reward=0.7597302 (549.81 it/sec) -training >> step=217400, episode=37 reward=0.7553128 (566.86 it/sec) -training >> step=217500, episode=37 reward=0.7723154 (550.30 it/sec) -training >> step=217600, episode=37 reward=0.7535418 (550.92 it/sec) -training >> step=217700, episode=37 reward=0.7503869 (552.32 it/sec) -training >> step=217800, episode=37 reward=0.7558717 (556.02 it/sec) -training >> step=217900, episode=37 reward=0.7504751 (553.04 it/sec) -training >> step=218000, episode=37 reward=0.7614523 (557.39 it/sec) -training >> step=218100, episode=37 reward=0.7803675 (564.43 it/sec) -training >> step=218200, episode=37 reward=0.7322015 (569.66 it/sec) -training >> step=218300, episode=37 reward=0.7561657 (551.56 it/sec) -training >> step=218400, episode=37 reward=0.7647592 (414.91 it/sec) -training >> step=218500, episode=37 reward=0.7692549 (578.80 it/sec) -training >> step=218600, episode=37 reward=0.7627885 (535.16 it/sec) -training >> step=218700, episode=37 reward=0.7529343 (531.96 it/sec) -training >> step=218800, episode=37 reward=0.7633384 (568.01 it/sec) -training >> step=218900, episode=37 reward=0.7627538 (527.48 it/sec) -training >> step=219000, episode=37 reward=0.747346 (539.31 it/sec) -training >> step=219100, episode=37 reward=0.7681898 (558.86 it/sec) -training >> step=219200, episode=37 reward=0.7738598 (557.00 it/sec) -training >> step=219300, episode=37 reward=0.7539852 (565.61 it/sec) -training >> step=219400, episode=37 reward=0.7736177 (531.09 it/sec) -training >> step=219500, episode=37 reward=0.7410076 (522.29 it/sec) -training >> step=219600, episode=37 reward=0.7472011 (559.13 it/sec) -training >> step=219700, episode=37 reward=0.7732173 (556.34 it/sec) -training >> step=219800, episode=37 reward=0.7472466 (538.48 it/sec) -training >> step=219900, episode=37 reward=0.7590877 (577.10 it/sec) -training >> step=220000, episode=37 reward=0.7653213 (539.13 it/sec) -training >> step=220100, episode=37 reward=0.7758092 (571.99 it/sec) -training >> step=220200, episode=37 reward=0.7809319 (535.45 it/sec) -training >> step=220300, episode=37 reward=0.7519554 (549.31 it/sec) -training >> step=220400, episode=37 reward=0.763281 (537.02 it/sec) -training >> step=220500, episode=37 reward=0.7481369 (579.81 it/sec) -training >> step=220600, episode=37 reward=0.7605368 (567.78 it/sec) -training >> step=220700, episode=37 reward=0.7339295 (542.86 it/sec) -training >> step=220800, episode=37 reward=0.7563772 (569.07 it/sec) -training >> step=220900, episode=37 reward=0.7510801 (536.08 it/sec) -training >> step=221000, episode=37 reward=0.7332094 (566.55 it/sec) -training >> step=221100, episode=37 reward=0.7551805 (581.43 it/sec) -training >> step=221200, episode=37 reward=0.7310866 (529.70 it/sec) -training >> step=221300, episode=38 reward=0.742067 (143.93 it/sec) -training >> step=221400, episode=38 reward=0.7556416 (555.78 it/sec) -training >> step=221500, episode=38 reward=0.7650145 (532.05 it/sec) -training >> step=221600, episode=38 reward=0.7558925 (517.59 it/sec) -training >> step=221700, episode=38 reward=0.7617682 (549.69 it/sec) -training >> step=221800, episode=38 reward=0.761142 (587.32 it/sec) -training >> step=221900, episode=38 reward=0.7782912 (570.58 it/sec) -training >> step=222000, episode=38 reward=0.7911653 (538.67 it/sec) -training >> step=222100, episode=38 reward=0.7671839 (576.49 it/sec) -training >> step=222200, episode=38 reward=0.7569136 (524.69 it/sec) -training >> step=222300, episode=38 reward=0.7415094 (578.35 it/sec) -training >> step=222400, episode=38 reward=0.7184669 (555.62 it/sec) -training >> step=222500, episode=38 reward=0.7724575 (534.59 it/sec) -training >> step=222600, episode=38 reward=0.7545645 (558.18 it/sec) -training >> step=222700, episode=38 reward=0.7761062 (554.90 it/sec) -training >> step=222800, episode=38 reward=0.7678708 (530.65 it/sec) -training >> step=222900, episode=38 reward=0.7439298 (595.31 it/sec) -training >> step=223000, episode=38 reward=0.7633335 (488.09 it/sec) -training >> step=223100, episode=38 reward=0.7551531 (563.85 it/sec) -training >> step=223200, episode=38 reward=0.7474944 (582.35 it/sec) -training >> step=223300, episode=38 reward=0.764604 (539.37 it/sec) -training >> step=223400, episode=38 reward=0.7440706 (554.49 it/sec) -training >> step=223500, episode=38 reward=0.7610017 (570.45 it/sec) -training >> step=223600, episode=38 reward=0.7789308 (525.54 it/sec) -training >> step=223700, episode=38 reward=0.7209067 (565.98 it/sec) -training >> step=223800, episode=38 reward=0.7653807 (564.67 it/sec) -training >> step=223900, episode=38 reward=0.7808743 (524.97 it/sec) -training >> step=224000, episode=38 reward=0.7311742 (574.93 it/sec) -training >> step=224100, episode=38 reward=0.7623218 (545.41 it/sec) -training >> step=224200, episode=38 reward=0.7829407 (570.61 it/sec) -training >> step=224300, episode=38 reward=0.7833014 (562.38 it/sec) -training >> step=224400, episode=38 reward=0.7543298 (522.55 it/sec) -training >> step=224500, episode=38 reward=0.7594475 (544.39 it/sec) -training >> step=224600, episode=38 reward=0.7622972 (400.39 it/sec) -training >> step=224700, episode=38 reward=0.7829502 (506.65 it/sec) -training >> step=224800, episode=38 reward=0.7402995 (558.98 it/sec) -training >> step=224900, episode=38 reward=0.787752 (578.06 it/sec) -training >> step=225000, episode=38 reward=0.7553157 (546.95 it/sec) -training >> step=225100, episode=38 reward=0.7577339 (592.19 it/sec) -training >> step=225200, episode=38 reward=0.773083 (527.38 it/sec) -training >> step=225300, episode=38 reward=0.755362 (570.94 it/sec) -training >> step=225400, episode=38 reward=0.7605499 (563.27 it/sec) -training >> step=225500, episode=38 reward=0.7448763 (534.28 it/sec) -training >> step=225600, episode=38 reward=0.7807531 (567.56 it/sec) -training >> step=225700, episode=38 reward=0.7543615 (586.94 it/sec) -training >> step=225800, episode=38 reward=0.7706946 (526.02 it/sec) -training >> step=225900, episode=38 reward=0.772636 (589.51 it/sec) -training >> step=226000, episode=38 reward=0.762763 (553.15 it/sec) -training >> step=226100, episode=38 reward=0.757304 (519.94 it/sec) -training >> step=226200, episode=38 reward=0.7482613 (574.91 it/sec) -training >> step=226300, episode=38 reward=0.7286193 (544.02 it/sec) -training >> step=226400, episode=38 reward=0.7557116 (548.74 it/sec) -training >> step=226500, episode=38 reward=0.7508535 (562.12 it/sec) -training >> step=226600, episode=38 reward=0.7735298 (561.30 it/sec) -training >> step=226700, episode=38 reward=0.7483974 (538.07 it/sec) -training >> step=226800, episode=38 reward=0.7470499 (545.31 it/sec) -training >> step=226900, episode=38 reward=0.7253876 (560.49 it/sec) -training >> step=227000, episode=38 reward=0.7390532 (565.89 it/sec) -training >> step=227100, episode=38 reward=0.7285189 (581.93 it/sec) -training >> step=227200, episode=38 reward=0.7555481 (521.67 it/sec) -training >> step=227300, episode=39 reward=0.7546099 (131.40 it/sec) -training >> step=227400, episode=39 reward=0.744296 (521.45 it/sec) -training >> step=227500, episode=39 reward=0.7622707 (530.47 it/sec) -training >> step=227600, episode=39 reward=0.7363453 (538.24 it/sec) -training >> step=227700, episode=39 reward=0.754139 (563.54 it/sec) -training >> step=227800, episode=39 reward=0.7713095 (553.17 it/sec) -training >> step=227900, episode=39 reward=0.7362512 (555.80 it/sec) -training >> step=228000, episode=39 reward=0.7747511 (568.52 it/sec) -training >> step=228100, episode=39 reward=0.766714 (539.33 it/sec) -training >> step=228200, episode=39 reward=0.7466745 (593.21 it/sec) -training >> step=228300, episode=39 reward=0.7702556 (560.05 it/sec) -training >> step=228400, episode=39 reward=0.7611851 (548.25 it/sec) -training >> step=228500, episode=39 reward=0.7782019 (567.40 it/sec) -training >> step=228600, episode=39 reward=0.7597227 (531.35 it/sec) -training >> step=228700, episode=39 reward=0.7382874 (561.57 it/sec) -training >> step=228800, episode=39 reward=0.746501 (586.32 it/sec) -training >> step=228900, episode=39 reward=0.7594587 (559.24 it/sec) -training >> step=229000, episode=39 reward=0.7537541 (575.33 it/sec) -training >> step=229100, episode=39 reward=0.7638291 (533.77 it/sec) -training >> step=229200, episode=39 reward=0.7451652 (534.24 it/sec) -training >> step=229300, episode=39 reward=0.7603566 (552.41 it/sec) -training >> step=229400, episode=39 reward=0.7552364 (518.11 it/sec) -training >> step=229500, episode=39 reward=0.7571652 (581.02 it/sec) -training >> step=229600, episode=39 reward=0.7849352 (533.80 it/sec) -training >> step=229700, episode=39 reward=0.7512587 (526.28 it/sec) -training >> step=229800, episode=39 reward=0.7719071 (538.17 it/sec) -training >> step=229900, episode=39 reward=0.7578285 (495.54 it/sec) -training >> step=230000, episode=39 reward=0.7648733 (549.22 it/sec) -training >> step=230100, episode=39 reward=0.7887455 (546.62 it/sec) -training >> step=230200, episode=39 reward=0.7499663 (557.81 it/sec) -training >> step=230300, episode=39 reward=0.7646723 (500.96 it/sec) -training >> step=230400, episode=39 reward=0.7579157 (551.92 it/sec) -training >> step=230500, episode=39 reward=0.7608054 (557.80 it/sec) -training >> step=230600, episode=39 reward=0.7800559 (551.54 it/sec) -training >> step=230700, episode=39 reward=0.7672686 (579.43 it/sec) -training >> step=230800, episode=39 reward=0.7682437 (398.87 it/sec) -training >> step=230900, episode=39 reward=0.7392219 (550.11 it/sec) -training >> step=231000, episode=39 reward=0.757162 (564.16 it/sec) -training >> step=231100, episode=39 reward=0.7622213 (566.74 it/sec) -training >> step=231200, episode=39 reward=0.7600315 (561.53 it/sec) -training >> step=231300, episode=39 reward=0.7572202 (558.28 it/sec) -training >> step=231400, episode=39 reward=0.7697942 (539.28 it/sec) -training >> step=231500, episode=39 reward=0.7721187 (570.49 it/sec) -training >> step=231600, episode=39 reward=0.7786114 (561.03 it/sec) -training >> step=231700, episode=39 reward=0.7300753 (563.51 it/sec) -training >> step=231800, episode=39 reward=0.7641543 (558.68 it/sec) -training >> step=231900, episode=39 reward=0.7726891 (562.39 it/sec) -training >> step=232000, episode=39 reward=0.7584754 (533.81 it/sec) -training >> step=232100, episode=39 reward=0.7744719 (562.27 it/sec) -training >> step=232200, episode=39 reward=0.7266047 (563.65 it/sec) -training >> step=232300, episode=39 reward=0.7533424 (539.68 it/sec) -training >> step=232400, episode=39 reward=0.764228 (555.23 it/sec) -training >> step=232500, episode=39 reward=0.7480082 (577.55 it/sec) -training >> step=232600, episode=39 reward=0.7352273 (535.10 it/sec) -training >> step=232700, episode=39 reward=0.774118 (558.48 it/sec) -training >> step=232800, episode=39 reward=0.7242076 (560.44 it/sec) -training >> step=232900, episode=39 reward=0.7216291 (568.52 it/sec) -training >> step=233000, episode=39 reward=0.7399729 (563.21 it/sec) -training >> step=233100, episode=39 reward=0.7345566 (537.78 it/sec) -training >> step=233200, episode=39 reward=0.7507519 (566.31 it/sec) -training >> step=233300, episode=40 reward=0.7567867 (133.32 it/sec) -training >> step=233400, episode=40 reward=0.7657239 (500.67 it/sec) -training >> step=233500, episode=40 reward=0.7466899 (546.97 it/sec) -training >> step=233600, episode=40 reward=0.7550585 (570.09 it/sec) -training >> step=233700, episode=40 reward=0.7397308 (535.06 it/sec) -training >> step=233800, episode=40 reward=0.7521032 (533.52 it/sec) -training >> step=233900, episode=40 reward=0.7568758 (542.53 it/sec) -training >> step=234000, episode=40 reward=0.7654693 (527.96 it/sec) -training >> step=234100, episode=40 reward=0.7702905 (557.47 it/sec) -training >> step=234200, episode=40 reward=0.7486343 (587.97 it/sec) -training >> step=234300, episode=40 reward=0.757088 (561.29 it/sec) -training >> step=234400, episode=40 reward=0.74997 (518.52 it/sec) -training >> step=234500, episode=40 reward=0.762451 (541.23 it/sec) -training >> step=234600, episode=40 reward=0.7640814 (544.16 it/sec) -training >> step=234700, episode=40 reward=0.7477911 (596.51 it/sec) -training >> step=234800, episode=40 reward=0.7402825 (565.11 it/sec) -training >> step=234900, episode=40 reward=0.7660813 (545.19 it/sec) -training >> step=235000, episode=40 reward=0.7525777 (528.35 it/sec) -training >> step=235100, episode=40 reward=0.7637276 (463.56 it/sec) -training >> step=235200, episode=40 reward=0.7625594 (563.22 it/sec) -training >> step=235300, episode=40 reward=0.765396 (550.96 it/sec) -training >> step=235400, episode=40 reward=0.7739898 (565.26 it/sec) -training >> step=235500, episode=40 reward=0.7271963 (551.45 it/sec) -training >> step=235600, episode=40 reward=0.739787 (533.02 it/sec) -training >> step=235700, episode=40 reward=0.7626191 (568.54 it/sec) -training >> step=235800, episode=40 reward=0.747973 (591.22 it/sec) -training >> step=235900, episode=40 reward=0.760003 (539.53 it/sec) -training >> step=236000, episode=40 reward=0.7696038 (546.19 it/sec) -training >> step=236100, episode=40 reward=0.7365205 (565.09 it/sec) -training >> step=236200, episode=40 reward=0.7527978 (522.62 it/sec) -training >> step=236300, episode=40 reward=0.7620719 (534.98 it/sec) -training >> step=236400, episode=40 reward=0.7727444 (578.70 it/sec) -training >> step=236500, episode=40 reward=0.772443 (547.84 it/sec) -training >> step=236600, episode=40 reward=0.7787274 (532.40 it/sec) -training >> step=236700, episode=40 reward=0.7704826 (554.84 it/sec) -training >> step=236800, episode=40 reward=0.7745363 (374.20 it/sec) -training >> step=236900, episode=40 reward=0.743779 (557.02 it/sec) -training >> step=237000, episode=40 reward=0.7663442 (578.23 it/sec) -training >> step=237100, episode=40 reward=0.7820401 (531.74 it/sec) -training >> step=237200, episode=40 reward=0.766342 (564.60 it/sec) -training >> step=237300, episode=40 reward=0.77301 (510.90 it/sec) -training >> step=237400, episode=40 reward=0.7578259 (565.27 it/sec) -training >> step=237500, episode=40 reward=0.7580764 (573.93 it/sec) -training >> step=237600, episode=40 reward=0.7670819 (554.13 it/sec) -training >> step=237700, episode=40 reward=0.7613295 (519.48 it/sec) -training >> step=237800, episode=40 reward=0.7817124 (579.67 it/sec) -training >> step=237900, episode=40 reward=0.7669131 (499.34 it/sec) -training >> step=238000, episode=40 reward=0.7912641 (595.90 it/sec) -training >> step=238100, episode=40 reward=0.7596284 (561.74 it/sec) -training >> step=238200, episode=40 reward=0.754932 (565.30 it/sec) -training >> step=238300, episode=40 reward=0.7353809 (564.71 it/sec) -training >> step=238400, episode=40 reward=0.7485967 (534.70 it/sec) -training >> step=238500, episode=40 reward=0.7459524 (573.48 it/sec) -training >> step=238600, episode=40 reward=0.7685273 (567.96 it/sec) -training >> step=238700, episode=40 reward=0.7645957 (582.49 it/sec) -training >> step=238800, episode=40 reward=0.7934508 (553.87 it/sec) -training >> step=238900, episode=40 reward=0.7484697 (554.19 it/sec) -training >> step=239000, episode=40 reward=0.7728407 (520.78 it/sec) -training >> step=239100, episode=40 reward=0.7311847 (574.14 it/sec) -training >> step=239200, episode=40 reward=0.7342942 (570.00 it/sec) -training >> step=239300, episode=41 reward=0.7492628 (141.03 it/sec) -training >> step=239400, episode=41 reward=0.7606595 (557.80 it/sec) -training >> step=239500, episode=41 reward=0.7526363 (565.07 it/sec) -training >> step=239600, episode=41 reward=0.7593577 (534.53 it/sec) -training >> step=239700, episode=41 reward=0.7625533 (540.45 it/sec) -training >> step=239800, episode=41 reward=0.7544426 (541.47 it/sec) -training >> step=239900, episode=41 reward=0.7398168 (529.01 it/sec) -training >> step=240000, episode=41 reward=0.7536607 (550.60 it/sec) -training >> step=240100, episode=41 reward=0.7617189 (570.58 it/sec) -training >> step=240200, episode=41 reward=0.7611902 (521.25 it/sec) -training >> step=240300, episode=41 reward=0.7505298 (475.84 it/sec) -training >> step=240400, episode=41 reward=0.7714376 (515.30 it/sec) -training >> step=240500, episode=41 reward=0.7612609 (563.92 it/sec) -training >> step=240600, episode=41 reward=0.7717976 (590.98 it/sec) -training >> step=240700, episode=41 reward=0.7652202 (554.25 it/sec) -training >> step=240800, episode=41 reward=0.7711262 (537.55 it/sec) -training >> step=240900, episode=41 reward=0.772175 (543.36 it/sec) -training >> step=241000, episode=41 reward=0.7623639 (495.30 it/sec) -training >> step=241100, episode=41 reward=0.7563961 (585.84 it/sec) -training >> step=241200, episode=41 reward=0.7671967 (563.62 it/sec) -training >> step=241300, episode=41 reward=0.7534496 (545.26 it/sec) -training >> step=241400, episode=41 reward=0.7847435 (571.50 it/sec) -training >> step=241500, episode=41 reward=0.7519801 (529.69 it/sec) -training >> step=241600, episode=41 reward=0.7805867 (572.69 it/sec) -training >> step=241700, episode=41 reward=0.766661 (556.34 it/sec) -training >> step=241800, episode=41 reward=0.7477614 (551.64 it/sec) -training >> step=241900, episode=41 reward=0.7460404 (561.43 it/sec) -training >> step=242000, episode=41 reward=0.7658328 (552.32 it/sec) -training >> step=242100, episode=41 reward=0.7638828 (496.03 it/sec) -training >> step=242200, episode=41 reward=0.7590585 (594.76 it/sec) -training >> step=242300, episode=41 reward=0.7605781 (535.57 it/sec) -training >> step=242400, episode=41 reward=0.7532836 (559.11 it/sec) -training >> step=242500, episode=41 reward=0.7572768 (529.42 it/sec) -training >> step=242600, episode=41 reward=0.7813817 (550.65 it/sec) -training >> step=242700, episode=41 reward=0.7687466 (531.81 it/sec) -training >> step=242800, episode=41 reward=0.7520615 (594.88 it/sec) -training >> step=242900, episode=41 reward=0.7571516 (566.35 it/sec) -training >> step=243000, episode=41 reward=0.7540529 (566.92 it/sec) -training >> step=243100, episode=41 reward=0.7625903 (416.96 it/sec) -training >> step=243200, episode=41 reward=0.7513946 (508.49 it/sec) -training >> step=243300, episode=41 reward=0.7729704 (565.75 it/sec) -training >> step=243400, episode=41 reward=0.7471846 (582.96 it/sec) -training >> step=243500, episode=41 reward=0.7439494 (565.57 it/sec) -training >> step=243600, episode=41 reward=0.776388 (519.35 it/sec) -training >> step=243700, episode=41 reward=0.7539473 (553.42 it/sec) -training >> step=243800, episode=41 reward=0.7528499 (535.41 it/sec) -training >> step=243900, episode=41 reward=0.7384875 (594.24 it/sec) -training >> step=244000, episode=41 reward=0.7534723 (562.19 it/sec) -training >> step=244100, episode=41 reward=0.7346355 (564.34 it/sec) -training >> step=244200, episode=41 reward=0.7502407 (568.55 it/sec) -training >> step=244300, episode=41 reward=0.7495583 (507.94 it/sec) -training >> step=244400, episode=41 reward=0.7628353 (523.61 it/sec) -training >> step=244500, episode=41 reward=0.75735 (586.51 it/sec) -training >> step=244600, episode=41 reward=0.7540569 (568.78 it/sec) -training >> step=244700, episode=41 reward=0.7306979 (557.48 it/sec) -training >> step=244800, episode=41 reward=0.7407716 (546.17 it/sec) -training >> step=244900, episode=41 reward=0.7706096 (529.55 it/sec) -training >> step=245000, episode=41 reward=0.7645679 (586.48 it/sec) -training >> step=245100, episode=41 reward=0.7479134 (557.90 it/sec) -training >> step=245200, episode=41 reward=0.7653177 (547.44 it/sec) -training >> step=245300, episode=42 reward=0.7281142 (141.15 it/sec) -training >> step=245400, episode=42 reward=0.7536762 (536.02 it/sec) -training >> step=245500, episode=42 reward=0.7520959 (495.67 it/sec) -training >> step=245600, episode=42 reward=0.7318226 (512.55 it/sec) -training >> step=245700, episode=42 reward=0.7685179 (549.05 it/sec) -training >> step=245800, episode=42 reward=0.7677537 (562.50 it/sec) -training >> step=245900, episode=42 reward=0.7627531 (560.85 it/sec) -training >> step=246000, episode=42 reward=0.7816161 (563.72 it/sec) -training >> step=246100, episode=42 reward=0.7575305 (551.62 it/sec) -training >> step=246200, episode=42 reward=0.7669499 (561.55 it/sec) -training >> step=246300, episode=42 reward=0.7411153 (504.05 it/sec) -training >> step=246400, episode=42 reward=0.7573272 (579.67 it/sec) -training >> step=246500, episode=42 reward=0.7606572 (553.42 it/sec) -training >> step=246600, episode=42 reward=0.754321 (571.63 it/sec) -training >> step=246700, episode=42 reward=0.7490174 (564.97 it/sec) -training >> step=246800, episode=42 reward=0.7632198 (555.59 it/sec) -training >> step=246900, episode=42 reward=0.7633981 (555.41 it/sec) -training >> step=247000, episode=42 reward=0.7403575 (527.66 it/sec) -training >> step=247100, episode=42 reward=0.7550805 (558.03 it/sec) -training >> step=247200, episode=42 reward=0.7747951 (553.50 it/sec) -training >> step=247300, episode=42 reward=0.7660959 (536.08 it/sec) -training >> step=247400, episode=42 reward=0.7723666 (560.19 it/sec) -training >> step=247500, episode=42 reward=0.777653 (536.04 it/sec) -training >> step=247600, episode=42 reward=0.7290617 (569.47 it/sec) -training >> step=247700, episode=42 reward=0.7435945 (545.47 it/sec) -training >> step=247800, episode=42 reward=0.7674878 (578.18 it/sec) -training >> step=247900, episode=42 reward=0.7701102 (533.73 it/sec) -training >> step=248000, episode=42 reward=0.7664756 (516.19 it/sec) -training >> step=248100, episode=42 reward=0.776117 (545.96 it/sec) -training >> step=248200, episode=42 reward=0.7551504 (563.21 it/sec) -training >> step=248300, episode=42 reward=0.754971 (580.33 it/sec) -training >> step=248400, episode=42 reward=0.7525715 (505.42 it/sec) -training >> step=248500, episode=42 reward=0.7703466 (554.43 it/sec) -training >> step=248600, episode=42 reward=0.7430829 (545.10 it/sec) -training >> step=248700, episode=42 reward=0.753568 (520.98 it/sec) -training >> step=248800, episode=42 reward=0.7712724 (569.13 it/sec) -training >> step=248900, episode=42 reward=0.783251 (601.97 it/sec) -training >> step=249000, episode=42 reward=0.7394935 (533.55 it/sec) -training >> step=249100, episode=42 reward=0.7657881 (541.90 it/sec) -training >> step=249200, episode=42 reward=0.7686898 (554.46 it/sec) -training >> step=249300, episode=42 reward=0.7611276 (548.77 it/sec) -training >> step=249400, episode=42 reward=0.7745627 (456.23 it/sec) -training >> step=249500, episode=42 reward=0.767235 (537.45 it/sec) -training >> step=249600, episode=42 reward=0.7681452 (529.50 it/sec) -training >> step=249700, episode=42 reward=0.7740194 (556.71 it/sec) -training >> step=249800, episode=42 reward=0.740164 (564.98 it/sec) -training >> step=249900, episode=42 reward=0.7662331 (578.02 it/sec) -training >> step=250000, episode=42 reward=0.7764226 (569.72 it/sec) -training >> step=250100, episode=42 reward=0.7384602 (544.19 it/sec) -training >> step=250200, episode=42 reward=0.7550854 (569.44 it/sec) -training >> step=250300, episode=42 reward=0.7850446 (521.44 it/sec) -training >> step=250400, episode=42 reward=0.7618997 (560.31 it/sec) -training >> step=250500, episode=42 reward=0.7468591 (555.22 it/sec) -training >> step=250600, episode=42 reward=0.7315335 (567.16 it/sec) -training >> step=250700, episode=42 reward=0.7239213 (528.29 it/sec) -training >> step=250800, episode=42 reward=0.7592504 (555.34 it/sec) -training >> step=250900, episode=42 reward=0.7575811 (549.22 it/sec) -training >> step=251000, episode=42 reward=0.7439063 (509.93 it/sec) -training >> step=251100, episode=42 reward=0.765249 (589.42 it/sec) -training >> step=251200, episode=42 reward=0.7061636 (535.24 it/sec) -training >> step=251300, episode=43 reward=0.7514035 (96.97 it/sec) -training >> step=251400, episode=43 reward=0.7360955 (524.04 it/sec) -training >> step=251500, episode=43 reward=0.7380955 (501.50 it/sec) -training >> step=251600, episode=43 reward=0.7674603 (556.61 it/sec) -training >> step=251700, episode=43 reward=0.7383892 (560.28 it/sec) -training >> step=251800, episode=43 reward=0.7656022 (527.29 it/sec) -training >> step=251900, episode=43 reward=0.7391608 (519.90 it/sec) -training >> step=252000, episode=43 reward=0.7496779 (570.80 it/sec) -training >> step=252100, episode=43 reward=0.7502329 (557.47 it/sec) -training >> step=252200, episode=43 reward=0.7641037 (555.91 it/sec) -training >> step=252300, episode=43 reward=0.7350932 (574.90 it/sec) -training >> step=252400, episode=43 reward=0.7413997 (540.50 it/sec) -training >> step=252500, episode=43 reward=0.7413963 (542.92 it/sec) -training >> step=252600, episode=43 reward=0.7631039 (532.04 it/sec) -training >> step=252700, episode=43 reward=0.7597688 (560.48 it/sec) -training >> step=252800, episode=43 reward=0.750645 (585.07 it/sec) -training >> step=252900, episode=43 reward=0.7566073 (568.63 it/sec) -training >> step=253000, episode=43 reward=0.7425856 (513.56 it/sec) -training >> step=253100, episode=43 reward=0.7628174 (565.57 it/sec) -training >> step=253200, episode=43 reward=0.7418352 (547.72 it/sec) -training >> step=253300, episode=43 reward=0.7460802 (563.05 it/sec) -training >> step=253400, episode=43 reward=0.7465028 (565.82 it/sec) -training >> step=253500, episode=43 reward=0.7540289 (574.44 it/sec) -training >> step=253600, episode=43 reward=0.7634994 (547.86 it/sec) -training >> step=253700, episode=43 reward=0.7926235 (580.96 it/sec) -training >> step=253800, episode=43 reward=0.7458569 (506.36 it/sec) -training >> step=253900, episode=43 reward=0.7505205 (567.61 it/sec) -training >> step=254000, episode=43 reward=0.7748756 (572.81 it/sec) -training >> step=254100, episode=43 reward=0.7503787 (536.04 it/sec) -training >> step=254200, episode=43 reward=0.7330331 (539.18 it/sec) -training >> step=254300, episode=43 reward=0.7797831 (558.74 it/sec) -training >> step=254400, episode=43 reward=0.7487149 (554.88 it/sec) -training >> step=254500, episode=43 reward=0.7480936 (580.15 it/sec) -training >> step=254600, episode=43 reward=0.7410436 (556.68 it/sec) -training >> step=254700, episode=43 reward=0.7560779 (547.11 it/sec) -training >> step=254800, episode=43 reward=0.7680927 (560.52 it/sec) -training >> step=254900, episode=43 reward=0.7801097 (555.53 it/sec) -training >> step=255000, episode=43 reward=0.7594396 (550.80 it/sec) -training >> step=255100, episode=43 reward=0.745742 (561.55 it/sec) -training >> step=255200, episode=43 reward=0.7608955 (442.76 it/sec) -training >> step=255300, episode=43 reward=0.7582271 (499.25 it/sec) -training >> step=255400, episode=43 reward=0.7697409 (565.11 it/sec) -training >> step=255500, episode=43 reward=0.7508771 (485.49 it/sec) -training >> step=255600, episode=43 reward=0.7697738 (489.45 it/sec) -training >> step=255700, episode=43 reward=0.7522071 (485.90 it/sec) -training >> step=255800, episode=43 reward=0.7702619 (540.45 it/sec) -training >> step=255900, episode=43 reward=0.7803485 (556.75 it/sec) -training >> step=256000, episode=43 reward=0.7507714 (487.16 it/sec) -training >> step=256100, episode=43 reward=0.7558643 (551.73 it/sec) -training >> step=256200, episode=43 reward=0.7588437 (568.71 it/sec) -training >> step=256300, episode=43 reward=0.7634047 (529.95 it/sec) -training >> step=256400, episode=43 reward=0.7527424 (550.46 it/sec) -training >> step=256500, episode=43 reward=0.7518461 (547.41 it/sec) -training >> step=256600, episode=43 reward=0.7428835 (554.96 it/sec) -training >> step=256700, episode=43 reward=0.7442718 (563.97 it/sec) -training >> step=256800, episode=43 reward=0.7509946 (563.23 it/sec) -training >> step=256900, episode=43 reward=0.7376999 (558.72 it/sec) -training >> step=257000, episode=43 reward=0.7063316 (546.15 it/sec) -training >> step=257100, episode=43 reward=0.7504516 (545.88 it/sec) -training >> step=257200, episode=43 reward=0.750217 (592.85 it/sec) -training >> step=257300, episode=44 reward=0.7387695 (181.97 it/sec) -training >> step=257400, episode=44 reward=0.7617255 (553.11 it/sec) -training >> step=257500, episode=44 reward=0.7401265 (538.00 it/sec) -training >> step=257600, episode=44 reward=0.727644 (519.75 it/sec) -training >> step=257700, episode=44 reward=0.7665841 (550.47 it/sec) -training >> step=257800, episode=44 reward=0.7541993 (531.76 it/sec) -training >> step=257900, episode=44 reward=0.7672211 (545.05 it/sec) -training >> step=258000, episode=44 reward=0.767601 (540.80 it/sec) -training >> step=258100, episode=44 reward=0.774841 (553.46 it/sec) -training >> step=258200, episode=44 reward=0.7827545 (558.83 it/sec) -training >> step=258300, episode=44 reward=0.7452093 (554.13 it/sec) -training >> step=258400, episode=44 reward=0.7633628 (597.67 it/sec) -training >> step=258500, episode=44 reward=0.7562367 (541.78 it/sec) -training >> step=258600, episode=44 reward=0.7727742 (523.38 it/sec) -training >> step=258700, episode=44 reward=0.7747212 (585.42 it/sec) -training >> step=258800, episode=44 reward=0.7552972 (537.66 it/sec) -training >> step=258900, episode=44 reward=0.7616454 (532.41 it/sec) -training >> step=259000, episode=44 reward=0.7547589 (564.36 it/sec) -training >> step=259100, episode=44 reward=0.7377896 (549.73 it/sec) -training >> step=259200, episode=44 reward=0.7458868 (535.46 it/sec) -training >> step=259300, episode=44 reward=0.7713673 (594.91 it/sec) -training >> step=259400, episode=44 reward=0.7660549 (552.72 it/sec) -training >> step=259500, episode=44 reward=0.7724749 (559.26 it/sec) -training >> step=259600, episode=44 reward=0.7821657 (547.21 it/sec) -training >> step=259700, episode=44 reward=0.7522698 (542.68 it/sec) -training >> step=259800, episode=44 reward=0.7544329 (572.73 it/sec) -training >> step=259900, episode=44 reward=0.7497844 (549.63 it/sec) -training >> step=260000, episode=44 reward=0.7560747 (542.25 it/sec) -training >> step=260100, episode=44 reward=0.7407389 (591.76 it/sec) -training >> step=260200, episode=44 reward=0.7801306 (543.72 it/sec) -training >> step=260300, episode=44 reward=0.7743313 (536.79 it/sec) -training >> step=260400, episode=44 reward=0.7628142 (578.58 it/sec) -training >> step=260500, episode=44 reward=0.7811698 (570.55 it/sec) -training >> step=260600, episode=44 reward=0.7602195 (568.96 it/sec) -training >> step=260700, episode=44 reward=0.7501084 (558.42 it/sec) -training >> step=260800, episode=44 reward=0.7577348 (562.31 it/sec) -training >> step=260900, episode=44 reward=0.7642065 (572.11 it/sec) -training >> step=261000, episode=44 reward=0.7351329 (538.50 it/sec) -training >> step=261100, episode=44 reward=0.7333714 (556.19 it/sec) -training >> step=261200, episode=44 reward=0.7805175 (570.46 it/sec) -training >> step=261300, episode=44 reward=0.7524464 (475.25 it/sec) -training >> step=261400, episode=44 reward=0.764852 (383.55 it/sec) -training >> step=261500, episode=44 reward=0.7543983 (554.28 it/sec) -training >> step=261600, episode=44 reward=0.7575527 (584.56 it/sec) -training >> step=261700, episode=44 reward=0.7604054 (565.82 it/sec) -training >> step=261800, episode=44 reward=0.7613149 (551.39 it/sec) -training >> step=261900, episode=44 reward=0.7649876 (515.27 it/sec) -training >> step=262000, episode=44 reward=0.7401987 (538.12 it/sec) -training >> step=262100, episode=44 reward=0.7719122 (595.66 it/sec) -training >> step=262200, episode=44 reward=0.7760558 (538.91 it/sec) -training >> step=262300, episode=44 reward=0.7603837 (561.16 it/sec) -training >> step=262400, episode=44 reward=0.7516744 (516.07 it/sec) -training >> step=262500, episode=44 reward=0.73483 (506.81 it/sec) -training >> step=262600, episode=44 reward=0.759403 (553.17 it/sec) -training >> step=262700, episode=44 reward=0.745994 (576.79 it/sec) -training >> step=262800, episode=44 reward=0.7422007 (559.30 it/sec) -training >> step=262900, episode=44 reward=0.7572116 (594.45 it/sec) -training >> step=263000, episode=44 reward=0.7609965 (531.76 it/sec) -training >> step=263100, episode=44 reward=0.7497876 (558.74 it/sec) -training >> step=263200, episode=44 reward=0.7311905 (574.54 it/sec) -training >> step=263300, episode=45 reward=0.7798551 (148.26 it/sec) -training >> step=263400, episode=45 reward=0.7648901 (568.67 it/sec) -training >> step=263500, episode=45 reward=0.7679476 (542.12 it/sec) -training >> step=263600, episode=45 reward=0.7497168 (526.53 it/sec) -training >> step=263700, episode=45 reward=0.72949 (554.30 it/sec) -training >> step=263800, episode=45 reward=0.7719285 (542.60 it/sec) -training >> step=263900, episode=45 reward=0.7438921 (527.70 it/sec) -training >> step=264000, episode=45 reward=0.7432077 (547.59 it/sec) -training >> step=264100, episode=45 reward=0.7481789 (574.95 it/sec) -training >> step=264200, episode=45 reward=0.7639675 (554.17 it/sec) -training >> step=264300, episode=45 reward=0.761964 (593.61 it/sec) -training >> step=264400, episode=45 reward=0.7541298 (519.48 it/sec) -training >> step=264500, episode=45 reward=0.7649543 (575.96 it/sec) -training >> step=264600, episode=45 reward=0.7693558 (561.72 it/sec) -training >> step=264700, episode=45 reward=0.7474065 (561.17 it/sec) -training >> step=264800, episode=45 reward=0.7419845 (587.44 it/sec) -training >> step=264900, episode=45 reward=0.7485808 (562.40 it/sec) -training >> step=265000, episode=45 reward=0.7548294 (531.29 it/sec) -training >> step=265100, episode=45 reward=0.7548373 (549.35 it/sec) -training >> step=265200, episode=45 reward=0.7616537 (557.61 it/sec) -training >> step=265300, episode=45 reward=0.7584363 (558.30 it/sec) -training >> step=265400, episode=45 reward=0.7622042 (599.52 it/sec) -training >> step=265500, episode=45 reward=0.7552009 (556.98 it/sec) -training >> step=265600, episode=45 reward=0.7822102 (551.49 it/sec) -training >> step=265700, episode=45 reward=0.7519833 (519.28 it/sec) -training >> step=265800, episode=45 reward=0.7460434 (575.24 it/sec) -training >> step=265900, episode=45 reward=0.7636797 (548.72 it/sec) -training >> step=266000, episode=45 reward=0.759831 (598.38 it/sec) -training >> step=266100, episode=45 reward=0.7743983 (552.64 it/sec) -training >> step=266200, episode=45 reward=0.7473229 (556.74 it/sec) -training >> step=266300, episode=45 reward=0.757772 (538.84 it/sec) -training >> step=266400, episode=45 reward=0.7610064 (556.01 it/sec) -training >> step=266500, episode=45 reward=0.7593939 (542.83 it/sec) -training >> step=266600, episode=45 reward=0.7404547 (532.22 it/sec) -training >> step=266700, episode=45 reward=0.7519792 (526.52 it/sec) -training >> step=266800, episode=45 reward=0.7663301 (518.14 it/sec) -training >> step=266900, episode=45 reward=0.7457029 (568.40 it/sec) -training >> step=267000, episode=45 reward=0.766836 (553.72 it/sec) -training >> step=267100, episode=45 reward=0.7421629 (587.66 it/sec) -training >> step=267200, episode=45 reward=0.7435026 (547.61 it/sec) -training >> step=267300, episode=45 reward=0.7613857 (545.86 it/sec) -training >> step=267400, episode=45 reward=0.7662441 (414.31 it/sec) -training >> step=267500, episode=45 reward=0.7535788 (540.38 it/sec) -training >> step=267600, episode=45 reward=0.7486 (606.59 it/sec) -training >> step=267700, episode=45 reward=0.7545377 (583.60 it/sec) -training >> step=267800, episode=45 reward=0.7584978 (544.15 it/sec) -training >> step=267900, episode=45 reward=0.735114 (560.61 it/sec) -training >> step=268000, episode=45 reward=0.7435819 (560.66 it/sec) -training >> step=268100, episode=45 reward=0.7763466 (556.28 it/sec) -training >> step=268200, episode=45 reward=0.7687021 (556.19 it/sec) -training >> step=268300, episode=45 reward=0.7520713 (488.61 it/sec) -training >> step=268400, episode=45 reward=0.7668595 (527.69 it/sec) -training >> step=268500, episode=45 reward=0.7606176 (576.51 it/sec) -training >> step=268600, episode=45 reward=0.7557976 (561.89 it/sec) -training >> step=268700, episode=45 reward=0.7618788 (559.98 it/sec) -training >> step=268800, episode=45 reward=0.7644605 (523.27 it/sec) -training >> step=268900, episode=45 reward=0.7626632 (496.88 it/sec) -training >> step=269000, episode=45 reward=0.7646204 (536.41 it/sec) -training >> step=269100, episode=45 reward=0.7206976 (537.86 it/sec) -training >> step=269200, episode=45 reward=0.7602499 (536.72 it/sec) -training >> step=269300, episode=46 reward=0.7371572 (150.56 it/sec) -training >> step=269400, episode=46 reward=0.7517588 (475.54 it/sec) -training >> step=269500, episode=46 reward=0.7656161 (496.06 it/sec) -training >> step=269600, episode=46 reward=0.7233645 (510.86 it/sec) -training >> step=269700, episode=46 reward=0.754243 (537.62 it/sec) -training >> step=269800, episode=46 reward=0.7431129 (522.49 it/sec) -training >> step=269900, episode=46 reward=0.7667002 (514.36 it/sec) -training >> step=270000, episode=46 reward=0.7565523 (546.31 it/sec) -training >> step=270100, episode=46 reward=0.7536052 (525.56 it/sec) -training >> step=270200, episode=46 reward=0.7411964 (558.55 it/sec) -training >> step=270300, episode=46 reward=0.7617669 (565.25 it/sec) -training >> step=270400, episode=46 reward=0.7916119 (540.99 it/sec) -training >> step=270500, episode=46 reward=0.7654734 (562.06 it/sec) -training >> step=270600, episode=46 reward=0.7555591 (579.15 it/sec) -training >> step=270700, episode=46 reward=0.7872875 (534.76 it/sec) -training >> step=270800, episode=46 reward=0.7563212 (592.35 it/sec) -training >> step=270900, episode=46 reward=0.7522221 (535.54 it/sec) -training >> step=271000, episode=46 reward=0.7615871 (547.46 it/sec) -training >> step=271100, episode=46 reward=0.760323 (565.46 it/sec) -training >> step=271200, episode=46 reward=0.758548 (551.74 it/sec) -training >> step=271300, episode=46 reward=0.7616481 (554.63 it/sec) -training >> step=271400, episode=46 reward=0.7587016 (518.52 it/sec) -training >> step=271500, episode=46 reward=0.7363442 (554.92 it/sec) -training >> step=271600, episode=46 reward=0.74383 (571.03 it/sec) -training >> step=271700, episode=46 reward=0.7598022 (493.52 it/sec) -training >> step=271800, episode=46 reward=0.7589405 (506.32 it/sec) -training >> step=271900, episode=46 reward=0.7701914 (548.21 it/sec) -training >> step=272000, episode=46 reward=0.765727 (522.38 it/sec) -training >> step=272100, episode=46 reward=0.7450203 (541.02 it/sec) -training >> step=272200, episode=46 reward=0.7698679 (561.45 it/sec) -training >> step=272300, episode=46 reward=0.7694334 (559.49 it/sec) -training >> step=272400, episode=46 reward=0.7736384 (572.20 it/sec) -training >> step=272500, episode=46 reward=0.7301491 (551.82 it/sec) -training >> step=272600, episode=46 reward=0.7653508 (518.21 it/sec) -training >> step=272700, episode=46 reward=0.7680382 (568.89 it/sec) -training >> step=272800, episode=46 reward=0.7674254 (557.99 it/sec) -training >> step=272900, episode=46 reward=0.7720283 (564.81 it/sec) -training >> step=273000, episode=46 reward=0.7594249 (579.75 it/sec) -training >> step=273100, episode=46 reward=0.7438455 (515.73 it/sec) -training >> step=273200, episode=46 reward=0.7506941 (550.72 it/sec) -training >> step=273300, episode=46 reward=0.7599126 (542.24 it/sec) -training >> step=273400, episode=46 reward=0.7470824 (524.44 it/sec) -training >> step=273500, episode=46 reward=0.7665783 (441.67 it/sec) -training >> step=273600, episode=46 reward=0.7645042 (572.23 it/sec) -training >> step=273700, episode=46 reward=0.7277514 (532.67 it/sec) -training >> step=273800, episode=46 reward=0.7608858 (513.33 it/sec) -training >> step=273900, episode=46 reward=0.748472 (511.32 it/sec) -training >> step=274000, episode=46 reward=0.7615492 (547.36 it/sec) -training >> step=274100, episode=46 reward=0.7553399 (588.49 it/sec) -training >> step=274200, episode=46 reward=0.7594228 (546.29 it/sec) -training >> step=274300, episode=46 reward=0.7455316 (564.71 it/sec) -training >> step=274400, episode=46 reward=0.7518897 (576.72 it/sec) -training >> step=274500, episode=46 reward=0.789301 (568.36 it/sec) -training >> step=274600, episode=46 reward=0.7372344 (552.11 it/sec) -training >> step=274700, episode=46 reward=0.7569845 (558.12 it/sec) -training >> step=274800, episode=46 reward=0.7327173 (523.94 it/sec) -training >> step=274900, episode=46 reward=0.761808 (552.14 it/sec) -training >> step=275000, episode=46 reward=0.770542 (575.40 it/sec) -training >> step=275100, episode=46 reward=0.7505367 (552.62 it/sec) -training >> step=275200, episode=46 reward=0.7388136 (550.71 it/sec) -training >> step=275300, episode=47 reward=0.759598 (151.78 it/sec) -training >> step=275400, episode=47 reward=0.7208028 (560.43 it/sec) -training >> step=275500, episode=47 reward=0.7548957 (579.88 it/sec) -training >> step=275600, episode=47 reward=0.7432442 (560.42 it/sec) -training >> step=275700, episode=47 reward=0.7522725 (520.22 it/sec) -training >> step=275800, episode=47 reward=0.759167 (522.53 it/sec) -training >> step=275900, episode=47 reward=0.7731766 (557.46 it/sec) -training >> step=276000, episode=47 reward=0.7431111 (556.88 it/sec) -training >> step=276100, episode=47 reward=0.785605 (571.13 it/sec) -training >> step=276200, episode=47 reward=0.7849174 (523.51 it/sec) -training >> step=276300, episode=47 reward=0.7495102 (536.03 it/sec) -training >> step=276400, episode=47 reward=0.7586521 (564.98 it/sec) -training >> step=276500, episode=47 reward=0.7523885 (563.23 it/sec) -training >> step=276600, episode=47 reward=0.7800656 (505.56 it/sec) -training >> step=276700, episode=47 reward=0.7730021 (586.11 it/sec) -training >> step=276800, episode=47 reward=0.7662534 (538.33 it/sec) -training >> step=276900, episode=47 reward=0.7476923 (505.34 it/sec) -training >> step=277000, episode=47 reward=0.7678869 (530.15 it/sec) -training >> step=277100, episode=47 reward=0.7691012 (547.64 it/sec) -training >> step=277200, episode=47 reward=0.7790201 (548.17 it/sec) -training >> step=277300, episode=47 reward=0.769569 (525.06 it/sec) -training >> step=277400, episode=47 reward=0.7619765 (529.01 it/sec) -training >> step=277500, episode=47 reward=0.7409658 (566.48 it/sec) -training >> step=277600, episode=47 reward=0.7734852 (584.48 it/sec) -training >> step=277700, episode=47 reward=0.750419 (537.39 it/sec) -training >> step=277800, episode=47 reward=0.7607156 (584.19 it/sec) -training >> step=277900, episode=47 reward=0.7492878 (532.41 it/sec) -training >> step=278000, episode=47 reward=0.7738031 (546.65 it/sec) -training >> step=278100, episode=47 reward=0.7589856 (566.53 it/sec) -training >> step=278200, episode=47 reward=0.7567953 (565.36 it/sec) -training >> step=278300, episode=47 reward=0.7563472 (551.77 it/sec) -training >> step=278400, episode=47 reward=0.7301021 (562.16 it/sec) -training >> step=278500, episode=47 reward=0.7547678 (525.80 it/sec) -training >> step=278600, episode=47 reward=0.7814544 (550.41 it/sec) -training >> step=278700, episode=47 reward=0.7583669 (550.94 it/sec) -training >> step=278800, episode=47 reward=0.7563294 (559.39 it/sec) -training >> step=278900, episode=47 reward=0.767294 (576.11 it/sec) -training >> step=279000, episode=47 reward=0.7738793 (558.05 it/sec) -training >> step=279100, episode=47 reward=0.7463419 (550.50 it/sec) -training >> step=279200, episode=47 reward=0.7438562 (573.99 it/sec) -training >> step=279300, episode=47 reward=0.7630197 (558.31 it/sec) -training >> step=279400, episode=47 reward=0.7635427 (546.76 it/sec) -training >> step=279500, episode=47 reward=0.7274461 (552.60 it/sec) -training >> step=279600, episode=47 reward=0.7664527 (542.01 it/sec) -training >> step=279700, episode=47 reward=0.7392244 (415.70 it/sec) -training >> step=279800, episode=47 reward=0.769533 (574.82 it/sec) -training >> step=279900, episode=47 reward=0.7681477 (536.58 it/sec) -training >> step=280000, episode=47 reward=0.7486764 (541.88 it/sec) -training >> step=280100, episode=47 reward=0.7588218 (560.60 it/sec) -training >> step=280200, episode=47 reward=0.7508025 (518.91 it/sec) -training >> step=280300, episode=47 reward=0.7536968 (567.23 it/sec) -training >> step=280400, episode=47 reward=0.7407327 (583.49 it/sec) -training >> step=280500, episode=47 reward=0.7334577 (536.86 it/sec) -training >> step=280600, episode=47 reward=0.7365662 (544.20 it/sec) -training >> step=280700, episode=47 reward=0.7386027 (554.33 it/sec) -training >> step=280800, episode=47 reward=0.7699615 (557.38 it/sec) -training >> step=280900, episode=47 reward=0.7340224 (579.09 it/sec) -training >> step=281000, episode=47 reward=0.7363902 (573.17 it/sec) -training >> step=281100, episode=47 reward=0.7363134 (581.78 it/sec) -training >> step=281200, episode=47 reward=0.7230848 (561.17 it/sec) -training >> step=281300, episode=48 reward=0.7495183 (148.55 it/sec) -training >> step=281400, episode=48 reward=0.7610757 (544.78 it/sec) -training >> step=281500, episode=48 reward=0.7343112 (529.21 it/sec) -training >> step=281600, episode=48 reward=0.7423859 (519.09 it/sec) -training >> step=281700, episode=48 reward=0.7454401 (530.76 it/sec) -training >> step=281800, episode=48 reward=0.7678956 (595.52 it/sec) -training >> step=281900, episode=48 reward=0.778062 (536.22 it/sec) -training >> step=282000, episode=48 reward=0.753873 (587.00 it/sec) -training >> step=282100, episode=48 reward=0.7537108 (559.96 it/sec) -training >> step=282200, episode=48 reward=0.7522685 (469.13 it/sec) -training >> step=282300, episode=48 reward=0.7698147 (555.69 it/sec) -training >> step=282400, episode=48 reward=0.7545394 (529.82 it/sec) -training >> step=282500, episode=48 reward=0.7721848 (517.52 it/sec) -training >> step=282600, episode=48 reward=0.7658189 (566.13 it/sec) -training >> step=282700, episode=48 reward=0.7537063 (544.74 it/sec) -training >> step=282800, episode=48 reward=0.77108 (563.71 it/sec) -training >> step=282900, episode=48 reward=0.7769454 (569.38 it/sec) -training >> step=283000, episode=48 reward=0.7670286 (564.23 it/sec) -training >> step=283100, episode=48 reward=0.7576178 (569.78 it/sec) -training >> step=283200, episode=48 reward=0.7642432 (550.71 it/sec) -training >> step=283300, episode=48 reward=0.7501245 (533.76 it/sec) -training >> step=283400, episode=48 reward=0.7638534 (538.39 it/sec) -training >> step=283500, episode=48 reward=0.7591377 (560.19 it/sec) -training >> step=283600, episode=48 reward=0.738868 (531.74 it/sec) -training >> step=283700, episode=48 reward=0.7560212 (582.09 it/sec) -training >> step=283800, episode=48 reward=0.7539066 (528.59 it/sec) -training >> step=283900, episode=48 reward=0.7517613 (392.32 it/sec) -training >> step=284000, episode=48 reward=0.7727931 (493.92 it/sec) -training >> step=284100, episode=48 reward=0.7588984 (472.02 it/sec) -training >> step=284200, episode=48 reward=0.7709957 (530.60 it/sec) -training >> step=284300, episode=48 reward=0.7827601 (463.59 it/sec) -training >> step=284400, episode=48 reward=0.7593181 (429.62 it/sec) -training >> step=284500, episode=48 reward=0.7533088 (549.52 it/sec) -training >> step=284600, episode=48 reward=0.7559748 (520.81 it/sec) -training >> step=284700, episode=48 reward=0.7507457 (516.03 it/sec) -training >> step=284800, episode=48 reward=0.779393 (549.46 it/sec) -training >> step=284900, episode=48 reward=0.7698794 (447.84 it/sec) -training >> step=285000, episode=48 reward=0.759588 (269.58 it/sec) -training >> step=285100, episode=48 reward=0.7655641 (333.34 it/sec) -training >> step=285200, episode=48 reward=0.7299847 (226.27 it/sec) -training >> step=285300, episode=48 reward=0.7851436 (295.87 it/sec) -training >> step=285400, episode=48 reward=0.7736933 (450.50 it/sec) -training >> step=285500, episode=48 reward=0.756444 (498.53 it/sec) -training >> step=285600, episode=48 reward=0.7773949 (534.15 it/sec) -training >> step=285700, episode=48 reward=0.7642055 (435.32 it/sec) -training >> step=285800, episode=48 reward=0.7653005 (523.02 it/sec) -training >> step=285900, episode=48 reward=0.7631481 (580.91 it/sec) -training >> step=286000, episode=48 reward=0.7700785 (553.28 it/sec) -training >> step=286100, episode=48 reward=0.7481525 (526.29 it/sec) -training >> step=286200, episode=48 reward=0.7607782 (521.05 it/sec) -training >> step=286300, episode=48 reward=0.7497666 (554.60 it/sec) -training >> step=286400, episode=48 reward=0.7585505 (558.15 it/sec) -training >> step=286500, episode=48 reward=0.7457314 (555.79 it/sec) -training >> step=286600, episode=48 reward=0.7612031 (538.45 it/sec) -training >> step=286700, episode=48 reward=0.7438449 (553.26 it/sec) -training >> step=286800, episode=48 reward=0.733758 (558.24 it/sec) -training >> step=286900, episode=48 reward=0.7593467 (501.39 it/sec) -training >> step=287000, episode=48 reward=0.737062 (542.03 it/sec) -training >> step=287100, episode=48 reward=0.720991 (466.28 it/sec) -training >> step=287200, episode=48 reward=0.7387288 (483.71 it/sec) -training >> step=287300, episode=49 reward=0.7551569 (346.99 it/sec) -training >> step=287400, episode=49 reward=0.7342269 (504.40 it/sec) -training >> step=287500, episode=49 reward=0.7706139 (560.25 it/sec) -training >> step=287600, episode=49 reward=0.7658108 (510.74 it/sec) -training >> step=287700, episode=49 reward=0.7661914 (463.88 it/sec) -training >> step=287800, episode=49 reward=0.742689 (574.15 it/sec) -training >> step=287900, episode=49 reward=0.7640626 (514.17 it/sec) -training >> step=288000, episode=49 reward=0.7659688 (544.14 it/sec) -training >> step=288100, episode=49 reward=0.7499105 (567.34 it/sec) -training >> step=288200, episode=49 reward=0.7555047 (493.70 it/sec) -training >> step=288300, episode=49 reward=0.7391959 (554.13 it/sec) -training >> step=288400, episode=49 reward=0.7525627 (515.81 it/sec) -training >> step=288500, episode=49 reward=0.7469487 (551.02 it/sec) -training >> step=288600, episode=49 reward=0.7557012 (586.39 it/sec) -training >> step=288700, episode=49 reward=0.7663 (543.05 it/sec) -training >> step=288800, episode=49 reward=0.7813492 (526.20 it/sec) -training >> step=288900, episode=49 reward=0.7653552 (534.26 it/sec) -training >> step=289000, episode=49 reward=0.7483063 (542.62 it/sec) -training >> step=289100, episode=49 reward=0.7499604 (561.17 it/sec) -training >> step=289200, episode=49 reward=0.7495102 (574.13 it/sec) -training >> step=289300, episode=49 reward=0.7641633 (537.16 it/sec) -training >> step=289400, episode=49 reward=0.7584519 (545.30 it/sec) -training >> step=289500, episode=49 reward=0.738699 (537.73 it/sec) -training >> step=289600, episode=49 reward=0.7699522 (544.13 it/sec) -training >> step=289700, episode=49 reward=0.7431009 (553.13 it/sec) -training >> step=289800, episode=49 reward=0.757496 (535.55 it/sec) -training >> step=289900, episode=49 reward=0.772155 (532.12 it/sec) -training >> step=290000, episode=49 reward=0.7530959 (489.56 it/sec) -training >> step=290100, episode=49 reward=0.7699654 (527.47 it/sec) -training >> step=290200, episode=49 reward=0.7867745 (552.83 it/sec) -training >> step=290300, episode=49 reward=0.7599568 (524.12 it/sec) -training >> step=290400, episode=49 reward=0.7559239 (541.83 it/sec) -training >> step=290500, episode=49 reward=0.7635939 (551.39 it/sec) -training >> step=290600, episode=49 reward=0.7450218 (559.45 it/sec) -training >> step=290700, episode=49 reward=0.7465093 (544.65 it/sec) -training >> step=290800, episode=49 reward=0.7651375 (563.21 it/sec) -training >> step=290900, episode=49 reward=0.7522168 (593.86 it/sec) -training >> step=291000, episode=49 reward=0.7680816 (540.84 it/sec) -training >> step=291100, episode=49 reward=0.7697709 (526.28 it/sec) -training >> step=291200, episode=49 reward=0.7705109 (586.89 it/sec) -training >> step=291300, episode=49 reward=0.7670529 (565.01 it/sec) -training >> step=291400, episode=49 reward=0.7596982 (562.22 it/sec) -training >> step=291500, episode=49 reward=0.7417287 (555.38 it/sec) -training >> step=291600, episode=49 reward=0.762004 (531.23 it/sec) -training >> step=291700, episode=49 reward=0.7744648 (584.66 it/sec) -training >> step=291800, episode=49 reward=0.7483934 (562.42 it/sec) -training >> step=291900, episode=49 reward=0.7519426 (550.32 it/sec) -training >> step=292000, episode=49 reward=0.7253345 (569.27 it/sec) -training >> step=292100, episode=49 reward=0.7452882 (502.31 it/sec) -training >> step=292200, episode=49 reward=0.7484896 (533.61 it/sec) -training >> step=292300, episode=49 reward=0.7692539 (376.11 it/sec) -training >> step=292400, episode=49 reward=0.7614721 (483.94 it/sec) -training >> step=292500, episode=49 reward=0.7518961 (534.16 it/sec) -training >> step=292600, episode=49 reward=0.7426804 (507.93 it/sec) -training >> step=292700, episode=49 reward=0.7301857 (495.92 it/sec) -training >> step=292800, episode=49 reward=0.7216252 (522.52 it/sec) -training >> step=292900, episode=49 reward=0.751729 (508.53 it/sec) -training >> step=293000, episode=49 reward=0.7233653 (556.35 it/sec) -training >> step=293100, episode=49 reward=0.7300282 (554.17 it/sec) -training >> step=293200, episode=49 reward=0.7332085 (496.81 it/sec) -training >> step=293300, episode=50 reward=0.7144076 (146.39 it/sec) -training >> step=293400, episode=50 reward=0.770326 (541.51 it/sec) -training >> step=293500, episode=50 reward=0.746924 (480.75 it/sec) -training >> step=293600, episode=50 reward=0.7692074 (525.44 it/sec) -training >> step=293700, episode=50 reward=0.7365476 (525.32 it/sec) -training >> step=293800, episode=50 reward=0.7674181 (537.30 it/sec) -training >> step=293900, episode=50 reward=0.7398701 (548.66 it/sec) -training >> step=294000, episode=50 reward=0.7529069 (517.34 it/sec) -training >> step=294100, episode=50 reward=0.7337769 (538.73 it/sec) -training >> step=294200, episode=50 reward=0.7419772 (530.39 it/sec) -training >> step=294300, episode=50 reward=0.7740775 (532.69 it/sec) -training >> step=294400, episode=50 reward=0.7741506 (553.92 it/sec) -training >> step=294500, episode=50 reward=0.7473717 (526.44 it/sec) -training >> step=294600, episode=50 reward=0.7483571 (542.36 it/sec) -training >> step=294700, episode=50 reward=0.7709225 (562.47 it/sec) -training >> step=294800, episode=50 reward=0.7552056 (541.32 it/sec) -training >> step=294900, episode=50 reward=0.7604186 (574.92 it/sec) -training >> step=295000, episode=50 reward=0.7793398 (538.67 it/sec) -training >> step=295100, episode=50 reward=0.7665324 (514.98 it/sec) -training >> step=295200, episode=50 reward=0.7434097 (524.63 it/sec) -training >> step=295300, episode=50 reward=0.7796926 (483.86 it/sec) -training >> step=295400, episode=50 reward=0.7499305 (517.54 it/sec) -training >> step=295500, episode=50 reward=0.7419144 (512.07 it/sec) -training >> step=295600, episode=50 reward=0.7551279 (487.87 it/sec) -training >> step=295700, episode=50 reward=0.7736642 (507.89 it/sec) -training >> step=295800, episode=50 reward=0.7496099 (479.30 it/sec) -training >> step=295900, episode=50 reward=0.7500849 (429.15 it/sec) -training >> step=296000, episode=50 reward=0.7807974 (554.89 it/sec) -training >> step=296100, episode=50 reward=0.7337739 (522.08 it/sec) -training >> step=296200, episode=50 reward=0.7643865 (552.64 it/sec) -training >> step=296300, episode=50 reward=0.7656464 (467.12 it/sec) -training >> step=296400, episode=50 reward=0.7452324 (538.03 it/sec) -training >> step=296500, episode=50 reward=0.7404596 (540.17 it/sec) -training >> step=296600, episode=50 reward=0.7658131 (503.94 it/sec) -training >> step=296700, episode=50 reward=0.7717109 (566.90 it/sec) -training >> step=296800, episode=50 reward=0.7486546 (561.73 it/sec) -training >> step=296900, episode=50 reward=0.7658079 (553.48 it/sec) -training >> step=297000, episode=50 reward=0.7907925 (543.91 it/sec) -training >> step=297100, episode=50 reward=0.7624246 (590.69 it/sec) -training >> step=297200, episode=50 reward=0.7462091 (509.29 it/sec) -training >> step=297300, episode=50 reward=0.7620625 (534.42 it/sec) -training >> step=297400, episode=50 reward=0.7363765 (485.73 it/sec) -training >> step=297500, episode=50 reward=0.7696843 (520.12 it/sec) -training >> step=297600, episode=50 reward=0.7523848 (576.29 it/sec) -training >> step=297700, episode=50 reward=0.7620861 (543.19 it/sec) -training >> step=297800, episode=50 reward=0.7685379 (532.34 it/sec) -training >> step=297900, episode=50 reward=0.7595551 (528.64 it/sec) -training >> step=298000, episode=50 reward=0.7504525 (511.88 it/sec) -training >> step=298100, episode=50 reward=0.7712022 (582.29 it/sec) -training >> step=298200, episode=50 reward=0.7555439 (550.57 it/sec) -training >> step=298300, episode=50 reward=0.7632715 (530.41 it/sec) -training >> step=298400, episode=50 reward=0.7556228 (334.70 it/sec) -training >> step=298500, episode=50 reward=0.7793993 (518.66 it/sec) -training >> step=298600, episode=50 reward=0.7317979 (574.94 it/sec) -training >> step=298700, episode=50 reward=0.7559193 (528.90 it/sec) -training >> step=298800, episode=50 reward=0.7546417 (523.80 it/sec) -training >> step=298900, episode=50 reward=0.719426 (539.75 it/sec) -training >> step=299000, episode=50 reward=0.729688 (566.41 it/sec) -training >> step=299100, episode=50 reward=0.7234977 (530.78 it/sec) -training >> step=299200, episode=50 reward=0.7321164 (572.30 it/sec) -training >> step=299300, episode=51 reward=0.7318972 (170.24 it/sec) -training >> step=299400, episode=51 reward=0.7751453 (559.29 it/sec) -training >> step=299500, episode=51 reward=0.7623537 (585.15 it/sec) -training >> step=299600, episode=51 reward=0.7498078 (554.47 it/sec) -training >> step=299700, episode=51 reward=0.7443405 (530.24 it/sec) -training >> step=299800, episode=51 reward=0.763211 (520.07 it/sec) -training >> step=299900, episode=51 reward=0.74883 (528.65 it/sec) -training >> step=300000, episode=51 reward=0.7490242 (553.69 it/sec) -training >> step=300100, episode=51 reward=0.7603056 (578.35 it/sec) -training >> step=300200, episode=51 reward=0.7476811 (541.52 it/sec) -training >> step=300300, episode=51 reward=0.7820672 (544.99 it/sec) -training >> step=300400, episode=51 reward=0.7502019 (543.67 it/sec) -training >> step=300500, episode=51 reward=0.7476882 (541.19 it/sec) -training >> step=300600, episode=51 reward=0.7751916 (569.74 it/sec) -training >> step=300700, episode=51 reward=0.7619569 (518.76 it/sec) -training >> step=300800, episode=51 reward=0.7673818 (557.62 it/sec) -training >> step=300900, episode=51 reward=0.7530091 (536.45 it/sec) -training >> step=301000, episode=51 reward=0.7373776 (545.82 it/sec) -training >> step=301100, episode=51 reward=0.768751 (579.69 it/sec) -training >> step=301200, episode=51 reward=0.7710924 (560.10 it/sec) -training >> step=301300, episode=51 reward=0.7906061 (558.32 it/sec) -training >> step=301400, episode=51 reward=0.7742292 (578.86 it/sec) -training >> step=301500, episode=51 reward=0.7503292 (525.40 it/sec) -training >> step=301600, episode=51 reward=0.7629178 (532.53 it/sec) -training >> step=301700, episode=51 reward=0.7528753 (578.25 it/sec) -training >> step=301800, episode=51 reward=0.7305056 (536.57 it/sec) -training >> step=301900, episode=51 reward=0.7531098 (579.07 it/sec) -training >> step=302000, episode=51 reward=0.7626551 (545.20 it/sec) -training >> step=302100, episode=51 reward=0.7771468 (535.56 it/sec) -training >> step=302200, episode=51 reward=0.7598851 (559.16 it/sec) -training >> step=302300, episode=51 reward=0.758945 (495.42 it/sec) -training >> step=302400, episode=51 reward=0.7673307 (545.48 it/sec) -training >> step=302500, episode=51 reward=0.757373 (573.77 it/sec) -training >> step=302600, episode=51 reward=0.7728298 (508.93 it/sec) -training >> step=302700, episode=51 reward=0.7513145 (543.71 it/sec) -training >> step=302800, episode=51 reward=0.7460528 (566.09 it/sec) -training >> step=302900, episode=51 reward=0.757386 (526.40 it/sec) -training >> step=303000, episode=51 reward=0.7408035 (538.60 it/sec) -training >> step=303100, episode=51 reward=0.7444803 (544.29 it/sec) -training >> step=303200, episode=51 reward=0.7356917 (570.25 it/sec) -training >> step=303300, episode=51 reward=0.7498271 (563.30 it/sec) -training >> step=303400, episode=51 reward=0.788571 (557.78 it/sec) -training >> step=303500, episode=51 reward=0.7595252 (586.13 it/sec) -training >> step=303600, episode=51 reward=0.7398088 (551.77 it/sec) -training >> step=303700, episode=51 reward=0.7690476 (568.86 it/sec) -training >> step=303800, episode=51 reward=0.7699109 (544.81 it/sec) -training >> step=303900, episode=51 reward=0.7557854 (574.45 it/sec) -training >> step=304000, episode=51 reward=0.7659972 (540.52 it/sec) -training >> step=304100, episode=51 reward=0.7670202 (562.35 it/sec) -training >> step=304200, episode=51 reward=0.7886569 (524.11 it/sec) -training >> step=304300, episode=51 reward=0.7493148 (489.27 it/sec) -training >> step=304400, episode=51 reward=0.7623739 (455.30 it/sec) -training >> step=304500, episode=51 reward=0.7443697 (533.59 it/sec) -training >> step=304600, episode=51 reward=0.7379289 (391.29 it/sec) -training >> step=304700, episode=51 reward=0.7208462 (419.92 it/sec) -training >> step=304800, episode=51 reward=0.7225984 (492.77 it/sec) -training >> step=304900, episode=51 reward=0.7300742 (451.42 it/sec) -training >> step=305000, episode=51 reward=0.7355694 (485.03 it/sec) -training >> step=305100, episode=51 reward=0.7366025 (473.89 it/sec) -training >> step=305200, episode=51 reward=0.7395451 (411.47 it/sec) -training >> step=305300, episode=52 reward=0.7583249 (158.87 it/sec) -training >> step=305400, episode=52 reward=0.7495518 (540.59 it/sec) -training >> step=305500, episode=52 reward=0.7494696 (522.54 it/sec) -training >> step=305600, episode=52 reward=0.7659606 (479.90 it/sec) -training >> step=305700, episode=52 reward=0.7358571 (499.07 it/sec) -training >> step=305800, episode=52 reward=0.740557 (499.00 it/sec) -training >> step=305900, episode=52 reward=0.7608663 (514.76 it/sec) -training >> step=306000, episode=52 reward=0.7642849 (494.22 it/sec) -training >> step=306100, episode=52 reward=0.7800227 (489.84 it/sec) -training >> step=306200, episode=52 reward=0.7618908 (504.19 it/sec) -training >> step=306300, episode=52 reward=0.7674813 (467.30 it/sec) -training >> step=306400, episode=52 reward=0.769504 (512.86 it/sec) -training >> step=306500, episode=52 reward=0.7601564 (550.57 it/sec) -training >> step=306600, episode=52 reward=0.7614787 (508.53 it/sec) -training >> step=306700, episode=52 reward=0.7698978 (536.71 it/sec) -training >> step=306800, episode=52 reward=0.7624699 (452.77 it/sec) -training >> step=306900, episode=52 reward=0.7609614 (491.55 it/sec) -training >> step=307000, episode=52 reward=0.7724582 (520.12 it/sec) -training >> step=307100, episode=52 reward=0.7362834 (391.70 it/sec) -training >> step=307200, episode=52 reward=0.7733785 (394.74 it/sec) -training >> step=307300, episode=52 reward=0.7865437 (369.86 it/sec) -training >> step=307400, episode=52 reward=0.7601367 (285.51 it/sec) -training >> step=307500, episode=52 reward=0.7743061 (319.29 it/sec) -training >> step=307600, episode=52 reward=0.752609 (342.35 it/sec) -training >> step=307700, episode=52 reward=0.7715083 (430.35 it/sec) -training >> step=307800, episode=52 reward=0.7627481 (431.61 it/sec) -training >> step=307900, episode=52 reward=0.7564011 (457.30 it/sec) -training >> step=308000, episode=52 reward=0.7666473 (403.65 it/sec) -training >> step=308100, episode=52 reward=0.7464221 (338.30 it/sec) -training >> step=308200, episode=52 reward=0.7404682 (389.70 it/sec) -training >> step=308300, episode=52 reward=0.7635758 (446.46 it/sec) -training >> step=308400, episode=52 reward=0.7595549 (450.04 it/sec) -training >> step=308500, episode=52 reward=0.7413778 (419.12 it/sec) -training >> step=308600, episode=52 reward=0.7607777 (341.80 it/sec) -training >> step=308700, episode=52 reward=0.7549577 (395.83 it/sec) -training >> step=308800, episode=52 reward=0.7743864 (422.76 it/sec) -training >> step=308900, episode=52 reward=0.7885842 (405.39 it/sec) -training >> step=309000, episode=52 reward=0.7416524 (421.56 it/sec) -training >> step=309100, episode=52 reward=0.7512953 (438.94 it/sec) -training >> step=309200, episode=52 reward=0.7605851 (453.98 it/sec) -training >> step=309300, episode=52 reward=0.7416251 (413.68 it/sec) -training >> step=309400, episode=52 reward=0.7573619 (418.83 it/sec) -training >> step=309500, episode=52 reward=0.7520379 (472.67 it/sec) -training >> step=309600, episode=52 reward=0.75534 (476.67 it/sec) -training >> step=309700, episode=52 reward=0.7688804 (517.25 it/sec) -training >> step=309800, episode=52 reward=0.7623335 (469.09 it/sec) -training >> step=309900, episode=52 reward=0.7636086 (466.57 it/sec) -training >> step=310000, episode=52 reward=0.7466061 (457.05 it/sec) -training >> step=310100, episode=52 reward=0.7481575 (480.47 it/sec) -training >> step=310200, episode=52 reward=0.7525761 (508.23 it/sec) -training >> step=310300, episode=52 reward=0.7643193 (498.02 it/sec) -training >> step=310400, episode=52 reward=0.7737506 (529.31 it/sec) -training >> step=310500, episode=52 reward=0.740192 (537.88 it/sec) -training >> step=310600, episode=52 reward=0.7461596 (488.44 it/sec) -training >> step=310700, episode=52 reward=0.7551972 (466.28 it/sec) -training >> step=310800, episode=52 reward=0.7282104 (383.92 it/sec) -training >> step=310900, episode=52 reward=0.7599878 (466.48 it/sec) -training >> step=311000, episode=52 reward=0.7348793 (462.87 it/sec) -training >> step=311100, episode=52 reward=0.7583215 (440.91 it/sec) -training >> step=311200, episode=52 reward=0.7313679 (439.23 it/sec) -training >> step=311300, episode=53 reward=0.7628419 (141.24 it/sec) -training >> step=311400, episode=53 reward=0.7560936 (414.88 it/sec) -training >> step=311500, episode=53 reward=0.7500713 (395.85 it/sec) -training >> step=311600, episode=53 reward=0.7609259 (421.80 it/sec) -training >> step=311700, episode=53 reward=0.7560782 (414.37 it/sec) -training >> step=311800, episode=53 reward=0.7573695 (439.38 it/sec) -training >> step=311900, episode=53 reward=0.7660185 (456.57 it/sec) -training >> step=312000, episode=53 reward=0.7581699 (482.60 it/sec) -training >> step=312100, episode=53 reward=0.7443887 (517.14 it/sec) -training >> step=312200, episode=53 reward=0.7662065 (514.42 it/sec) -training >> step=312300, episode=53 reward=0.7670001 (459.04 it/sec) -training >> step=312400, episode=53 reward=0.7664391 (449.35 it/sec) -training >> step=312500, episode=53 reward=0.7411559 (489.37 it/sec) -training >> step=312600, episode=53 reward=0.737739 (472.24 it/sec) -training >> step=312700, episode=53 reward=0.772099 (504.84 it/sec) -training >> step=312800, episode=53 reward=0.7616872 (477.59 it/sec) -training >> step=312900, episode=53 reward=0.7607677 (468.50 it/sec) -training >> step=313000, episode=53 reward=0.7534782 (474.11 it/sec) -training >> step=313100, episode=53 reward=0.7684137 (474.95 it/sec) -training >> step=313200, episode=53 reward=0.7765777 (440.48 it/sec) -training >> step=313300, episode=53 reward=0.7555928 (453.74 it/sec) -training >> step=313400, episode=53 reward=0.7713368 (442.68 it/sec) -training >> step=313500, episode=53 reward=0.7510982 (444.29 it/sec) -training >> step=313600, episode=53 reward=0.7516966 (491.64 it/sec) -training >> step=313700, episode=53 reward=0.7411291 (473.30 it/sec) -training >> step=313800, episode=53 reward=0.7418399 (485.31 it/sec) -training >> step=313900, episode=53 reward=0.7655144 (484.48 it/sec) -training >> step=314000, episode=53 reward=0.7518163 (500.08 it/sec) -training >> step=314100, episode=53 reward=0.7327177 (507.03 it/sec) -training >> step=314200, episode=53 reward=0.7432026 (478.24 it/sec) -training >> step=314300, episode=53 reward=0.780429 (447.30 it/sec) -training >> step=314400, episode=53 reward=0.7520366 (485.14 it/sec) -training >> step=314500, episode=53 reward=0.7682934 (440.13 it/sec) -training >> step=314600, episode=53 reward=0.7446876 (479.83 it/sec) -training >> step=314700, episode=53 reward=0.7399755 (471.98 it/sec) -training >> step=314800, episode=53 reward=0.7571682 (472.60 it/sec) -training >> step=314900, episode=53 reward=0.7741379 (504.09 it/sec) -training >> step=315000, episode=53 reward=0.7439359 (507.01 it/sec) -training >> step=315100, episode=53 reward=0.742816 (437.49 it/sec) -training >> step=315200, episode=53 reward=0.7706354 (457.81 it/sec) -training >> step=315300, episode=53 reward=0.777755 (452.28 it/sec) -training >> step=315400, episode=53 reward=0.7713873 (469.00 it/sec) -training >> step=315500, episode=53 reward=0.7723126 (533.69 it/sec) -training >> step=315600, episode=53 reward=0.7484128 (483.88 it/sec) -training >> step=315700, episode=53 reward=0.7842492 (454.32 it/sec) -training >> step=315800, episode=53 reward=0.7780386 (478.30 it/sec) -training >> step=315900, episode=53 reward=0.7492511 (461.58 it/sec) -training >> step=316000, episode=53 reward=0.7569742 (492.20 it/sec) -training >> step=316100, episode=53 reward=0.7690697 (494.79 it/sec) -training >> step=316200, episode=53 reward=0.737033 (486.73 it/sec) -training >> step=316300, episode=53 reward=0.7727149 (500.10 it/sec) -training >> step=316400, episode=53 reward=0.7394367 (513.95 it/sec) -training >> step=316500, episode=53 reward=0.7543797 (495.34 it/sec) -training >> step=316600, episode=53 reward=0.7562656 (490.31 it/sec) -training >> step=316700, episode=53 reward=0.7667066 (519.60 it/sec) -training >> step=316800, episode=53 reward=0.7510715 (529.87 it/sec) -training >> step=316900, episode=53 reward=0.7527002 (543.99 it/sec) -training >> step=317000, episode=53 reward=0.7322871 (364.22 it/sec) -training >> step=317100, episode=53 reward=0.7604209 (491.12 it/sec) -training >> step=317200, episode=53 reward=0.7522165 (505.55 it/sec) -training >> step=317300, episode=54 reward=0.7552206 (127.63 it/sec) -training >> step=317400, episode=54 reward=0.7694663 (481.14 it/sec) -training >> step=317500, episode=54 reward=0.7511559 (515.33 it/sec) -training >> step=317600, episode=54 reward=0.7677189 (527.50 it/sec) -training >> step=317700, episode=54 reward=0.7678403 (502.66 it/sec) -training >> step=317800, episode=54 reward=0.7672457 (490.95 it/sec) -training >> step=317900, episode=54 reward=0.7436732 (511.18 it/sec) -training >> step=318000, episode=54 reward=0.7613453 (500.89 it/sec) -training >> step=318100, episode=54 reward=0.7496975 (518.43 it/sec) -training >> step=318200, episode=54 reward=0.7453743 (552.12 it/sec) -training >> step=318300, episode=54 reward=0.7537546 (457.18 it/sec) -training >> step=318400, episode=54 reward=0.7685299 (468.46 it/sec) -training >> step=318500, episode=54 reward=0.7462258 (505.98 it/sec) -training >> step=318600, episode=54 reward=0.7551498 (520.17 it/sec) -training >> step=318700, episode=54 reward=0.7624248 (521.66 it/sec) -training >> step=318800, episode=54 reward=0.7647852 (495.59 it/sec) -training >> step=318900, episode=54 reward=0.7788731 (504.81 it/sec) -training >> step=319000, episode=54 reward=0.7497764 (540.72 it/sec) -training >> step=319100, episode=54 reward=0.7732891 (524.78 it/sec) -training >> step=319200, episode=54 reward=0.7445942 (531.81 it/sec) -training >> step=319300, episode=54 reward=0.74599 (488.72 it/sec) -training >> step=319400, episode=54 reward=0.7641802 (497.76 it/sec) -training >> step=319500, episode=54 reward=0.7456228 (492.34 it/sec) -training >> step=319600, episode=54 reward=0.7793002 (557.72 it/sec) -training >> step=319700, episode=54 reward=0.7705634 (547.56 it/sec) -training >> step=319800, episode=54 reward=0.7813821 (506.38 it/sec) -training >> step=319900, episode=54 reward=0.7662597 (486.44 it/sec) -training >> step=320000, episode=54 reward=0.7548072 (517.15 it/sec) -training >> step=320100, episode=54 reward=0.7510694 (500.61 it/sec) -training >> step=320200, episode=54 reward=0.7625244 (530.19 it/sec) -training >> step=320300, episode=54 reward=0.7879978 (536.42 it/sec) -training >> step=320400, episode=54 reward=0.7803994 (463.52 it/sec) -training >> step=320500, episode=54 reward=0.7555531 (547.93 it/sec) -training >> step=320600, episode=54 reward=0.7570875 (508.38 it/sec) -training >> step=320700, episode=54 reward=0.7518402 (545.16 it/sec) -training >> step=320800, episode=54 reward=0.7592556 (572.06 it/sec) -training >> step=320900, episode=54 reward=0.7528456 (525.56 it/sec) -training >> step=321000, episode=54 reward=0.7564165 (487.83 it/sec) -training >> step=321100, episode=54 reward=0.7597789 (530.96 it/sec) -training >> step=321200, episode=54 reward=0.7670112 (499.23 it/sec) -training >> step=321300, episode=54 reward=0.759963 (533.06 it/sec) -training >> step=321400, episode=54 reward=0.7689595 (533.29 it/sec) -training >> step=321500, episode=54 reward=0.7740831 (484.63 it/sec) -training >> step=321600, episode=54 reward=0.7509143 (513.79 it/sec) -training >> step=321700, episode=54 reward=0.7734808 (512.07 it/sec) -training >> step=321800, episode=54 reward=0.753625 (535.00 it/sec) -training >> step=321900, episode=54 reward=0.7783909 (544.73 it/sec) -training >> step=322000, episode=54 reward=0.775656 (463.02 it/sec) -training >> step=322100, episode=54 reward=0.7768918 (542.07 it/sec) -training >> step=322200, episode=54 reward=0.7737266 (496.25 it/sec) -training >> step=322300, episode=54 reward=0.7523894 (554.78 it/sec) -training >> step=322400, episode=54 reward=0.7543631 (513.91 it/sec) -training >> step=322500, episode=54 reward=0.7536786 (503.89 it/sec) -training >> step=322600, episode=54 reward=0.7465165 (533.80 it/sec) -training >> step=322700, episode=54 reward=0.7593451 (519.13 it/sec) -training >> step=322800, episode=54 reward=0.7679062 (496.27 it/sec) -training >> step=322900, episode=54 reward=0.7670654 (540.53 it/sec) -training >> step=323000, episode=54 reward=0.748454 (544.93 it/sec) -training >> step=323100, episode=54 reward=0.728241 (481.03 it/sec) -training >> step=323200, episode=54 reward=0.7419192 (544.54 it/sec) -training >> step=323300, episode=55 reward=0.7226368 (72.96 it/sec) -training >> step=323400, episode=55 reward=0.7757414 (501.94 it/sec) -training >> step=323500, episode=55 reward=0.7644824 (470.39 it/sec) -training >> step=323600, episode=55 reward=0.760771 (487.35 it/sec) -training >> step=323700, episode=55 reward=0.7603136 (489.62 it/sec) -training >> step=323800, episode=55 reward=0.7429425 (523.68 it/sec) -training >> step=323900, episode=55 reward=0.7436232 (535.78 it/sec) -training >> step=324000, episode=55 reward=0.7595945 (552.60 it/sec) -training >> step=324100, episode=55 reward=0.7645448 (514.00 it/sec) -training >> step=324200, episode=55 reward=0.7624664 (535.83 it/sec) -training >> step=324300, episode=55 reward=0.7665175 (471.46 it/sec) -training >> step=324400, episode=55 reward=0.7717873 (525.72 it/sec) -training >> step=324500, episode=55 reward=0.7689039 (535.04 it/sec) -training >> step=324600, episode=55 reward=0.7629782 (496.56 it/sec) -training >> step=324700, episode=55 reward=0.7664694 (517.57 it/sec) -training >> step=324800, episode=55 reward=0.7711256 (495.62 it/sec) -training >> step=324900, episode=55 reward=0.7634445 (516.36 it/sec) -training >> step=325000, episode=55 reward=0.7621999 (505.31 it/sec) -training >> step=325100, episode=55 reward=0.7569503 (475.35 it/sec) -training >> step=325200, episode=55 reward=0.7422689 (519.69 it/sec) -training >> step=325300, episode=55 reward=0.7591562 (510.69 it/sec) -training >> step=325400, episode=55 reward=0.7690594 (509.09 it/sec) -training >> step=325500, episode=55 reward=0.7897356 (545.93 it/sec) -training >> step=325600, episode=55 reward=0.7625973 (515.66 it/sec) -training >> step=325700, episode=55 reward=0.788063 (511.19 it/sec) -training >> step=325800, episode=55 reward=0.7666114 (487.42 it/sec) -training >> step=325900, episode=55 reward=0.7653014 (525.53 it/sec) -training >> step=326000, episode=55 reward=0.7555755 (542.92 it/sec) -training >> step=326100, episode=55 reward=0.7501679 (486.09 it/sec) -training >> step=326200, episode=55 reward=0.7422837 (489.73 it/sec) -training >> step=326300, episode=55 reward=0.7632627 (525.75 it/sec) -training >> step=326400, episode=55 reward=0.7427079 (543.98 it/sec) -training >> step=326500, episode=55 reward=0.7600746 (505.83 it/sec) -training >> step=326600, episode=55 reward=0.7751434 (537.27 it/sec) -training >> step=326700, episode=55 reward=0.7778783 (510.08 it/sec) -training >> step=326800, episode=55 reward=0.7708544 (555.31 it/sec) -training >> step=326900, episode=55 reward=0.7576964 (511.59 it/sec) -training >> step=327000, episode=55 reward=0.7775719 (548.78 it/sec) -training >> step=327100, episode=55 reward=0.7591034 (521.02 it/sec) -training >> step=327200, episode=55 reward=0.7788908 (500.37 it/sec) -training >> step=327300, episode=55 reward=0.7620595 (502.02 it/sec) -training >> step=327400, episode=55 reward=0.736193 (483.04 it/sec) -training >> step=327500, episode=55 reward=0.7392814 (477.71 it/sec) -training >> step=327600, episode=55 reward=0.7577434 (553.49 it/sec) -training >> step=327700, episode=55 reward=0.7634823 (447.76 it/sec) -training >> step=327800, episode=55 reward=0.7746981 (484.37 it/sec) -training >> step=327900, episode=55 reward=0.7627235 (504.39 it/sec) -training >> step=328000, episode=55 reward=0.7632731 (510.59 it/sec) -training >> step=328100, episode=55 reward=0.7613866 (548.32 it/sec) -training >> step=328200, episode=55 reward=0.7459499 (519.49 it/sec) -training >> step=328300, episode=55 reward=0.752672 (479.12 it/sec) -training >> step=328400, episode=55 reward=0.7602673 (481.45 it/sec) -training >> step=328500, episode=55 reward=0.7302659 (506.34 it/sec) -training >> step=328600, episode=55 reward=0.755134 (533.04 it/sec) -training >> step=328700, episode=55 reward=0.7587699 (537.09 it/sec) -training >> step=328800, episode=55 reward=0.754024 (495.63 it/sec) -training >> step=328900, episode=55 reward=0.7293313 (542.94 it/sec) -training >> step=329000, episode=55 reward=0.7709669 (498.79 it/sec) -training >> step=329100, episode=55 reward=0.746943 (489.21 it/sec) -training >> step=329200, episode=55 reward=0.760106 (509.53 it/sec) -training >> step=329300, episode=56 reward=0.7326184 (88.92 it/sec) -training >> step=329400, episode=56 reward=0.7526902 (517.83 it/sec) -training >> step=329500, episode=56 reward=0.7517279 (495.69 it/sec) -training >> step=329600, episode=56 reward=0.7632192 (502.94 it/sec) -training >> step=329700, episode=56 reward=0.7628339 (530.09 it/sec) -training >> step=329800, episode=56 reward=0.7584721 (475.01 it/sec) -training >> step=329900, episode=56 reward=0.7660794 (507.99 it/sec) -training >> step=330000, episode=56 reward=0.7529643 (522.73 it/sec) -training >> step=330100, episode=56 reward=0.7750707 (521.53 it/sec) -training >> step=330200, episode=56 reward=0.7499781 (527.37 it/sec) -training >> step=330300, episode=56 reward=0.771714 (504.70 it/sec) -training >> step=330400, episode=56 reward=0.7741737 (491.85 it/sec) -training >> step=330500, episode=56 reward=0.7804065 (519.40 it/sec) -training >> step=330600, episode=56 reward=0.7635118 (549.05 it/sec) -training >> step=330700, episode=56 reward=0.750191 (540.11 it/sec) -training >> step=330800, episode=56 reward=0.7562896 (507.71 it/sec) -training >> step=330900, episode=56 reward=0.7523996 (486.75 it/sec) -training >> step=331000, episode=56 reward=0.7642751 (546.07 it/sec) -training >> step=331100, episode=56 reward=0.7649895 (500.01 it/sec) -training >> step=331200, episode=56 reward=0.7555435 (545.21 it/sec) -training >> step=331300, episode=56 reward=0.7593025 (550.71 it/sec) -training >> step=331400, episode=56 reward=0.7677508 (512.73 it/sec) -training >> step=331500, episode=56 reward=0.7402475 (523.39 it/sec) -training >> step=331600, episode=56 reward=0.7637453 (512.51 it/sec) -training >> step=331700, episode=56 reward=0.7482905 (519.77 it/sec) -training >> step=331800, episode=56 reward=0.7362804 (546.18 it/sec) -training >> step=331900, episode=56 reward=0.7512541 (524.10 it/sec) -training >> step=332000, episode=56 reward=0.7656451 (505.54 it/sec) -training >> step=332100, episode=56 reward=0.7699646 (506.39 it/sec) -training >> step=332200, episode=56 reward=0.7693535 (516.50 it/sec) -training >> step=332300, episode=56 reward=0.7556294 (475.27 it/sec) -training >> step=332400, episode=56 reward=0.7454513 (491.62 it/sec) -training >> step=332500, episode=56 reward=0.7893497 (512.29 it/sec) -training >> step=332600, episode=56 reward=0.7857141 (491.77 it/sec) -training >> step=332700, episode=56 reward=0.760445 (528.25 it/sec) -training >> step=332800, episode=56 reward=0.7756439 (561.76 it/sec) -training >> step=332900, episode=56 reward=0.7458929 (514.15 it/sec) -training >> step=333000, episode=56 reward=0.7406118 (485.02 it/sec) -training >> step=333100, episode=56 reward=0.7396566 (507.79 it/sec) -training >> step=333200, episode=56 reward=0.762509 (514.60 it/sec) -training >> step=333300, episode=56 reward=0.7801358 (528.96 it/sec) -training >> step=333400, episode=56 reward=0.7640724 (526.58 it/sec) -training >> step=333500, episode=56 reward=0.7693193 (517.25 it/sec) -training >> step=333600, episode=56 reward=0.76968 (535.60 it/sec) -training >> step=333700, episode=56 reward=0.7808948 (485.13 it/sec) -training >> step=333800, episode=56 reward=0.7510537 (537.97 it/sec) -training >> step=333900, episode=56 reward=0.725079 (549.23 it/sec) -training >> step=334000, episode=56 reward=0.7486192 (499.52 it/sec) -training >> step=334100, episode=56 reward=0.7455983 (522.37 it/sec) -training >> step=334200, episode=56 reward=0.7515662 (496.25 it/sec) -training >> step=334300, episode=56 reward=0.7505133 (526.99 it/sec) -training >> step=334400, episode=56 reward=0.7487774 (513.53 it/sec) -training >> step=334500, episode=56 reward=0.7506396 (532.76 it/sec) -training >> step=334600, episode=56 reward=0.7474588 (542.40 it/sec) -training >> step=334700, episode=56 reward=0.7683143 (508.51 it/sec) -training >> step=334800, episode=56 reward=0.767716 (517.60 it/sec) -training >> step=334900, episode=56 reward=0.7673107 (509.53 it/sec) -training >> step=335000, episode=56 reward=0.7396674 (450.31 it/sec) -training >> step=335100, episode=56 reward=0.7562849 (514.54 it/sec) -training >> step=335200, episode=56 reward=0.7533345 (491.43 it/sec) -training >> step=335300, episode=57 reward=0.7434731 (69.48 it/sec) -training >> step=335400, episode=57 reward=0.7552567 (484.62 it/sec) -training >> step=335500, episode=57 reward=0.7651923 (484.26 it/sec) -training >> step=335600, episode=57 reward=0.7507887 (520.67 it/sec) -training >> step=335700, episode=57 reward=0.7178829 (538.89 it/sec) -training >> step=335800, episode=57 reward=0.7308001 (509.44 it/sec) -training >> step=335900, episode=57 reward=0.7597777 (541.96 it/sec) -training >> step=336000, episode=57 reward=0.7703428 (526.20 it/sec) -training >> step=336100, episode=57 reward=0.7451694 (562.05 it/sec) -training >> step=336200, episode=57 reward=0.754483 (510.48 it/sec) -training >> step=336300, episode=57 reward=0.7555489 (474.72 it/sec) -training >> step=336400, episode=57 reward=0.7726237 (541.55 it/sec) -training >> step=336500, episode=57 reward=0.7436517 (471.02 it/sec) -training >> step=336600, episode=57 reward=0.7547445 (533.83 it/sec) -training >> step=336700, episode=57 reward=0.7658285 (557.88 it/sec) -training >> step=336800, episode=57 reward=0.7685432 (478.25 it/sec) -training >> step=336900, episode=57 reward=0.7496758 (500.60 it/sec) -training >> step=337000, episode=57 reward=0.7897508 (563.24 it/sec) -training >> step=337100, episode=57 reward=0.7467329 (522.88 it/sec) -training >> step=337200, episode=57 reward=0.7610912 (522.05 it/sec) -training >> step=337300, episode=57 reward=0.7684026 (549.39 it/sec) -training >> step=337400, episode=57 reward=0.7670712 (508.76 it/sec) -training >> step=337500, episode=57 reward=0.7548913 (554.76 it/sec) -training >> step=337600, episode=57 reward=0.7829999 (529.78 it/sec) -training >> step=337700, episode=57 reward=0.7626484 (536.72 it/sec) -training >> step=337800, episode=57 reward=0.7739223 (535.81 it/sec) -training >> step=337900, episode=57 reward=0.7412007 (500.38 it/sec) -training >> step=338000, episode=57 reward=0.7813502 (565.46 it/sec) -training >> step=338100, episode=57 reward=0.7723635 (496.43 it/sec) -training >> step=338200, episode=57 reward=0.7695988 (471.86 it/sec) -training >> step=338300, episode=57 reward=0.7726334 (545.51 it/sec) -training >> step=338400, episode=57 reward=0.7419276 (521.17 it/sec) -training >> step=338500, episode=57 reward=0.7626788 (496.17 it/sec) -training >> step=338600, episode=57 reward=0.7536668 (523.90 it/sec) -training >> step=338700, episode=57 reward=0.7784037 (469.08 it/sec) -training >> step=338800, episode=57 reward=0.7370286 (534.07 it/sec) -training >> step=338900, episode=57 reward=0.76382 (485.60 it/sec) -training >> step=339000, episode=57 reward=0.76346 (465.45 it/sec) -training >> step=339100, episode=57 reward=0.769182 (558.42 it/sec) -training >> step=339200, episode=57 reward=0.7686446 (508.72 it/sec) -training >> step=339300, episode=57 reward=0.7384574 (560.48 it/sec) -training >> step=339400, episode=57 reward=0.7194527 (488.07 it/sec) -training >> step=339500, episode=57 reward=0.7450863 (523.46 it/sec) -training >> step=339600, episode=57 reward=0.758467 (537.70 it/sec) -training >> step=339700, episode=57 reward=0.7674202 (527.62 it/sec) -training >> step=339800, episode=57 reward=0.7616931 (511.65 it/sec) -training >> step=339900, episode=57 reward=0.7598178 (533.85 it/sec) -training >> step=340000, episode=57 reward=0.7554724 (462.93 it/sec) -training >> step=340100, episode=57 reward=0.7473649 (536.70 it/sec) -training >> step=340200, episode=57 reward=0.7353101 (506.70 it/sec) -training >> step=340300, episode=57 reward=0.7519749 (515.63 it/sec) -training >> step=340400, episode=57 reward=0.7452548 (477.41 it/sec) -training >> step=340500, episode=57 reward=0.7583193 (489.52 it/sec) -training >> step=340600, episode=57 reward=0.7555768 (507.68 it/sec) -training >> step=340700, episode=57 reward=0.7528471 (547.29 it/sec) -training >> step=340800, episode=57 reward=0.7334662 (517.01 it/sec) -training >> step=340900, episode=57 reward=0.7603341 (514.36 it/sec) -training >> step=341000, episode=57 reward=0.7367842 (502.33 it/sec) -training >> step=341100, episode=57 reward=0.7574438 (497.42 it/sec) -training >> step=341200, episode=57 reward=0.7587645 (577.73 it/sec) -training >> step=341300, episode=58 reward=0.7711897 (90.86 it/sec) -training >> step=341400, episode=58 reward=0.7471197 (509.05 it/sec) -training >> step=341500, episode=58 reward=0.7552818 (461.19 it/sec) -training >> step=341600, episode=58 reward=0.7547923 (512.55 it/sec) -training >> step=341700, episode=58 reward=0.7499422 (527.78 it/sec) -training >> step=341800, episode=58 reward=0.7549073 (551.19 it/sec) -training >> step=341900, episode=58 reward=0.755538 (492.95 it/sec) -training >> step=342000, episode=58 reward=0.749801 (503.65 it/sec) -training >> step=342100, episode=58 reward=0.7399088 (533.09 it/sec) -training >> step=342200, episode=58 reward=0.7432049 (545.92 it/sec) -training >> step=342300, episode=58 reward=0.7621489 (558.64 it/sec) -training >> step=342400, episode=58 reward=0.7492397 (496.20 it/sec) -training >> step=342500, episode=58 reward=0.7562317 (524.18 it/sec) -training >> step=342600, episode=58 reward=0.7526835 (553.62 it/sec) -training >> step=342700, episode=58 reward=0.7803825 (515.93 it/sec) -training >> step=342800, episode=58 reward=0.7475753 (552.91 it/sec) -training >> step=342900, episode=58 reward=0.7817992 (486.22 it/sec) -training >> step=343000, episode=58 reward=0.7627677 (494.70 it/sec) -training >> step=343100, episode=58 reward=0.7547099 (538.15 it/sec) -training >> step=343200, episode=58 reward=0.7399765 (494.84 it/sec) -training >> step=343300, episode=58 reward=0.7702562 (560.92 it/sec) -training >> step=343400, episode=58 reward=0.759454 (532.49 it/sec) -training >> step=343500, episode=58 reward=0.7608615 (480.07 it/sec) -training >> step=343600, episode=58 reward=0.7472618 (568.78 it/sec) -training >> step=343700, episode=58 reward=0.7733942 (495.11 it/sec) -training >> step=343800, episode=58 reward=0.7473875 (510.72 it/sec) -training >> step=343900, episode=58 reward=0.7890998 (494.83 it/sec) -training >> step=344000, episode=58 reward=0.7663333 (502.48 it/sec) -training >> step=344100, episode=58 reward=0.7632809 (476.59 it/sec) -training >> step=344200, episode=58 reward=0.7773015 (522.86 it/sec) -training >> step=344300, episode=58 reward=0.756338 (523.50 it/sec) -training >> step=344400, episode=58 reward=0.7674051 (584.85 it/sec) -training >> step=344500, episode=58 reward=0.7780101 (475.18 it/sec) -training >> step=344600, episode=58 reward=0.7433622 (514.01 it/sec) -training >> step=344700, episode=58 reward=0.7478028 (480.30 it/sec) -training >> step=344800, episode=58 reward=0.7548271 (515.76 it/sec) -training >> step=344900, episode=58 reward=0.7752752 (515.28 it/sec) -training >> step=345000, episode=58 reward=0.7727525 (490.31 it/sec) -training >> step=345100, episode=58 reward=0.7526265 (515.42 it/sec) -training >> step=345200, episode=58 reward=0.7625741 (505.42 it/sec) -training >> step=345300, episode=58 reward=0.7720997 (547.22 it/sec) -training >> step=345400, episode=58 reward=0.7508959 (541.64 it/sec) -training >> step=345500, episode=58 reward=0.7467647 (501.82 it/sec) -training >> step=345600, episode=58 reward=0.7372563 (526.38 it/sec) -training >> step=345700, episode=58 reward=0.770538 (504.62 it/sec) -training >> step=345800, episode=58 reward=0.7313058 (522.68 it/sec) -training >> step=345900, episode=58 reward=0.7539438 (559.91 it/sec) -training >> step=346000, episode=58 reward=0.7570202 (512.93 it/sec) -training >> step=346100, episode=58 reward=0.7373878 (519.13 it/sec) -training >> step=346200, episode=58 reward=0.7382227 (531.47 it/sec) -training >> step=346300, episode=58 reward=0.7468075 (521.44 it/sec) -training >> step=346400, episode=58 reward=0.7769147 (515.09 it/sec) -training >> step=346500, episode=58 reward=0.7650028 (506.54 it/sec) -training >> step=346600, episode=58 reward=0.7708776 (533.11 it/sec) -training >> step=346700, episode=58 reward=0.7714518 (517.38 it/sec) -training >> step=346800, episode=58 reward=0.7618761 (486.42 it/sec) -training >> step=346900, episode=58 reward=0.7497788 (491.45 it/sec) -training >> step=347000, episode=58 reward=0.7710345 (541.17 it/sec) -training >> step=347100, episode=58 reward=0.7748721 (495.52 it/sec) -training >> step=347200, episode=58 reward=0.7418867 (529.07 it/sec) -training >> step=347300, episode=59 reward=0.7590433 (85.78 it/sec) -training >> step=347400, episode=59 reward=0.7580495 (472.59 it/sec) -training >> step=347500, episode=59 reward=0.7304167 (492.45 it/sec) -training >> step=347600, episode=59 reward=0.785567 (483.38 it/sec) -training >> step=347700, episode=59 reward=0.7790703 (473.43 it/sec) -training >> step=347800, episode=59 reward=0.754338 (483.01 it/sec) -training >> step=347900, episode=59 reward=0.7766349 (524.40 it/sec) -training >> step=348000, episode=59 reward=0.7629531 (516.54 it/sec) -training >> step=348100, episode=59 reward=0.762412 (483.02 it/sec) -training >> step=348200, episode=59 reward=0.7598436 (556.80 it/sec) -training >> step=348300, episode=59 reward=0.7635422 (525.46 it/sec) -training >> step=348400, episode=59 reward=0.7574874 (485.78 it/sec) -training >> step=348500, episode=59 reward=0.7697496 (540.37 it/sec) -training >> step=348600, episode=59 reward=0.776566 (517.14 it/sec) -training >> step=348700, episode=59 reward=0.7326629 (504.65 it/sec) -training >> step=348800, episode=59 reward=0.7559608 (543.05 it/sec) -training >> step=348900, episode=59 reward=0.7701503 (487.19 it/sec) -training >> step=349000, episode=59 reward=0.7607656 (498.54 it/sec) -training >> step=349100, episode=59 reward=0.7589132 (492.18 it/sec) -training >> step=349200, episode=59 reward=0.7589981 (506.60 it/sec) -training >> step=349300, episode=59 reward=0.759626 (519.54 it/sec) -training >> step=349400, episode=59 reward=0.7511491 (493.23 it/sec) -training >> step=349500, episode=59 reward=0.791981 (468.16 it/sec) -training >> step=349600, episode=59 reward=0.7526412 (589.36 it/sec) -training >> step=349700, episode=59 reward=0.7416114 (529.51 it/sec) -training >> step=349800, episode=59 reward=0.7585098 (531.75 it/sec) -training >> step=349900, episode=59 reward=0.7634233 (479.24 it/sec) -training >> step=350000, episode=59 reward=0.7472391 (504.97 it/sec) -training >> step=350100, episode=59 reward=0.7425592 (576.33 it/sec) -training >> step=350200, episode=59 reward=0.7628806 (543.68 it/sec) -training >> step=350300, episode=59 reward=0.7738785 (523.26 it/sec) -training >> step=350400, episode=59 reward=0.7413927 (522.54 it/sec) -training >> step=350500, episode=59 reward=0.7725238 (484.93 it/sec) -training >> step=350600, episode=59 reward=0.7524635 (555.60 it/sec) -training >> step=350700, episode=59 reward=0.7672253 (529.99 it/sec) -training >> step=350800, episode=59 reward=0.7624813 (513.61 it/sec) -training >> step=350900, episode=59 reward=0.762088 (480.37 it/sec) -training >> step=351000, episode=59 reward=0.7717121 (493.22 it/sec) -training >> step=351100, episode=59 reward=0.7758605 (518.78 it/sec) -training >> step=351200, episode=59 reward=0.7578688 (489.74 it/sec) -training >> step=351300, episode=59 reward=0.754105 (516.36 it/sec) -training >> step=351400, episode=59 reward=0.7643363 (526.56 it/sec) -training >> step=351500, episode=59 reward=0.7539821 (500.13 it/sec) -training >> step=351600, episode=59 reward=0.7661797 (504.89 it/sec) -training >> step=351700, episode=59 reward=0.7464072 (508.11 it/sec) -training >> step=351800, episode=59 reward=0.7639478 (509.05 it/sec) -training >> step=351900, episode=59 reward=0.7715819 (516.73 it/sec) -training >> step=352000, episode=59 reward=0.7605804 (497.75 it/sec) -training >> step=352100, episode=59 reward=0.7606588 (507.03 it/sec) -training >> step=352200, episode=59 reward=0.7589163 (554.60 it/sec) -training >> step=352300, episode=59 reward=0.7606819 (554.35 it/sec) -training >> step=352400, episode=59 reward=0.781574 (487.01 it/sec) -training >> step=352500, episode=59 reward=0.7475178 (514.59 it/sec) -training >> step=352600, episode=59 reward=0.7615005 (448.12 it/sec) -training >> step=352700, episode=59 reward=0.7219127 (554.82 it/sec) -training >> step=352800, episode=59 reward=0.761332 (510.32 it/sec) -training >> step=352900, episode=59 reward=0.7405578 (536.05 it/sec) -training >> step=353000, episode=59 reward=0.7264995 (510.52 it/sec) -training >> step=353100, episode=59 reward=0.7366537 (488.57 it/sec) -training >> step=353200, episode=59 reward=0.7334554 (524.77 it/sec) -training >> step=353300, episode=60 reward=0.7683444 (90.67 it/sec) -training >> step=353400, episode=60 reward=0.7572643 (520.57 it/sec) -training >> step=353500, episode=60 reward=0.7612387 (537.91 it/sec) -training >> step=353600, episode=60 reward=0.7497192 (476.78 it/sec) -training >> step=353700, episode=60 reward=0.7587836 (515.30 it/sec) -training >> step=353800, episode=60 reward=0.7507439 (505.28 it/sec) -training >> step=353900, episode=60 reward=0.7482241 (521.81 it/sec) -training >> step=354000, episode=60 reward=0.7485885 (526.84 it/sec) -training >> step=354100, episode=60 reward=0.7509214 (502.39 it/sec) -training >> step=354200, episode=60 reward=0.7572694 (491.42 it/sec) -training >> step=354300, episode=60 reward=0.7749753 (514.89 it/sec) -training >> step=354400, episode=60 reward=0.7557725 (467.02 it/sec) -training >> step=354500, episode=60 reward=0.7673326 (494.26 it/sec) -training >> step=354600, episode=60 reward=0.7620023 (539.44 it/sec) -training >> step=354700, episode=60 reward=0.7614784 (531.86 it/sec) -training >> step=354800, episode=60 reward=0.7614881 (490.49 it/sec) -training >> step=354900, episode=60 reward=0.7641728 (503.23 it/sec) -training >> step=355000, episode=60 reward=0.7553424 (473.64 it/sec) -training >> step=355100, episode=60 reward=0.7540488 (498.75 it/sec) -training >> step=355200, episode=60 reward=0.7631376 (532.48 it/sec) -training >> step=355300, episode=60 reward=0.7364049 (505.06 it/sec) -training >> step=355400, episode=60 reward=0.7679275 (533.20 it/sec) -training >> step=355500, episode=60 reward=0.7532794 (524.10 it/sec) -training >> step=355600, episode=60 reward=0.7645667 (536.32 it/sec) -training >> step=355700, episode=60 reward=0.7613813 (492.72 it/sec) -training >> step=355800, episode=60 reward=0.7693795 (537.55 it/sec) -training >> step=355900, episode=60 reward=0.7570706 (508.41 it/sec) -training >> step=356000, episode=60 reward=0.7834518 (559.45 it/sec) -training >> step=356100, episode=60 reward=0.7640139 (482.31 it/sec) -training >> step=356200, episode=60 reward=0.744128 (521.08 it/sec) -training >> step=356300, episode=60 reward=0.787357 (518.23 it/sec) -training >> step=356400, episode=60 reward=0.7554699 (503.32 it/sec) -training >> step=356500, episode=60 reward=0.7448413 (520.77 it/sec) -training >> step=356600, episode=60 reward=0.7621536 (481.86 it/sec) -training >> step=356700, episode=60 reward=0.7351318 (511.43 it/sec) -training >> step=356800, episode=60 reward=0.7638696 (406.08 it/sec) -training >> step=356900, episode=60 reward=0.7671721 (376.21 it/sec) -training >> step=357000, episode=60 reward=0.735266 (401.24 it/sec) -training >> step=357100, episode=60 reward=0.7714449 (418.01 it/sec) -training >> step=357200, episode=60 reward=0.7581905 (432.89 it/sec) -training >> step=357300, episode=60 reward=0.7563645 (473.73 it/sec) -training >> step=357400, episode=60 reward=0.7334626 (502.35 it/sec) -training >> step=357500, episode=60 reward=0.7851557 (530.11 it/sec) -training >> step=357600, episode=60 reward=0.7427649 (487.90 it/sec) -training >> step=357700, episode=60 reward=0.7605472 (456.09 it/sec) -training >> step=357800, episode=60 reward=0.7566525 (460.80 it/sec) -training >> step=357900, episode=60 reward=0.7788159 (475.35 it/sec) -training >> step=358000, episode=60 reward=0.7406664 (483.85 it/sec) -training >> step=358100, episode=60 reward=0.7467194 (372.84 it/sec) -training >> step=358200, episode=60 reward=0.7604195 (458.05 it/sec) -training >> step=358300, episode=60 reward=0.7753938 (404.97 it/sec) -training >> step=358400, episode=60 reward=0.7616088 (503.80 it/sec) -training >> step=358500, episode=60 reward=0.7518009 (528.24 it/sec) -training >> step=358600, episode=60 reward=0.7480606 (461.68 it/sec) -training >> step=358700, episode=60 reward=0.7839895 (472.60 it/sec) -training >> step=358800, episode=60 reward=0.7502871 (449.63 it/sec) -training >> step=358900, episode=60 reward=0.7660171 (412.28 it/sec) -training >> step=359000, episode=60 reward=0.7380376 (403.84 it/sec) -training >> step=359100, episode=60 reward=0.7523976 (428.73 it/sec) -training >> step=359200, episode=60 reward=0.7459462 (476.79 it/sec) -training >> step=359300, episode=61 reward=0.7625877 (61.52 it/sec) -training >> step=359400, episode=61 reward=0.7412195 (474.01 it/sec) -training >> step=359500, episode=61 reward=0.7410844 (518.86 it/sec) -training >> step=359600, episode=61 reward=0.7770123 (488.80 it/sec) -training >> step=359700, episode=61 reward=0.7725269 (483.06 it/sec) -training >> step=359800, episode=61 reward=0.7621246 (499.14 it/sec) -training >> step=359900, episode=61 reward=0.7764872 (476.74 it/sec) -training >> step=360000, episode=61 reward=0.7475352 (495.55 it/sec) -training >> step=360100, episode=61 reward=0.7489632 (465.39 it/sec) -training >> step=360200, episode=61 reward=0.7475294 (396.92 it/sec) -training >> step=360300, episode=61 reward=0.754968 (393.16 it/sec) -training >> step=360400, episode=61 reward=0.7434661 (403.01 it/sec) -training >> step=360500, episode=61 reward=0.7339236 (475.34 it/sec) -training >> step=360600, episode=61 reward=0.7777275 (488.36 it/sec) -training >> step=360700, episode=61 reward=0.7808054 (439.96 it/sec) -training >> step=360800, episode=61 reward=0.7766199 (461.25 it/sec) -training >> step=360900, episode=61 reward=0.7556751 (511.46 it/sec) -training >> step=361000, episode=61 reward=0.7805459 (463.57 it/sec) -training >> step=361100, episode=61 reward=0.7676675 (514.83 it/sec) -training >> step=361200, episode=61 reward=0.7771904 (459.03 it/sec) -training >> step=361300, episode=61 reward=0.7709838 (454.51 it/sec) -training >> step=361400, episode=61 reward=0.7576391 (553.89 it/sec) -training >> step=361500, episode=61 reward=0.7429838 (458.44 it/sec) -training >> step=361600, episode=61 reward=0.7530984 (477.72 it/sec) -training >> step=361700, episode=61 reward=0.75491 (475.43 it/sec) -training >> step=361800, episode=61 reward=0.7676295 (474.94 it/sec) -training >> step=361900, episode=61 reward=0.7849369 (446.34 it/sec) -training >> step=362000, episode=61 reward=0.7680127 (458.73 it/sec) -training >> step=362100, episode=61 reward=0.7716948 (527.07 it/sec) -training >> step=362200, episode=61 reward=0.7467583 (441.25 it/sec) -training >> step=362300, episode=61 reward=0.7665549 (416.94 it/sec) -training >> step=362400, episode=61 reward=0.7661758 (533.85 it/sec) -training >> step=362500, episode=61 reward=0.7604775 (468.63 it/sec) -training >> step=362600, episode=61 reward=0.7313995 (495.65 it/sec) -training >> step=362700, episode=61 reward=0.7598209 (481.12 it/sec) -training >> step=362800, episode=61 reward=0.7603399 (460.36 it/sec) -training >> step=362900, episode=61 reward=0.7299061 (502.88 it/sec) -training >> step=363000, episode=61 reward=0.7753974 (446.40 it/sec) -training >> step=363100, episode=61 reward=0.7640156 (450.54 it/sec) -training >> step=363200, episode=61 reward=0.7599468 (471.84 it/sec) -training >> step=363300, episode=61 reward=0.7354029 (511.28 it/sec) -training >> step=363400, episode=61 reward=0.7577126 (464.72 it/sec) -training >> step=363500, episode=61 reward=0.7658197 (451.52 it/sec) -training >> step=363600, episode=61 reward=0.7687259 (396.28 it/sec) -training >> step=363700, episode=61 reward=0.7521602 (444.45 it/sec) -training >> step=363800, episode=61 reward=0.7520617 (501.37 it/sec) -training >> step=363900, episode=61 reward=0.7416819 (525.13 it/sec) -training >> step=364000, episode=61 reward=0.7582331 (448.94 it/sec) -training >> step=364100, episode=61 reward=0.7614337 (422.05 it/sec) -training >> step=364200, episode=61 reward=0.7626452 (480.46 it/sec) -training >> step=364300, episode=61 reward=0.7592666 (477.96 it/sec) -training >> step=364400, episode=61 reward=0.7595049 (459.63 it/sec) -training >> step=364500, episode=61 reward=0.7569913 (501.96 it/sec) -training >> step=364600, episode=61 reward=0.7551916 (513.41 it/sec) -training >> step=364700, episode=61 reward=0.7614115 (387.49 it/sec) -training >> step=364800, episode=61 reward=0.7323195 (451.23 it/sec) -training >> step=364900, episode=61 reward=0.7648984 (383.36 it/sec) -training >> step=365000, episode=61 reward=0.7375795 (506.82 it/sec) -training >> step=365100, episode=61 reward=0.7395022 (506.48 it/sec) -training >> step=365200, episode=61 reward=0.7505229 (490.16 it/sec) -training >> step=365300, episode=62 reward=0.7559305 (60.88 it/sec) -training >> step=365400, episode=62 reward=0.7588592 (484.69 it/sec) -training >> step=365500, episode=62 reward=0.7675509 (498.72 it/sec) -training >> step=365600, episode=62 reward=0.7666687 (503.41 it/sec) -training >> step=365700, episode=62 reward=0.7669393 (506.97 it/sec) -training >> step=365800, episode=62 reward=0.771099 (486.02 it/sec) -training >> step=365900, episode=62 reward=0.7569649 (533.37 it/sec) -training >> step=366000, episode=62 reward=0.7461543 (529.05 it/sec) -training >> step=366100, episode=62 reward=0.7587757 (528.28 it/sec) -training >> step=366200, episode=62 reward=0.7535824 (494.08 it/sec) -training >> step=366300, episode=62 reward=0.773612 (485.01 it/sec) -training >> step=366400, episode=62 reward=0.7552553 (511.93 it/sec) -training >> step=366500, episode=62 reward=0.7651632 (537.23 it/sec) -training >> step=366600, episode=62 reward=0.76492 (503.46 it/sec) -training >> step=366700, episode=62 reward=0.7768865 (524.30 it/sec) -training >> step=366800, episode=62 reward=0.7627899 (517.25 it/sec) -training >> step=366900, episode=62 reward=0.7779735 (511.07 it/sec) -training >> step=367000, episode=62 reward=0.7734591 (521.72 it/sec) -training >> step=367100, episode=62 reward=0.7189035 (497.16 it/sec) -training >> step=367200, episode=62 reward=0.7550305 (504.79 it/sec) -training >> step=367300, episode=62 reward=0.7642324 (529.00 it/sec) -training >> step=367400, episode=62 reward=0.7605153 (528.41 it/sec) -training >> step=367500, episode=62 reward=0.7716156 (514.92 it/sec) -training >> step=367600, episode=62 reward=0.7489869 (538.31 it/sec) -training >> step=367700, episode=62 reward=0.7622979 (534.67 it/sec) -training >> step=367800, episode=62 reward=0.7589587 (498.35 it/sec) -training >> step=367900, episode=62 reward=0.7417196 (553.18 it/sec) -training >> step=368000, episode=62 reward=0.7417452 (493.15 it/sec) -training >> step=368100, episode=62 reward=0.7441905 (507.07 it/sec) -training >> step=368200, episode=62 reward=0.7651962 (509.40 it/sec) -training >> step=368300, episode=62 reward=0.758961 (512.21 it/sec) -training >> step=368400, episode=62 reward=0.7825113 (475.61 it/sec) -training >> step=368500, episode=62 reward=0.7631258 (481.51 it/sec) -training >> step=368600, episode=62 reward=0.7640899 (494.60 it/sec) -training >> step=368700, episode=62 reward=0.7660851 (472.76 it/sec) -training >> step=368800, episode=62 reward=0.7349796 (529.97 it/sec) -training >> step=368900, episode=62 reward=0.7693907 (482.30 it/sec) -training >> step=369000, episode=62 reward=0.771623 (482.02 it/sec) -training >> step=369100, episode=62 reward=0.743249 (543.85 it/sec) -training >> step=369200, episode=62 reward=0.7796358 (527.00 it/sec) -training >> step=369300, episode=62 reward=0.7793097 (520.07 it/sec) -training >> step=369400, episode=62 reward=0.7676055 (486.26 it/sec) -training >> step=369500, episode=62 reward=0.7533048 (540.30 it/sec) -training >> step=369600, episode=62 reward=0.766346 (500.90 it/sec) -training >> step=369700, episode=62 reward=0.751792 (482.34 it/sec) -training >> step=369800, episode=62 reward=0.7653098 (410.49 it/sec) -training >> step=369900, episode=62 reward=0.776854 (422.57 it/sec) -training >> step=370000, episode=62 reward=0.7670856 (380.91 it/sec) -training >> step=370100, episode=62 reward=0.7445409 (439.77 it/sec) -training >> step=370200, episode=62 reward=0.7557846 (466.81 it/sec) -training >> step=370300, episode=62 reward=0.7609837 (475.09 it/sec) -training >> step=370400, episode=62 reward=0.7713126 (516.64 it/sec) -training >> step=370500, episode=62 reward=0.7952511 (445.02 it/sec) -training >> step=370600, episode=62 reward=0.7533458 (535.13 it/sec) -training >> step=370700, episode=62 reward=0.7577397 (538.41 it/sec) -training >> step=370800, episode=62 reward=0.7656803 (489.49 it/sec) -training >> step=370900, episode=62 reward=0.7246878 (553.05 it/sec) -training >> step=371000, episode=62 reward=0.7457407 (469.39 it/sec) -training >> step=371100, episode=62 reward=0.7585967 (504.69 it/sec) -training >> step=371200, episode=62 reward=0.7619136 (531.43 it/sec) -training >> step=371300, episode=63 reward=0.7638913 (73.37 it/sec) -training >> step=371400, episode=63 reward=0.731845 (481.23 it/sec) -training >> step=371500, episode=63 reward=0.7487988 (470.82 it/sec) -training >> step=371600, episode=63 reward=0.7528021 (536.06 it/sec) -training >> step=371700, episode=63 reward=0.7350776 (472.46 it/sec) -training >> step=371800, episode=63 reward=0.7543017 (548.68 it/sec) -training >> step=371900, episode=63 reward=0.7465276 (505.52 it/sec) -training >> step=372000, episode=63 reward=0.7362459 (479.19 it/sec) -training >> step=372100, episode=63 reward=0.7503393 (531.78 it/sec) -training >> step=372200, episode=63 reward=0.7640061 (546.58 it/sec) -training >> step=372300, episode=63 reward=0.7549224 (506.48 it/sec) -training >> step=372400, episode=63 reward=0.7760316 (508.27 it/sec) -training >> step=372500, episode=63 reward=0.7572173 (523.55 it/sec) -training >> step=372600, episode=63 reward=0.7649242 (481.64 it/sec) -training >> step=372700, episode=63 reward=0.7692254 (521.47 it/sec) -training >> step=372800, episode=63 reward=0.7594172 (508.61 it/sec) -training >> step=372900, episode=63 reward=0.7329858 (517.75 it/sec) -training >> step=373000, episode=63 reward=0.7814768 (472.89 it/sec) -training >> step=373100, episode=63 reward=0.7543078 (548.23 it/sec) -training >> step=373200, episode=63 reward=0.7368425 (501.72 it/sec) -training >> step=373300, episode=63 reward=0.7721127 (519.43 it/sec) -training >> step=373400, episode=63 reward=0.7624879 (548.98 it/sec) -training >> step=373500, episode=63 reward=0.7358511 (529.65 it/sec) -training >> step=373600, episode=63 reward=0.7519907 (496.05 it/sec) -training >> step=373700, episode=63 reward=0.7648095 (505.64 it/sec) -training >> step=373800, episode=63 reward=0.779925 (500.48 it/sec) -training >> step=373900, episode=63 reward=0.7640817 (539.58 it/sec) -training >> step=374000, episode=63 reward=0.7596031 (473.12 it/sec) -training >> step=374100, episode=63 reward=0.7618636 (511.38 it/sec) -training >> step=374200, episode=63 reward=0.7653829 (544.38 it/sec) -training >> step=374300, episode=63 reward=0.750102 (513.60 it/sec) -training >> step=374400, episode=63 reward=0.7836021 (555.50 it/sec) -training >> step=374500, episode=63 reward=0.7484648 (522.80 it/sec) -training >> step=374600, episode=63 reward=0.7598144 (447.31 it/sec) -training >> step=374700, episode=63 reward=0.7654654 (566.59 it/sec) -training >> step=374800, episode=63 reward=0.7676145 (518.82 it/sec) -training >> step=374900, episode=63 reward=0.7659013 (476.20 it/sec) -training >> step=375000, episode=63 reward=0.7559766 (521.43 it/sec) -training >> step=375100, episode=63 reward=0.7576622 (501.40 it/sec) -training >> step=375200, episode=63 reward=0.7603156 (541.41 it/sec) -training >> step=375300, episode=63 reward=0.7635288 (525.19 it/sec) -training >> step=375400, episode=63 reward=0.7614443 (518.95 it/sec) -training >> step=375500, episode=63 reward=0.7511252 (526.34 it/sec) -training >> step=375600, episode=63 reward=0.7647449 (497.66 it/sec) -training >> step=375700, episode=63 reward=0.7584583 (525.94 it/sec) -training >> step=375800, episode=63 reward=0.7754821 (472.11 it/sec) -training >> step=375900, episode=63 reward=0.7520891 (502.30 it/sec) -training >> step=376000, episode=63 reward=0.7751021 (549.25 it/sec) -training >> step=376100, episode=63 reward=0.7406145 (525.25 it/sec) -training >> step=376200, episode=63 reward=0.7535412 (469.82 it/sec) -training >> step=376300, episode=63 reward=0.7331402 (461.11 it/sec) -training >> step=376400, episode=63 reward=0.7679049 (461.11 it/sec) -training >> step=376500, episode=63 reward=0.7779558 (541.19 it/sec) -training >> step=376600, episode=63 reward=0.7637186 (510.15 it/sec) -training >> step=376700, episode=63 reward=0.7530375 (507.71 it/sec) -training >> step=376800, episode=63 reward=0.7849729 (548.89 it/sec) -training >> step=376900, episode=63 reward=0.7611077 (497.86 it/sec) -training >> step=377000, episode=63 reward=0.7430219 (541.99 it/sec) -training >> step=377100, episode=63 reward=0.7609847 (539.26 it/sec) -training >> step=377200, episode=63 reward=0.7270735 (507.92 it/sec) -training >> step=377300, episode=64 reward=0.77144 (90.18 it/sec) -training >> step=377400, episode=64 reward=0.7714936 (505.65 it/sec) -training >> step=377500, episode=64 reward=0.7450939 (498.82 it/sec) -training >> step=377600, episode=64 reward=0.7436241 (506.88 it/sec) -training >> step=377700, episode=64 reward=0.7688737 (529.93 it/sec) -training >> step=377800, episode=64 reward=0.7576312 (411.30 it/sec) -training >> step=377900, episode=64 reward=0.761584 (506.62 it/sec) -training >> step=378000, episode=64 reward=0.7865798 (487.89 it/sec) -training >> step=378100, episode=64 reward=0.7817845 (473.99 it/sec) -training >> step=378200, episode=64 reward=0.7580607 (421.80 it/sec) -training >> step=378300, episode=64 reward=0.7811763 (429.44 it/sec) -training >> step=378400, episode=64 reward=0.7465592 (451.16 it/sec) -training >> step=378500, episode=64 reward=0.7486577 (377.44 it/sec) -training >> step=378600, episode=64 reward=0.7747163 (364.37 it/sec) -training >> step=378700, episode=64 reward=0.743818 (364.50 it/sec) -training >> step=378800, episode=64 reward=0.7772726 (405.44 it/sec) -training >> step=378900, episode=64 reward=0.7530045 (451.19 it/sec) -training >> step=379000, episode=64 reward=0.7774977 (514.51 it/sec) -training >> step=379100, episode=64 reward=0.7854183 (472.68 it/sec) -training >> step=379200, episode=64 reward=0.7828568 (488.71 it/sec) -training >> step=379300, episode=64 reward=0.7638599 (456.46 it/sec) -training >> step=379400, episode=64 reward=0.7922088 (493.92 it/sec) -training >> step=379500, episode=64 reward=0.7393016 (450.49 it/sec) -training >> step=379600, episode=64 reward=0.7653283 (441.60 it/sec) -training >> step=379700, episode=64 reward=0.7576371 (406.39 it/sec) -training >> step=379800, episode=64 reward=0.7438654 (473.23 it/sec) -training >> step=379900, episode=64 reward=0.7643837 (398.53 it/sec) -training >> step=380000, episode=64 reward=0.7658551 (348.78 it/sec) -training >> step=380100, episode=64 reward=0.7797894 (478.26 it/sec) -training >> step=380200, episode=64 reward=0.7684655 (401.50 it/sec) -training >> step=380300, episode=64 reward=0.7786881 (567.24 it/sec) -training >> step=380400, episode=64 reward=0.7677022 (504.66 it/sec) -training >> step=380500, episode=64 reward=0.7554668 (501.39 it/sec) -training >> step=380600, episode=64 reward=0.7529572 (473.20 it/sec) -training >> step=380700, episode=64 reward=0.7613085 (456.67 it/sec) -training >> step=380800, episode=64 reward=0.7581856 (473.57 it/sec) -training >> step=380900, episode=64 reward=0.7631312 (462.59 it/sec) -training >> step=381000, episode=64 reward=0.7715282 (495.95 it/sec) -training >> step=381100, episode=64 reward=0.7629506 (521.94 it/sec) -training >> step=381200, episode=64 reward=0.7707124 (507.03 it/sec) -training >> step=381300, episode=64 reward=0.7481861 (493.52 it/sec) -training >> step=381400, episode=64 reward=0.7543616 (505.31 it/sec) -training >> step=381500, episode=64 reward=0.751708 (507.06 it/sec) -training >> step=381600, episode=64 reward=0.7654874 (497.47 it/sec) -training >> step=381700, episode=64 reward=0.7606204 (482.78 it/sec) -training >> step=381800, episode=64 reward=0.7441882 (518.90 it/sec) -training >> step=381900, episode=64 reward=0.7624743 (550.44 it/sec) -training >> step=382000, episode=64 reward=0.7570428 (509.55 it/sec) -training >> step=382100, episode=64 reward=0.7624516 (494.71 it/sec) -training >> step=382200, episode=64 reward=0.7733122 (518.64 it/sec) -training >> step=382300, episode=64 reward=0.7573421 (508.91 it/sec) -training >> step=382400, episode=64 reward=0.7693846 (516.02 it/sec) -training >> step=382500, episode=64 reward=0.7361286 (503.03 it/sec) -training >> step=382600, episode=64 reward=0.7675266 (450.11 it/sec) -training >> step=382700, episode=64 reward=0.7580522 (470.13 it/sec) -training >> step=382800, episode=64 reward=0.756878 (445.29 it/sec) -training >> step=382900, episode=64 reward=0.7517821 (455.19 it/sec) -training >> step=383000, episode=64 reward=0.7439273 (512.45 it/sec) -training >> step=383100, episode=64 reward=0.750981 (458.67 it/sec) -training >> step=383200, episode=64 reward=0.7624846 (504.04 it/sec) -training >> step=383300, episode=65 reward=0.7627316 (64.79 it/sec) -training >> step=383400, episode=65 reward=0.7665355 (447.53 it/sec) -training >> step=383500, episode=65 reward=0.7752494 (421.97 it/sec) -training >> step=383600, episode=65 reward=0.7552598 (479.17 it/sec) -training >> step=383700, episode=65 reward=0.7752163 (512.28 it/sec) -training >> step=383800, episode=65 reward=0.7513771 (539.39 it/sec) -training >> step=383900, episode=65 reward=0.7687866 (491.07 it/sec) -training >> step=384000, episode=65 reward=0.7534289 (539.81 it/sec) -training >> step=384100, episode=65 reward=0.7406133 (494.40 it/sec) -training >> step=384200, episode=65 reward=0.74419 (526.77 it/sec) -training >> step=384300, episode=65 reward=0.7489989 (521.70 it/sec) -training >> step=384400, episode=65 reward=0.7592987 (527.11 it/sec) -training >> step=384500, episode=65 reward=0.7617705 (495.01 it/sec) -training >> step=384600, episode=65 reward=0.7579934 (535.91 it/sec) -training >> step=384700, episode=65 reward=0.7651793 (498.23 it/sec) -training >> step=384800, episode=65 reward=0.7628089 (481.64 it/sec) -training >> step=384900, episode=65 reward=0.7366248 (539.57 it/sec) -training >> step=385000, episode=65 reward=0.7676895 (526.14 it/sec) -training >> step=385100, episode=65 reward=0.7689847 (500.25 it/sec) -training >> step=385200, episode=65 reward=0.7372829 (484.19 it/sec) -training >> step=385300, episode=65 reward=0.7625366 (539.03 it/sec) -training >> step=385400, episode=65 reward=0.7611176 (493.19 it/sec) -training >> step=385500, episode=65 reward=0.7601362 (494.30 it/sec) -training >> step=385600, episode=65 reward=0.7611501 (551.94 it/sec) -training >> step=385700, episode=65 reward=0.7528314 (496.08 it/sec) -training >> step=385800, episode=65 reward=0.7628507 (537.31 it/sec) -training >> step=385900, episode=65 reward=0.7731071 (547.47 it/sec) -training >> step=386000, episode=65 reward=0.764577 (534.65 it/sec) -training >> step=386100, episode=65 reward=0.7807174 (490.97 it/sec) -training >> step=386200, episode=65 reward=0.7414786 (462.70 it/sec) -training >> step=386300, episode=65 reward=0.7890966 (508.03 it/sec) -training >> step=386400, episode=65 reward=0.7772934 (535.97 it/sec) -training >> step=386500, episode=65 reward=0.7822474 (505.70 it/sec) -training >> step=386600, episode=65 reward=0.7926424 (531.38 it/sec) -training >> step=386700, episode=65 reward=0.7602729 (483.62 it/sec) -training >> step=386800, episode=65 reward=0.7438458 (510.75 it/sec) -training >> step=386900, episode=65 reward=0.7704156 (543.51 it/sec) -training >> step=387000, episode=65 reward=0.7508929 (544.23 it/sec) -training >> step=387100, episode=65 reward=0.7660682 (493.90 it/sec) -training >> step=387200, episode=65 reward=0.7685528 (508.93 it/sec) -training >> step=387300, episode=65 reward=0.7737101 (509.09 it/sec) -training >> step=387400, episode=65 reward=0.7639362 (520.62 it/sec) -training >> step=387500, episode=65 reward=0.7576576 (542.48 it/sec) -training >> step=387600, episode=65 reward=0.7487862 (550.07 it/sec) -training >> step=387700, episode=65 reward=0.763503 (499.65 it/sec) -training >> step=387800, episode=65 reward=0.7427384 (536.94 it/sec) -training >> step=387900, episode=65 reward=0.7691531 (464.84 it/sec) -training >> step=388000, episode=65 reward=0.7312737 (548.78 it/sec) -training >> step=388100, episode=65 reward=0.7506917 (532.77 it/sec) -training >> step=388200, episode=65 reward=0.72698 (496.69 it/sec) -training >> step=388300, episode=65 reward=0.7388355 (510.95 it/sec) -training >> step=388400, episode=65 reward=0.7803749 (488.04 it/sec) -training >> step=388500, episode=65 reward=0.7547447 (518.88 it/sec) -training >> step=388600, episode=65 reward=0.7453258 (538.66 it/sec) -training >> step=388700, episode=65 reward=0.7368715 (494.37 it/sec) -training >> step=388800, episode=65 reward=0.7644151 (510.99 it/sec) -training >> step=388900, episode=65 reward=0.747534 (522.31 it/sec) -training >> step=389000, episode=65 reward=0.7551028 (561.52 it/sec) -training >> step=389100, episode=65 reward=0.7549706 (494.05 it/sec) -training >> step=389200, episode=65 reward=0.7548796 (479.92 it/sec) -training >> step=389300, episode=66 reward=0.7718126 (208.20 it/sec) -training >> step=389400, episode=66 reward=0.7464455 (506.48 it/sec) -training >> step=389500, episode=66 reward=0.7406974 (504.88 it/sec) -training >> step=389600, episode=66 reward=0.7716809 (493.72 it/sec) -training >> step=389700, episode=66 reward=0.7602354 (539.11 it/sec) -training >> step=389800, episode=66 reward=0.7510114 (499.39 it/sec) -training >> step=389900, episode=66 reward=0.7462907 (476.03 it/sec) -training >> step=390000, episode=66 reward=0.7410274 (520.98 it/sec) -training >> step=390100, episode=66 reward=0.7671359 (491.34 it/sec) -training >> step=390200, episode=66 reward=0.7870105 (526.03 it/sec) -training >> step=390300, episode=66 reward=0.7899619 (488.55 it/sec) -training >> step=390400, episode=66 reward=0.7446917 (517.78 it/sec) -training >> step=390500, episode=66 reward=0.781252 (515.12 it/sec) -training >> step=390600, episode=66 reward=0.7507265 (529.16 it/sec) -training >> step=390700, episode=66 reward=0.7614293 (490.02 it/sec) -training >> step=390800, episode=66 reward=0.7564017 (501.79 it/sec) -training >> step=390900, episode=66 reward=0.7825537 (483.23 it/sec) -training >> step=391000, episode=66 reward=0.7314826 (532.16 it/sec) -training >> step=391100, episode=66 reward=0.7698026 (521.12 it/sec) -training >> step=391200, episode=66 reward=0.7753977 (505.27 it/sec) -training >> step=391300, episode=66 reward=0.7726938 (499.33 it/sec) -training >> step=391400, episode=66 reward=0.7415184 (495.77 it/sec) -training >> step=391500, episode=66 reward=0.7646423 (544.19 it/sec) -training >> step=391600, episode=66 reward=0.7770319 (508.47 it/sec) -training >> step=391700, episode=66 reward=0.7734237 (476.41 it/sec) -training >> step=391800, episode=66 reward=0.7535088 (525.09 it/sec) -training >> step=391900, episode=66 reward=0.7802689 (470.06 it/sec) -training >> step=392000, episode=66 reward=0.7615644 (531.49 it/sec) -training >> step=392100, episode=66 reward=0.77004 (521.22 it/sec) -training >> step=392200, episode=66 reward=0.7686226 (502.73 it/sec) -training >> step=392300, episode=66 reward=0.7670371 (488.22 it/sec) -training >> step=392400, episode=66 reward=0.7546595 (493.30 it/sec) -training >> step=392500, episode=66 reward=0.7712907 (549.35 it/sec) -training >> step=392600, episode=66 reward=0.7611831 (534.58 it/sec) -training >> step=392700, episode=66 reward=0.7813978 (505.78 it/sec) -training >> step=392800, episode=66 reward=0.7521436 (500.74 it/sec) -training >> step=392900, episode=66 reward=0.7713776 (468.16 it/sec) -training >> step=393000, episode=66 reward=0.7491714 (538.24 it/sec) -training >> step=393100, episode=66 reward=0.7731498 (560.52 it/sec) -training >> step=393200, episode=66 reward=0.7413694 (495.03 it/sec) -training >> step=393300, episode=66 reward=0.7469417 (518.10 it/sec) -training >> step=393400, episode=66 reward=0.7678726 (504.06 it/sec) -training >> step=393500, episode=66 reward=0.7699611 (525.73 it/sec) -training >> step=393600, episode=66 reward=0.7600059 (524.63 it/sec) -training >> step=393700, episode=66 reward=0.7746168 (505.84 it/sec) -training >> step=393800, episode=66 reward=0.7495709 (534.98 it/sec) -training >> step=393900, episode=66 reward=0.7749955 (501.16 it/sec) -training >> step=394000, episode=66 reward=0.7735637 (494.51 it/sec) -training >> step=394100, episode=66 reward=0.7645289 (556.10 it/sec) -training >> step=394200, episode=66 reward=0.7614454 (537.00 it/sec) -training >> step=394300, episode=66 reward=0.7584902 (516.20 it/sec) -training >> step=394400, episode=66 reward=0.7471482 (481.51 it/sec) -training >> step=394500, episode=66 reward=0.7507606 (439.45 it/sec) -training >> step=394600, episode=66 reward=0.7353864 (483.22 it/sec) -training >> step=394700, episode=66 reward=0.7321985 (422.82 it/sec) -training >> step=394800, episode=66 reward=0.7382627 (398.74 it/sec) -training >> step=394900, episode=66 reward=0.7303434 (445.79 it/sec) -training >> step=395000, episode=66 reward=0.7493202 (438.09 it/sec) -training >> step=395100, episode=66 reward=0.7457427 (521.36 it/sec) -training >> step=395200, episode=66 reward=0.7503796 (402.62 it/sec) -training >> step=395300, episode=67 reward=0.7476983 (117.53 it/sec) -training >> step=395400, episode=67 reward=0.7495105 (482.51 it/sec) -training >> step=395500, episode=67 reward=0.7617011 (502.25 it/sec) -training >> step=395600, episode=67 reward=0.7664165 (535.58 it/sec) -training >> step=395700, episode=67 reward=0.7577516 (517.37 it/sec) -training >> step=395800, episode=67 reward=0.7552994 (527.90 it/sec) -training >> step=395900, episode=67 reward=0.7648078 (526.86 it/sec) -training >> step=396000, episode=67 reward=0.7629741 (510.10 it/sec) -training >> step=396100, episode=67 reward=0.7649366 (573.62 it/sec) -training >> step=396200, episode=67 reward=0.7614951 (508.61 it/sec) -training >> step=396300, episode=67 reward=0.7474123 (580.24 it/sec) -training >> step=396400, episode=67 reward=0.7747929 (546.87 it/sec) -training >> step=396500, episode=67 reward=0.7641163 (566.81 it/sec) -training >> step=396600, episode=67 reward=0.749094 (541.87 it/sec) -training >> step=396700, episode=67 reward=0.7523045 (537.63 it/sec) -training >> step=396800, episode=67 reward=0.7545791 (566.36 it/sec) -training >> step=396900, episode=67 reward=0.7789958 (546.50 it/sec) -training >> step=397000, episode=67 reward=0.7732901 (583.07 it/sec) -training >> step=397100, episode=67 reward=0.7549177 (525.93 it/sec) -training >> step=397200, episode=67 reward=0.7639793 (556.95 it/sec) -training >> step=397300, episode=67 reward=0.7624764 (556.85 it/sec) -training >> step=397400, episode=67 reward=0.7857117 (558.26 it/sec) -training >> step=397500, episode=67 reward=0.7577862 (516.62 it/sec) -training >> step=397600, episode=67 reward=0.7714731 (534.87 it/sec) -training >> step=397700, episode=67 reward=0.7627838 (503.62 it/sec) -training >> step=397800, episode=67 reward=0.773918 (516.01 it/sec) -training >> step=397900, episode=67 reward=0.7639381 (500.79 it/sec) -training >> step=398000, episode=67 reward=0.753301 (541.94 it/sec) -training >> step=398100, episode=67 reward=0.7487755 (547.18 it/sec) -training >> step=398200, episode=67 reward=0.7491451 (538.63 it/sec) -training >> step=398300, episode=67 reward=0.7774081 (543.10 it/sec) -training >> step=398400, episode=67 reward=0.7557436 (537.79 it/sec) -training >> step=398500, episode=67 reward=0.7371362 (561.44 it/sec) -training >> step=398600, episode=67 reward=0.7574369 (590.35 it/sec) -training >> step=398700, episode=67 reward=0.7671956 (555.22 it/sec) -training >> step=398800, episode=67 reward=0.7425244 (514.46 it/sec) -training >> step=398900, episode=67 reward=0.7753348 (551.02 it/sec) -training >> step=399000, episode=67 reward=0.746823 (495.68 it/sec) -training >> step=399100, episode=67 reward=0.7722156 (528.79 it/sec) -training >> step=399200, episode=67 reward=0.7968381 (467.23 it/sec) -training >> step=399300, episode=67 reward=0.7437683 (516.76 it/sec) -training >> step=399400, episode=67 reward=0.756844 (550.74 it/sec) -training >> step=399500, episode=67 reward=0.7473797 (513.29 it/sec) -training >> step=399600, episode=67 reward=0.7426029 (533.37 it/sec) -training >> step=399700, episode=67 reward=0.7596248 (514.24 it/sec) -training >> step=399800, episode=67 reward=0.7557099 (543.03 it/sec) -training >> step=399900, episode=67 reward=0.7326274 (474.89 it/sec) -training >> step=400000, episode=67 reward=0.7754603 (482.23 it/sec) -training >> step=400100, episode=67 reward=0.755668 (477.80 it/sec) -training >> step=400200, episode=67 reward=0.7492247 (499.05 it/sec) -training >> step=400300, episode=67 reward=0.7342618 (461.60 it/sec) -training >> step=400400, episode=67 reward=0.7678595 (491.85 it/sec) -training >> step=400500, episode=67 reward=0.7646276 (501.97 it/sec) -training >> step=400600, episode=67 reward=0.7320501 (475.18 it/sec) -training >> step=400700, episode=67 reward=0.7649056 (487.96 it/sec) -training >> step=400800, episode=67 reward=0.7309951 (447.48 it/sec) -training >> step=400900, episode=67 reward=0.7455006 (442.67 it/sec) -training >> step=401000, episode=67 reward=0.7415113 (491.86 it/sec) -training >> step=401100, episode=67 reward=0.7539369 (420.73 it/sec) -training >> step=401200, episode=67 reward=0.7497053 (498.46 it/sec) -training >> step=401300, episode=68 reward=0.7661611 (130.67 it/sec) -training >> step=401400, episode=68 reward=0.7432648 (505.94 it/sec) -training >> step=401500, episode=68 reward=0.7504657 (513.31 it/sec) -training >> step=401600, episode=68 reward=0.7748787 (525.23 it/sec) -training >> step=401700, episode=68 reward=0.7633305 (504.30 it/sec) -training >> step=401800, episode=68 reward=0.7710419 (460.42 it/sec) -training >> step=401900, episode=68 reward=0.7497841 (479.16 it/sec) -training >> step=402000, episode=68 reward=0.7550291 (531.43 it/sec) -training >> step=402100, episode=68 reward=0.7464246 (531.02 it/sec) -training >> step=402200, episode=68 reward=0.7621875 (499.81 it/sec) -training >> step=402300, episode=68 reward=0.7575966 (455.98 it/sec) -training >> step=402400, episode=68 reward=0.7586461 (452.53 it/sec) -training >> step=402500, episode=68 reward=0.7595534 (488.82 it/sec) -training >> step=402600, episode=68 reward=0.7754534 (480.75 it/sec) -training >> step=402700, episode=68 reward=0.7714903 (497.37 it/sec) -training >> step=402800, episode=68 reward=0.7781838 (481.47 it/sec) -training >> step=402900, episode=68 reward=0.7341905 (438.09 it/sec) -training >> step=403000, episode=68 reward=0.7515633 (457.52 it/sec) -training >> step=403100, episode=68 reward=0.7678642 (412.63 it/sec) -training >> step=403200, episode=68 reward=0.7563339 (485.18 it/sec) -training >> step=403300, episode=68 reward=0.7708414 (478.68 it/sec) -training >> step=403400, episode=68 reward=0.7560756 (493.34 it/sec) -training >> step=403500, episode=68 reward=0.7409777 (500.38 it/sec) -training >> step=403600, episode=68 reward=0.748148 (431.86 it/sec) -training >> step=403700, episode=68 reward=0.7855872 (483.20 it/sec) -training >> step=403800, episode=68 reward=0.7629327 (448.40 it/sec) -training >> step=403900, episode=68 reward=0.7819514 (442.00 it/sec) -training >> step=404000, episode=68 reward=0.7704274 (495.33 it/sec) -training >> step=404100, episode=68 reward=0.7512131 (480.81 it/sec) -training >> step=404200, episode=68 reward=0.7825922 (473.76 it/sec) -training >> step=404300, episode=68 reward=0.7786741 (475.65 it/sec) -training >> step=404400, episode=68 reward=0.7685542 (473.24 it/sec) -training >> step=404500, episode=68 reward=0.747878 (489.36 it/sec) -training >> step=404600, episode=68 reward=0.7761098 (457.62 it/sec) -training >> step=404700, episode=68 reward=0.7575967 (432.98 it/sec) -training >> step=404800, episode=68 reward=0.7547933 (487.55 it/sec) -training >> step=404900, episode=68 reward=0.7683623 (502.76 it/sec) -training >> step=405000, episode=68 reward=0.7525442 (497.67 it/sec) -training >> step=405100, episode=68 reward=0.7476234 (521.01 it/sec) -training >> step=405200, episode=68 reward=0.7579418 (494.71 it/sec) -training >> step=405300, episode=68 reward=0.7567498 (533.58 it/sec) -training >> step=405400, episode=68 reward=0.750289 (562.35 it/sec) -training >> step=405500, episode=68 reward=0.780778 (537.83 it/sec) -training >> step=405600, episode=68 reward=0.7612814 (535.74 it/sec) -training >> step=405700, episode=68 reward=0.7632338 (526.02 it/sec) -training >> step=405800, episode=68 reward=0.7682763 (482.33 it/sec) -training >> step=405900, episode=68 reward=0.7612407 (595.39 it/sec) -training >> step=406000, episode=68 reward=0.749538 (578.33 it/sec) -training >> step=406100, episode=68 reward=0.7383721 (539.85 it/sec) -training >> step=406200, episode=68 reward=0.7547163 (564.66 it/sec) -training >> step=406300, episode=68 reward=0.7447611 (505.38 it/sec) -training >> step=406400, episode=68 reward=0.7534065 (548.35 it/sec) -training >> step=406500, episode=68 reward=0.7601408 (593.24 it/sec) -training >> step=406600, episode=68 reward=0.760456 (562.58 it/sec) -training >> step=406700, episode=68 reward=0.7623765 (533.61 it/sec) -training >> step=406800, episode=68 reward=0.7626552 (528.95 it/sec) -training >> step=406900, episode=68 reward=0.7522175 (493.86 it/sec) -training >> step=407000, episode=68 reward=0.7677088 (562.73 it/sec) -training >> step=407100, episode=68 reward=0.748055 (590.51 it/sec) -training >> step=407200, episode=68 reward=0.7601353 (590.54 it/sec) -training >> step=407300, episode=69 reward=0.7685491 (127.41 it/sec) -training >> step=407400, episode=69 reward=0.7682484 (523.88 it/sec) -training >> step=407500, episode=69 reward=0.7551594 (533.17 it/sec) -training >> step=407600, episode=69 reward=0.7724066 (566.73 it/sec) -training >> step=407700, episode=69 reward=0.7518115 (521.75 it/sec) -training >> step=407800, episode=69 reward=0.7756431 (590.67 it/sec) -training >> step=407900, episode=69 reward=0.7600299 (565.34 it/sec) -training >> step=408000, episode=69 reward=0.7284173 (564.79 it/sec) -training >> step=408100, episode=69 reward=0.7641717 (571.64 it/sec) -training >> step=408200, episode=69 reward=0.7658582 (592.65 it/sec) -training >> step=408300, episode=69 reward=0.7629068 (493.21 it/sec) -training >> step=408400, episode=69 reward=0.7622329 (563.57 it/sec) -training >> step=408500, episode=69 reward=0.761286 (545.76 it/sec) -training >> step=408600, episode=69 reward=0.7777677 (523.26 it/sec) -training >> step=408700, episode=69 reward=0.7505584 (565.54 it/sec) -training >> step=408800, episode=69 reward=0.7576233 (555.56 it/sec) -training >> step=408900, episode=69 reward=0.7531813 (593.64 it/sec) -training >> step=409000, episode=69 reward=0.7339435 (544.02 it/sec) -training >> step=409100, episode=69 reward=0.7631794 (548.20 it/sec) -training >> step=409200, episode=69 reward=0.7576995 (541.25 it/sec) -training >> step=409300, episode=69 reward=0.7639659 (556.03 it/sec) -training >> step=409400, episode=69 reward=0.7512007 (524.14 it/sec) -training >> step=409500, episode=69 reward=0.7605234 (566.55 it/sec) -training >> step=409600, episode=69 reward=0.7522876 (523.26 it/sec) -training >> step=409700, episode=69 reward=0.7460914 (513.35 it/sec) -training >> step=409800, episode=69 reward=0.7721525 (491.04 it/sec) -training >> step=409900, episode=69 reward=0.7510716 (496.21 it/sec) -training >> step=410000, episode=69 reward=0.7679303 (556.63 it/sec) -training >> step=410100, episode=69 reward=0.7750136 (549.31 it/sec) -training >> step=410200, episode=69 reward=0.7676334 (553.92 it/sec) -training >> step=410300, episode=69 reward=0.764117 (512.46 it/sec) -training >> step=410400, episode=69 reward=0.7750193 (551.55 it/sec) -training >> step=410500, episode=69 reward=0.7661506 (530.73 it/sec) -training >> step=410600, episode=69 reward=0.7512662 (566.96 it/sec) -training >> step=410700, episode=69 reward=0.7808616 (579.30 it/sec) -training >> step=410800, episode=69 reward=0.7585555 (537.54 it/sec) -training >> step=410900, episode=69 reward=0.75015 (511.51 it/sec) -training >> step=411000, episode=69 reward=0.7620501 (567.35 it/sec) -training >> step=411100, episode=69 reward=0.774445 (517.29 it/sec) -training >> step=411200, episode=69 reward=0.7536315 (531.82 it/sec) -training >> step=411300, episode=69 reward=0.7478697 (565.14 it/sec) -training >> step=411400, episode=69 reward=0.7675067 (528.91 it/sec) -training >> step=411500, episode=69 reward=0.75121 (541.73 it/sec) -training >> step=411600, episode=69 reward=0.7365556 (522.95 it/sec) -training >> step=411700, episode=69 reward=0.7577013 (550.82 it/sec) -training >> step=411800, episode=69 reward=0.7467735 (569.76 it/sec) -training >> step=411900, episode=69 reward=0.7468129 (521.38 it/sec) -training >> step=412000, episode=69 reward=0.7845744 (534.75 it/sec) -training >> step=412100, episode=69 reward=0.7742867 (575.32 it/sec) -training >> step=412200, episode=69 reward=0.7835283 (514.85 it/sec) -training >> step=412300, episode=69 reward=0.7500207 (564.70 it/sec) -training >> step=412400, episode=69 reward=0.753659 (523.25 it/sec) -training >> step=412500, episode=69 reward=0.7469119 (510.84 it/sec) -training >> step=412600, episode=69 reward=0.7689677 (504.58 it/sec) -training >> step=412700, episode=69 reward=0.7486723 (494.13 it/sec) -training >> step=412800, episode=69 reward=0.7599559 (515.54 it/sec) -training >> step=412900, episode=69 reward=0.7502574 (528.35 it/sec) -training >> step=413000, episode=69 reward=0.7714878 (518.33 it/sec) -training >> step=413100, episode=69 reward=0.7632241 (547.26 it/sec) -training >> step=413200, episode=69 reward=0.7310204 (530.52 it/sec) -training >> step=413300, episode=70 reward=0.7671842 (118.36 it/sec) -training >> step=413400, episode=70 reward=0.7560811 (555.66 it/sec) -training >> step=413500, episode=70 reward=0.7798464 (555.36 it/sec) -training >> step=413600, episode=70 reward=0.7792681 (567.67 it/sec) -training >> step=413700, episode=70 reward=0.7653578 (553.44 it/sec) -training >> step=413800, episode=70 reward=0.7632056 (544.02 it/sec) -training >> step=413900, episode=70 reward=0.7542316 (561.87 it/sec) -training >> step=414000, episode=70 reward=0.7711847 (577.88 it/sec) -training >> step=414100, episode=70 reward=0.7750208 (542.34 it/sec) -training >> step=414200, episode=70 reward=0.7607044 (598.28 it/sec) -training >> step=414300, episode=70 reward=0.7692433 (561.83 it/sec) -training >> step=414400, episode=70 reward=0.7642499 (557.51 it/sec) -training >> step=414500, episode=70 reward=0.7433134 (590.34 it/sec) -training >> step=414600, episode=70 reward=0.7696263 (516.50 it/sec) -training >> step=414700, episode=70 reward=0.7661192 (581.39 it/sec) -training >> step=414800, episode=70 reward=0.7557092 (525.29 it/sec) -training >> step=414900, episode=70 reward=0.7668565 (505.24 it/sec) -training >> step=415000, episode=70 reward=0.7785501 (544.33 it/sec) -training >> step=415100, episode=70 reward=0.7510434 (540.43 it/sec) -training >> step=415200, episode=70 reward=0.7424485 (507.89 it/sec) -training >> step=415300, episode=70 reward=0.7758854 (594.80 it/sec) -training >> step=415400, episode=70 reward=0.7651039 (537.39 it/sec) -training >> step=415500, episode=70 reward=0.7321312 (540.16 it/sec) -training >> step=415600, episode=70 reward=0.7407776 (542.26 it/sec) -training >> step=415700, episode=70 reward=0.7509748 (537.33 it/sec) -training >> step=415800, episode=70 reward=0.7388615 (557.23 it/sec) -training >> step=415900, episode=70 reward=0.763643 (508.24 it/sec) -training >> step=416000, episode=70 reward=0.7604704 (539.62 it/sec) -training >> step=416100, episode=70 reward=0.7680638 (533.55 it/sec) -training >> step=416200, episode=70 reward=0.7541276 (543.01 it/sec) -training >> step=416300, episode=70 reward=0.7693501 (508.87 it/sec) -training >> step=416400, episode=70 reward=0.7604047 (564.93 it/sec) -training >> step=416500, episode=70 reward=0.7528675 (541.65 it/sec) -training >> step=416600, episode=70 reward=0.7749668 (551.33 it/sec) -training >> step=416700, episode=70 reward=0.7731676 (536.52 it/sec) -training >> step=416800, episode=70 reward=0.7451599 (533.78 it/sec) -training >> step=416900, episode=70 reward=0.7761738 (543.95 it/sec) -training >> step=417000, episode=70 reward=0.7724735 (554.66 it/sec) -training >> step=417100, episode=70 reward=0.7472098 (573.15 it/sec) -training >> step=417200, episode=70 reward=0.756066 (514.46 it/sec) -training >> step=417300, episode=70 reward=0.7637561 (551.10 it/sec) -training >> step=417400, episode=70 reward=0.7511335 (539.89 it/sec) -training >> step=417500, episode=70 reward=0.7337267 (551.40 it/sec) -training >> step=417600, episode=70 reward=0.7725666 (544.99 it/sec) -training >> step=417700, episode=70 reward=0.7462618 (541.15 it/sec) -training >> step=417800, episode=70 reward=0.7727501 (522.83 it/sec) -training >> step=417900, episode=70 reward=0.7774278 (517.91 it/sec) -training >> step=418000, episode=70 reward=0.7544205 (531.42 it/sec) -training >> step=418100, episode=70 reward=0.7583609 (538.12 it/sec) -training >> step=418200, episode=70 reward=0.7390636 (557.97 it/sec) -training >> step=418300, episode=70 reward=0.7541128 (510.52 it/sec) -training >> step=418400, episode=70 reward=0.7610238 (556.56 it/sec) -training >> step=418500, episode=70 reward=0.765116 (538.24 it/sec) -training >> step=418600, episode=70 reward=0.7533837 (539.45 it/sec) -training >> step=418700, episode=70 reward=0.7540971 (576.83 it/sec) -training >> step=418800, episode=70 reward=0.7464772 (543.11 it/sec) -training >> step=418900, episode=70 reward=0.7220571 (546.98 it/sec) -training >> step=419000, episode=70 reward=0.7778328 (551.68 it/sec) -training >> step=419100, episode=70 reward=0.7442039 (500.24 it/sec) -training >> step=419200, episode=70 reward=0.7536697 (537.10 it/sec) -training >> step=419300, episode=71 reward=0.7534355 (127.95 it/sec) -training >> step=419400, episode=71 reward=0.7600669 (546.49 it/sec) -training >> step=419500, episode=71 reward=0.7544066 (531.64 it/sec) -training >> step=419600, episode=71 reward=0.7456172 (579.15 it/sec) -training >> step=419700, episode=71 reward=0.777941 (559.99 it/sec) -training >> step=419800, episode=71 reward=0.762936 (549.52 it/sec) -training >> step=419900, episode=71 reward=0.7596332 (547.14 it/sec) -training >> step=420000, episode=71 reward=0.7659189 (517.58 it/sec) -training >> step=420100, episode=71 reward=0.7478271 (600.85 it/sec) -training >> step=420200, episode=71 reward=0.7631 (555.03 it/sec) -training >> step=420300, episode=71 reward=0.7531477 (572.31 it/sec) -training >> step=420400, episode=71 reward=0.764719 (577.77 it/sec) -training >> step=420500, episode=71 reward=0.7468402 (568.99 it/sec) -training >> step=420600, episode=71 reward=0.7600595 (555.18 it/sec) -training >> step=420700, episode=71 reward=0.7383872 (570.86 it/sec) -training >> step=420800, episode=71 reward=0.7707531 (509.52 it/sec) -training >> step=420900, episode=71 reward=0.7413146 (559.13 it/sec) -training >> step=421000, episode=71 reward=0.7548951 (507.57 it/sec) -training >> step=421100, episode=71 reward=0.7596132 (548.61 it/sec) -training >> step=421200, episode=71 reward=0.7679377 (555.82 it/sec) -training >> step=421300, episode=71 reward=0.7411334 (528.03 it/sec) -training >> step=421400, episode=71 reward=0.7603876 (568.57 it/sec) -training >> step=421500, episode=71 reward=0.7666115 (539.25 it/sec) -training >> step=421600, episode=71 reward=0.7761549 (499.80 it/sec) -training >> step=421700, episode=71 reward=0.7605708 (567.70 it/sec) -training >> step=421800, episode=71 reward=0.7721326 (563.67 it/sec) -training >> step=421900, episode=71 reward=0.7586925 (542.38 it/sec) -training >> step=422000, episode=71 reward=0.7668039 (543.50 it/sec) -training >> step=422100, episode=71 reward=0.7785155 (495.97 it/sec) -training >> step=422200, episode=71 reward=0.7447126 (573.18 it/sec) -training >> step=422300, episode=71 reward=0.7721041 (519.40 it/sec) -training >> step=422400, episode=71 reward=0.754479 (556.33 it/sec) -training >> step=422500, episode=71 reward=0.7484571 (527.67 it/sec) -training >> step=422600, episode=71 reward=0.7628638 (556.90 it/sec) -training >> step=422700, episode=71 reward=0.7720226 (540.02 it/sec) -training >> step=422800, episode=71 reward=0.7606614 (549.61 it/sec) -training >> step=422900, episode=71 reward=0.7533208 (565.21 it/sec) -training >> step=423000, episode=71 reward=0.7399539 (555.34 it/sec) -training >> step=423100, episode=71 reward=0.7459557 (512.18 it/sec) -training >> step=423200, episode=71 reward=0.7594768 (545.78 it/sec) -training >> step=423300, episode=71 reward=0.7725914 (560.30 it/sec) -training >> step=423400, episode=71 reward=0.7571757 (541.04 it/sec) -training >> step=423500, episode=71 reward=0.759737 (572.58 it/sec) -training >> step=423600, episode=71 reward=0.7756577 (514.66 it/sec) -training >> step=423700, episode=71 reward=0.775349 (541.18 it/sec) -training >> step=423800, episode=71 reward=0.7581879 (539.04 it/sec) -training >> step=423900, episode=71 reward=0.7643132 (547.16 it/sec) -training >> step=424000, episode=71 reward=0.7603874 (556.37 it/sec) -training >> step=424100, episode=71 reward=0.7509221 (559.68 it/sec) -training >> step=424200, episode=71 reward=0.7512198 (516.05 it/sec) -training >> step=424300, episode=71 reward=0.7561381 (541.31 it/sec) -training >> step=424400, episode=71 reward=0.7471501 (558.50 it/sec) -training >> step=424500, episode=71 reward=0.7553896 (541.04 it/sec) -training >> step=424600, episode=71 reward=0.7487769 (563.97 it/sec) -training >> step=424700, episode=71 reward=0.7293246 (530.31 it/sec) -training >> step=424800, episode=71 reward=0.7389526 (538.00 it/sec) -training >> step=424900, episode=71 reward=0.7556871 (532.61 it/sec) -training >> step=425000, episode=71 reward=0.7689047 (535.19 it/sec) -training >> step=425100, episode=71 reward=0.7633013 (561.06 it/sec) -training >> step=425200, episode=71 reward=0.7245634 (582.14 it/sec) -training >> step=425300, episode=72 reward=0.7793016 (122.51 it/sec) -training >> step=425400, episode=72 reward=0.757513 (531.66 it/sec) -training >> step=425500, episode=72 reward=0.7560973 (562.31 it/sec) -training >> step=425600, episode=72 reward=0.7186298 (545.43 it/sec) -training >> step=425700, episode=72 reward=0.7579267 (538.13 it/sec) -training >> step=425800, episode=72 reward=0.7612472 (600.77 it/sec) -training >> step=425900, episode=72 reward=0.7277936 (560.07 it/sec) -training >> step=426000, episode=72 reward=0.7369805 (566.65 it/sec) -training >> step=426100, episode=72 reward=0.7434835 (591.57 it/sec) -training >> step=426200, episode=72 reward=0.7521995 (534.36 it/sec) -training >> step=426300, episode=72 reward=0.7691048 (547.25 it/sec) -training >> step=426400, episode=72 reward=0.7714878 (562.47 it/sec) -training >> step=426500, episode=72 reward=0.7570724 (573.77 it/sec) -training >> step=426600, episode=72 reward=0.784255 (549.83 it/sec) -training >> step=426700, episode=72 reward=0.7562441 (527.44 it/sec) -training >> step=426800, episode=72 reward=0.774407 (538.64 it/sec) -training >> step=426900, episode=72 reward=0.7666329 (509.95 it/sec) -training >> step=427000, episode=72 reward=0.774702 (533.13 it/sec) -training >> step=427100, episode=72 reward=0.7661697 (573.00 it/sec) -training >> step=427200, episode=72 reward=0.7617045 (538.21 it/sec) -training >> step=427300, episode=72 reward=0.7816722 (573.51 it/sec) -training >> step=427400, episode=72 reward=0.7669542 (555.92 it/sec) -training >> step=427500, episode=72 reward=0.777509 (523.22 it/sec) -training >> step=427600, episode=72 reward=0.7703323 (552.47 it/sec) -training >> step=427700, episode=72 reward=0.7706442 (545.82 it/sec) -training >> step=427800, episode=72 reward=0.7633013 (543.10 it/sec) -training >> step=427900, episode=72 reward=0.7627501 (571.19 it/sec) -training >> step=428000, episode=72 reward=0.7566555 (530.23 it/sec) -training >> step=428100, episode=72 reward=0.7649864 (517.87 it/sec) -training >> step=428200, episode=72 reward=0.7474929 (574.66 it/sec) -training >> step=428300, episode=72 reward=0.7414863 (513.85 it/sec) -training >> step=428400, episode=72 reward=0.7358189 (502.30 it/sec) -training >> step=428500, episode=72 reward=0.7595155 (544.94 it/sec) -training >> step=428600, episode=72 reward=0.7580101 (513.64 it/sec) -training >> step=428700, episode=72 reward=0.7921909 (543.56 it/sec) -training >> step=428800, episode=72 reward=0.7563297 (513.14 it/sec) -training >> step=428900, episode=72 reward=0.7602811 (538.81 it/sec) -training >> step=429000, episode=72 reward=0.7627178 (571.75 it/sec) -training >> step=429100, episode=72 reward=0.7684925 (518.51 it/sec) -training >> step=429200, episode=72 reward=0.7520715 (533.67 it/sec) -training >> step=429300, episode=72 reward=0.7584258 (534.09 it/sec) -training >> step=429400, episode=72 reward=0.7350162 (505.33 it/sec) -training >> step=429500, episode=72 reward=0.7629023 (575.28 it/sec) -training >> step=429600, episode=72 reward=0.7472057 (538.45 it/sec) -training >> step=429700, episode=72 reward=0.7790223 (517.15 it/sec) -training >> step=429800, episode=72 reward=0.7611649 (561.68 it/sec) -training >> step=429900, episode=72 reward=0.7667415 (517.67 it/sec) -training >> step=430000, episode=72 reward=0.7587413 (567.85 it/sec) -training >> step=430100, episode=72 reward=0.7477764 (564.00 it/sec) -training >> step=430200, episode=72 reward=0.7673665 (523.65 it/sec) -training >> step=430300, episode=72 reward=0.7602684 (562.33 it/sec) -training >> step=430400, episode=72 reward=0.7639796 (542.23 it/sec) -training >> step=430500, episode=72 reward=0.7561008 (525.72 it/sec) -training >> step=430600, episode=72 reward=0.7686383 (493.90 it/sec) -training >> step=430700, episode=72 reward=0.7395189 (501.10 it/sec) -training >> step=430800, episode=72 reward=0.7592316 (494.60 it/sec) -training >> step=430900, episode=72 reward=0.74733 (538.14 it/sec) -training >> step=431000, episode=72 reward=0.7577953 (527.05 it/sec) -training >> step=431100, episode=72 reward=0.7547589 (591.36 it/sec) -training >> step=431200, episode=72 reward=0.7484151 (540.80 it/sec) -training >> step=431300, episode=73 reward=0.7527966 (122.64 it/sec) -training >> step=431400, episode=73 reward=0.7490562 (583.87 it/sec) -training >> step=431500, episode=73 reward=0.771255 (534.13 it/sec) -training >> step=431600, episode=73 reward=0.7675734 (535.92 it/sec) -training >> step=431700, episode=73 reward=0.781056 (556.65 it/sec) -training >> step=431800, episode=73 reward=0.782146 (515.91 it/sec) -training >> step=431900, episode=73 reward=0.7326894 (554.68 it/sec) -training >> step=432000, episode=73 reward=0.7576701 (573.70 it/sec) -training >> step=432100, episode=73 reward=0.7784743 (551.20 it/sec) -training >> step=432200, episode=73 reward=0.7437174 (562.93 it/sec) -training >> step=432300, episode=73 reward=0.772929 (535.45 it/sec) -training >> step=432400, episode=73 reward=0.7689635 (510.05 it/sec) -training >> step=432500, episode=73 reward=0.7446845 (594.71 it/sec) -training >> step=432600, episode=73 reward=0.7903488 (545.24 it/sec) -training >> step=432700, episode=73 reward=0.7715582 (569.72 it/sec) -training >> step=432800, episode=73 reward=0.7648355 (595.52 it/sec) -training >> step=432900, episode=73 reward=0.7623256 (528.78 it/sec) -training >> step=433000, episode=73 reward=0.7753947 (570.27 it/sec) -training >> step=433100, episode=73 reward=0.754514 (586.41 it/sec) -training >> step=433200, episode=73 reward=0.7611363 (532.04 it/sec) -training >> step=433300, episode=73 reward=0.7837325 (568.11 it/sec) -training >> step=433400, episode=73 reward=0.7607368 (558.43 it/sec) -training >> step=433500, episode=73 reward=0.7570899 (573.61 it/sec) -training >> step=433600, episode=73 reward=0.7662463 (602.43 it/sec) -training >> step=433700, episode=73 reward=0.7668031 (537.25 it/sec) -training >> step=433800, episode=73 reward=0.7642522 (522.09 it/sec) -training >> step=433900, episode=73 reward=0.7687879 (575.66 it/sec) -training >> step=434000, episode=73 reward=0.7351789 (536.21 it/sec) -training >> step=434100, episode=73 reward=0.736797 (536.09 it/sec) -training >> step=434200, episode=73 reward=0.7633296 (600.57 it/sec) -training >> step=434300, episode=73 reward=0.7492123 (520.32 it/sec) -training >> step=434400, episode=73 reward=0.7659363 (530.11 it/sec) -training >> step=434500, episode=73 reward=0.7841079 (533.82 it/sec) -training >> step=434600, episode=73 reward=0.7605958 (438.06 it/sec) -training >> step=434700, episode=73 reward=0.7597671 (515.66 it/sec) -training >> step=434800, episode=73 reward=0.7561326 (522.37 it/sec) -training >> step=434900, episode=73 reward=0.7430156 (483.16 it/sec) -training >> step=435000, episode=73 reward=0.7521039 (535.36 it/sec) -training >> step=435100, episode=73 reward=0.7554157 (521.68 it/sec) -training >> step=435200, episode=73 reward=0.784083 (587.61 it/sec) -training >> step=435300, episode=73 reward=0.7484248 (571.68 it/sec) -training >> step=435400, episode=73 reward=0.7620953 (501.18 it/sec) -training >> step=435500, episode=73 reward=0.7502215 (541.30 it/sec) -training >> step=435600, episode=73 reward=0.7682601 (513.93 it/sec) -training >> step=435700, episode=73 reward=0.7382227 (492.77 it/sec) -training >> step=435800, episode=73 reward=0.7677019 (499.51 it/sec) -training >> step=435900, episode=73 reward=0.7623801 (508.12 it/sec) -training >> step=436000, episode=73 reward=0.7596756 (543.69 it/sec) -training >> step=436100, episode=73 reward=0.7502958 (536.53 it/sec) -training >> step=436200, episode=73 reward=0.7371494 (530.52 it/sec) -training >> step=436300, episode=73 reward=0.7478452 (592.71 it/sec) -training >> step=436400, episode=73 reward=0.7705649 (546.75 it/sec) -training >> step=436500, episode=73 reward=0.7614998 (521.13 it/sec) -training >> step=436600, episode=73 reward=0.7349628 (533.69 it/sec) -training >> step=436700, episode=73 reward=0.7748436 (525.46 it/sec) -training >> step=436800, episode=73 reward=0.7637118 (568.84 it/sec) -training >> step=436900, episode=73 reward=0.7346351 (561.04 it/sec) -training >> step=437000, episode=73 reward=0.7571039 (545.26 it/sec) -training >> step=437100, episode=73 reward=0.7532906 (506.47 it/sec) -training >> step=437200, episode=73 reward=0.7584566 (527.11 it/sec) -training >> step=437300, episode=74 reward=0.7629366 (136.16 it/sec) -training >> step=437400, episode=74 reward=0.7643696 (545.57 it/sec) -training >> step=437500, episode=74 reward=0.7455338 (552.08 it/sec) -training >> step=437600, episode=74 reward=0.7718976 (550.67 it/sec) -training >> step=437700, episode=74 reward=0.7702427 (587.65 it/sec) -training >> step=437800, episode=74 reward=0.7563887 (549.32 it/sec) -training >> step=437900, episode=74 reward=0.7588683 (552.17 it/sec) -training >> step=438000, episode=74 reward=0.7692631 (569.67 it/sec) -training >> step=438100, episode=74 reward=0.7783986 (557.93 it/sec) -training >> step=438200, episode=74 reward=0.7509449 (596.21 it/sec) -training >> step=438300, episode=74 reward=0.7501157 (568.90 it/sec) -training >> step=438400, episode=74 reward=0.7903529 (594.24 it/sec) -training >> step=438500, episode=74 reward=0.7523026 (491.55 it/sec) -training >> step=438600, episode=74 reward=0.7822345 (544.44 it/sec) -training >> step=438700, episode=74 reward=0.7540158 (545.92 it/sec) -training >> step=438800, episode=74 reward=0.7794954 (542.00 it/sec) -training >> step=438900, episode=74 reward=0.7565152 (550.87 it/sec) -training >> step=439000, episode=74 reward=0.7681044 (546.44 it/sec) -training >> step=439100, episode=74 reward=0.7894127 (546.45 it/sec) -training >> step=439200, episode=74 reward=0.7558437 (566.31 it/sec) -training >> step=439300, episode=74 reward=0.7756428 (547.53 it/sec) -training >> step=439400, episode=74 reward=0.7623377 (564.85 it/sec) -training >> step=439500, episode=74 reward=0.7736883 (595.51 it/sec) -training >> step=439600, episode=74 reward=0.7565881 (526.82 it/sec) -training >> step=439700, episode=74 reward=0.7556109 (544.72 it/sec) -training >> step=439800, episode=74 reward=0.7743425 (546.64 it/sec) -training >> step=439900, episode=74 reward=0.7512333 (550.12 it/sec) -training >> step=440000, episode=74 reward=0.7649092 (608.02 it/sec) -training >> step=440100, episode=74 reward=0.7570899 (558.87 it/sec) -training >> step=440200, episode=74 reward=0.7479889 (505.51 it/sec) -training >> step=440300, episode=74 reward=0.7825816 (551.13 it/sec) -training >> step=440400, episode=74 reward=0.7765124 (535.13 it/sec) -training >> step=440500, episode=74 reward=0.7764375 (566.85 it/sec) -training >> step=440600, episode=74 reward=0.7646984 (575.00 it/sec) -training >> step=440700, episode=74 reward=0.7537079 (517.98 it/sec) -training >> step=440800, episode=74 reward=0.7591959 (522.34 it/sec) -training >> step=440900, episode=74 reward=0.772894 (489.08 it/sec) -training >> step=441000, episode=74 reward=0.7604877 (517.48 it/sec) -training >> step=441100, episode=74 reward=0.7756084 (569.87 it/sec) -training >> step=441200, episode=74 reward=0.7638948 (548.82 it/sec) -training >> step=441300, episode=74 reward=0.746295 (486.31 it/sec) -training >> step=441400, episode=74 reward=0.7707245 (530.43 it/sec) -training >> step=441500, episode=74 reward=0.7402691 (523.14 it/sec) -training >> step=441600, episode=74 reward=0.771711 (584.81 it/sec) -training >> step=441700, episode=74 reward=0.7485616 (559.50 it/sec) -training >> step=441800, episode=74 reward=0.7452719 (488.11 it/sec) -training >> step=441900, episode=74 reward=0.761801 (505.97 it/sec) -training >> step=442000, episode=74 reward=0.7347394 (522.50 it/sec) -training >> step=442100, episode=74 reward=0.7512137 (571.30 it/sec) -training >> step=442200, episode=74 reward=0.7657503 (543.14 it/sec) -training >> step=442300, episode=74 reward=0.743266 (544.57 it/sec) -training >> step=442400, episode=74 reward=0.7571295 (515.38 it/sec) -training >> step=442500, episode=74 reward=0.7479295 (509.40 it/sec) -training >> step=442600, episode=74 reward=0.7721115 (558.72 it/sec) -training >> step=442700, episode=74 reward=0.7450231 (574.31 it/sec) -training >> step=442800, episode=74 reward=0.7421535 (566.12 it/sec) -training >> step=442900, episode=74 reward=0.7779589 (551.92 it/sec) -training >> step=443000, episode=74 reward=0.7528198 (497.88 it/sec) -training >> step=443100, episode=74 reward=0.7457171 (533.44 it/sec) -training >> step=443200, episode=74 reward=0.7625161 (547.76 it/sec) -training >> step=443300, episode=75 reward=0.7458369 (131.57 it/sec) -training >> step=443400, episode=75 reward=0.770414 (539.01 it/sec) -training >> step=443500, episode=75 reward=0.7797406 (564.71 it/sec) -training >> step=443600, episode=75 reward=0.7552714 (567.61 it/sec) -training >> step=443700, episode=75 reward=0.7863683 (554.71 it/sec) -training >> step=443800, episode=75 reward=0.7966737 (532.75 it/sec) -training >> step=443900, episode=75 reward=0.7600579 (531.67 it/sec) -training >> step=444000, episode=75 reward=0.7449856 (601.43 it/sec) -training >> step=444100, episode=75 reward=0.755141 (577.36 it/sec) -training >> step=444200, episode=75 reward=0.7631076 (580.97 it/sec) -training >> step=444300, episode=75 reward=0.7509927 (547.73 it/sec) -training >> step=444400, episode=75 reward=0.7592806 (493.31 it/sec) -training >> step=444500, episode=75 reward=0.7615052 (538.25 it/sec) -training >> step=444600, episode=75 reward=0.7576415 (569.53 it/sec) -training >> step=444700, episode=75 reward=0.7786151 (566.72 it/sec) -training >> step=444800, episode=75 reward=0.7679452 (596.48 it/sec) -training >> step=444900, episode=75 reward=0.7629287 (496.87 it/sec) -training >> step=445000, episode=75 reward=0.7785926 (521.16 it/sec) -training >> step=445100, episode=75 reward=0.752193 (562.20 it/sec) -training >> step=445200, episode=75 reward=0.7583167 (567.12 it/sec) -training >> step=445300, episode=75 reward=0.7706352 (602.30 it/sec) -training >> step=445400, episode=75 reward=0.7867488 (539.98 it/sec) -training >> step=445500, episode=75 reward=0.7744621 (493.26 it/sec) -training >> step=445600, episode=75 reward=0.7762506 (517.43 it/sec) -training >> step=445700, episode=75 reward=0.7705143 (550.02 it/sec) -training >> step=445800, episode=75 reward=0.7790053 (593.17 it/sec) -training >> step=445900, episode=75 reward=0.771064 (560.81 it/sec) -training >> step=446000, episode=75 reward=0.7727594 (478.01 it/sec) -training >> step=446100, episode=75 reward=0.7756908 (477.78 it/sec) -training >> step=446200, episode=75 reward=0.7534696 (536.08 it/sec) -training >> step=446300, episode=75 reward=0.7792996 (543.69 it/sec) -training >> step=446400, episode=75 reward=0.7700951 (609.01 it/sec) -training >> step=446500, episode=75 reward=0.7326965 (534.82 it/sec) -training >> step=446600, episode=75 reward=0.7943392 (512.49 it/sec) -training >> step=446700, episode=75 reward=0.7612391 (539.57 it/sec) -training >> step=446800, episode=75 reward=0.7587336 (552.84 it/sec) -training >> step=446900, episode=75 reward=0.7775543 (562.54 it/sec) -training >> step=447000, episode=75 reward=0.7564378 (547.84 it/sec) -training >> step=447100, episode=75 reward=0.7624003 (513.84 it/sec) -training >> step=447200, episode=75 reward=0.7527265 (522.89 it/sec) -training >> step=447300, episode=75 reward=0.744579 (525.47 it/sec) -training >> step=447400, episode=75 reward=0.7418185 (524.70 it/sec) -training >> step=447500, episode=75 reward=0.7745816 (601.61 it/sec) -training >> step=447600, episode=75 reward=0.7604329 (526.85 it/sec) -training >> step=447700, episode=75 reward=0.7461713 (527.31 it/sec) -training >> step=447800, episode=75 reward=0.7583659 (515.26 it/sec) -training >> step=447900, episode=75 reward=0.7432914 (541.76 it/sec) -training >> step=448000, episode=75 reward=0.781898 (586.63 it/sec) -training >> step=448100, episode=75 reward=0.765166 (567.41 it/sec) -training >> step=448200, episode=75 reward=0.7561933 (532.22 it/sec) -training >> step=448300, episode=75 reward=0.7719911 (496.75 it/sec) -training >> step=448400, episode=75 reward=0.7609529 (520.66 it/sec) -training >> step=448500, episode=75 reward=0.7730545 (596.26 it/sec) -training >> step=448600, episode=75 reward=0.7468645 (541.26 it/sec) -training >> step=448700, episode=75 reward=0.77437 (557.07 it/sec) -training >> step=448800, episode=75 reward=0.7471258 (530.03 it/sec) -training >> step=448900, episode=75 reward=0.7540448 (481.19 it/sec) -training >> step=449000, episode=75 reward=0.7517921 (594.23 it/sec) -training >> step=449100, episode=75 reward=0.7558835 (567.68 it/sec) -training >> step=449200, episode=75 reward=0.7676317 (549.15 it/sec) -training >> step=449300, episode=76 reward=0.7685661 (124.10 it/sec) -training >> step=449400, episode=76 reward=0.7478791 (528.69 it/sec) -training >> step=449500, episode=76 reward=0.7492156 (517.36 it/sec) -training >> step=449600, episode=76 reward=0.7702961 (551.49 it/sec) -training >> step=449700, episode=76 reward=0.7771319 (548.44 it/sec) -training >> step=449800, episode=76 reward=0.763755 (576.49 it/sec) -training >> step=449900, episode=76 reward=0.7609817 (568.57 it/sec) -training >> step=450000, episode=76 reward=0.7709873 (596.49 it/sec) -training >> step=450100, episode=76 reward=0.7682396 (538.73 it/sec) -training >> step=450200, episode=76 reward=0.7740041 (583.02 it/sec) -training >> step=450300, episode=76 reward=0.7691483 (556.90 it/sec) -training >> step=450400, episode=76 reward=0.7770922 (573.64 it/sec) -training >> step=450500, episode=76 reward=0.7647547 (581.43 it/sec) -training >> step=450600, episode=76 reward=0.7563754 (571.72 it/sec) -training >> step=450700, episode=76 reward=0.7603353 (543.18 it/sec) -training >> step=450800, episode=76 reward=0.7469211 (541.11 it/sec) -training >> step=450900, episode=76 reward=0.7365814 (515.52 it/sec) -training >> step=451000, episode=76 reward=0.7644206 (593.48 it/sec) -training >> step=451100, episode=76 reward=0.7587522 (499.93 it/sec) -training >> step=451200, episode=76 reward=0.7484118 (508.23 it/sec) -training >> step=451300, episode=76 reward=0.7740752 (492.21 it/sec) -training >> step=451400, episode=76 reward=0.771177 (528.13 it/sec) -training >> step=451500, episode=76 reward=0.7653053 (591.78 it/sec) -training >> step=451600, episode=76 reward=0.764096 (554.01 it/sec) -training >> step=451700, episode=76 reward=0.7557336 (559.51 it/sec) -training >> step=451800, episode=76 reward=0.7587039 (535.33 it/sec) -training >> step=451900, episode=76 reward=0.7687227 (492.09 it/sec) -training >> step=452000, episode=76 reward=0.747324 (544.43 it/sec) -training >> step=452100, episode=76 reward=0.7559342 (542.48 it/sec) -training >> step=452200, episode=76 reward=0.77466 (561.74 it/sec) -training >> step=452300, episode=76 reward=0.7468097 (555.63 it/sec) -training >> step=452400, episode=76 reward=0.7731079 (505.21 it/sec) -training >> step=452500, episode=76 reward=0.7487292 (526.64 it/sec) -training >> step=452600, episode=76 reward=0.7548767 (586.83 it/sec) -training >> step=452700, episode=76 reward=0.7545227 (561.67 it/sec) -training >> step=452800, episode=76 reward=0.752508 (570.60 it/sec) -training >> step=452900, episode=76 reward=0.7549013 (538.28 it/sec) -training >> step=453000, episode=76 reward=0.7354379 (516.10 it/sec) -training >> step=453100, episode=76 reward=0.7461251 (561.40 it/sec) -training >> step=453200, episode=76 reward=0.7474282 (551.24 it/sec) -training >> step=453300, episode=76 reward=0.7687382 (543.20 it/sec) -training >> step=453400, episode=76 reward=0.7572865 (580.37 it/sec) -training >> step=453500, episode=76 reward=0.7519191 (501.18 it/sec) -training >> step=453600, episode=76 reward=0.7585 (518.28 it/sec) -training >> step=453700, episode=76 reward=0.7661864 (589.92 it/sec) -training >> step=453800, episode=76 reward=0.7440627 (541.98 it/sec) -training >> step=453900, episode=76 reward=0.7901579 (583.21 it/sec) -training >> step=454000, episode=76 reward=0.7514389 (537.70 it/sec) -training >> step=454100, episode=76 reward=0.7590188 (519.35 it/sec) -training >> step=454200, episode=76 reward=0.759114 (556.60 it/sec) -training >> step=454300, episode=76 reward=0.7386995 (539.99 it/sec) -training >> step=454400, episode=76 reward=0.7655283 (559.44 it/sec) -training >> step=454500, episode=76 reward=0.7629533 (589.30 it/sec) -training >> step=454600, episode=76 reward=0.7538953 (484.14 it/sec) -training >> step=454700, episode=76 reward=0.7694407 (535.92 it/sec) -training >> step=454800, episode=76 reward=0.7765439 (544.84 it/sec) -training >> step=454900, episode=76 reward=0.761534 (552.97 it/sec) -training >> step=455000, episode=76 reward=0.7445376 (592.74 it/sec) -training >> step=455100, episode=76 reward=0.7669486 (554.99 it/sec) -training >> step=455200, episode=76 reward=0.7836255 (516.44 it/sec) -training >> step=455300, episode=77 reward=0.7810458 (124.11 it/sec) -training >> step=455400, episode=77 reward=0.7566442 (534.48 it/sec) -training >> step=455500, episode=77 reward=0.7550659 (548.80 it/sec) -training >> step=455600, episode=77 reward=0.7795185 (581.85 it/sec) -training >> step=455700, episode=77 reward=0.7449263 (551.81 it/sec) -training >> step=455800, episode=77 reward=0.7664104 (578.80 it/sec) -training >> step=455900, episode=77 reward=0.7497132 (545.50 it/sec) -training >> step=456000, episode=77 reward=0.7568822 (529.03 it/sec) -training >> step=456100, episode=77 reward=0.7654235 (550.12 it/sec) -training >> step=456200, episode=77 reward=0.7767339 (570.29 it/sec) -training >> step=456300, episode=77 reward=0.7543031 (493.89 it/sec) -training >> step=456400, episode=77 reward=0.7743651 (531.00 it/sec) -training >> step=456500, episode=77 reward=0.7595233 (514.99 it/sec) -training >> step=456600, episode=77 reward=0.763626 (555.99 it/sec) -training >> step=456700, episode=77 reward=0.7723283 (530.28 it/sec) -training >> step=456800, episode=77 reward=0.7581812 (575.27 it/sec) -training >> step=456900, episode=77 reward=0.774574 (592.65 it/sec) -training >> step=457000, episode=77 reward=0.7555968 (514.09 it/sec) -training >> step=457100, episode=77 reward=0.7640392 (571.38 it/sec) -training >> step=457200, episode=77 reward=0.7649968 (523.10 it/sec) -training >> step=457300, episode=77 reward=0.7703568 (573.27 it/sec) -training >> step=457400, episode=77 reward=0.7705287 (575.14 it/sec) -training >> step=457500, episode=77 reward=0.7656448 (538.54 it/sec) -training >> step=457600, episode=77 reward=0.7661285 (529.66 it/sec) -training >> step=457700, episode=77 reward=0.7777898 (516.38 it/sec) -training >> step=457800, episode=77 reward=0.7495068 (547.96 it/sec) -training >> step=457900, episode=77 reward=0.7572598 (602.77 it/sec) -training >> step=458000, episode=77 reward=0.7633232 (572.81 it/sec) -training >> step=458100, episode=77 reward=0.7690998 (542.44 it/sec) -training >> step=458200, episode=77 reward=0.7410595 (559.16 it/sec) -training >> step=458300, episode=77 reward=0.78149 (503.80 it/sec) -training >> step=458400, episode=77 reward=0.7766441 (567.55 it/sec) -training >> step=458500, episode=77 reward=0.7857558 (570.04 it/sec) -training >> step=458600, episode=77 reward=0.7658061 (559.94 it/sec) -training >> step=458700, episode=77 reward=0.7662234 (584.19 it/sec) -training >> step=458800, episode=77 reward=0.740538 (514.97 it/sec) -training >> step=458900, episode=77 reward=0.7553066 (508.11 it/sec) -training >> step=459000, episode=77 reward=0.7589175 (569.96 it/sec) -training >> step=459100, episode=77 reward=0.7729755 (547.27 it/sec) -training >> step=459200, episode=77 reward=0.7737638 (558.13 it/sec) -training >> step=459300, episode=77 reward=0.7387088 (533.27 it/sec) -training >> step=459400, episode=77 reward=0.7569695 (506.19 it/sec) -training >> step=459500, episode=77 reward=0.7236446 (568.92 it/sec) -training >> step=459600, episode=77 reward=0.7598898 (571.79 it/sec) -training >> step=459700, episode=77 reward=0.7463658 (540.20 it/sec) -training >> step=459800, episode=77 reward=0.7648833 (559.09 it/sec) -training >> step=459900, episode=77 reward=0.7610974 (501.41 it/sec) -training >> step=460000, episode=77 reward=0.7421749 (532.14 it/sec) -training >> step=460100, episode=77 reward=0.7697849 (572.87 it/sec) -training >> step=460200, episode=77 reward=0.7576613 (536.13 it/sec) -training >> step=460300, episode=77 reward=0.7713413 (550.48 it/sec) -training >> step=460400, episode=77 reward=0.778219 (503.52 it/sec) -training >> step=460500, episode=77 reward=0.7644184 (532.33 it/sec) -training >> step=460600, episode=77 reward=0.7614687 (552.02 it/sec) -training >> step=460700, episode=77 reward=0.7598793 (568.54 it/sec) -training >> step=460800, episode=77 reward=0.746158 (577.71 it/sec) -training >> step=460900, episode=77 reward=0.7723245 (544.55 it/sec) -training >> step=461000, episode=77 reward=0.768245 (513.69 it/sec) -training >> step=461100, episode=77 reward=0.7508818 (492.95 it/sec) -training >> step=461200, episode=77 reward=0.7547206 (567.88 it/sec) -training >> step=461300, episode=78 reward=0.7521349 (128.43 it/sec) -training >> step=461400, episode=78 reward=0.765707 (521.41 it/sec) -training >> step=461500, episode=78 reward=0.7493746 (532.81 it/sec) -training >> step=461600, episode=78 reward=0.7678641 (558.33 it/sec) -training >> step=461700, episode=78 reward=0.729801 (541.62 it/sec) -training >> step=461800, episode=78 reward=0.7619312 (567.31 it/sec) -training >> step=461900, episode=78 reward=0.7385776 (525.57 it/sec) -training >> step=462000, episode=78 reward=0.7733186 (581.94 it/sec) -training >> step=462100, episode=78 reward=0.7618925 (538.60 it/sec) -training >> step=462200, episode=78 reward=0.768451 (584.14 it/sec) -training >> step=462300, episode=78 reward=0.747293 (522.40 it/sec) -training >> step=462400, episode=78 reward=0.7566823 (494.11 it/sec) -training >> step=462500, episode=78 reward=0.7872281 (563.16 it/sec) -training >> step=462600, episode=78 reward=0.7577984 (566.42 it/sec) -training >> step=462700, episode=78 reward=0.7293777 (559.72 it/sec) -training >> step=462800, episode=78 reward=0.7667252 (593.24 it/sec) -training >> step=462900, episode=78 reward=0.7803596 (552.07 it/sec) -training >> step=463000, episode=78 reward=0.7397984 (530.52 it/sec) -training >> step=463100, episode=78 reward=0.7599283 (567.31 it/sec) -training >> step=463200, episode=78 reward=0.765403 (564.27 it/sec) -training >> step=463300, episode=78 reward=0.756009 (595.78 it/sec) -training >> step=463400, episode=78 reward=0.7794734 (565.98 it/sec) -training >> step=463500, episode=78 reward=0.7557154 (500.91 it/sec) -training >> step=463600, episode=78 reward=0.7627748 (496.20 it/sec) -training >> step=463700, episode=78 reward=0.7754607 (538.67 it/sec) -training >> step=463800, episode=78 reward=0.7733864 (589.98 it/sec) -training >> step=463900, episode=78 reward=0.7569912 (566.07 it/sec) -training >> step=464000, episode=78 reward=0.7767832 (572.51 it/sec) -training >> step=464100, episode=78 reward=0.7613481 (551.15 it/sec) -training >> step=464200, episode=78 reward=0.7638131 (552.34 it/sec) -training >> step=464300, episode=78 reward=0.7575361 (546.44 it/sec) -training >> step=464400, episode=78 reward=0.758136 (562.96 it/sec) -training >> step=464500, episode=78 reward=0.755142 (574.31 it/sec) -training >> step=464600, episode=78 reward=0.754061 (549.14 it/sec) -training >> step=464700, episode=78 reward=0.7585612 (543.12 it/sec) -training >> step=464800, episode=78 reward=0.7611274 (572.07 it/sec) -training >> step=464900, episode=78 reward=0.7438604 (567.36 it/sec) -training >> step=465000, episode=78 reward=0.7552675 (567.75 it/sec) -training >> step=465100, episode=78 reward=0.7414222 (598.44 it/sec) -training >> step=465200, episode=78 reward=0.7693615 (550.38 it/sec) -training >> step=465300, episode=78 reward=0.7671878 (508.46 it/sec) -training >> step=465400, episode=78 reward=0.769344 (579.54 it/sec) -training >> step=465500, episode=78 reward=0.7750856 (494.40 it/sec) -training >> step=465600, episode=78 reward=0.7676676 (589.92 it/sec) -training >> step=465700, episode=78 reward=0.7379777 (524.24 it/sec) -training >> step=465800, episode=78 reward=0.766372 (516.61 it/sec) -training >> step=465900, episode=78 reward=0.745571 (606.83 it/sec) -training >> step=466000, episode=78 reward=0.7844307 (578.00 it/sec) -training >> step=466100, episode=78 reward=0.7561869 (562.41 it/sec) -training >> step=466200, episode=78 reward=0.7649705 (551.42 it/sec) -training >> step=466300, episode=78 reward=0.7672849 (511.20 it/sec) -training >> step=466400, episode=78 reward=0.764726 (530.99 it/sec) -training >> step=466500, episode=78 reward=0.7666582 (550.24 it/sec) -training >> step=466600, episode=78 reward=0.7587201 (573.34 it/sec) -training >> step=466700, episode=78 reward=0.7665416 (600.64 it/sec) -training >> step=466800, episode=78 reward=0.7536669 (547.86 it/sec) -training >> step=466900, episode=78 reward=0.7526552 (524.30 it/sec) -training >> step=467000, episode=78 reward=0.7646596 (492.64 it/sec) -training >> step=467100, episode=78 reward=0.7490523 (521.69 it/sec) -training >> step=467200, episode=78 reward=0.7785338 (585.12 it/sec) -training >> step=467300, episode=79 reward=0.7718911 (132.56 it/sec) -training >> step=467400, episode=79 reward=0.7606819 (528.33 it/sec) -training >> step=467500, episode=79 reward=0.7767643 (588.81 it/sec) -training >> step=467600, episode=79 reward=0.7589898 (564.70 it/sec) -training >> step=467700, episode=79 reward=0.7611729 (541.05 it/sec) -training >> step=467800, episode=79 reward=0.7566921 (576.75 it/sec) -training >> step=467900, episode=79 reward=0.7519472 (552.80 it/sec) -training >> step=468000, episode=79 reward=0.7693909 (554.62 it/sec) -training >> step=468100, episode=79 reward=0.7650653 (594.61 it/sec) -training >> step=468200, episode=79 reward=0.7367473 (563.84 it/sec) -training >> step=468300, episode=79 reward=0.7585036 (513.33 it/sec) -training >> step=468400, episode=79 reward=0.7605954 (586.00 it/sec) -training >> step=468500, episode=79 reward=0.7764661 (556.00 it/sec) -training >> step=468600, episode=79 reward=0.7648826 (522.41 it/sec) -training >> step=468700, episode=79 reward=0.7572089 (553.16 it/sec) -training >> step=468800, episode=79 reward=0.7409949 (523.37 it/sec) -training >> step=468900, episode=79 reward=0.7557659 (510.22 it/sec) -training >> step=469000, episode=79 reward=0.7603679 (478.52 it/sec) -training >> step=469100, episode=79 reward=0.7784361 (505.83 it/sec) -training >> step=469200, episode=79 reward=0.755841 (576.22 it/sec) -training >> step=469300, episode=79 reward=0.7654827 (561.00 it/sec) -training >> step=469400, episode=79 reward=0.7570348 (527.21 it/sec) -training >> step=469500, episode=79 reward=0.7482668 (511.08 it/sec) -training >> step=469600, episode=79 reward=0.7525476 (558.40 it/sec) -training >> step=469700, episode=79 reward=0.7535405 (582.85 it/sec) -training >> step=469800, episode=79 reward=0.7682006 (541.82 it/sec) -training >> step=469900, episode=79 reward=0.7399863 (557.01 it/sec) -training >> step=470000, episode=79 reward=0.7519525 (512.55 it/sec) -training >> step=470100, episode=79 reward=0.7623822 (515.18 it/sec) -training >> step=470200, episode=79 reward=0.7604254 (593.28 it/sec) -training >> step=470300, episode=79 reward=0.7617176 (523.03 it/sec) -training >> step=470400, episode=79 reward=0.7551948 (585.09 it/sec) -training >> step=470500, episode=79 reward=0.7891515 (533.74 it/sec) -training >> step=470600, episode=79 reward=0.762293 (503.97 it/sec) -training >> step=470700, episode=79 reward=0.775162 (577.64 it/sec) -training >> step=470800, episode=79 reward=0.7684797 (572.48 it/sec) -training >> step=470900, episode=79 reward=0.7653937 (566.22 it/sec) -training >> step=471000, episode=79 reward=0.7776444 (577.49 it/sec) -training >> step=471100, episode=79 reward=0.7240874 (478.42 it/sec) -training >> step=471200, episode=79 reward=0.7701874 (541.15 it/sec) -training >> step=471300, episode=79 reward=0.7403233 (572.32 it/sec) -training >> step=471400, episode=79 reward=0.723094 (559.76 it/sec) -training >> step=471500, episode=79 reward=0.7242989 (607.63 it/sec) -training >> step=471600, episode=79 reward=0.7453693 (537.16 it/sec) -training >> step=471700, episode=79 reward=0.7242722 (511.84 it/sec) -training >> step=471800, episode=79 reward=0.772808 (532.76 it/sec) -training >> step=471900, episode=79 reward=0.7524665 (565.90 it/sec) -training >> step=472000, episode=79 reward=0.7488945 (585.25 it/sec) -training >> step=472100, episode=79 reward=0.7588552 (543.79 it/sec) -training >> step=472200, episode=79 reward=0.7706212 (465.23 it/sec) -training >> step=472300, episode=79 reward=0.7560117 (519.89 it/sec) -training >> step=472400, episode=79 reward=0.7819968 (553.26 it/sec) -training >> step=472500, episode=79 reward=0.771305 (582.34 it/sec) -training >> step=472600, episode=79 reward=0.766524 (581.20 it/sec) -training >> step=472700, episode=79 reward=0.7773061 (541.66 it/sec) -training >> step=472800, episode=79 reward=0.7738323 (509.35 it/sec) -training >> step=472900, episode=79 reward=0.7684903 (561.27 it/sec) -training >> step=473000, episode=79 reward=0.7679313 (565.06 it/sec) -training >> step=473100, episode=79 reward=0.7693895 (567.36 it/sec) -training >> step=473200, episode=79 reward=0.7555792 (565.12 it/sec) -training >> step=473300, episode=80 reward=0.7609107 (132.12 it/sec) -training >> step=473400, episode=80 reward=0.7647412 (526.93 it/sec) -training >> step=473500, episode=80 reward=0.7392783 (531.80 it/sec) -training >> step=473600, episode=80 reward=0.7733911 (531.54 it/sec) -training >> step=473700, episode=80 reward=0.7757022 (588.12 it/sec) -training >> step=473800, episode=80 reward=0.7694989 (550.97 it/sec) -training >> step=473900, episode=80 reward=0.7751047 (553.83 it/sec) -training >> step=474000, episode=80 reward=0.7674631 (594.80 it/sec) -training >> step=474100, episode=80 reward=0.7554193 (556.31 it/sec) -training >> step=474200, episode=80 reward=0.7705258 (581.92 it/sec) -training >> step=474300, episode=80 reward=0.7716752 (571.93 it/sec) -training >> step=474400, episode=80 reward=0.779285 (568.07 it/sec) -training >> step=474500, episode=80 reward=0.7769541 (557.74 it/sec) -training >> step=474600, episode=80 reward=0.7640269 (572.00 it/sec) -training >> step=474700, episode=80 reward=0.7607344 (524.01 it/sec) -training >> step=474800, episode=80 reward=0.7609533 (521.67 it/sec) -training >> step=474900, episode=80 reward=0.7561341 (568.82 it/sec) -training >> step=475000, episode=80 reward=0.7726473 (598.95 it/sec) -training >> step=475100, episode=80 reward=0.7799785 (559.73 it/sec) -training >> step=475200, episode=80 reward=0.7663984 (585.19 it/sec) -training >> step=475300, episode=80 reward=0.7538077 (551.64 it/sec) -training >> step=475400, episode=80 reward=0.7803217 (533.27 it/sec) -training >> step=475500, episode=80 reward=0.7596903 (594.77 it/sec) -training >> step=475600, episode=80 reward=0.7554049 (550.10 it/sec) -training >> step=475700, episode=80 reward=0.7369631 (545.51 it/sec) -training >> step=475800, episode=80 reward=0.758395 (536.21 it/sec) -training >> step=475900, episode=80 reward=0.7738765 (499.14 it/sec) -training >> step=476000, episode=80 reward=0.743832 (559.42 it/sec) -training >> step=476100, episode=80 reward=0.761023 (575.70 it/sec) -training >> step=476200, episode=80 reward=0.7544342 (569.56 it/sec) -training >> step=476300, episode=80 reward=0.7628222 (572.14 it/sec) -training >> step=476400, episode=80 reward=0.7674755 (494.31 it/sec) -training >> step=476500, episode=80 reward=0.7561902 (537.08 it/sec) -training >> step=476600, episode=80 reward=0.7666903 (544.04 it/sec) -training >> step=476700, episode=80 reward=0.7618989 (549.83 it/sec) -training >> step=476800, episode=80 reward=0.7581743 (589.65 it/sec) -training >> step=476900, episode=80 reward=0.7696747 (550.20 it/sec) -training >> step=477000, episode=80 reward=0.7618411 (516.21 it/sec) -training >> step=477100, episode=80 reward=0.7530485 (552.68 it/sec) -training >> step=477200, episode=80 reward=0.7400388 (520.23 it/sec) -training >> step=477300, episode=80 reward=0.7603009 (559.88 it/sec) -training >> step=477400, episode=80 reward=0.7484473 (499.06 it/sec) -training >> step=477500, episode=80 reward=0.7599533 (511.52 it/sec) -training >> step=477600, episode=80 reward=0.7635623 (532.94 it/sec) -training >> step=477700, episode=80 reward=0.7615482 (545.25 it/sec) -training >> step=477800, episode=80 reward=0.7652513 (554.99 it/sec) -training >> step=477900, episode=80 reward=0.7636471 (565.94 it/sec) -training >> step=478000, episode=80 reward=0.7412578 (544.79 it/sec) -training >> step=478100, episode=80 reward=0.7398487 (542.66 it/sec) -training >> step=478200, episode=80 reward=0.7487517 (521.87 it/sec) -training >> step=478300, episode=80 reward=0.7407844 (561.57 it/sec) -training >> step=478400, episode=80 reward=0.7462609 (550.18 it/sec) -training >> step=478500, episode=80 reward=0.7647859 (543.94 it/sec) -training >> step=478600, episode=80 reward=0.7652795 (555.57 it/sec) -training >> step=478700, episode=80 reward=0.7618398 (512.18 it/sec) -training >> step=478800, episode=80 reward=0.7680238 (565.68 it/sec) -training >> step=478900, episode=80 reward=0.7621168 (560.89 it/sec) -training >> step=479000, episode=80 reward=0.761641 (545.87 it/sec) -training >> step=479100, episode=80 reward=0.7639714 (566.29 it/sec) -training >> step=479200, episode=80 reward=0.7848522 (461.52 it/sec) -training >> step=479300, episode=81 reward=0.7900137 (124.81 it/sec) -training >> step=479400, episode=81 reward=0.755605 (446.36 it/sec) -training >> step=479500, episode=81 reward=0.7668883 (458.56 it/sec) -training >> step=479600, episode=81 reward=0.759226 (418.85 it/sec) -training >> step=479700, episode=81 reward=0.7631494 (469.40 it/sec) -training >> step=479800, episode=81 reward=0.7393861 (460.60 it/sec) -training >> step=479900, episode=81 reward=0.7739537 (454.01 it/sec) -training >> step=480000, episode=81 reward=0.734874 (403.02 it/sec) -training >> step=480100, episode=81 reward=0.7463229 (420.57 it/sec) -training >> step=480200, episode=81 reward=0.7586949 (449.76 it/sec) -training >> step=480300, episode=81 reward=0.7677553 (467.98 it/sec) -training >> step=480400, episode=81 reward=0.7538323 (430.37 it/sec) -training >> step=480500, episode=81 reward=0.7653638 (448.85 it/sec) -training >> step=480600, episode=81 reward=0.7334098 (412.52 it/sec) -training >> step=480700, episode=81 reward=0.743427 (396.09 it/sec) -training >> step=480800, episode=81 reward=0.7450292 (372.85 it/sec) -training >> step=480900, episode=81 reward=0.7363994 (466.17 it/sec) -training >> step=481000, episode=81 reward=0.7669406 (455.33 it/sec) -training >> step=481100, episode=81 reward=0.7612317 (471.74 it/sec) -training >> step=481200, episode=81 reward=0.7695493 (444.70 it/sec) -training >> step=481300, episode=81 reward=0.7590441 (447.65 it/sec) -training >> step=481400, episode=81 reward=0.7563474 (482.09 it/sec) -training >> step=481500, episode=81 reward=0.7782923 (422.78 it/sec) -training >> step=481600, episode=81 reward=0.7893826 (407.74 it/sec) -training >> step=481700, episode=81 reward=0.7744142 (474.92 it/sec) -training >> step=481800, episode=81 reward=0.7563627 (459.13 it/sec) -training >> step=481900, episode=81 reward=0.7619779 (465.47 it/sec) -training >> step=482000, episode=81 reward=0.7763903 (520.86 it/sec) -training >> step=482100, episode=81 reward=0.7834448 (492.68 it/sec) -training >> step=482200, episode=81 reward=0.7656492 (458.02 it/sec) -training >> step=482300, episode=81 reward=0.7610875 (471.57 it/sec) -training >> step=482400, episode=81 reward=0.7667037 (476.93 it/sec) -training >> step=482500, episode=81 reward=0.770015 (480.03 it/sec) -training >> step=482600, episode=81 reward=0.7669051 (479.50 it/sec) -training >> step=482700, episode=81 reward=0.74475 (446.02 it/sec) -training >> step=482800, episode=81 reward=0.7471662 (376.41 it/sec) -training >> step=482900, episode=81 reward=0.7727303 (347.15 it/sec) -training >> step=483000, episode=81 reward=0.7598897 (386.17 it/sec) -training >> step=483100, episode=81 reward=0.7589645 (506.32 it/sec) -training >> step=483200, episode=81 reward=0.7646854 (432.58 it/sec) -training >> step=483300, episode=81 reward=0.7688648 (436.82 it/sec) -training >> step=483400, episode=81 reward=0.7396709 (393.55 it/sec) -training >> step=483500, episode=81 reward=0.7403557 (380.79 it/sec) -training >> step=483600, episode=81 reward=0.7715426 (521.27 it/sec) -training >> step=483700, episode=81 reward=0.7508905 (443.86 it/sec) -training >> step=483800, episode=81 reward=0.7496318 (443.11 it/sec) -training >> step=483900, episode=81 reward=0.7736721 (461.01 it/sec) -training >> step=484000, episode=81 reward=0.7642642 (463.07 it/sec) -training >> step=484100, episode=81 reward=0.7400424 (495.73 it/sec) -training >> step=484200, episode=81 reward=0.7585337 (503.78 it/sec) -training >> step=484300, episode=81 reward=0.7664481 (498.89 it/sec) -training >> step=484400, episode=81 reward=0.7722699 (500.59 it/sec) -training >> step=484500, episode=81 reward=0.7540737 (507.09 it/sec) -training >> step=484600, episode=81 reward=0.7785884 (490.55 it/sec) -training >> step=484700, episode=81 reward=0.7578139 (507.11 it/sec) -training >> step=484800, episode=81 reward=0.7490177 (481.24 it/sec) -training >> step=484900, episode=81 reward=0.7781439 (514.67 it/sec) -training >> step=485000, episode=81 reward=0.7591214 (510.33 it/sec) -training >> step=485100, episode=81 reward=0.7523841 (568.54 it/sec) -training >> step=485200, episode=81 reward=0.7539657 (520.08 it/sec) -training >> step=485300, episode=82 reward=0.7604849 (150.38 it/sec) -training >> step=485400, episode=82 reward=0.7803022 (499.76 it/sec) -training >> step=485500, episode=82 reward=0.7740772 (526.42 it/sec) -training >> step=485600, episode=82 reward=0.7617553 (537.35 it/sec) -training >> step=485700, episode=82 reward=0.7771901 (507.56 it/sec) -training >> step=485800, episode=82 reward=0.742075 (478.51 it/sec) -training >> step=485900, episode=82 reward=0.7762941 (547.86 it/sec) -training >> step=486000, episode=82 reward=0.7636015 (497.52 it/sec) -training >> step=486100, episode=82 reward=0.7690292 (519.65 it/sec) -training >> step=486200, episode=82 reward=0.772509 (550.61 it/sec) -training >> step=486300, episode=82 reward=0.7618796 (522.61 it/sec) -training >> step=486400, episode=82 reward=0.7408882 (480.42 it/sec) -training >> step=486500, episode=82 reward=0.7521507 (502.28 it/sec) -training >> step=486600, episode=82 reward=0.7658183 (510.23 it/sec) -training >> step=486700, episode=82 reward=0.7710004 (500.61 it/sec) -training >> step=486800, episode=82 reward=0.7806068 (516.79 it/sec) -training >> step=486900, episode=82 reward=0.7539476 (503.22 it/sec) -training >> step=487000, episode=82 reward=0.7785186 (582.49 it/sec) -training >> step=487100, episode=82 reward=0.745132 (501.92 it/sec) -training >> step=487200, episode=82 reward=0.7618616 (488.00 it/sec) -training >> step=487300, episode=82 reward=0.7444324 (542.63 it/sec) -training >> step=487400, episode=82 reward=0.7619317 (475.20 it/sec) -training >> step=487500, episode=82 reward=0.7621254 (554.68 it/sec) -training >> step=487600, episode=82 reward=0.7440674 (507.63 it/sec) -training >> step=487700, episode=82 reward=0.771129 (502.77 it/sec) -training >> step=487800, episode=82 reward=0.7804285 (522.08 it/sec) -training >> step=487900, episode=82 reward=0.7775075 (478.44 it/sec) -training >> step=488000, episode=82 reward=0.7596667 (508.33 it/sec) -training >> step=488100, episode=82 reward=0.7569368 (515.70 it/sec) -training >> step=488200, episode=82 reward=0.7712216 (486.69 it/sec) -training >> step=488300, episode=82 reward=0.7583334 (557.33 it/sec) -training >> step=488400, episode=82 reward=0.7592322 (500.80 it/sec) -training >> step=488500, episode=82 reward=0.740105 (490.46 it/sec) -training >> step=488600, episode=82 reward=0.7895296 (568.78 it/sec) -training >> step=488700, episode=82 reward=0.7517146 (493.25 it/sec) -training >> step=488800, episode=82 reward=0.7570773 (534.92 it/sec) -training >> step=488900, episode=82 reward=0.7725983 (510.23 it/sec) -training >> step=489000, episode=82 reward=0.7436777 (500.94 it/sec) -training >> step=489100, episode=82 reward=0.7522677 (525.12 it/sec) -training >> step=489200, episode=82 reward=0.7835661 (495.54 it/sec) -training >> step=489300, episode=82 reward=0.763854 (506.62 it/sec) -training >> step=489400, episode=82 reward=0.7701467 (537.08 it/sec) -training >> step=489500, episode=82 reward=0.7495811 (469.26 it/sec) -training >> step=489600, episode=82 reward=0.7649902 (551.85 it/sec) -training >> step=489700, episode=82 reward=0.7628257 (487.23 it/sec) -training >> step=489800, episode=82 reward=0.73562 (528.88 it/sec) -training >> step=489900, episode=82 reward=0.7496234 (534.68 it/sec) -training >> step=490000, episode=82 reward=0.7514388 (499.34 it/sec) -training >> step=490100, episode=82 reward=0.7671001 (514.45 it/sec) -training >> step=490200, episode=82 reward=0.7763281 (492.02 it/sec) -training >> step=490300, episode=82 reward=0.7522641 (513.49 it/sec) -training >> step=490400, episode=82 reward=0.7888705 (536.97 it/sec) -training >> step=490500, episode=82 reward=0.7483002 (479.20 it/sec) -training >> step=490600, episode=82 reward=0.7693428 (538.35 it/sec) -training >> step=490700, episode=82 reward=0.7838082 (527.65 it/sec) -training >> step=490800, episode=82 reward=0.7656975 (507.98 it/sec) -training >> step=490900, episode=82 reward=0.7726432 (522.98 it/sec) -training >> step=491000, episode=82 reward=0.7494756 (454.95 it/sec) -training >> step=491100, episode=82 reward=0.7609645 (468.11 it/sec) -training >> step=491200, episode=82 reward=0.7467148 (499.08 it/sec) -training >> step=491300, episode=83 reward=0.7645357 (345.26 it/sec) -training >> step=491400, episode=83 reward=0.746727 (510.25 it/sec) -training >> step=491500, episode=83 reward=0.7780526 (527.65 it/sec) -training >> step=491600, episode=83 reward=0.7588726 (525.26 it/sec) -training >> step=491700, episode=83 reward=0.7603444 (365.59 it/sec) -training >> step=491800, episode=83 reward=0.7843004 (551.19 it/sec) -training >> step=491900, episode=83 reward=0.7663407 (542.06 it/sec) -training >> step=492000, episode=83 reward=0.7591591 (499.83 it/sec) -training >> step=492100, episode=83 reward=0.7423993 (545.73 it/sec) -training >> step=492200, episode=83 reward=0.754061 (470.07 it/sec) -training >> step=492300, episode=83 reward=0.7835887 (506.26 it/sec) -training >> step=492400, episode=83 reward=0.7504659 (506.40 it/sec) -training >> step=492500, episode=83 reward=0.7567264 (524.97 it/sec) -training >> step=492600, episode=83 reward=0.7519934 (512.59 it/sec) -training >> step=492700, episode=83 reward=0.7503119 (512.00 it/sec) -training >> step=492800, episode=83 reward=0.7758577 (515.59 it/sec) -training >> step=492900, episode=83 reward=0.7523019 (502.02 it/sec) -training >> step=493000, episode=83 reward=0.7729872 (503.66 it/sec) -training >> step=493100, episode=83 reward=0.740768 (474.51 it/sec) -training >> step=493200, episode=83 reward=0.7498533 (495.87 it/sec) -training >> step=493300, episode=83 reward=0.7782076 (544.18 it/sec) -training >> step=493400, episode=83 reward=0.76051 (553.92 it/sec) -training >> step=493500, episode=83 reward=0.7712509 (541.82 it/sec) -training >> step=493600, episode=83 reward=0.7429247 (483.51 it/sec) -training >> step=493700, episode=83 reward=0.7621628 (507.73 it/sec) -training >> step=493800, episode=83 reward=0.780821 (465.21 it/sec) -training >> step=493900, episode=83 reward=0.769259 (470.88 it/sec) -training >> step=494000, episode=83 reward=0.7504212 (495.05 it/sec) -training >> step=494100, episode=83 reward=0.7831876 (454.97 it/sec) -training >> step=494200, episode=83 reward=0.757021 (516.40 it/sec) -training >> step=494300, episode=83 reward=0.7773266 (449.20 it/sec) -training >> step=494400, episode=83 reward=0.7445872 (510.91 it/sec) -training >> step=494500, episode=83 reward=0.7501876 (475.67 it/sec) -training >> step=494600, episode=83 reward=0.7670745 (423.15 it/sec) -training >> step=494700, episode=83 reward=0.7673177 (468.95 it/sec) -training >> step=494800, episode=83 reward=0.7508447 (477.95 it/sec) -training >> step=494900, episode=83 reward=0.7284485 (490.52 it/sec) -training >> step=495000, episode=83 reward=0.7784497 (532.71 it/sec) -training >> step=495100, episode=83 reward=0.7521034 (460.00 it/sec) -training >> step=495200, episode=83 reward=0.7778033 (455.57 it/sec) -training >> step=495300, episode=83 reward=0.759551 (455.02 it/sec) -training >> step=495400, episode=83 reward=0.7615969 (459.62 it/sec) -training >> step=495500, episode=83 reward=0.7449715 (478.70 it/sec) -training >> step=495600, episode=83 reward=0.7793986 (405.48 it/sec) -training >> step=495700, episode=83 reward=0.7537598 (498.60 it/sec) -training >> step=495800, episode=83 reward=0.7779703 (547.22 it/sec) -training >> step=495900, episode=83 reward=0.774614 (488.40 it/sec) -training >> step=496000, episode=83 reward=0.7605222 (479.93 it/sec) -training >> step=496100, episode=83 reward=0.7648928 (488.22 it/sec) -training >> step=496200, episode=83 reward=0.7465308 (514.64 it/sec) -training >> step=496300, episode=83 reward=0.7660692 (530.12 it/sec) -training >> step=496400, episode=83 reward=0.7704992 (507.13 it/sec) -training >> step=496500, episode=83 reward=0.7460225 (445.70 it/sec) -training >> step=496600, episode=83 reward=0.749598 (513.26 it/sec) -training >> step=496700, episode=83 reward=0.7664887 (503.36 it/sec) -training >> step=496800, episode=83 reward=0.7749025 (522.10 it/sec) -training >> step=496900, episode=83 reward=0.7519934 (513.76 it/sec) -training >> step=497000, episode=83 reward=0.7585793 (507.90 it/sec) -training >> step=497100, episode=83 reward=0.7774929 (457.01 it/sec) -training >> step=497200, episode=83 reward=0.7339678 (541.11 it/sec) -training >> step=497300, episode=84 reward=0.7648158 (167.46 it/sec) -training >> step=497400, episode=84 reward=0.7667688 (448.63 it/sec) -training >> step=497500, episode=84 reward=0.7622635 (504.39 it/sec) -training >> step=497600, episode=84 reward=0.7439634 (498.37 it/sec) -training >> step=497700, episode=84 reward=0.749771 (483.16 it/sec) -training >> step=497800, episode=84 reward=0.7797725 (544.23 it/sec) -training >> step=497900, episode=84 reward=0.7715288 (449.86 it/sec) -training >> step=498000, episode=84 reward=0.7481241 (393.70 it/sec) -training >> step=498100, episode=84 reward=0.7627822 (486.72 it/sec) -training >> step=498200, episode=84 reward=0.7855064 (512.61 it/sec) -training >> step=498300, episode=84 reward=0.7668085 (516.73 it/sec) -training >> step=498400, episode=84 reward=0.7602323 (485.65 it/sec) -training >> step=498500, episode=84 reward=0.7862921 (505.02 it/sec) -training >> step=498600, episode=84 reward=0.7533165 (487.53 it/sec) -training >> step=498700, episode=84 reward=0.755179 (516.30 it/sec) -training >> step=498800, episode=84 reward=0.7471567 (544.65 it/sec) -training >> step=498900, episode=84 reward=0.78127 (462.74 it/sec) -training >> step=499000, episode=84 reward=0.7497758 (497.91 it/sec) -training >> step=499100, episode=84 reward=0.7587015 (523.64 it/sec) -training >> step=499200, episode=84 reward=0.7589802 (492.18 it/sec) -training >> step=499300, episode=84 reward=0.763072 (539.90 it/sec) -training >> step=499400, episode=84 reward=0.758375 (492.70 it/sec) -training >> step=499500, episode=84 reward=0.7619414 (504.40 it/sec) -training >> step=499600, episode=84 reward=0.7361956 (537.77 it/sec) -training >> step=499700, episode=84 reward=0.7821466 (503.02 it/sec) -training >> step=499800, episode=84 reward=0.7749215 (531.98 it/sec) -training >> step=499900, episode=84 reward=0.7592461 (474.81 it/sec) -training >> step=500000, episode=84 reward=0.7628006 (495.80 it/sec) -training >> step=500100, episode=84 reward=0.7545558 (539.66 it/sec) -training >> step=500200, episode=84 reward=0.7479849 (511.04 it/sec) -training >> step=500300, episode=84 reward=0.7686255 (500.27 it/sec) -training >> step=500400, episode=84 reward=0.7500193 (517.81 it/sec) -training >> step=500500, episode=84 reward=0.761755 (462.90 it/sec) -training >> step=500600, episode=84 reward=0.76912 (550.53 it/sec) -training >> step=500700, episode=84 reward=0.7624349 (517.62 it/sec) -training >> step=500800, episode=84 reward=0.767296 (507.74 it/sec) -training >> step=500900, episode=84 reward=0.7689247 (532.84 it/sec) -training >> step=501000, episode=84 reward=0.7682974 (488.56 it/sec) -training >> step=501100, episode=84 reward=0.7692353 (524.18 it/sec) -training >> step=501200, episode=84 reward=0.75971 (516.91 it/sec) -training >> step=501300, episode=84 reward=0.7486982 (479.04 it/sec) -training >> step=501400, episode=84 reward=0.7659246 (507.04 it/sec) -training >> step=501500, episode=84 reward=0.7609881 (467.37 it/sec) -training >> step=501600, episode=84 reward=0.7490283 (516.40 it/sec) -training >> step=501700, episode=84 reward=0.7687877 (524.95 it/sec) -training >> step=501800, episode=84 reward=0.7544733 (502.36 it/sec) -training >> step=501900, episode=84 reward=0.7597609 (509.56 it/sec) -training >> step=502000, episode=84 reward=0.7434283 (477.69 it/sec) -training >> step=502100, episode=84 reward=0.7798253 (521.60 it/sec) -training >> step=502200, episode=84 reward=0.7659982 (508.59 it/sec) -training >> step=502300, episode=84 reward=0.7879685 (506.25 it/sec) -training >> step=502400, episode=84 reward=0.7518643 (492.87 it/sec) -training >> step=502500, episode=84 reward=0.7547451 (525.82 it/sec) -training >> step=502600, episode=84 reward=0.7678857 (463.82 it/sec) -training >> step=502700, episode=84 reward=0.7471456 (525.72 it/sec) -training >> step=502800, episode=84 reward=0.7485436 (517.08 it/sec) -training >> step=502900, episode=84 reward=0.7677927 (513.00 it/sec) -training >> step=503000, episode=84 reward=0.7334698 (539.04 it/sec) -training >> step=503100, episode=84 reward=0.7535969 (475.84 it/sec) -training >> step=503200, episode=84 reward=0.7286397 (542.58 it/sec) -training >> step=503300, episode=85 reward=0.7711352 (118.37 it/sec) -training >> step=503400, episode=85 reward=0.7646357 (498.67 it/sec) -training >> step=503500, episode=85 reward=0.7568177 (499.12 it/sec) -training >> step=503600, episode=85 reward=0.7627848 (522.01 it/sec) -training >> step=503700, episode=85 reward=0.7645782 (463.54 it/sec) -training >> step=503800, episode=85 reward=0.7653207 (491.25 it/sec) -training >> step=503900, episode=85 reward=0.7520995 (480.04 it/sec) -training >> step=504000, episode=85 reward=0.7611898 (522.49 it/sec) -training >> step=504100, episode=85 reward=0.7703385 (411.36 it/sec) -training >> step=504200, episode=85 reward=0.7655885 (499.67 it/sec) -training >> step=504300, episode=85 reward=0.7482173 (499.75 it/sec) -training >> step=504400, episode=85 reward=0.7691526 (522.82 it/sec) -training >> step=504500, episode=85 reward=0.7658482 (507.34 it/sec) -training >> step=504600, episode=85 reward=0.7781922 (507.54 it/sec) -training >> step=504700, episode=85 reward=0.769356 (501.34 it/sec) -training >> step=504800, episode=85 reward=0.7849075 (490.25 it/sec) -training >> step=504900, episode=85 reward=0.7637278 (504.52 it/sec) -training >> step=505000, episode=85 reward=0.756305 (523.23 it/sec) -training >> step=505100, episode=85 reward=0.7636244 (520.40 it/sec) -training >> step=505200, episode=85 reward=0.7624106 (549.21 it/sec) -training >> step=505300, episode=85 reward=0.7592877 (460.43 it/sec) -training >> step=505400, episode=85 reward=0.7764342 (479.65 it/sec) -training >> step=505500, episode=85 reward=0.747301 (492.67 it/sec) -training >> step=505600, episode=85 reward=0.735513 (503.38 it/sec) -training >> step=505700, episode=85 reward=0.7652655 (535.74 it/sec) -training >> step=505800, episode=85 reward=0.7839413 (477.91 it/sec) -training >> step=505900, episode=85 reward=0.7569173 (484.99 it/sec) -training >> step=506000, episode=85 reward=0.7567143 (503.67 it/sec) -training >> step=506100, episode=85 reward=0.7511145 (485.48 it/sec) -training >> step=506200, episode=85 reward=0.7671703 (529.85 it/sec) -training >> step=506300, episode=85 reward=0.753795 (498.76 it/sec) -training >> step=506400, episode=85 reward=0.7740122 (476.56 it/sec) -training >> step=506500, episode=85 reward=0.7475849 (513.50 it/sec) -training >> step=506600, episode=85 reward=0.7392281 (483.78 it/sec) -training >> step=506700, episode=85 reward=0.7489423 (510.04 it/sec) -training >> step=506800, episode=85 reward=0.7643019 (509.00 it/sec) -training >> step=506900, episode=85 reward=0.7513217 (415.29 it/sec) -training >> step=507000, episode=85 reward=0.7614337 (477.98 it/sec) -training >> step=507100, episode=85 reward=0.7322049 (456.05 it/sec) -training >> step=507200, episode=85 reward=0.7391199 (483.59 it/sec) -training >> step=507300, episode=85 reward=0.7655797 (476.26 it/sec) -training >> step=507400, episode=85 reward=0.7457655 (493.26 it/sec) -training >> step=507500, episode=85 reward=0.7615972 (485.68 it/sec) -training >> step=507600, episode=85 reward=0.7460759 (471.61 it/sec) -training >> step=507700, episode=85 reward=0.7302181 (434.75 it/sec) -training >> step=507800, episode=85 reward=0.7658386 (435.46 it/sec) -training >> step=507900, episode=85 reward=0.7558693 (483.95 it/sec) -training >> step=508000, episode=85 reward=0.7573197 (451.58 it/sec) -training >> step=508100, episode=85 reward=0.7722377 (518.06 it/sec) -training >> step=508200, episode=85 reward=0.736018 (426.03 it/sec) -training >> step=508300, episode=85 reward=0.7723209 (476.24 it/sec) -training >> step=508400, episode=85 reward=0.7425045 (508.64 it/sec) -training >> step=508500, episode=85 reward=0.7766924 (487.90 it/sec) -training >> step=508600, episode=85 reward=0.7421013 (462.62 it/sec) -training >> step=508700, episode=85 reward=0.7416116 (536.56 it/sec) -training >> step=508800, episode=85 reward=0.7702401 (459.00 it/sec) -training >> step=508900, episode=85 reward=0.7641107 (536.57 it/sec) -training >> step=509000, episode=85 reward=0.7677242 (522.97 it/sec) -training >> step=509100, episode=85 reward=0.7444866 (486.70 it/sec) -training >> step=509200, episode=85 reward=0.7352194 (505.79 it/sec) -training >> step=509300, episode=86 reward=0.7659701 (144.24 it/sec) -training >> step=509400, episode=86 reward=0.757031 (518.28 it/sec) -training >> step=509500, episode=86 reward=0.7596018 (466.78 it/sec) -training >> step=509600, episode=86 reward=0.7598238 (468.70 it/sec) -training >> step=509700, episode=86 reward=0.7686387 (431.16 it/sec) -training >> step=509800, episode=86 reward=0.7755648 (452.71 it/sec) -training >> step=509900, episode=86 reward=0.7610319 (475.68 it/sec) -training >> step=510000, episode=86 reward=0.753969 (387.59 it/sec) -training >> step=510100, episode=86 reward=0.7580177 (517.87 it/sec) -training >> step=510200, episode=86 reward=0.7584757 (374.81 it/sec) -training >> step=510300, episode=86 reward=0.7600541 (524.76 it/sec) -training >> step=510400, episode=86 reward=0.7615247 (493.24 it/sec) -training >> step=510500, episode=86 reward=0.7423832 (419.78 it/sec) -training >> step=510600, episode=86 reward=0.7631685 (496.16 it/sec) -training >> step=510700, episode=86 reward=0.7513998 (530.17 it/sec) -training >> step=510800, episode=86 reward=0.7726822 (549.89 it/sec) -training >> step=510900, episode=86 reward=0.754302 (504.83 it/sec) -training >> step=511000, episode=86 reward=0.769244 (561.72 it/sec) -training >> step=511100, episode=86 reward=0.77381 (522.32 it/sec) -training >> step=511200, episode=86 reward=0.7505116 (501.48 it/sec) -training >> step=511300, episode=86 reward=0.7667465 (498.22 it/sec) -training >> step=511400, episode=86 reward=0.7529145 (472.32 it/sec) -training >> step=511500, episode=86 reward=0.7730111 (506.83 it/sec) -training >> step=511600, episode=86 reward=0.7654105 (532.74 it/sec) -training >> step=511700, episode=86 reward=0.7514394 (525.62 it/sec) -training >> step=511800, episode=86 reward=0.7786292 (554.07 it/sec) -training >> step=511900, episode=86 reward=0.7699886 (503.68 it/sec) -training >> step=512000, episode=86 reward=0.7682506 (501.86 it/sec) -training >> step=512100, episode=86 reward=0.7433329 (477.06 it/sec) -training >> step=512200, episode=86 reward=0.7495266 (533.91 it/sec) -training >> step=512300, episode=86 reward=0.764231 (547.30 it/sec) -training >> step=512400, episode=86 reward=0.7509467 (542.74 it/sec) -training >> step=512500, episode=86 reward=0.7840439 (458.44 it/sec) -training >> step=512600, episode=86 reward=0.7699822 (549.64 it/sec) -training >> step=512700, episode=86 reward=0.7828493 (513.34 it/sec) -training >> step=512800, episode=86 reward=0.758285 (560.96 it/sec) -training >> step=512900, episode=86 reward=0.7426615 (542.95 it/sec) -training >> step=513000, episode=86 reward=0.7686514 (498.71 it/sec) -training >> step=513100, episode=86 reward=0.7684181 (540.80 it/sec) -training >> step=513200, episode=86 reward=0.7563233 (539.20 it/sec) -training >> step=513300, episode=86 reward=0.7638375 (539.61 it/sec) -training >> step=513400, episode=86 reward=0.7592499 (518.74 it/sec) -training >> step=513500, episode=86 reward=0.7634143 (510.57 it/sec) -training >> step=513600, episode=86 reward=0.7583425 (535.34 it/sec) -training >> step=513700, episode=86 reward=0.7510731 (488.10 it/sec) -training >> step=513800, episode=86 reward=0.76388 (541.05 it/sec) -training >> step=513900, episode=86 reward=0.7508512 (544.69 it/sec) -training >> step=514000, episode=86 reward=0.7483851 (499.45 it/sec) -training >> step=514100, episode=86 reward=0.7687936 (548.85 it/sec) -training >> step=514200, episode=86 reward=0.7493123 (510.88 it/sec) -training >> step=514300, episode=86 reward=0.7340497 (503.63 it/sec) -training >> step=514400, episode=86 reward=0.7634808 (535.12 it/sec) -training >> step=514500, episode=86 reward=0.7454941 (410.80 it/sec) -training >> step=514600, episode=86 reward=0.7445577 (435.27 it/sec) -training >> step=514700, episode=86 reward=0.771587 (433.60 it/sec) -training >> step=514800, episode=86 reward=0.7613665 (449.75 it/sec) -training >> step=514900, episode=86 reward=0.7805684 (512.69 it/sec) -training >> step=515000, episode=86 reward=0.7432637 (489.89 it/sec) -training >> step=515100, episode=86 reward=0.7568007 (483.48 it/sec) -training >> step=515200, episode=86 reward=0.749501 (444.54 it/sec) -training >> step=515300, episode=87 reward=0.7734834 (87.78 it/sec) -training >> step=515400, episode=87 reward=0.7528521 (548.00 it/sec) -training >> step=515500, episode=87 reward=0.7267079 (533.63 it/sec) -training >> step=515600, episode=87 reward=0.7738257 (493.24 it/sec) -training >> step=515700, episode=87 reward=0.7487843 (469.76 it/sec) -training >> step=515800, episode=87 reward=0.7755269 (484.98 it/sec) -training >> step=515900, episode=87 reward=0.7716984 (511.08 it/sec) -training >> step=516000, episode=87 reward=0.7684386 (535.43 it/sec) -training >> step=516100, episode=87 reward=0.760604 (493.27 it/sec) -training >> step=516200, episode=87 reward=0.7602035 (461.04 it/sec) -training >> step=516300, episode=87 reward=0.7591018 (494.09 it/sec) -training >> step=516400, episode=87 reward=0.7441925 (355.00 it/sec) -training >> step=516500, episode=87 reward=0.7547672 (518.56 it/sec) -training >> step=516600, episode=87 reward=0.776341 (472.18 it/sec) -training >> step=516700, episode=87 reward=0.7558225 (501.22 it/sec) -training >> step=516800, episode=87 reward=0.7617381 (507.78 it/sec) -training >> step=516900, episode=87 reward=0.7559882 (496.43 it/sec) -training >> step=517000, episode=87 reward=0.7680134 (529.39 it/sec) -training >> step=517100, episode=87 reward=0.7585185 (518.49 it/sec) -training >> step=517200, episode=87 reward=0.7503639 (514.68 it/sec) -training >> step=517300, episode=87 reward=0.762606 (447.43 it/sec) -training >> step=517400, episode=87 reward=0.7587732 (534.76 it/sec) -training >> step=517500, episode=87 reward=0.7263049 (506.86 it/sec) -training >> step=517600, episode=87 reward=0.7711266 (537.27 it/sec) -training >> step=517700, episode=87 reward=0.7514821 (513.97 it/sec) -training >> step=517800, episode=87 reward=0.7696567 (463.57 it/sec) -training >> step=517900, episode=87 reward=0.754988 (491.40 it/sec) -training >> step=518000, episode=87 reward=0.7669072 (507.22 it/sec) -training >> step=518100, episode=87 reward=0.766278 (526.09 it/sec) -training >> step=518200, episode=87 reward=0.7556897 (544.84 it/sec) -training >> step=518300, episode=87 reward=0.7551594 (476.88 it/sec) -training >> step=518400, episode=87 reward=0.7656519 (502.80 it/sec) -training >> step=518500, episode=87 reward=0.7844939 (531.18 it/sec) -training >> step=518600, episode=87 reward=0.7731413 (493.11 it/sec) -training >> step=518700, episode=87 reward=0.752432 (529.32 it/sec) -training >> step=518800, episode=87 reward=0.7465408 (457.98 it/sec) -training >> step=518900, episode=87 reward=0.7748205 (499.15 it/sec) -training >> step=519000, episode=87 reward=0.7645464 (505.40 it/sec) -training >> step=519100, episode=87 reward=0.7640316 (485.83 it/sec) -training >> step=519200, episode=87 reward=0.7617928 (541.47 it/sec) -training >> step=519300, episode=87 reward=0.7553685 (521.98 it/sec) -training >> step=519400, episode=87 reward=0.7663659 (468.11 it/sec) -training >> step=519500, episode=87 reward=0.7725062 (526.00 it/sec) -training >> step=519600, episode=87 reward=0.7530754 (537.49 it/sec) -training >> step=519700, episode=87 reward=0.7552092 (517.27 it/sec) -training >> step=519800, episode=87 reward=0.7605948 (506.96 it/sec) -training >> step=519900, episode=87 reward=0.7690021 (461.99 it/sec) -training >> step=520000, episode=87 reward=0.7598516 (525.56 it/sec) -training >> step=520100, episode=87 reward=0.762008 (540.64 it/sec) -training >> step=520200, episode=87 reward=0.7540752 (517.12 it/sec) -training >> step=520300, episode=87 reward=0.7493878 (535.88 it/sec) -training >> step=520400, episode=87 reward=0.7534029 (473.43 it/sec) -training >> step=520500, episode=87 reward=0.7609474 (492.07 it/sec) -training >> step=520600, episode=87 reward=0.7499338 (526.97 it/sec) -training >> step=520700, episode=87 reward=0.7584456 (492.58 it/sec) -training >> step=520800, episode=87 reward=0.7393247 (507.76 it/sec) -training >> step=520900, episode=87 reward=0.7493356 (423.47 it/sec) -training >> step=521000, episode=87 reward=0.7504407 (531.25 it/sec) -training >> step=521100, episode=87 reward=0.7515413 (525.82 it/sec) -training >> step=521200, episode=87 reward=0.7690414 (489.81 it/sec) -training >> step=521300, episode=88 reward=0.7642692 (203.53 it/sec) -training >> step=521400, episode=88 reward=0.767395 (495.33 it/sec) -training >> step=521500, episode=88 reward=0.7647581 (446.88 it/sec) -training >> step=521600, episode=88 reward=0.7401544 (531.18 it/sec) -training >> step=521700, episode=88 reward=0.747726 (536.64 it/sec) -training >> step=521800, episode=88 reward=0.7717413 (452.88 it/sec) -training >> step=521900, episode=88 reward=0.7552871 (523.18 it/sec) -training >> step=522000, episode=88 reward=0.7642332 (503.91 it/sec) -training >> step=522100, episode=88 reward=0.748461 (524.05 it/sec) -training >> step=522200, episode=88 reward=0.7598082 (513.39 it/sec) -training >> step=522300, episode=88 reward=0.7547654 (435.93 it/sec) -training >> step=522400, episode=88 reward=0.7828981 (408.73 it/sec) -training >> step=522500, episode=88 reward=0.7403116 (330.13 it/sec) -training >> step=522600, episode=88 reward=0.7547606 (360.86 it/sec) -training >> step=522700, episode=88 reward=0.7516594 (312.98 it/sec) -training >> step=522800, episode=88 reward=0.7749768 (482.74 it/sec) -training >> step=522900, episode=88 reward=0.7704499 (496.13 it/sec) -training >> step=523000, episode=88 reward=0.7546554 (526.65 it/sec) -training >> step=523100, episode=88 reward=0.7686636 (479.44 it/sec) -training >> step=523200, episode=88 reward=0.760598 (500.65 it/sec) -training >> step=523300, episode=88 reward=0.7658275 (482.57 it/sec) -training >> step=523400, episode=88 reward=0.7765012 (457.54 it/sec) -training >> step=523500, episode=88 reward=0.7492387 (486.00 it/sec) -training >> step=523600, episode=88 reward=0.7689057 (504.55 it/sec) -training >> step=523700, episode=88 reward=0.7446606 (496.01 it/sec) -training >> step=523800, episode=88 reward=0.7726409 (548.91 it/sec) -training >> step=523900, episode=88 reward=0.7735046 (553.80 it/sec) -training >> step=524000, episode=88 reward=0.7721992 (521.80 it/sec) -training >> step=524100, episode=88 reward=0.769794 (504.07 it/sec) -training >> step=524200, episode=88 reward=0.7540267 (398.36 it/sec) -training >> step=524300, episode=88 reward=0.7518641 (516.37 it/sec) -training >> step=524400, episode=88 reward=0.7836667 (507.69 it/sec) -training >> step=524500, episode=88 reward=0.7610692 (466.53 it/sec) -training >> step=524600, episode=88 reward=0.7683164 (529.68 it/sec) -training >> step=524700, episode=88 reward=0.7837096 (479.32 it/sec) -training >> step=524800, episode=88 reward=0.754674 (538.44 it/sec) -training >> step=524900, episode=88 reward=0.7573014 (523.86 it/sec) -training >> step=525000, episode=88 reward=0.7447636 (496.97 it/sec) -training >> step=525100, episode=88 reward=0.7548955 (517.36 it/sec) -training >> step=525200, episode=88 reward=0.7640278 (513.88 it/sec) -training >> step=525300, episode=88 reward=0.7518461 (489.34 it/sec) -training >> step=525400, episode=88 reward=0.7699355 (536.49 it/sec) -training >> step=525500, episode=88 reward=0.7567282 (545.47 it/sec) -training >> step=525600, episode=88 reward=0.7389382 (511.26 it/sec) -training >> step=525700, episode=88 reward=0.7383863 (505.37 it/sec) -training >> step=525800, episode=88 reward=0.7725343 (511.46 it/sec) -training >> step=525900, episode=88 reward=0.7764844 (527.97 it/sec) -training >> step=526000, episode=88 reward=0.7633177 (509.38 it/sec) -training >> step=526100, episode=88 reward=0.7760615 (523.64 it/sec) -training >> step=526200, episode=88 reward=0.7630226 (522.92 it/sec) -training >> step=526300, episode=88 reward=0.7622393 (501.61 it/sec) -training >> step=526400, episode=88 reward=0.7210398 (556.34 it/sec) -training >> step=526500, episode=88 reward=0.7510351 (547.48 it/sec) -training >> step=526600, episode=88 reward=0.7515153 (521.42 it/sec) -training >> step=526700, episode=88 reward=0.7673087 (495.03 it/sec) -training >> step=526800, episode=88 reward=0.7733688 (398.65 it/sec) -training >> step=526900, episode=88 reward=0.7412841 (485.75 it/sec) -training >> step=527000, episode=88 reward=0.7829865 (455.05 it/sec) -training >> step=527100, episode=88 reward=0.7367319 (512.44 it/sec) -training >> step=527200, episode=88 reward=0.744792 (561.89 it/sec) -training >> step=527300, episode=89 reward=0.7741132 (181.16 it/sec) -training >> step=527400, episode=89 reward=0.7773979 (452.26 it/sec) -training >> step=527500, episode=89 reward=0.7504358 (471.79 it/sec) -training >> step=527600, episode=89 reward=0.7532192 (480.93 it/sec) -training >> step=527700, episode=89 reward=0.7814534 (467.46 it/sec) -training >> step=527800, episode=89 reward=0.7554155 (526.39 it/sec) -training >> step=527900, episode=89 reward=0.7594213 (572.62 it/sec) -training >> step=528000, episode=89 reward=0.731704 (531.37 it/sec) -training >> step=528100, episode=89 reward=0.7436569 (508.45 it/sec) -training >> step=528200, episode=89 reward=0.7554933 (473.91 it/sec) -training >> step=528300, episode=89 reward=0.771972 (496.25 it/sec) -training >> step=528400, episode=89 reward=0.7732658 (568.08 it/sec) -training >> step=528500, episode=89 reward=0.7576838 (553.00 it/sec) -training >> step=528600, episode=89 reward=0.770624 (546.33 it/sec) -training >> step=528700, episode=89 reward=0.7614094 (599.94 it/sec) -training >> step=528800, episode=89 reward=0.7401093 (499.89 it/sec) -training >> step=528900, episode=89 reward=0.787363 (573.15 it/sec) -training >> step=529000, episode=89 reward=0.8083127 (417.74 it/sec) -training >> step=529100, episode=89 reward=0.7777995 (558.24 it/sec) -training >> step=529200, episode=89 reward=0.7623092 (601.57 it/sec) -training >> step=529300, episode=89 reward=0.7670881 (543.45 it/sec) -training >> step=529400, episode=89 reward=0.7693937 (570.98 it/sec) -training >> step=529500, episode=89 reward=0.7468446 (599.69 it/sec) -training >> step=529600, episode=89 reward=0.7639132 (544.00 it/sec) -training >> step=529700, episode=89 reward=0.773143 (566.95 it/sec) -training >> step=529800, episode=89 reward=0.7420391 (564.18 it/sec) -training >> step=529900, episode=89 reward=0.7772533 (524.70 it/sec) -training >> step=530000, episode=89 reward=0.7681358 (566.46 it/sec) -training >> step=530100, episode=89 reward=0.7507722 (567.03 it/sec) -training >> step=530200, episode=89 reward=0.7681803 (564.57 it/sec) -training >> step=530300, episode=89 reward=0.7775201 (590.69 it/sec) -training >> step=530400, episode=89 reward=0.7664079 (546.98 it/sec) -training >> step=530500, episode=89 reward=0.7565956 (558.26 it/sec) -training >> step=530600, episode=89 reward=0.7889519 (562.54 it/sec) -training >> step=530700, episode=89 reward=0.7657385 (561.80 it/sec) -training >> step=530800, episode=89 reward=0.768276 (577.69 it/sec) -training >> step=530900, episode=89 reward=0.7662075 (552.61 it/sec) -training >> step=531000, episode=89 reward=0.7471466 (563.01 it/sec) -training >> step=531100, episode=89 reward=0.7549744 (521.85 it/sec) -training >> step=531200, episode=89 reward=0.7714651 (577.83 it/sec) -training >> step=531300, episode=89 reward=0.7709792 (573.11 it/sec) -training >> step=531400, episode=89 reward=0.7527901 (582.06 it/sec) -training >> step=531500, episode=89 reward=0.7557889 (578.16 it/sec) -training >> step=531600, episode=89 reward=0.749857 (573.90 it/sec) -training >> step=531700, episode=89 reward=0.7451897 (532.81 it/sec) -training >> step=531800, episode=89 reward=0.7790995 (595.90 it/sec) -training >> step=531900, episode=89 reward=0.762895 (563.12 it/sec) -training >> step=532000, episode=89 reward=0.7358004 (560.97 it/sec) -training >> step=532100, episode=89 reward=0.7767103 (603.19 it/sec) -training >> step=532200, episode=89 reward=0.7310184 (561.46 it/sec) -training >> step=532300, episode=89 reward=0.738953 (553.77 it/sec) -training >> step=532400, episode=89 reward=0.7692006 (567.44 it/sec) -training >> step=532500, episode=89 reward=0.7527943 (549.74 it/sec) -training >> step=532600, episode=89 reward=0.7408224 (598.67 it/sec) -training >> step=532700, episode=89 reward=0.7467571 (554.85 it/sec) -training >> step=532800, episode=89 reward=0.7613515 (505.07 it/sec) -training >> step=532900, episode=89 reward=0.7498677 (564.89 it/sec) -training >> step=533000, episode=89 reward=0.7509266 (529.61 it/sec) -training >> step=533100, episode=89 reward=0.7557409 (556.56 it/sec) -training >> step=533200, episode=89 reward=0.7443852 (581.79 it/sec) -training >> step=533300, episode=90 reward=0.7582086 (189.46 it/sec) -training >> step=533400, episode=90 reward=0.7694 (569.07 it/sec) -training >> step=533500, episode=90 reward=0.7535934 (540.87 it/sec) -training >> step=533600, episode=90 reward=0.7482809 (532.91 it/sec) -training >> step=533700, episode=90 reward=0.7538832 (585.60 it/sec) -training >> step=533800, episode=90 reward=0.7604434 (567.57 it/sec) -training >> step=533900, episode=90 reward=0.7445393 (566.33 it/sec) -training >> step=534000, episode=90 reward=0.7604303 (567.90 it/sec) -training >> step=534100, episode=90 reward=0.7453781 (562.67 it/sec) -training >> step=534200, episode=90 reward=0.7632793 (589.95 it/sec) -training >> step=534300, episode=90 reward=0.7559106 (563.86 it/sec) -training >> step=534400, episode=90 reward=0.7737749 (551.09 it/sec) -training >> step=534500, episode=90 reward=0.7641437 (598.50 it/sec) -training >> step=534600, episode=90 reward=0.7746158 (546.81 it/sec) -training >> step=534700, episode=90 reward=0.7614601 (577.96 it/sec) -training >> step=534800, episode=90 reward=0.7560327 (599.04 it/sec) -training >> step=534900, episode=90 reward=0.7735586 (530.93 it/sec) -training >> step=535000, episode=90 reward=0.7751571 (576.50 it/sec) -training >> step=535100, episode=90 reward=0.7569276 (386.26 it/sec) -training >> step=535200, episode=90 reward=0.7981875 (559.48 it/sec) -training >> step=535300, episode=90 reward=0.7532156 (573.25 it/sec) -training >> step=535400, episode=90 reward=0.7591126 (530.47 it/sec) -training >> step=535500, episode=90 reward=0.7579651 (487.73 it/sec) -training >> step=535600, episode=90 reward=0.7911605 (566.01 it/sec) -training >> step=535700, episode=90 reward=0.76171 (545.59 it/sec) -training >> step=535800, episode=90 reward=0.768051 (569.40 it/sec) -training >> step=535900, episode=90 reward=0.7516665 (556.14 it/sec) -training >> step=536000, episode=90 reward=0.7686836 (526.49 it/sec) -training >> step=536100, episode=90 reward=0.7545209 (532.74 it/sec) -training >> step=536200, episode=90 reward=0.7750648 (480.86 it/sec) -training >> step=536300, episode=90 reward=0.7599196 (535.96 it/sec) -training >> step=536400, episode=90 reward=0.7499734 (566.05 it/sec) -training >> step=536500, episode=90 reward=0.7571493 (530.64 it/sec) -training >> step=536600, episode=90 reward=0.7704959 (512.32 it/sec) -training >> step=536700, episode=90 reward=0.7776774 (545.73 it/sec) -training >> step=536800, episode=90 reward=0.7878906 (498.03 it/sec) -training >> step=536900, episode=90 reward=0.7640479 (572.66 it/sec) -training >> step=537000, episode=90 reward=0.7739668 (536.69 it/sec) -training >> step=537100, episode=90 reward=0.7652218 (517.66 it/sec) -training >> step=537200, episode=90 reward=0.7678931 (556.18 it/sec) -training >> step=537300, episode=90 reward=0.7471914 (507.88 it/sec) -training >> step=537400, episode=90 reward=0.77246 (528.26 it/sec) -training >> step=537500, episode=90 reward=0.7598206 (545.25 it/sec) -training >> step=537600, episode=90 reward=0.7373804 (545.20 it/sec) -training >> step=537700, episode=90 reward=0.7621243 (525.05 it/sec) -training >> step=537800, episode=90 reward=0.743407 (508.32 it/sec) -training >> step=537900, episode=90 reward=0.7497974 (525.42 it/sec) -training >> step=538000, episode=90 reward=0.7582282 (560.01 it/sec) -training >> step=538100, episode=90 reward=0.7598181 (520.76 it/sec) -training >> step=538200, episode=90 reward=0.7493184 (540.63 it/sec) -training >> step=538300, episode=90 reward=0.7571819 (508.60 it/sec) -training >> step=538400, episode=90 reward=0.7479987 (540.30 it/sec) -training >> step=538500, episode=90 reward=0.7453418 (565.41 it/sec) -training >> step=538600, episode=90 reward=0.7495065 (527.49 it/sec) -training >> step=538700, episode=90 reward=0.7453001 (546.60 it/sec) -training >> step=538800, episode=90 reward=0.7506353 (500.69 it/sec) -training >> step=538900, episode=90 reward=0.7417692 (512.73 it/sec) -training >> step=539000, episode=90 reward=0.7479056 (566.86 it/sec) -training >> step=539100, episode=90 reward=0.7723175 (543.06 it/sec) -training >> step=539200, episode=90 reward=0.763054 (548.24 it/sec) -training >> step=539300, episode=91 reward=0.725089 (149.30 it/sec) -training >> step=539400, episode=91 reward=0.7479209 (524.96 it/sec) -training >> step=539500, episode=91 reward=0.7571796 (568.35 it/sec) -training >> step=539600, episode=91 reward=0.7549525 (542.45 it/sec) -training >> step=539700, episode=91 reward=0.7669529 (542.39 it/sec) -training >> step=539800, episode=91 reward=0.7643476 (564.38 it/sec) -training >> step=539900, episode=91 reward=0.7714311 (539.34 it/sec) -training >> step=540000, episode=91 reward=0.7497576 (556.95 it/sec) -training >> step=540100, episode=91 reward=0.7481954 (547.52 it/sec) -training >> step=540200, episode=91 reward=0.7680593 (530.50 it/sec) -training >> step=540300, episode=91 reward=0.7684464 (528.16 it/sec) -training >> step=540400, episode=91 reward=0.7455053 (548.01 it/sec) -training >> step=540500, episode=91 reward=0.7511352 (479.14 it/sec) -training >> step=540600, episode=91 reward=0.7469109 (519.29 it/sec) -training >> step=540700, episode=91 reward=0.7546826 (517.87 it/sec) -training >> step=540800, episode=91 reward=0.7465892 (525.89 it/sec) -training >> step=540900, episode=91 reward=0.7478327 (550.25 it/sec) -training >> step=541000, episode=91 reward=0.7540072 (527.36 it/sec) -training >> step=541100, episode=91 reward=0.7495028 (549.83 it/sec) -training >> step=541200, episode=91 reward=0.7636819 (570.48 it/sec) -training >> step=541300, episode=91 reward=0.7625685 (387.99 it/sec) -training >> step=541400, episode=91 reward=0.7359007 (583.48 it/sec) -training >> step=541500, episode=91 reward=0.7430405 (529.75 it/sec) -training >> step=541600, episode=91 reward=0.7730246 (555.42 it/sec) -training >> step=541700, episode=91 reward=0.7688051 (575.60 it/sec) -training >> step=541800, episode=91 reward=0.76881 (530.79 it/sec) -training >> step=541900, episode=91 reward=0.7812372 (555.45 it/sec) -training >> step=542000, episode=91 reward=0.7654917 (505.81 it/sec) -training >> step=542100, episode=91 reward=0.7778106 (547.93 it/sec) -training >> step=542200, episode=91 reward=0.7552068 (571.88 it/sec) -training >> step=542300, episode=91 reward=0.7582534 (540.72 it/sec) -training >> step=542400, episode=91 reward=0.784193 (505.84 it/sec) -training >> step=542500, episode=91 reward=0.7539482 (534.01 it/sec) -training >> step=542600, episode=91 reward=0.7777706 (515.55 it/sec) -training >> step=542700, episode=91 reward=0.7662764 (558.27 it/sec) -training >> step=542800, episode=91 reward=0.7582111 (561.31 it/sec) -training >> step=542900, episode=91 reward=0.7798194 (530.34 it/sec) -training >> step=543000, episode=91 reward=0.7569857 (568.23 it/sec) -training >> step=543100, episode=91 reward=0.7575307 (537.81 it/sec) -training >> step=543200, episode=91 reward=0.768486 (540.85 it/sec) -training >> step=543300, episode=91 reward=0.7406216 (543.13 it/sec) -training >> step=543400, episode=91 reward=0.7514184 (543.34 it/sec) -training >> step=543500, episode=91 reward=0.758819 (555.00 it/sec) -training >> step=543600, episode=91 reward=0.7423624 (518.82 it/sec) -training >> step=543700, episode=91 reward=0.7450381 (516.40 it/sec) -training >> step=543800, episode=91 reward=0.758938 (553.09 it/sec) -training >> step=543900, episode=91 reward=0.7379565 (546.23 it/sec) -training >> step=544000, episode=91 reward=0.7272542 (543.73 it/sec) -training >> step=544100, episode=91 reward=0.7574053 (527.94 it/sec) -training >> step=544200, episode=91 reward=0.760947 (513.47 it/sec) -training >> step=544300, episode=91 reward=0.7453809 (554.65 it/sec) -training >> step=544400, episode=91 reward=0.7681138 (492.44 it/sec) -training >> step=544500, episode=91 reward=0.7408468 (532.71 it/sec) -training >> step=544600, episode=91 reward=0.7531553 (516.16 it/sec) -training >> step=544700, episode=91 reward=0.7540535 (539.21 it/sec) -training >> step=544800, episode=91 reward=0.748549 (585.65 it/sec) -training >> step=544900, episode=91 reward=0.7540448 (523.59 it/sec) -training >> step=545000, episode=91 reward=0.7587674 (549.24 it/sec) -training >> step=545100, episode=91 reward=0.7418274 (538.29 it/sec) -training >> step=545200, episode=91 reward=0.7684842 (529.33 it/sec) -training >> step=545300, episode=92 reward=0.7386234 (153.05 it/sec) -training >> step=545400, episode=92 reward=0.7330623 (546.73 it/sec) -training >> step=545500, episode=92 reward=0.7327875 (554.25 it/sec) -training >> step=545600, episode=92 reward=0.7455142 (507.38 it/sec) -training >> step=545700, episode=92 reward=0.7535893 (540.38 it/sec) -training >> step=545800, episode=92 reward=0.7563442 (530.49 it/sec) -training >> step=545900, episode=92 reward=0.7697187 (594.51 it/sec) -training >> step=546000, episode=92 reward=0.7588913 (565.73 it/sec) -training >> step=546100, episode=92 reward=0.7718532 (536.38 it/sec) -training >> step=546200, episode=92 reward=0.7613885 (577.13 it/sec) -training >> step=546300, episode=92 reward=0.7844675 (525.99 it/sec) -training >> step=546400, episode=92 reward=0.7752417 (551.89 it/sec) -training >> step=546500, episode=92 reward=0.7542784 (506.42 it/sec) -training >> step=546600, episode=92 reward=0.7653819 (528.05 it/sec) -training >> step=546700, episode=92 reward=0.7589692 (581.96 it/sec) -training >> step=546800, episode=92 reward=0.7672032 (536.53 it/sec) -training >> step=546900, episode=92 reward=0.7813461 (550.58 it/sec) -training >> step=547000, episode=92 reward=0.7573933 (569.93 it/sec) -training >> step=547100, episode=92 reward=0.759366 (512.10 it/sec) -training >> step=547200, episode=92 reward=0.7796478 (538.81 it/sec) -training >> step=547300, episode=92 reward=0.7911062 (540.53 it/sec) -training >> step=547400, episode=92 reward=0.7509229 (373.77 it/sec) -training >> step=547500, episode=92 reward=0.7452325 (597.66 it/sec) -training >> step=547600, episode=92 reward=0.7493962 (545.00 it/sec) -training >> step=547700, episode=92 reward=0.759367 (500.92 it/sec) -training >> step=547800, episode=92 reward=0.7527357 (564.38 it/sec) -training >> step=547900, episode=92 reward=0.7617102 (532.35 it/sec) -training >> step=548000, episode=92 reward=0.7706075 (539.10 it/sec) -training >> step=548100, episode=92 reward=0.7692089 (578.74 it/sec) -training >> step=548200, episode=92 reward=0.7597567 (529.36 it/sec) -training >> step=548300, episode=92 reward=0.7779695 (522.98 it/sec) -training >> step=548400, episode=92 reward=0.7672666 (563.88 it/sec) -training >> step=548500, episode=92 reward=0.7533467 (545.04 it/sec) -training >> step=548600, episode=92 reward=0.7810903 (585.87 it/sec) -training >> step=548700, episode=92 reward=0.7600754 (559.63 it/sec) -training >> step=548800, episode=92 reward=0.7796311 (505.68 it/sec) -training >> step=548900, episode=92 reward=0.7505563 (534.34 it/sec) -training >> step=549000, episode=92 reward=0.7462277 (537.54 it/sec) -training >> step=549100, episode=92 reward=0.7688892 (567.64 it/sec) -training >> step=549200, episode=92 reward=0.7680848 (554.47 it/sec) -training >> step=549300, episode=92 reward=0.7757852 (545.50 it/sec) -training >> step=549400, episode=92 reward=0.7765207 (543.42 it/sec) -training >> step=549500, episode=92 reward=0.7506192 (532.85 it/sec) -training >> step=549600, episode=92 reward=0.7691097 (530.92 it/sec) -training >> step=549700, episode=92 reward=0.7357352 (544.48 it/sec) -training >> step=549800, episode=92 reward=0.7675385 (552.50 it/sec) -training >> step=549900, episode=92 reward=0.7729455 (528.10 it/sec) -training >> step=550000, episode=92 reward=0.7458847 (553.08 it/sec) -training >> step=550100, episode=92 reward=0.7616813 (563.44 it/sec) -training >> step=550200, episode=92 reward=0.7555031 (573.38 it/sec) -training >> step=550300, episode=92 reward=0.7829011 (536.32 it/sec) -training >> step=550400, episode=92 reward=0.7615455 (574.75 it/sec) -training >> step=550500, episode=92 reward=0.756802 (533.03 it/sec) -training >> step=550600, episode=92 reward=0.7320296 (539.14 it/sec) -training >> step=550700, episode=92 reward=0.7670113 (565.27 it/sec) -training >> step=550800, episode=92 reward=0.7603257 (536.68 it/sec) -training >> step=550900, episode=92 reward=0.7565182 (575.97 it/sec) -training >> step=551000, episode=92 reward=0.753387 (499.42 it/sec) -training >> step=551100, episode=92 reward=0.7487586 (495.25 it/sec) -training >> step=551200, episode=92 reward=0.740153 (553.21 it/sec) -training >> step=551300, episode=93 reward=0.7555853 (159.48 it/sec) -training >> step=551400, episode=93 reward=0.7591988 (540.44 it/sec) -training >> step=551500, episode=93 reward=0.7506633 (561.95 it/sec) -training >> step=551600, episode=93 reward=0.7703252 (531.43 it/sec) -training >> step=551700, episode=93 reward=0.7712913 (571.30 it/sec) -training >> step=551800, episode=93 reward=0.7465439 (598.70 it/sec) -training >> step=551900, episode=93 reward=0.7430762 (542.10 it/sec) -training >> step=552000, episode=93 reward=0.7704955 (567.24 it/sec) -training >> step=552100, episode=93 reward=0.7624319 (598.02 it/sec) -training >> step=552200, episode=93 reward=0.7742905 (551.93 it/sec) -training >> step=552300, episode=93 reward=0.7602234 (592.11 it/sec) -training >> step=552400, episode=93 reward=0.7694238 (546.72 it/sec) -training >> step=552500, episode=93 reward=0.7733226 (502.60 it/sec) -training >> step=552600, episode=93 reward=0.7783427 (566.69 it/sec) -training >> step=552700, episode=93 reward=0.757385 (560.96 it/sec) -training >> step=552800, episode=93 reward=0.7684672 (543.23 it/sec) -training >> step=552900, episode=93 reward=0.7610321 (575.96 it/sec) -training >> step=553000, episode=93 reward=0.7585626 (497.78 it/sec) -training >> step=553100, episode=93 reward=0.7468789 (534.11 it/sec) -training >> step=553200, episode=93 reward=0.778018 (544.70 it/sec) -training >> step=553300, episode=93 reward=0.7693481 (552.33 it/sec) -training >> step=553400, episode=93 reward=0.7521634 (556.77 it/sec) -training >> step=553500, episode=93 reward=0.7312569 (422.22 it/sec) -training >> step=553600, episode=93 reward=0.7653849 (487.02 it/sec) -training >> step=553700, episode=93 reward=0.7630053 (534.93 it/sec) -training >> step=553800, episode=93 reward=0.7798173 (554.98 it/sec) -training >> step=553900, episode=93 reward=0.7719855 (596.83 it/sec) -training >> step=554000, episode=93 reward=0.7548231 (544.81 it/sec) -training >> step=554100, episode=93 reward=0.7485935 (518.24 it/sec) -training >> step=554200, episode=93 reward=0.7643426 (556.73 it/sec) -training >> step=554300, episode=93 reward=0.7594145 (513.83 it/sec) -training >> step=554400, episode=93 reward=0.7806034 (587.33 it/sec) -training >> step=554500, episode=93 reward=0.7679901 (565.02 it/sec) -training >> step=554600, episode=93 reward=0.7724141 (568.69 it/sec) -training >> step=554700, episode=93 reward=0.7857823 (507.15 it/sec) -training >> step=554800, episode=93 reward=0.765417 (481.68 it/sec) -training >> step=554900, episode=93 reward=0.7457209 (533.84 it/sec) -training >> step=555000, episode=93 reward=0.7730117 (541.92 it/sec) -training >> step=555100, episode=93 reward=0.7423341 (548.70 it/sec) -training >> step=555200, episode=93 reward=0.7641403 (520.12 it/sec) -training >> step=555300, episode=93 reward=0.7551785 (531.84 it/sec) -training >> step=555400, episode=93 reward=0.7811722 (538.17 it/sec) -training >> step=555500, episode=93 reward=0.740335 (565.22 it/sec) -training >> step=555600, episode=93 reward=0.7556304 (548.25 it/sec) -training >> step=555700, episode=93 reward=0.744769 (545.80 it/sec) -training >> step=555800, episode=93 reward=0.7510245 (475.04 it/sec) -training >> step=555900, episode=93 reward=0.7490596 (541.85 it/sec) -training >> step=556000, episode=93 reward=0.7421389 (569.49 it/sec) -training >> step=556100, episode=93 reward=0.7482048 (555.00 it/sec) -training >> step=556200, episode=93 reward=0.7339809 (509.47 it/sec) -training >> step=556300, episode=93 reward=0.7424453 (452.36 it/sec) -training >> step=556400, episode=93 reward=0.7489055 (496.98 it/sec) -training >> step=556500, episode=93 reward=0.7646599 (578.08 it/sec) -training >> step=556600, episode=93 reward=0.744667 (555.33 it/sec) -training >> step=556700, episode=93 reward=0.7558834 (539.99 it/sec) -training >> step=556800, episode=93 reward=0.7644323 (564.07 it/sec) -training >> step=556900, episode=93 reward=0.7616697 (458.26 it/sec) -training >> step=557000, episode=93 reward=0.7693509 (527.12 it/sec) -training >> step=557100, episode=93 reward=0.7499818 (536.72 it/sec) -training >> step=557200, episode=93 reward=0.774352 (551.47 it/sec) -training >> step=557300, episode=94 reward=0.7483232 (169.85 it/sec) -training >> step=557400, episode=94 reward=0.7961708 (557.06 it/sec) -training >> step=557500, episode=94 reward=0.7475195 (558.68 it/sec) -training >> step=557600, episode=94 reward=0.7696761 (607.85 it/sec) -training >> step=557700, episode=94 reward=0.7616894 (546.16 it/sec) -training >> step=557800, episode=94 reward=0.7542786 (469.32 it/sec) -training >> step=557900, episode=94 reward=0.7517061 (580.13 it/sec) -training >> step=558000, episode=94 reward=0.7694318 (567.74 it/sec) -training >> step=558100, episode=94 reward=0.7712142 (569.75 it/sec) -training >> step=558200, episode=94 reward=0.7550111 (572.31 it/sec) -training >> step=558300, episode=94 reward=0.7805516 (541.58 it/sec) -training >> step=558400, episode=94 reward=0.7846227 (482.59 it/sec) -training >> step=558500, episode=94 reward=0.7793638 (516.02 it/sec) -training >> step=558600, episode=94 reward=0.7654997 (538.09 it/sec) -training >> step=558700, episode=94 reward=0.7766263 (562.50 it/sec) -training >> step=558800, episode=94 reward=0.7588825 (562.29 it/sec) -training >> step=558900, episode=94 reward=0.7588888 (512.32 it/sec) -training >> step=559000, episode=94 reward=0.7580533 (570.77 it/sec) -training >> step=559100, episode=94 reward=0.7679419 (541.57 it/sec) -training >> step=559200, episode=94 reward=0.7728369 (545.58 it/sec) -training >> step=559300, episode=94 reward=0.7535412 (532.28 it/sec) -training >> step=559400, episode=94 reward=0.7841007 (557.09 it/sec) -training >> step=559500, episode=94 reward=0.7631711 (525.89 it/sec) -training >> step=559600, episode=94 reward=0.7516183 (528.37 it/sec) -training >> step=559700, episode=94 reward=0.7380081 (394.51 it/sec) -training >> step=559800, episode=94 reward=0.7409901 (579.52 it/sec) -training >> step=559900, episode=94 reward=0.7599125 (558.32 it/sec) -training >> step=560000, episode=94 reward=0.7786602 (507.68 it/sec) -training >> step=560100, episode=94 reward=0.7756718 (551.29 it/sec) -training >> step=560200, episode=94 reward=0.7451124 (565.58 it/sec) -training >> step=560300, episode=94 reward=0.7613451 (545.58 it/sec) -training >> step=560400, episode=94 reward=0.7471527 (545.38 it/sec) -training >> step=560500, episode=94 reward=0.7764467 (544.36 it/sec) -training >> step=560600, episode=94 reward=0.7488008 (514.88 it/sec) -training >> step=560700, episode=94 reward=0.7555743 (532.33 it/sec) -training >> step=560800, episode=94 reward=0.7482203 (580.89 it/sec) -training >> step=560900, episode=94 reward=0.7408735 (540.23 it/sec) -training >> step=561000, episode=94 reward=0.7531133 (541.77 it/sec) -training >> step=561100, episode=94 reward=0.7607723 (555.67 it/sec) -training >> step=561200, episode=94 reward=0.763988 (524.14 it/sec) -training >> step=561300, episode=94 reward=0.7546217 (560.85 it/sec) -training >> step=561400, episode=94 reward=0.7681194 (520.90 it/sec) -training >> step=561500, episode=94 reward=0.7695397 (515.27 it/sec) -training >> step=561600, episode=94 reward=0.7721084 (529.94 it/sec) -training >> step=561700, episode=94 reward=0.7444715 (522.29 it/sec) -training >> step=561800, episode=94 reward=0.7507043 (530.44 it/sec) -training >> step=561900, episode=94 reward=0.7385899 (579.72 it/sec) -training >> step=562000, episode=94 reward=0.7487414 (526.48 it/sec) -training >> step=562100, episode=94 reward=0.7355673 (563.43 it/sec) -training >> step=562200, episode=94 reward=0.7402771 (536.60 it/sec) -training >> step=562300, episode=94 reward=0.7516955 (514.92 it/sec) -training >> step=562400, episode=94 reward=0.7498978 (566.51 it/sec) -training >> step=562500, episode=94 reward=0.7530462 (527.92 it/sec) -training >> step=562600, episode=94 reward=0.7364004 (551.15 it/sec) -training >> step=562700, episode=94 reward=0.7563308 (579.95 it/sec) -training >> step=562800, episode=94 reward=0.7698736 (520.75 it/sec) -training >> step=562900, episode=94 reward=0.750366 (552.34 it/sec) -training >> step=563000, episode=94 reward=0.7584672 (552.28 it/sec) -training >> step=563100, episode=94 reward=0.7574582 (550.99 it/sec) -training >> step=563200, episode=94 reward=0.7558315 (591.79 it/sec) -training >> step=563300, episode=95 reward=0.785003 (162.77 it/sec) -training >> step=563400, episode=95 reward=0.7483767 (561.38 it/sec) -training >> step=563500, episode=95 reward=0.7626556 (544.69 it/sec) -training >> step=563600, episode=95 reward=0.7569485 (554.24 it/sec) -training >> step=563700, episode=95 reward=0.7669333 (506.40 it/sec) -training >> step=563800, episode=95 reward=0.7739773 (535.42 it/sec) -training >> step=563900, episode=95 reward=0.741452 (510.60 it/sec) -training >> step=564000, episode=95 reward=0.7502324 (498.17 it/sec) -training >> step=564100, episode=95 reward=0.7651719 (533.50 it/sec) -training >> step=564200, episode=95 reward=0.7704532 (525.50 it/sec) -training >> step=564300, episode=95 reward=0.7568421 (562.60 it/sec) -training >> step=564400, episode=95 reward=0.7631975 (549.79 it/sec) -training >> step=564500, episode=95 reward=0.7671297 (526.65 it/sec) -training >> step=564600, episode=95 reward=0.7733774 (583.35 it/sec) -training >> step=564700, episode=95 reward=0.7644739 (520.70 it/sec) -training >> step=564800, episode=95 reward=0.731498 (522.83 it/sec) -training >> step=564900, episode=95 reward=0.7459615 (535.49 it/sec) -training >> step=565000, episode=95 reward=0.7704575 (542.35 it/sec) -training >> step=565100, episode=95 reward=0.7829412 (583.74 it/sec) -training >> step=565200, episode=95 reward=0.7690901 (542.70 it/sec) -training >> step=565300, episode=95 reward=0.7540966 (529.84 it/sec) -training >> step=565400, episode=95 reward=0.7615581 (512.67 it/sec) -training >> step=565500, episode=95 reward=0.7799475 (528.58 it/sec) -training >> step=565600, episode=95 reward=0.771132 (566.49 it/sec) -training >> step=565700, episode=95 reward=0.7732424 (560.34 it/sec) -training >> step=565800, episode=95 reward=0.7620558 (441.12 it/sec) -training >> step=565900, episode=95 reward=0.7716128 (556.38 it/sec) -training >> step=566000, episode=95 reward=0.7726622 (563.82 it/sec) -training >> step=566100, episode=95 reward=0.7671617 (588.18 it/sec) -training >> step=566200, episode=95 reward=0.7601339 (570.33 it/sec) -training >> step=566300, episode=95 reward=0.7610587 (534.68 it/sec) -training >> step=566400, episode=95 reward=0.7561042 (572.81 it/sec) -training >> step=566500, episode=95 reward=0.7285715 (550.41 it/sec) -training >> step=566600, episode=95 reward=0.7511694 (501.12 it/sec) -training >> step=566700, episode=95 reward=0.7613022 (521.25 it/sec) -training >> step=566800, episode=95 reward=0.7563778 (564.84 it/sec) -training >> step=566900, episode=95 reward=0.7562065 (584.16 it/sec) -training >> step=567000, episode=95 reward=0.7703105 (522.53 it/sec) -training >> step=567100, episode=95 reward=0.722467 (510.71 it/sec) -training >> step=567200, episode=95 reward=0.7415075 (540.14 it/sec) -training >> step=567300, episode=95 reward=0.7633592 (524.86 it/sec) -training >> step=567400, episode=95 reward=0.7528536 (580.28 it/sec) -training >> step=567500, episode=95 reward=0.7520906 (553.10 it/sec) -training >> step=567600, episode=95 reward=0.7656183 (500.80 it/sec) -training >> step=567700, episode=95 reward=0.7477228 (518.96 it/sec) -training >> step=567800, episode=95 reward=0.721334 (540.08 it/sec) -training >> step=567900, episode=95 reward=0.7346452 (560.09 it/sec) -training >> step=568000, episode=95 reward=0.7417746 (548.78 it/sec) -training >> step=568100, episode=95 reward=0.7460159 (522.70 it/sec) -training >> step=568200, episode=95 reward=0.7318418 (549.55 it/sec) -training >> step=568300, episode=95 reward=0.7497332 (537.15 it/sec) -training >> step=568400, episode=95 reward=0.7425012 (563.62 it/sec) -training >> step=568500, episode=95 reward=0.7569544 (572.26 it/sec) -training >> step=568600, episode=95 reward=0.7389365 (561.72 it/sec) -training >> step=568700, episode=95 reward=0.7577836 (517.31 it/sec) -training >> step=568800, episode=95 reward=0.7411053 (508.01 it/sec) -training >> step=568900, episode=95 reward=0.7615327 (552.99 it/sec) -training >> step=569000, episode=95 reward=0.7575253 (578.66 it/sec) -training >> step=569100, episode=95 reward=0.761466 (528.54 it/sec) -training >> step=569200, episode=95 reward=0.7622583 (527.54 it/sec) -training >> step=569300, episode=96 reward=0.783118 (164.04 it/sec) -training >> step=569400, episode=96 reward=0.7464457 (527.43 it/sec) -training >> step=569500, episode=96 reward=0.7488455 (599.29 it/sec) -training >> step=569600, episode=96 reward=0.7690443 (541.23 it/sec) -training >> step=569700, episode=96 reward=0.7622021 (542.20 it/sec) -training >> step=569800, episode=96 reward=0.761869 (572.07 it/sec) -training >> step=569900, episode=96 reward=0.7746699 (540.98 it/sec) -training >> step=570000, episode=96 reward=0.764535 (595.11 it/sec) -training >> step=570100, episode=96 reward=0.7465136 (528.05 it/sec) -training >> step=570200, episode=96 reward=0.7475575 (535.88 it/sec) -training >> step=570300, episode=96 reward=0.7607391 (577.99 it/sec) -training >> step=570400, episode=96 reward=0.7737449 (546.39 it/sec) -training >> step=570500, episode=96 reward=0.7720082 (539.38 it/sec) -training >> step=570600, episode=96 reward=0.7754163 (572.06 it/sec) -training >> step=570700, episode=96 reward=0.7499799 (507.45 it/sec) -training >> step=570800, episode=96 reward=0.7688057 (556.25 it/sec) -training >> step=570900, episode=96 reward=0.7611596 (545.24 it/sec) -training >> step=571000, episode=96 reward=0.7769165 (533.07 it/sec) -training >> step=571100, episode=96 reward=0.7389513 (564.67 it/sec) -training >> step=571200, episode=96 reward=0.7467976 (550.92 it/sec) -training >> step=571300, episode=96 reward=0.7774984 (473.26 it/sec) -training >> step=571400, episode=96 reward=0.7644381 (503.63 it/sec) -training >> step=571500, episode=96 reward=0.7301181 (511.46 it/sec) -training >> step=571600, episode=96 reward=0.7329211 (560.80 it/sec) -training >> step=571700, episode=96 reward=0.7545735 (558.65 it/sec) -training >> step=571800, episode=96 reward=0.7475485 (478.71 it/sec) -training >> step=571900, episode=96 reward=0.7651622 (352.17 it/sec) -training >> step=572000, episode=96 reward=0.7527595 (532.03 it/sec) -training >> step=572100, episode=96 reward=0.7463166 (597.29 it/sec) -training >> step=572200, episode=96 reward=0.761972 (571.30 it/sec) -training >> step=572300, episode=96 reward=0.7673917 (506.36 it/sec) -training >> step=572400, episode=96 reward=0.7693282 (533.66 it/sec) -training >> step=572500, episode=96 reward=0.7658705 (545.82 it/sec) -training >> step=572600, episode=96 reward=0.7653975 (558.16 it/sec) -training >> step=572700, episode=96 reward=0.749458 (553.77 it/sec) -training >> step=572800, episode=96 reward=0.7405046 (541.24 it/sec) -training >> step=572900, episode=96 reward=0.7877555 (509.32 it/sec) -training >> step=573000, episode=96 reward=0.7272219 (543.41 it/sec) -training >> step=573100, episode=96 reward=0.7693896 (532.34 it/sec) -training >> step=573200, episode=96 reward=0.7452575 (564.90 it/sec) -training >> step=573300, episode=96 reward=0.7429782 (533.29 it/sec) -training >> step=573400, episode=96 reward=0.7604074 (512.53 it/sec) -training >> step=573500, episode=96 reward=0.7671937 (548.53 it/sec) -training >> step=573600, episode=96 reward=0.7640548 (555.43 it/sec) -training >> step=573700, episode=96 reward=0.7813723 (588.08 it/sec) -training >> step=573800, episode=96 reward=0.7526149 (551.65 it/sec) -training >> step=573900, episode=96 reward=0.7516115 (561.97 it/sec) -training >> step=574000, episode=96 reward=0.7276728 (501.04 it/sec) -training >> step=574100, episode=96 reward=0.7573237 (518.10 it/sec) -training >> step=574200, episode=96 reward=0.7292207 (575.09 it/sec) -training >> step=574300, episode=96 reward=0.711211 (553.52 it/sec) -training >> step=574400, episode=96 reward=0.7482162 (540.64 it/sec) -training >> step=574500, episode=96 reward=0.7597256 (488.28 it/sec) -training >> step=574600, episode=96 reward=0.7457603 (488.91 it/sec) -training >> step=574700, episode=96 reward=0.7256863 (547.28 it/sec) -training >> step=574800, episode=96 reward=0.7602671 (549.53 it/sec) -training >> step=574900, episode=96 reward=0.7423577 (557.15 it/sec) -training >> step=575000, episode=96 reward=0.7669903 (583.67 it/sec) -training >> step=575100, episode=96 reward=0.752853 (508.96 it/sec) -training >> step=575200, episode=96 reward=0.7294232 (546.60 it/sec) -training >> step=575300, episode=97 reward=0.743242 (167.54 it/sec) -training >> step=575400, episode=97 reward=0.7475363 (491.94 it/sec) -training >> step=575500, episode=97 reward=0.7564187 (539.18 it/sec) -training >> step=575600, episode=97 reward=0.7853325 (581.26 it/sec) -training >> step=575700, episode=97 reward=0.7603279 (554.69 it/sec) -training >> step=575800, episode=97 reward=0.7710916 (589.91 it/sec) -training >> step=575900, episode=97 reward=0.770978 (540.54 it/sec) -training >> step=576000, episode=97 reward=0.7848271 (483.30 it/sec) -training >> step=576100, episode=97 reward=0.7357588 (590.86 it/sec) -training >> step=576200, episode=97 reward=0.7551925 (568.56 it/sec) -training >> step=576300, episode=97 reward=0.7405061 (575.03 it/sec) -training >> step=576400, episode=97 reward=0.7602029 (604.45 it/sec) -training >> step=576500, episode=97 reward=0.7711161 (530.78 it/sec) -training >> step=576600, episode=97 reward=0.766084 (521.33 it/sec) -training >> step=576700, episode=97 reward=0.8009026 (520.94 it/sec) -training >> step=576800, episode=97 reward=0.7573634 (546.24 it/sec) -training >> step=576900, episode=97 reward=0.7776147 (575.09 it/sec) -training >> step=577000, episode=97 reward=0.7650795 (491.67 it/sec) -training >> step=577100, episode=97 reward=0.7500089 (478.68 it/sec) -training >> step=577200, episode=97 reward=0.7588622 (529.90 it/sec) -training >> step=577300, episode=97 reward=0.8031834 (534.74 it/sec) -training >> step=577400, episode=97 reward=0.7569401 (591.74 it/sec) -training >> step=577500, episode=97 reward=0.7727157 (527.67 it/sec) -training >> step=577600, episode=97 reward=0.7640415 (540.89 it/sec) -training >> step=577700, episode=97 reward=0.7578581 (486.62 it/sec) -training >> step=577800, episode=97 reward=0.7875368 (492.62 it/sec) -training >> step=577900, episode=97 reward=0.7626762 (560.68 it/sec) -training >> step=578000, episode=97 reward=0.7427508 (375.33 it/sec) -training >> step=578100, episode=97 reward=0.756414 (546.91 it/sec) -training >> step=578200, episode=97 reward=0.7539333 (531.61 it/sec) -training >> step=578300, episode=97 reward=0.7559035 (570.46 it/sec) -training >> step=578400, episode=97 reward=0.7749292 (556.18 it/sec) -training >> step=578500, episode=97 reward=0.7656089 (570.90 it/sec) -training >> step=578600, episode=97 reward=0.7726339 (539.27 it/sec) -training >> step=578700, episode=97 reward=0.7566247 (588.50 it/sec) -training >> step=578800, episode=97 reward=0.7543734 (469.40 it/sec) -training >> step=578900, episode=97 reward=0.7769484 (523.15 it/sec) -training >> step=579000, episode=97 reward=0.7685584 (575.88 it/sec) -training >> step=579100, episode=97 reward=0.7681758 (523.43 it/sec) -training >> step=579200, episode=97 reward=0.7522064 (576.66 it/sec) -training >> step=579300, episode=97 reward=0.7516165 (514.91 it/sec) -training >> step=579400, episode=97 reward=0.7536606 (516.48 it/sec) -training >> step=579500, episode=97 reward=0.7568461 (547.10 it/sec) -training >> step=579600, episode=97 reward=0.7381498 (546.52 it/sec) -training >> step=579700, episode=97 reward=0.7600136 (538.03 it/sec) -training >> step=579800, episode=97 reward=0.7600907 (555.42 it/sec) -training >> step=579900, episode=97 reward=0.7423884 (498.13 it/sec) -training >> step=580000, episode=97 reward=0.7379436 (557.32 it/sec) -training >> step=580100, episode=97 reward=0.746531 (530.65 it/sec) -training >> step=580200, episode=97 reward=0.7305833 (514.18 it/sec) -training >> step=580300, episode=97 reward=0.7332458 (587.94 it/sec) -training >> step=580400, episode=97 reward=0.7388594 (533.33 it/sec) -training >> step=580500, episode=97 reward=0.749799 (475.48 it/sec) -training >> step=580600, episode=97 reward=0.7421088 (537.01 it/sec) -training >> step=580700, episode=97 reward=0.733434 (552.70 it/sec) -training >> step=580800, episode=97 reward=0.7528663 (582.53 it/sec) -training >> step=580900, episode=97 reward=0.7336226 (558.40 it/sec) -training >> step=581000, episode=97 reward=0.7510993 (493.27 it/sec) -training >> step=581100, episode=97 reward=0.7645044 (514.79 it/sec) -training >> step=581200, episode=97 reward=0.7463695 (499.73 it/sec) -training >> step=581300, episode=98 reward=0.7490602 (175.94 it/sec) -training >> step=581400, episode=98 reward=0.7658795 (513.91 it/sec) -training >> step=581500, episode=98 reward=0.7823657 (537.70 it/sec) -training >> step=581600, episode=98 reward=0.7457644 (539.87 it/sec) -training >> step=581700, episode=98 reward=0.7706639 (537.22 it/sec) -training >> step=581800, episode=98 reward=0.7603731 (515.58 it/sec) -training >> step=581900, episode=98 reward=0.7574093 (523.67 it/sec) -training >> step=582000, episode=98 reward=0.7747947 (491.32 it/sec) -training >> step=582100, episode=98 reward=0.7710813 (437.46 it/sec) -training >> step=582200, episode=98 reward=0.7704119 (520.79 it/sec) -training >> step=582300, episode=98 reward=0.7583536 (466.77 it/sec) -training >> step=582400, episode=98 reward=0.7533512 (424.49 it/sec) -training >> step=582500, episode=98 reward=0.7501797 (504.69 it/sec) -training >> step=582600, episode=98 reward=0.7836917 (505.25 it/sec) -training >> step=582700, episode=98 reward=0.7698042 (577.23 it/sec) -training >> step=582800, episode=98 reward=0.756685 (528.81 it/sec) -training >> step=582900, episode=98 reward=0.7520862 (547.27 it/sec) -training >> step=583000, episode=98 reward=0.7647706 (432.21 it/sec) -training >> step=583100, episode=98 reward=0.7771174 (455.11 it/sec) -training >> step=583200, episode=98 reward=0.7668413 (560.27 it/sec) -training >> step=583300, episode=98 reward=0.7739174 (536.13 it/sec) -training >> step=583400, episode=98 reward=0.7544757 (553.68 it/sec) -training >> step=583500, episode=98 reward=0.7752397 (521.09 it/sec) -training >> step=583600, episode=98 reward=0.7726572 (496.69 it/sec) -training >> step=583700, episode=98 reward=0.7517849 (498.47 it/sec) -training >> step=583800, episode=98 reward=0.7534353 (543.83 it/sec) -training >> step=583900, episode=98 reward=0.7647315 (537.23 it/sec) -training >> step=584000, episode=98 reward=0.7699228 (478.85 it/sec) -training >> step=584100, episode=98 reward=0.7449765 (539.05 it/sec) -training >> step=584200, episode=98 reward=0.7585616 (370.97 it/sec) -training >> step=584300, episode=98 reward=0.7491438 (538.13 it/sec) -training >> step=584400, episode=98 reward=0.7575817 (529.64 it/sec) -training >> step=584500, episode=98 reward=0.7762038 (454.76 it/sec) -training >> step=584600, episode=98 reward=0.7760605 (511.35 it/sec) -training >> step=584700, episode=98 reward=0.7400843 (539.40 it/sec) -training >> step=584800, episode=98 reward=0.7703985 (585.05 it/sec) -training >> step=584900, episode=98 reward=0.7807059 (515.74 it/sec) -training >> step=585000, episode=98 reward=0.7480398 (528.48 it/sec) -training >> step=585100, episode=98 reward=0.7408113 (492.00 it/sec) -training >> step=585200, episode=98 reward=0.7782202 (516.27 it/sec) -training >> step=585300, episode=98 reward=0.7562801 (559.31 it/sec) -training >> step=585400, episode=98 reward=0.7794575 (537.58 it/sec) -training >> step=585500, episode=98 reward=0.7371718 (517.35 it/sec) -training >> step=585600, episode=98 reward=0.7469622 (510.09 it/sec) -training >> step=585700, episode=98 reward=0.7527973 (522.58 it/sec) -training >> step=585800, episode=98 reward=0.7584581 (563.07 it/sec) -training >> step=585900, episode=98 reward=0.7505979 (515.75 it/sec) -training >> step=586000, episode=98 reward=0.7681871 (520.61 it/sec) -training >> step=586100, episode=98 reward=0.7594576 (564.81 it/sec) -training >> step=586200, episode=98 reward=0.74517 (484.77 it/sec) -training >> step=586300, episode=98 reward=0.7392913 (504.52 it/sec) -training >> step=586400, episode=98 reward=0.7424189 (545.83 it/sec) -training >> step=586500, episode=98 reward=0.7310175 (539.90 it/sec) -training >> step=586600, episode=98 reward=0.7600915 (521.15 it/sec) -training >> step=586700, episode=98 reward=0.7060479 (465.99 it/sec) -training >> step=586800, episode=98 reward=0.7433448 (524.86 it/sec) -training >> step=586900, episode=98 reward=0.7694086 (551.44 it/sec) -training >> step=587000, episode=98 reward=0.7530571 (562.11 it/sec) -training >> step=587100, episode=98 reward=0.7746406 (499.69 it/sec) -training >> step=587200, episode=98 reward=0.7313349 (504.45 it/sec) -training >> step=587300, episode=99 reward=0.7510786 (192.66 it/sec) -training >> step=587400, episode=99 reward=0.7544596 (578.82 it/sec) -training >> step=587500, episode=99 reward=0.7811805 (533.26 it/sec) -training >> step=587600, episode=99 reward=0.7697586 (469.62 it/sec) -training >> step=587700, episode=99 reward=0.7576397 (525.60 it/sec) -training >> step=587800, episode=99 reward=0.7726984 (545.06 it/sec) -training >> step=587900, episode=99 reward=0.7672816 (509.67 it/sec) -training >> step=588000, episode=99 reward=0.7656128 (582.96 it/sec) -training >> step=588100, episode=99 reward=0.7932463 (529.57 it/sec) -training >> step=588200, episode=99 reward=0.7619429 (452.34 it/sec) -training >> step=588300, episode=99 reward=0.7486032 (544.07 it/sec) -training >> step=588400, episode=99 reward=0.7587802 (519.50 it/sec) -training >> step=588500, episode=99 reward=0.7551146 (537.38 it/sec) -training >> step=588600, episode=99 reward=0.7064094 (542.52 it/sec) -training >> step=588700, episode=99 reward=0.758471 (499.30 it/sec) -training >> step=588800, episode=99 reward=0.7617323 (520.81 it/sec) -training >> step=588900, episode=99 reward=0.7593567 (532.28 it/sec) -training >> step=589000, episode=99 reward=0.7446201 (558.74 it/sec) -training >> step=589100, episode=99 reward=0.7527257 (527.46 it/sec) -training >> step=589200, episode=99 reward=0.7685853 (493.12 it/sec) -training >> step=589300, episode=99 reward=0.7675365 (473.17 it/sec) -training >> step=589400, episode=99 reward=0.7731073 (541.40 it/sec) -training >> step=589500, episode=99 reward=0.7559675 (562.65 it/sec) -training >> step=589600, episode=99 reward=0.76687 (551.49 it/sec) -training >> step=589700, episode=99 reward=0.7472618 (499.58 it/sec) -training >> step=589800, episode=99 reward=0.762646 (525.50 it/sec) -training >> step=589900, episode=99 reward=0.7691532 (520.54 it/sec) -training >> step=590000, episode=99 reward=0.7573593 (547.30 it/sec) -training >> step=590100, episode=99 reward=0.7699219 (537.92 it/sec) -training >> step=590200, episode=99 reward=0.7574484 (502.10 it/sec) -training >> step=590300, episode=99 reward=0.7749534 (449.32 it/sec) -training >> step=590400, episode=99 reward=0.7592306 (487.43 it/sec) -training >> step=590500, episode=99 reward=0.7639291 (510.08 it/sec) -training >> step=590600, episode=99 reward=0.7587684 (521.04 it/sec) -training >> step=590700, episode=99 reward=0.7726176 (514.33 it/sec) -training >> step=590800, episode=99 reward=0.7700449 (490.02 it/sec) -training >> step=590900, episode=99 reward=0.7614403 (493.70 it/sec) -training >> step=591000, episode=99 reward=0.7540475 (499.98 it/sec) -training >> step=591100, episode=99 reward=0.7660527 (545.22 it/sec) -training >> step=591200, episode=99 reward=0.769279 (508.76 it/sec) -training >> step=591300, episode=99 reward=0.7513373 (504.56 it/sec) -training >> step=591400, episode=99 reward=0.7832066 (505.19 it/sec) -training >> step=591500, episode=99 reward=0.7545036 (510.29 it/sec) -training >> step=591600, episode=99 reward=0.7531003 (524.44 it/sec) -training >> step=591700, episode=99 reward=0.7545744 (563.54 it/sec) -training >> step=591800, episode=99 reward=0.7510414 (534.04 it/sec) -training >> step=591900, episode=99 reward=0.7577602 (567.05 it/sec) -training >> step=592000, episode=99 reward=0.7594349 (479.39 it/sec) -training >> step=592100, episode=99 reward=0.7316734 (542.38 it/sec) -training >> step=592200, episode=99 reward=0.7355411 (548.79 it/sec) -training >> step=592300, episode=99 reward=0.7468176 (484.05 it/sec) -training >> step=592400, episode=99 reward=0.7830398 (501.42 it/sec) -training >> step=592500, episode=99 reward=0.7426025 (501.71 it/sec) -training >> step=592600, episode=99 reward=0.7436023 (474.76 it/sec) -training >> step=592700, episode=99 reward=0.7592851 (560.65 it/sec) -training >> step=592800, episode=99 reward=0.7361042 (518.13 it/sec) -training >> step=592900, episode=99 reward=0.7434363 (544.37 it/sec) -training >> step=593000, episode=99 reward=0.7679712 (474.28 it/sec) -training >> step=593100, episode=99 reward=0.772121 (490.08 it/sec) -training >> step=593200, episode=99 reward=0.7481626 (449.21 it/sec) -training >> step=593300, episode=100 reward=0.7614076 (245.08 it/sec) -training >> step=593400, episode=100 reward=0.779273 (492.80 it/sec) -training >> step=593500, episode=100 reward=0.7488468 (511.20 it/sec) -training >> step=593600, episode=100 reward=0.7569792 (577.80 it/sec) -training >> step=593700, episode=100 reward=0.7835719 (540.69 it/sec) -training >> step=593800, episode=100 reward=0.7699476 (533.85 it/sec) -training >> step=593900, episode=100 reward=0.7622617 (550.25 it/sec) -training >> step=594000, episode=100 reward=0.7781698 (476.22 it/sec) -training >> step=594100, episode=100 reward=0.7378063 (549.27 it/sec) -training >> step=594200, episode=100 reward=0.7667211 (510.57 it/sec) -training >> step=594300, episode=100 reward=0.7780086 (541.04 it/sec) -training >> step=594400, episode=100 reward=0.7665509 (541.98 it/sec) -training >> step=594500, episode=100 reward=0.7644659 (504.16 it/sec) -training >> step=594600, episode=100 reward=0.7543574 (491.54 it/sec) -training >> step=594700, episode=100 reward=0.7485463 (527.52 it/sec) -training >> step=594800, episode=100 reward=0.7611662 (564.66 it/sec) -training >> step=594900, episode=100 reward=0.7634716 (545.35 it/sec) -training >> step=595000, episode=100 reward=0.7902893 (547.24 it/sec) -training >> step=595100, episode=100 reward=0.745039 (482.54 it/sec) -training >> step=595200, episode=100 reward=0.7748712 (534.13 it/sec) -training >> step=595300, episode=100 reward=0.7635143 (518.89 it/sec) -training >> step=595400, episode=100 reward=0.7615595 (532.20 it/sec) -training >> step=595500, episode=100 reward=0.7688476 (534.45 it/sec) -training >> step=595600, episode=100 reward=0.7559733 (527.72 it/sec) -training >> step=595700, episode=100 reward=0.7631336 (475.34 it/sec) -training >> step=595800, episode=100 reward=0.7850691 (479.22 it/sec) -training >> step=595900, episode=100 reward=0.7564899 (489.27 it/sec) -training >> step=596000, episode=100 reward=0.755814 (503.23 it/sec) -training >> step=596100, episode=100 reward=0.7620528 (485.10 it/sec) -training >> step=596200, episode=100 reward=0.7503902 (429.08 it/sec) -training >> step=596300, episode=100 reward=0.7623015 (401.55 it/sec) -training >> step=596400, episode=100 reward=0.7749991 (363.09 it/sec) -training >> step=596500, episode=100 reward=0.7697203 (357.84 it/sec) -training >> step=596600, episode=100 reward=0.7908054 (422.87 it/sec) -training >> step=596700, episode=100 reward=0.7732325 (343.54 it/sec) -training >> step=596800, episode=100 reward=0.7658086 (427.63 it/sec) -training >> step=596900, episode=100 reward=0.7435513 (372.37 it/sec) -training >> step=597000, episode=100 reward=0.7594368 (392.19 it/sec) -training >> step=597100, episode=100 reward=0.7396589 (373.43 it/sec) -training >> step=597200, episode=100 reward=0.7472562 (473.86 it/sec) -training >> step=597300, episode=100 reward=0.7779741 (420.90 it/sec) -training >> step=597400, episode=100 reward=0.7364722 (386.51 it/sec) -training >> step=597500, episode=100 reward=0.7785149 (323.75 it/sec) -training >> step=597600, episode=100 reward=0.7482233 (356.92 it/sec) -training >> step=597700, episode=100 reward=0.7865721 (297.90 it/sec) -training >> step=597800, episode=100 reward=0.7585555 (342.15 it/sec) -training >> step=597900, episode=100 reward=0.7428154 (398.04 it/sec) -training >> step=598000, episode=100 reward=0.7380406 (477.52 it/sec) -training >> step=598100, episode=100 reward=0.7507557 (486.89 it/sec) -training >> step=598200, episode=100 reward=0.7524862 (481.63 it/sec) -training >> step=598300, episode=100 reward=0.771858 (453.20 it/sec) -training >> step=598400, episode=100 reward=0.7557644 (494.40 it/sec) -training >> step=598500, episode=100 reward=0.7479676 (491.70 it/sec) -training >> step=598600, episode=100 reward=0.7671282 (506.09 it/sec) -training >> step=598700, episode=100 reward=0.7662706 (509.54 it/sec) -training >> step=598800, episode=100 reward=0.7533012 (503.88 it/sec) -training >> step=598900, episode=100 reward=0.7535194 (482.74 it/sec) -training >> step=599000, episode=100 reward=0.7682924 (463.42 it/sec) -training >> step=599100, episode=100 reward=0.7601392 (462.09 it/sec) -training >> step=599200, episode=100 reward=0.7375955 (462.78 it/sec) -training >> step=599300, episode=101 reward=0.7512038 (167.44 it/sec) -training >> step=599400, episode=101 reward=0.7692341 (459.44 it/sec) -training >> step=599500, episode=101 reward=0.7706954 (485.98 it/sec) -training >> step=599600, episode=101 reward=0.7591271 (516.82 it/sec) -training >> step=599700, episode=101 reward=0.7562171 (515.01 it/sec) -training >> step=599800, episode=101 reward=0.773486 (533.75 it/sec) -training >> step=599900, episode=101 reward=0.7764817 (479.16 it/sec) -training >> step=600000, episode=101 reward=0.7494785 (458.84 it/sec) -training >> step=600100, episode=101 reward=0.7649702 (476.90 it/sec) -training >> step=600200, episode=101 reward=0.752482 (571.41 it/sec) -training >> step=600300, episode=101 reward=0.7570675 (513.21 it/sec) -training >> step=600400, episode=101 reward=0.7673653 (533.26 it/sec) -training >> step=600500, episode=101 reward=0.7568206 (505.32 it/sec) -training >> step=600600, episode=101 reward=0.7507833 (506.02 it/sec) -training >> step=600700, episode=101 reward=0.7366376 (543.73 it/sec) -training >> step=600800, episode=101 reward=0.7593045 (555.64 it/sec) -training >> step=600900, episode=101 reward=0.7614111 (557.89 it/sec) -training >> step=601000, episode=101 reward=0.7493292 (469.52 it/sec) -training >> step=601100, episode=101 reward=0.7793956 (492.48 it/sec) -training >> step=601200, episode=101 reward=0.7569693 (509.94 it/sec) -training >> step=601300, episode=101 reward=0.7667232 (557.25 it/sec) -training >> step=601400, episode=101 reward=0.7688495 (472.24 it/sec) -training >> step=601500, episode=101 reward=0.7695861 (418.80 it/sec) -training >> step=601600, episode=101 reward=0.7809525 (375.74 it/sec) -training >> step=601700, episode=101 reward=0.7792464 (405.51 it/sec) -training >> step=601800, episode=101 reward=0.7708848 (392.60 it/sec) -training >> step=601900, episode=101 reward=0.744608 (423.23 it/sec) -training >> step=602000, episode=101 reward=0.7670896 (435.04 it/sec) -training >> step=602100, episode=101 reward=0.7767142 (390.40 it/sec) -training >> step=602200, episode=101 reward=0.7477881 (373.37 it/sec) -training >> step=602300, episode=101 reward=0.7772845 (359.16 it/sec) -training >> step=602400, episode=101 reward=0.7737097 (411.65 it/sec) -training >> step=602500, episode=101 reward=0.7755067 (494.11 it/sec) -training >> step=602600, episode=101 reward=0.7635358 (401.18 it/sec) -training >> step=602700, episode=101 reward=0.775679 (524.40 it/sec) -training >> step=602800, episode=101 reward=0.7661867 (519.27 it/sec) -training >> step=602900, episode=101 reward=0.7358062 (508.65 it/sec) -training >> step=603000, episode=101 reward=0.7448523 (523.00 it/sec) -training >> step=603100, episode=101 reward=0.7683961 (529.56 it/sec) -training >> step=603200, episode=101 reward=0.7644537 (545.61 it/sec) -training >> step=603300, episode=101 reward=0.7521003 (478.06 it/sec) -training >> step=603400, episode=101 reward=0.7176304 (574.07 it/sec) -training >> step=603500, episode=101 reward=0.7445524 (518.55 it/sec) -training >> step=603600, episode=101 reward=0.7568706 (583.04 it/sec) -training >> step=603700, episode=101 reward=0.7638898 (552.51 it/sec) -training >> step=603800, episode=101 reward=0.7523817 (495.30 it/sec) -training >> step=603900, episode=101 reward=0.7531442 (554.09 it/sec) -training >> step=604000, episode=101 reward=0.7508824 (556.40 it/sec) -training >> step=604100, episode=101 reward=0.7369992 (559.45 it/sec) -training >> step=604200, episode=101 reward=0.7410874 (535.55 it/sec) -training >> step=604300, episode=101 reward=0.7554768 (553.30 it/sec) -training >> step=604400, episode=101 reward=0.7543049 (499.99 it/sec) -training >> step=604500, episode=101 reward=0.7604819 (555.77 it/sec) -training >> step=604600, episode=101 reward=0.728222 (539.16 it/sec) -training >> step=604700, episode=101 reward=0.7485829 (564.26 it/sec) -training >> step=604800, episode=101 reward=0.7409902 (517.94 it/sec) -training >> step=604900, episode=101 reward=0.7300012 (501.65 it/sec) -training >> step=605000, episode=101 reward=0.7563544 (407.32 it/sec) -training >> step=605100, episode=101 reward=0.7684775 (500.57 it/sec) -training >> step=605200, episode=101 reward=0.756678 (549.78 it/sec) -training >> step=605300, episode=102 reward=0.7696633 (118.01 it/sec) -training >> step=605400, episode=102 reward=0.7730671 (421.15 it/sec) -training >> step=605500, episode=102 reward=0.7552543 (350.46 it/sec) -training >> step=605600, episode=102 reward=0.7791076 (410.79 it/sec) -training >> step=605700, episode=102 reward=0.7653307 (431.23 it/sec) -training >> step=605800, episode=102 reward=0.7363648 (408.25 it/sec) -training >> step=605900, episode=102 reward=0.7581169 (497.63 it/sec) -training >> step=606000, episode=102 reward=0.7765963 (433.58 it/sec) -training >> step=606100, episode=102 reward=0.7322015 (479.70 it/sec) -training >> step=606200, episode=102 reward=0.7661278 (503.46 it/sec) -training >> step=606300, episode=102 reward=0.7614974 (504.76 it/sec) -training >> step=606400, episode=102 reward=0.7744716 (516.30 it/sec) -training >> step=606500, episode=102 reward=0.7646153 (500.67 it/sec) -training >> step=606600, episode=102 reward=0.7573687 (476.60 it/sec) -training >> step=606700, episode=102 reward=0.7735655 (490.77 it/sec) -training >> step=606800, episode=102 reward=0.7769927 (522.04 it/sec) -training >> step=606900, episode=102 reward=0.7719452 (487.94 it/sec) -training >> step=607000, episode=102 reward=0.7697363 (508.24 it/sec) -training >> step=607100, episode=102 reward=0.7586975 (499.97 it/sec) -training >> step=607200, episode=102 reward=0.7661567 (468.63 it/sec) -training >> step=607300, episode=102 reward=0.7564695 (528.46 it/sec) -training >> step=607400, episode=102 reward=0.7746421 (503.56 it/sec) -training >> step=607500, episode=102 reward=0.7626913 (508.66 it/sec) -training >> step=607600, episode=102 reward=0.7592813 (473.41 it/sec) -training >> step=607700, episode=102 reward=0.7419211 (500.71 it/sec) -training >> step=607800, episode=102 reward=0.7568486 (516.97 it/sec) -training >> step=607900, episode=102 reward=0.7525235 (533.71 it/sec) -training >> step=608000, episode=102 reward=0.759608 (503.26 it/sec) -training >> step=608100, episode=102 reward=0.7635697 (492.74 it/sec) -training >> step=608200, episode=102 reward=0.7565119 (510.06 it/sec) -training >> step=608300, episode=102 reward=0.7622316 (504.22 it/sec) -training >> step=608400, episode=102 reward=0.7524365 (523.15 it/sec) -training >> step=608500, episode=102 reward=0.7721904 (520.95 it/sec) -training >> step=608600, episode=102 reward=0.7518528 (464.22 it/sec) -training >> step=608700, episode=102 reward=0.7702312 (397.70 it/sec) -training >> step=608800, episode=102 reward=0.7646341 (494.22 it/sec) -training >> step=608900, episode=102 reward=0.7618247 (557.23 it/sec) -training >> step=609000, episode=102 reward=0.7883435 (539.50 it/sec) -training >> step=609100, episode=102 reward=0.7637212 (485.52 it/sec) -training >> step=609200, episode=102 reward=0.769906 (476.79 it/sec) -training >> step=609300, episode=102 reward=0.7684495 (515.02 it/sec) -training >> step=609400, episode=102 reward=0.7521418 (539.62 it/sec) -training >> step=609500, episode=102 reward=0.7499084 (502.65 it/sec) -training >> step=609600, episode=102 reward=0.7052943 (483.61 it/sec) -training >> step=609700, episode=102 reward=0.7578813 (492.94 it/sec) -training >> step=609800, episode=102 reward=0.7714689 (462.43 it/sec) -training >> step=609900, episode=102 reward=0.7624392 (504.99 it/sec) -training >> step=610000, episode=102 reward=0.7297921 (517.18 it/sec) -training >> step=610100, episode=102 reward=0.7537445 (473.90 it/sec) -training >> step=610200, episode=102 reward=0.765918 (502.66 it/sec) -training >> step=610300, episode=102 reward=0.7699758 (512.01 it/sec) -training >> step=610400, episode=102 reward=0.7413829 (500.29 it/sec) -training >> step=610500, episode=102 reward=0.7450289 (502.44 it/sec) -training >> step=610600, episode=102 reward=0.7451587 (461.63 it/sec) -training >> step=610700, episode=102 reward=0.746814 (473.92 it/sec) -training >> step=610800, episode=102 reward=0.7611184 (490.51 it/sec) -training >> step=610900, episode=102 reward=0.742139 (503.18 it/sec) -training >> step=611000, episode=102 reward=0.7416474 (478.44 it/sec) -training >> step=611100, episode=102 reward=0.7522331 (395.84 it/sec) -training >> step=611200, episode=102 reward=0.7439013 (486.72 it/sec) -training >> step=611300, episode=103 reward=0.7487259 (166.16 it/sec) -training >> step=611400, episode=103 reward=0.7645838 (476.55 it/sec) -training >> step=611500, episode=103 reward=0.7560545 (453.86 it/sec) -training >> step=611600, episode=103 reward=0.7623 (524.39 it/sec) -training >> step=611700, episode=103 reward=0.7569181 (445.72 it/sec) -training >> step=611800, episode=103 reward=0.7498898 (504.54 it/sec) -training >> step=611900, episode=103 reward=0.7664172 (445.22 it/sec) -training >> step=612000, episode=103 reward=0.7555801 (476.34 it/sec) -training >> step=612100, episode=103 reward=0.7408329 (462.66 it/sec) -training >> step=612200, episode=103 reward=0.7649458 (529.94 it/sec) -training >> step=612300, episode=103 reward=0.7808095 (486.94 it/sec) -training >> step=612400, episode=103 reward=0.7755481 (424.99 it/sec) -training >> step=612500, episode=103 reward=0.754838 (472.43 it/sec) -training >> step=612600, episode=103 reward=0.7667891 (490.40 it/sec) -training >> step=612700, episode=103 reward=0.7666731 (357.22 it/sec) -training >> step=612800, episode=103 reward=0.7716195 (412.64 it/sec) -training >> step=612900, episode=103 reward=0.7817771 (383.13 it/sec) -training >> step=613000, episode=103 reward=0.7693118 (393.01 it/sec) -training >> step=613100, episode=103 reward=0.7822125 (461.88 it/sec) -training >> step=613200, episode=103 reward=0.7859533 (452.87 it/sec) -training >> step=613300, episode=103 reward=0.7556291 (366.91 it/sec) -training >> step=613400, episode=103 reward=0.7636542 (407.95 it/sec) -training >> step=613500, episode=103 reward=0.7793542 (444.07 it/sec) -training >> step=613600, episode=103 reward=0.7664095 (436.73 it/sec) -training >> step=613700, episode=103 reward=0.7516176 (446.11 it/sec) -training >> step=613800, episode=103 reward=0.7703123 (381.08 it/sec) -training >> step=613900, episode=103 reward=0.7495291 (434.98 it/sec) -training >> step=614000, episode=103 reward=0.7613909 (393.14 it/sec) -training >> step=614100, episode=103 reward=0.7753565 (370.00 it/sec) -training >> step=614200, episode=103 reward=0.7429402 (242.82 it/sec) -training >> step=614300, episode=103 reward=0.7897061 (308.60 it/sec) -training >> step=614400, episode=103 reward=0.7799745 (321.51 it/sec) -training >> step=614500, episode=103 reward=0.7644102 (316.18 it/sec) -training >> step=614600, episode=103 reward=0.7659413 (356.48 it/sec) -training >> step=614700, episode=103 reward=0.7792222 (346.23 it/sec) -training >> step=614800, episode=103 reward=0.7557794 (277.06 it/sec) -training >> step=614900, episode=103 reward=0.7549205 (333.40 it/sec) -training >> step=615000, episode=103 reward=0.7303172 (396.68 it/sec) -training >> step=615100, episode=103 reward=0.7559392 (306.87 it/sec) -training >> step=615200, episode=103 reward=0.7636908 (335.01 it/sec) -training >> step=615300, episode=103 reward=0.7372788 (351.85 it/sec) -training >> step=615400, episode=103 reward=0.7519847 (337.51 it/sec) -training >> step=615500, episode=103 reward=0.7726229 (341.88 it/sec) -training >> step=615600, episode=103 reward=0.7447622 (324.04 it/sec) -training >> step=615700, episode=103 reward=0.7687273 (376.00 it/sec) -training >> step=615800, episode=103 reward=0.7660553 (393.13 it/sec) -training >> step=615900, episode=103 reward=0.7688778 (368.52 it/sec) -training >> step=616000, episode=103 reward=0.7614416 (424.11 it/sec) -training >> step=616100, episode=103 reward=0.756079 (424.63 it/sec) -training >> step=616200, episode=103 reward=0.7712295 (441.60 it/sec) -training >> step=616300, episode=103 reward=0.7629009 (412.12 it/sec) -training >> step=616400, episode=103 reward=0.7458259 (417.21 it/sec) -training >> step=616500, episode=103 reward=0.7538485 (422.15 it/sec) -training >> step=616600, episode=103 reward=0.755658 (425.31 it/sec) -training >> step=616700, episode=103 reward=0.7730414 (382.15 it/sec) -training >> step=616800, episode=103 reward=0.7592559 (396.26 it/sec) -training >> step=616900, episode=103 reward=0.7675345 (382.11 it/sec) -training >> step=617000, episode=103 reward=0.7596712 (379.23 it/sec) -training >> step=617100, episode=103 reward=0.7462087 (348.11 it/sec) -training >> step=617200, episode=103 reward=0.7453521 (315.38 it/sec) -training >> step=617300, episode=104 reward=0.7569478 (188.46 it/sec) -training >> step=617400, episode=104 reward=0.7552825 (394.57 it/sec) -training >> step=617500, episode=104 reward=0.7768521 (439.87 it/sec) -training >> step=617600, episode=104 reward=0.7443093 (435.05 it/sec) -training >> step=617700, episode=104 reward=0.77053 (448.55 it/sec) -training >> step=617800, episode=104 reward=0.7770292 (449.14 it/sec) -training >> step=617900, episode=104 reward=0.7720265 (409.01 it/sec) -training >> step=618000, episode=104 reward=0.7524952 (441.72 it/sec) -training >> step=618100, episode=104 reward=0.7596347 (430.21 it/sec) -training >> step=618200, episode=104 reward=0.7728718 (455.49 it/sec) -training >> step=618300, episode=104 reward=0.7434404 (428.21 it/sec) -training >> step=618400, episode=104 reward=0.7508015 (454.01 it/sec) -training >> step=618500, episode=104 reward=0.7739885 (453.52 it/sec) -training >> step=618600, episode=104 reward=0.7695462 (410.38 it/sec) -training >> step=618700, episode=104 reward=0.7592784 (434.30 it/sec) -training >> step=618800, episode=104 reward=0.73952 (422.18 it/sec) -training >> step=618900, episode=104 reward=0.7643777 (422.04 it/sec) -training >> step=619000, episode=104 reward=0.7572975 (418.24 it/sec) -training >> step=619100, episode=104 reward=0.7575656 (460.08 it/sec) -training >> step=619200, episode=104 reward=0.7621949 (440.86 it/sec) -training >> step=619300, episode=104 reward=0.7584893 (458.48 it/sec) -training >> step=619400, episode=104 reward=0.7600058 (436.93 it/sec) -training >> step=619500, episode=104 reward=0.7562796 (434.48 it/sec) -training >> step=619600, episode=104 reward=0.7601613 (422.95 it/sec) -training >> step=619700, episode=104 reward=0.7626864 (426.14 it/sec) -training >> step=619800, episode=104 reward=0.745117 (422.51 it/sec) -training >> step=619900, episode=104 reward=0.7587042 (437.00 it/sec) -training >> step=620000, episode=104 reward=0.7674778 (392.35 it/sec) -training >> step=620100, episode=104 reward=0.7656546 (341.94 it/sec) -training >> step=620200, episode=104 reward=0.7650359 (402.36 it/sec) -training >> step=620300, episode=104 reward=0.7615768 (315.21 it/sec) -training >> step=620400, episode=104 reward=0.7482269 (339.35 it/sec) -training >> step=620500, episode=104 reward=0.7484475 (402.99 it/sec) -training >> step=620600, episode=104 reward=0.7819407 (441.16 it/sec) -training >> step=620700, episode=104 reward=0.7525914 (393.07 it/sec) -training >> step=620800, episode=104 reward=0.7627371 (383.28 it/sec) -training >> step=620900, episode=104 reward=0.7501769 (416.98 it/sec) -training >> step=621000, episode=104 reward=0.7547658 (395.90 it/sec) -training >> step=621100, episode=104 reward=0.7753602 (417.45 it/sec) -training >> step=621200, episode=104 reward=0.7376782 (339.60 it/sec) -training >> step=621300, episode=104 reward=0.7563925 (443.86 it/sec) -training >> step=621400, episode=104 reward=0.7255531 (347.54 it/sec) -training >> step=621500, episode=104 reward=0.7749851 (369.31 it/sec) -training >> step=621600, episode=104 reward=0.7722063 (406.74 it/sec) -training >> step=621700, episode=104 reward=0.7576187 (422.09 it/sec) -training >> step=621800, episode=104 reward=0.7447981 (431.71 it/sec) -training >> step=621900, episode=104 reward=0.7684636 (434.64 it/sec) -training >> step=622000, episode=104 reward=0.7403512 (373.62 it/sec) -training >> step=622100, episode=104 reward=0.7560205 (315.62 it/sec) -training >> step=622200, episode=104 reward=0.7562871 (353.04 it/sec) -training >> step=622300, episode=104 reward=0.7597206 (336.47 it/sec) -training >> step=622400, episode=104 reward=0.7833806 (418.06 it/sec) -training >> step=622500, episode=104 reward=0.7510068 (376.33 it/sec) -training >> step=622600, episode=104 reward=0.7499703 (391.17 it/sec) -training >> step=622700, episode=104 reward=0.7759721 (385.55 it/sec) -training >> step=622800, episode=104 reward=0.7509977 (377.38 it/sec) -training >> step=622900, episode=104 reward=0.750272 (378.69 it/sec) -training >> step=623000, episode=104 reward=0.7591102 (453.33 it/sec) -training >> step=623100, episode=104 reward=0.7518786 (499.16 it/sec) -training >> step=623200, episode=104 reward=0.765761 (449.41 it/sec) -training >> step=623300, episode=105 reward=0.7725334 (170.93 it/sec) -training >> step=623400, episode=105 reward=0.762714 (484.89 it/sec) -training >> step=623500, episode=105 reward=0.7812864 (446.47 it/sec) -training >> step=623600, episode=105 reward=0.7678291 (416.99 it/sec) -training >> step=623700, episode=105 reward=0.7598359 (454.24 it/sec) -training >> step=623800, episode=105 reward=0.763874 (474.87 it/sec) -training >> step=623900, episode=105 reward=0.7513388 (408.70 it/sec) -training >> step=624000, episode=105 reward=0.7517122 (456.83 it/sec) -training >> step=624100, episode=105 reward=0.7797086 (440.44 it/sec) -training >> step=624200, episode=105 reward=0.7622234 (495.49 it/sec) -training >> step=624300, episode=105 reward=0.7584441 (432.25 it/sec) -training >> step=624400, episode=105 reward=0.7639642 (421.82 it/sec) -training >> step=624500, episode=105 reward=0.7522873 (416.44 it/sec) -training >> step=624600, episode=105 reward=0.7446862 (411.23 it/sec) -training >> step=624700, episode=105 reward=0.7736647 (460.71 it/sec) -training >> step=624800, episode=105 reward=0.7680508 (381.84 it/sec) -training >> step=624900, episode=105 reward=0.7615166 (314.92 it/sec) -training >> step=625000, episode=105 reward=0.7669126 (296.32 it/sec) -training >> step=625100, episode=105 reward=0.7699705 (296.59 it/sec) -training >> step=625200, episode=105 reward=0.7571039 (276.60 it/sec) -training >> step=625300, episode=105 reward=0.7625805 (317.91 it/sec) -training >> step=625400, episode=105 reward=0.7599909 (341.91 it/sec) -training >> step=625500, episode=105 reward=0.7647527 (319.82 it/sec) -training >> step=625600, episode=105 reward=0.7726678 (342.30 it/sec) -training >> step=625700, episode=105 reward=0.7715086 (367.18 it/sec) -training >> step=625800, episode=105 reward=0.7826204 (344.11 it/sec) -training >> step=625900, episode=105 reward=0.7577185 (346.99 it/sec) -training >> step=626000, episode=105 reward=0.7568746 (370.45 it/sec) -training >> step=626100, episode=105 reward=0.7872269 (374.64 it/sec) -training >> step=626200, episode=105 reward=0.7369428 (364.71 it/sec) -training >> step=626300, episode=105 reward=0.7521766 (352.75 it/sec) -training >> step=626400, episode=105 reward=0.7588941 (383.30 it/sec) -training >> step=626500, episode=105 reward=0.7817841 (343.17 it/sec) -training >> step=626600, episode=105 reward=0.7705429 (306.78 it/sec) -training >> step=626700, episode=105 reward=0.7719257 (386.36 it/sec) -training >> step=626800, episode=105 reward=0.7684214 (366.67 it/sec) -training >> step=626900, episode=105 reward=0.7619248 (322.30 it/sec) -training >> step=627000, episode=105 reward=0.7424831 (339.40 it/sec) -training >> step=627100, episode=105 reward=0.762711 (352.39 it/sec) -training >> step=627200, episode=105 reward=0.7743549 (264.37 it/sec) -training >> step=627300, episode=105 reward=0.7485126 (311.73 it/sec) -training >> step=627400, episode=105 reward=0.7688999 (412.25 it/sec) -training >> step=627500, episode=105 reward=0.7541147 (356.37 it/sec) -training >> step=627600, episode=105 reward=0.7487321 (343.23 it/sec) -training >> step=627700, episode=105 reward=0.7569129 (303.84 it/sec) -training >> step=627800, episode=105 reward=0.7736575 (384.87 it/sec) -training >> step=627900, episode=105 reward=0.7610658 (392.49 it/sec) -training >> step=628000, episode=105 reward=0.7638427 (330.21 it/sec) -training >> step=628100, episode=105 reward=0.757304 (354.60 it/sec) -training >> step=628200, episode=105 reward=0.7620069 (448.10 it/sec) -training >> step=628300, episode=105 reward=0.7393561 (489.23 it/sec) -training >> step=628400, episode=105 reward=0.7743514 (475.83 it/sec) -training >> step=628500, episode=105 reward=0.762274 (468.44 it/sec) -training >> step=628600, episode=105 reward=0.7539435 (480.31 it/sec) -training >> step=628700, episode=105 reward=0.7612408 (469.44 it/sec) -training >> step=628800, episode=105 reward=0.7471191 (378.84 it/sec) -training >> step=628900, episode=105 reward=0.7326787 (369.53 it/sec) -training >> step=629000, episode=105 reward=0.7826022 (410.86 it/sec) -training >> step=629100, episode=105 reward=0.7652658 (360.54 it/sec) -training >> step=629200, episode=105 reward=0.7560713 (359.29 it/sec) -training >> step=629300, episode=106 reward=0.7644291 (128.66 it/sec) -training >> step=629400, episode=106 reward=0.7970561 (367.66 it/sec) -training >> step=629500, episode=106 reward=0.769818 (344.13 it/sec) -training >> step=629600, episode=106 reward=0.7688527 (401.54 it/sec) -training >> step=629700, episode=106 reward=0.7606671 (434.02 it/sec) -training >> step=629800, episode=106 reward=0.7581291 (437.90 it/sec) -training >> step=629900, episode=106 reward=0.765555 (411.06 it/sec) -training >> step=630000, episode=106 reward=0.7592676 (348.44 it/sec) -training >> step=630100, episode=106 reward=0.738551 (463.40 it/sec) -training >> step=630200, episode=106 reward=0.7552781 (445.22 it/sec) -training >> step=630300, episode=106 reward=0.7403413 (470.42 it/sec) -training >> step=630400, episode=106 reward=0.729629 (456.23 it/sec) -training >> step=630500, episode=106 reward=0.7501786 (457.23 it/sec) -training >> step=630600, episode=106 reward=0.7673586 (498.39 it/sec) -training >> step=630700, episode=106 reward=0.7542774 (518.31 it/sec) -training >> step=630800, episode=106 reward=0.751064 (517.38 it/sec) -training >> step=630900, episode=106 reward=0.7591184 (451.86 it/sec) -training >> step=631000, episode=106 reward=0.7607936 (548.15 it/sec) -training >> step=631100, episode=106 reward=0.7426353 (508.44 it/sec) -training >> step=631200, episode=106 reward=0.7544965 (518.58 it/sec) -training >> step=631300, episode=106 reward=0.7678002 (526.73 it/sec) -training >> step=631400, episode=106 reward=0.7713733 (559.75 it/sec) -training >> step=631500, episode=106 reward=0.7725099 (508.11 it/sec) -training >> step=631600, episode=106 reward=0.7472897 (524.39 it/sec) -training >> step=631700, episode=106 reward=0.7780803 (518.73 it/sec) -training >> step=631800, episode=106 reward=0.7629649 (536.43 it/sec) -training >> step=631900, episode=106 reward=0.7539123 (510.82 it/sec) -training >> step=632000, episode=106 reward=0.749848 (482.60 it/sec) -training >> step=632100, episode=106 reward=0.7759535 (520.17 it/sec) -training >> step=632200, episode=106 reward=0.7528933 (492.22 it/sec) -training >> step=632300, episode=106 reward=0.7559902 (512.88 it/sec) -training >> step=632400, episode=106 reward=0.7842784 (550.09 it/sec) -training >> step=632500, episode=106 reward=0.745728 (534.55 it/sec) -training >> step=632600, episode=106 reward=0.7852459 (544.40 it/sec) -training >> step=632700, episode=106 reward=0.7622972 (526.21 it/sec) -training >> step=632800, episode=106 reward=0.7570947 (565.61 it/sec) -training >> step=632900, episode=106 reward=0.7722544 (528.06 it/sec) -training >> step=633000, episode=106 reward=0.7712287 (549.61 it/sec) -training >> step=633100, episode=106 reward=0.7844999 (523.81 it/sec) -training >> step=633200, episode=106 reward=0.7541519 (534.12 it/sec) -training >> step=633300, episode=106 reward=0.7471626 (375.80 it/sec) -training >> step=633400, episode=106 reward=0.7476155 (514.81 it/sec) -training >> step=633500, episode=106 reward=0.7633855 (526.39 it/sec) -training >> step=633600, episode=106 reward=0.7545286 (493.82 it/sec) -training >> step=633700, episode=106 reward=0.7638818 (542.46 it/sec) -training >> step=633800, episode=106 reward=0.7691831 (463.82 it/sec) -training >> step=633900, episode=106 reward=0.7673078 (488.09 it/sec) -training >> step=634000, episode=106 reward=0.7473183 (544.54 it/sec) -training >> step=634100, episode=106 reward=0.7536796 (535.82 it/sec) -training >> step=634200, episode=106 reward=0.7320765 (514.02 it/sec) -training >> step=634300, episode=106 reward=0.7681111 (502.28 it/sec) -training >> step=634400, episode=106 reward=0.7755088 (556.65 it/sec) -training >> step=634500, episode=106 reward=0.7672557 (508.87 it/sec) -training >> step=634600, episode=106 reward=0.7449667 (502.17 it/sec) -training >> step=634700, episode=106 reward=0.7538543 (512.41 it/sec) -training >> step=634800, episode=106 reward=0.7553088 (526.08 it/sec) -training >> step=634900, episode=106 reward=0.7714528 (469.73 it/sec) -training >> step=635000, episode=106 reward=0.7435782 (448.61 it/sec) -training >> step=635100, episode=106 reward=0.7407501 (448.87 it/sec) -training >> step=635200, episode=106 reward=0.7638867 (480.53 it/sec) -training >> step=635300, episode=107 reward=0.7629018 (164.03 it/sec) -training >> step=635400, episode=107 reward=0.7669501 (531.55 it/sec) -training >> step=635500, episode=107 reward=0.7628154 (490.05 it/sec) -training >> step=635600, episode=107 reward=0.774262 (497.17 it/sec) -training >> step=635700, episode=107 reward=0.7712658 (521.14 it/sec) -training >> step=635800, episode=107 reward=0.7554278 (497.93 it/sec) -training >> step=635900, episode=107 reward=0.7456201 (512.12 it/sec) -training >> step=636000, episode=107 reward=0.7561446 (511.53 it/sec) -training >> step=636100, episode=107 reward=0.7631474 (503.71 it/sec) -training >> step=636200, episode=107 reward=0.7873061 (555.73 it/sec) -training >> step=636300, episode=107 reward=0.7603533 (498.87 it/sec) -training >> step=636400, episode=107 reward=0.7764419 (514.46 it/sec) -training >> step=636500, episode=107 reward=0.7824401 (515.85 it/sec) -training >> step=636600, episode=107 reward=0.7738244 (552.65 it/sec) -training >> step=636700, episode=107 reward=0.7207086 (522.13 it/sec) -training >> step=636800, episode=107 reward=0.7778825 (508.72 it/sec) -training >> step=636900, episode=107 reward=0.7571213 (490.48 it/sec) -training >> step=637000, episode=107 reward=0.7596537 (512.62 it/sec) -training >> step=637100, episode=107 reward=0.7594414 (516.36 it/sec) -training >> step=637200, episode=107 reward=0.76615 (524.77 it/sec) -training >> step=637300, episode=107 reward=0.7707347 (570.13 it/sec) -training >> step=637400, episode=107 reward=0.7607843 (520.07 it/sec) -training >> step=637500, episode=107 reward=0.7683524 (510.73 it/sec) -training >> step=637600, episode=107 reward=0.7563137 (505.67 it/sec) -training >> step=637700, episode=107 reward=0.7772549 (541.55 it/sec) -training >> step=637800, episode=107 reward=0.768532 (539.02 it/sec) -training >> step=637900, episode=107 reward=0.7613919 (506.91 it/sec) -training >> step=638000, episode=107 reward=0.7465715 (546.36 it/sec) -training >> step=638100, episode=107 reward=0.760286 (500.58 it/sec) -training >> step=638200, episode=107 reward=0.7568485 (494.28 it/sec) -training >> step=638300, episode=107 reward=0.7502993 (540.66 it/sec) -training >> step=638400, episode=107 reward=0.76069 (536.57 it/sec) -training >> step=638500, episode=107 reward=0.7624036 (514.21 it/sec) -training >> step=638600, episode=107 reward=0.7490879 (539.38 it/sec) -training >> step=638700, episode=107 reward=0.7376993 (510.73 it/sec) -training >> step=638800, episode=107 reward=0.7854037 (532.43 it/sec) -training >> step=638900, episode=107 reward=0.7555375 (501.20 it/sec) -training >> step=639000, episode=107 reward=0.750028 (467.53 it/sec) -training >> step=639100, episode=107 reward=0.7655319 (540.84 it/sec) -training >> step=639200, episode=107 reward=0.7384997 (509.30 it/sec) -training >> step=639300, episode=107 reward=0.7703291 (387.13 it/sec) -training >> step=639400, episode=107 reward=0.7509688 (465.06 it/sec) -training >> step=639500, episode=107 reward=0.7732036 (486.51 it/sec) -training >> step=639600, episode=107 reward=0.7654697 (465.67 it/sec) -training >> step=639700, episode=107 reward=0.7738981 (451.34 it/sec) -training >> step=639800, episode=107 reward=0.7721519 (472.12 it/sec) -training >> step=639900, episode=107 reward=0.7401768 (490.06 it/sec) -training >> step=640000, episode=107 reward=0.7306188 (443.44 it/sec) -training >> step=640100, episode=107 reward=0.7433043 (440.59 it/sec) -training >> step=640200, episode=107 reward=0.7650632 (435.09 it/sec) -training >> step=640300, episode=107 reward=0.7375957 (398.37 it/sec) -training >> step=640400, episode=107 reward=0.7394357 (382.42 it/sec) -training >> step=640500, episode=107 reward=0.7617747 (351.59 it/sec) -training >> step=640600, episode=107 reward=0.728388 (382.63 it/sec) -training >> step=640700, episode=107 reward=0.7449742 (430.10 it/sec) -training >> step=640800, episode=107 reward=0.7796286 (440.08 it/sec) -training >> step=640900, episode=107 reward=0.7322481 (485.83 it/sec) -training >> step=641000, episode=107 reward=0.7586823 (463.54 it/sec) -training >> step=641100, episode=107 reward=0.7509167 (477.67 it/sec) -training >> step=641200, episode=107 reward=0.7518697 (421.54 it/sec) -training >> step=641300, episode=108 reward=0.7530075 (222.60 it/sec) -training >> step=641400, episode=108 reward=0.7367026 (462.00 it/sec) -training >> step=641500, episode=108 reward=0.7474753 (450.66 it/sec) -training >> step=641600, episode=108 reward=0.7653005 (466.79 it/sec) -training >> step=641700, episode=108 reward=0.7546688 (513.00 it/sec) -training >> step=641800, episode=108 reward=0.7460855 (478.85 it/sec) -training >> step=641900, episode=108 reward=0.7612841 (489.33 it/sec) -training >> step=642000, episode=108 reward=0.754499 (525.07 it/sec) -training >> step=642100, episode=108 reward=0.7561826 (493.00 it/sec) -training >> step=642200, episode=108 reward=0.7827907 (515.43 it/sec) -training >> step=642300, episode=108 reward=0.7530558 (489.93 it/sec) -training >> step=642400, episode=108 reward=0.7401521 (468.84 it/sec) -training >> step=642500, episode=108 reward=0.7668374 (538.30 it/sec) -training >> step=642600, episode=108 reward=0.7532365 (470.97 it/sec) -training >> step=642700, episode=108 reward=0.7639579 (508.35 it/sec) -training >> step=642800, episode=108 reward=0.7744673 (544.78 it/sec) -training >> step=642900, episode=108 reward=0.7612684 (498.74 it/sec) -training >> step=643000, episode=108 reward=0.7643791 (475.64 it/sec) -training >> step=643100, episode=108 reward=0.722967 (471.81 it/sec) -training >> step=643200, episode=108 reward=0.773333 (512.87 it/sec) -training >> step=643300, episode=108 reward=0.7861047 (482.48 it/sec) -training >> step=643400, episode=108 reward=0.7372972 (493.56 it/sec) -training >> step=643500, episode=108 reward=0.7631001 (533.60 it/sec) -training >> step=643600, episode=108 reward=0.7670048 (503.27 it/sec) -training >> step=643700, episode=108 reward=0.7905596 (504.14 it/sec) -training >> step=643800, episode=108 reward=0.7658717 (523.79 it/sec) -training >> step=643900, episode=108 reward=0.7595304 (488.10 it/sec) -training >> step=644000, episode=108 reward=0.7639334 (484.62 it/sec) -training >> step=644100, episode=108 reward=0.7532735 (483.64 it/sec) -training >> step=644200, episode=108 reward=0.7800012 (490.65 it/sec) -training >> step=644300, episode=108 reward=0.7445461 (520.06 it/sec) -training >> step=644400, episode=108 reward=0.7603763 (497.64 it/sec) -training >> step=644500, episode=108 reward=0.7791777 (511.22 it/sec) -training >> step=644600, episode=108 reward=0.7724257 (462.74 it/sec) -training >> step=644700, episode=108 reward=0.7914509 (488.02 it/sec) -training >> step=644800, episode=108 reward=0.7693915 (480.80 it/sec) -training >> step=644900, episode=108 reward=0.7591646 (482.86 it/sec) -training >> step=645000, episode=108 reward=0.7570606 (507.18 it/sec) -training >> step=645100, episode=108 reward=0.7642026 (495.36 it/sec) -training >> step=645200, episode=108 reward=0.7315532 (487.86 it/sec) -training >> step=645300, episode=108 reward=0.7456329 (520.40 it/sec) -training >> step=645400, episode=108 reward=0.7620537 (494.30 it/sec) -training >> step=645500, episode=108 reward=0.7515196 (511.72 it/sec) -training >> step=645600, episode=108 reward=0.7532865 (362.04 it/sec) -training >> step=645700, episode=108 reward=0.7677689 (520.53 it/sec) -training >> step=645800, episode=108 reward=0.7462773 (476.15 it/sec) -training >> step=645900, episode=108 reward=0.7588546 (482.90 it/sec) -training >> step=646000, episode=108 reward=0.7545671 (517.20 it/sec) -training >> step=646100, episode=108 reward=0.7502517 (508.11 it/sec) -training >> step=646200, episode=108 reward=0.7537157 (545.04 it/sec) -training >> step=646300, episode=108 reward=0.7535512 (483.49 it/sec) -training >> step=646400, episode=108 reward=0.7341701 (481.39 it/sec) -training >> step=646500, episode=108 reward=0.7792996 (475.40 it/sec) -training >> step=646600, episode=108 reward=0.7594262 (540.33 it/sec) -training >> step=646700, episode=108 reward=0.751222 (505.59 it/sec) -training >> step=646800, episode=108 reward=0.7495189 (499.76 it/sec) -training >> step=646900, episode=108 reward=0.7767429 (483.28 it/sec) -training >> step=647000, episode=108 reward=0.7545901 (530.58 it/sec) -training >> step=647100, episode=108 reward=0.7257691 (500.11 it/sec) -training >> step=647200, episode=108 reward=0.7507263 (452.02 it/sec) -training >> step=647300, episode=109 reward=0.7695543 (227.87 it/sec) -training >> step=647400, episode=109 reward=0.766345 (502.54 it/sec) -training >> step=647500, episode=109 reward=0.7512777 (506.44 it/sec) -training >> step=647600, episode=109 reward=0.7591366 (486.37 it/sec) -training >> step=647700, episode=109 reward=0.7750955 (470.08 it/sec) -training >> step=647800, episode=109 reward=0.7554988 (479.31 it/sec) -training >> step=647900, episode=109 reward=0.7416197 (491.68 it/sec) -training >> step=648000, episode=109 reward=0.7827466 (499.81 it/sec) -training >> step=648100, episode=109 reward=0.770033 (515.10 it/sec) -training >> step=648200, episode=109 reward=0.7487795 (489.20 it/sec) -training >> step=648300, episode=109 reward=0.7591396 (495.98 it/sec) -training >> step=648400, episode=109 reward=0.7438135 (514.73 it/sec) -training >> step=648500, episode=109 reward=0.7560052 (520.73 it/sec) -training >> step=648600, episode=109 reward=0.7656975 (506.57 it/sec) -training >> step=648700, episode=109 reward=0.7470096 (472.38 it/sec) -training >> step=648800, episode=109 reward=0.7737558 (504.82 it/sec) -training >> step=648900, episode=109 reward=0.7807941 (545.21 it/sec) -training >> step=649000, episode=109 reward=0.7762325 (478.76 it/sec) -training >> step=649100, episode=109 reward=0.7743894 (473.15 it/sec) -training >> step=649200, episode=109 reward=0.7651749 (412.04 it/sec) -training >> step=649300, episode=109 reward=0.7686403 (393.14 it/sec) -training >> step=649400, episode=109 reward=0.76726 (375.85 it/sec) -training >> step=649500, episode=109 reward=0.7770999 (352.49 it/sec) -training >> step=649600, episode=109 reward=0.7727064 (447.57 it/sec) -training >> step=649700, episode=109 reward=0.7607074 (485.04 it/sec) -training >> step=649800, episode=109 reward=0.7669835 (462.07 it/sec) -training >> step=649900, episode=109 reward=0.7403196 (482.42 it/sec) -training >> step=650000, episode=109 reward=0.7889466 (463.12 it/sec) -training >> step=650100, episode=109 reward=0.7502798 (474.64 it/sec) -training >> step=650200, episode=109 reward=0.7738463 (426.54 it/sec) -training >> step=650300, episode=109 reward=0.7672415 (490.36 it/sec) -training >> step=650400, episode=109 reward=0.76342 (461.00 it/sec) -training >> step=650500, episode=109 reward=0.7909536 (449.03 it/sec) -training >> step=650600, episode=109 reward=0.7942923 (474.56 it/sec) -training >> step=650700, episode=109 reward=0.7796546 (453.56 it/sec) -training >> step=650800, episode=109 reward=0.7584029 (443.19 it/sec) -training >> step=650900, episode=109 reward=0.7762786 (401.46 it/sec) -training >> step=651000, episode=109 reward=0.7498877 (459.48 it/sec) -training >> step=651100, episode=109 reward=0.7646315 (422.37 it/sec) -training >> step=651200, episode=109 reward=0.7458364 (448.45 it/sec) -training >> step=651300, episode=109 reward=0.7558092 (474.08 it/sec) -training >> step=651400, episode=109 reward=0.7673424 (444.75 it/sec) -training >> step=651500, episode=109 reward=0.7559924 (455.72 it/sec) -training >> step=651600, episode=109 reward=0.7390159 (464.27 it/sec) -training >> step=651700, episode=109 reward=0.7518445 (490.97 it/sec) -training >> step=651800, episode=109 reward=0.763382 (433.68 it/sec) -training >> step=651900, episode=109 reward=0.7331297 (345.74 it/sec) -training >> step=652000, episode=109 reward=0.7463807 (500.14 it/sec) -training >> step=652100, episode=109 reward=0.7522207 (468.08 it/sec) -training >> step=652200, episode=109 reward=0.7483326 (515.67 it/sec) -training >> step=652300, episode=109 reward=0.7375264 (436.33 it/sec) -training >> step=652400, episode=109 reward=0.7702071 (421.16 it/sec) -training >> step=652500, episode=109 reward=0.7659065 (473.34 it/sec) -training >> step=652600, episode=109 reward=0.766744 (488.51 it/sec) -training >> step=652700, episode=109 reward=0.7370161 (483.97 it/sec) -training >> step=652800, episode=109 reward=0.7520863 (474.95 it/sec) -training >> step=652900, episode=109 reward=0.7605043 (509.03 it/sec) -training >> step=653000, episode=109 reward=0.7619156 (511.97 it/sec) -training >> step=653100, episode=109 reward=0.7567617 (450.45 it/sec) -training >> step=653200, episode=109 reward=0.7645981 (423.73 it/sec) -training >> step=653300, episode=110 reward=0.7470056 (196.89 it/sec) -training >> step=653400, episode=110 reward=0.7696015 (425.07 it/sec) -training >> step=653500, episode=110 reward=0.7621651 (386.38 it/sec) -training >> step=653600, episode=110 reward=0.7513387 (399.49 it/sec) -training >> step=653700, episode=110 reward=0.754431 (427.27 it/sec) -training >> step=653800, episode=110 reward=0.7355164 (384.26 it/sec) -training >> step=653900, episode=110 reward=0.7551166 (434.21 it/sec) -training >> step=654000, episode=110 reward=0.7566836 (416.97 it/sec) -training >> step=654100, episode=110 reward=0.7709952 (435.20 it/sec) -training >> step=654200, episode=110 reward=0.7347651 (474.32 it/sec) -training >> step=654300, episode=110 reward=0.7608693 (475.22 it/sec) -training >> step=654400, episode=110 reward=0.7489162 (456.85 it/sec) -training >> step=654500, episode=110 reward=0.7633372 (469.38 it/sec) -training >> step=654600, episode=110 reward=0.7525366 (522.16 it/sec) -training >> step=654700, episode=110 reward=0.7656975 (491.13 it/sec) -training >> step=654800, episode=110 reward=0.7467266 (463.02 it/sec) -training >> step=654900, episode=110 reward=0.7590106 (470.28 it/sec) -training >> step=655000, episode=110 reward=0.772234 (479.41 it/sec) -training >> step=655100, episode=110 reward=0.7761897 (462.40 it/sec) -training >> step=655200, episode=110 reward=0.7731563 (460.27 it/sec) -training >> step=655300, episode=110 reward=0.7475458 (461.61 it/sec) -training >> step=655400, episode=110 reward=0.765022 (438.20 it/sec) -training >> step=655500, episode=110 reward=0.7858137 (465.02 it/sec) -training >> step=655600, episode=110 reward=0.7701637 (481.85 it/sec) -training >> step=655700, episode=110 reward=0.7497377 (503.59 it/sec) -training >> step=655800, episode=110 reward=0.7482942 (520.33 it/sec) -training >> step=655900, episode=110 reward=0.764263 (474.63 it/sec) -training >> step=656000, episode=110 reward=0.7728483 (520.83 it/sec) -training >> step=656100, episode=110 reward=0.7675998 (505.55 it/sec) -training >> step=656200, episode=110 reward=0.7483408 (497.78 it/sec) -training >> step=656300, episode=110 reward=0.7698437 (519.16 it/sec) -training >> step=656400, episode=110 reward=0.7814175 (486.21 it/sec) -training >> step=656500, episode=110 reward=0.7859728 (474.57 it/sec) -training >> step=656600, episode=110 reward=0.7738243 (501.69 it/sec) -training >> step=656700, episode=110 reward=0.7740271 (491.97 it/sec) -training >> step=656800, episode=110 reward=0.7557101 (478.90 it/sec) -training >> step=656900, episode=110 reward=0.7463606 (458.97 it/sec) -training >> step=657000, episode=110 reward=0.7566702 (475.27 it/sec) -training >> step=657100, episode=110 reward=0.7474861 (494.17 it/sec) -training >> step=657200, episode=110 reward=0.7811089 (465.57 it/sec) -training >> step=657300, episode=110 reward=0.7567889 (445.79 it/sec) -training >> step=657400, episode=110 reward=0.7475668 (450.19 it/sec) -training >> step=657500, episode=110 reward=0.7498804 (492.18 it/sec) -training >> step=657600, episode=110 reward=0.7702245 (481.54 it/sec) -training >> step=657700, episode=110 reward=0.7730715 (468.38 it/sec) -training >> step=657800, episode=110 reward=0.7491671 (454.11 it/sec) -training >> step=657900, episode=110 reward=0.7569975 (511.07 it/sec) -training >> step=658000, episode=110 reward=0.757587 (494.38 it/sec) -training >> step=658100, episode=110 reward=0.7501606 (463.19 it/sec) -training >> step=658200, episode=110 reward=0.7438104 (419.13 it/sec) -training >> step=658300, episode=110 reward=0.7465492 (481.83 it/sec) -training >> step=658400, episode=110 reward=0.753848 (483.34 it/sec) -training >> step=658500, episode=110 reward=0.7297806 (489.11 it/sec) -training >> step=658600, episode=110 reward=0.7456992 (460.07 it/sec) -training >> step=658700, episode=110 reward=0.7675547 (508.62 it/sec) -training >> step=658800, episode=110 reward=0.7544758 (481.12 it/sec) -training >> step=658900, episode=110 reward=0.7851677 (426.18 it/sec) -training >> step=659000, episode=110 reward=0.7734597 (530.26 it/sec) -training >> step=659100, episode=110 reward=0.7801217 (480.57 it/sec) -training >> step=659200, episode=110 reward=0.7569154 (441.87 it/sec) -training >> step=659300, episode=111 reward=0.7762399 (159.41 it/sec) -training >> step=659400, episode=111 reward=0.7611282 (489.34 it/sec) -training >> step=659500, episode=111 reward=0.7638943 (469.92 it/sec) -training >> step=659600, episode=111 reward=0.7666112 (442.08 it/sec) -training >> step=659700, episode=111 reward=0.7489092 (466.93 it/sec) -training >> step=659800, episode=111 reward=0.7618931 (500.95 it/sec) -training >> step=659900, episode=111 reward=0.7546741 (510.15 it/sec) -training >> step=660000, episode=111 reward=0.7742606 (501.80 it/sec) -training >> step=660100, episode=111 reward=0.7386327 (506.58 it/sec) -training >> step=660200, episode=111 reward=0.7558692 (505.87 it/sec) -training >> step=660300, episode=111 reward=0.7685868 (506.23 it/sec) -training >> step=660400, episode=111 reward=0.7484626 (502.60 it/sec) -training >> step=660500, episode=111 reward=0.7609003 (484.61 it/sec) -training >> step=660600, episode=111 reward=0.7740744 (444.91 it/sec) -training >> step=660700, episode=111 reward=0.7627495 (449.75 it/sec) -training >> step=660800, episode=111 reward=0.7508471 (479.72 it/sec) -training >> step=660900, episode=111 reward=0.762958 (506.45 it/sec) -training >> step=661000, episode=111 reward=0.7570062 (476.26 it/sec) -training >> step=661100, episode=111 reward=0.7793646 (478.14 it/sec) -training >> step=661200, episode=111 reward=0.7622256 (486.89 it/sec) -training >> step=661300, episode=111 reward=0.7732604 (493.63 it/sec) -training >> step=661400, episode=111 reward=0.8011996 (492.88 it/sec) -training >> step=661500, episode=111 reward=0.7810217 (489.88 it/sec) -training >> step=661600, episode=111 reward=0.7393453 (499.07 it/sec) -training >> step=661700, episode=111 reward=0.7603266 (497.17 it/sec) -training >> step=661800, episode=111 reward=0.7685099 (498.32 it/sec) -training >> step=661900, episode=111 reward=0.7712743 (476.13 it/sec) -training >> step=662000, episode=111 reward=0.7537758 (479.51 it/sec) -training >> step=662100, episode=111 reward=0.7824051 (513.36 it/sec) -training >> step=662200, episode=111 reward=0.7551385 (485.45 it/sec) -training >> step=662300, episode=111 reward=0.7703668 (526.07 it/sec) -training >> step=662400, episode=111 reward=0.7498713 (511.33 it/sec) -training >> step=662500, episode=111 reward=0.7546314 (492.22 it/sec) -training >> step=662600, episode=111 reward=0.7540245 (507.00 it/sec) -training >> step=662700, episode=111 reward=0.7765428 (443.57 it/sec) -training >> step=662800, episode=111 reward=0.7489712 (458.16 it/sec) -training >> step=662900, episode=111 reward=0.7872038 (401.32 it/sec) -training >> step=663000, episode=111 reward=0.7714589 (378.08 it/sec) -training >> step=663100, episode=111 reward=0.7352442 (397.97 it/sec) -training >> step=663200, episode=111 reward=0.7620386 (357.90 it/sec) -training >> step=663300, episode=111 reward=0.7410098 (430.47 it/sec) -training >> step=663400, episode=111 reward=0.7459432 (454.33 it/sec) -training >> step=663500, episode=111 reward=0.7495703 (505.35 it/sec) -training >> step=663600, episode=111 reward=0.7676961 (490.89 it/sec) -training >> step=663700, episode=111 reward=0.7719439 (522.86 it/sec) -training >> step=663800, episode=111 reward=0.7504652 (456.04 it/sec) -training >> step=663900, episode=111 reward=0.7734739 (516.42 it/sec) -training >> step=664000, episode=111 reward=0.7653047 (511.16 it/sec) -training >> step=664100, episode=111 reward=0.7401522 (528.41 it/sec) -training >> step=664200, episode=111 reward=0.7681372 (508.18 it/sec) -training >> step=664300, episode=111 reward=0.7477298 (462.02 it/sec) -training >> step=664400, episode=111 reward=0.7924529 (426.35 it/sec) -training >> step=664500, episode=111 reward=0.7479793 (520.50 it/sec) -training >> step=664600, episode=111 reward=0.7481248 (501.12 it/sec) -training >> step=664700, episode=111 reward=0.7446638 (499.85 it/sec) -training >> step=664800, episode=111 reward=0.7687908 (430.68 it/sec) -training >> step=664900, episode=111 reward=0.7529233 (523.00 it/sec) -training >> step=665000, episode=111 reward=0.756066 (485.70 it/sec) -training >> step=665100, episode=111 reward=0.7612475 (513.45 it/sec) -training >> step=665200, episode=111 reward=0.760105 (517.69 it/sec) -training >> step=665300, episode=112 reward=0.764676 (199.01 it/sec) -training >> step=665400, episode=112 reward=0.786957 (530.97 it/sec) -training >> step=665500, episode=112 reward=0.7429022 (501.18 it/sec) -training >> step=665600, episode=112 reward=0.776405 (480.14 it/sec) -training >> step=665700, episode=112 reward=0.7463884 (469.99 it/sec) -training >> step=665800, episode=112 reward=0.7616059 (517.79 it/sec) -training >> step=665900, episode=112 reward=0.7501804 (509.42 it/sec) -training >> step=666000, episode=112 reward=0.7786207 (520.30 it/sec) -training >> step=666100, episode=112 reward=0.7685903 (492.21 it/sec) -training >> step=666200, episode=112 reward=0.7476833 (467.27 it/sec) -training >> step=666300, episode=112 reward=0.7659503 (532.12 it/sec) -training >> step=666400, episode=112 reward=0.7585701 (510.39 it/sec) -training >> step=666500, episode=112 reward=0.7540283 (489.72 it/sec) -training >> step=666600, episode=112 reward=0.7454865 (474.65 it/sec) -training >> step=666700, episode=112 reward=0.7639477 (374.81 it/sec) -training >> step=666800, episode=112 reward=0.7557144 (421.73 it/sec) -training >> step=666900, episode=112 reward=0.7823371 (380.39 it/sec) -training >> step=667000, episode=112 reward=0.7643203 (346.89 it/sec) -training >> step=667100, episode=112 reward=0.7800106 (448.36 it/sec) -training >> step=667200, episode=112 reward=0.747543 (422.34 it/sec) -training >> step=667300, episode=112 reward=0.76626 (488.57 it/sec) -training >> step=667400, episode=112 reward=0.7587617 (433.10 it/sec) -training >> step=667500, episode=112 reward=0.7611486 (470.08 it/sec) -training >> step=667600, episode=112 reward=0.7483991 (463.07 it/sec) -training >> step=667700, episode=112 reward=0.7482098 (426.29 it/sec) -training >> step=667800, episode=112 reward=0.7488631 (406.55 it/sec) -training >> step=667900, episode=112 reward=0.7680215 (457.36 it/sec) -training >> step=668000, episode=112 reward=0.7517423 (428.17 it/sec) -training >> step=668100, episode=112 reward=0.7555733 (501.12 it/sec) -training >> step=668200, episode=112 reward=0.7812318 (501.43 it/sec) -training >> step=668300, episode=112 reward=0.7540416 (505.27 it/sec) -training >> step=668400, episode=112 reward=0.7675838 (482.19 it/sec) -training >> step=668500, episode=112 reward=0.7590564 (479.92 it/sec) -training >> step=668600, episode=112 reward=0.7787499 (508.15 it/sec) -training >> step=668700, episode=112 reward=0.7756815 (510.35 it/sec) -training >> step=668800, episode=112 reward=0.7519287 (488.34 it/sec) -training >> step=668900, episode=112 reward=0.79177 (489.08 it/sec) -training >> step=669000, episode=112 reward=0.7749883 (526.82 it/sec) -training >> step=669100, episode=112 reward=0.7674975 (454.41 it/sec) -training >> step=669200, episode=112 reward=0.7607946 (469.01 it/sec) -training >> step=669300, episode=112 reward=0.7687327 (484.30 it/sec) -training >> step=669400, episode=112 reward=0.7533144 (502.97 it/sec) -training >> step=669500, episode=112 reward=0.758841 (488.90 it/sec) -training >> step=669600, episode=112 reward=0.7418236 (495.63 it/sec) -training >> step=669700, episode=112 reward=0.7397603 (544.83 it/sec) -training >> step=669800, episode=112 reward=0.7704861 (493.70 it/sec) -training >> step=669900, episode=112 reward=0.746343 (481.79 it/sec) -training >> step=670000, episode=112 reward=0.7670839 (496.80 it/sec) -training >> step=670100, episode=112 reward=0.7645295 (527.10 it/sec) -training >> step=670200, episode=112 reward=0.7576412 (491.69 it/sec) -training >> step=670300, episode=112 reward=0.7668787 (471.46 it/sec) -training >> step=670400, episode=112 reward=0.7498553 (500.08 it/sec) -training >> step=670500, episode=112 reward=0.7582708 (507.71 it/sec) -training >> step=670600, episode=112 reward=0.7596807 (511.70 it/sec) -training >> step=670700, episode=112 reward=0.7382154 (390.26 it/sec) -training >> step=670800, episode=112 reward=0.74019 (473.28 it/sec) -training >> step=670900, episode=112 reward=0.7795214 (470.89 it/sec) -training >> step=671000, episode=112 reward=0.786818 (470.93 it/sec) -training >> step=671100, episode=112 reward=0.7451767 (512.30 it/sec) -training >> step=671200, episode=112 reward=0.7520387 (499.02 it/sec) -training >> step=671300, episode=113 reward=0.7602389 (181.03 it/sec) -training >> step=671400, episode=113 reward=0.7483027 (492.93 it/sec) -training >> step=671500, episode=113 reward=0.7398661 (502.40 it/sec) -training >> step=671600, episode=113 reward=0.7316899 (491.53 it/sec) -training >> step=671700, episode=113 reward=0.736918 (480.09 it/sec) -training >> step=671800, episode=113 reward=0.7819221 (434.40 it/sec) -training >> step=671900, episode=113 reward=0.7524868 (484.66 it/sec) -training >> step=672000, episode=113 reward=0.7363456 (501.49 it/sec) -training >> step=672100, episode=113 reward=0.7425461 (453.33 it/sec) -training >> step=672200, episode=113 reward=0.7643976 (495.87 it/sec) -training >> step=672300, episode=113 reward=0.7554229 (485.26 it/sec) -training >> step=672400, episode=113 reward=0.7760538 (526.00 it/sec) -training >> step=672500, episode=113 reward=0.7490306 (490.42 it/sec) -training >> step=672600, episode=113 reward=0.7597689 (477.44 it/sec) -training >> step=672700, episode=113 reward=0.7562513 (505.44 it/sec) -training >> step=672800, episode=113 reward=0.7779751 (473.84 it/sec) -training >> step=672900, episode=113 reward=0.7647426 (500.74 it/sec) -training >> step=673000, episode=113 reward=0.7557255 (530.16 it/sec) -training >> step=673100, episode=113 reward=0.7523212 (513.49 it/sec) -training >> step=673200, episode=113 reward=0.7666476 (484.78 it/sec) -training >> step=673300, episode=113 reward=0.73172 (482.41 it/sec) -training >> step=673400, episode=113 reward=0.7951332 (487.47 it/sec) -training >> step=673500, episode=113 reward=0.746089 (526.79 it/sec) -training >> step=673600, episode=113 reward=0.7572395 (482.20 it/sec) -training >> step=673700, episode=113 reward=0.7818843 (504.93 it/sec) -training >> step=673800, episode=113 reward=0.7469512 (499.83 it/sec) -training >> step=673900, episode=113 reward=0.7695577 (490.71 it/sec) -training >> step=674000, episode=113 reward=0.7546695 (511.63 it/sec) -training >> step=674100, episode=113 reward=0.7702661 (480.46 it/sec) -training >> step=674200, episode=113 reward=0.7883543 (523.08 it/sec) -training >> step=674300, episode=113 reward=0.755996 (505.21 it/sec) -training >> step=674400, episode=113 reward=0.7587738 (483.89 it/sec) -training >> step=674500, episode=113 reward=0.7704701 (515.38 it/sec) -training >> step=674600, episode=113 reward=0.7533884 (495.96 it/sec) -training >> step=674700, episode=113 reward=0.7548891 (483.30 it/sec) -training >> step=674800, episode=113 reward=0.7545867 (509.21 it/sec) -training >> step=674900, episode=113 reward=0.7686089 (491.07 it/sec) -training >> step=675000, episode=113 reward=0.7823976 (512.61 it/sec) -training >> step=675100, episode=113 reward=0.7545056 (472.62 it/sec) -training >> step=675200, episode=113 reward=0.7713633 (444.56 it/sec) -training >> step=675300, episode=113 reward=0.7609044 (473.36 it/sec) -training >> step=675400, episode=113 reward=0.7715437 (447.53 it/sec) -training >> step=675500, episode=113 reward=0.7585838 (485.01 it/sec) -training >> step=675600, episode=113 reward=0.7734595 (450.65 it/sec) -training >> step=675700, episode=113 reward=0.752915 (491.21 it/sec) -training >> step=675800, episode=113 reward=0.7619287 (448.54 it/sec) -training >> step=675900, episode=113 reward=0.7442825 (453.42 it/sec) -training >> step=676000, episode=113 reward=0.7609792 (495.10 it/sec) -training >> step=676100, episode=113 reward=0.7730817 (504.60 it/sec) -training >> step=676200, episode=113 reward=0.7643457 (495.36 it/sec) -training >> step=676300, episode=113 reward=0.7672623 (483.84 it/sec) -training >> step=676400, episode=113 reward=0.7654712 (475.35 it/sec) -training >> step=676500, episode=113 reward=0.745397 (423.30 it/sec) -training >> step=676600, episode=113 reward=0.7551295 (452.60 it/sec) -training >> step=676700, episode=113 reward=0.758587 (462.72 it/sec) -training >> step=676800, episode=113 reward=0.7578236 (305.34 it/sec) -training >> step=676900, episode=113 reward=0.7490364 (382.65 it/sec) -training >> step=677000, episode=113 reward=0.7572584 (305.45 it/sec) -training >> step=677100, episode=113 reward=0.7876097 (381.67 it/sec) -training >> step=677200, episode=113 reward=0.7587185 (401.25 it/sec) -training >> step=677300, episode=114 reward=0.7742187 (153.72 it/sec) -training >> step=677400, episode=114 reward=0.7596191 (491.61 it/sec) -training >> step=677500, episode=114 reward=0.7582336 (444.33 it/sec) -training >> step=677600, episode=114 reward=0.7781209 (440.33 it/sec) -training >> step=677700, episode=114 reward=0.7590223 (487.44 it/sec) -training >> step=677800, episode=114 reward=0.7737642 (515.09 it/sec) -training >> step=677900, episode=114 reward=0.7681777 (520.15 it/sec) -training >> step=678000, episode=114 reward=0.7366171 (487.33 it/sec) -training >> step=678100, episode=114 reward=0.7682765 (501.34 it/sec) -training >> step=678200, episode=114 reward=0.7592443 (515.11 it/sec) -training >> step=678300, episode=114 reward=0.7463525 (500.40 it/sec) -training >> step=678400, episode=114 reward=0.7610197 (535.13 it/sec) -training >> step=678500, episode=114 reward=0.7674704 (502.63 it/sec) -training >> step=678600, episode=114 reward=0.7541268 (504.07 it/sec) -training >> step=678700, episode=114 reward=0.7610146 (530.98 it/sec) -training >> step=678800, episode=114 reward=0.7530792 (514.94 it/sec) -training >> step=678900, episode=114 reward=0.7575229 (504.34 it/sec) -training >> step=679000, episode=114 reward=0.7614535 (524.65 it/sec) -training >> step=679100, episode=114 reward=0.7627804 (505.57 it/sec) -training >> step=679200, episode=114 reward=0.7659175 (528.38 it/sec) -training >> step=679300, episode=114 reward=0.777364 (500.45 it/sec) -training >> step=679400, episode=114 reward=0.7815395 (560.11 it/sec) -training >> step=679500, episode=114 reward=0.75163 (484.86 it/sec) -training >> step=679600, episode=114 reward=0.7748548 (495.11 it/sec) -training >> step=679700, episode=114 reward=0.7469465 (534.47 it/sec) -training >> step=679800, episode=114 reward=0.768055 (542.17 it/sec) -training >> step=679900, episode=114 reward=0.7837005 (537.25 it/sec) -training >> step=680000, episode=114 reward=0.7601355 (548.44 it/sec) -training >> step=680100, episode=114 reward=0.7715961 (466.44 it/sec) -training >> step=680200, episode=114 reward=0.7650301 (543.26 it/sec) -training >> step=680300, episode=114 reward=0.7657824 (570.92 it/sec) -training >> step=680400, episode=114 reward=0.753824 (544.40 it/sec) -training >> step=680500, episode=114 reward=0.7564026 (521.12 it/sec) -training >> step=680600, episode=114 reward=0.7680499 (547.32 it/sec) -training >> step=680700, episode=114 reward=0.7652961 (535.04 it/sec) -training >> step=680800, episode=114 reward=0.7560331 (557.89 it/sec) -training >> step=680900, episode=114 reward=0.7566293 (528.15 it/sec) -training >> step=681000, episode=114 reward=0.7504942 (551.20 it/sec) -training >> step=681100, episode=114 reward=0.780223 (483.61 it/sec) -training >> step=681200, episode=114 reward=0.7558808 (486.06 it/sec) -training >> step=681300, episode=114 reward=0.7646648 (535.69 it/sec) -training >> step=681400, episode=114 reward=0.7743602 (489.03 it/sec) -training >> step=681500, episode=114 reward=0.7484211 (467.24 it/sec) -training >> step=681600, episode=114 reward=0.7534972 (455.89 it/sec) -training >> step=681700, episode=114 reward=0.7766746 (470.79 it/sec) -training >> step=681800, episode=114 reward=0.7594411 (450.20 it/sec) -training >> step=681900, episode=114 reward=0.7640202 (463.81 it/sec) -training >> step=682000, episode=114 reward=0.762871 (402.09 it/sec) -training >> step=682100, episode=114 reward=0.7611268 (380.53 it/sec) -training >> step=682200, episode=114 reward=0.7543108 (406.54 it/sec) -training >> step=682300, episode=114 reward=0.7693372 (437.20 it/sec) -training >> step=682400, episode=114 reward=0.7586324 (469.76 it/sec) -training >> step=682500, episode=114 reward=0.76098 (474.24 it/sec) -training >> step=682600, episode=114 reward=0.7742819 (422.26 it/sec) -training >> step=682700, episode=114 reward=0.7475768 (412.91 it/sec) -training >> step=682800, episode=114 reward=0.7539972 (409.91 it/sec) -training >> step=682900, episode=114 reward=0.7711341 (475.95 it/sec) -training >> step=683000, episode=114 reward=0.7606067 (352.54 it/sec) -training >> step=683100, episode=114 reward=0.7687061 (494.14 it/sec) -training >> step=683200, episode=114 reward=0.7562444 (498.25 it/sec) -training >> step=683300, episode=115 reward=0.7473953 (158.24 it/sec) -training >> step=683400, episode=115 reward=0.7741234 (445.96 it/sec) -training >> step=683500, episode=115 reward=0.7648197 (431.29 it/sec) -training >> step=683600, episode=115 reward=0.7687654 (464.63 it/sec) -training >> step=683700, episode=115 reward=0.8005264 (570.42 it/sec) -training >> step=683800, episode=115 reward=0.7547799 (496.92 it/sec) -training >> step=683900, episode=115 reward=0.7563885 (439.47 it/sec) -training >> step=684000, episode=115 reward=0.7528821 (441.19 it/sec) -training >> step=684100, episode=115 reward=0.7758665 (482.77 it/sec) -training >> step=684200, episode=115 reward=0.7742683 (443.24 it/sec) -training >> step=684300, episode=115 reward=0.7460722 (456.68 it/sec) -training >> step=684400, episode=115 reward=0.7515361 (480.53 it/sec) -training >> step=684500, episode=115 reward=0.7615371 (505.71 it/sec) -training >> step=684600, episode=115 reward=0.7607576 (519.29 it/sec) -training >> step=684700, episode=115 reward=0.7492915 (509.31 it/sec) -training >> step=684800, episode=115 reward=0.7669083 (531.28 it/sec) -training >> step=684900, episode=115 reward=0.7698045 (542.37 it/sec) -training >> step=685000, episode=115 reward=0.7601124 (509.69 it/sec) -training >> step=685100, episode=115 reward=0.7627766 (539.14 it/sec) -training >> step=685200, episode=115 reward=0.7620857 (570.37 it/sec) -training >> step=685300, episode=115 reward=0.7658871 (528.18 it/sec) -training >> step=685400, episode=115 reward=0.7478026 (533.61 it/sec) -training >> step=685500, episode=115 reward=0.7770041 (586.22 it/sec) -training >> step=685600, episode=115 reward=0.7622276 (522.25 it/sec) -training >> step=685700, episode=115 reward=0.782866 (532.90 it/sec) -training >> step=685800, episode=115 reward=0.7732788 (511.28 it/sec) -training >> step=685900, episode=115 reward=0.751349 (520.22 it/sec) -training >> step=686000, episode=115 reward=0.7503051 (549.99 it/sec) -training >> step=686100, episode=115 reward=0.764436 (475.33 it/sec) -training >> step=686200, episode=115 reward=0.7614999 (513.68 it/sec) -training >> step=686300, episode=115 reward=0.7693025 (542.37 it/sec) -training >> step=686400, episode=115 reward=0.762807 (500.30 it/sec) -training >> step=686500, episode=115 reward=0.7698103 (510.44 it/sec) -training >> step=686600, episode=115 reward=0.74938 (530.11 it/sec) -training >> step=686700, episode=115 reward=0.7703186 (443.83 it/sec) -training >> step=686800, episode=115 reward=0.7761549 (482.95 it/sec) -training >> step=686900, episode=115 reward=0.7665824 (534.78 it/sec) -training >> step=687000, episode=115 reward=0.760969 (561.25 it/sec) -training >> step=687100, episode=115 reward=0.7480261 (489.90 it/sec) -training >> step=687200, episode=115 reward=0.7391315 (531.75 it/sec) -training >> step=687300, episode=115 reward=0.7879313 (549.55 it/sec) -training >> step=687400, episode=115 reward=0.7655128 (501.45 it/sec) -training >> step=687500, episode=115 reward=0.7568694 (542.11 it/sec) -training >> step=687600, episode=115 reward=0.7622947 (536.32 it/sec) -training >> step=687700, episode=115 reward=0.7716272 (523.68 it/sec) -training >> step=687800, episode=115 reward=0.7482128 (511.53 it/sec) -training >> step=687900, episode=115 reward=0.7515363 (562.71 it/sec) -training >> step=688000, episode=115 reward=0.7356024 (524.12 it/sec) -training >> step=688100, episode=115 reward=0.7497194 (530.79 it/sec) -training >> step=688200, episode=115 reward=0.7578515 (530.82 it/sec) -training >> step=688300, episode=115 reward=0.7588906 (529.93 it/sec) -training >> step=688400, episode=115 reward=0.7354794 (568.05 it/sec) -training >> step=688500, episode=115 reward=0.7770944 (494.05 it/sec) -training >> step=688600, episode=115 reward=0.7439785 (498.36 it/sec) -training >> step=688700, episode=115 reward=0.7674193 (533.62 it/sec) -training >> step=688800, episode=115 reward=0.7550888 (541.45 it/sec) -training >> step=688900, episode=115 reward=0.750096 (520.40 it/sec) -training >> step=689000, episode=115 reward=0.745463 (545.16 it/sec) -training >> step=689100, episode=115 reward=0.7577859 (532.04 it/sec) -training >> step=689200, episode=115 reward=0.7719975 (386.91 it/sec) -training >> step=689300, episode=116 reward=0.7783481 (163.47 it/sec) -training >> step=689400, episode=116 reward=0.7523944 (492.91 it/sec) -training >> step=689500, episode=116 reward=0.7500376 (495.90 it/sec) -training >> step=689600, episode=116 reward=0.7586429 (533.65 it/sec) -training >> step=689700, episode=116 reward=0.7315391 (514.75 it/sec) -training >> step=689800, episode=116 reward=0.7307435 (505.08 it/sec) -training >> step=689900, episode=116 reward=0.7565741 (517.81 it/sec) -training >> step=690000, episode=116 reward=0.7478044 (542.65 it/sec) -training >> step=690100, episode=116 reward=0.7854377 (463.48 it/sec) -training >> step=690200, episode=116 reward=0.7641283 (504.07 it/sec) -training >> step=690300, episode=116 reward=0.7697511 (542.24 it/sec) -training >> step=690400, episode=116 reward=0.7582362 (491.56 it/sec) -training >> step=690500, episode=116 reward=0.7733017 (543.49 it/sec) -training >> step=690600, episode=116 reward=0.7785504 (546.05 it/sec) -training >> step=690700, episode=116 reward=0.7569521 (552.43 it/sec) -training >> step=690800, episode=116 reward=0.7390601 (504.94 it/sec) -training >> step=690900, episode=116 reward=0.7759386 (521.43 it/sec) -training >> step=691000, episode=116 reward=0.7745751 (417.69 it/sec) -training >> step=691100, episode=116 reward=0.7762854 (356.18 it/sec) -training >> step=691200, episode=116 reward=0.7778906 (436.71 it/sec) -training >> step=691300, episode=116 reward=0.7776515 (398.38 it/sec) -training >> step=691400, episode=116 reward=0.7753221 (491.07 it/sec) -training >> step=691500, episode=116 reward=0.7661697 (503.19 it/sec) -training >> step=691600, episode=116 reward=0.7812859 (434.68 it/sec) -training >> step=691700, episode=116 reward=0.7681376 (535.75 it/sec) -training >> step=691800, episode=116 reward=0.7799826 (525.49 it/sec) -training >> step=691900, episode=116 reward=0.7614656 (495.73 it/sec) -training >> step=692000, episode=116 reward=0.7694275 (505.80 it/sec) -training >> step=692100, episode=116 reward=0.7546006 (529.64 it/sec) -training >> step=692200, episode=116 reward=0.7448233 (525.52 it/sec) -training >> step=692300, episode=116 reward=0.7532971 (491.90 it/sec) -training >> step=692400, episode=116 reward=0.7851341 (478.21 it/sec) -training >> step=692500, episode=116 reward=0.7431902 (526.05 it/sec) -training >> step=692600, episode=116 reward=0.7584034 (465.73 it/sec) -training >> step=692700, episode=116 reward=0.7557092 (487.40 it/sec) -training >> step=692800, episode=116 reward=0.7451531 (527.35 it/sec) -training >> step=692900, episode=116 reward=0.7481793 (481.54 it/sec) -training >> step=693000, episode=116 reward=0.763537 (482.30 it/sec) -training >> step=693100, episode=116 reward=0.7473651 (501.08 it/sec) -training >> step=693200, episode=116 reward=0.7527463 (530.31 it/sec) -training >> step=693300, episode=116 reward=0.7715272 (498.62 it/sec) -training >> step=693400, episode=116 reward=0.7641814 (493.23 it/sec) -training >> step=693500, episode=116 reward=0.7467659 (517.43 it/sec) -training >> step=693600, episode=116 reward=0.7528146 (491.70 it/sec) -training >> step=693700, episode=116 reward=0.7437708 (510.60 it/sec) -training >> step=693800, episode=116 reward=0.7618849 (508.80 it/sec) -training >> step=693900, episode=116 reward=0.7503956 (516.13 it/sec) -training >> step=694000, episode=116 reward=0.7799814 (512.81 it/sec) -training >> step=694100, episode=116 reward=0.7407584 (521.24 it/sec) -training >> step=694200, episode=116 reward=0.76011 (549.01 it/sec) -training >> step=694300, episode=116 reward=0.7716495 (495.31 it/sec) -training >> step=694400, episode=116 reward=0.7525531 (495.16 it/sec) -training >> step=694500, episode=116 reward=0.7662443 (492.54 it/sec) -training >> step=694600, episode=116 reward=0.779211 (495.88 it/sec) -training >> step=694700, episode=116 reward=0.7763485 (493.87 it/sec) -training >> step=694800, episode=116 reward=0.7576483 (529.03 it/sec) -training >> step=694900, episode=116 reward=0.7566363 (517.64 it/sec) -training >> step=695000, episode=116 reward=0.7864679 (514.89 it/sec) -training >> step=695100, episode=116 reward=0.763055 (535.08 it/sec) -training >> step=695200, episode=116 reward=0.7535688 (530.29 it/sec) -training >> step=695300, episode=117 reward=0.7593661 (170.75 it/sec) -training >> step=695400, episode=117 reward=0.7542276 (491.03 it/sec) -training >> step=695500, episode=117 reward=0.7627404 (479.03 it/sec) -training >> step=695600, episode=117 reward=0.7419645 (527.69 it/sec) -training >> step=695700, episode=117 reward=0.7583432 (461.82 it/sec) -training >> step=695800, episode=117 reward=0.7697891 (474.68 it/sec) -training >> step=695900, episode=117 reward=0.7383286 (481.64 it/sec) -training >> step=696000, episode=117 reward=0.7750535 (467.69 it/sec) -training >> step=696100, episode=117 reward=0.7611687 (501.80 it/sec) -training >> step=696200, episode=117 reward=0.7611911 (487.40 it/sec) -training >> step=696300, episode=117 reward=0.7614563 (471.61 it/sec) -training >> step=696400, episode=117 reward=0.7676314 (493.16 it/sec) -training >> step=696500, episode=117 reward=0.7485387 (502.07 it/sec) -training >> step=696600, episode=117 reward=0.7624617 (516.56 it/sec) -training >> step=696700, episode=117 reward=0.7722238 (471.12 it/sec) -training >> step=696800, episode=117 reward=0.7601746 (509.36 it/sec) -training >> step=696900, episode=117 reward=0.7925805 (533.76 it/sec) -training >> step=697000, episode=117 reward=0.766864 (520.86 it/sec) -training >> step=697100, episode=117 reward=0.7546247 (508.58 it/sec) -training >> step=697200, episode=117 reward=0.751364 (520.61 it/sec) -training >> step=697300, episode=117 reward=0.7570754 (513.37 it/sec) -training >> step=697400, episode=117 reward=0.7918904 (477.29 it/sec) -training >> step=697500, episode=117 reward=0.7554784 (523.61 it/sec) -training >> step=697600, episode=117 reward=0.7660595 (491.52 it/sec) -training >> step=697700, episode=117 reward=0.7375386 (487.88 it/sec) -training >> step=697800, episode=117 reward=0.7704381 (499.28 it/sec) -training >> step=697900, episode=117 reward=0.7634635 (471.18 it/sec) -training >> step=698000, episode=117 reward=0.7904499 (520.02 it/sec) -training >> step=698100, episode=117 reward=0.7621778 (520.94 it/sec) -training >> step=698200, episode=117 reward=0.7459963 (514.75 it/sec) -training >> step=698300, episode=117 reward=0.7480653 (554.46 it/sec) -training >> step=698400, episode=117 reward=0.7503609 (487.44 it/sec) -training >> step=698500, episode=117 reward=0.7584274 (492.81 it/sec) -training >> step=698600, episode=117 reward=0.7483845 (546.57 it/sec) -training >> step=698700, episode=117 reward=0.7712795 (466.46 it/sec) -training >> step=698800, episode=117 reward=0.7612542 (478.34 it/sec) -training >> step=698900, episode=117 reward=0.7419864 (476.55 it/sec) -training >> step=699000, episode=117 reward=0.7711842 (529.44 it/sec) -training >> step=699100, episode=117 reward=0.7588068 (528.55 it/sec) -training >> step=699200, episode=117 reward=0.7491606 (468.89 it/sec) -training >> step=699300, episode=117 reward=0.7648886 (524.50 it/sec) -training >> step=699400, episode=117 reward=0.7655655 (547.79 it/sec) -training >> step=699500, episode=117 reward=0.7639729 (509.03 it/sec) -training >> step=699600, episode=117 reward=0.7707159 (517.67 it/sec) -training >> step=699700, episode=117 reward=0.764114 (499.33 it/sec) -training >> step=699800, episode=117 reward=0.7648719 (518.48 it/sec) -training >> step=699900, episode=117 reward=0.7482703 (536.18 it/sec) -training >> step=700000, episode=117 reward=0.7371814 (491.18 it/sec) -training >> step=700100, episode=117 reward=0.7441354 (545.58 it/sec) -training >> step=700200, episode=117 reward=0.7829345 (544.01 it/sec) -training >> step=700300, episode=117 reward=0.782501 (508.87 it/sec) -training >> step=700400, episode=117 reward=0.7563807 (517.96 it/sec) -training >> step=700500, episode=117 reward=0.7581142 (504.08 it/sec) -training >> step=700600, episode=117 reward=0.7678885 (513.42 it/sec) -training >> step=700700, episode=117 reward=0.7837487 (503.17 it/sec) -training >> step=700800, episode=117 reward=0.7522573 (507.89 it/sec) -training >> step=700900, episode=117 reward=0.755781 (562.76 it/sec) -training >> step=701000, episode=117 reward=0.7901232 (479.58 it/sec) -training >> step=701100, episode=117 reward=0.7673705 (539.35 it/sec) -training >> step=701200, episode=117 reward=0.7632638 (497.63 it/sec) -training >> step=701300, episode=118 reward=0.7678018 (156.95 it/sec) -training >> step=701400, episode=118 reward=0.7532374 (521.10 it/sec) -training >> step=701500, episode=118 reward=0.7449389 (522.17 it/sec) -training >> step=701600, episode=118 reward=0.7562216 (524.39 it/sec) -training >> step=701700, episode=118 reward=0.760513 (472.80 it/sec) -training >> step=701800, episode=118 reward=0.7548153 (498.63 it/sec) -training >> step=701900, episode=118 reward=0.7474447 (494.05 it/sec) -training >> step=702000, episode=118 reward=0.7662597 (543.97 it/sec) -training >> step=702100, episode=118 reward=0.7500274 (510.86 it/sec) -training >> step=702200, episode=118 reward=0.7665756 (524.79 it/sec) -training >> step=702300, episode=118 reward=0.7644396 (519.70 it/sec) -training >> step=702400, episode=118 reward=0.7486961 (477.87 it/sec) -training >> step=702500, episode=118 reward=0.7666804 (530.18 it/sec) -training >> step=702600, episode=118 reward=0.7671773 (473.89 it/sec) -training >> step=702700, episode=118 reward=0.7840482 (545.33 it/sec) -training >> step=702800, episode=118 reward=0.7641455 (519.37 it/sec) -training >> step=702900, episode=118 reward=0.7455243 (466.42 it/sec) -training >> step=703000, episode=118 reward=0.766473 (493.56 it/sec) -training >> step=703100, episode=118 reward=0.7705204 (481.11 it/sec) -training >> step=703200, episode=118 reward=0.7571311 (486.73 it/sec) -training >> step=703300, episode=118 reward=0.7622588 (494.98 it/sec) -training >> step=703400, episode=118 reward=0.7719502 (504.81 it/sec) -training >> step=703500, episode=118 reward=0.7690899 (489.49 it/sec) -training >> step=703600, episode=118 reward=0.7638693 (454.33 it/sec) -training >> step=703700, episode=118 reward=0.7679842 (484.08 it/sec) -training >> step=703800, episode=118 reward=0.7710347 (539.69 it/sec) -training >> step=703900, episode=118 reward=0.748256 (514.20 it/sec) -training >> step=704000, episode=118 reward=0.7645562 (504.85 it/sec) -training >> step=704100, episode=118 reward=0.7565238 (515.54 it/sec) -training >> step=704200, episode=118 reward=0.7650151 (489.68 it/sec) -training >> step=704300, episode=118 reward=0.7410102 (518.40 it/sec) -training >> step=704400, episode=118 reward=0.7646451 (533.54 it/sec) -training >> step=704500, episode=118 reward=0.7710362 (458.68 it/sec) -training >> step=704600, episode=118 reward=0.7693872 (472.06 it/sec) -training >> step=704700, episode=118 reward=0.7742324 (481.47 it/sec) -training >> step=704800, episode=118 reward=0.7559843 (547.81 it/sec) -training >> step=704900, episode=118 reward=0.770209 (528.42 it/sec) -training >> step=705000, episode=118 reward=0.7826829 (479.67 it/sec) -training >> step=705100, episode=118 reward=0.7596614 (492.40 it/sec) -training >> step=705200, episode=118 reward=0.756034 (505.76 it/sec) -training >> step=705300, episode=118 reward=0.7691287 (502.19 it/sec) -training >> step=705400, episode=118 reward=0.7473531 (540.36 it/sec) -training >> step=705500, episode=118 reward=0.7647448 (481.67 it/sec) -training >> step=705600, episode=118 reward=0.7544953 (459.42 it/sec) -training >> step=705700, episode=118 reward=0.7780904 (506.47 it/sec) -training >> step=705800, episode=118 reward=0.7719188 (525.12 it/sec) -training >> step=705900, episode=118 reward=0.7644889 (492.19 it/sec) -training >> step=706000, episode=118 reward=0.7608175 (450.45 it/sec) -training >> step=706100, episode=118 reward=0.7600252 (483.07 it/sec) -training >> step=706200, episode=118 reward=0.7570559 (513.45 it/sec) -training >> step=706300, episode=118 reward=0.7505113 (504.79 it/sec) -training >> step=706400, episode=118 reward=0.7743288 (501.69 it/sec) -training >> step=706500, episode=118 reward=0.7638105 (519.13 it/sec) -training >> step=706600, episode=118 reward=0.7557794 (424.25 it/sec) -training >> step=706700, episode=118 reward=0.7759584 (500.90 it/sec) -training >> step=706800, episode=118 reward=0.7633392 (507.27 it/sec) -training >> step=706900, episode=118 reward=0.7514175 (522.63 it/sec) -training >> step=707000, episode=118 reward=0.7617304 (475.78 it/sec) -training >> step=707100, episode=118 reward=0.7400617 (457.10 it/sec) -training >> step=707200, episode=118 reward=0.766187 (339.99 it/sec) -training >> step=707300, episode=119 reward=0.7559623 (210.06 it/sec) -training >> step=707400, episode=119 reward=0.7788934 (464.60 it/sec) -training >> step=707500, episode=119 reward=0.7674223 (517.16 it/sec) -training >> step=707600, episode=119 reward=0.7567847 (486.53 it/sec) -training >> step=707700, episode=119 reward=0.751116 (509.18 it/sec) -training >> step=707800, episode=119 reward=0.772604 (520.59 it/sec) -training >> step=707900, episode=119 reward=0.7661979 (442.08 it/sec) -training >> step=708000, episode=119 reward=0.7588775 (510.63 it/sec) -training >> step=708100, episode=119 reward=0.7440752 (531.07 it/sec) -training >> step=708200, episode=119 reward=0.7787496 (505.46 it/sec) -training >> step=708300, episode=119 reward=0.7575172 (523.50 it/sec) -training >> step=708400, episode=119 reward=0.7572891 (431.61 it/sec) -training >> step=708500, episode=119 reward=0.7723433 (496.02 it/sec) -training >> step=708600, episode=119 reward=0.7574575 (540.91 it/sec) -training >> step=708700, episode=119 reward=0.7610884 (506.51 it/sec) -training >> step=708800, episode=119 reward=0.7733339 (491.77 it/sec) -training >> step=708900, episode=119 reward=0.767246 (444.00 it/sec) -training >> step=709000, episode=119 reward=0.7494015 (482.40 it/sec) -training >> step=709100, episode=119 reward=0.7482923 (508.50 it/sec) -training >> step=709200, episode=119 reward=0.7509691 (495.02 it/sec) -training >> step=709300, episode=119 reward=0.7673808 (489.55 it/sec) -training >> step=709400, episode=119 reward=0.7898375 (381.89 it/sec) -training >> step=709500, episode=119 reward=0.7678025 (386.50 it/sec) -training >> step=709600, episode=119 reward=0.7717639 (435.69 it/sec) -training >> step=709700, episode=119 reward=0.7739932 (484.42 it/sec) -training >> step=709800, episode=119 reward=0.7559271 (463.64 it/sec) -training >> step=709900, episode=119 reward=0.7651936 (478.68 it/sec) -training >> step=710000, episode=119 reward=0.7669563 (506.80 it/sec) -training >> step=710100, episode=119 reward=0.774492 (497.78 it/sec) -training >> step=710200, episode=119 reward=0.7871628 (485.01 it/sec) -training >> step=710300, episode=119 reward=0.7491379 (490.03 it/sec) -training >> step=710400, episode=119 reward=0.7697675 (475.86 it/sec) -training >> step=710500, episode=119 reward=0.7740571 (486.86 it/sec) -training >> step=710600, episode=119 reward=0.7541386 (557.13 it/sec) -training >> step=710700, episode=119 reward=0.7630581 (517.47 it/sec) -training >> step=710800, episode=119 reward=0.7605966 (489.96 it/sec) -training >> step=710900, episode=119 reward=0.7768667 (515.22 it/sec) -training >> step=711000, episode=119 reward=0.7703711 (479.40 it/sec) -training >> step=711100, episode=119 reward=0.7535743 (501.72 it/sec) -training >> step=711200, episode=119 reward=0.7673342 (505.87 it/sec) -training >> step=711300, episode=119 reward=0.7481169 (509.65 it/sec) -training >> step=711400, episode=119 reward=0.7536026 (533.04 it/sec) -training >> step=711500, episode=119 reward=0.765502 (540.53 it/sec) -training >> step=711600, episode=119 reward=0.7503635 (521.84 it/sec) -training >> step=711700, episode=119 reward=0.7594107 (523.84 it/sec) -training >> step=711800, episode=119 reward=0.7374064 (527.04 it/sec) -training >> step=711900, episode=119 reward=0.7571741 (521.67 it/sec) -training >> step=712000, episode=119 reward=0.7647792 (498.09 it/sec) -training >> step=712100, episode=119 reward=0.7685599 (517.59 it/sec) -training >> step=712200, episode=119 reward=0.7590443 (536.13 it/sec) -training >> step=712300, episode=119 reward=0.7803595 (506.02 it/sec) -training >> step=712400, episode=119 reward=0.7426869 (501.33 it/sec) -training >> step=712500, episode=119 reward=0.7443003 (513.31 it/sec) -training >> step=712600, episode=119 reward=0.7571153 (505.28 it/sec) -training >> step=712700, episode=119 reward=0.7902265 (528.70 it/sec) -training >> step=712800, episode=119 reward=0.7585252 (514.15 it/sec) -training >> step=712900, episode=119 reward=0.7557326 (490.62 it/sec) -training >> step=713000, episode=119 reward=0.7616242 (480.59 it/sec) -training >> step=713100, episode=119 reward=0.7634667 (484.18 it/sec) -training >> step=713200, episode=119 reward=0.7875078 (533.90 it/sec) -training >> step=713300, episode=120 reward=0.7585771 (153.92 it/sec) -training >> step=713400, episode=120 reward=0.7378461 (487.36 it/sec) -training >> step=713500, episode=120 reward=0.7508134 (529.09 it/sec) -training >> step=713600, episode=120 reward=0.7229922 (491.97 it/sec) -training >> step=713700, episode=120 reward=0.7519565 (466.20 it/sec) -training >> step=713800, episode=120 reward=0.7538112 (492.59 it/sec) -training >> step=713900, episode=120 reward=0.7704614 (512.36 it/sec) -training >> step=714000, episode=120 reward=0.7458666 (561.18 it/sec) -training >> step=714100, episode=120 reward=0.7446072 (499.02 it/sec) -training >> step=714200, episode=120 reward=0.7725393 (518.49 it/sec) -training >> step=714300, episode=120 reward=0.7888458 (513.77 it/sec) -training >> step=714400, episode=120 reward=0.7765526 (525.63 it/sec) -training >> step=714500, episode=120 reward=0.7481121 (503.77 it/sec) -training >> step=714600, episode=120 reward=0.7915469 (501.51 it/sec) -training >> step=714700, episode=120 reward=0.7719098 (543.69 it/sec) -training >> step=714800, episode=120 reward=0.7484297 (493.84 it/sec) -training >> step=714900, episode=120 reward=0.7556112 (480.74 it/sec) -training >> step=715000, episode=120 reward=0.7463039 (534.65 it/sec) -training >> step=715100, episode=120 reward=0.7831829 (511.78 it/sec) -training >> step=715200, episode=120 reward=0.755828 (498.36 it/sec) -training >> step=715300, episode=120 reward=0.7573031 (543.52 it/sec) -training >> step=715400, episode=120 reward=0.7543405 (521.97 it/sec) -training >> step=715500, episode=120 reward=0.7763544 (558.33 it/sec) -training >> step=715600, episode=120 reward=0.7610716 (516.01 it/sec) -training >> step=715700, episode=120 reward=0.7854499 (494.75 it/sec) -training >> step=715800, episode=120 reward=0.7578079 (525.57 it/sec) -training >> step=715900, episode=120 reward=0.7484255 (476.66 it/sec) -training >> step=716000, episode=120 reward=0.7789764 (543.96 it/sec) -training >> step=716100, episode=120 reward=0.7679399 (543.51 it/sec) -training >> step=716200, episode=120 reward=0.7592762 (528.97 it/sec) -training >> step=716300, episode=120 reward=0.7522118 (557.24 it/sec) -training >> step=716400, episode=120 reward=0.7728879 (514.76 it/sec) -training >> step=716500, episode=120 reward=0.7611254 (531.76 it/sec) -training >> step=716600, episode=120 reward=0.7685895 (578.48 it/sec) -training >> step=716700, episode=120 reward=0.7704886 (552.09 it/sec) -training >> step=716800, episode=120 reward=0.7503377 (516.26 it/sec) -training >> step=716900, episode=120 reward=0.7433583 (538.49 it/sec) -training >> step=717000, episode=120 reward=0.7696545 (523.89 it/sec) -training >> step=717100, episode=120 reward=0.7600608 (539.02 it/sec) -training >> step=717200, episode=120 reward=0.7612091 (546.22 it/sec) -training >> step=717300, episode=120 reward=0.7557571 (562.07 it/sec) -training >> step=717400, episode=120 reward=0.7690117 (525.09 it/sec) -training >> step=717500, episode=120 reward=0.7582701 (529.60 it/sec) -training >> step=717600, episode=120 reward=0.7424026 (523.65 it/sec) -training >> step=717700, episode=120 reward=0.77852 (509.06 it/sec) -training >> step=717800, episode=120 reward=0.7615639 (511.10 it/sec) -training >> step=717900, episode=120 reward=0.7589132 (523.36 it/sec) -training >> step=718000, episode=120 reward=0.7538164 (576.57 it/sec) -training >> step=718100, episode=120 reward=0.7716222 (484.21 it/sec) -training >> step=718200, episode=120 reward=0.7523637 (504.55 it/sec) -training >> step=718300, episode=120 reward=0.7502616 (519.20 it/sec) -training >> step=718400, episode=120 reward=0.7443488 (544.11 it/sec) -training >> step=718500, episode=120 reward=0.75789 (513.04 it/sec) -training >> step=718600, episode=120 reward=0.74449 (492.80 it/sec) -training >> step=718700, episode=120 reward=0.7675066 (486.10 it/sec) -training >> step=718800, episode=120 reward=0.7462133 (546.58 it/sec) -training >> step=718900, episode=120 reward=0.7638483 (499.27 it/sec) -training >> step=719000, episode=120 reward=0.7542335 (536.78 it/sec) -training >> step=719100, episode=120 reward=0.7881229 (542.54 it/sec) -training >> step=719200, episode=120 reward=0.7695969 (513.71 it/sec) -training >> step=719300, episode=121 reward=0.7695458 (132.96 it/sec) -training >> step=719400, episode=121 reward=0.755986 (517.91 it/sec) -training >> step=719500, episode=121 reward=0.7693583 (534.83 it/sec) -training >> step=719600, episode=121 reward=0.7562441 (474.51 it/sec) -training >> step=719700, episode=121 reward=0.762192 (468.78 it/sec) -training >> step=719800, episode=121 reward=0.7808521 (530.70 it/sec) -training >> step=719900, episode=121 reward=0.7595791 (532.47 it/sec) -training >> step=720000, episode=121 reward=0.7568539 (543.95 it/sec) -training >> step=720100, episode=121 reward=0.7500051 (479.50 it/sec) -training >> step=720200, episode=121 reward=0.7623292 (540.25 it/sec) -training >> step=720300, episode=121 reward=0.769271 (554.79 it/sec) -training >> step=720400, episode=121 reward=0.7483481 (546.69 it/sec) -training >> step=720500, episode=121 reward=0.7537275 (530.18 it/sec) -training >> step=720600, episode=121 reward=0.7524124 (482.66 it/sec) -training >> step=720700, episode=121 reward=0.7463366 (519.26 it/sec) -training >> step=720800, episode=121 reward=0.7825507 (494.41 it/sec) -training >> step=720900, episode=121 reward=0.7593018 (500.62 it/sec) -training >> step=721000, episode=121 reward=0.7731683 (507.61 it/sec) -training >> step=721100, episode=121 reward=0.7565948 (498.52 it/sec) -training >> step=721200, episode=121 reward=0.7625149 (524.10 it/sec) -training >> step=721300, episode=121 reward=0.7661424 (498.35 it/sec) -training >> step=721400, episode=121 reward=0.7537201 (528.10 it/sec) -training >> step=721500, episode=121 reward=0.7652482 (506.13 it/sec) -training >> step=721600, episode=121 reward=0.7627996 (521.74 it/sec) -training >> step=721700, episode=121 reward=0.7616081 (494.04 it/sec) -training >> step=721800, episode=121 reward=0.7515318 (469.42 it/sec) -training >> step=721900, episode=121 reward=0.7779567 (515.85 it/sec) -training >> step=722000, episode=121 reward=0.729338 (515.62 it/sec) -training >> step=722100, episode=121 reward=0.7506293 (509.51 it/sec) -training >> step=722200, episode=121 reward=0.7979761 (510.46 it/sec) -training >> step=722300, episode=121 reward=0.7673807 (489.36 it/sec) -training >> step=722400, episode=121 reward=0.7660357 (525.13 it/sec) -training >> step=722500, episode=121 reward=0.7516353 (520.45 it/sec) -training >> step=722600, episode=121 reward=0.7581797 (477.94 it/sec) -training >> step=722700, episode=121 reward=0.7626898 (526.43 it/sec) -training >> step=722800, episode=121 reward=0.7541653 (531.76 it/sec) -training >> step=722900, episode=121 reward=0.7670025 (543.27 it/sec) -training >> step=723000, episode=121 reward=0.7789471 (553.07 it/sec) -training >> step=723100, episode=121 reward=0.7615041 (441.27 it/sec) -training >> step=723200, episode=121 reward=0.7651785 (538.85 it/sec) -training >> step=723300, episode=121 reward=0.766313 (538.41 it/sec) -training >> step=723400, episode=121 reward=0.76574 (530.97 it/sec) -training >> step=723500, episode=121 reward=0.7698907 (530.61 it/sec) -training >> step=723600, episode=121 reward=0.7447973 (550.65 it/sec) -training >> step=723700, episode=121 reward=0.7714103 (488.20 it/sec) -training >> step=723800, episode=121 reward=0.7536283 (474.04 it/sec) -training >> step=723900, episode=121 reward=0.7659435 (466.69 it/sec) -training >> step=724000, episode=121 reward=0.7653536 (482.37 it/sec) -training >> step=724100, episode=121 reward=0.7549675 (388.34 it/sec) -training >> step=724200, episode=121 reward=0.7613855 (427.07 it/sec) -training >> step=724300, episode=121 reward=0.7540462 (403.53 it/sec) -training >> step=724400, episode=121 reward=0.7507256 (373.78 it/sec) -training >> step=724500, episode=121 reward=0.7674528 (405.09 it/sec) -training >> step=724600, episode=121 reward=0.765519 (396.88 it/sec) -training >> step=724700, episode=121 reward=0.7492892 (477.08 it/sec) -training >> step=724800, episode=121 reward=0.770382 (497.45 it/sec) -training >> step=724900, episode=121 reward=0.7758799 (459.46 it/sec) -training >> step=725000, episode=121 reward=0.7493762 (465.35 it/sec) -training >> step=725100, episode=121 reward=0.7609277 (507.42 it/sec) -training >> step=725200, episode=121 reward=0.7716134 (484.73 it/sec) -training >> step=725300, episode=122 reward=0.7475737 (131.66 it/sec) -training >> step=725400, episode=122 reward=0.7622188 (458.17 it/sec) -training >> step=725500, episode=122 reward=0.7732989 (464.81 it/sec) -training >> step=725600, episode=122 reward=0.7614274 (490.41 it/sec) -training >> step=725700, episode=122 reward=0.7474105 (487.32 it/sec) -training >> step=725800, episode=122 reward=0.7640335 (520.12 it/sec) -training >> step=725900, episode=122 reward=0.7494787 (529.20 it/sec) -training >> step=726000, episode=122 reward=0.766925 (483.37 it/sec) -training >> step=726100, episode=122 reward=0.7615097 (462.56 it/sec) -training >> step=726200, episode=122 reward=0.7634141 (426.96 it/sec) -training >> step=726300, episode=122 reward=0.7597396 (470.46 it/sec) -training >> step=726400, episode=122 reward=0.75407 (474.85 it/sec) -training >> step=726500, episode=122 reward=0.778195 (463.25 it/sec) -training >> step=726600, episode=122 reward=0.7528188 (485.21 it/sec) -training >> step=726700, episode=122 reward=0.7490079 (498.62 it/sec) -training >> step=726800, episode=122 reward=0.7643913 (490.21 it/sec) -training >> step=726900, episode=122 reward=0.7364764 (513.99 it/sec) -training >> step=727000, episode=122 reward=0.7483134 (494.08 it/sec) -training >> step=727100, episode=122 reward=0.7590157 (516.26 it/sec) -training >> step=727200, episode=122 reward=0.7690661 (505.41 it/sec) -training >> step=727300, episode=122 reward=0.7499769 (496.65 it/sec) -training >> step=727400, episode=122 reward=0.7631065 (548.51 it/sec) -training >> step=727500, episode=122 reward=0.7437054 (549.59 it/sec) -training >> step=727600, episode=122 reward=0.757741 (517.15 it/sec) -training >> step=727700, episode=122 reward=0.7441427 (543.09 it/sec) -training >> step=727800, episode=122 reward=0.7718232 (544.59 it/sec) -training >> step=727900, episode=122 reward=0.7540025 (522.85 it/sec) -training >> step=728000, episode=122 reward=0.7625573 (538.11 it/sec) -training >> step=728100, episode=122 reward=0.7553543 (544.70 it/sec) -training >> step=728200, episode=122 reward=0.7349343 (554.28 it/sec) -training >> step=728300, episode=122 reward=0.7749194 (521.17 it/sec) -training >> step=728400, episode=122 reward=0.7672992 (514.97 it/sec) -training >> step=728500, episode=122 reward=0.7695717 (544.48 it/sec) -training >> step=728600, episode=122 reward=0.7811251 (521.19 it/sec) -training >> step=728700, episode=122 reward=0.76521 (523.35 it/sec) -training >> step=728800, episode=122 reward=0.7562739 (535.16 it/sec) -training >> step=728900, episode=122 reward=0.7651011 (530.33 it/sec) -training >> step=729000, episode=122 reward=0.7750067 (563.92 it/sec) -training >> step=729100, episode=122 reward=0.7610559 (534.04 it/sec) -training >> step=729200, episode=122 reward=0.7512203 (552.35 it/sec) -training >> step=729300, episode=122 reward=0.770256 (534.41 it/sec) -training >> step=729400, episode=122 reward=0.7425777 (521.24 it/sec) -training >> step=729500, episode=122 reward=0.7731115 (543.36 it/sec) -training >> step=729600, episode=122 reward=0.7685881 (573.10 it/sec) -training >> step=729700, episode=122 reward=0.7565079 (545.80 it/sec) -training >> step=729800, episode=122 reward=0.746551 (488.65 it/sec) -training >> step=729900, episode=122 reward=0.7691283 (549.01 it/sec) -training >> step=730000, episode=122 reward=0.7827106 (538.57 it/sec) -training >> step=730100, episode=122 reward=0.7580116 (543.54 it/sec) -training >> step=730200, episode=122 reward=0.7561734 (549.97 it/sec) -training >> step=730300, episode=122 reward=0.7638373 (535.21 it/sec) -training >> step=730400, episode=122 reward=0.7614616 (517.63 it/sec) -training >> step=730500, episode=122 reward=0.7563787 (448.82 it/sec) -training >> step=730600, episode=122 reward=0.7565178 (516.99 it/sec) -training >> step=730700, episode=122 reward=0.7419856 (487.18 it/sec) -training >> step=730800, episode=122 reward=0.7819226 (448.62 it/sec) -training >> step=730900, episode=122 reward=0.7582843 (478.35 it/sec) -training >> step=731000, episode=122 reward=0.7463213 (478.27 it/sec) -training >> step=731100, episode=122 reward=0.7584357 (517.82 it/sec) -training >> step=731200, episode=122 reward=0.7538221 (560.04 it/sec) -training >> step=731300, episode=123 reward=0.7641269 (66.63 it/sec) -training >> step=731400, episode=123 reward=0.7632611 (499.89 it/sec) -training >> step=731500, episode=123 reward=0.7555332 (498.15 it/sec) -training >> step=731600, episode=123 reward=0.7781634 (519.59 it/sec) -training >> step=731700, episode=123 reward=0.7443141 (521.69 it/sec) -training >> step=731800, episode=123 reward=0.7557346 (513.98 it/sec) -training >> step=731900, episode=123 reward=0.7717119 (528.16 it/sec) -training >> step=732000, episode=123 reward=0.7361442 (527.91 it/sec) -training >> step=732100, episode=123 reward=0.7562369 (490.09 it/sec) -training >> step=732200, episode=123 reward=0.7634228 (560.86 it/sec) -training >> step=732300, episode=123 reward=0.7613016 (525.46 it/sec) -training >> step=732400, episode=123 reward=0.7633142 (544.42 it/sec) -training >> step=732500, episode=123 reward=0.7526441 (562.84 it/sec) -training >> step=732600, episode=123 reward=0.7778508 (544.64 it/sec) -training >> step=732700, episode=123 reward=0.7679263 (523.07 it/sec) -training >> step=732800, episode=123 reward=0.7682494 (522.15 it/sec) -training >> step=732900, episode=123 reward=0.7577628 (574.00 it/sec) -training >> step=733000, episode=123 reward=0.7780811 (533.43 it/sec) -training >> step=733100, episode=123 reward=0.7560382 (536.73 it/sec) -training >> step=733200, episode=123 reward=0.7488397 (544.63 it/sec) -training >> step=733300, episode=123 reward=0.7638125 (513.92 it/sec) -training >> step=733400, episode=123 reward=0.7687607 (507.17 it/sec) -training >> step=733500, episode=123 reward=0.7667992 (539.45 it/sec) -training >> step=733600, episode=123 reward=0.7723395 (559.18 it/sec) -training >> step=733700, episode=123 reward=0.7392635 (535.45 it/sec) -training >> step=733800, episode=123 reward=0.7364897 (482.01 it/sec) -training >> step=733900, episode=123 reward=0.7738793 (473.12 it/sec) -training >> step=734000, episode=123 reward=0.7736831 (556.12 it/sec) -training >> step=734100, episode=123 reward=0.7513905 (538.04 it/sec) -training >> step=734200, episode=123 reward=0.7577169 (508.87 it/sec) -training >> step=734300, episode=123 reward=0.7300186 (586.86 it/sec) -training >> step=734400, episode=123 reward=0.765402 (516.52 it/sec) -training >> step=734500, episode=123 reward=0.7623035 (560.24 it/sec) -training >> step=734600, episode=123 reward=0.7748627 (504.32 it/sec) -training >> step=734700, episode=123 reward=0.7554275 (565.13 it/sec) -training >> step=734800, episode=123 reward=0.7505066 (542.89 it/sec) -training >> step=734900, episode=123 reward=0.7467821 (506.96 it/sec) -training >> step=735000, episode=123 reward=0.7702404 (533.69 it/sec) -training >> step=735100, episode=123 reward=0.7507479 (585.17 it/sec) -training >> step=735200, episode=123 reward=0.7756304 (492.65 it/sec) -training >> step=735300, episode=123 reward=0.7589166 (524.30 it/sec) -training >> step=735400, episode=123 reward=0.7517607 (576.53 it/sec) -training >> step=735500, episode=123 reward=0.7761434 (488.89 it/sec) -training >> step=735600, episode=123 reward=0.7704704 (538.82 it/sec) -training >> step=735700, episode=123 reward=0.7744879 (547.94 it/sec) -training >> step=735800, episode=123 reward=0.7946645 (571.58 it/sec) -training >> step=735900, episode=123 reward=0.7645705 (566.55 it/sec) -training >> step=736000, episode=123 reward=0.7527367 (524.06 it/sec) -training >> step=736100, episode=123 reward=0.7688463 (474.99 it/sec) -training >> step=736200, episode=123 reward=0.7523888 (489.22 it/sec) -training >> step=736300, episode=123 reward=0.7587425 (538.64 it/sec) -training >> step=736400, episode=123 reward=0.7614902 (523.57 it/sec) -training >> step=736500, episode=123 reward=0.7652499 (562.95 it/sec) -training >> step=736600, episode=123 reward=0.7801149 (510.81 it/sec) -training >> step=736700, episode=123 reward=0.7670863 (509.26 it/sec) -training >> step=736800, episode=123 reward=0.7710875 (514.19 it/sec) -training >> step=736900, episode=123 reward=0.7530193 (512.80 it/sec) -training >> step=737000, episode=123 reward=0.7692207 (531.78 it/sec) -training >> step=737100, episode=123 reward=0.7633523 (499.09 it/sec) -training >> step=737200, episode=123 reward=0.7575811 (541.43 it/sec) -training >> step=737300, episode=124 reward=0.7694251 (72.95 it/sec) -training >> step=737400, episode=124 reward=0.7572849 (474.30 it/sec) -training >> step=737500, episode=124 reward=0.7676533 (487.08 it/sec) -training >> step=737600, episode=124 reward=0.7751451 (476.46 it/sec) -training >> step=737700, episode=124 reward=0.772207 (492.02 it/sec) -training >> step=737800, episode=124 reward=0.776718 (501.62 it/sec) -training >> step=737900, episode=124 reward=0.7606918 (504.76 it/sec) -training >> step=738000, episode=124 reward=0.738523 (548.95 it/sec) -training >> step=738100, episode=124 reward=0.7676631 (540.38 it/sec) -training >> step=738200, episode=124 reward=0.7614369 (520.00 it/sec) -training >> step=738300, episode=124 reward=0.7546446 (459.56 it/sec) -training >> step=738400, episode=124 reward=0.7501488 (507.49 it/sec) -training >> step=738500, episode=124 reward=0.7489383 (539.80 it/sec) -training >> step=738600, episode=124 reward=0.7675287 (498.41 it/sec) -training >> step=738700, episode=124 reward=0.7531549 (524.05 it/sec) -training >> step=738800, episode=124 reward=0.7608642 (534.41 it/sec) -training >> step=738900, episode=124 reward=0.7670773 (547.55 it/sec) -training >> step=739000, episode=124 reward=0.7566442 (511.99 it/sec) -training >> step=739100, episode=124 reward=0.7859681 (531.07 it/sec) -training >> step=739200, episode=124 reward=0.7634305 (535.43 it/sec) -training >> step=739300, episode=124 reward=0.7309125 (502.64 it/sec) -training >> step=739400, episode=124 reward=0.779887 (534.18 it/sec) -training >> step=739500, episode=124 reward=0.7533674 (502.43 it/sec) -training >> step=739600, episode=124 reward=0.7723863 (528.44 it/sec) -training >> step=739700, episode=124 reward=0.779203 (472.17 it/sec) -training >> step=739800, episode=124 reward=0.7482157 (496.83 it/sec) -training >> step=739900, episode=124 reward=0.7518619 (513.23 it/sec) -training >> step=740000, episode=124 reward=0.7534844 (509.73 it/sec) -training >> step=740100, episode=124 reward=0.7590426 (506.00 it/sec) -training >> step=740200, episode=124 reward=0.7798599 (507.84 it/sec) -training >> step=740300, episode=124 reward=0.777775 (481.45 it/sec) -training >> step=740400, episode=124 reward=0.754459 (500.64 it/sec) -training >> step=740500, episode=124 reward=0.7633817 (530.01 it/sec) -training >> step=740600, episode=124 reward=0.751892 (557.50 it/sec) -training >> step=740700, episode=124 reward=0.7732628 (514.79 it/sec) -training >> step=740800, episode=124 reward=0.7502273 (511.80 it/sec) -training >> step=740900, episode=124 reward=0.7437657 (503.40 it/sec) -training >> step=741000, episode=124 reward=0.7614302 (523.03 it/sec) -training >> step=741100, episode=124 reward=0.7657231 (528.45 it/sec) -training >> step=741200, episode=124 reward=0.7784891 (504.98 it/sec) -training >> step=741300, episode=124 reward=0.7666528 (505.75 it/sec) -training >> step=741400, episode=124 reward=0.7494025 (471.90 it/sec) -training >> step=741500, episode=124 reward=0.7724131 (495.89 it/sec) -training >> step=741600, episode=124 reward=0.7754148 (508.77 it/sec) -training >> step=741700, episode=124 reward=0.749402 (494.81 it/sec) -training >> step=741800, episode=124 reward=0.7627945 (447.99 it/sec) -training >> step=741900, episode=124 reward=0.7674745 (491.47 it/sec) -training >> step=742000, episode=124 reward=0.7505893 (519.77 it/sec) -training >> step=742100, episode=124 reward=0.7417697 (504.99 it/sec) -training >> step=742200, episode=124 reward=0.7603796 (513.08 it/sec) -training >> step=742300, episode=124 reward=0.7553315 (536.30 it/sec) -training >> step=742400, episode=124 reward=0.7407844 (508.84 it/sec) -training >> step=742500, episode=124 reward=0.7760733 (503.73 it/sec) -training >> step=742600, episode=124 reward=0.7562032 (515.99 it/sec) -training >> step=742700, episode=124 reward=0.7642447 (546.80 it/sec) -training >> step=742800, episode=124 reward=0.7720394 (514.95 it/sec) -training >> step=742900, episode=124 reward=0.746549 (498.87 it/sec) -training >> step=743000, episode=124 reward=0.7254971 (522.75 it/sec) -training >> step=743100, episode=124 reward=0.7631211 (458.02 it/sec) -training >> step=743200, episode=124 reward=0.7784705 (510.47 it/sec) -training >> step=743300, episode=125 reward=0.765363 (202.21 it/sec) -training >> step=743400, episode=125 reward=0.767765 (468.22 it/sec) -training >> step=743500, episode=125 reward=0.7699704 (516.09 it/sec) -training >> step=743600, episode=125 reward=0.7456871 (502.49 it/sec) -training >> step=743700, episode=125 reward=0.7646496 (539.62 it/sec) -training >> step=743800, episode=125 reward=0.7579194 (499.81 it/sec) -training >> step=743900, episode=125 reward=0.7730023 (487.44 it/sec) -training >> step=744000, episode=125 reward=0.7594271 (514.92 it/sec) -training >> step=744100, episode=125 reward=0.7531927 (533.09 it/sec) -training >> step=744200, episode=125 reward=0.7747512 (522.42 it/sec) -training >> step=744300, episode=125 reward=0.7595036 (480.70 it/sec) -training >> step=744400, episode=125 reward=0.7823073 (523.30 it/sec) -training >> step=744500, episode=125 reward=0.7565743 (477.50 it/sec) -training >> step=744600, episode=125 reward=0.7448746 (580.57 it/sec) -training >> step=744700, episode=125 reward=0.7563854 (529.57 it/sec) -training >> step=744800, episode=125 reward=0.7600448 (472.03 it/sec) -training >> step=744900, episode=125 reward=0.7612668 (513.02 it/sec) -training >> step=745000, episode=125 reward=0.7739681 (531.47 it/sec) -training >> step=745100, episode=125 reward=0.7600169 (531.40 it/sec) -training >> step=745200, episode=125 reward=0.7791054 (503.83 it/sec) -training >> step=745300, episode=125 reward=0.778691 (486.98 it/sec) -training >> step=745400, episode=125 reward=0.7582852 (526.74 it/sec) -training >> step=745500, episode=125 reward=0.7763202 (540.65 it/sec) -training >> step=745600, episode=125 reward=0.7677504 (478.91 it/sec) -training >> step=745700, episode=125 reward=0.7417219 (541.01 it/sec) -training >> step=745800, episode=125 reward=0.7673872 (520.68 it/sec) -training >> step=745900, episode=125 reward=0.7845638 (539.18 it/sec) -training >> step=746000, episode=125 reward=0.7816904 (513.93 it/sec) -training >> step=746100, episode=125 reward=0.7608548 (510.55 it/sec) -training >> step=746200, episode=125 reward=0.7711921 (544.44 it/sec) -training >> step=746300, episode=125 reward=0.7796425 (454.99 it/sec) -training >> step=746400, episode=125 reward=0.7581344 (483.81 it/sec) -training >> step=746500, episode=125 reward=0.7558504 (537.60 it/sec) -training >> step=746600, episode=125 reward=0.7719175 (514.60 it/sec) -training >> step=746700, episode=125 reward=0.7755564 (526.33 it/sec) -training >> step=746800, episode=125 reward=0.7569372 (519.20 it/sec) -training >> step=746900, episode=125 reward=0.775471 (479.74 it/sec) -training >> step=747000, episode=125 reward=0.7550864 (524.79 it/sec) -training >> step=747100, episode=125 reward=0.7769045 (516.16 it/sec) -training >> step=747200, episode=125 reward=0.7537104 (568.65 it/sec) -training >> step=747300, episode=125 reward=0.7620661 (536.36 it/sec) -training >> step=747400, episode=125 reward=0.7381585 (467.35 it/sec) -training >> step=747500, episode=125 reward=0.7552757 (555.02 it/sec) -training >> step=747600, episode=125 reward=0.7636458 (551.89 it/sec) -training >> step=747700, episode=125 reward=0.7540392 (474.40 it/sec) -training >> step=747800, episode=125 reward=0.7646172 (510.80 it/sec) -training >> step=747900, episode=125 reward=0.7471492 (520.29 it/sec) -training >> step=748000, episode=125 reward=0.755795 (481.44 it/sec) -training >> step=748100, episode=125 reward=0.767703 (495.83 it/sec) -training >> step=748200, episode=125 reward=0.7532652 (537.07 it/sec) -training >> step=748300, episode=125 reward=0.7488717 (573.30 it/sec) -training >> step=748400, episode=125 reward=0.7757761 (476.00 it/sec) -training >> step=748500, episode=125 reward=0.7482702 (511.22 it/sec) -training >> step=748600, episode=125 reward=0.763796 (503.92 it/sec) -training >> step=748700, episode=125 reward=0.7599004 (503.42 it/sec) -training >> step=748800, episode=125 reward=0.7454044 (523.56 it/sec) -training >> step=748900, episode=125 reward=0.7506704 (478.39 it/sec) -training >> step=749000, episode=125 reward=0.7595628 (513.25 it/sec) -training >> step=749100, episode=125 reward=0.7584518 (537.29 it/sec) -training >> step=749200, episode=125 reward=0.7727716 (471.90 it/sec) -training >> step=749300, episode=126 reward=0.7575972 (163.14 it/sec) -training >> step=749400, episode=126 reward=0.7504214 (498.21 it/sec) -training >> step=749500, episode=126 reward=0.7604172 (503.35 it/sec) -training >> step=749600, episode=126 reward=0.760004 (537.28 it/sec) -training >> step=749700, episode=126 reward=0.7618216 (507.99 it/sec) -training >> step=749800, episode=126 reward=0.7502398 (418.34 it/sec) -training >> step=749900, episode=126 reward=0.7720993 (529.00 it/sec) -training >> step=750000, episode=126 reward=0.7559354 (510.03 it/sec) -training >> step=750100, episode=126 reward=0.7523617 (534.53 it/sec) -training >> step=750200, episode=126 reward=0.7731136 (527.13 it/sec) -training >> step=750300, episode=126 reward=0.7543908 (511.51 it/sec) -training >> step=750400, episode=126 reward=0.7613288 (497.02 it/sec) -training >> step=750500, episode=126 reward=0.7511659 (527.40 it/sec) -training >> step=750600, episode=126 reward=0.7677713 (563.99 it/sec) -training >> step=750700, episode=126 reward=0.7723082 (482.27 it/sec) -training >> step=750800, episode=126 reward=0.7580907 (529.27 it/sec) -training >> step=750900, episode=126 reward=0.7708916 (500.85 it/sec) -training >> step=751000, episode=126 reward=0.7755506 (537.39 it/sec) -training >> step=751100, episode=126 reward=0.7623864 (553.40 it/sec) -training >> step=751200, episode=126 reward=0.7608488 (539.19 it/sec) -training >> step=751300, episode=126 reward=0.7822304 (517.23 it/sec) -training >> step=751400, episode=126 reward=0.7724681 (505.60 it/sec) -training >> step=751500, episode=126 reward=0.7535485 (506.92 it/sec) -training >> step=751600, episode=126 reward=0.7590201 (523.04 it/sec) -training >> step=751700, episode=126 reward=0.7810745 (490.54 it/sec) -training >> step=751800, episode=126 reward=0.7742003 (534.00 it/sec) -training >> step=751900, episode=126 reward=0.7598117 (505.32 it/sec) -training >> step=752000, episode=126 reward=0.7548699 (551.16 it/sec) -training >> step=752100, episode=126 reward=0.7467091 (511.97 it/sec) -training >> step=752200, episode=126 reward=0.7635517 (517.73 it/sec) -training >> step=752300, episode=126 reward=0.7697102 (526.74 it/sec) -training >> step=752400, episode=126 reward=0.7625463 (503.26 it/sec) -training >> step=752500, episode=126 reward=0.7591664 (466.80 it/sec) -training >> step=752600, episode=126 reward=0.7899056 (541.99 it/sec) -training >> step=752700, episode=126 reward=0.7542019 (511.89 it/sec) -training >> step=752800, episode=126 reward=0.7625574 (550.85 it/sec) -training >> step=752900, episode=126 reward=0.7629284 (498.92 it/sec) -training >> step=753000, episode=126 reward=0.7354853 (449.52 it/sec) -training >> step=753100, episode=126 reward=0.7580156 (473.85 it/sec) -training >> step=753200, episode=126 reward=0.7545272 (496.01 it/sec) -training >> step=753300, episode=126 reward=0.7867434 (471.27 it/sec) -training >> step=753400, episode=126 reward=0.7652228 (522.69 it/sec) -training >> step=753500, episode=126 reward=0.7587997 (428.58 it/sec) -training >> step=753600, episode=126 reward=0.7687961 (467.62 it/sec) -training >> step=753700, episode=126 reward=0.7523565 (458.38 it/sec) -training >> step=753800, episode=126 reward=0.7600641 (481.55 it/sec) -training >> step=753900, episode=126 reward=0.7736797 (475.74 it/sec) -training >> step=754000, episode=126 reward=0.7823334 (464.64 it/sec) -training >> step=754100, episode=126 reward=0.7857927 (452.00 it/sec) -training >> step=754200, episode=126 reward=0.7753206 (499.08 it/sec) -training >> step=754300, episode=126 reward=0.7696587 (472.66 it/sec) -training >> step=754400, episode=126 reward=0.7569954 (460.35 it/sec) -training >> step=754500, episode=126 reward=0.7651863 (455.21 it/sec) -training >> step=754600, episode=126 reward=0.7573566 (466.70 it/sec) -training >> step=754700, episode=126 reward=0.7569144 (471.50 it/sec) -training >> step=754800, episode=126 reward=0.7593662 (461.48 it/sec) -training >> step=754900, episode=126 reward=0.7448909 (482.52 it/sec) -training >> step=755000, episode=126 reward=0.7521535 (485.87 it/sec) -training >> step=755100, episode=126 reward=0.7414429 (507.31 it/sec) -training >> step=755200, episode=126 reward=0.7501249 (385.36 it/sec) -training >> step=755300, episode=127 reward=0.7768034 (140.12 it/sec) -training >> step=755400, episode=127 reward=0.7670092 (475.42 it/sec) -training >> step=755500, episode=127 reward=0.7398096 (492.88 it/sec) -training >> step=755600, episode=127 reward=0.7543331 (493.54 it/sec) -training >> step=755700, episode=127 reward=0.7561128 (428.13 it/sec) -training >> step=755800, episode=127 reward=0.7319236 (470.91 it/sec) -training >> step=755900, episode=127 reward=0.7676085 (460.57 it/sec) -training >> step=756000, episode=127 reward=0.7661461 (446.02 it/sec) -training >> step=756100, episode=127 reward=0.761064 (461.06 it/sec) -training >> step=756200, episode=127 reward=0.7297656 (449.25 it/sec) -training >> step=756300, episode=127 reward=0.7386656 (454.48 it/sec) -training >> step=756400, episode=127 reward=0.7467385 (455.31 it/sec) -training >> step=756500, episode=127 reward=0.7435182 (454.38 it/sec) -training >> step=756600, episode=127 reward=0.7413971 (471.26 it/sec) -training >> step=756700, episode=127 reward=0.7498975 (457.43 it/sec) -training >> step=756800, episode=127 reward=0.7622153 (468.16 it/sec) -training >> step=756900, episode=127 reward=0.753897 (462.96 it/sec) -training >> step=757000, episode=127 reward=0.7559596 (416.35 it/sec) -training >> step=757100, episode=127 reward=0.758512 (409.66 it/sec) -training >> step=757200, episode=127 reward=0.7469035 (450.55 it/sec) -training >> step=757300, episode=127 reward=0.7604469 (421.20 it/sec) -training >> step=757400, episode=127 reward=0.7551856 (405.87 it/sec) -training >> step=757500, episode=127 reward=0.7373865 (400.27 it/sec) -training >> step=757600, episode=127 reward=0.75753 (431.55 it/sec) -training >> step=757700, episode=127 reward=0.7822348 (403.64 it/sec) -training >> step=757800, episode=127 reward=0.7692941 (384.74 it/sec) -training >> step=757900, episode=127 reward=0.7777886 (410.48 it/sec) -training >> step=758000, episode=127 reward=0.7666002 (446.04 it/sec) -training >> step=758100, episode=127 reward=0.7711562 (484.68 it/sec) -training >> step=758200, episode=127 reward=0.7479178 (449.64 it/sec) -training >> step=758300, episode=127 reward=0.7600188 (401.38 it/sec) -training >> step=758400, episode=127 reward=0.7710282 (422.34 it/sec) -training >> step=758500, episode=127 reward=0.7513276 (450.03 it/sec) -training >> step=758600, episode=127 reward=0.7633286 (445.75 it/sec) -training >> step=758700, episode=127 reward=0.7823762 (472.34 it/sec) -training >> step=758800, episode=127 reward=0.7555239 (447.72 it/sec) -training >> step=758900, episode=127 reward=0.7739323 (407.43 it/sec) -training >> step=759000, episode=127 reward=0.7575854 (402.80 it/sec) -training >> step=759100, episode=127 reward=0.7583679 (442.09 it/sec) -training >> step=759200, episode=127 reward=0.7760439 (465.48 it/sec) -training >> step=759300, episode=127 reward=0.7669561 (436.79 it/sec) -training >> step=759400, episode=127 reward=0.7414746 (416.92 it/sec) -training >> step=759500, episode=127 reward=0.7630518 (465.14 it/sec) -training >> step=759600, episode=127 reward=0.7477223 (428.15 it/sec) -training >> step=759700, episode=127 reward=0.765403 (481.17 it/sec) -training >> step=759800, episode=127 reward=0.7675228 (517.26 it/sec) -training >> step=759900, episode=127 reward=0.7496822 (457.69 it/sec) -training >> step=760000, episode=127 reward=0.7647108 (510.48 it/sec) -training >> step=760100, episode=127 reward=0.7571032 (493.71 it/sec) -training >> step=760200, episode=127 reward=0.7514948 (511.50 it/sec) -training >> step=760300, episode=127 reward=0.7852226 (452.75 it/sec) -training >> step=760400, episode=127 reward=0.7532749 (533.04 it/sec) -training >> step=760500, episode=127 reward=0.748027 (441.83 it/sec) -training >> step=760600, episode=127 reward=0.7411613 (502.28 it/sec) -training >> step=760700, episode=127 reward=0.758763 (532.86 it/sec) -training >> step=760800, episode=127 reward=0.7794117 (424.67 it/sec) -training >> step=760900, episode=127 reward=0.7681966 (495.08 it/sec) -training >> step=761000, episode=127 reward=0.7646074 (489.09 it/sec) -training >> step=761100, episode=127 reward=0.7480056 (508.30 it/sec) -training >> step=761200, episode=127 reward=0.7791414 (517.61 it/sec) -training >> step=761300, episode=128 reward=0.7772741 (120.68 it/sec) -training >> step=761400, episode=128 reward=0.7566848 (487.77 it/sec) -training >> step=761500, episode=128 reward=0.7508754 (463.55 it/sec) -training >> step=761600, episode=128 reward=0.7508887 (501.38 it/sec) -training >> step=761700, episode=128 reward=0.7288023 (481.59 it/sec) -training >> step=761800, episode=128 reward=0.7784481 (521.27 it/sec) -training >> step=761900, episode=128 reward=0.7381954 (457.14 it/sec) -training >> step=762000, episode=128 reward=0.7703302 (485.50 it/sec) -training >> step=762100, episode=128 reward=0.7387741 (488.71 it/sec) -training >> step=762200, episode=128 reward=0.7514429 (509.69 it/sec) -training >> step=762300, episode=128 reward=0.7638706 (484.16 it/sec) -training >> step=762400, episode=128 reward=0.7755322 (498.13 it/sec) -training >> step=762500, episode=128 reward=0.761874 (509.48 it/sec) -training >> step=762600, episode=128 reward=0.7692378 (488.82 it/sec) -training >> step=762700, episode=128 reward=0.7461022 (506.75 it/sec) -training >> step=762800, episode=128 reward=0.769492 (539.94 it/sec) -training >> step=762900, episode=128 reward=0.7660086 (511.34 it/sec) -training >> step=763000, episode=128 reward=0.768244 (511.76 it/sec) -training >> step=763100, episode=128 reward=0.7595084 (487.72 it/sec) -training >> step=763200, episode=128 reward=0.7425946 (512.18 it/sec) -training >> step=763300, episode=128 reward=0.7803677 (513.62 it/sec) -training >> step=763400, episode=128 reward=0.7625602 (497.13 it/sec) -training >> step=763500, episode=128 reward=0.767917 (506.46 it/sec) -training >> step=763600, episode=128 reward=0.7614998 (448.76 it/sec) -training >> step=763700, episode=128 reward=0.7607406 (504.76 it/sec) -training >> step=763800, episode=128 reward=0.7685877 (456.06 it/sec) -training >> step=763900, episode=128 reward=0.7406696 (482.24 it/sec) -training >> step=764000, episode=128 reward=0.7459817 (509.70 it/sec) -training >> step=764100, episode=128 reward=0.762221 (528.19 it/sec) -training >> step=764200, episode=128 reward=0.7700778 (466.31 it/sec) -training >> step=764300, episode=128 reward=0.7476058 (458.51 it/sec) -training >> step=764400, episode=128 reward=0.7679101 (492.18 it/sec) -training >> step=764500, episode=128 reward=0.7606778 (542.68 it/sec) -training >> step=764600, episode=128 reward=0.7673077 (512.08 it/sec) -training >> step=764700, episode=128 reward=0.7948143 (502.84 it/sec) -training >> step=764800, episode=128 reward=0.7606181 (458.62 it/sec) -training >> step=764900, episode=128 reward=0.7782236 (454.69 it/sec) -training >> step=765000, episode=128 reward=0.7672884 (426.11 it/sec) -training >> step=765100, episode=128 reward=0.7620684 (460.31 it/sec) -training >> step=765200, episode=128 reward=0.770446 (468.61 it/sec) -training >> step=765300, episode=128 reward=0.7507229 (422.15 it/sec) -training >> step=765400, episode=128 reward=0.7651916 (419.22 it/sec) -training >> step=765500, episode=128 reward=0.7769922 (398.59 it/sec) -training >> step=765600, episode=128 reward=0.751565 (491.25 it/sec) -training >> step=765700, episode=128 reward=0.7719551 (530.98 it/sec) -training >> step=765800, episode=128 reward=0.7355602 (482.21 it/sec) -training >> step=765900, episode=128 reward=0.7668021 (529.71 it/sec) -training >> step=766000, episode=128 reward=0.7694283 (545.15 it/sec) -training >> step=766100, episode=128 reward=0.7552394 (506.61 it/sec) -training >> step=766200, episode=128 reward=0.760798 (519.54 it/sec) -training >> step=766300, episode=128 reward=0.7708559 (534.29 it/sec) -training >> step=766400, episode=128 reward=0.7571655 (525.00 it/sec) -training >> step=766500, episode=128 reward=0.7249336 (531.40 it/sec) -training >> step=766600, episode=128 reward=0.7820247 (517.30 it/sec) -training >> step=766700, episode=128 reward=0.7933722 (540.12 it/sec) -training >> step=766800, episode=128 reward=0.7797323 (501.47 it/sec) -training >> step=766900, episode=128 reward=0.7424318 (517.25 it/sec) -training >> step=767000, episode=128 reward=0.7705275 (545.66 it/sec) -training >> step=767100, episode=128 reward=0.7490273 (526.81 it/sec) -training >> step=767200, episode=128 reward=0.7779812 (552.57 it/sec) -training >> step=767300, episode=129 reward=0.7609819 (105.84 it/sec) -training >> step=767400, episode=129 reward=0.7494501 (409.00 it/sec) -training >> step=767500, episode=129 reward=0.7430452 (520.61 it/sec) -training >> step=767600, episode=129 reward=0.7695729 (522.38 it/sec) -training >> step=767700, episode=129 reward=0.7786416 (497.45 it/sec) -training >> step=767800, episode=129 reward=0.7749896 (525.96 it/sec) -training >> step=767900, episode=129 reward=0.7626354 (536.93 it/sec) -training >> step=768000, episode=129 reward=0.7550819 (560.28 it/sec) -training >> step=768100, episode=129 reward=0.7620297 (578.14 it/sec) -training >> step=768200, episode=129 reward=0.7626696 (537.23 it/sec) -training >> step=768300, episode=129 reward=0.7660481 (521.01 it/sec) -training >> step=768400, episode=129 reward=0.766533 (510.92 it/sec) -training >> step=768500, episode=129 reward=0.7734779 (499.89 it/sec) -training >> step=768600, episode=129 reward=0.7680464 (491.05 it/sec) -training >> step=768700, episode=129 reward=0.7616161 (538.63 it/sec) -training >> step=768800, episode=129 reward=0.7505186 (524.68 it/sec) -training >> step=768900, episode=129 reward=0.7345222 (463.57 it/sec) -training >> step=769000, episode=129 reward=0.7707972 (479.73 it/sec) -training >> step=769100, episode=129 reward=0.757187 (509.92 it/sec) -training >> step=769200, episode=129 reward=0.7728911 (492.55 it/sec) -training >> step=769300, episode=129 reward=0.7735929 (417.56 it/sec) -training >> step=769400, episode=129 reward=0.7622645 (449.00 it/sec) -training >> step=769500, episode=129 reward=0.770083 (437.92 it/sec) -training >> step=769600, episode=129 reward=0.744132 (382.18 it/sec) -training >> step=769700, episode=129 reward=0.7660762 (430.09 it/sec) -training >> step=769800, episode=129 reward=0.740852 (452.39 it/sec) -training >> step=769900, episode=129 reward=0.7581082 (462.92 it/sec) -training >> step=770000, episode=129 reward=0.7679085 (401.89 it/sec) -training >> step=770100, episode=129 reward=0.7585368 (429.10 it/sec) -training >> step=770200, episode=129 reward=0.7675036 (421.02 it/sec) -training >> step=770300, episode=129 reward=0.7725219 (433.72 it/sec) -training >> step=770400, episode=129 reward=0.7536676 (408.12 it/sec) -training >> step=770500, episode=129 reward=0.7531313 (417.83 it/sec) -training >> step=770600, episode=129 reward=0.7499886 (375.40 it/sec) -training >> step=770700, episode=129 reward=0.7685413 (399.10 it/sec) -training >> step=770800, episode=129 reward=0.7487308 (419.48 it/sec) -training >> step=770900, episode=129 reward=0.7481632 (410.12 it/sec) -training >> step=771000, episode=129 reward=0.761383 (460.34 it/sec) -training >> step=771100, episode=129 reward=0.7316046 (438.87 it/sec) -training >> step=771200, episode=129 reward=0.7617536 (447.72 it/sec) -training >> step=771300, episode=129 reward=0.775427 (432.22 it/sec) -training >> step=771400, episode=129 reward=0.7762603 (452.33 it/sec) -training >> step=771500, episode=129 reward=0.7306772 (428.36 it/sec) -training >> step=771600, episode=129 reward=0.7609776 (445.48 it/sec) -training >> step=771700, episode=129 reward=0.7494039 (429.36 it/sec) -training >> step=771800, episode=129 reward=0.7558987 (454.90 it/sec) -training >> step=771900, episode=129 reward=0.7545233 (453.11 it/sec) -training >> step=772000, episode=129 reward=0.7509851 (440.05 it/sec) -training >> step=772100, episode=129 reward=0.7408398 (417.03 it/sec) -training >> step=772200, episode=129 reward=0.748911 (399.77 it/sec) -training >> step=772300, episode=129 reward=0.7854871 (381.63 it/sec) -training >> step=772400, episode=129 reward=0.7787438 (440.87 it/sec) -training >> step=772500, episode=129 reward=0.7457163 (430.89 it/sec) -training >> step=772600, episode=129 reward=0.7665173 (389.48 it/sec) -training >> step=772700, episode=129 reward=0.7512091 (393.99 it/sec) -training >> step=772800, episode=129 reward=0.7738465 (427.14 it/sec) -training >> step=772900, episode=129 reward=0.7625564 (428.93 it/sec) -training >> step=773000, episode=129 reward=0.7694291 (427.90 it/sec) -training >> step=773100, episode=129 reward=0.779614 (482.71 it/sec) -training >> step=773200, episode=129 reward=0.7542844 (475.43 it/sec) -training >> step=773300, episode=130 reward=0.758393 (92.81 it/sec) -training >> step=773400, episode=130 reward=0.7647513 (238.68 it/sec) -training >> step=773500, episode=130 reward=0.7561608 (445.00 it/sec) -training >> step=773600, episode=130 reward=0.7605782 (458.59 it/sec) -training >> step=773700, episode=130 reward=0.7642798 (516.95 it/sec) -training >> step=773800, episode=130 reward=0.7345697 (459.40 it/sec) -training >> step=773900, episode=130 reward=0.7709212 (440.68 it/sec) -training >> step=774000, episode=130 reward=0.7475496 (490.18 it/sec) -training >> step=774100, episode=130 reward=0.7504825 (452.69 it/sec) -training >> step=774200, episode=130 reward=0.7551602 (452.72 it/sec) -training >> step=774300, episode=130 reward=0.7657509 (439.35 it/sec) -training >> step=774400, episode=130 reward=0.7566562 (438.30 it/sec) -training >> step=774500, episode=130 reward=0.7457713 (472.99 it/sec) -training >> step=774600, episode=130 reward=0.7860559 (456.48 it/sec) -training >> step=774700, episode=130 reward=0.7685511 (432.24 it/sec) -training >> step=774800, episode=130 reward=0.7780151 (454.27 it/sec) -training >> step=774900, episode=130 reward=0.7541382 (387.32 it/sec) -training >> step=775000, episode=130 reward=0.7538811 (430.99 it/sec) -training >> step=775100, episode=130 reward=0.7637434 (442.32 it/sec) -training >> step=775200, episode=130 reward=0.7483885 (432.82 it/sec) -training >> step=775300, episode=130 reward=0.7689963 (465.47 it/sec) -training >> step=775400, episode=130 reward=0.7557595 (448.32 it/sec) -training >> step=775500, episode=130 reward=0.7480126 (470.36 it/sec) -training >> step=775600, episode=130 reward=0.7465938 (462.92 it/sec) -training >> step=775700, episode=130 reward=0.7881492 (451.45 it/sec) -training >> step=775800, episode=130 reward=0.7658157 (441.72 it/sec) -training >> step=775900, episode=130 reward=0.7588333 (465.91 it/sec) -training >> step=776000, episode=130 reward=0.7631364 (470.92 it/sec) -training >> step=776100, episode=130 reward=0.7501248 (448.95 it/sec) -training >> step=776200, episode=130 reward=0.7620202 (498.52 it/sec) -training >> step=776300, episode=130 reward=0.7706338 (447.40 it/sec) -training >> step=776400, episode=130 reward=0.7478499 (461.35 it/sec) -training >> step=776500, episode=130 reward=0.7779246 (410.61 it/sec) -training >> step=776600, episode=130 reward=0.7566764 (448.83 it/sec) -training >> step=776700, episode=130 reward=0.7664946 (465.48 it/sec) -training >> step=776800, episode=130 reward=0.7475219 (471.64 it/sec) -training >> step=776900, episode=130 reward=0.77896 (454.50 it/sec) -training >> step=777000, episode=130 reward=0.7511386 (418.29 it/sec) -training >> step=777100, episode=130 reward=0.7493212 (459.40 it/sec) -training >> step=777200, episode=130 reward=0.7485462 (473.32 it/sec) -training >> step=777300, episode=130 reward=0.7451492 (436.30 it/sec) -training >> step=777400, episode=130 reward=0.7510996 (439.34 it/sec) -training >> step=777500, episode=130 reward=0.7732763 (400.39 it/sec) -training >> step=777600, episode=130 reward=0.7608396 (444.78 it/sec) -training >> step=777700, episode=130 reward=0.7542307 (429.21 it/sec) -training >> step=777800, episode=130 reward=0.7471505 (401.35 it/sec) -training >> step=777900, episode=130 reward=0.7518811 (383.38 it/sec) -training >> step=778000, episode=130 reward=0.7690681 (397.57 it/sec) -training >> step=778100, episode=130 reward=0.7516227 (402.86 it/sec) -training >> step=778200, episode=130 reward=0.7437064 (457.03 it/sec) -training >> step=778300, episode=130 reward=0.7562522 (400.60 it/sec) -training >> step=778400, episode=130 reward=0.7692342 (396.36 it/sec) -training >> step=778500, episode=130 reward=0.7553225 (405.46 it/sec) -training >> step=778600, episode=130 reward=0.76256 (446.24 it/sec) -training >> step=778700, episode=130 reward=0.7523955 (455.18 it/sec) -training >> step=778800, episode=130 reward=0.7636946 (447.35 it/sec) -training >> step=778900, episode=130 reward=0.7500361 (492.54 it/sec) -training >> step=779000, episode=130 reward=0.7771437 (468.73 it/sec) -training >> step=779100, episode=130 reward=0.7433919 (507.69 it/sec) -training >> step=779200, episode=130 reward=0.7598687 (454.08 it/sec) -training >> step=779300, episode=131 reward=0.7562208 (89.52 it/sec) -training >> step=779400, episode=131 reward=0.7372038 (443.94 it/sec) -training >> step=779500, episode=131 reward=0.7452642 (463.41 it/sec) -training >> step=779600, episode=131 reward=0.7629436 (474.28 it/sec) -training >> step=779700, episode=131 reward=0.7476082 (504.27 it/sec) -training >> step=779800, episode=131 reward=0.7828692 (475.89 it/sec) -training >> step=779900, episode=131 reward=0.7632723 (522.64 it/sec) -training >> step=780000, episode=131 reward=0.7734242 (521.21 it/sec) -training >> step=780100, episode=131 reward=0.7489145 (489.66 it/sec) -training >> step=780200, episode=131 reward=0.7537481 (490.96 it/sec) -training >> step=780300, episode=131 reward=0.7654427 (466.92 it/sec) -training >> step=780400, episode=131 reward=0.7693974 (449.63 it/sec) -training >> step=780500, episode=131 reward=0.746271 (517.11 it/sec) -training >> step=780600, episode=131 reward=0.7639354 (493.35 it/sec) -training >> step=780700, episode=131 reward=0.771238 (479.68 it/sec) -training >> step=780800, episode=131 reward=0.7350718 (432.81 it/sec) -training >> step=780900, episode=131 reward=0.7605667 (510.14 it/sec) -training >> step=781000, episode=131 reward=0.7331988 (442.94 it/sec) -training >> step=781100, episode=131 reward=0.7587739 (415.47 it/sec) -training >> step=781200, episode=131 reward=0.7679002 (481.34 it/sec) -training >> step=781300, episode=131 reward=0.7462202 (482.09 it/sec) -training >> step=781400, episode=131 reward=0.7647446 (412.52 it/sec) -training >> step=781500, episode=131 reward=0.7569413 (446.12 it/sec) -training >> step=781600, episode=131 reward=0.7545081 (478.89 it/sec) -training >> step=781700, episode=131 reward=0.7591245 (457.50 it/sec) -training >> step=781800, episode=131 reward=0.7886155 (448.81 it/sec) -training >> step=781900, episode=131 reward=0.7617499 (464.14 it/sec) -training >> step=782000, episode=131 reward=0.7692093 (418.79 it/sec) -training >> step=782100, episode=131 reward=0.764147 (500.79 it/sec) -training >> step=782200, episode=131 reward=0.7593305 (456.10 it/sec) -training >> step=782300, episode=131 reward=0.7371898 (408.61 it/sec) -training >> step=782400, episode=131 reward=0.7621485 (429.36 it/sec) -training >> step=782500, episode=131 reward=0.7595727 (426.55 it/sec) -training >> step=782600, episode=131 reward=0.7652125 (433.84 it/sec) -training >> step=782700, episode=131 reward=0.7539558 (447.91 it/sec) -training >> step=782800, episode=131 reward=0.7548708 (376.98 it/sec) -training >> step=782900, episode=131 reward=0.7470016 (444.91 it/sec) -training >> step=783000, episode=131 reward=0.7469571 (415.75 it/sec) -training >> step=783100, episode=131 reward=0.7392309 (440.18 it/sec) -training >> step=783200, episode=131 reward=0.7675866 (458.06 it/sec) -training >> step=783300, episode=131 reward=0.7749923 (459.14 it/sec) -training >> step=783400, episode=131 reward=0.7815055 (451.29 it/sec) -training >> step=783500, episode=131 reward=0.7668651 (431.11 it/sec) -training >> step=783600, episode=131 reward=0.7665244 (449.95 it/sec) -training >> step=783700, episode=131 reward=0.7567013 (459.73 it/sec) -training >> step=783800, episode=131 reward=0.757207 (431.04 it/sec) -training >> step=783900, episode=131 reward=0.7432073 (434.96 it/sec) -training >> step=784000, episode=131 reward=0.7477956 (448.97 it/sec) -training >> step=784100, episode=131 reward=0.7651629 (426.75 it/sec) -training >> step=784200, episode=131 reward=0.7578872 (413.03 it/sec) -training >> step=784300, episode=131 reward=0.7369561 (496.02 it/sec) -training >> step=784400, episode=131 reward=0.7622492 (423.39 it/sec) -training >> step=784500, episode=131 reward=0.7573797 (444.34 it/sec) -training >> step=784600, episode=131 reward=0.7638538 (451.48 it/sec) -training >> step=784700, episode=131 reward=0.7688325 (459.51 it/sec) -training >> step=784800, episode=131 reward=0.7440539 (423.22 it/sec) -training >> step=784900, episode=131 reward=0.7894844 (465.98 it/sec) -training >> step=785000, episode=131 reward=0.7640029 (486.03 it/sec) -training >> step=785100, episode=131 reward=0.7433826 (505.80 it/sec) -training >> step=785200, episode=131 reward=0.7479962 (463.05 it/sec) -training >> step=785300, episode=132 reward=0.7562845 (116.56 it/sec) -training >> step=785400, episode=132 reward=0.7344018 (458.80 it/sec) -training >> step=785500, episode=132 reward=0.7446265 (410.93 it/sec) -training >> step=785600, episode=132 reward=0.7657609 (491.58 it/sec) -training >> step=785700, episode=132 reward=0.7422221 (497.81 it/sec) -training >> step=785800, episode=132 reward=0.7554018 (465.35 it/sec) -training >> step=785900, episode=132 reward=0.7658096 (496.25 it/sec) -training >> step=786000, episode=132 reward=0.7403989 (439.96 it/sec) -training >> step=786100, episode=132 reward=0.7542898 (494.84 it/sec) -training >> step=786200, episode=132 reward=0.7690699 (508.89 it/sec) -training >> step=786300, episode=132 reward=0.7507748 (527.18 it/sec) -training >> step=786400, episode=132 reward=0.7454486 (546.18 it/sec) -training >> step=786500, episode=132 reward=0.7278848 (504.76 it/sec) -training >> step=786600, episode=132 reward=0.7585857 (515.72 it/sec) -training >> step=786700, episode=132 reward=0.7536348 (579.01 it/sec) -training >> step=786800, episode=132 reward=0.7746096 (486.16 it/sec) -training >> step=786900, episode=132 reward=0.7489323 (514.14 it/sec) -training >> step=787000, episode=132 reward=0.7511383 (523.63 it/sec) -training >> step=787100, episode=132 reward=0.7582472 (495.33 it/sec) -training >> step=787200, episode=132 reward=0.7645749 (517.46 it/sec) -training >> step=787300, episode=132 reward=0.7723938 (540.53 it/sec) -training >> step=787400, episode=132 reward=0.7670173 (463.07 it/sec) -training >> step=787500, episode=132 reward=0.7584231 (507.36 it/sec) -training >> step=787600, episode=132 reward=0.7618591 (528.40 it/sec) -training >> step=787700, episode=132 reward=0.7739252 (505.53 it/sec) -training >> step=787800, episode=132 reward=0.7469008 (490.82 it/sec) -training >> step=787900, episode=132 reward=0.7793053 (477.90 it/sec) -training >> step=788000, episode=132 reward=0.7800072 (513.92 it/sec) -training >> step=788100, episode=132 reward=0.7594897 (496.65 it/sec) -training >> step=788200, episode=132 reward=0.7521151 (521.26 it/sec) -training >> step=788300, episode=132 reward=0.7619906 (511.93 it/sec) -training >> step=788400, episode=132 reward=0.7575358 (456.50 it/sec) -training >> step=788500, episode=132 reward=0.76475 (469.40 it/sec) -training >> step=788600, episode=132 reward=0.7238759 (502.20 it/sec) -training >> step=788700, episode=132 reward=0.784331 (504.51 it/sec) -training >> step=788800, episode=132 reward=0.7658251 (520.58 it/sec) -training >> step=788900, episode=132 reward=0.7530597 (511.38 it/sec) -training >> step=789000, episode=132 reward=0.7424951 (484.58 it/sec) -training >> step=789100, episode=132 reward=0.7679935 (520.62 it/sec) -training >> step=789200, episode=132 reward=0.7824002 (493.18 it/sec) -training >> step=789300, episode=132 reward=0.7413129 (504.62 it/sec) -training >> step=789400, episode=132 reward=0.7753975 (483.92 it/sec) -training >> step=789500, episode=132 reward=0.7814317 (465.59 it/sec) -training >> step=789600, episode=132 reward=0.746969 (531.12 it/sec) -training >> step=789700, episode=132 reward=0.7402799 (506.90 it/sec) -training >> step=789800, episode=132 reward=0.7467011 (515.04 it/sec) -training >> step=789900, episode=132 reward=0.770382 (486.75 it/sec) -training >> step=790000, episode=132 reward=0.7624925 (467.42 it/sec) -training >> step=790100, episode=132 reward=0.7584218 (524.04 it/sec) -training >> step=790200, episode=132 reward=0.7848887 (467.08 it/sec) -training >> step=790300, episode=132 reward=0.7511029 (406.37 it/sec) -training >> step=790400, episode=132 reward=0.7540022 (434.20 it/sec) -training >> step=790500, episode=132 reward=0.7711449 (428.65 it/sec) -training >> step=790600, episode=132 reward=0.7472843 (494.67 it/sec) -training >> step=790700, episode=132 reward=0.7622554 (469.39 it/sec) -training >> step=790800, episode=132 reward=0.7491555 (406.19 it/sec) -training >> step=790900, episode=132 reward=0.7466824 (411.19 it/sec) -training >> step=791000, episode=132 reward=0.7441174 (452.92 it/sec) -training >> step=791100, episode=132 reward=0.7668481 (464.49 it/sec) -training >> step=791200, episode=132 reward=0.7320684 (497.54 it/sec) -training >> step=791300, episode=133 reward=0.7547119 (111.82 it/sec) -training >> step=791400, episode=133 reward=0.7388695 (439.80 it/sec) -training >> step=791500, episode=133 reward=0.7569978 (427.15 it/sec) -training >> step=791600, episode=133 reward=0.7588663 (444.76 it/sec) -training >> step=791700, episode=133 reward=0.7559547 (452.76 it/sec) -training >> step=791800, episode=133 reward=0.7534543 (455.32 it/sec) -training >> step=791900, episode=133 reward=0.7523115 (416.43 it/sec) -training >> step=792000, episode=133 reward=0.7443112 (448.20 it/sec) -training >> step=792100, episode=133 reward=0.7715386 (461.28 it/sec) -training >> step=792200, episode=133 reward=0.762834 (443.88 it/sec) -training >> step=792300, episode=133 reward=0.7648278 (394.96 it/sec) -training >> step=792400, episode=133 reward=0.7611482 (416.44 it/sec) -training >> step=792500, episode=133 reward=0.755949 (467.61 it/sec) -training >> step=792600, episode=133 reward=0.7494387 (449.59 it/sec) -training >> step=792700, episode=133 reward=0.7660239 (478.92 it/sec) -training >> step=792800, episode=133 reward=0.7640143 (468.70 it/sec) -training >> step=792900, episode=133 reward=0.7397949 (455.06 it/sec) -training >> step=793000, episode=133 reward=0.759519 (464.21 it/sec) -training >> step=793100, episode=133 reward=0.7334746 (474.15 it/sec) -training >> step=793200, episode=133 reward=0.7624316 (480.99 it/sec) -training >> step=793300, episode=133 reward=0.7651216 (432.39 it/sec) -training >> step=793400, episode=133 reward=0.754851 (475.99 it/sec) -training >> step=793500, episode=133 reward=0.7703374 (472.27 it/sec) -training >> step=793600, episode=133 reward=0.7869818 (480.00 it/sec) -training >> step=793700, episode=133 reward=0.7640955 (458.43 it/sec) -training >> step=793800, episode=133 reward=0.7644419 (438.60 it/sec) -training >> step=793900, episode=133 reward=0.7614433 (461.25 it/sec) -training >> step=794000, episode=133 reward=0.7697508 (485.55 it/sec) -training >> step=794100, episode=133 reward=0.7744195 (479.34 it/sec) -training >> step=794200, episode=133 reward=0.7604414 (448.14 it/sec) -training >> step=794300, episode=133 reward=0.7730201 (433.81 it/sec) -training >> step=794400, episode=133 reward=0.7571649 (477.51 it/sec) -training >> step=794500, episode=133 reward=0.7910895 (454.55 it/sec) -training >> step=794600, episode=133 reward=0.764015 (457.07 it/sec) -training >> step=794700, episode=133 reward=0.7661134 (462.53 it/sec) -training >> step=794800, episode=133 reward=0.811867 (466.85 it/sec) -training >> step=794900, episode=133 reward=0.7672402 (471.81 it/sec) -training >> step=795000, episode=133 reward=0.7444077 (494.33 it/sec) -training >> step=795100, episode=133 reward=0.7567272 (476.09 it/sec) -training >> step=795200, episode=133 reward=0.7491637 (465.69 it/sec) -training >> step=795300, episode=133 reward=0.7815216 (472.08 it/sec) -training >> step=795400, episode=133 reward=0.7838817 (465.54 it/sec) -training >> step=795500, episode=133 reward=0.7562668 (457.77 it/sec) -training >> step=795600, episode=133 reward=0.748958 (465.97 it/sec) -training >> step=795700, episode=133 reward=0.764093 (476.72 it/sec) -training >> step=795800, episode=133 reward=0.747911 (466.82 it/sec) -training >> step=795900, episode=133 reward=0.7402756 (495.16 it/sec) -training >> step=796000, episode=133 reward=0.7390724 (446.46 it/sec) -training >> step=796100, episode=133 reward=0.7503056 (479.47 it/sec) -training >> step=796200, episode=133 reward=0.7339891 (508.14 it/sec) -training >> step=796300, episode=133 reward=0.7739306 (474.39 it/sec) -training >> step=796400, episode=133 reward=0.7447826 (519.76 it/sec) -training >> step=796500, episode=133 reward=0.7202571 (444.60 it/sec) -training >> step=796600, episode=133 reward=0.7495598 (516.88 it/sec) -training >> step=796700, episode=133 reward=0.7719253 (527.19 it/sec) -training >> step=796800, episode=133 reward=0.7653056 (446.78 it/sec) -training >> step=796900, episode=133 reward=0.7599832 (540.96 it/sec) -training >> step=797000, episode=133 reward=0.7589332 (505.88 it/sec) -training >> step=797100, episode=133 reward=0.7613472 (477.44 it/sec) -training >> step=797200, episode=133 reward=0.7747735 (509.44 it/sec) -training >> step=797300, episode=134 reward=0.7505748 (129.25 it/sec) -training >> step=797400, episode=134 reward=0.7609622 (491.76 it/sec) -training >> step=797500, episode=134 reward=0.7515455 (477.59 it/sec) -training >> step=797600, episode=134 reward=0.7337153 (490.55 it/sec) -training >> step=797700, episode=134 reward=0.7674751 (501.26 it/sec) -training >> step=797800, episode=134 reward=0.7728688 (495.10 it/sec) -training >> step=797900, episode=134 reward=0.7788435 (465.39 it/sec) -training >> step=798000, episode=134 reward=0.7907321 (495.63 it/sec) -training >> step=798100, episode=134 reward=0.7516779 (523.86 it/sec) -training >> step=798200, episode=134 reward=0.7558082 (529.53 it/sec) -training >> step=798300, episode=134 reward=0.7457278 (482.73 it/sec) -training >> step=798400, episode=134 reward=0.7582713 (501.44 it/sec) -training >> step=798500, episode=134 reward=0.7589993 (468.76 it/sec) -training >> step=798600, episode=134 reward=0.7628478 (526.70 it/sec) -training >> step=798700, episode=134 reward=0.7805151 (516.19 it/sec) -training >> step=798800, episode=134 reward=0.7702866 (450.29 it/sec) -training >> step=798900, episode=134 reward=0.7639472 (465.77 it/sec) -training >> step=799000, episode=134 reward=0.7474456 (506.43 it/sec) -training >> step=799100, episode=134 reward=0.7374887 (435.66 it/sec) -training >> step=799200, episode=134 reward=0.7563348 (513.07 it/sec) -training >> step=799300, episode=134 reward=0.749342 (511.00 it/sec) -training >> step=799400, episode=134 reward=0.7630064 (506.63 it/sec) -training >> step=799500, episode=134 reward=0.7503049 (506.03 it/sec) -training >> step=799600, episode=134 reward=0.761802 (499.56 it/sec) -training >> step=799700, episode=134 reward=0.7435452 (517.74 it/sec) -training >> step=799800, episode=134 reward=0.7663768 (475.25 it/sec) -training >> step=799900, episode=134 reward=0.7639537 (475.46 it/sec) -training >> step=800000, episode=134 reward=0.758194 (515.86 it/sec) -training >> step=800100, episode=134 reward=0.7423918 (524.81 it/sec) -training >> step=800200, episode=134 reward=0.7728151 (443.47 it/sec) -training >> step=800300, episode=134 reward=0.7672814 (454.34 it/sec) -training >> step=800400, episode=134 reward=0.7649494 (442.50 it/sec) -training >> step=800500, episode=134 reward=0.7656748 (450.52 it/sec) -training >> step=800600, episode=134 reward=0.7594646 (482.36 it/sec) -training >> step=800700, episode=134 reward=0.781317 (500.11 it/sec) -training >> step=800800, episode=134 reward=0.7578617 (494.56 it/sec) -training >> step=800900, episode=134 reward=0.7596982 (478.01 it/sec) -training >> step=801000, episode=134 reward=0.7588172 (507.79 it/sec) -training >> step=801100, episode=134 reward=0.7703052 (520.19 it/sec) -training >> step=801200, episode=134 reward=0.75438 (504.09 it/sec) -training >> step=801300, episode=134 reward=0.7508967 (491.03 it/sec) -training >> step=801400, episode=134 reward=0.7463586 (460.07 it/sec) -training >> step=801500, episode=134 reward=0.7439398 (517.55 it/sec) -training >> step=801600, episode=134 reward=0.7469035 (489.41 it/sec) -training >> step=801700, episode=134 reward=0.7446058 (504.11 it/sec) -training >> step=801800, episode=134 reward=0.7645649 (488.11 it/sec) -training >> step=801900, episode=134 reward=0.7738715 (503.37 it/sec) -training >> step=802000, episode=134 reward=0.7622525 (516.28 it/sec) -training >> step=802100, episode=134 reward=0.7641587 (486.79 it/sec) -training >> step=802200, episode=134 reward=0.7625062 (510.00 it/sec) -training >> step=802300, episode=134 reward=0.7807391 (519.09 it/sec) -training >> step=802400, episode=134 reward=0.7581621 (515.02 it/sec) -training >> step=802500, episode=134 reward=0.7657132 (510.58 it/sec) -training >> step=802600, episode=134 reward=0.7584277 (498.04 it/sec) -training >> step=802700, episode=134 reward=0.7503045 (524.50 it/sec) -training >> step=802800, episode=134 reward=0.7594314 (495.82 it/sec) -training >> step=802900, episode=134 reward=0.7614744 (524.27 it/sec) -training >> step=803000, episode=134 reward=0.7620569 (539.47 it/sec) -training >> step=803100, episode=134 reward=0.7712854 (492.48 it/sec) -training >> step=803200, episode=134 reward=0.7507594 (478.97 it/sec) -training >> step=803300, episode=135 reward=0.7627888 (128.22 it/sec) -training >> step=803400, episode=135 reward=0.7516343 (548.68 it/sec) -training >> step=803500, episode=135 reward=0.7650714 (474.26 it/sec) -training >> step=803600, episode=135 reward=0.7670873 (472.17 it/sec) -training >> step=803700, episode=135 reward=0.7857864 (492.20 it/sec) -training >> step=803800, episode=135 reward=0.7698694 (500.99 it/sec) -training >> step=803900, episode=135 reward=0.7845989 (478.77 it/sec) -training >> step=804000, episode=135 reward=0.7245664 (531.13 it/sec) -training >> step=804100, episode=135 reward=0.7639881 (502.35 it/sec) -training >> step=804200, episode=135 reward=0.7514461 (508.67 it/sec) -training >> step=804300, episode=135 reward=0.7640001 (494.17 it/sec) -training >> step=804400, episode=135 reward=0.7595879 (474.62 it/sec) -training >> step=804500, episode=135 reward=0.7405717 (522.41 it/sec) -training >> step=804600, episode=135 reward=0.7679268 (503.91 it/sec) -training >> step=804700, episode=135 reward=0.778503 (516.95 it/sec) -training >> step=804800, episode=135 reward=0.7525882 (516.92 it/sec) -training >> step=804900, episode=135 reward=0.7746485 (473.70 it/sec) -training >> step=805000, episode=135 reward=0.7787701 (504.01 it/sec) -training >> step=805100, episode=135 reward=0.7675478 (509.13 it/sec) -training >> step=805200, episode=135 reward=0.7399229 (487.48 it/sec) -training >> step=805300, episode=135 reward=0.7521043 (520.52 it/sec) -training >> step=805400, episode=135 reward=0.7791747 (500.29 it/sec) -training >> step=805500, episode=135 reward=0.7572168 (510.55 it/sec) -training >> step=805600, episode=135 reward=0.7686065 (497.99 it/sec) -training >> step=805700, episode=135 reward=0.7560223 (530.29 it/sec) -training >> step=805800, episode=135 reward=0.7502793 (541.65 it/sec) -training >> step=805900, episode=135 reward=0.7544374 (513.04 it/sec) -training >> step=806000, episode=135 reward=0.7788694 (504.25 it/sec) -training >> step=806100, episode=135 reward=0.7452358 (523.91 it/sec) -training >> step=806200, episode=135 reward=0.7518578 (489.08 it/sec) -training >> step=806300, episode=135 reward=0.7589786 (502.95 it/sec) -training >> step=806400, episode=135 reward=0.7593338 (514.00 it/sec) -training >> step=806500, episode=135 reward=0.7825523 (538.28 it/sec) -training >> step=806600, episode=135 reward=0.7548836 (473.37 it/sec) -training >> step=806700, episode=135 reward=0.7700657 (470.91 it/sec) -training >> step=806800, episode=135 reward=0.7673144 (545.17 it/sec) -training >> step=806900, episode=135 reward=0.7874495 (500.41 it/sec) -training >> step=807000, episode=135 reward=0.7732453 (494.36 it/sec) -training >> step=807100, episode=135 reward=0.7683483 (504.80 it/sec) -training >> step=807200, episode=135 reward=0.7513118 (518.72 it/sec) -training >> step=807300, episode=135 reward=0.771253 (521.22 it/sec) -training >> step=807400, episode=135 reward=0.7724265 (460.03 it/sec) -training >> step=807500, episode=135 reward=0.7489111 (521.06 it/sec) -training >> step=807600, episode=135 reward=0.747591 (500.65 it/sec) -training >> step=807700, episode=135 reward=0.777486 (515.52 it/sec) -training >> step=807800, episode=135 reward=0.7571635 (514.95 it/sec) -training >> step=807900, episode=135 reward=0.7749216 (550.71 it/sec) -training >> step=808000, episode=135 reward=0.7295609 (490.48 it/sec) -training >> step=808100, episode=135 reward=0.7686216 (500.98 it/sec) -training >> step=808200, episode=135 reward=0.7525588 (497.54 it/sec) -training >> step=808300, episode=135 reward=0.7522008 (518.32 it/sec) -training >> step=808400, episode=135 reward=0.7349491 (520.27 it/sec) -training >> step=808500, episode=135 reward=0.7531248 (480.98 it/sec) -training >> step=808600, episode=135 reward=0.7432235 (478.44 it/sec) -training >> step=808700, episode=135 reward=0.7480453 (490.52 it/sec) -training >> step=808800, episode=135 reward=0.7436893 (501.55 it/sec) -training >> step=808900, episode=135 reward=0.7431854 (506.86 it/sec) -training >> step=809000, episode=135 reward=0.7698432 (530.20 it/sec) -training >> step=809100, episode=135 reward=0.7564911 (492.00 it/sec) -training >> step=809200, episode=135 reward=0.7562364 (520.94 it/sec) -training >> step=809300, episode=136 reward=0.7522075 (126.19 it/sec) -training >> step=809400, episode=136 reward=0.7735232 (499.02 it/sec) -training >> step=809500, episode=136 reward=0.7653627 (476.05 it/sec) -training >> step=809600, episode=136 reward=0.7584215 (483.04 it/sec) -training >> step=809700, episode=136 reward=0.7690171 (489.70 it/sec) -training >> step=809800, episode=136 reward=0.773407 (494.44 it/sec) -training >> step=809900, episode=136 reward=0.7326522 (474.62 it/sec) -training >> step=810000, episode=136 reward=0.7528244 (499.24 it/sec) -training >> step=810100, episode=136 reward=0.7801603 (525.09 it/sec) -training >> step=810200, episode=136 reward=0.7522262 (486.70 it/sec) -training >> step=810300, episode=136 reward=0.7652192 (505.81 it/sec) -training >> step=810400, episode=136 reward=0.7713361 (446.90 it/sec) -training >> step=810500, episode=136 reward=0.7458555 (531.43 it/sec) -training >> step=810600, episode=136 reward=0.7678365 (520.78 it/sec) -training >> step=810700, episode=136 reward=0.729611 (511.18 it/sec) -training >> step=810800, episode=136 reward=0.7413928 (499.94 it/sec) -training >> step=810900, episode=136 reward=0.7646083 (506.29 it/sec) -training >> step=811000, episode=136 reward=0.7473485 (511.59 it/sec) -training >> step=811100, episode=136 reward=0.7559201 (501.26 it/sec) -training >> step=811200, episode=136 reward=0.7635229 (514.33 it/sec) -training >> step=811300, episode=136 reward=0.7364495 (484.18 it/sec) -training >> step=811400, episode=136 reward=0.7882611 (494.60 it/sec) -training >> step=811500, episode=136 reward=0.7711428 (498.40 it/sec) -training >> step=811600, episode=136 reward=0.788959 (520.32 it/sec) -training >> step=811700, episode=136 reward=0.761075 (500.00 it/sec) -training >> step=811800, episode=136 reward=0.7612067 (479.93 it/sec) -training >> step=811900, episode=136 reward=0.7439611 (501.70 it/sec) -training >> step=812000, episode=136 reward=0.7794725 (543.10 it/sec) -training >> step=812100, episode=136 reward=0.7470461 (497.94 it/sec) -training >> step=812200, episode=136 reward=0.7639965 (531.38 it/sec) -training >> step=812300, episode=136 reward=0.7649185 (492.10 it/sec) -training >> step=812400, episode=136 reward=0.7648548 (503.22 it/sec) -training >> step=812500, episode=136 reward=0.7542725 (491.60 it/sec) -training >> step=812600, episode=136 reward=0.7444435 (503.67 it/sec) -training >> step=812700, episode=136 reward=0.765671 (547.93 it/sec) -training >> step=812800, episode=136 reward=0.7559664 (507.44 it/sec) -training >> step=812900, episode=136 reward=0.7408274 (485.25 it/sec) -training >> step=813000, episode=136 reward=0.7619994 (543.54 it/sec) -training >> step=813100, episode=136 reward=0.7864908 (502.13 it/sec) -training >> step=813200, episode=136 reward=0.7665113 (501.57 it/sec) -training >> step=813300, episode=136 reward=0.7486933 (463.93 it/sec) -training >> step=813400, episode=136 reward=0.7454743 (508.32 it/sec) -training >> step=813500, episode=136 reward=0.7672669 (503.13 it/sec) -training >> step=813600, episode=136 reward=0.7651188 (497.19 it/sec) -training >> step=813700, episode=136 reward=0.7850528 (514.47 it/sec) -training >> step=813800, episode=136 reward=0.7547311 (528.24 it/sec) -training >> step=813900, episode=136 reward=0.7269297 (482.48 it/sec) -training >> step=814000, episode=136 reward=0.7679609 (508.59 it/sec) -training >> step=814100, episode=136 reward=0.7790831 (514.46 it/sec) -training >> step=814200, episode=136 reward=0.7385351 (502.25 it/sec) -training >> step=814300, episode=136 reward=0.7468082 (512.34 it/sec) -training >> step=814400, episode=136 reward=0.7734656 (474.90 it/sec) -training >> step=814500, episode=136 reward=0.7563835 (495.28 it/sec) -training >> step=814600, episode=136 reward=0.761283 (519.05 it/sec) -training >> step=814700, episode=136 reward=0.7589462 (499.67 it/sec) -training >> step=814800, episode=136 reward=0.747725 (510.88 it/sec) -training >> step=814900, episode=136 reward=0.7599869 (495.31 it/sec) -training >> step=815000, episode=136 reward=0.7689211 (507.22 it/sec) -training >> step=815100, episode=136 reward=0.7505451 (513.34 it/sec) -training >> step=815200, episode=136 reward=0.7666072 (512.83 it/sec) -training >> step=815300, episode=137 reward=0.7574537 (130.58 it/sec) -training >> step=815400, episode=137 reward=0.7530148 (471.57 it/sec) -training >> step=815500, episode=137 reward=0.7601582 (491.16 it/sec) -training >> step=815600, episode=137 reward=0.7675412 (497.78 it/sec) -training >> step=815700, episode=137 reward=0.7763662 (525.17 it/sec) -training >> step=815800, episode=137 reward=0.7622588 (532.41 it/sec) -training >> step=815900, episode=137 reward=0.7315937 (495.82 it/sec) -training >> step=816000, episode=137 reward=0.7700112 (513.32 it/sec) -training >> step=816100, episode=137 reward=0.7629572 (479.08 it/sec) -training >> step=816200, episode=137 reward=0.7843231 (505.95 it/sec) -training >> step=816300, episode=137 reward=0.7542144 (533.95 it/sec) -training >> step=816400, episode=137 reward=0.7608783 (486.45 it/sec) -training >> step=816500, episode=137 reward=0.7553424 (505.21 it/sec) -training >> step=816600, episode=137 reward=0.7459344 (474.56 it/sec) -training >> step=816700, episode=137 reward=0.7425167 (497.96 it/sec) -training >> step=816800, episode=137 reward=0.7454715 (552.20 it/sec) -training >> step=816900, episode=137 reward=0.746243 (514.76 it/sec) -training >> step=817000, episode=137 reward=0.7645366 (485.49 it/sec) -training >> step=817100, episode=137 reward=0.7569404 (487.53 it/sec) -training >> step=817200, episode=137 reward=0.7514091 (500.87 it/sec) -training >> step=817300, episode=137 reward=0.774438 (447.17 it/sec) -training >> step=817400, episode=137 reward=0.7452549 (501.14 it/sec) -training >> step=817500, episode=137 reward=0.7620247 (506.96 it/sec) -training >> step=817600, episode=137 reward=0.7788965 (498.00 it/sec) -training >> step=817700, episode=137 reward=0.7598488 (481.71 it/sec) -training >> step=817800, episode=137 reward=0.7579259 (525.73 it/sec) -training >> step=817900, episode=137 reward=0.7645253 (523.54 it/sec) -training >> step=818000, episode=137 reward=0.755394 (486.79 it/sec) -training >> step=818100, episode=137 reward=0.7658855 (469.52 it/sec) -training >> step=818200, episode=137 reward=0.7669421 (513.38 it/sec) -training >> step=818300, episode=137 reward=0.7623917 (542.81 it/sec) -training >> step=818400, episode=137 reward=0.784349 (489.88 it/sec) -training >> step=818500, episode=137 reward=0.7665673 (469.71 it/sec) -training >> step=818600, episode=137 reward=0.7858881 (496.74 it/sec) -training >> step=818700, episode=137 reward=0.7520326 (502.09 it/sec) -training >> step=818800, episode=137 reward=0.7693696 (511.29 it/sec) -training >> step=818900, episode=137 reward=0.771309 (499.47 it/sec) -training >> step=819000, episode=137 reward=0.7558317 (442.58 it/sec) -training >> step=819100, episode=137 reward=0.7571782 (467.25 it/sec) -training >> step=819200, episode=137 reward=0.762255 (504.39 it/sec) -training >> step=819300, episode=137 reward=0.7663274 (518.51 it/sec) -training >> step=819400, episode=137 reward=0.7622858 (519.47 it/sec) -training >> step=819500, episode=137 reward=0.7500742 (523.39 it/sec) -training >> step=819600, episode=137 reward=0.7648842 (496.89 it/sec) -training >> step=819700, episode=137 reward=0.7460753 (524.48 it/sec) -training >> step=819800, episode=137 reward=0.7445976 (493.16 it/sec) -training >> step=819900, episode=137 reward=0.7642353 (513.76 it/sec) -training >> step=820000, episode=137 reward=0.7694623 (523.73 it/sec) -training >> step=820100, episode=137 reward=0.7433066 (491.50 it/sec) -training >> step=820200, episode=137 reward=0.7795852 (523.38 it/sec) -training >> step=820300, episode=137 reward=0.7515829 (495.09 it/sec) -training >> step=820400, episode=137 reward=0.7486801 (499.58 it/sec) -training >> step=820500, episode=137 reward=0.7607012 (536.68 it/sec) -training >> step=820600, episode=137 reward=0.7463909 (497.22 it/sec) -training >> step=820700, episode=137 reward=0.7551126 (496.43 it/sec) -training >> step=820800, episode=137 reward=0.7545221 (540.44 it/sec) -training >> step=820900, episode=137 reward=0.7514348 (509.26 it/sec) -training >> step=821000, episode=137 reward=0.7445496 (482.34 it/sec) -training >> step=821100, episode=137 reward=0.7562642 (506.13 it/sec) -training >> step=821200, episode=137 reward=0.7622167 (527.72 it/sec) -training >> step=821300, episode=138 reward=0.7721363 (123.25 it/sec) -training >> step=821400, episode=138 reward=0.7687052 (462.78 it/sec) -training >> step=821500, episode=138 reward=0.775969 (484.11 it/sec) -training >> step=821600, episode=138 reward=0.747531 (488.46 it/sec) -training >> step=821700, episode=138 reward=0.7478796 (479.25 it/sec) -training >> step=821800, episode=138 reward=0.7572103 (454.16 it/sec) -training >> step=821900, episode=138 reward=0.7730574 (444.12 it/sec) -training >> step=822000, episode=138 reward=0.745203 (493.31 it/sec) -training >> step=822100, episode=138 reward=0.765167 (527.12 it/sec) -training >> step=822200, episode=138 reward=0.7774711 (511.45 it/sec) -training >> step=822300, episode=138 reward=0.7654561 (449.78 it/sec) -training >> step=822400, episode=138 reward=0.7514835 (483.91 it/sec) -training >> step=822500, episode=138 reward=0.7753947 (526.94 it/sec) -training >> step=822600, episode=138 reward=0.7664841 (517.46 it/sec) -training >> step=822700, episode=138 reward=0.7785778 (534.27 it/sec) -training >> step=822800, episode=138 reward=0.7834085 (488.42 it/sec) -training >> step=822900, episode=138 reward=0.765322 (512.67 it/sec) -training >> step=823000, episode=138 reward=0.7520512 (504.71 it/sec) -training >> step=823100, episode=138 reward=0.7526018 (530.89 it/sec) -training >> step=823200, episode=138 reward=0.7476369 (538.71 it/sec) -training >> step=823300, episode=138 reward=0.7642846 (460.33 it/sec) -training >> step=823400, episode=138 reward=0.7757208 (447.75 it/sec) -training >> step=823500, episode=138 reward=0.7720749 (413.49 it/sec) -training >> step=823600, episode=138 reward=0.7672489 (480.89 it/sec) -training >> step=823700, episode=138 reward=0.7637208 (520.80 it/sec) -training >> step=823800, episode=138 reward=0.7399142 (477.18 it/sec) -training >> step=823900, episode=138 reward=0.7638136 (479.79 it/sec) -training >> step=824000, episode=138 reward=0.7632563 (484.55 it/sec) -training >> step=824100, episode=138 reward=0.7676057 (473.01 it/sec) -training >> step=824200, episode=138 reward=0.7555419 (467.21 it/sec) -training >> step=824300, episode=138 reward=0.7678574 (503.13 it/sec) -training >> step=824400, episode=138 reward=0.7688851 (493.58 it/sec) -training >> step=824500, episode=138 reward=0.7565086 (474.16 it/sec) -training >> step=824600, episode=138 reward=0.7805655 (503.21 it/sec) -training >> step=824700, episode=138 reward=0.752076 (487.83 it/sec) -training >> step=824800, episode=138 reward=0.7794282 (457.65 it/sec) -training >> step=824900, episode=138 reward=0.7843673 (496.70 it/sec) -training >> step=825000, episode=138 reward=0.7480894 (502.33 it/sec) -training >> step=825100, episode=138 reward=0.7700601 (488.01 it/sec) -training >> step=825200, episode=138 reward=0.753215 (481.25 it/sec) -training >> step=825300, episode=138 reward=0.7575039 (474.03 it/sec) -training >> step=825400, episode=138 reward=0.7685727 (479.13 it/sec) -training >> step=825500, episode=138 reward=0.7639797 (527.05 it/sec) -training >> step=825600, episode=138 reward=0.7529395 (501.44 it/sec) -training >> step=825700, episode=138 reward=0.7725095 (452.42 it/sec) -training >> step=825800, episode=138 reward=0.7526034 (438.23 it/sec) -training >> step=825900, episode=138 reward=0.7601919 (450.86 it/sec) -training >> step=826000, episode=138 reward=0.7520405 (475.79 it/sec) -training >> step=826100, episode=138 reward=0.7636224 (464.73 it/sec) -training >> step=826200, episode=138 reward=0.7607297 (430.23 it/sec) -training >> step=826300, episode=138 reward=0.7517321 (424.74 it/sec) -training >> step=826400, episode=138 reward=0.7597372 (462.79 it/sec) -training >> step=826500, episode=138 reward=0.7511416 (389.37 it/sec) -training >> step=826600, episode=138 reward=0.7545435 (405.60 it/sec) -training >> step=826700, episode=138 reward=0.7627708 (437.12 it/sec) -training >> step=826800, episode=138 reward=0.7447339 (498.33 it/sec) -training >> step=826900, episode=138 reward=0.7540407 (441.17 it/sec) -training >> step=827000, episode=138 reward=0.7643011 (471.79 it/sec) -training >> step=827100, episode=138 reward=0.7543533 (469.01 it/sec) -training >> step=827200, episode=138 reward=0.7626827 (446.88 it/sec) -training >> step=827300, episode=139 reward=0.7734594 (156.74 it/sec) -training >> step=827400, episode=139 reward=0.7620763 (343.70 it/sec) -training >> step=827500, episode=139 reward=0.7519785 (455.32 it/sec) -training >> step=827600, episode=139 reward=0.7564043 (489.50 it/sec) -training >> step=827700, episode=139 reward=0.7416206 (490.63 it/sec) -training >> step=827800, episode=139 reward=0.7592719 (494.53 it/sec) -training >> step=827900, episode=139 reward=0.7609267 (472.29 it/sec) -training >> step=828000, episode=139 reward=0.7545952 (561.01 it/sec) -training >> step=828100, episode=139 reward=0.7694009 (551.12 it/sec) -training >> step=828200, episode=139 reward=0.7464045 (551.92 it/sec) -training >> step=828300, episode=139 reward=0.75916 (531.12 it/sec) -training >> step=828400, episode=139 reward=0.7320931 (521.44 it/sec) -training >> step=828500, episode=139 reward=0.7593636 (490.00 it/sec) -training >> step=828600, episode=139 reward=0.7722704 (445.19 it/sec) -training >> step=828700, episode=139 reward=0.7713885 (521.06 it/sec) -training >> step=828800, episode=139 reward=0.7709752 (489.44 it/sec) -training >> step=828900, episode=139 reward=0.7660367 (487.79 it/sec) -training >> step=829000, episode=139 reward=0.7698622 (515.01 it/sec) -training >> step=829100, episode=139 reward=0.7510159 (558.49 it/sec) -training >> step=829200, episode=139 reward=0.7643963 (536.88 it/sec) -training >> step=829300, episode=139 reward=0.7467171 (535.21 it/sec) -training >> step=829400, episode=139 reward=0.7519286 (557.32 it/sec) -training >> step=829500, episode=139 reward=0.7605677 (497.63 it/sec) -training >> step=829600, episode=139 reward=0.7481677 (474.72 it/sec) -training >> step=829700, episode=139 reward=0.7583517 (491.62 it/sec) -training >> step=829800, episode=139 reward=0.7478314 (542.91 it/sec) -training >> step=829900, episode=139 reward=0.7748212 (495.52 it/sec) -training >> step=830000, episode=139 reward=0.7602285 (476.48 it/sec) -training >> step=830100, episode=139 reward=0.7879403 (501.28 it/sec) -training >> step=830200, episode=139 reward=0.767293 (436.93 it/sec) -training >> step=830300, episode=139 reward=0.7329235 (443.53 it/sec) -training >> step=830400, episode=139 reward=0.7422356 (459.60 it/sec) -training >> step=830500, episode=139 reward=0.7431289 (421.46 it/sec) -training >> step=830600, episode=139 reward=0.759622 (444.57 it/sec) -training >> step=830700, episode=139 reward=0.7748096 (364.83 it/sec) -training >> step=830800, episode=139 reward=0.7451262 (454.98 it/sec) -training >> step=830900, episode=139 reward=0.7649021 (408.87 it/sec) -training >> step=831000, episode=139 reward=0.7622995 (483.70 it/sec) -training >> step=831100, episode=139 reward=0.7781987 (500.40 it/sec) -training >> step=831200, episode=139 reward=0.7775277 (509.67 it/sec) -training >> step=831300, episode=139 reward=0.7446961 (466.46 it/sec) -training >> step=831400, episode=139 reward=0.7694927 (433.70 it/sec) -training >> step=831500, episode=139 reward=0.7627381 (448.88 it/sec) -training >> step=831600, episode=139 reward=0.7458718 (479.00 it/sec) -training >> step=831700, episode=139 reward=0.7715446 (412.15 it/sec) -training >> step=831800, episode=139 reward=0.7595739 (455.75 it/sec) -training >> step=831900, episode=139 reward=0.7732307 (397.45 it/sec) -training >> step=832000, episode=139 reward=0.7490638 (458.28 it/sec) -training >> step=832100, episode=139 reward=0.7524478 (481.41 it/sec) -training >> step=832200, episode=139 reward=0.7626754 (456.28 it/sec) -training >> step=832300, episode=139 reward=0.7547127 (472.27 it/sec) -training >> step=832400, episode=139 reward=0.7726272 (460.09 it/sec) -training >> step=832500, episode=139 reward=0.7548137 (492.96 it/sec) -training >> step=832600, episode=139 reward=0.7432421 (479.84 it/sec) -training >> step=832700, episode=139 reward=0.7705314 (480.67 it/sec) -training >> step=832800, episode=139 reward=0.75005 (453.80 it/sec) -training >> step=832900, episode=139 reward=0.7457708 (497.20 it/sec) -training >> step=833000, episode=139 reward=0.7449573 (465.38 it/sec) -training >> step=833100, episode=139 reward=0.7364558 (424.89 it/sec) -training >> step=833200, episode=139 reward=0.7353866 (440.42 it/sec) -training >> step=833300, episode=140 reward=0.7486008 (171.44 it/sec) -training >> step=833400, episode=140 reward=0.7576154 (454.31 it/sec) -training >> step=833500, episode=140 reward=0.7477762 (459.85 it/sec) -training >> step=833600, episode=140 reward=0.7744208 (362.66 it/sec) -training >> step=833700, episode=140 reward=0.7648271 (488.18 it/sec) -training >> step=833800, episode=140 reward=0.7449816 (424.63 it/sec) -training >> step=833900, episode=140 reward=0.7660936 (496.74 it/sec) -training >> step=834000, episode=140 reward=0.7577632 (542.01 it/sec) -training >> step=834100, episode=140 reward=0.7552698 (548.81 it/sec) -training >> step=834200, episode=140 reward=0.7568758 (506.75 it/sec) -training >> step=834300, episode=140 reward=0.7621444 (513.48 it/sec) -training >> step=834400, episode=140 reward=0.7868307 (512.46 it/sec) -training >> step=834500, episode=140 reward=0.7570765 (511.03 it/sec) -training >> step=834600, episode=140 reward=0.7521762 (514.47 it/sec) -training >> step=834700, episode=140 reward=0.7850278 (502.84 it/sec) -training >> step=834800, episode=140 reward=0.784391 (512.91 it/sec) -training >> step=834900, episode=140 reward=0.7677541 (504.91 it/sec) -training >> step=835000, episode=140 reward=0.7534891 (525.01 it/sec) -training >> step=835100, episode=140 reward=0.7730395 (486.55 it/sec) -training >> step=835200, episode=140 reward=0.7455102 (513.59 it/sec) -training >> step=835300, episode=140 reward=0.7930371 (524.92 it/sec) -training >> step=835400, episode=140 reward=0.7295427 (490.87 it/sec) -training >> step=835500, episode=140 reward=0.7661153 (480.42 it/sec) -training >> step=835600, episode=140 reward=0.7587838 (520.65 it/sec) -training >> step=835700, episode=140 reward=0.7492469 (478.22 it/sec) -training >> step=835800, episode=140 reward=0.7458958 (437.81 it/sec) -training >> step=835900, episode=140 reward=0.7679431 (479.83 it/sec) -training >> step=836000, episode=140 reward=0.7768422 (486.26 it/sec) -training >> step=836100, episode=140 reward=0.7603322 (423.51 it/sec) -training >> step=836200, episode=140 reward=0.7569516 (489.35 it/sec) -training >> step=836300, episode=140 reward=0.7595127 (431.58 it/sec) -training >> step=836400, episode=140 reward=0.7516642 (447.86 it/sec) -training >> step=836500, episode=140 reward=0.7825546 (468.69 it/sec) -training >> step=836600, episode=140 reward=0.7643991 (460.60 it/sec) -training >> step=836700, episode=140 reward=0.7488886 (454.84 it/sec) -training >> step=836800, episode=140 reward=0.7716759 (457.74 it/sec) -training >> step=836900, episode=140 reward=0.7578177 (475.60 it/sec) -training >> step=837000, episode=140 reward=0.7558089 (514.14 it/sec) -training >> step=837100, episode=140 reward=0.7677283 (443.66 it/sec) -training >> step=837200, episode=140 reward=0.7483045 (473.96 it/sec) -training >> step=837300, episode=140 reward=0.7693833 (493.07 it/sec) -training >> step=837400, episode=140 reward=0.74982 (464.12 it/sec) -training >> step=837500, episode=140 reward=0.7630065 (445.18 it/sec) -training >> step=837600, episode=140 reward=0.765137 (421.85 it/sec) -training >> step=837700, episode=140 reward=0.7686986 (434.91 it/sec) -training >> step=837800, episode=140 reward=0.745122 (505.86 it/sec) -training >> step=837900, episode=140 reward=0.743303 (457.15 it/sec) -training >> step=838000, episode=140 reward=0.76792 (459.98 it/sec) -training >> step=838100, episode=140 reward=0.7684011 (388.50 it/sec) -training >> step=838200, episode=140 reward=0.7494804 (414.38 it/sec) -training >> step=838300, episode=140 reward=0.7553349 (421.24 it/sec) -training >> step=838400, episode=140 reward=0.7469352 (438.76 it/sec) -training >> step=838500, episode=140 reward=0.7454221 (392.79 it/sec) -training >> step=838600, episode=140 reward=0.7531 (392.45 it/sec) -training >> step=838700, episode=140 reward=0.7588269 (412.92 it/sec) -training >> step=838800, episode=140 reward=0.7702147 (459.41 it/sec) -training >> step=838900, episode=140 reward=0.7642437 (430.58 it/sec) -training >> step=839000, episode=140 reward=0.7781284 (406.93 it/sec) -training >> step=839100, episode=140 reward=0.7611941 (402.43 it/sec) -training >> step=839200, episode=140 reward=0.7339981 (409.43 it/sec) -training >> step=839300, episode=141 reward=0.7716917 (153.71 it/sec) -training >> step=839400, episode=141 reward=0.7596443 (422.47 it/sec) -training >> step=839500, episode=141 reward=0.7645738 (401.09 it/sec) -training >> step=839600, episode=141 reward=0.7618937 (417.28 it/sec) -training >> step=839700, episode=141 reward=0.7559708 (443.84 it/sec) -training >> step=839800, episode=141 reward=0.7501391 (277.06 it/sec) -training >> step=839900, episode=141 reward=0.7553269 (392.05 it/sec) -training >> step=840000, episode=141 reward=0.7719172 (411.77 it/sec) -training >> step=840100, episode=141 reward=0.7694786 (465.99 it/sec) -training >> step=840200, episode=141 reward=0.7718118 (387.36 it/sec) -training >> step=840300, episode=141 reward=0.7344587 (360.03 it/sec) -training >> step=840400, episode=141 reward=0.7493158 (389.87 it/sec) -training >> step=840500, episode=141 reward=0.7520986 (422.49 it/sec) -training >> step=840600, episode=141 reward=0.7624313 (428.07 it/sec) -training >> step=840700, episode=141 reward=0.7746162 (434.61 it/sec) -training >> step=840800, episode=141 reward=0.7644618 (361.55 it/sec) -training >> step=840900, episode=141 reward=0.7602812 (403.87 it/sec) -training >> step=841000, episode=141 reward=0.7617958 (374.79 it/sec) -training >> step=841100, episode=141 reward=0.7568594 (431.97 it/sec) -training >> step=841200, episode=141 reward=0.7665599 (397.28 it/sec) -training >> step=841300, episode=141 reward=0.7801403 (443.46 it/sec) -training >> step=841400, episode=141 reward=0.7529675 (407.45 it/sec) -training >> step=841500, episode=141 reward=0.7574952 (400.79 it/sec) -training >> step=841600, episode=141 reward=0.781404 (386.60 it/sec) -training >> step=841700, episode=141 reward=0.7544505 (396.85 it/sec) -training >> step=841800, episode=141 reward=0.770882 (401.41 it/sec) -training >> step=841900, episode=141 reward=0.75979 (394.76 it/sec) -training >> step=842000, episode=141 reward=0.7634944 (415.63 it/sec) -training >> step=842100, episode=141 reward=0.7574024 (465.33 it/sec) -training >> step=842200, episode=141 reward=0.7674867 (415.01 it/sec) -training >> step=842300, episode=141 reward=0.771029 (387.67 it/sec) -training >> step=842400, episode=141 reward=0.7565225 (422.70 it/sec) -training >> step=842500, episode=141 reward=0.7752964 (432.92 it/sec) -training >> step=842600, episode=141 reward=0.7572956 (410.47 it/sec) -training >> step=842700, episode=141 reward=0.778776 (402.64 it/sec) -training >> step=842800, episode=141 reward=0.7702162 (456.63 it/sec) -training >> step=842900, episode=141 reward=0.771438 (399.15 it/sec) -training >> step=843000, episode=141 reward=0.7665424 (447.96 it/sec) -training >> step=843100, episode=141 reward=0.7612025 (477.05 it/sec) -training >> step=843200, episode=141 reward=0.7732973 (460.64 it/sec) -training >> step=843300, episode=141 reward=0.7864971 (468.02 it/sec) -training >> step=843400, episode=141 reward=0.7569006 (437.56 it/sec) -training >> step=843500, episode=141 reward=0.7621729 (428.68 it/sec) -training >> step=843600, episode=141 reward=0.7524963 (497.81 it/sec) -training >> step=843700, episode=141 reward=0.7574503 (528.33 it/sec) -training >> step=843800, episode=141 reward=0.7586589 (524.02 it/sec) -training >> step=843900, episode=141 reward=0.7691166 (486.78 it/sec) -training >> step=844000, episode=141 reward=0.7581643 (505.90 it/sec) -training >> step=844100, episode=141 reward=0.7611929 (502.79 it/sec) -training >> step=844200, episode=141 reward=0.7852495 (528.91 it/sec) -training >> step=844300, episode=141 reward=0.7522669 (515.77 it/sec) -training >> step=844400, episode=141 reward=0.7713563 (533.81 it/sec) -training >> step=844500, episode=141 reward=0.7555634 (513.58 it/sec) -training >> step=844600, episode=141 reward=0.7631571 (426.80 it/sec) -training >> step=844700, episode=141 reward=0.7710871 (532.74 it/sec) -training >> step=844800, episode=141 reward=0.7687587 (502.00 it/sec) -training >> step=844900, episode=141 reward=0.7426401 (431.44 it/sec) -training >> step=845000, episode=141 reward=0.7592882 (467.00 it/sec) -training >> step=845100, episode=141 reward=0.7483734 (532.88 it/sec) -training >> step=845200, episode=141 reward=0.761583 (460.36 it/sec) -training >> step=845300, episode=142 reward=0.7474769 (138.40 it/sec) -training >> step=845400, episode=142 reward=0.7472689 (507.54 it/sec) -training >> step=845500, episode=142 reward=0.7483836 (492.14 it/sec) -training >> step=845600, episode=142 reward=0.7852827 (525.47 it/sec) -training >> step=845700, episode=142 reward=0.7524629 (483.17 it/sec) -training >> step=845800, episode=142 reward=0.7927475 (501.19 it/sec) -training >> step=845900, episode=142 reward=0.7762492 (490.11 it/sec) -training >> step=846000, episode=142 reward=0.7677294 (413.45 it/sec) -training >> step=846100, episode=142 reward=0.7470143 (490.90 it/sec) -training >> step=846200, episode=142 reward=0.7440868 (486.94 it/sec) -training >> step=846300, episode=142 reward=0.7315678 (505.83 it/sec) -training >> step=846400, episode=142 reward=0.7426951 (480.79 it/sec) -training >> step=846500, episode=142 reward=0.7451432 (507.02 it/sec) -training >> step=846600, episode=142 reward=0.779041 (491.04 it/sec) -training >> step=846700, episode=142 reward=0.7481028 (513.93 it/sec) -training >> step=846800, episode=142 reward=0.7658672 (474.11 it/sec) -training >> step=846900, episode=142 reward=0.7487785 (447.65 it/sec) -training >> step=847000, episode=142 reward=0.7737672 (461.83 it/sec) -training >> step=847100, episode=142 reward=0.7670888 (471.00 it/sec) -training >> step=847200, episode=142 reward=0.7676997 (498.05 it/sec) -training >> step=847300, episode=142 reward=0.7631838 (483.85 it/sec) -training >> step=847400, episode=142 reward=0.7794251 (466.43 it/sec) -training >> step=847500, episode=142 reward=0.7518873 (479.98 it/sec) -training >> step=847600, episode=142 reward=0.7669288 (489.31 it/sec) -training >> step=847700, episode=142 reward=0.7498173 (417.83 it/sec) -training >> step=847800, episode=142 reward=0.7705674 (464.08 it/sec) -training >> step=847900, episode=142 reward=0.7513466 (420.77 it/sec) -training >> step=848000, episode=142 reward=0.7636114 (428.89 it/sec) -training >> step=848100, episode=142 reward=0.7482355 (412.17 it/sec) -training >> step=848200, episode=142 reward=0.7529424 (380.58 it/sec) -training >> step=848300, episode=142 reward=0.7558182 (467.41 it/sec) -training >> step=848400, episode=142 reward=0.7672555 (458.08 it/sec) -training >> step=848500, episode=142 reward=0.7633028 (395.48 it/sec) -training >> step=848600, episode=142 reward=0.7781088 (384.12 it/sec) -training >> step=848700, episode=142 reward=0.7702552 (427.75 it/sec) -training >> step=848800, episode=142 reward=0.762913 (431.97 it/sec) -training >> step=848900, episode=142 reward=0.7517633 (393.77 it/sec) -training >> step=849000, episode=142 reward=0.7635172 (427.99 it/sec) -training >> step=849100, episode=142 reward=0.7633408 (406.70 it/sec) -training >> step=849200, episode=142 reward=0.7751184 (417.44 it/sec) -training >> step=849300, episode=142 reward=0.760163 (410.55 it/sec) -training >> step=849400, episode=142 reward=0.7739625 (428.91 it/sec) -training >> step=849500, episode=142 reward=0.7763935 (422.55 it/sec) -training >> step=849600, episode=142 reward=0.7529703 (392.23 it/sec) -training >> step=849700, episode=142 reward=0.7400166 (392.35 it/sec) -training >> step=849800, episode=142 reward=0.7850659 (464.83 it/sec) -training >> step=849900, episode=142 reward=0.7525344 (422.10 it/sec) -training >> step=850000, episode=142 reward=0.7618785 (410.70 it/sec) -training >> step=850100, episode=142 reward=0.7702355 (446.10 it/sec) -training >> step=850200, episode=142 reward=0.7652416 (402.28 it/sec) -training >> step=850300, episode=142 reward=0.7657863 (435.47 it/sec) -training >> step=850400, episode=142 reward=0.76062 (427.68 it/sec) -training >> step=850500, episode=142 reward=0.7576879 (353.69 it/sec) -training >> step=850600, episode=142 reward=0.7717312 (405.24 it/sec) -training >> step=850700, episode=142 reward=0.7672129 (400.78 it/sec) -training >> step=850800, episode=142 reward=0.7637861 (458.50 it/sec) -training >> step=850900, episode=142 reward=0.7823407 (401.64 it/sec) -training >> step=851000, episode=142 reward=0.7631223 (420.04 it/sec) -training >> step=851100, episode=142 reward=0.7756392 (428.76 it/sec) -training >> step=851200, episode=142 reward=0.7678016 (397.99 it/sec) -training >> step=851300, episode=143 reward=0.7780389 (66.47 it/sec) -training >> step=851400, episode=143 reward=0.7322516 (497.81 it/sec) -training >> step=851500, episode=143 reward=0.7694167 (528.63 it/sec) -training >> step=851600, episode=143 reward=0.7692657 (532.99 it/sec) -training >> step=851700, episode=143 reward=0.7437638 (542.21 it/sec) -training >> step=851800, episode=143 reward=0.7479412 (511.96 it/sec) -training >> step=851900, episode=143 reward=0.7362766 (539.74 it/sec) -training >> step=852000, episode=143 reward=0.7536889 (549.98 it/sec) -training >> step=852100, episode=143 reward=0.7443041 (569.00 it/sec) -training >> step=852200, episode=143 reward=0.7683919 (526.75 it/sec) -training >> step=852300, episode=143 reward=0.7545606 (377.36 it/sec) -training >> step=852400, episode=143 reward=0.7354987 (493.24 it/sec) -training >> step=852500, episode=143 reward=0.7649457 (525.14 it/sec) -training >> step=852600, episode=143 reward=0.7587382 (550.66 it/sec) -training >> step=852700, episode=143 reward=0.7506203 (540.05 it/sec) -training >> step=852800, episode=143 reward=0.7552636 (571.42 it/sec) -training >> step=852900, episode=143 reward=0.7636126 (537.91 it/sec) -training >> step=853000, episode=143 reward=0.7414908 (465.58 it/sec) -training >> step=853100, episode=143 reward=0.769879 (541.45 it/sec) -training >> step=853200, episode=143 reward=0.7789055 (569.10 it/sec) -training >> step=853300, episode=143 reward=0.745633 (540.72 it/sec) -training >> step=853400, episode=143 reward=0.7723457 (535.29 it/sec) -training >> step=853500, episode=143 reward=0.7588149 (497.49 it/sec) -training >> step=853600, episode=143 reward=0.7807987 (537.07 it/sec) -training >> step=853700, episode=143 reward=0.7633051 (515.52 it/sec) -training >> step=853800, episode=143 reward=0.7500899 (533.02 it/sec) -training >> step=853900, episode=143 reward=0.7769873 (544.93 it/sec) -training >> step=854000, episode=143 reward=0.7386063 (499.51 it/sec) -training >> step=854100, episode=143 reward=0.7827724 (513.29 it/sec) -training >> step=854200, episode=143 reward=0.7615053 (536.76 it/sec) -training >> step=854300, episode=143 reward=0.7504902 (500.92 it/sec) -training >> step=854400, episode=143 reward=0.7468266 (480.31 it/sec) -training >> step=854500, episode=143 reward=0.7432183 (534.39 it/sec) -training >> step=854600, episode=143 reward=0.7628728 (531.39 it/sec) -training >> step=854700, episode=143 reward=0.7872081 (531.45 it/sec) -training >> step=854800, episode=143 reward=0.7790455 (483.19 it/sec) -training >> step=854900, episode=143 reward=0.758368 (513.32 it/sec) -training >> step=855000, episode=143 reward=0.7528635 (540.33 it/sec) -training >> step=855100, episode=143 reward=0.7695103 (490.73 it/sec) -training >> step=855200, episode=143 reward=0.7375782 (550.24 it/sec) -training >> step=855300, episode=143 reward=0.7671474 (524.85 it/sec) -training >> step=855400, episode=143 reward=0.7665968 (542.15 it/sec) -training >> step=855500, episode=143 reward=0.755079 (468.70 it/sec) -training >> step=855600, episode=143 reward=0.7696295 (528.64 it/sec) -training >> step=855700, episode=143 reward=0.7265524 (542.11 it/sec) -training >> step=855800, episode=143 reward=0.7616271 (547.80 it/sec) -training >> step=855900, episode=143 reward=0.7581258 (465.73 it/sec) -training >> step=856000, episode=143 reward=0.741923 (527.79 it/sec) -training >> step=856100, episode=143 reward=0.7548681 (499.71 it/sec) -training >> step=856200, episode=143 reward=0.7638817 (444.50 it/sec) -training >> step=856300, episode=143 reward=0.7714032 (513.22 it/sec) -training >> step=856400, episode=143 reward=0.7564881 (534.72 it/sec) -training >> step=856500, episode=143 reward=0.7529515 (494.99 it/sec) -training >> step=856600, episode=143 reward=0.7431199 (473.94 it/sec) -training >> step=856700, episode=143 reward=0.7485443 (474.04 it/sec) -training >> step=856800, episode=143 reward=0.7521997 (513.10 it/sec) -training >> step=856900, episode=143 reward=0.7318997 (461.44 it/sec) -training >> step=857000, episode=143 reward=0.7587295 (459.45 it/sec) -training >> step=857100, episode=143 reward=0.7343726 (524.98 it/sec) -training >> step=857200, episode=143 reward=0.7763305 (459.12 it/sec) -training >> step=857300, episode=144 reward=0.7692484 (192.16 it/sec) -training >> step=857400, episode=144 reward=0.7726231 (502.79 it/sec) -training >> step=857500, episode=144 reward=0.7684842 (537.13 it/sec) -training >> step=857600, episode=144 reward=0.7696685 (439.44 it/sec) -training >> step=857700, episode=144 reward=0.7425392 (458.11 it/sec) -training >> step=857800, episode=144 reward=0.7788316 (462.09 it/sec) -training >> step=857900, episode=144 reward=0.7651188 (503.74 it/sec) -training >> step=858000, episode=144 reward=0.7542866 (495.72 it/sec) -training >> step=858100, episode=144 reward=0.7740787 (497.93 it/sec) -training >> step=858200, episode=144 reward=0.718371 (513.35 it/sec) -training >> step=858300, episode=144 reward=0.7620092 (487.69 it/sec) -training >> step=858400, episode=144 reward=0.7650291 (473.47 it/sec) -training >> step=858500, episode=144 reward=0.7618362 (406.11 it/sec) -training >> step=858600, episode=144 reward=0.7647586 (339.38 it/sec) -training >> step=858700, episode=144 reward=0.7751614 (469.58 it/sec) -training >> step=858800, episode=144 reward=0.7775465 (446.73 it/sec) -training >> step=858900, episode=144 reward=0.7607287 (429.50 it/sec) -training >> step=859000, episode=144 reward=0.7583586 (465.95 it/sec) -training >> step=859100, episode=144 reward=0.7399867 (452.43 it/sec) -training >> step=859200, episode=144 reward=0.7491553 (469.42 it/sec) -training >> step=859300, episode=144 reward=0.7400652 (446.29 it/sec) -training >> step=859400, episode=144 reward=0.7630762 (457.85 it/sec) -training >> step=859500, episode=144 reward=0.7682948 (489.51 it/sec) -training >> step=859600, episode=144 reward=0.7823752 (533.37 it/sec) -training >> step=859700, episode=144 reward=0.7684666 (461.29 it/sec) -training >> step=859800, episode=144 reward=0.7681968 (507.55 it/sec) -training >> step=859900, episode=144 reward=0.7767648 (502.52 it/sec) -training >> step=860000, episode=144 reward=0.7483885 (508.13 it/sec) -training >> step=860100, episode=144 reward=0.7730431 (542.92 it/sec) -training >> step=860200, episode=144 reward=0.742752 (530.87 it/sec) -training >> step=860300, episode=144 reward=0.7632786 (546.48 it/sec) -training >> step=860400, episode=144 reward=0.7901143 (521.20 it/sec) -training >> step=860500, episode=144 reward=0.761238 (544.02 it/sec) -training >> step=860600, episode=144 reward=0.765424 (551.38 it/sec) -training >> step=860700, episode=144 reward=0.7701165 (493.48 it/sec) -training >> step=860800, episode=144 reward=0.7614364 (486.13 it/sec) -training >> step=860900, episode=144 reward=0.746651 (494.84 it/sec) -training >> step=861000, episode=144 reward=0.7883704 (428.39 it/sec) -training >> step=861100, episode=144 reward=0.7404224 (470.04 it/sec) -training >> step=861200, episode=144 reward=0.7548927 (492.70 it/sec) -training >> step=861300, episode=144 reward=0.7628372 (407.63 it/sec) -training >> step=861400, episode=144 reward=0.7748585 (457.93 it/sec) -training >> step=861500, episode=144 reward=0.7537777 (456.68 it/sec) -training >> step=861600, episode=144 reward=0.7632654 (454.47 it/sec) -training >> step=861700, episode=144 reward=0.745567 (491.04 it/sec) -training >> step=861800, episode=144 reward=0.7669776 (436.62 it/sec) -training >> step=861900, episode=144 reward=0.7872083 (445.13 it/sec) -training >> step=862000, episode=144 reward=0.7653106 (435.86 it/sec) -training >> step=862100, episode=144 reward=0.7390724 (446.21 it/sec) -training >> step=862200, episode=144 reward=0.7302303 (478.20 it/sec) -training >> step=862300, episode=144 reward=0.750874 (473.77 it/sec) -training >> step=862400, episode=144 reward=0.744202 (420.18 it/sec) -training >> step=862500, episode=144 reward=0.7616489 (426.65 it/sec) -training >> step=862600, episode=144 reward=0.7808679 (451.57 it/sec) -training >> step=862700, episode=144 reward=0.7496168 (452.31 it/sec) -training >> step=862800, episode=144 reward=0.7680147 (488.52 it/sec) -training >> step=862900, episode=144 reward=0.7484736 (454.25 it/sec) -training >> step=863000, episode=144 reward=0.7715254 (468.86 it/sec) -training >> step=863100, episode=144 reward=0.7676426 (420.96 it/sec) -training >> step=863200, episode=144 reward=0.7516732 (417.16 it/sec) -training >> step=863300, episode=145 reward=0.7541159 (98.11 it/sec) -training >> step=863400, episode=145 reward=0.7712063 (427.78 it/sec) -training >> step=863500, episode=145 reward=0.7805934 (501.18 it/sec) -training >> step=863600, episode=145 reward=0.7804227 (441.52 it/sec) -training >> step=863700, episode=145 reward=0.7560858 (448.16 it/sec) -training >> step=863800, episode=145 reward=0.7517528 (392.51 it/sec) -training >> step=863900, episode=145 reward=0.761359 (413.99 it/sec) -training >> step=864000, episode=145 reward=0.7583836 (442.48 it/sec) -training >> step=864100, episode=145 reward=0.7683855 (434.68 it/sec) -training >> step=864200, episode=145 reward=0.7752324 (439.74 it/sec) -training >> step=864300, episode=145 reward=0.753675 (410.66 it/sec) -training >> step=864400, episode=145 reward=0.7462468 (468.70 it/sec) -training >> step=864500, episode=145 reward=0.7664199 (466.77 it/sec) -training >> step=864600, episode=145 reward=0.7492864 (481.50 it/sec) -training >> step=864700, episode=145 reward=0.7453354 (443.79 it/sec) -training >> step=864800, episode=145 reward=0.75681 (297.15 it/sec) -training >> step=864900, episode=145 reward=0.7568927 (485.60 it/sec) -training >> step=865000, episode=145 reward=0.7768998 (531.81 it/sec) -training >> step=865100, episode=145 reward=0.7674937 (467.86 it/sec) -training >> step=865200, episode=145 reward=0.7651185 (499.79 it/sec) -training >> step=865300, episode=145 reward=0.7615071 (537.77 it/sec) -training >> step=865400, episode=145 reward=0.7587468 (573.14 it/sec) -training >> step=865500, episode=145 reward=0.7708147 (547.68 it/sec) -training >> step=865600, episode=145 reward=0.7373189 (532.89 it/sec) -training >> step=865700, episode=145 reward=0.7588847 (513.63 it/sec) -training >> step=865800, episode=145 reward=0.7557429 (543.69 it/sec) -training >> step=865900, episode=145 reward=0.7605959 (503.88 it/sec) -training >> step=866000, episode=145 reward=0.7810813 (510.59 it/sec) -training >> step=866100, episode=145 reward=0.7754591 (524.28 it/sec) -training >> step=866200, episode=145 reward=0.7571093 (567.86 it/sec) -training >> step=866300, episode=145 reward=0.7444584 (502.27 it/sec) -training >> step=866400, episode=145 reward=0.7550733 (535.18 it/sec) -training >> step=866500, episode=145 reward=0.7555547 (538.45 it/sec) -training >> step=866600, episode=145 reward=0.7672793 (545.81 it/sec) -training >> step=866700, episode=145 reward=0.7519313 (492.20 it/sec) -training >> step=866800, episode=145 reward=0.7477102 (549.30 it/sec) -training >> step=866900, episode=145 reward=0.764371 (563.86 it/sec) -training >> step=867000, episode=145 reward=0.7450916 (526.56 it/sec) -training >> step=867100, episode=145 reward=0.7410347 (519.28 it/sec) -training >> step=867200, episode=145 reward=0.7609434 (481.26 it/sec) -training >> step=867300, episode=145 reward=0.7532461 (572.91 it/sec) -training >> step=867400, episode=145 reward=0.7631328 (549.83 it/sec) -training >> step=867500, episode=145 reward=0.7443665 (511.26 it/sec) -training >> step=867600, episode=145 reward=0.7651393 (522.35 it/sec) -training >> step=867700, episode=145 reward=0.7666014 (482.05 it/sec) -training >> step=867800, episode=145 reward=0.7629829 (515.40 it/sec) -training >> step=867900, episode=145 reward=0.7680781 (539.82 it/sec) -training >> step=868000, episode=145 reward=0.7649978 (513.79 it/sec) -training >> step=868100, episode=145 reward=0.7475936 (556.80 it/sec) -training >> step=868200, episode=145 reward=0.7538058 (530.92 it/sec) -training >> step=868300, episode=145 reward=0.7730138 (489.42 it/sec) -training >> step=868400, episode=145 reward=0.7481833 (558.21 it/sec) -training >> step=868500, episode=145 reward=0.7583181 (497.54 it/sec) -training >> step=868600, episode=145 reward=0.7505775 (505.86 it/sec) -training >> step=868700, episode=145 reward=0.7549533 (530.04 it/sec) -training >> step=868800, episode=145 reward=0.7774383 (540.83 it/sec) -training >> step=868900, episode=145 reward=0.7483253 (517.14 it/sec) -training >> step=869000, episode=145 reward=0.7601579 (572.48 it/sec) -training >> step=869100, episode=145 reward=0.7593945 (491.24 it/sec) -training >> step=869200, episode=145 reward=0.7452921 (518.63 it/sec) -training >> step=869300, episode=146 reward=0.7576071 (187.09 it/sec) -training >> step=869400, episode=146 reward=0.7665433 (514.63 it/sec) -training >> step=869500, episode=146 reward=0.7482443 (507.24 it/sec) -training >> step=869600, episode=146 reward=0.7535033 (531.08 it/sec) -training >> step=869700, episode=146 reward=0.7729911 (499.89 it/sec) -training >> step=869800, episode=146 reward=0.7719896 (523.08 it/sec) -training >> step=869900, episode=146 reward=0.7648553 (512.62 it/sec) -training >> step=870000, episode=146 reward=0.7547551 (524.94 it/sec) -training >> step=870100, episode=146 reward=0.7594346 (497.78 it/sec) -training >> step=870200, episode=146 reward=0.7812454 (511.92 it/sec) -training >> step=870300, episode=146 reward=0.7616918 (533.24 it/sec) -training >> step=870400, episode=146 reward=0.7640546 (502.53 it/sec) -training >> step=870500, episode=146 reward=0.7811489 (522.08 it/sec) -training >> step=870600, episode=146 reward=0.7597802 (549.83 it/sec) -training >> step=870700, episode=146 reward=0.7629633 (499.52 it/sec) -training >> step=870800, episode=146 reward=0.7447696 (388.39 it/sec) -training >> step=870900, episode=146 reward=0.7681237 (529.86 it/sec) -training >> step=871000, episode=146 reward=0.767364 (531.58 it/sec) -training >> step=871100, episode=146 reward=0.7572283 (525.71 it/sec) -training >> step=871200, episode=146 reward=0.7676713 (548.32 it/sec) -training >> step=871300, episode=146 reward=0.7552362 (525.84 it/sec) -training >> step=871400, episode=146 reward=0.7686513 (544.89 it/sec) -training >> step=871500, episode=146 reward=0.7601129 (552.27 it/sec) -training >> step=871600, episode=146 reward=0.7699757 (514.81 it/sec) -training >> step=871700, episode=146 reward=0.7716736 (474.81 it/sec) -training >> step=871800, episode=146 reward=0.7753773 (501.97 it/sec) -training >> step=871900, episode=146 reward=0.7767222 (555.85 it/sec) -training >> step=872000, episode=146 reward=0.772145 (513.90 it/sec) -training >> step=872100, episode=146 reward=0.7825607 (494.79 it/sec) -training >> step=872200, episode=146 reward=0.7697949 (507.01 it/sec) -training >> step=872300, episode=146 reward=0.7671784 (575.73 it/sec) -training >> step=872400, episode=146 reward=0.7920768 (528.86 it/sec) -training >> step=872500, episode=146 reward=0.7673829 (497.45 it/sec) -training >> step=872600, episode=146 reward=0.7299452 (549.41 it/sec) -training >> step=872700, episode=146 reward=0.7709225 (551.74 it/sec) -training >> step=872800, episode=146 reward=0.7663038 (520.14 it/sec) -training >> step=872900, episode=146 reward=0.7859791 (508.97 it/sec) -training >> step=873000, episode=146 reward=0.7621548 (556.12 it/sec) -training >> step=873100, episode=146 reward=0.7622674 (510.42 it/sec) -training >> step=873200, episode=146 reward=0.7635932 (519.88 it/sec) -training >> step=873300, episode=146 reward=0.7754477 (516.24 it/sec) -training >> step=873400, episode=146 reward=0.7837363 (590.86 it/sec) -training >> step=873500, episode=146 reward=0.7598248 (556.02 it/sec) -training >> step=873600, episode=146 reward=0.7661328 (500.00 it/sec) -training >> step=873700, episode=146 reward=0.7651332 (503.99 it/sec) -training >> step=873800, episode=146 reward=0.7635393 (536.89 it/sec) -training >> step=873900, episode=146 reward=0.7679553 (575.04 it/sec) -training >> step=874000, episode=146 reward=0.7785978 (496.80 it/sec) -training >> step=874100, episode=146 reward=0.7644925 (577.12 it/sec) -training >> step=874200, episode=146 reward=0.7480836 (536.43 it/sec) -training >> step=874300, episode=146 reward=0.7656961 (532.83 it/sec) -training >> step=874400, episode=146 reward=0.7426891 (539.30 it/sec) -training >> step=874500, episode=146 reward=0.7482859 (592.23 it/sec) -training >> step=874600, episode=146 reward=0.7634598 (556.65 it/sec) -training >> step=874700, episode=146 reward=0.7711456 (541.85 it/sec) -training >> step=874800, episode=146 reward=0.7648375 (555.91 it/sec) -training >> step=874900, episode=146 reward=0.7542504 (480.55 it/sec) -training >> step=875000, episode=146 reward=0.7639817 (536.35 it/sec) -training >> step=875100, episode=146 reward=0.7389377 (534.24 it/sec) -training >> step=875200, episode=146 reward=0.7600747 (526.98 it/sec) -training >> step=875300, episode=147 reward=0.7681074 (192.28 it/sec) -training >> step=875400, episode=147 reward=0.7654757 (519.93 it/sec) -training >> step=875500, episode=147 reward=0.7360605 (518.67 it/sec) -training >> step=875600, episode=147 reward=0.7916434 (550.19 it/sec) -training >> step=875700, episode=147 reward=0.7404842 (535.66 it/sec) -training >> step=875800, episode=147 reward=0.7650182 (527.92 it/sec) -training >> step=875900, episode=147 reward=0.7725935 (530.48 it/sec) -training >> step=876000, episode=147 reward=0.7471367 (580.21 it/sec) -training >> step=876100, episode=147 reward=0.7618844 (568.17 it/sec) -training >> step=876200, episode=147 reward=0.7609465 (526.13 it/sec) -training >> step=876300, episode=147 reward=0.7688102 (544.71 it/sec) -training >> step=876400, episode=147 reward=0.7730695 (570.67 it/sec) -training >> step=876500, episode=147 reward=0.7566518 (552.31 it/sec) -training >> step=876600, episode=147 reward=0.7545822 (529.38 it/sec) -training >> step=876700, episode=147 reward=0.7694787 (554.58 it/sec) -training >> step=876800, episode=147 reward=0.7510326 (544.47 it/sec) -training >> step=876900, episode=147 reward=0.7473243 (526.34 it/sec) -training >> step=877000, episode=147 reward=0.7892484 (388.98 it/sec) -training >> step=877100, episode=147 reward=0.7731244 (563.39 it/sec) -training >> step=877200, episode=147 reward=0.7451571 (564.41 it/sec) -training >> step=877300, episode=147 reward=0.7840825 (547.42 it/sec) -training >> step=877400, episode=147 reward=0.7918237 (544.92 it/sec) -training >> step=877500, episode=147 reward=0.7460328 (564.18 it/sec) -training >> step=877600, episode=147 reward=0.7827068 (537.18 it/sec) -training >> step=877700, episode=147 reward=0.7648547 (490.93 it/sec) -training >> step=877800, episode=147 reward=0.7587194 (573.34 it/sec) -training >> step=877900, episode=147 reward=0.787089 (523.14 it/sec) -training >> step=878000, episode=147 reward=0.7581861 (521.34 it/sec) -training >> step=878100, episode=147 reward=0.776903 (528.00 it/sec) -training >> step=878200, episode=147 reward=0.7496038 (544.66 it/sec) -training >> step=878300, episode=147 reward=0.7683296 (531.17 it/sec) -training >> step=878400, episode=147 reward=0.7424358 (509.35 it/sec) -training >> step=878500, episode=147 reward=0.7447685 (526.59 it/sec) -training >> step=878600, episode=147 reward=0.7697638 (556.54 it/sec) -training >> step=878700, episode=147 reward=0.7859631 (532.59 it/sec) -training >> step=878800, episode=147 reward=0.7711532 (522.24 it/sec) -training >> step=878900, episode=147 reward=0.7690598 (533.05 it/sec) -training >> step=879000, episode=147 reward=0.7480623 (539.50 it/sec) -training >> step=879100, episode=147 reward=0.7459776 (543.19 it/sec) -training >> step=879200, episode=147 reward=0.77114 (549.10 it/sec) -training >> step=879300, episode=147 reward=0.7758293 (518.64 it/sec) -training >> step=879400, episode=147 reward=0.7505964 (563.16 it/sec) -training >> step=879500, episode=147 reward=0.7546118 (554.08 it/sec) -training >> step=879600, episode=147 reward=0.7515489 (539.54 it/sec) -training >> step=879700, episode=147 reward=0.7495405 (555.71 it/sec) -training >> step=879800, episode=147 reward=0.7832738 (554.34 it/sec) -training >> step=879900, episode=147 reward=0.7610271 (567.58 it/sec) -training >> step=880000, episode=147 reward=0.7699505 (569.15 it/sec) -training >> step=880100, episode=147 reward=0.7448792 (498.77 it/sec) -training >> step=880200, episode=147 reward=0.7457673 (528.79 it/sec) -training >> step=880300, episode=147 reward=0.7749549 (568.83 it/sec) -training >> step=880400, episode=147 reward=0.7721884 (550.47 it/sec) -training >> step=880500, episode=147 reward=0.7535135 (572.78 it/sec) -training >> step=880600, episode=147 reward=0.7326437 (526.72 it/sec) -training >> step=880700, episode=147 reward=0.7691359 (577.52 it/sec) -training >> step=880800, episode=147 reward=0.7506515 (529.41 it/sec) -training >> step=880900, episode=147 reward=0.7695658 (514.30 it/sec) -training >> step=881000, episode=147 reward=0.7518613 (535.09 it/sec) -training >> step=881100, episode=147 reward=0.7474752 (563.62 it/sec) -training >> step=881200, episode=147 reward=0.7808228 (536.92 it/sec) -training >> step=881300, episode=148 reward=0.7699081 (199.63 it/sec) -training >> step=881400, episode=148 reward=0.7682294 (550.26 it/sec) -training >> step=881500, episode=148 reward=0.7767463 (563.36 it/sec) -training >> step=881600, episode=148 reward=0.7467613 (515.27 it/sec) -training >> step=881700, episode=148 reward=0.7449376 (465.97 it/sec) -training >> step=881800, episode=148 reward=0.7390389 (502.70 it/sec) -training >> step=881900, episode=148 reward=0.7500556 (515.86 it/sec) -training >> step=882000, episode=148 reward=0.7544849 (555.79 it/sec) -training >> step=882100, episode=148 reward=0.7677875 (537.57 it/sec) -training >> step=882200, episode=148 reward=0.7703506 (474.12 it/sec) -training >> step=882300, episode=148 reward=0.7507463 (543.17 it/sec) -training >> step=882400, episode=148 reward=0.7643486 (510.13 it/sec) -training >> step=882500, episode=148 reward=0.7712711 (549.46 it/sec) -training >> step=882600, episode=148 reward=0.7611277 (512.53 it/sec) -training >> step=882700, episode=148 reward=0.7666456 (497.07 it/sec) -training >> step=882800, episode=148 reward=0.7480936 (504.49 it/sec) -training >> step=882900, episode=148 reward=0.7634488 (496.99 it/sec) -training >> step=883000, episode=148 reward=0.7937213 (519.27 it/sec) -training >> step=883100, episode=148 reward=0.7573374 (500.46 it/sec) -training >> step=883200, episode=148 reward=0.7798471 (452.99 it/sec) -training >> step=883300, episode=148 reward=0.7781061 (323.80 it/sec) -training >> step=883400, episode=148 reward=0.7558298 (515.18 it/sec) -training >> step=883500, episode=148 reward=0.7570376 (500.55 it/sec) -training >> step=883600, episode=148 reward=0.7652941 (511.43 it/sec) -training >> step=883700, episode=148 reward=0.7733794 (481.31 it/sec) -training >> step=883800, episode=148 reward=0.7686088 (464.93 it/sec) -training >> step=883900, episode=148 reward=0.7726586 (512.96 it/sec) -training >> step=884000, episode=148 reward=0.7805386 (515.93 it/sec) -training >> step=884100, episode=148 reward=0.7550781 (540.74 it/sec) -training >> step=884200, episode=148 reward=0.7418478 (502.08 it/sec) -training >> step=884300, episode=148 reward=0.7629972 (519.69 it/sec) -training >> step=884400, episode=148 reward=0.7436987 (553.44 it/sec) -training >> step=884500, episode=148 reward=0.7637585 (521.94 it/sec) -training >> step=884600, episode=148 reward=0.7655923 (538.99 it/sec) -training >> step=884700, episode=148 reward=0.7734031 (479.85 it/sec) -training >> step=884800, episode=148 reward=0.7507866 (476.52 it/sec) -training >> step=884900, episode=148 reward=0.7732381 (564.11 it/sec) -training >> step=885000, episode=148 reward=0.7763951 (534.47 it/sec) -training >> step=885100, episode=148 reward=0.7669632 (532.83 it/sec) -training >> step=885200, episode=148 reward=0.7490201 (502.15 it/sec) -training >> step=885300, episode=148 reward=0.7522884 (538.10 it/sec) -training >> step=885400, episode=148 reward=0.7602624 (516.19 it/sec) -training >> step=885500, episode=148 reward=0.7583637 (492.33 it/sec) -training >> step=885600, episode=148 reward=0.7751249 (535.17 it/sec) -training >> step=885700, episode=148 reward=0.7569575 (515.78 it/sec) -training >> step=885800, episode=148 reward=0.7489966 (486.67 it/sec) -training >> step=885900, episode=148 reward=0.7518114 (532.11 it/sec) -training >> step=886000, episode=148 reward=0.7650503 (497.03 it/sec) -training >> step=886100, episode=148 reward=0.7801899 (501.70 it/sec) -training >> step=886200, episode=148 reward=0.750121 (528.30 it/sec) -training >> step=886300, episode=148 reward=0.7480876 (497.51 it/sec) -training >> step=886400, episode=148 reward=0.7618108 (536.45 it/sec) -training >> step=886500, episode=148 reward=0.7585912 (536.99 it/sec) -training >> step=886600, episode=148 reward=0.7538592 (551.99 it/sec) -training >> step=886700, episode=148 reward=0.7463468 (585.72 it/sec) -training >> step=886800, episode=148 reward=0.77537 (430.60 it/sec) -training >> step=886900, episode=148 reward=0.7565065 (381.84 it/sec) -training >> step=887000, episode=148 reward=0.7626417 (431.46 it/sec) -training >> step=887100, episode=148 reward=0.752795 (483.54 it/sec) -training >> step=887200, episode=148 reward=0.7493291 (481.67 it/sec) -training >> step=887300, episode=149 reward=0.755532 (151.73 it/sec) -training >> step=887400, episode=149 reward=0.7597159 (421.29 it/sec) -training >> step=887500, episode=149 reward=0.7516957 (461.92 it/sec) -training >> step=887600, episode=149 reward=0.7500594 (437.16 it/sec) -training >> step=887700, episode=149 reward=0.7434981 (440.23 it/sec) -training >> step=887800, episode=149 reward=0.7422645 (417.86 it/sec) -training >> step=887900, episode=149 reward=0.7651052 (425.33 it/sec) -training >> step=888000, episode=149 reward=0.747139 (409.99 it/sec) -training >> step=888100, episode=149 reward=0.7612537 (462.34 it/sec) -training >> step=888200, episode=149 reward=0.7570966 (429.27 it/sec) -training >> step=888300, episode=149 reward=0.770191 (480.62 it/sec) -training >> step=888400, episode=149 reward=0.7565221 (418.89 it/sec) -training >> step=888500, episode=149 reward=0.7598835 (404.31 it/sec) -training >> step=888600, episode=149 reward=0.7500017 (464.10 it/sec) -training >> step=888700, episode=149 reward=0.7601679 (479.79 it/sec) -training >> step=888800, episode=149 reward=0.7725966 (383.73 it/sec) -training >> step=888900, episode=149 reward=0.7747508 (392.90 it/sec) -training >> step=889000, episode=149 reward=0.7560372 (436.98 it/sec) -training >> step=889100, episode=149 reward=0.7653705 (466.13 it/sec) -training >> step=889200, episode=149 reward=0.7844692 (466.52 it/sec) -training >> step=889300, episode=149 reward=0.7764461 (410.97 it/sec) -training >> step=889400, episode=149 reward=0.7496537 (337.10 it/sec) -training >> step=889500, episode=149 reward=0.7741411 (458.46 it/sec) -training >> step=889600, episode=149 reward=0.7538069 (469.66 it/sec) -training >> step=889700, episode=149 reward=0.7417622 (431.68 it/sec) -training >> step=889800, episode=149 reward=0.7517186 (418.27 it/sec) -training >> step=889900, episode=149 reward=0.7668074 (424.80 it/sec) -training >> step=890000, episode=149 reward=0.7596529 (461.04 it/sec) -training >> step=890100, episode=149 reward=0.7517154 (469.21 it/sec) -training >> step=890200, episode=149 reward=0.7482439 (415.33 it/sec) -training >> step=890300, episode=149 reward=0.7720894 (437.10 it/sec) -training >> step=890400, episode=149 reward=0.7937986 (434.02 it/sec) -training >> step=890500, episode=149 reward=0.7809983 (413.74 it/sec) -training >> step=890600, episode=149 reward=0.7537223 (475.99 it/sec) -training >> step=890700, episode=149 reward=0.7427178 (444.72 it/sec) -training >> step=890800, episode=149 reward=0.7824082 (414.18 it/sec) -training >> step=890900, episode=149 reward=0.7572451 (444.88 it/sec) -training >> step=891000, episode=149 reward=0.7560829 (424.95 it/sec) -training >> step=891100, episode=149 reward=0.7588656 (407.38 it/sec) -training >> step=891200, episode=149 reward=0.764925 (441.60 it/sec) -training >> step=891300, episode=149 reward=0.7754645 (437.41 it/sec) -training >> step=891400, episode=149 reward=0.7260059 (400.62 it/sec) -training >> step=891500, episode=149 reward=0.7629429 (458.98 it/sec) -training >> step=891600, episode=149 reward=0.7501111 (472.40 it/sec) -training >> step=891700, episode=149 reward=0.7533162 (516.19 it/sec) -training >> step=891800, episode=149 reward=0.763081 (526.50 it/sec) -training >> step=891900, episode=149 reward=0.7672724 (520.30 it/sec) -training >> step=892000, episode=149 reward=0.761089 (516.94 it/sec) -training >> step=892100, episode=149 reward=0.7589284 (519.10 it/sec) -training >> step=892200, episode=149 reward=0.7933955 (554.81 it/sec) -training >> step=892300, episode=149 reward=0.7293923 (519.99 it/sec) -training >> step=892400, episode=149 reward=0.7443531 (507.19 it/sec) -training >> step=892500, episode=149 reward=0.7397388 (585.68 it/sec) -training >> step=892600, episode=149 reward=0.7749758 (505.12 it/sec) -training >> step=892700, episode=149 reward=0.7583644 (503.13 it/sec) -training >> step=892800, episode=149 reward=0.7438953 (552.07 it/sec) -training >> step=892900, episode=149 reward=0.7631533 (578.09 it/sec) -training >> step=893000, episode=149 reward=0.7699043 (555.80 it/sec) -training >> step=893100, episode=149 reward=0.7797955 (536.77 it/sec) -training >> step=893200, episode=149 reward=0.7726617 (508.70 it/sec) -training >> step=893300, episode=150 reward=0.7641254 (165.98 it/sec) -training >> step=893400, episode=150 reward=0.7653238 (557.54 it/sec) -training >> step=893500, episode=150 reward=0.7529662 (529.22 it/sec) -training >> step=893600, episode=150 reward=0.7521856 (536.43 it/sec) -training >> step=893700, episode=150 reward=0.7662833 (583.85 it/sec) -training >> step=893800, episode=150 reward=0.7918161 (570.20 it/sec) -training >> step=893900, episode=150 reward=0.7347367 (475.48 it/sec) -training >> step=894000, episode=150 reward=0.7600942 (460.62 it/sec) -training >> step=894100, episode=150 reward=0.7562917 (475.37 it/sec) -training >> step=894200, episode=150 reward=0.7730103 (488.35 it/sec) -training >> step=894300, episode=150 reward=0.7547108 (455.28 it/sec) -training >> step=894400, episode=150 reward=0.7876485 (334.57 it/sec) -training >> step=894500, episode=150 reward=0.7715299 (311.05 it/sec) -training >> step=894600, episode=150 reward=0.7525266 (340.34 it/sec) -training >> step=894700, episode=150 reward=0.770546 (425.81 it/sec) -training >> step=894800, episode=150 reward=0.7716609 (451.30 it/sec) -training >> step=894900, episode=150 reward=0.7698666 (416.01 it/sec) -training >> step=895000, episode=150 reward=0.775629 (477.43 it/sec) -training >> step=895100, episode=150 reward=0.7703841 (492.42 it/sec) -training >> step=895200, episode=150 reward=0.7512864 (457.41 it/sec) -training >> step=895300, episode=150 reward=0.7644006 (457.18 it/sec) -training >> step=895400, episode=150 reward=0.7734733 (478.24 it/sec) -training >> step=895500, episode=150 reward=0.7786575 (358.11 it/sec) -training >> step=895600, episode=150 reward=0.7512156 (485.19 it/sec) -training >> step=895700, episode=150 reward=0.7577422 (441.50 it/sec) -training >> step=895800, episode=150 reward=0.7675083 (469.05 it/sec) -training >> step=895900, episode=150 reward=0.748869 (501.63 it/sec) -training >> step=896000, episode=150 reward=0.7583992 (544.88 it/sec) -training >> step=896100, episode=150 reward=0.7615005 (520.14 it/sec) -training >> step=896200, episode=150 reward=0.771185 (465.22 it/sec) -training >> step=896300, episode=150 reward=0.7626587 (422.41 it/sec) -training >> step=896400, episode=150 reward=0.7715086 (495.60 it/sec) -training >> step=896500, episode=150 reward=0.7766175 (520.65 it/sec) -training >> step=896600, episode=150 reward=0.7375426 (509.88 it/sec) -training >> step=896700, episode=150 reward=0.7721304 (547.20 it/sec) -training >> step=896800, episode=150 reward=0.7627372 (466.02 it/sec) -training >> step=896900, episode=150 reward=0.7783445 (492.82 it/sec) -training >> step=897000, episode=150 reward=0.7534711 (491.65 it/sec) -training >> step=897100, episode=150 reward=0.7570764 (526.96 it/sec) -training >> step=897200, episode=150 reward=0.7514918 (514.95 it/sec) -training >> step=897300, episode=150 reward=0.7561386 (471.55 it/sec) -training >> step=897400, episode=150 reward=0.7689978 (529.14 it/sec) -training >> step=897500, episode=150 reward=0.7655828 (535.02 it/sec) -training >> step=897600, episode=150 reward=0.7720569 (528.89 it/sec) -training >> step=897700, episode=150 reward=0.7702799 (535.34 it/sec) -training >> step=897800, episode=150 reward=0.7618959 (542.87 it/sec) -training >> step=897900, episode=150 reward=0.7402684 (511.70 it/sec) -training >> step=898000, episode=150 reward=0.7797859 (486.69 it/sec) -training >> step=898100, episode=150 reward=0.7562242 (547.91 it/sec) -training >> step=898200, episode=150 reward=0.7766191 (563.50 it/sec) -training >> step=898300, episode=150 reward=0.7455485 (528.25 it/sec) -training >> step=898400, episode=150 reward=0.7597115 (496.07 it/sec) -training >> step=898500, episode=150 reward=0.7659127 (560.39 it/sec) -training >> step=898600, episode=150 reward=0.7600579 (519.06 it/sec) -training >> step=898700, episode=150 reward=0.7545373 (530.07 it/sec) -training >> step=898800, episode=150 reward=0.7548785 (523.14 it/sec) -training >> step=898900, episode=150 reward=0.7480443 (533.53 it/sec) -training >> step=899000, episode=150 reward=0.7576407 (496.74 it/sec) -training >> step=899100, episode=150 reward=0.7493791 (497.92 it/sec) -training >> step=899200, episode=150 reward=0.7619902 (485.34 it/sec) -training >> step=899300, episode=151 reward=0.7435585 (139.50 it/sec) -training >> step=899400, episode=151 reward=0.7800233 (535.74 it/sec) -training >> step=899500, episode=151 reward=0.7589965 (501.26 it/sec) -training >> step=899600, episode=151 reward=0.7518585 (519.92 it/sec) -training >> step=899700, episode=151 reward=0.759901 (540.16 it/sec) -training >> step=899800, episode=151 reward=0.7489219 (517.57 it/sec) -training >> step=899900, episode=151 reward=0.7690308 (535.81 it/sec) -training >> step=900000, episode=151 reward=0.7587805 (518.07 it/sec) -training >> step=900100, episode=151 reward=0.7450073 (537.28 it/sec) -training >> step=900200, episode=151 reward=0.7703636 (477.83 it/sec) -training >> step=900300, episode=151 reward=0.7522894 (520.59 it/sec) -training >> step=900400, episode=151 reward=0.7553427 (528.62 it/sec) -training >> step=900500, episode=151 reward=0.7761035 (504.26 it/sec) -training >> step=900600, episode=151 reward=0.7594012 (494.78 it/sec) -training >> step=900700, episode=151 reward=0.7734815 (452.38 it/sec) -training >> step=900800, episode=151 reward=0.7580017 (486.25 it/sec) -training >> step=900900, episode=151 reward=0.7453109 (507.68 it/sec) -training >> step=901000, episode=151 reward=0.7728806 (490.98 it/sec) -training >> step=901100, episode=151 reward=0.7657722 (496.74 it/sec) -training >> step=901200, episode=151 reward=0.753599 (509.41 it/sec) -training >> step=901300, episode=151 reward=0.7546827 (529.38 it/sec) -training >> step=901400, episode=151 reward=0.764124 (534.98 it/sec) -training >> step=901500, episode=151 reward=0.7647748 (537.34 it/sec) -training >> step=901600, episode=151 reward=0.7476997 (522.52 it/sec) -training >> step=901700, episode=151 reward=0.7651507 (534.39 it/sec) -training >> step=901800, episode=151 reward=0.7539755 (341.83 it/sec) -training >> step=901900, episode=151 reward=0.7706738 (564.11 it/sec) -training >> step=902000, episode=151 reward=0.7681941 (537.38 it/sec) -training >> step=902100, episode=151 reward=0.7557649 (518.17 it/sec) -training >> step=902200, episode=151 reward=0.7427664 (526.09 it/sec) -training >> step=902300, episode=151 reward=0.7487843 (495.77 it/sec) -training >> step=902400, episode=151 reward=0.7636561 (485.63 it/sec) -training >> step=902500, episode=151 reward=0.7788728 (508.74 it/sec) -training >> step=902600, episode=151 reward=0.7656392 (527.82 it/sec) -training >> step=902700, episode=151 reward=0.7524527 (517.28 it/sec) -training >> step=902800, episode=151 reward=0.7569913 (466.97 it/sec) -training >> step=902900, episode=151 reward=0.7575525 (498.60 it/sec) -training >> step=903000, episode=151 reward=0.7690104 (484.18 it/sec) -training >> step=903100, episode=151 reward=0.776597 (466.74 it/sec) -training >> step=903200, episode=151 reward=0.7709434 (472.91 it/sec) -training >> step=903300, episode=151 reward=0.7761077 (472.64 it/sec) -training >> step=903400, episode=151 reward=0.766376 (475.74 it/sec) -training >> step=903500, episode=151 reward=0.7658886 (419.14 it/sec) -training >> step=903600, episode=151 reward=0.7612664 (364.24 it/sec) -training >> step=903700, episode=151 reward=0.7716823 (460.76 it/sec) -training >> step=903800, episode=151 reward=0.7456838 (468.33 it/sec) -training >> step=903900, episode=151 reward=0.7546921 (469.33 it/sec) -training >> step=904000, episode=151 reward=0.7502576 (453.61 it/sec) -training >> step=904100, episode=151 reward=0.7643591 (453.04 it/sec) -training >> step=904200, episode=151 reward=0.7527583 (452.61 it/sec) -training >> step=904300, episode=151 reward=0.7814454 (422.12 it/sec) -training >> step=904400, episode=151 reward=0.7608351 (409.68 it/sec) -training >> step=904500, episode=151 reward=0.761208 (491.26 it/sec) -training >> step=904600, episode=151 reward=0.754632 (508.67 it/sec) -training >> step=904700, episode=151 reward=0.7627158 (500.81 it/sec) -training >> step=904800, episode=151 reward=0.7572963 (510.37 it/sec) -training >> step=904900, episode=151 reward=0.7655179 (534.35 it/sec) -training >> step=905000, episode=151 reward=0.7540158 (548.22 it/sec) -training >> step=905100, episode=151 reward=0.7694671 (511.63 it/sec) -training >> step=905200, episode=151 reward=0.7616401 (527.72 it/sec) -training >> step=905300, episode=152 reward=0.7698091 (159.92 it/sec) -training >> step=905400, episode=152 reward=0.7991126 (468.31 it/sec) -training >> step=905500, episode=152 reward=0.7376025 (498.65 it/sec) -training >> step=905600, episode=152 reward=0.7521114 (490.06 it/sec) -training >> step=905700, episode=152 reward=0.7416257 (541.77 it/sec) -training >> step=905800, episode=152 reward=0.7702392 (507.55 it/sec) -training >> step=905900, episode=152 reward=0.7787365 (459.12 it/sec) -training >> step=906000, episode=152 reward=0.7503849 (526.38 it/sec) -training >> step=906100, episode=152 reward=0.7628859 (559.27 it/sec) -training >> step=906200, episode=152 reward=0.7722494 (539.13 it/sec) -training >> step=906300, episode=152 reward=0.7528903 (481.08 it/sec) -training >> step=906400, episode=152 reward=0.7562929 (496.96 it/sec) -training >> step=906500, episode=152 reward=0.7872727 (471.19 it/sec) -training >> step=906600, episode=152 reward=0.7692782 (487.96 it/sec) -training >> step=906700, episode=152 reward=0.7520022 (544.33 it/sec) -training >> step=906800, episode=152 reward=0.7735143 (536.16 it/sec) -training >> step=906900, episode=152 reward=0.7621042 (492.68 it/sec) -training >> step=907000, episode=152 reward=0.7503859 (538.26 it/sec) -training >> step=907100, episode=152 reward=0.7885968 (540.28 it/sec) -training >> step=907200, episode=152 reward=0.7419571 (490.67 it/sec) -training >> step=907300, episode=152 reward=0.7587808 (532.60 it/sec) -training >> step=907400, episode=152 reward=0.7432674 (495.91 it/sec) -training >> step=907500, episode=152 reward=0.773562 (480.37 it/sec) -training >> step=907600, episode=152 reward=0.7601446 (543.57 it/sec) -training >> step=907700, episode=152 reward=0.7345212 (515.72 it/sec) -training >> step=907800, episode=152 reward=0.7524613 (504.70 it/sec) -training >> step=907900, episode=152 reward=0.7592516 (456.84 it/sec) -training >> step=908000, episode=152 reward=0.7560307 (332.24 it/sec) -training >> step=908100, episode=152 reward=0.765337 (445.03 it/sec) -training >> step=908200, episode=152 reward=0.7806718 (477.46 it/sec) -training >> step=908300, episode=152 reward=0.7765467 (462.06 it/sec) -training >> step=908400, episode=152 reward=0.7800516 (485.21 it/sec) -training >> step=908500, episode=152 reward=0.7815273 (457.63 it/sec) -training >> step=908600, episode=152 reward=0.7526919 (426.20 it/sec) -training >> step=908700, episode=152 reward=0.7545918 (473.72 it/sec) -training >> step=908800, episode=152 reward=0.7514402 (454.06 it/sec) -training >> step=908900, episode=152 reward=0.7409745 (405.36 it/sec) -training >> step=909000, episode=152 reward=0.7654016 (434.78 it/sec) -training >> step=909100, episode=152 reward=0.7710222 (468.98 it/sec) -training >> step=909200, episode=152 reward=0.7559773 (458.51 it/sec) -training >> step=909300, episode=152 reward=0.7521033 (453.47 it/sec) -training >> step=909400, episode=152 reward=0.7750965 (470.94 it/sec) -training >> step=909500, episode=152 reward=0.7606193 (454.71 it/sec) -training >> step=909600, episode=152 reward=0.7715623 (477.57 it/sec) -training >> step=909700, episode=152 reward=0.7797578 (455.39 it/sec) -training >> step=909800, episode=152 reward=0.7666615 (442.22 it/sec) -training >> step=909900, episode=152 reward=0.766712 (493.80 it/sec) -training >> step=910000, episode=152 reward=0.7771984 (457.47 it/sec) -training >> step=910100, episode=152 reward=0.7616302 (450.16 it/sec) -training >> step=910200, episode=152 reward=0.7367749 (461.68 it/sec) -training >> step=910300, episode=152 reward=0.7734956 (489.78 it/sec) -training >> step=910400, episode=152 reward=0.7542502 (489.28 it/sec) -training >> step=910500, episode=152 reward=0.7478905 (462.87 it/sec) -training >> step=910600, episode=152 reward=0.7478122 (434.10 it/sec) -training >> step=910700, episode=152 reward=0.769124 (463.63 it/sec) -training >> step=910800, episode=152 reward=0.7782953 (458.99 it/sec) -training >> step=910900, episode=152 reward=0.7431406 (453.47 it/sec) -training >> step=911000, episode=152 reward=0.7659572 (450.26 it/sec) -training >> step=911100, episode=152 reward=0.7801583 (440.87 it/sec) -training >> step=911200, episode=152 reward=0.7591855 (427.08 it/sec) -training >> step=911300, episode=153 reward=0.7364632 (153.06 it/sec) -training >> step=911400, episode=153 reward=0.7975234 (498.54 it/sec) -training >> step=911500, episode=153 reward=0.7515779 (452.10 it/sec) -training >> step=911600, episode=153 reward=0.7850658 (438.81 it/sec) -training >> step=911700, episode=153 reward=0.769195 (448.14 it/sec) -training >> step=911800, episode=153 reward=0.7650269 (429.36 it/sec) -training >> step=911900, episode=153 reward=0.7634746 (466.68 it/sec) -training >> step=912000, episode=153 reward=0.7530266 (466.03 it/sec) -training >> step=912100, episode=153 reward=0.7667708 (410.23 it/sec) -training >> step=912200, episode=153 reward=0.7633801 (414.70 it/sec) -training >> step=912300, episode=153 reward=0.7485992 (407.79 it/sec) -training >> step=912400, episode=153 reward=0.7477332 (469.30 it/sec) -training >> step=912500, episode=153 reward=0.7482201 (494.15 it/sec) -training >> step=912600, episode=153 reward=0.7599986 (496.17 it/sec) -training >> step=912700, episode=153 reward=0.7683252 (542.13 it/sec) -training >> step=912800, episode=153 reward=0.7580261 (495.60 it/sec) -training >> step=912900, episode=153 reward=0.7619932 (520.34 it/sec) -training >> step=913000, episode=153 reward=0.7776685 (547.63 it/sec) -training >> step=913100, episode=153 reward=0.7739796 (524.30 it/sec) -training >> step=913200, episode=153 reward=0.7809895 (439.34 it/sec) -training >> step=913300, episode=153 reward=0.7580165 (366.36 it/sec) -training >> step=913400, episode=153 reward=0.7564737 (479.51 it/sec) -training >> step=913500, episode=153 reward=0.7700599 (471.63 it/sec) -training >> step=913600, episode=153 reward=0.7380048 (501.14 it/sec) -training >> step=913700, episode=153 reward=0.7532266 (481.32 it/sec) -training >> step=913800, episode=153 reward=0.7684727 (514.63 it/sec) -training >> step=913900, episode=153 reward=0.7656196 (495.36 it/sec) -training >> step=914000, episode=153 reward=0.7617282 (327.10 it/sec) -training >> step=914100, episode=153 reward=0.76403 (481.17 it/sec) -training >> step=914200, episode=153 reward=0.7593864 (508.86 it/sec) -training >> step=914300, episode=153 reward=0.7654764 (526.21 it/sec) -training >> step=914400, episode=153 reward=0.7373337 (481.35 it/sec) -training >> step=914500, episode=153 reward=0.7511114 (506.06 it/sec) -training >> step=914600, episode=153 reward=0.768916 (487.81 it/sec) -training >> step=914700, episode=153 reward=0.7508382 (486.01 it/sec) -training >> step=914800, episode=153 reward=0.7413163 (510.75 it/sec) -training >> step=914900, episode=153 reward=0.7514333 (491.25 it/sec) -training >> step=915000, episode=153 reward=0.7644516 (512.58 it/sec) -training >> step=915100, episode=153 reward=0.7469262 (473.61 it/sec) -training >> step=915200, episode=153 reward=0.7544005 (437.38 it/sec) -training >> step=915300, episode=153 reward=0.7329692 (528.76 it/sec) -training >> step=915400, episode=153 reward=0.7607908 (495.00 it/sec) -training >> step=915500, episode=153 reward=0.7593117 (505.02 it/sec) -training >> step=915600, episode=153 reward=0.7527894 (432.15 it/sec) -training >> step=915700, episode=153 reward=0.7693107 (411.76 it/sec) -training >> step=915800, episode=153 reward=0.7625167 (435.82 it/sec) -training >> step=915900, episode=153 reward=0.7502126 (413.18 it/sec) -training >> step=916000, episode=153 reward=0.758957 (423.46 it/sec) -training >> step=916100, episode=153 reward=0.7293942 (406.19 it/sec) -training >> step=916200, episode=153 reward=0.7483553 (456.11 it/sec) -training >> step=916300, episode=153 reward=0.7642241 (384.70 it/sec) -training >> step=916400, episode=153 reward=0.7657754 (399.41 it/sec) -training >> step=916500, episode=153 reward=0.7712588 (448.60 it/sec) -training >> step=916600, episode=153 reward=0.7581291 (431.62 it/sec) -training >> step=916700, episode=153 reward=0.7777527 (487.84 it/sec) -training >> step=916800, episode=153 reward=0.7403999 (453.03 it/sec) -training >> step=916900, episode=153 reward=0.7719622 (478.72 it/sec) -training >> step=917000, episode=153 reward=0.7500604 (468.90 it/sec) -training >> step=917100, episode=153 reward=0.7367014 (486.53 it/sec) -training >> step=917200, episode=153 reward=0.7625501 (481.62 it/sec) -training >> step=917300, episode=154 reward=0.7496748 (195.12 it/sec) -training >> step=917400, episode=154 reward=0.7443166 (474.77 it/sec) -training >> step=917500, episode=154 reward=0.7511833 (477.97 it/sec) -training >> step=917600, episode=154 reward=0.7648054 (500.45 it/sec) -training >> step=917700, episode=154 reward=0.7591077 (521.95 it/sec) -training >> step=917800, episode=154 reward=0.7508808 (542.90 it/sec) -training >> step=917900, episode=154 reward=0.7457065 (515.25 it/sec) -training >> step=918000, episode=154 reward=0.7568518 (493.21 it/sec) -training >> step=918100, episode=154 reward=0.7633513 (549.52 it/sec) -training >> step=918200, episode=154 reward=0.7552853 (523.05 it/sec) -training >> step=918300, episode=154 reward=0.7673776 (522.53 it/sec) -training >> step=918400, episode=154 reward=0.7534612 (489.68 it/sec) -training >> step=918500, episode=154 reward=0.7411071 (549.12 it/sec) -training >> step=918600, episode=154 reward=0.7684473 (539.64 it/sec) -training >> step=918700, episode=154 reward=0.7642964 (531.27 it/sec) -training >> step=918800, episode=154 reward=0.7918977 (541.25 it/sec) -training >> step=918900, episode=154 reward=0.7650824 (518.53 it/sec) -training >> step=919000, episode=154 reward=0.7497462 (530.77 it/sec) -training >> step=919100, episode=154 reward=0.7568073 (523.38 it/sec) -training >> step=919200, episode=154 reward=0.772186 (552.98 it/sec) -training >> step=919300, episode=154 reward=0.7733767 (485.28 it/sec) -training >> step=919400, episode=154 reward=0.741347 (483.94 it/sec) -training >> step=919500, episode=154 reward=0.7463558 (514.84 it/sec) -training >> step=919600, episode=154 reward=0.74198 (540.27 it/sec) -training >> step=919700, episode=154 reward=0.7477996 (532.16 it/sec) -training >> step=919800, episode=154 reward=0.7788054 (517.19 it/sec) -training >> step=919900, episode=154 reward=0.7510462 (508.49 it/sec) -training >> step=920000, episode=154 reward=0.7631242 (485.25 it/sec) -training >> step=920100, episode=154 reward=0.757443 (516.00 it/sec) -training >> step=920200, episode=154 reward=0.7609069 (429.00 it/sec) -training >> step=920300, episode=154 reward=0.7635127 (505.62 it/sec) -training >> step=920400, episode=154 reward=0.7582749 (464.49 it/sec) -training >> step=920500, episode=154 reward=0.7441275 (434.62 it/sec) -training >> step=920600, episode=154 reward=0.7530942 (496.75 it/sec) -training >> step=920700, episode=154 reward=0.7576486 (491.65 it/sec) -training >> step=920800, episode=154 reward=0.7398529 (502.87 it/sec) -training >> step=920900, episode=154 reward=0.7730779 (511.55 it/sec) -training >> step=921000, episode=154 reward=0.7878159 (489.96 it/sec) -training >> step=921100, episode=154 reward=0.7692537 (509.93 it/sec) -training >> step=921200, episode=154 reward=0.7515656 (493.84 it/sec) -training >> step=921300, episode=154 reward=0.7460462 (508.91 it/sec) -training >> step=921400, episode=154 reward=0.7579727 (488.55 it/sec) -training >> step=921500, episode=154 reward=0.7669587 (490.51 it/sec) -training >> step=921600, episode=154 reward=0.7799528 (489.82 it/sec) -training >> step=921700, episode=154 reward=0.7519997 (536.85 it/sec) -training >> step=921800, episode=154 reward=0.7589632 (527.89 it/sec) -training >> step=921900, episode=154 reward=0.7454756 (546.59 it/sec) -training >> step=922000, episode=154 reward=0.7467996 (518.63 it/sec) -training >> step=922100, episode=154 reward=0.7663748 (526.03 it/sec) -training >> step=922200, episode=154 reward=0.7356338 (543.72 it/sec) -training >> step=922300, episode=154 reward=0.7387397 (525.94 it/sec) -training >> step=922400, episode=154 reward=0.7443302 (533.23 it/sec) -training >> step=922500, episode=154 reward=0.7464026 (507.75 it/sec) -training >> step=922600, episode=154 reward=0.7587176 (505.82 it/sec) -training >> step=922700, episode=154 reward=0.7326719 (553.44 it/sec) -training >> step=922800, episode=154 reward=0.7381691 (484.62 it/sec) -training >> step=922900, episode=154 reward=0.7551888 (538.41 it/sec) -training >> step=923000, episode=154 reward=0.7447084 (504.69 it/sec) -training >> step=923100, episode=154 reward=0.7342582 (509.08 it/sec) -training >> step=923200, episode=154 reward=0.748589 (496.38 it/sec) -training >> step=923300, episode=155 reward=0.7677906 (272.77 it/sec) -training >> step=923400, episode=155 reward=0.7413872 (492.77 it/sec) -training >> step=923500, episode=155 reward=0.7385281 (462.06 it/sec) -training >> step=923600, episode=155 reward=0.7472429 (497.59 it/sec) -training >> step=923700, episode=155 reward=0.7888936 (537.87 it/sec) -training >> step=923800, episode=155 reward=0.7713807 (581.96 it/sec) -training >> step=923900, episode=155 reward=0.75561 (528.66 it/sec) -training >> step=924000, episode=155 reward=0.7673629 (523.72 it/sec) -training >> step=924100, episode=155 reward=0.7489456 (536.69 it/sec) -training >> step=924200, episode=155 reward=0.7578843 (529.93 it/sec) -training >> step=924300, episode=155 reward=0.7703141 (520.19 it/sec) -training >> step=924400, episode=155 reward=0.7576181 (536.92 it/sec) -training >> step=924500, episode=155 reward=0.769428 (485.77 it/sec) -training >> step=924600, episode=155 reward=0.747777 (534.58 it/sec) -training >> step=924700, episode=155 reward=0.7413575 (486.36 it/sec) -training >> step=924800, episode=155 reward=0.777086 (543.26 it/sec) -training >> step=924900, episode=155 reward=0.7781429 (555.45 it/sec) -training >> step=925000, episode=155 reward=0.7660416 (496.47 it/sec) -training >> step=925100, episode=155 reward=0.7555671 (505.85 it/sec) -training >> step=925200, episode=155 reward=0.7581604 (539.84 it/sec) -training >> step=925300, episode=155 reward=0.7583405 (529.01 it/sec) -training >> step=925400, episode=155 reward=0.7405937 (517.97 it/sec) -training >> step=925500, episode=155 reward=0.7712235 (469.80 it/sec) -training >> step=925600, episode=155 reward=0.7524321 (498.63 it/sec) -training >> step=925700, episode=155 reward=0.7490954 (544.28 it/sec) -training >> step=925800, episode=155 reward=0.7690495 (506.39 it/sec) -training >> step=925900, episode=155 reward=0.7460558 (532.76 it/sec) -training >> step=926000, episode=155 reward=0.7564189 (468.63 it/sec) -training >> step=926100, episode=155 reward=0.755046 (513.05 it/sec) -training >> step=926200, episode=155 reward=0.7748951 (521.36 it/sec) -training >> step=926300, episode=155 reward=0.7834872 (440.08 it/sec) -training >> step=926400, episode=155 reward=0.7493076 (551.42 it/sec) -training >> step=926500, episode=155 reward=0.7506163 (502.11 it/sec) -training >> step=926600, episode=155 reward=0.7611012 (505.59 it/sec) -training >> step=926700, episode=155 reward=0.7431483 (516.37 it/sec) -training >> step=926800, episode=155 reward=0.7666073 (543.50 it/sec) -training >> step=926900, episode=155 reward=0.7729706 (520.72 it/sec) -training >> step=927000, episode=155 reward=0.7521176 (536.38 it/sec) -training >> step=927100, episode=155 reward=0.7410012 (540.09 it/sec) -training >> step=927200, episode=155 reward=0.7732561 (470.30 it/sec) -training >> step=927300, episode=155 reward=0.750168 (538.60 it/sec) -training >> step=927400, episode=155 reward=0.7526986 (558.45 it/sec) -training >> step=927500, episode=155 reward=0.7388967 (544.18 it/sec) -training >> step=927600, episode=155 reward=0.7548085 (520.50 it/sec) -training >> step=927700, episode=155 reward=0.7562317 (502.07 it/sec) -training >> step=927800, episode=155 reward=0.7747355 (547.10 it/sec) -training >> step=927900, episode=155 reward=0.7335908 (520.81 it/sec) -training >> step=928000, episode=155 reward=0.7424965 (543.06 it/sec) -training >> step=928100, episode=155 reward=0.7260358 (557.21 it/sec) -training >> step=928200, episode=155 reward=0.7749577 (489.47 it/sec) -training >> step=928300, episode=155 reward=0.7788778 (485.08 it/sec) -training >> step=928400, episode=155 reward=0.7580204 (528.79 it/sec) -training >> step=928500, episode=155 reward=0.7575619 (515.80 it/sec) -training >> step=928600, episode=155 reward=0.7668868 (505.63 it/sec) -training >> step=928700, episode=155 reward=0.7461429 (480.23 it/sec) -training >> step=928800, episode=155 reward=0.7380072 (520.33 it/sec) -training >> step=928900, episode=155 reward=0.7640941 (517.90 it/sec) -training >> step=929000, episode=155 reward=0.7630847 (499.14 it/sec) -training >> step=929100, episode=155 reward=0.7598342 (539.18 it/sec) -training >> step=929200, episode=155 reward=0.7585045 (480.08 it/sec) -training >> step=929300, episode=156 reward=0.758876 (209.16 it/sec) -training >> step=929400, episode=156 reward=0.7641764 (515.28 it/sec) -training >> step=929500, episode=156 reward=0.7740558 (516.95 it/sec) -training >> step=929600, episode=156 reward=0.7392573 (520.87 it/sec) -training >> step=929700, episode=156 reward=0.7734522 (484.78 it/sec) -training >> step=929800, episode=156 reward=0.7712469 (519.51 it/sec) -training >> step=929900, episode=156 reward=0.7366379 (542.86 it/sec) -training >> step=930000, episode=156 reward=0.7823789 (472.82 it/sec) -training >> step=930100, episode=156 reward=0.7592819 (508.81 it/sec) -training >> step=930200, episode=156 reward=0.7487009 (500.69 it/sec) -training >> step=930300, episode=156 reward=0.7449929 (538.62 it/sec) -training >> step=930400, episode=156 reward=0.7496054 (463.15 it/sec) -training >> step=930500, episode=156 reward=0.7734595 (465.14 it/sec) -training >> step=930600, episode=156 reward=0.7589928 (460.19 it/sec) -training >> step=930700, episode=156 reward=0.7759556 (527.89 it/sec) -training >> step=930800, episode=156 reward=0.775261 (539.44 it/sec) -training >> step=930900, episode=156 reward=0.7651416 (511.40 it/sec) -training >> step=931000, episode=156 reward=0.770496 (497.80 it/sec) -training >> step=931100, episode=156 reward=0.7249593 (523.64 it/sec) -training >> step=931200, episode=156 reward=0.7546719 (499.11 it/sec) -training >> step=931300, episode=156 reward=0.76757 (527.60 it/sec) -training >> step=931400, episode=156 reward=0.757696 (502.23 it/sec) -training >> step=931500, episode=156 reward=0.7278648 (566.46 it/sec) -training >> step=931600, episode=156 reward=0.7427905 (539.90 it/sec) -training >> step=931700, episode=156 reward=0.7504004 (500.03 it/sec) -training >> step=931800, episode=156 reward=0.770698 (517.22 it/sec) -training >> step=931900, episode=156 reward=0.7476329 (539.08 it/sec) -training >> step=932000, episode=156 reward=0.7509228 (530.10 it/sec) -training >> step=932100, episode=156 reward=0.753207 (431.07 it/sec) -training >> step=932200, episode=156 reward=0.7723181 (496.15 it/sec) -training >> step=932300, episode=156 reward=0.7648342 (377.47 it/sec) -training >> step=932400, episode=156 reward=0.7724435 (495.03 it/sec) -training >> step=932500, episode=156 reward=0.755337 (499.00 it/sec) -training >> step=932600, episode=156 reward=0.7734032 (517.44 it/sec) -training >> step=932700, episode=156 reward=0.7403796 (503.25 it/sec) -training >> step=932800, episode=156 reward=0.7599404 (529.00 it/sec) -training >> step=932900, episode=156 reward=0.7417588 (509.93 it/sec) -training >> step=933000, episode=156 reward=0.7624652 (534.20 it/sec) -training >> step=933100, episode=156 reward=0.7524273 (548.17 it/sec) -training >> step=933200, episode=156 reward=0.7796689 (480.48 it/sec) -training >> step=933300, episode=156 reward=0.7525098 (497.75 it/sec) -training >> step=933400, episode=156 reward=0.7795956 (510.05 it/sec) -training >> step=933500, episode=156 reward=0.7601191 (573.72 it/sec) -training >> step=933600, episode=156 reward=0.7443145 (523.74 it/sec) -training >> step=933700, episode=156 reward=0.7503459 (504.17 it/sec) -training >> step=933800, episode=156 reward=0.7569513 (535.38 it/sec) -training >> step=933900, episode=156 reward=0.750537 (480.11 it/sec) -training >> step=934000, episode=156 reward=0.7680938 (519.49 it/sec) -training >> step=934100, episode=156 reward=0.771625 (543.36 it/sec) -training >> step=934200, episode=156 reward=0.7433081 (551.71 it/sec) -training >> step=934300, episode=156 reward=0.7431593 (497.74 it/sec) -training >> step=934400, episode=156 reward=0.7653981 (488.33 it/sec) -training >> step=934500, episode=156 reward=0.7280186 (510.24 it/sec) -training >> step=934600, episode=156 reward=0.7457217 (437.57 it/sec) -training >> step=934700, episode=156 reward=0.7264438 (529.33 it/sec) -training >> step=934800, episode=156 reward=0.7876821 (515.99 it/sec) -training >> step=934900, episode=156 reward=0.7442415 (445.18 it/sec) -training >> step=935000, episode=156 reward=0.7615207 (541.03 it/sec) -training >> step=935100, episode=156 reward=0.7344329 (438.94 it/sec) -training >> step=935200, episode=156 reward=0.7496759 (438.82 it/sec) -training >> step=935300, episode=157 reward=0.7481037 (262.44 it/sec) -training >> step=935400, episode=157 reward=0.7797793 (515.21 it/sec) -training >> step=935500, episode=157 reward=0.7594732 (464.83 it/sec) -training >> step=935600, episode=157 reward=0.7512843 (496.42 it/sec) -training >> step=935700, episode=157 reward=0.7758366 (525.72 it/sec) -training >> step=935800, episode=157 reward=0.7362821 (488.55 it/sec) -training >> step=935900, episode=157 reward=0.7481084 (510.76 it/sec) -training >> step=936000, episode=157 reward=0.7855175 (504.05 it/sec) -training >> step=936100, episode=157 reward=0.7623062 (543.94 it/sec) -training >> step=936200, episode=157 reward=0.7774813 (544.38 it/sec) -training >> step=936300, episode=157 reward=0.7742 (500.26 it/sec) -training >> step=936400, episode=157 reward=0.744558 (497.55 it/sec) -training >> step=936500, episode=157 reward=0.7577083 (507.54 it/sec) -training >> step=936600, episode=157 reward=0.7404377 (543.43 it/sec) -training >> step=936700, episode=157 reward=0.7453708 (534.37 it/sec) -training >> step=936800, episode=157 reward=0.7773418 (512.15 it/sec) -training >> step=936900, episode=157 reward=0.7652113 (527.54 it/sec) -training >> step=937000, episode=157 reward=0.7678595 (513.51 it/sec) -training >> step=937100, episode=157 reward=0.7574067 (525.64 it/sec) -training >> step=937200, episode=157 reward=0.7652665 (556.44 it/sec) -training >> step=937300, episode=157 reward=0.7413022 (533.24 it/sec) -training >> step=937400, episode=157 reward=0.782405 (495.36 it/sec) -training >> step=937500, episode=157 reward=0.7604301 (503.87 it/sec) -training >> step=937600, episode=157 reward=0.759914 (495.00 it/sec) -training >> step=937700, episode=157 reward=0.7765118 (515.69 it/sec) -training >> step=937800, episode=157 reward=0.7568679 (504.61 it/sec) -training >> step=937900, episode=157 reward=0.7610561 (513.35 it/sec) -training >> step=938000, episode=157 reward=0.7467747 (523.16 it/sec) -training >> step=938100, episode=157 reward=0.7439758 (529.52 it/sec) -training >> step=938200, episode=157 reward=0.7664397 (548.57 it/sec) -training >> step=938300, episode=157 reward=0.7372066 (515.37 it/sec) -training >> step=938400, episode=157 reward=0.7918162 (513.93 it/sec) -training >> step=938500, episode=157 reward=0.7324897 (469.49 it/sec) -training >> step=938600, episode=157 reward=0.7569482 (517.11 it/sec) -training >> step=938700, episode=157 reward=0.783567 (368.44 it/sec) -training >> step=938800, episode=157 reward=0.7678958 (533.30 it/sec) -training >> step=938900, episode=157 reward=0.7522992 (530.61 it/sec) -training >> step=939000, episode=157 reward=0.7824301 (507.09 it/sec) -training >> step=939100, episode=157 reward=0.7612899 (535.82 it/sec) -training >> step=939200, episode=157 reward=0.7599269 (521.95 it/sec) -training >> step=939300, episode=157 reward=0.781386 (510.55 it/sec) -training >> step=939400, episode=157 reward=0.7476539 (510.75 it/sec) -training >> step=939500, episode=157 reward=0.7783629 (496.85 it/sec) -training >> step=939600, episode=157 reward=0.7645579 (515.54 it/sec) -training >> step=939700, episode=157 reward=0.7431921 (537.60 it/sec) -training >> step=939800, episode=157 reward=0.7468419 (544.57 it/sec) -training >> step=939900, episode=157 reward=0.7570864 (537.84 it/sec) -training >> step=940000, episode=157 reward=0.7483125 (462.08 it/sec) -training >> step=940100, episode=157 reward=0.7670116 (525.93 it/sec) -training >> step=940200, episode=157 reward=0.7157182 (561.94 it/sec) -training >> step=940300, episode=157 reward=0.762317 (509.11 it/sec) -training >> step=940400, episode=157 reward=0.7431611 (527.44 it/sec) -training >> step=940500, episode=157 reward=0.7627146 (538.09 it/sec) -training >> step=940600, episode=157 reward=0.7567346 (470.02 it/sec) -training >> step=940700, episode=157 reward=0.7529827 (445.48 it/sec) -training >> step=940800, episode=157 reward=0.7464418 (530.04 it/sec) -training >> step=940900, episode=157 reward=0.7441593 (567.47 it/sec) -training >> step=941000, episode=157 reward=0.7401553 (522.90 it/sec) -training >> step=941100, episode=157 reward=0.7682514 (460.90 it/sec) -training >> step=941200, episode=157 reward=0.7641149 (460.64 it/sec) -training >> step=941300, episode=158 reward=0.7438675 (256.72 it/sec) -training >> step=941400, episode=158 reward=0.7678124 (519.15 it/sec) -training >> step=941500, episode=158 reward=0.7474102 (481.33 it/sec) -training >> step=941600, episode=158 reward=0.7488192 (536.39 it/sec) -training >> step=941700, episode=158 reward=0.7547581 (553.87 it/sec) -training >> step=941800, episode=158 reward=0.7556613 (528.63 it/sec) -training >> step=941900, episode=158 reward=0.7760239 (436.79 it/sec) -training >> step=942000, episode=158 reward=0.7745437 (493.75 it/sec) -training >> step=942100, episode=158 reward=0.7610027 (473.13 it/sec) -training >> step=942200, episode=158 reward=0.7476965 (522.29 it/sec) -training >> step=942300, episode=158 reward=0.7501952 (530.24 it/sec) -training >> step=942400, episode=158 reward=0.7690016 (537.16 it/sec) -training >> step=942500, episode=158 reward=0.7626958 (488.60 it/sec) -training >> step=942600, episode=158 reward=0.7691249 (517.34 it/sec) -training >> step=942700, episode=158 reward=0.7744471 (523.13 it/sec) -training >> step=942800, episode=158 reward=0.7641578 (557.42 it/sec) -training >> step=942900, episode=158 reward=0.7678414 (489.81 it/sec) -training >> step=943000, episode=158 reward=0.7802969 (525.95 it/sec) -training >> step=943100, episode=158 reward=0.7416552 (478.35 it/sec) -training >> step=943200, episode=158 reward=0.762683 (531.55 it/sec) -training >> step=943300, episode=158 reward=0.7577888 (491.75 it/sec) -training >> step=943400, episode=158 reward=0.7556064 (511.03 it/sec) -training >> step=943500, episode=158 reward=0.7579047 (535.39 it/sec) -training >> step=943600, episode=158 reward=0.7699193 (471.33 it/sec) -training >> step=943700, episode=158 reward=0.7728962 (496.10 it/sec) -training >> step=943800, episode=158 reward=0.7652166 (524.20 it/sec) -training >> step=943900, episode=158 reward=0.7782807 (558.13 it/sec) -training >> step=944000, episode=158 reward=0.7284243 (490.24 it/sec) -training >> step=944100, episode=158 reward=0.7541072 (483.16 it/sec) -training >> step=944200, episode=158 reward=0.7377826 (532.90 it/sec) -training >> step=944300, episode=158 reward=0.7537484 (536.76 it/sec) -training >> step=944400, episode=158 reward=0.7609637 (541.32 it/sec) -training >> step=944500, episode=158 reward=0.7737655 (481.01 it/sec) -training >> step=944600, episode=158 reward=0.7835863 (536.48 it/sec) -training >> step=944700, episode=158 reward=0.7847991 (501.72 it/sec) -training >> step=944800, episode=158 reward=0.7547212 (543.17 it/sec) -training >> step=944900, episode=158 reward=0.7292596 (523.93 it/sec) -training >> step=945000, episode=158 reward=0.7841007 (506.44 it/sec) -training >> step=945100, episode=158 reward=0.7694868 (344.20 it/sec) -training >> step=945200, episode=158 reward=0.747988 (535.88 it/sec) -training >> step=945300, episode=158 reward=0.748236 (502.70 it/sec) -training >> step=945400, episode=158 reward=0.7376346 (539.51 it/sec) -training >> step=945500, episode=158 reward=0.7671076 (494.56 it/sec) -training >> step=945600, episode=158 reward=0.7597247 (454.43 it/sec) -training >> step=945700, episode=158 reward=0.7643712 (460.12 it/sec) -training >> step=945800, episode=158 reward=0.7439555 (524.60 it/sec) -training >> step=945900, episode=158 reward=0.7309508 (527.18 it/sec) -training >> step=946000, episode=158 reward=0.7581037 (521.18 it/sec) -training >> step=946100, episode=158 reward=0.7765059 (474.05 it/sec) -training >> step=946200, episode=158 reward=0.7597687 (485.67 it/sec) -training >> step=946300, episode=158 reward=0.7320462 (514.06 it/sec) -training >> step=946400, episode=158 reward=0.7313662 (526.17 it/sec) -training >> step=946500, episode=158 reward=0.7514927 (567.93 it/sec) -training >> step=946600, episode=158 reward=0.7503697 (482.68 it/sec) -training >> step=946700, episode=158 reward=0.7445806 (489.38 it/sec) -training >> step=946800, episode=158 reward=0.740257 (493.10 it/sec) -training >> step=946900, episode=158 reward=0.761981 (515.61 it/sec) -training >> step=947000, episode=158 reward=0.7545722 (533.26 it/sec) -training >> step=947100, episode=158 reward=0.7706626 (467.17 it/sec) -training >> step=947200, episode=158 reward=0.7407911 (470.43 it/sec) -training >> step=947300, episode=159 reward=0.752871 (264.84 it/sec) -training >> step=947400, episode=159 reward=0.7614239 (490.35 it/sec) -training >> step=947500, episode=159 reward=0.7451059 (486.99 it/sec) -training >> step=947600, episode=159 reward=0.7733781 (468.35 it/sec) -training >> step=947700, episode=159 reward=0.7736925 (527.87 it/sec) -training >> step=947800, episode=159 reward=0.7628354 (513.82 it/sec) -training >> step=947900, episode=159 reward=0.7502405 (510.18 it/sec) -training >> step=948000, episode=159 reward=0.7617255 (525.53 it/sec) -training >> step=948100, episode=159 reward=0.7625865 (464.95 it/sec) -training >> step=948200, episode=159 reward=0.7408007 (520.80 it/sec) -training >> step=948300, episode=159 reward=0.772095 (510.95 it/sec) -training >> step=948400, episode=159 reward=0.7585902 (559.01 it/sec) -training >> step=948500, episode=159 reward=0.74426 (430.36 it/sec) -training >> step=948600, episode=159 reward=0.7690215 (492.12 it/sec) -training >> step=948700, episode=159 reward=0.7493309 (470.75 it/sec) -training >> step=948800, episode=159 reward=0.7572082 (530.47 it/sec) -training >> step=948900, episode=159 reward=0.7623696 (532.43 it/sec) -training >> step=949000, episode=159 reward=0.7760379 (502.71 it/sec) -training >> step=949100, episode=159 reward=0.7680365 (510.86 it/sec) -training >> step=949200, episode=159 reward=0.7423868 (514.03 it/sec) -training >> step=949300, episode=159 reward=0.7579224 (493.00 it/sec) -training >> step=949400, episode=159 reward=0.7529088 (527.16 it/sec) -training >> step=949500, episode=159 reward=0.7593324 (524.47 it/sec) -training >> step=949600, episode=159 reward=0.7659449 (512.05 it/sec) -training >> step=949700, episode=159 reward=0.7485375 (499.33 it/sec) -training >> step=949800, episode=159 reward=0.7711858 (545.21 it/sec) -training >> step=949900, episode=159 reward=0.7607433 (478.73 it/sec) -training >> step=950000, episode=159 reward=0.7495649 (527.72 it/sec) -training >> step=950100, episode=159 reward=0.7735291 (464.18 it/sec) -training >> step=950200, episode=159 reward=0.773148 (532.47 it/sec) -training >> step=950300, episode=159 reward=0.7639008 (509.25 it/sec) -training >> step=950400, episode=159 reward=0.7759238 (506.07 it/sec) -training >> step=950500, episode=159 reward=0.7745924 (501.82 it/sec) -training >> step=950600, episode=159 reward=0.7702038 (478.85 it/sec) -training >> step=950700, episode=159 reward=0.7716548 (494.89 it/sec) -training >> step=950800, episode=159 reward=0.7600403 (523.41 it/sec) -training >> step=950900, episode=159 reward=0.7507729 (545.96 it/sec) -training >> step=951000, episode=159 reward=0.768539 (535.70 it/sec) -training >> step=951100, episode=159 reward=0.7392324 (466.91 it/sec) -training >> step=951200, episode=159 reward=0.7528517 (530.75 it/sec) -training >> step=951300, episode=159 reward=0.7636731 (504.88 it/sec) -training >> step=951400, episode=159 reward=0.7578838 (448.89 it/sec) -training >> step=951500, episode=159 reward=0.7630531 (509.40 it/sec) -training >> step=951600, episode=159 reward=0.7529912 (510.47 it/sec) -training >> step=951700, episode=159 reward=0.757359 (487.19 it/sec) -training >> step=951800, episode=159 reward=0.7614752 (521.31 it/sec) -training >> step=951900, episode=159 reward=0.769734 (479.14 it/sec) -training >> step=952000, episode=159 reward=0.7750819 (508.18 it/sec) -training >> step=952100, episode=159 reward=0.7750383 (470.67 it/sec) -training >> step=952200, episode=159 reward=0.7400995 (455.93 it/sec) -training >> step=952300, episode=159 reward=0.7539499 (473.34 it/sec) -training >> step=952400, episode=159 reward=0.7554938 (498.23 it/sec) -training >> step=952500, episode=159 reward=0.7433774 (472.64 it/sec) -training >> step=952600, episode=159 reward=0.7583746 (406.26 it/sec) -training >> step=952700, episode=159 reward=0.7310262 (436.76 it/sec) -training >> step=952800, episode=159 reward=0.7483338 (496.89 it/sec) -training >> step=952900, episode=159 reward=0.7523724 (476.78 it/sec) -training >> step=953000, episode=159 reward=0.7550061 (464.17 it/sec) -training >> step=953100, episode=159 reward=0.7409735 (426.05 it/sec) -training >> step=953200, episode=159 reward=0.7572432 (436.92 it/sec) -training >> step=953300, episode=160 reward=0.7642573 (239.64 it/sec) -training >> step=953400, episode=160 reward=0.7789605 (435.39 it/sec) -training >> step=953500, episode=160 reward=0.7164876 (443.93 it/sec) -training >> step=953600, episode=160 reward=0.7621722 (464.01 it/sec) -training >> step=953700, episode=160 reward=0.7387401 (478.29 it/sec) -training >> step=953800, episode=160 reward=0.763642 (475.35 it/sec) -training >> step=953900, episode=160 reward=0.7733413 (460.58 it/sec) -training >> step=954000, episode=160 reward=0.7488703 (447.88 it/sec) -training >> step=954100, episode=160 reward=0.7760254 (422.63 it/sec) -training >> step=954200, episode=160 reward=0.766632 (384.25 it/sec) -training >> step=954300, episode=160 reward=0.784955 (469.78 it/sec) -training >> step=954400, episode=160 reward=0.7598394 (491.33 it/sec) -training >> step=954500, episode=160 reward=0.7801965 (440.83 it/sec) -training >> step=954600, episode=160 reward=0.7757176 (504.12 it/sec) -training >> step=954700, episode=160 reward=0.7621711 (466.38 it/sec) -training >> step=954800, episode=160 reward=0.7721083 (460.87 it/sec) -training >> step=954900, episode=160 reward=0.7637609 (490.55 it/sec) -training >> step=955000, episode=160 reward=0.7615747 (495.83 it/sec) -training >> step=955100, episode=160 reward=0.7635576 (500.76 it/sec) -training >> step=955200, episode=160 reward=0.7454921 (448.08 it/sec) -training >> step=955300, episode=160 reward=0.7594625 (431.07 it/sec) -training >> step=955400, episode=160 reward=0.7686055 (473.98 it/sec) -training >> step=955500, episode=160 reward=0.7537276 (496.33 it/sec) -training >> step=955600, episode=160 reward=0.76807 (501.12 it/sec) -training >> step=955700, episode=160 reward=0.7438257 (451.35 it/sec) -training >> step=955800, episode=160 reward=0.7402382 (486.08 it/sec) -training >> step=955900, episode=160 reward=0.77033 (441.72 it/sec) -training >> step=956000, episode=160 reward=0.763602 (475.95 it/sec) -training >> step=956100, episode=160 reward=0.7751068 (536.21 it/sec) -training >> step=956200, episode=160 reward=0.7616673 (475.76 it/sec) -training >> step=956300, episode=160 reward=0.7646039 (501.63 it/sec) -training >> step=956400, episode=160 reward=0.7570607 (438.53 it/sec) -training >> step=956500, episode=160 reward=0.753636 (419.13 it/sec) -training >> step=956600, episode=160 reward=0.7472165 (470.65 it/sec) -training >> step=956700, episode=160 reward=0.7907695 (394.12 it/sec) -training >> step=956800, episode=160 reward=0.75841 (459.44 it/sec) -training >> step=956900, episode=160 reward=0.7425492 (396.06 it/sec) -training >> step=957000, episode=160 reward=0.7707324 (428.65 it/sec) -training >> step=957100, episode=160 reward=0.775061 (412.52 it/sec) -training >> step=957200, episode=160 reward=0.7602035 (475.94 it/sec) -training >> step=957300, episode=160 reward=0.7531593 (440.60 it/sec) -training >> step=957400, episode=160 reward=0.7428969 (413.46 it/sec) -training >> step=957500, episode=160 reward=0.7610837 (436.74 it/sec) -training >> step=957600, episode=160 reward=0.7547872 (479.75 it/sec) -training >> step=957700, episode=160 reward=0.7459154 (451.67 it/sec) -training >> step=957800, episode=160 reward=0.7531624 (306.46 it/sec) -training >> step=957900, episode=160 reward=0.7809073 (434.28 it/sec) -training >> step=958000, episode=160 reward=0.748863 (431.57 it/sec) -training >> step=958100, episode=160 reward=0.7455687 (455.92 it/sec) -training >> step=958200, episode=160 reward=0.7462934 (444.26 it/sec) -training >> step=958300, episode=160 reward=0.7687581 (437.64 it/sec) -training >> step=958400, episode=160 reward=0.7655017 (418.56 it/sec) -training >> step=958500, episode=160 reward=0.764382 (432.18 it/sec) -training >> step=958600, episode=160 reward=0.7446333 (458.25 it/sec) -training >> step=958700, episode=160 reward=0.7614313 (393.96 it/sec) -training >> step=958800, episode=160 reward=0.7618697 (434.49 it/sec) -training >> step=958900, episode=160 reward=0.7391101 (399.55 it/sec) -training >> step=959000, episode=160 reward=0.7540261 (453.22 it/sec) -training >> step=959100, episode=160 reward=0.7574846 (429.54 it/sec) -training >> step=959200, episode=160 reward=0.7504771 (437.07 it/sec) -training >> step=959300, episode=161 reward=0.7690482 (176.41 it/sec) -training >> step=959400, episode=161 reward=0.7859175 (430.08 it/sec) -training >> step=959500, episode=161 reward=0.7821094 (383.71 it/sec) -training >> step=959600, episode=161 reward=0.7495441 (437.48 it/sec) -training >> step=959700, episode=161 reward=0.7587833 (352.52 it/sec) -training >> step=959800, episode=161 reward=0.7653217 (389.21 it/sec) -training >> step=959900, episode=161 reward=0.7645529 (380.10 it/sec) -training >> step=960000, episode=161 reward=0.76015 (417.36 it/sec) -training >> step=960100, episode=161 reward=0.7487058 (470.90 it/sec) -training >> step=960200, episode=161 reward=0.7623004 (446.23 it/sec) -training >> step=960300, episode=161 reward=0.7611296 (348.15 it/sec) -training >> step=960400, episode=161 reward=0.7447003 (464.73 it/sec) -training >> step=960500, episode=161 reward=0.7626373 (538.28 it/sec) -training >> step=960600, episode=161 reward=0.7593982 (484.43 it/sec) -training >> step=960700, episode=161 reward=0.7776307 (432.60 it/sec) -training >> step=960800, episode=161 reward=0.7483495 (494.58 it/sec) -training >> step=960900, episode=161 reward=0.7691339 (479.26 it/sec) -training >> step=961000, episode=161 reward=0.7649964 (502.47 it/sec) -training >> step=961100, episode=161 reward=0.7660457 (456.52 it/sec) -training >> step=961200, episode=161 reward=0.7482448 (421.01 it/sec) -training >> step=961300, episode=161 reward=0.7692682 (472.19 it/sec) -training >> step=961400, episode=161 reward=0.7642135 (452.65 it/sec) -training >> step=961500, episode=161 reward=0.771099 (489.20 it/sec) -training >> step=961600, episode=161 reward=0.7750031 (492.35 it/sec) -training >> step=961700, episode=161 reward=0.7522862 (410.72 it/sec) -training >> step=961800, episode=161 reward=0.7589182 (470.11 it/sec) -training >> step=961900, episode=161 reward=0.7710062 (491.67 it/sec) -training >> step=962000, episode=161 reward=0.7789853 (497.64 it/sec) -training >> step=962100, episode=161 reward=0.7765925 (408.12 it/sec) -training >> step=962200, episode=161 reward=0.7615921 (411.02 it/sec) -training >> step=962300, episode=161 reward=0.7300919 (432.82 it/sec) -training >> step=962400, episode=161 reward=0.7659213 (467.08 it/sec) -training >> step=962500, episode=161 reward=0.7541331 (491.65 it/sec) -training >> step=962600, episode=161 reward=0.7537654 (504.25 it/sec) -training >> step=962700, episode=161 reward=0.749156 (509.39 it/sec) -training >> step=962800, episode=161 reward=0.7572941 (507.81 it/sec) -training >> step=962900, episode=161 reward=0.7624309 (516.15 it/sec) -training >> step=963000, episode=161 reward=0.7583708 (468.90 it/sec) -training >> step=963100, episode=161 reward=0.7555619 (465.57 it/sec) -training >> step=963200, episode=161 reward=0.7491184 (494.65 it/sec) -training >> step=963300, episode=161 reward=0.7764741 (518.92 it/sec) -training >> step=963400, episode=161 reward=0.7687271 (504.68 it/sec) -training >> step=963500, episode=161 reward=0.7617719 (532.64 it/sec) -training >> step=963600, episode=161 reward=0.7494256 (473.90 it/sec) -training >> step=963700, episode=161 reward=0.7896737 (454.85 it/sec) -training >> step=963800, episode=161 reward=0.7368734 (529.55 it/sec) -training >> step=963900, episode=161 reward=0.7628714 (477.30 it/sec) -training >> step=964000, episode=161 reward=0.7603964 (418.61 it/sec) -training >> step=964100, episode=161 reward=0.7358438 (275.95 it/sec) -training >> step=964200, episode=161 reward=0.7511305 (426.53 it/sec) -training >> step=964300, episode=161 reward=0.7683513 (438.57 it/sec) -training >> step=964400, episode=161 reward=0.7378939 (384.09 it/sec) -training >> step=964500, episode=161 reward=0.7606478 (421.08 it/sec) -training >> step=964600, episode=161 reward=0.7657987 (446.73 it/sec) -training >> step=964700, episode=161 reward=0.7602422 (449.71 it/sec) -training >> step=964800, episode=161 reward=0.765713 (464.76 it/sec) -training >> step=964900, episode=161 reward=0.7399157 (448.61 it/sec) -training >> step=965000, episode=161 reward=0.7336487 (460.81 it/sec) -training >> step=965100, episode=161 reward=0.7428638 (481.98 it/sec) -training >> step=965200, episode=161 reward=0.7574807 (533.74 it/sec) -training >> step=965300, episode=162 reward=0.7573455 (151.79 it/sec) -training >> step=965400, episode=162 reward=0.7593838 (523.05 it/sec) -training >> step=965500, episode=162 reward=0.7746617 (448.29 it/sec) -training >> step=965600, episode=162 reward=0.7567757 (388.26 it/sec) -training >> step=965700, episode=162 reward=0.7751925 (418.96 it/sec) -training >> step=965800, episode=162 reward=0.7633166 (466.98 it/sec) -training >> step=965900, episode=162 reward=0.7595034 (496.88 it/sec) -training >> step=966000, episode=162 reward=0.7532723 (485.08 it/sec) -training >> step=966100, episode=162 reward=0.7531091 (469.17 it/sec) -training >> step=966200, episode=162 reward=0.774713 (471.16 it/sec) -training >> step=966300, episode=162 reward=0.7444025 (517.29 it/sec) -training >> step=966400, episode=162 reward=0.7574444 (517.73 it/sec) -training >> step=966500, episode=162 reward=0.7495835 (535.60 it/sec) -training >> step=966600, episode=162 reward=0.7628408 (503.63 it/sec) -training >> step=966700, episode=162 reward=0.7673796 (512.12 it/sec) -training >> step=966800, episode=162 reward=0.7678738 (506.56 it/sec) -training >> step=966900, episode=162 reward=0.7742353 (550.71 it/sec) -training >> step=967000, episode=162 reward=0.7505236 (548.33 it/sec) -training >> step=967100, episode=162 reward=0.7366365 (517.58 it/sec) -training >> step=967200, episode=162 reward=0.7396618 (505.70 it/sec) -training >> step=967300, episode=162 reward=0.7512884 (538.38 it/sec) -training >> step=967400, episode=162 reward=0.7512128 (544.45 it/sec) -training >> step=967500, episode=162 reward=0.7577381 (528.37 it/sec) -training >> step=967600, episode=162 reward=0.7530167 (504.86 it/sec) -training >> step=967700, episode=162 reward=0.7541183 (529.33 it/sec) -training >> step=967800, episode=162 reward=0.7658347 (516.20 it/sec) -training >> step=967900, episode=162 reward=0.7704424 (536.61 it/sec) -training >> step=968000, episode=162 reward=0.7761737 (554.39 it/sec) -training >> step=968100, episode=162 reward=0.7742487 (564.94 it/sec) -training >> step=968200, episode=162 reward=0.7650094 (483.33 it/sec) -training >> step=968300, episode=162 reward=0.7535166 (505.59 it/sec) -training >> step=968400, episode=162 reward=0.7844319 (526.88 it/sec) -training >> step=968500, episode=162 reward=0.7733198 (534.71 it/sec) -training >> step=968600, episode=162 reward=0.7452923 (527.01 it/sec) -training >> step=968700, episode=162 reward=0.7716721 (510.61 it/sec) -training >> step=968800, episode=162 reward=0.7500152 (478.94 it/sec) -training >> step=968900, episode=162 reward=0.7675629 (530.28 it/sec) -training >> step=969000, episode=162 reward=0.7642819 (525.31 it/sec) -training >> step=969100, episode=162 reward=0.7750148 (565.17 it/sec) -training >> step=969200, episode=162 reward=0.7663165 (540.63 it/sec) -training >> step=969300, episode=162 reward=0.7431071 (530.48 it/sec) -training >> step=969400, episode=162 reward=0.7651147 (508.75 it/sec) -training >> step=969500, episode=162 reward=0.7589305 (572.87 it/sec) -training >> step=969600, episode=162 reward=0.7595716 (551.68 it/sec) -training >> step=969700, episode=162 reward=0.7541601 (535.73 it/sec) -training >> step=969800, episode=162 reward=0.7369512 (517.02 it/sec) -training >> step=969900, episode=162 reward=0.7567656 (527.35 it/sec) -training >> step=970000, episode=162 reward=0.7425674 (524.18 it/sec) -training >> step=970100, episode=162 reward=0.7538252 (546.62 it/sec) -training >> step=970200, episode=162 reward=0.7603202 (376.49 it/sec) -training >> step=970300, episode=162 reward=0.7566011 (515.08 it/sec) -training >> step=970400, episode=162 reward=0.7551185 (489.60 it/sec) -training >> step=970500, episode=162 reward=0.7535149 (521.65 it/sec) -training >> step=970600, episode=162 reward=0.7416604 (501.68 it/sec) -training >> step=970700, episode=162 reward=0.7466102 (525.51 it/sec) -training >> step=970800, episode=162 reward=0.7649221 (476.89 it/sec) -training >> step=970900, episode=162 reward=0.7202225 (501.19 it/sec) -training >> step=971000, episode=162 reward=0.7652572 (514.59 it/sec) -training >> step=971100, episode=162 reward=0.7457453 (503.33 it/sec) -training >> step=971200, episode=162 reward=0.73357 (439.95 it/sec) -training >> step=971300, episode=163 reward=0.7502794 (209.74 it/sec) -training >> step=971400, episode=163 reward=0.7645338 (516.90 it/sec) -training >> step=971500, episode=163 reward=0.7347142 (500.81 it/sec) -training >> step=971600, episode=163 reward=0.7773648 (494.52 it/sec) -training >> step=971700, episode=163 reward=0.7560651 (494.68 it/sec) -training >> step=971800, episode=163 reward=0.7719225 (432.27 it/sec) -training >> step=971900, episode=163 reward=0.7549462 (469.74 it/sec) -training >> step=972000, episode=163 reward=0.7440281 (453.83 it/sec) -training >> step=972100, episode=163 reward=0.7608648 (451.45 it/sec) -training >> step=972200, episode=163 reward=0.7785356 (468.88 it/sec) -training >> step=972300, episode=163 reward=0.7630096 (486.89 it/sec) -training >> step=972400, episode=163 reward=0.7641442 (448.86 it/sec) -training >> step=972500, episode=163 reward=0.751899 (406.01 it/sec) -training >> step=972600, episode=163 reward=0.7528921 (488.69 it/sec) -training >> step=972700, episode=163 reward=0.7837867 (366.38 it/sec) -training >> step=972800, episode=163 reward=0.7603721 (389.21 it/sec) -training >> step=972900, episode=163 reward=0.7538965 (417.47 it/sec) -training >> step=973000, episode=163 reward=0.771027 (370.08 it/sec) -training >> step=973100, episode=163 reward=0.7664289 (399.69 it/sec) -training >> step=973200, episode=163 reward=0.7716252 (418.29 it/sec) -training >> step=973300, episode=163 reward=0.7366772 (405.66 it/sec) -training >> step=973400, episode=163 reward=0.774086 (431.09 it/sec) -training >> step=973500, episode=163 reward=0.7793991 (431.35 it/sec) -training >> step=973600, episode=163 reward=0.7273642 (453.96 it/sec) -training >> step=973700, episode=163 reward=0.7584705 (465.56 it/sec) -training >> step=973800, episode=163 reward=0.7560358 (466.42 it/sec) -training >> step=973900, episode=163 reward=0.7520292 (439.52 it/sec) -training >> step=974000, episode=163 reward=0.745627 (453.98 it/sec) -training >> step=974100, episode=163 reward=0.7625453 (451.22 it/sec) -training >> step=974200, episode=163 reward=0.7557202 (464.73 it/sec) -training >> step=974300, episode=163 reward=0.7634872 (399.40 it/sec) -training >> step=974400, episode=163 reward=0.7624108 (375.84 it/sec) -training >> step=974500, episode=163 reward=0.7684612 (469.76 it/sec) -training >> step=974600, episode=163 reward=0.772666 (471.27 it/sec) -training >> step=974700, episode=163 reward=0.7809995 (465.64 it/sec) -training >> step=974800, episode=163 reward=0.7505773 (507.00 it/sec) -training >> step=974900, episode=163 reward=0.7428216 (519.58 it/sec) -training >> step=975000, episode=163 reward=0.7644064 (519.07 it/sec) -training >> step=975100, episode=163 reward=0.7662119 (481.91 it/sec) -training >> step=975200, episode=163 reward=0.7581298 (517.30 it/sec) -training >> step=975300, episode=163 reward=0.7700189 (538.28 it/sec) -training >> step=975400, episode=163 reward=0.7495502 (522.39 it/sec) -training >> step=975500, episode=163 reward=0.7737081 (503.37 it/sec) -training >> step=975600, episode=163 reward=0.7534636 (466.53 it/sec) -training >> step=975700, episode=163 reward=0.761124 (456.79 it/sec) -training >> step=975800, episode=163 reward=0.7648558 (462.39 it/sec) -training >> step=975900, episode=163 reward=0.7549901 (480.57 it/sec) -training >> step=976000, episode=163 reward=0.7671189 (503.85 it/sec) -training >> step=976100, episode=163 reward=0.7534677 (525.48 it/sec) -training >> step=976200, episode=163 reward=0.7586909 (490.58 it/sec) -training >> step=976300, episode=163 reward=0.7392857 (497.92 it/sec) -training >> step=976400, episode=163 reward=0.7559419 (540.17 it/sec) -training >> step=976500, episode=163 reward=0.7653565 (357.66 it/sec) -training >> step=976600, episode=163 reward=0.7614787 (510.87 it/sec) -training >> step=976700, episode=163 reward=0.7609821 (495.04 it/sec) -training >> step=976800, episode=163 reward=0.767486 (517.07 it/sec) -training >> step=976900, episode=163 reward=0.7362552 (510.83 it/sec) -training >> step=977000, episode=163 reward=0.7506444 (486.35 it/sec) -training >> step=977100, episode=163 reward=0.741734 (457.17 it/sec) -training >> step=977200, episode=163 reward=0.7480415 (440.81 it/sec) -training >> step=977300, episode=164 reward=0.7500305 (100.13 it/sec) -training >> step=977400, episode=164 reward=0.752488 (485.47 it/sec) -training >> step=977500, episode=164 reward=0.7584196 (435.94 it/sec) -training >> step=977600, episode=164 reward=0.7541803 (404.38 it/sec) -training >> step=977700, episode=164 reward=0.7677922 (413.88 it/sec) -training >> step=977800, episode=164 reward=0.7469822 (382.92 it/sec) -training >> step=977900, episode=164 reward=0.7728693 (422.41 it/sec) -training >> step=978000, episode=164 reward=0.7640108 (424.30 it/sec) -training >> step=978100, episode=164 reward=0.7514973 (405.87 it/sec) -training >> step=978200, episode=164 reward=0.7619685 (414.09 it/sec) -training >> step=978300, episode=164 reward=0.7647498 (475.52 it/sec) -training >> step=978400, episode=164 reward=0.7688068 (429.91 it/sec) -training >> step=978500, episode=164 reward=0.7449139 (391.59 it/sec) -training >> step=978600, episode=164 reward=0.7734716 (407.06 it/sec) -training >> step=978700, episode=164 reward=0.7730236 (475.65 it/sec) -training >> step=978800, episode=164 reward=0.7431445 (523.83 it/sec) -training >> step=978900, episode=164 reward=0.7338244 (477.96 it/sec) -training >> step=979000, episode=164 reward=0.7676357 (504.97 it/sec) -training >> step=979100, episode=164 reward=0.7396964 (521.76 it/sec) -training >> step=979200, episode=164 reward=0.7538497 (530.89 it/sec) -training >> step=979300, episode=164 reward=0.7724249 (534.52 it/sec) -training >> step=979400, episode=164 reward=0.7665392 (506.76 it/sec) -training >> step=979500, episode=164 reward=0.7680143 (520.70 it/sec) -training >> step=979600, episode=164 reward=0.7824588 (517.87 it/sec) -training >> step=979700, episode=164 reward=0.7605943 (521.55 it/sec) -training >> step=979800, episode=164 reward=0.7491278 (482.57 it/sec) -training >> step=979900, episode=164 reward=0.7577852 (488.28 it/sec) -training >> step=980000, episode=164 reward=0.7521675 (478.03 it/sec) -training >> step=980100, episode=164 reward=0.7572183 (494.13 it/sec) -training >> step=980200, episode=164 reward=0.7633048 (489.14 it/sec) -training >> step=980300, episode=164 reward=0.7715808 (529.80 it/sec) -training >> step=980400, episode=164 reward=0.7848329 (518.00 it/sec) -training >> step=980500, episode=164 reward=0.7731835 (515.13 it/sec) -training >> step=980600, episode=164 reward=0.7495085 (488.32 it/sec) -training >> step=980700, episode=164 reward=0.7703554 (535.03 it/sec) -training >> step=980800, episode=164 reward=0.7563926 (535.10 it/sec) -training >> step=980900, episode=164 reward=0.7606456 (553.11 it/sec) -training >> step=981000, episode=164 reward=0.778566 (514.22 it/sec) -training >> step=981100, episode=164 reward=0.7801932 (536.00 it/sec) -training >> step=981200, episode=164 reward=0.7573791 (503.10 it/sec) -training >> step=981300, episode=164 reward=0.7659878 (516.98 it/sec) -training >> step=981400, episode=164 reward=0.7472374 (512.09 it/sec) -training >> step=981500, episode=164 reward=0.7630283 (506.67 it/sec) -training >> step=981600, episode=164 reward=0.7418606 (520.09 it/sec) -training >> step=981700, episode=164 reward=0.7828436 (518.17 it/sec) -training >> step=981800, episode=164 reward=0.7714689 (479.05 it/sec) -training >> step=981900, episode=164 reward=0.7709213 (516.17 it/sec) -training >> step=982000, episode=164 reward=0.7604009 (512.87 it/sec) -training >> step=982100, episode=164 reward=0.7477461 (519.94 it/sec) -training >> step=982200, episode=164 reward=0.7561793 (508.48 it/sec) -training >> step=982300, episode=164 reward=0.7405445 (443.46 it/sec) -training >> step=982400, episode=164 reward=0.7214912 (523.73 it/sec) -training >> step=982500, episode=164 reward=0.7547216 (508.57 it/sec) -training >> step=982600, episode=164 reward=0.7407329 (484.96 it/sec) -training >> step=982700, episode=164 reward=0.7255716 (487.35 it/sec) -training >> step=982800, episode=164 reward=0.7590709 (429.20 it/sec) -training >> step=982900, episode=164 reward=0.7333192 (519.67 it/sec) -training >> step=983000, episode=164 reward=0.7647232 (472.29 it/sec) -training >> step=983100, episode=164 reward=0.7543907 (468.28 it/sec) -training >> step=983200, episode=164 reward=0.7442194 (466.77 it/sec) -training >> step=983300, episode=165 reward=0.7633886 (139.99 it/sec) -training >> step=983400, episode=165 reward=0.7420021 (484.31 it/sec) -training >> step=983500, episode=165 reward=0.7727733 (489.15 it/sec) -training >> step=983600, episode=165 reward=0.7514015 (522.00 it/sec) -training >> step=983700, episode=165 reward=0.7737296 (496.26 it/sec) -training >> step=983800, episode=165 reward=0.7605968 (486.11 it/sec) -training >> step=983900, episode=165 reward=0.7595314 (520.20 it/sec) -training >> step=984000, episode=165 reward=0.75981 (494.42 it/sec) -training >> step=984100, episode=165 reward=0.775043 (534.72 it/sec) -training >> step=984200, episode=165 reward=0.7663854 (513.75 it/sec) -training >> step=984300, episode=165 reward=0.7684137 (471.48 it/sec) -training >> step=984400, episode=165 reward=0.7727939 (524.45 it/sec) -training >> step=984500, episode=165 reward=0.7654756 (569.29 it/sec) -training >> step=984600, episode=165 reward=0.7746075 (526.73 it/sec) -training >> step=984700, episode=165 reward=0.7679756 (532.08 it/sec) -training >> step=984800, episode=165 reward=0.7566726 (538.47 it/sec) -training >> step=984900, episode=165 reward=0.7654938 (486.72 it/sec) -training >> step=985000, episode=165 reward=0.7508473 (481.82 it/sec) -training >> step=985100, episode=165 reward=0.7775175 (498.23 it/sec) -training >> step=985200, episode=165 reward=0.7629197 (508.01 it/sec) -training >> step=985300, episode=165 reward=0.7558993 (489.19 it/sec) -training >> step=985400, episode=165 reward=0.7630576 (452.78 it/sec) -training >> step=985500, episode=165 reward=0.7605175 (520.87 it/sec) -training >> step=985600, episode=165 reward=0.7788441 (516.04 it/sec) -training >> step=985700, episode=165 reward=0.7787221 (533.99 it/sec) -training >> step=985800, episode=165 reward=0.7638252 (537.16 it/sec) -training >> step=985900, episode=165 reward=0.7664369 (403.74 it/sec) -training >> step=986000, episode=165 reward=0.7579507 (467.68 it/sec) -training >> step=986100, episode=165 reward=0.757121 (445.19 it/sec) -training >> step=986200, episode=165 reward=0.7661576 (482.55 it/sec) -training >> step=986300, episode=165 reward=0.7430286 (510.98 it/sec) -training >> step=986400, episode=165 reward=0.7599492 (412.38 it/sec) -training >> step=986500, episode=165 reward=0.7663722 (411.48 it/sec) -training >> step=986600, episode=165 reward=0.7652178 (479.17 it/sec) -training >> step=986700, episode=165 reward=0.752676 (479.41 it/sec) -training >> step=986800, episode=165 reward=0.7638714 (470.71 it/sec) -training >> step=986900, episode=165 reward=0.7743033 (468.42 it/sec) -training >> step=987000, episode=165 reward=0.7730085 (483.18 it/sec) -training >> step=987100, episode=165 reward=0.7637481 (488.39 it/sec) -training >> step=987200, episode=165 reward=0.7324188 (463.55 it/sec) -training >> step=987300, episode=165 reward=0.7565433 (424.94 it/sec) -training >> step=987400, episode=165 reward=0.7485609 (483.31 it/sec) -training >> step=987500, episode=165 reward=0.7528089 (441.90 it/sec) -training >> step=987600, episode=165 reward=0.7415062 (405.40 it/sec) -training >> step=987700, episode=165 reward=0.7105058 (453.30 it/sec) -training >> step=987800, episode=165 reward=0.7592831 (465.31 it/sec) -training >> step=987900, episode=165 reward=0.7572919 (464.29 it/sec) -training >> step=988000, episode=165 reward=0.7545233 (447.75 it/sec) -training >> step=988100, episode=165 reward=0.757096 (468.43 it/sec) -training >> step=988200, episode=165 reward=0.7701947 (487.12 it/sec) -training >> step=988300, episode=165 reward=0.7406362 (428.33 it/sec) -training >> step=988400, episode=165 reward=0.7841905 (489.24 it/sec) -training >> step=988500, episode=165 reward=0.7409176 (475.67 it/sec) -training >> step=988600, episode=165 reward=0.7206268 (457.91 it/sec) -training >> step=988700, episode=165 reward=0.7437986 (444.82 it/sec) -training >> step=988800, episode=165 reward=0.736224 (446.21 it/sec) -training >> step=988900, episode=165 reward=0.7610069 (308.74 it/sec) -training >> step=989000, episode=165 reward=0.7506924 (460.12 it/sec) -training >> step=989100, episode=165 reward=0.7148346 (440.60 it/sec) -training >> step=989200, episode=165 reward=0.7395788 (458.03 it/sec) -training >> step=989300, episode=166 reward=0.7698 (209.16 it/sec) -training >> step=989400, episode=166 reward=0.778382 (542.90 it/sec) -training >> step=989500, episode=166 reward=0.7549462 (488.29 it/sec) -training >> step=989600, episode=166 reward=0.7260784 (504.49 it/sec) -training >> step=989700, episode=166 reward=0.7524544 (511.77 it/sec) -training >> step=989800, episode=166 reward=0.744688 (528.07 it/sec) -training >> step=989900, episode=166 reward=0.7622681 (522.17 it/sec) -training >> step=990000, episode=166 reward=0.7713944 (447.66 it/sec) -training >> step=990100, episode=166 reward=0.7675329 (530.40 it/sec) -training >> step=990200, episode=166 reward=0.7657602 (543.78 it/sec) -training >> step=990300, episode=166 reward=0.7584351 (480.76 it/sec) -training >> step=990400, episode=166 reward=0.7776651 (504.81 it/sec) -training >> step=990500, episode=166 reward=0.7546403 (524.01 it/sec) -training >> step=990600, episode=166 reward=0.7503349 (490.05 it/sec) -training >> step=990700, episode=166 reward=0.760893 (470.86 it/sec) -training >> step=990800, episode=166 reward=0.7666836 (513.85 it/sec) -training >> step=990900, episode=166 reward=0.771427 (513.29 it/sec) -training >> step=991000, episode=166 reward=0.7837663 (501.01 it/sec) -training >> step=991100, episode=166 reward=0.7479745 (496.95 it/sec) -training >> step=991200, episode=166 reward=0.7630717 (497.21 it/sec) -training >> step=991300, episode=166 reward=0.7522473 (515.33 it/sec) -training >> step=991400, episode=166 reward=0.7615275 (501.90 it/sec) -training >> step=991500, episode=166 reward=0.7780503 (475.68 it/sec) -training >> step=991600, episode=166 reward=0.7774352 (506.64 it/sec) -training >> step=991700, episode=166 reward=0.7569418 (499.56 it/sec) -training >> step=991800, episode=166 reward=0.7715086 (539.87 it/sec) -training >> step=991900, episode=166 reward=0.7762714 (514.25 it/sec) -training >> step=992000, episode=166 reward=0.764113 (478.82 it/sec) -training >> step=992100, episode=166 reward=0.7810807 (496.52 it/sec) -training >> step=992200, episode=166 reward=0.754364 (511.59 it/sec) -training >> step=992300, episode=166 reward=0.745738 (511.23 it/sec) -training >> step=992400, episode=166 reward=0.7598823 (537.13 it/sec) -training >> step=992500, episode=166 reward=0.7765081 (519.09 it/sec) -training >> step=992600, episode=166 reward=0.7653695 (508.70 it/sec) -training >> step=992700, episode=166 reward=0.7662855 (528.97 it/sec) -training >> step=992800, episode=166 reward=0.761974 (486.82 it/sec) -training >> step=992900, episode=166 reward=0.7433309 (505.90 it/sec) -training >> step=993000, episode=166 reward=0.7660879 (469.81 it/sec) -training >> step=993100, episode=166 reward=0.7439507 (527.76 it/sec) -training >> step=993200, episode=166 reward=0.7817652 (515.32 it/sec) -training >> step=993300, episode=166 reward=0.7620121 (517.49 it/sec) -training >> step=993400, episode=166 reward=0.7483625 (537.22 it/sec) -training >> step=993500, episode=166 reward=0.7561329 (529.00 it/sec) -training >> step=993600, episode=166 reward=0.7799198 (524.97 it/sec) -training >> step=993700, episode=166 reward=0.7806921 (504.94 it/sec) -training >> step=993800, episode=166 reward=0.7532376 (517.17 it/sec) -training >> step=993900, episode=166 reward=0.7784213 (518.80 it/sec) -training >> step=994000, episode=166 reward=0.7521117 (508.50 it/sec) -training >> step=994100, episode=166 reward=0.7305298 (491.43 it/sec) -training >> step=994200, episode=166 reward=0.7503482 (536.16 it/sec) -training >> step=994300, episode=166 reward=0.7575186 (529.84 it/sec) -training >> step=994400, episode=166 reward=0.7768419 (511.22 it/sec) -training >> step=994500, episode=166 reward=0.769419 (530.44 it/sec) -training >> step=994600, episode=166 reward=0.7828884 (446.42 it/sec) -training >> step=994700, episode=166 reward=0.7625461 (495.32 it/sec) -training >> step=994800, episode=166 reward=0.7431185 (517.21 it/sec) -training >> step=994900, episode=166 reward=0.7433081 (523.91 it/sec) -training >> step=995000, episode=166 reward=0.7472655 (526.87 it/sec) -training >> step=995100, episode=166 reward=0.7392461 (509.89 it/sec) -training >> step=995200, episode=166 reward=0.7852089 (329.65 it/sec) -training >> step=995300, episode=167 reward=0.7711326 (208.68 it/sec) -training >> step=995400, episode=167 reward=0.7699837 (487.39 it/sec) -training >> step=995500, episode=167 reward=0.7567809 (499.56 it/sec) -training >> step=995600, episode=167 reward=0.7534393 (491.91 it/sec) -training >> step=995700, episode=167 reward=0.7628289 (510.31 it/sec) -training >> step=995800, episode=167 reward=0.7651047 (529.82 it/sec) -training >> step=995900, episode=167 reward=0.7423428 (479.26 it/sec) -training >> step=996000, episode=167 reward=0.7753025 (472.24 it/sec) -training >> step=996100, episode=167 reward=0.7439782 (487.03 it/sec) -training >> step=996200, episode=167 reward=0.7567469 (491.27 it/sec) -training >> step=996300, episode=167 reward=0.7738736 (527.61 it/sec) -training >> step=996400, episode=167 reward=0.7793989 (487.11 it/sec) -training >> step=996500, episode=167 reward=0.7849736 (538.01 it/sec) -training >> step=996600, episode=167 reward=0.7554414 (486.21 it/sec) -training >> step=996700, episode=167 reward=0.749086 (513.12 it/sec) -training >> step=996800, episode=167 reward=0.7620003 (509.10 it/sec) -training >> step=996900, episode=167 reward=0.7641764 (479.72 it/sec) -training >> step=997000, episode=167 reward=0.7710909 (525.51 it/sec) -training >> step=997100, episode=167 reward=0.788237 (485.24 it/sec) -training >> step=997200, episode=167 reward=0.7474276 (489.23 it/sec) -training >> step=997300, episode=167 reward=0.7699587 (449.60 it/sec) -training >> step=997400, episode=167 reward=0.7575827 (493.31 it/sec) -training >> step=997500, episode=167 reward=0.7715218 (526.92 it/sec) -training >> step=997600, episode=167 reward=0.7310411 (495.94 it/sec) -training >> step=997700, episode=167 reward=0.7759526 (452.37 it/sec) -training >> step=997800, episode=167 reward=0.7860931 (523.37 it/sec) -training >> step=997900, episode=167 reward=0.763292 (531.73 it/sec) -training >> step=998000, episode=167 reward=0.769788 (479.36 it/sec) -training >> step=998100, episode=167 reward=0.7621248 (481.86 it/sec) -training >> step=998200, episode=167 reward=0.7689265 (509.79 it/sec) -training >> step=998300, episode=167 reward=0.7555283 (510.80 it/sec) -training >> step=998400, episode=167 reward=0.753397 (492.36 it/sec) -training >> step=998500, episode=167 reward=0.7821155 (538.17 it/sec) -training >> step=998600, episode=167 reward=0.7645766 (497.13 it/sec) -training >> step=998700, episode=167 reward=0.7737237 (471.21 it/sec) -training >> step=998800, episode=167 reward=0.7479918 (509.42 it/sec) -training >> step=998900, episode=167 reward=0.7646675 (470.99 it/sec) -training >> step=999000, episode=167 reward=0.7813196 (555.99 it/sec) -training >> step=999100, episode=167 reward=0.7396638 (532.49 it/sec) -training >> step=999200, episode=167 reward=0.7771587 (475.32 it/sec) -training >> step=999300, episode=167 reward=0.7569355 (534.10 it/sec) -training >> step=999400, episode=167 reward=0.7690471 (484.67 it/sec) -training >> step=999500, episode=167 reward=0.7466918 (456.29 it/sec) -training >> step=999600, episode=167 reward=0.7538185 (519.76 it/sec) -training >> step=999700, episode=167 reward=0.7384737 (509.06 it/sec) -training >> step=999800, episode=167 reward=0.7558832 (508.14 it/sec) -training >> step=999900, episode=167 reward=0.766223 (502.79 it/sec) -training >> step=1000000, episode=167 reward=0.7592972 (522.38 it/sec) -training >> step=1000100, episode=167 reward=0.7601039 (537.87 it/sec) -training >> step=1000200, episode=167 reward=0.7488747 (523.19 it/sec) -training >> step=1000300, episode=167 reward=0.7728227 (503.94 it/sec) -training >> step=1000400, episode=167 reward=0.7454813 (484.24 it/sec) -training >> step=1000500, episode=167 reward=0.7360783 (528.47 it/sec) -training >> step=1000600, episode=167 reward=0.7443991 (536.33 it/sec) -training >> step=1000700, episode=167 reward=0.7610543 (487.78 it/sec) -training >> step=1000800, episode=167 reward=0.7470703 (478.00 it/sec) -training >> step=1000900, episode=167 reward=0.7259712 (499.39 it/sec) -training >> step=1001000, episode=167 reward=0.7493567 (486.52 it/sec) -training >> step=1001100, episode=167 reward=0.7281985 (346.83 it/sec) -training >> step=1001200, episode=167 reward=0.7260424 (508.24 it/sec) -training >> step=1001300, episode=168 reward=0.7506557 (173.97 it/sec) -training >> step=1001400, episode=168 reward=0.7705163 (472.92 it/sec) -training >> step=1001500, episode=168 reward=0.7750814 (505.24 it/sec) -training >> step=1001600, episode=168 reward=0.7099586 (497.54 it/sec) -training >> step=1001700, episode=168 reward=0.7650039 (523.39 it/sec) -training >> step=1001800, episode=168 reward=0.7506641 (476.01 it/sec) -training >> step=1001900, episode=168 reward=0.7675558 (456.17 it/sec) -training >> step=1002000, episode=168 reward=0.7461146 (476.43 it/sec) -training >> step=1002100, episode=168 reward=0.7586883 (481.48 it/sec) -training >> step=1002200, episode=168 reward=0.7666025 (509.39 it/sec) -training >> step=1002300, episode=168 reward=0.7831666 (533.77 it/sec) -training >> step=1002400, episode=168 reward=0.7701592 (475.47 it/sec) -training >> step=1002500, episode=168 reward=0.7404655 (536.74 it/sec) -training >> step=1002600, episode=168 reward=0.7592945 (493.09 it/sec) -training >> step=1002700, episode=168 reward=0.7709721 (548.33 it/sec) -training >> step=1002800, episode=168 reward=0.7725142 (555.06 it/sec) -training >> step=1002900, episode=168 reward=0.7479294 (485.61 it/sec) -training >> step=1003000, episode=168 reward=0.7618948 (513.81 it/sec) -training >> step=1003100, episode=168 reward=0.7575688 (484.56 it/sec) -training >> step=1003200, episode=168 reward=0.7873228 (493.92 it/sec) -training >> step=1003300, episode=168 reward=0.7600103 (532.00 it/sec) -training >> step=1003400, episode=168 reward=0.780362 (514.90 it/sec) -training >> step=1003500, episode=168 reward=0.7641432 (539.94 it/sec) -training >> step=1003600, episode=168 reward=0.749756 (548.26 it/sec) -training >> step=1003700, episode=168 reward=0.7445487 (488.40 it/sec) -training >> step=1003800, episode=168 reward=0.7566743 (553.95 it/sec) -training >> step=1003900, episode=168 reward=0.7747644 (511.89 it/sec) -training >> step=1004000, episode=168 reward=0.7505084 (550.48 it/sec) -training >> step=1004100, episode=168 reward=0.7756268 (540.27 it/sec) -training >> step=1004200, episode=168 reward=0.7562436 (511.64 it/sec) -training >> step=1004300, episode=168 reward=0.7768399 (520.31 it/sec) -training >> step=1004400, episode=168 reward=0.7613662 (486.18 it/sec) -training >> step=1004500, episode=168 reward=0.7650245 (518.98 it/sec) -training >> step=1004600, episode=168 reward=0.7805216 (521.25 it/sec) -training >> step=1004700, episode=168 reward=0.7659764 (517.25 it/sec) -training >> step=1004800, episode=168 reward=0.7688632 (461.47 it/sec) -training >> step=1004900, episode=168 reward=0.7832182 (547.08 it/sec) -training >> step=1005000, episode=168 reward=0.7791664 (500.10 it/sec) -training >> step=1005100, episode=168 reward=0.7725682 (545.48 it/sec) -training >> step=1005200, episode=168 reward=0.7567057 (534.41 it/sec) -training >> step=1005300, episode=168 reward=0.7612544 (492.83 it/sec) -training >> step=1005400, episode=168 reward=0.7570876 (497.10 it/sec) -training >> step=1005500, episode=168 reward=0.7365249 (522.72 it/sec) -training >> step=1005600, episode=168 reward=0.7744802 (503.95 it/sec) -training >> step=1005700, episode=168 reward=0.779055 (513.17 it/sec) -training >> step=1005800, episode=168 reward=0.7494178 (488.15 it/sec) -training >> step=1005900, episode=168 reward=0.7490151 (497.61 it/sec) -training >> step=1006000, episode=168 reward=0.7497596 (538.38 it/sec) -training >> step=1006100, episode=168 reward=0.7469292 (488.42 it/sec) -training >> step=1006200, episode=168 reward=0.7600615 (492.03 it/sec) -training >> step=1006300, episode=168 reward=0.7424451 (495.76 it/sec) -training >> step=1006400, episode=168 reward=0.7517659 (497.99 it/sec) -training >> step=1006500, episode=168 reward=0.7652911 (509.24 it/sec) -training >> step=1006600, episode=168 reward=0.7668864 (521.57 it/sec) -training >> step=1006700, episode=168 reward=0.7361071 (516.59 it/sec) -training >> step=1006800, episode=168 reward=0.7410805 (467.17 it/sec) -training >> step=1006900, episode=168 reward=0.7346167 (486.43 it/sec) -training >> step=1007000, episode=168 reward=0.7169375 (488.69 it/sec) -training >> step=1007100, episode=168 reward=0.7129858 (534.86 it/sec) -training >> step=1007200, episode=168 reward=0.7502961 (471.43 it/sec) -training >> step=1007300, episode=169 reward=0.744033 (170.74 it/sec) -training >> step=1007400, episode=169 reward=0.7500406 (513.27 it/sec) -training >> step=1007500, episode=169 reward=0.7342276 (495.05 it/sec) -training >> step=1007600, episode=169 reward=0.7512252 (509.78 it/sec) -training >> step=1007700, episode=169 reward=0.7439692 (404.58 it/sec) -training >> step=1007800, episode=169 reward=0.7591865 (447.48 it/sec) -training >> step=1007900, episode=169 reward=0.7492386 (492.76 it/sec) -training >> step=1008000, episode=169 reward=0.7657087 (481.54 it/sec) -training >> step=1008100, episode=169 reward=0.7563624 (474.75 it/sec) -training >> step=1008200, episode=169 reward=0.760579 (489.00 it/sec) -training >> step=1008300, episode=169 reward=0.7428378 (443.42 it/sec) -training >> step=1008400, episode=169 reward=0.7729167 (465.03 it/sec) -training >> step=1008500, episode=169 reward=0.7405806 (481.02 it/sec) -training >> step=1008600, episode=169 reward=0.7716297 (491.35 it/sec) -training >> step=1008700, episode=169 reward=0.7688937 (443.95 it/sec) -training >> step=1008800, episode=169 reward=0.7810509 (404.74 it/sec) -training >> step=1008900, episode=169 reward=0.779036 (476.23 it/sec) -training >> step=1009000, episode=169 reward=0.7472821 (489.69 it/sec) -training >> step=1009100, episode=169 reward=0.767676 (479.46 it/sec) -training >> step=1009200, episode=169 reward=0.7429729 (487.51 it/sec) -training >> step=1009300, episode=169 reward=0.7799597 (492.05 it/sec) -training >> step=1009400, episode=169 reward=0.7516658 (492.55 it/sec) -training >> step=1009500, episode=169 reward=0.7508644 (392.49 it/sec) -training >> step=1009600, episode=169 reward=0.7658639 (447.29 it/sec) -training >> step=1009700, episode=169 reward=0.760951 (493.32 it/sec) -training >> step=1009800, episode=169 reward=0.7592171 (484.45 it/sec) -training >> step=1009900, episode=169 reward=0.7723231 (454.11 it/sec) -training >> step=1010000, episode=169 reward=0.7582453 (487.11 it/sec) -training >> step=1010100, episode=169 reward=0.7641299 (410.96 it/sec) -training >> step=1010200, episode=169 reward=0.7567655 (448.22 it/sec) -training >> step=1010300, episode=169 reward=0.7419218 (490.70 it/sec) -training >> step=1010400, episode=169 reward=0.7572621 (467.50 it/sec) -training >> step=1010500, episode=169 reward=0.7439638 (461.27 it/sec) -training >> step=1010600, episode=169 reward=0.7469678 (393.21 it/sec) -training >> step=1010700, episode=169 reward=0.7802207 (465.11 it/sec) -training >> step=1010800, episode=169 reward=0.7528927 (476.18 it/sec) -training >> step=1010900, episode=169 reward=0.7532372 (465.06 it/sec) -training >> step=1011000, episode=169 reward=0.7544521 (457.89 it/sec) -training >> step=1011100, episode=169 reward=0.7750183 (437.93 it/sec) -training >> step=1011200, episode=169 reward=0.7304629 (475.18 it/sec) -training >> step=1011300, episode=169 reward=0.7501587 (427.48 it/sec) -training >> step=1011400, episode=169 reward=0.7468626 (476.70 it/sec) -training >> step=1011500, episode=169 reward=0.7732512 (443.13 it/sec) -training >> step=1011600, episode=169 reward=0.7589589 (448.61 it/sec) -training >> step=1011700, episode=169 reward=0.7735662 (438.29 it/sec) -training >> step=1011800, episode=169 reward=0.7582107 (454.24 it/sec) -training >> step=1011900, episode=169 reward=0.7646192 (511.62 it/sec) -training >> step=1012000, episode=169 reward=0.7568653 (441.00 it/sec) -training >> step=1012100, episode=169 reward=0.766023 (471.05 it/sec) -training >> step=1012200, episode=169 reward=0.7388922 (458.04 it/sec) -training >> step=1012300, episode=169 reward=0.7705362 (410.98 it/sec) -training >> step=1012400, episode=169 reward=0.7542381 (510.60 it/sec) -training >> step=1012500, episode=169 reward=0.7253051 (488.91 it/sec) -training >> step=1012600, episode=169 reward=0.7502828 (531.18 it/sec) -training >> step=1012700, episode=169 reward=0.7472748 (532.64 it/sec) -training >> step=1012800, episode=169 reward=0.7504195 (484.89 it/sec) -training >> step=1012900, episode=169 reward=0.7333142 (512.75 it/sec) -training >> step=1013000, episode=169 reward=0.7288334 (482.39 it/sec) -training >> step=1013100, episode=169 reward=0.7305729 (487.54 it/sec) -training >> step=1013200, episode=169 reward=0.721818 (383.33 it/sec) -training >> step=1013300, episode=170 reward=0.7491831 (214.29 it/sec) -training >> step=1013400, episode=170 reward=0.7250408 (500.61 it/sec) -training >> step=1013500, episode=170 reward=0.7526442 (470.91 it/sec) -training >> step=1013600, episode=170 reward=0.7564553 (511.76 it/sec) -training >> step=1013700, episode=170 reward=0.753548 (521.74 it/sec) -training >> step=1013800, episode=170 reward=0.7204621 (423.65 it/sec) -training >> step=1013900, episode=170 reward=0.7585852 (503.97 it/sec) -training >> step=1014000, episode=170 reward=0.741549 (545.57 it/sec) -training >> step=1014100, episode=170 reward=0.7348111 (509.28 it/sec) -training >> step=1014200, episode=170 reward=0.7786189 (532.69 it/sec) -training >> step=1014300, episode=170 reward=0.764337 (513.51 it/sec) -training >> step=1014400, episode=170 reward=0.7733415 (506.02 it/sec) -training >> step=1014500, episode=170 reward=0.7552572 (524.36 it/sec) -training >> step=1014600, episode=170 reward=0.7459946 (519.37 it/sec) -training >> step=1014700, episode=170 reward=0.755906 (533.74 it/sec) -training >> step=1014800, episode=170 reward=0.769937 (501.46 it/sec) -training >> step=1014900, episode=170 reward=0.7580518 (505.19 it/sec) -training >> step=1015000, episode=170 reward=0.7476261 (521.65 it/sec) -training >> step=1015100, episode=170 reward=0.7590011 (492.56 it/sec) -training >> step=1015200, episode=170 reward=0.7598406 (505.86 it/sec) -training >> step=1015300, episode=170 reward=0.7939507 (533.12 it/sec) -training >> step=1015400, episode=170 reward=0.7574473 (483.21 it/sec) -training >> step=1015500, episode=170 reward=0.7713822 (525.83 it/sec) -training >> step=1015600, episode=170 reward=0.7578952 (467.80 it/sec) -training >> step=1015700, episode=170 reward=0.7613221 (536.90 it/sec) -training >> step=1015800, episode=170 reward=0.7796217 (532.21 it/sec) -training >> step=1015900, episode=170 reward=0.7681994 (485.23 it/sec) -training >> step=1016000, episode=170 reward=0.7852676 (523.66 it/sec) -training >> step=1016100, episode=170 reward=0.7561109 (480.75 it/sec) -training >> step=1016200, episode=170 reward=0.7659779 (476.81 it/sec) -training >> step=1016300, episode=170 reward=0.7507334 (498.47 it/sec) -training >> step=1016400, episode=170 reward=0.753218 (458.28 it/sec) -training >> step=1016500, episode=170 reward=0.7212497 (498.88 it/sec) -training >> step=1016600, episode=170 reward=0.7566166 (570.28 it/sec) -training >> step=1016700, episode=170 reward=0.7503377 (488.29 it/sec) -training >> step=1016800, episode=170 reward=0.7481064 (511.57 it/sec) -training >> step=1016900, episode=170 reward=0.7731827 (442.04 it/sec) -training >> step=1017000, episode=170 reward=0.7597577 (547.95 it/sec) -training >> step=1017100, episode=170 reward=0.7576673 (544.40 it/sec) -training >> step=1017200, episode=170 reward=0.7617055 (508.94 it/sec) -training >> step=1017300, episode=170 reward=0.7616834 (522.83 it/sec) -training >> step=1017400, episode=170 reward=0.7903172 (534.24 it/sec) -training >> step=1017500, episode=170 reward=0.7564297 (488.29 it/sec) -training >> step=1017600, episode=170 reward=0.7314258 (539.79 it/sec) -training >> step=1017700, episode=170 reward=0.7501578 (521.96 it/sec) -training >> step=1017800, episode=170 reward=0.7900786 (501.07 it/sec) -training >> step=1017900, episode=170 reward=0.7535642 (520.98 it/sec) -training >> step=1018000, episode=170 reward=0.7434366 (485.32 it/sec) -training >> step=1018100, episode=170 reward=0.7888537 (543.18 it/sec) -training >> step=1018200, episode=170 reward=0.7790402 (484.06 it/sec) -training >> step=1018300, episode=170 reward=0.7688931 (510.23 it/sec) -training >> step=1018400, episode=170 reward=0.7376291 (542.42 it/sec) -training >> step=1018500, episode=170 reward=0.743911 (498.31 it/sec) -training >> step=1018600, episode=170 reward=0.7785807 (547.19 it/sec) -training >> step=1018700, episode=170 reward=0.7308773 (549.62 it/sec) -training >> step=1018800, episode=170 reward=0.7541974 (507.24 it/sec) -training >> step=1018900, episode=170 reward=0.7557342 (531.44 it/sec) -training >> step=1019000, episode=170 reward=0.7433555 (506.33 it/sec) -training >> step=1019100, episode=170 reward=0.7722393 (536.96 it/sec) -training >> step=1019200, episode=170 reward=0.7543143 (372.71 it/sec) -training >> step=1019300, episode=171 reward=0.7835078 (231.22 it/sec) -training >> step=1019400, episode=171 reward=0.7552109 (474.91 it/sec) -training >> step=1019500, episode=171 reward=0.743928 (512.68 it/sec) -training >> step=1019600, episode=171 reward=0.7766362 (547.40 it/sec) -training >> step=1019700, episode=171 reward=0.7674168 (512.62 it/sec) -training >> step=1019800, episode=171 reward=0.7773411 (504.20 it/sec) -training >> step=1019900, episode=171 reward=0.7484817 (469.80 it/sec) -training >> step=1020000, episode=171 reward=0.7854493 (518.48 it/sec) -training >> step=1020100, episode=171 reward=0.7538851 (517.24 it/sec) -training >> step=1020200, episode=171 reward=0.7761452 (521.05 it/sec) -training >> step=1020300, episode=171 reward=0.7510207 (524.51 it/sec) -training >> step=1020400, episode=171 reward=0.7600353 (483.81 it/sec) -training >> step=1020500, episode=171 reward=0.7686926 (507.35 it/sec) -training >> step=1020600, episode=171 reward=0.7435819 (529.49 it/sec) -training >> step=1020700, episode=171 reward=0.7959644 (518.47 it/sec) -training >> step=1020800, episode=171 reward=0.7564108 (513.81 it/sec) -training >> step=1020900, episode=171 reward=0.7852641 (480.72 it/sec) -training >> step=1021000, episode=171 reward=0.7710425 (519.77 it/sec) -training >> step=1021100, episode=171 reward=0.768136 (511.75 it/sec) -training >> step=1021200, episode=171 reward=0.7653747 (474.55 it/sec) -training >> step=1021300, episode=171 reward=0.7570979 (538.12 it/sec) -training >> step=1021400, episode=171 reward=0.7685357 (516.63 it/sec) -training >> step=1021500, episode=171 reward=0.7788728 (512.78 it/sec) -training >> step=1021600, episode=171 reward=0.7477746 (552.59 it/sec) -training >> step=1021700, episode=171 reward=0.7587556 (561.10 it/sec) -training >> step=1021800, episode=171 reward=0.7707187 (498.36 it/sec) -training >> step=1021900, episode=171 reward=0.755289 (441.37 it/sec) -training >> step=1022000, episode=171 reward=0.7406078 (467.34 it/sec) -training >> step=1022100, episode=171 reward=0.751047 (494.17 it/sec) -training >> step=1022200, episode=171 reward=0.7807259 (490.56 it/sec) -training >> step=1022300, episode=171 reward=0.7583494 (528.44 it/sec) -training >> step=1022400, episode=171 reward=0.7548515 (479.36 it/sec) -training >> step=1022500, episode=171 reward=0.7652216 (390.39 it/sec) -training >> step=1022600, episode=171 reward=0.7525065 (456.31 it/sec) -training >> step=1022700, episode=171 reward=0.7715913 (401.42 it/sec) -training >> step=1022800, episode=171 reward=0.7615424 (425.85 it/sec) -training >> step=1022900, episode=171 reward=0.7646697 (453.35 it/sec) -training >> step=1023000, episode=171 reward=0.7569404 (479.78 it/sec) -training >> step=1023100, episode=171 reward=0.7544142 (523.52 it/sec) -training >> step=1023200, episode=171 reward=0.7598006 (526.63 it/sec) -training >> step=1023300, episode=171 reward=0.7632876 (491.13 it/sec) -training >> step=1023400, episode=171 reward=0.7597914 (491.81 it/sec) -training >> step=1023500, episode=171 reward=0.7506318 (474.37 it/sec) -training >> step=1023600, episode=171 reward=0.7417479 (472.29 it/sec) -training >> step=1023700, episode=171 reward=0.7674502 (471.72 it/sec) -training >> step=1023800, episode=171 reward=0.7600577 (432.12 it/sec) -training >> step=1023900, episode=171 reward=0.755787 (390.00 it/sec) -training >> step=1024000, episode=171 reward=0.7725626 (522.94 it/sec) -training >> step=1024100, episode=171 reward=0.7495461 (496.34 it/sec) -training >> step=1024200, episode=171 reward=0.7629456 (495.78 it/sec) -training >> step=1024300, episode=171 reward=0.7546185 (522.02 it/sec) -training >> step=1024400, episode=171 reward=0.747727 (473.27 it/sec) -training >> step=1024500, episode=171 reward=0.7750002 (496.64 it/sec) -training >> step=1024600, episode=171 reward=0.7694162 (527.88 it/sec) -training >> step=1024700, episode=171 reward=0.7379162 (502.92 it/sec) -training >> step=1024800, episode=171 reward=0.75626 (510.51 it/sec) -training >> step=1024900, episode=171 reward=0.7316831 (521.77 it/sec) -training >> step=1025000, episode=171 reward=0.7399411 (506.06 it/sec) -training >> step=1025100, episode=171 reward=0.7465924 (537.64 it/sec) -training >> step=1025200, episode=171 reward=0.7445617 (435.33 it/sec) -training >> step=1025300, episode=172 reward=0.7423606 (197.00 it/sec) -training >> step=1025400, episode=172 reward=0.7676477 (524.88 it/sec) -training >> step=1025500, episode=172 reward=0.7828197 (522.39 it/sec) -training >> step=1025600, episode=172 reward=0.7832076 (507.20 it/sec) -training >> step=1025700, episode=172 reward=0.7677423 (473.81 it/sec) -training >> step=1025800, episode=172 reward=0.7613274 (512.89 it/sec) -training >> step=1025900, episode=172 reward=0.7515141 (480.80 it/sec) -training >> step=1026000, episode=172 reward=0.7441625 (516.72 it/sec) -training >> step=1026100, episode=172 reward=0.7741601 (497.83 it/sec) -training >> step=1026200, episode=172 reward=0.7683118 (527.03 it/sec) -training >> step=1026300, episode=172 reward=0.7591844 (478.69 it/sec) -training >> step=1026400, episode=172 reward=0.7771031 (406.39 it/sec) -training >> step=1026500, episode=172 reward=0.7789395 (324.23 it/sec) -training >> step=1026600, episode=172 reward=0.7505537 (428.74 it/sec) -training >> step=1026700, episode=172 reward=0.7696765 (418.28 it/sec) -training >> step=1026800, episode=172 reward=0.7487496 (460.21 it/sec) -training >> step=1026900, episode=172 reward=0.7754759 (533.25 it/sec) -training >> step=1027000, episode=172 reward=0.7574934 (548.82 it/sec) -training >> step=1027100, episode=172 reward=0.7522235 (513.55 it/sec) -training >> step=1027200, episode=172 reward=0.7631248 (528.25 it/sec) -training >> step=1027300, episode=172 reward=0.7500522 (463.53 it/sec) -training >> step=1027400, episode=172 reward=0.7514662 (524.21 it/sec) -training >> step=1027500, episode=172 reward=0.7688824 (510.61 it/sec) -training >> step=1027600, episode=172 reward=0.7528572 (532.48 it/sec) -training >> step=1027700, episode=172 reward=0.7532794 (439.40 it/sec) -training >> step=1027800, episode=172 reward=0.7618913 (468.41 it/sec) -training >> step=1027900, episode=172 reward=0.7517548 (502.21 it/sec) -training >> step=1028000, episode=172 reward=0.7413226 (439.71 it/sec) -training >> step=1028100, episode=172 reward=0.754356 (409.48 it/sec) -training >> step=1028200, episode=172 reward=0.7833182 (517.19 it/sec) -training >> step=1028300, episode=172 reward=0.7395304 (537.73 it/sec) -training >> step=1028400, episode=172 reward=0.7265651 (535.90 it/sec) -training >> step=1028500, episode=172 reward=0.7770491 (488.66 it/sec) -training >> step=1028600, episode=172 reward=0.7508987 (402.43 it/sec) -training >> step=1028700, episode=172 reward=0.7614133 (432.74 it/sec) -training >> step=1028800, episode=172 reward=0.7764134 (492.50 it/sec) -training >> step=1028900, episode=172 reward=0.7442407 (459.04 it/sec) -training >> step=1029000, episode=172 reward=0.7732206 (393.16 it/sec) -training >> step=1029100, episode=172 reward=0.7595237 (552.57 it/sec) -training >> step=1029200, episode=172 reward=0.7374814 (549.64 it/sec) -training >> step=1029300, episode=172 reward=0.7599826 (565.55 it/sec) -training >> step=1029400, episode=172 reward=0.7917879 (506.97 it/sec) -training >> step=1029500, episode=172 reward=0.7556871 (524.85 it/sec) -training >> step=1029600, episode=172 reward=0.7743403 (555.08 it/sec) -training >> step=1029700, episode=172 reward=0.7507863 (573.00 it/sec) -training >> step=1029800, episode=172 reward=0.7735289 (538.30 it/sec) -training >> step=1029900, episode=172 reward=0.7856551 (552.70 it/sec) -training >> step=1030000, episode=172 reward=0.752285 (550.26 it/sec) -training >> step=1030100, episode=172 reward=0.7409513 (540.41 it/sec) -training >> step=1030200, episode=172 reward=0.7467666 (578.49 it/sec) -training >> step=1030300, episode=172 reward=0.7391054 (561.35 it/sec) -training >> step=1030400, episode=172 reward=0.7737656 (562.72 it/sec) -training >> step=1030500, episode=172 reward=0.7693568 (484.58 it/sec) -training >> step=1030600, episode=172 reward=0.7728814 (403.38 it/sec) -training >> step=1030700, episode=172 reward=0.7357974 (416.86 it/sec) -training >> step=1030800, episode=172 reward=0.7629063 (425.27 it/sec) -training >> step=1030900, episode=172 reward=0.7363536 (446.26 it/sec) -training >> step=1031000, episode=172 reward=0.7449521 (341.21 it/sec) -training >> step=1031100, episode=172 reward=0.7600307 (361.35 it/sec) -training >> step=1031200, episode=172 reward=0.7317271 (407.52 it/sec) -training >> step=1031300, episode=173 reward=0.7486015 (164.47 it/sec) -training >> step=1031400, episode=173 reward=0.7528939 (430.25 it/sec) -training >> step=1031500, episode=173 reward=0.7484116 (401.38 it/sec) -training >> step=1031600, episode=173 reward=0.7478068 (481.16 it/sec) -training >> step=1031700, episode=173 reward=0.7549294 (465.34 it/sec) -training >> step=1031800, episode=173 reward=0.748031 (455.50 it/sec) -training >> step=1031900, episode=173 reward=0.7405272 (437.76 it/sec) -training >> step=1032000, episode=173 reward=0.7735118 (443.34 it/sec) -training >> step=1032100, episode=173 reward=0.7441168 (489.48 it/sec) -training >> step=1032200, episode=173 reward=0.7737322 (445.51 it/sec) -training >> step=1032300, episode=173 reward=0.7406403 (421.90 it/sec) -training >> step=1032400, episode=173 reward=0.783747 (457.88 it/sec) -training >> step=1032500, episode=173 reward=0.7920518 (465.07 it/sec) -training >> step=1032600, episode=173 reward=0.7778298 (437.13 it/sec) -training >> step=1032700, episode=173 reward=0.7561079 (471.15 it/sec) -training >> step=1032800, episode=173 reward=0.7481714 (506.86 it/sec) -training >> step=1032900, episode=173 reward=0.7646583 (479.18 it/sec) -training >> step=1033000, episode=173 reward=0.7620634 (477.90 it/sec) -training >> step=1033100, episode=173 reward=0.7654301 (486.70 it/sec) -training >> step=1033200, episode=173 reward=0.7544155 (493.06 it/sec) -training >> step=1033300, episode=173 reward=0.7732912 (501.86 it/sec) -training >> step=1033400, episode=173 reward=0.7658299 (460.09 it/sec) -training >> step=1033500, episode=173 reward=0.7474574 (499.40 it/sec) -training >> step=1033600, episode=173 reward=0.7942063 (546.94 it/sec) -training >> step=1033700, episode=173 reward=0.7726583 (471.91 it/sec) -training >> step=1033800, episode=173 reward=0.7724099 (445.52 it/sec) -training >> step=1033900, episode=173 reward=0.7791995 (424.24 it/sec) -training >> step=1034000, episode=173 reward=0.7552248 (412.38 it/sec) -training >> step=1034100, episode=173 reward=0.7366779 (378.99 it/sec) -training >> step=1034200, episode=173 reward=0.7678919 (412.68 it/sec) -training >> step=1034300, episode=173 reward=0.7625736 (403.87 it/sec) -training >> step=1034400, episode=173 reward=0.7477947 (458.15 it/sec) -training >> step=1034500, episode=173 reward=0.7620299 (474.33 it/sec) -training >> step=1034600, episode=173 reward=0.768118 (452.21 it/sec) -training >> step=1034700, episode=173 reward=0.7926015 (473.96 it/sec) -training >> step=1034800, episode=173 reward=0.7595418 (475.04 it/sec) -training >> step=1034900, episode=173 reward=0.7882619 (508.08 it/sec) -training >> step=1035000, episode=173 reward=0.7669384 (463.99 it/sec) -training >> step=1035100, episode=173 reward=0.7290626 (413.59 it/sec) -training >> step=1035200, episode=173 reward=0.7716792 (468.32 it/sec) -training >> step=1035300, episode=173 reward=0.7640026 (473.19 it/sec) -training >> step=1035400, episode=173 reward=0.7630646 (525.29 it/sec) -training >> step=1035500, episode=173 reward=0.7652395 (462.72 it/sec) -training >> step=1035600, episode=173 reward=0.7653069 (442.31 it/sec) -training >> step=1035700, episode=173 reward=0.7481593 (404.03 it/sec) -training >> step=1035800, episode=173 reward=0.7520447 (423.70 it/sec) -training >> step=1035900, episode=173 reward=0.7806931 (401.17 it/sec) -training >> step=1036000, episode=173 reward=0.7704046 (427.68 it/sec) -training >> step=1036100, episode=173 reward=0.777831 (491.54 it/sec) -training >> step=1036200, episode=173 reward=0.7412803 (497.62 it/sec) -training >> step=1036300, episode=173 reward=0.746708 (548.51 it/sec) -training >> step=1036400, episode=173 reward=0.7302986 (498.30 it/sec) -training >> step=1036500, episode=173 reward=0.720402 (476.45 it/sec) -training >> step=1036600, episode=173 reward=0.739344 (530.18 it/sec) -training >> step=1036700, episode=173 reward=0.7606475 (524.49 it/sec) -training >> step=1036800, episode=173 reward=0.7433993 (541.60 it/sec) -training >> step=1036900, episode=173 reward=0.7591968 (531.07 it/sec) -training >> step=1037000, episode=173 reward=0.7340146 (501.52 it/sec) -training >> step=1037100, episode=173 reward=0.7267043 (551.13 it/sec) -training >> step=1037200, episode=173 reward=0.7379744 (513.51 it/sec) -training >> step=1037300, episode=174 reward=0.76392 (164.41 it/sec) -training >> step=1037400, episode=174 reward=0.7585545 (523.95 it/sec) -training >> step=1037500, episode=174 reward=0.7640793 (497.22 it/sec) -training >> step=1037600, episode=174 reward=0.7683877 (522.56 it/sec) -training >> step=1037700, episode=174 reward=0.7733678 (500.52 it/sec) -training >> step=1037800, episode=174 reward=0.772648 (542.18 it/sec) -training >> step=1037900, episode=174 reward=0.7582164 (478.28 it/sec) -training >> step=1038000, episode=174 reward=0.7639066 (542.29 it/sec) -training >> step=1038100, episode=174 reward=0.7530999 (537.68 it/sec) -training >> step=1038200, episode=174 reward=0.7440418 (525.77 it/sec) -training >> step=1038300, episode=174 reward=0.7555287 (517.03 it/sec) -training >> step=1038400, episode=174 reward=0.7664574 (490.81 it/sec) -training >> step=1038500, episode=174 reward=0.7719751 (536.45 it/sec) -training >> step=1038600, episode=174 reward=0.7654439 (555.58 it/sec) -training >> step=1038700, episode=174 reward=0.7489687 (522.74 it/sec) -training >> step=1038800, episode=174 reward=0.7544411 (537.76 it/sec) -training >> step=1038900, episode=174 reward=0.7545823 (509.39 it/sec) -training >> step=1039000, episode=174 reward=0.7657819 (539.36 it/sec) -training >> step=1039100, episode=174 reward=0.7487231 (479.01 it/sec) -training >> step=1039200, episode=174 reward=0.7496355 (432.77 it/sec) -training >> step=1039300, episode=174 reward=0.7756591 (427.36 it/sec) -training >> step=1039400, episode=174 reward=0.7754796 (406.99 it/sec) -training >> step=1039500, episode=174 reward=0.751485 (449.67 it/sec) -training >> step=1039600, episode=174 reward=0.7670363 (496.94 it/sec) -training >> step=1039700, episode=174 reward=0.7752579 (534.72 it/sec) -training >> step=1039800, episode=174 reward=0.7301302 (496.42 it/sec) -training >> step=1039900, episode=174 reward=0.7546999 (434.77 it/sec) -training >> step=1040000, episode=174 reward=0.7762742 (448.16 it/sec) -training >> step=1040100, episode=174 reward=0.7694179 (420.87 it/sec) -training >> step=1040200, episode=174 reward=0.7482861 (383.59 it/sec) -training >> step=1040300, episode=174 reward=0.7603216 (298.52 it/sec) -training >> step=1040400, episode=174 reward=0.7790073 (352.63 it/sec) -training >> step=1040500, episode=174 reward=0.7572355 (320.55 it/sec) -training >> step=1040600, episode=174 reward=0.7674398 (381.10 it/sec) -training >> step=1040700, episode=174 reward=0.7411476 (386.64 it/sec) -training >> step=1040800, episode=174 reward=0.7492048 (393.99 it/sec) -training >> step=1040900, episode=174 reward=0.7736059 (425.30 it/sec) -training >> step=1041000, episode=174 reward=0.7518029 (460.07 it/sec) -training >> step=1041100, episode=174 reward=0.7653409 (457.14 it/sec) -training >> step=1041200, episode=174 reward=0.7568048 (497.11 it/sec) -training >> step=1041300, episode=174 reward=0.7588477 (503.41 it/sec) -training >> step=1041400, episode=174 reward=0.7587186 (472.86 it/sec) -training >> step=1041500, episode=174 reward=0.7438896 (387.12 it/sec) -training >> step=1041600, episode=174 reward=0.7669685 (456.84 it/sec) -training >> step=1041700, episode=174 reward=0.7521423 (466.60 it/sec) -training >> step=1041800, episode=174 reward=0.7405097 (481.34 it/sec) -training >> step=1041900, episode=174 reward=0.7738148 (481.32 it/sec) -training >> step=1042000, episode=174 reward=0.7755631 (504.42 it/sec) -training >> step=1042100, episode=174 reward=0.7656572 (469.45 it/sec) -training >> step=1042200, episode=174 reward=0.7675707 (479.39 it/sec) -training >> step=1042300, episode=174 reward=0.7767304 (489.51 it/sec) -training >> step=1042400, episode=174 reward=0.7696139 (480.23 it/sec) -training >> step=1042500, episode=174 reward=0.7536789 (417.83 it/sec) -training >> step=1042600, episode=174 reward=0.7333253 (416.82 it/sec) -training >> step=1042700, episode=174 reward=0.7482306 (457.76 it/sec) -training >> step=1042800, episode=174 reward=0.7513669 (495.47 it/sec) -training >> step=1042900, episode=174 reward=0.7768537 (478.63 it/sec) -training >> step=1043000, episode=174 reward=0.7442238 (488.68 it/sec) -training >> step=1043100, episode=174 reward=0.7414873 (483.59 it/sec) -training >> step=1043200, episode=174 reward=0.733955 (455.64 it/sec) -training >> step=1043300, episode=175 reward=0.7661566 (156.18 it/sec) -training >> step=1043400, episode=175 reward=0.7654203 (501.70 it/sec) -training >> step=1043500, episode=175 reward=0.7170168 (465.16 it/sec) -training >> step=1043600, episode=175 reward=0.7228294 (527.02 it/sec) -training >> step=1043700, episode=175 reward=0.7622627 (520.27 it/sec) -training >> step=1043800, episode=175 reward=0.7619053 (497.52 it/sec) -training >> step=1043900, episode=175 reward=0.7630517 (537.57 it/sec) -training >> step=1044000, episode=175 reward=0.7734848 (552.79 it/sec) -training >> step=1044100, episode=175 reward=0.7669956 (489.90 it/sec) -training >> step=1044200, episode=175 reward=0.7630271 (492.68 it/sec) -training >> step=1044300, episode=175 reward=0.7590562 (479.55 it/sec) -training >> step=1044400, episode=175 reward=0.7438263 (459.31 it/sec) -training >> step=1044500, episode=175 reward=0.7574282 (480.41 it/sec) -training >> step=1044600, episode=175 reward=0.7698345 (526.27 it/sec) -training >> step=1044700, episode=175 reward=0.7729577 (552.49 it/sec) -training >> step=1044800, episode=175 reward=0.767557 (471.70 it/sec) -training >> step=1044900, episode=175 reward=0.7677855 (506.46 it/sec) -training >> step=1045000, episode=175 reward=0.7617589 (519.43 it/sec) -training >> step=1045100, episode=175 reward=0.7700675 (433.95 it/sec) -training >> step=1045200, episode=175 reward=0.7594949 (517.51 it/sec) -training >> step=1045300, episode=175 reward=0.7663602 (521.01 it/sec) -training >> step=1045400, episode=175 reward=0.7698739 (479.91 it/sec) -training >> step=1045500, episode=175 reward=0.7557364 (541.51 it/sec) -training >> step=1045600, episode=175 reward=0.7746113 (505.05 it/sec) -training >> step=1045700, episode=175 reward=0.7782563 (540.81 it/sec) -training >> step=1045800, episode=175 reward=0.773142 (439.92 it/sec) -training >> step=1045900, episode=175 reward=0.7686225 (380.98 it/sec) -training >> step=1046000, episode=175 reward=0.7835748 (335.17 it/sec) -training >> step=1046100, episode=175 reward=0.7609619 (439.31 it/sec) -training >> step=1046200, episode=175 reward=0.7798203 (391.71 it/sec) -training >> step=1046300, episode=175 reward=0.7638992 (410.45 it/sec) -training >> step=1046400, episode=175 reward=0.7535858 (471.88 it/sec) -training >> step=1046500, episode=175 reward=0.7465675 (509.92 it/sec) -training >> step=1046600, episode=175 reward=0.7577676 (525.48 it/sec) -training >> step=1046700, episode=175 reward=0.7479387 (515.48 it/sec) -training >> step=1046800, episode=175 reward=0.7432107 (478.38 it/sec) -training >> step=1046900, episode=175 reward=0.7604687 (531.64 it/sec) -training >> step=1047000, episode=175 reward=0.7576997 (549.50 it/sec) -training >> step=1047100, episode=175 reward=0.7642806 (536.07 it/sec) -training >> step=1047200, episode=175 reward=0.7814402 (508.90 it/sec) -training >> step=1047300, episode=175 reward=0.7548469 (471.48 it/sec) -training >> step=1047400, episode=175 reward=0.762484 (525.68 it/sec) -training >> step=1047500, episode=175 reward=0.7681297 (495.62 it/sec) -training >> step=1047600, episode=175 reward=0.7684547 (559.47 it/sec) -training >> step=1047700, episode=175 reward=0.7540658 (557.63 it/sec) -training >> step=1047800, episode=175 reward=0.7593294 (503.36 it/sec) -training >> step=1047900, episode=175 reward=0.7569118 (522.60 it/sec) -training >> step=1048000, episode=175 reward=0.7589253 (530.93 it/sec) -training >> step=1048100, episode=175 reward=0.7597077 (511.08 it/sec) -training >> step=1048200, episode=175 reward=0.7918445 (547.48 it/sec) -training >> step=1048300, episode=175 reward=0.7703568 (472.72 it/sec) -training >> step=1048400, episode=175 reward=0.7567826 (534.68 it/sec) -training >> step=1048500, episode=175 reward=0.736797 (578.15 it/sec) -training >> step=1048600, episode=175 reward=0.7570477 (532.06 it/sec) -training >> step=1048700, episode=175 reward=0.7368151 (516.63 it/sec) -training >> step=1048800, episode=175 reward=0.7486581 (541.76 it/sec) -training >> step=1048900, episode=175 reward=0.7632259 (537.06 it/sec) -training >> step=1049000, episode=175 reward=0.769724 (555.03 it/sec) -training >> step=1049100, episode=175 reward=0.7469397 (503.78 it/sec) -training >> step=1049200, episode=175 reward=0.7453715 (444.36 it/sec) -training >> step=1049300, episode=176 reward=0.772713 (176.88 it/sec) -training >> step=1049400, episode=176 reward=0.7688033 (433.43 it/sec) -training >> step=1049500, episode=176 reward=0.7590969 (544.21 it/sec) -training >> step=1049600, episode=176 reward=0.7555516 (545.02 it/sec) -training >> step=1049700, episode=176 reward=0.7671278 (485.38 it/sec) -training >> step=1049800, episode=176 reward=0.7245854 (478.15 it/sec) -training >> step=1049900, episode=176 reward=0.7813069 (491.65 it/sec) -training >> step=1050000, episode=176 reward=0.7607115 (495.54 it/sec) -training >> step=1050100, episode=176 reward=0.7359636 (472.41 it/sec) -training >> step=1050200, episode=176 reward=0.7747185 (491.02 it/sec) -training >> step=1050300, episode=176 reward=0.7559453 (459.68 it/sec) -training >> step=1050400, episode=176 reward=0.7808066 (480.73 it/sec) -training >> step=1050500, episode=176 reward=0.7576257 (462.97 it/sec) -training >> step=1050600, episode=176 reward=0.752675 (429.44 it/sec) -training >> step=1050700, episode=176 reward=0.735831 (449.66 it/sec) -training >> step=1050800, episode=176 reward=0.7700742 (487.05 it/sec) -training >> step=1050900, episode=176 reward=0.7547869 (488.80 it/sec) -training >> step=1051000, episode=176 reward=0.7677036 (477.07 it/sec) -training >> step=1051100, episode=176 reward=0.7493412 (442.08 it/sec) -training >> step=1051200, episode=176 reward=0.7703522 (403.31 it/sec) -training >> step=1051300, episode=176 reward=0.7583931 (446.19 it/sec) -training >> step=1051400, episode=176 reward=0.7566184 (433.05 it/sec) -training >> step=1051500, episode=176 reward=0.7676204 (501.76 it/sec) -training >> step=1051600, episode=176 reward=0.770286 (475.87 it/sec) -training >> step=1051700, episode=176 reward=0.7457561 (396.60 it/sec) -training >> step=1051800, episode=176 reward=0.7548387 (454.85 it/sec) -training >> step=1051900, episode=176 reward=0.7674834 (539.83 it/sec) -training >> step=1052000, episode=176 reward=0.7888222 (529.19 it/sec) -training >> step=1052100, episode=176 reward=0.7655067 (537.28 it/sec) -training >> step=1052200, episode=176 reward=0.7694157 (462.57 it/sec) -training >> step=1052300, episode=176 reward=0.7478029 (526.38 it/sec) -training >> step=1052400, episode=176 reward=0.7700171 (507.28 it/sec) -training >> step=1052500, episode=176 reward=0.7669157 (540.30 it/sec) -training >> step=1052600, episode=176 reward=0.7717119 (550.74 it/sec) -training >> step=1052700, episode=176 reward=0.769408 (493.55 it/sec) -training >> step=1052800, episode=176 reward=0.7525716 (467.65 it/sec) -training >> step=1052900, episode=176 reward=0.7875008 (451.26 it/sec) -training >> step=1053000, episode=176 reward=0.7432766 (450.29 it/sec) -training >> step=1053100, episode=176 reward=0.768143 (514.20 it/sec) -training >> step=1053200, episode=176 reward=0.7682484 (408.79 it/sec) -training >> step=1053300, episode=176 reward=0.7735564 (352.74 it/sec) -training >> step=1053400, episode=176 reward=0.7511887 (381.88 it/sec) -training >> step=1053500, episode=176 reward=0.7733896 (412.74 it/sec) -training >> step=1053600, episode=176 reward=0.7571258 (393.63 it/sec) -training >> step=1053700, episode=176 reward=0.753246 (426.19 it/sec) -training >> step=1053800, episode=176 reward=0.7592995 (491.34 it/sec) -training >> step=1053900, episode=176 reward=0.7592479 (461.59 it/sec) -training >> step=1054000, episode=176 reward=0.7643742 (463.73 it/sec) -training >> step=1054100, episode=176 reward=0.7744073 (393.45 it/sec) -training >> step=1054200, episode=176 reward=0.742338 (452.77 it/sec) -training >> step=1054300, episode=176 reward=0.7548336 (400.15 it/sec) -training >> step=1054400, episode=176 reward=0.7588426 (400.26 it/sec) -training >> step=1054500, episode=176 reward=0.7385829 (364.22 it/sec) -training >> step=1054600, episode=176 reward=0.7380863 (452.74 it/sec) -training >> step=1054700, episode=176 reward=0.7429267 (437.74 it/sec) -training >> step=1054800, episode=176 reward=0.7284318 (410.94 it/sec) -training >> step=1054900, episode=176 reward=0.737431 (491.49 it/sec) -training >> step=1055000, episode=176 reward=0.7472185 (465.63 it/sec) -training >> step=1055100, episode=176 reward=0.7618843 (455.21 it/sec) -training >> step=1055200, episode=176 reward=0.7291496 (468.37 it/sec) -training >> step=1055300, episode=177 reward=0.7731153 (90.76 it/sec) -training >> step=1055400, episode=177 reward=0.7763135 (428.74 it/sec) -training >> step=1055500, episode=177 reward=0.791692 (437.93 it/sec) -training >> step=1055600, episode=177 reward=0.7567649 (494.19 it/sec) -training >> step=1055700, episode=177 reward=0.7814708 (473.36 it/sec) -training >> step=1055800, episode=177 reward=0.7550429 (490.54 it/sec) -training >> step=1055900, episode=177 reward=0.7491747 (529.74 it/sec) -training >> step=1056000, episode=177 reward=0.7442029 (444.76 it/sec) -training >> step=1056100, episode=177 reward=0.7683522 (538.76 it/sec) -training >> step=1056200, episode=177 reward=0.7435654 (535.72 it/sec) -training >> step=1056300, episode=177 reward=0.7549418 (492.24 it/sec) -training >> step=1056400, episode=177 reward=0.777088 (547.96 it/sec) -training >> step=1056500, episode=177 reward=0.7763084 (564.79 it/sec) -training >> step=1056600, episode=177 reward=0.7578734 (518.85 it/sec) -training >> step=1056700, episode=177 reward=0.7516278 (556.80 it/sec) -training >> step=1056800, episode=177 reward=0.7512152 (562.69 it/sec) -training >> step=1056900, episode=177 reward=0.7416064 (552.85 it/sec) -training >> step=1057000, episode=177 reward=0.7707717 (548.00 it/sec) -training >> step=1057100, episode=177 reward=0.744879 (496.26 it/sec) -training >> step=1057200, episode=177 reward=0.7758223 (579.16 it/sec) -training >> step=1057300, episode=177 reward=0.7585414 (558.82 it/sec) -training >> step=1057400, episode=177 reward=0.7723964 (527.79 it/sec) -training >> step=1057500, episode=177 reward=0.7528844 (559.12 it/sec) -training >> step=1057600, episode=177 reward=0.7500369 (562.58 it/sec) -training >> step=1057700, episode=177 reward=0.7691478 (466.80 it/sec) -training >> step=1057800, episode=177 reward=0.7544391 (490.45 it/sec) -training >> step=1057900, episode=177 reward=0.7304878 (554.99 it/sec) -training >> step=1058000, episode=177 reward=0.7558253 (611.38 it/sec) -training >> step=1058100, episode=177 reward=0.7499564 (482.07 it/sec) -training >> step=1058200, episode=177 reward=0.7402697 (522.78 it/sec) -training >> step=1058300, episode=177 reward=0.7548834 (540.23 it/sec) -training >> step=1058400, episode=177 reward=0.7650318 (507.12 it/sec) -training >> step=1058500, episode=177 reward=0.7728161 (501.04 it/sec) -training >> step=1058600, episode=177 reward=0.7429864 (494.08 it/sec) -training >> step=1058700, episode=177 reward=0.7595291 (406.82 it/sec) -training >> step=1058800, episode=177 reward=0.7577732 (460.89 it/sec) -training >> step=1058900, episode=177 reward=0.7734367 (500.26 it/sec) -training >> step=1059000, episode=177 reward=0.7634517 (466.10 it/sec) -training >> step=1059100, episode=177 reward=0.7596316 (462.74 it/sec) -training >> step=1059200, episode=177 reward=0.7601551 (452.47 it/sec) -training >> step=1059300, episode=177 reward=0.7572244 (486.01 it/sec) -training >> step=1059400, episode=177 reward=0.7643386 (504.98 it/sec) -training >> step=1059500, episode=177 reward=0.7682568 (475.95 it/sec) -training >> step=1059600, episode=177 reward=0.7716841 (449.49 it/sec) -training >> step=1059700, episode=177 reward=0.7497495 (440.94 it/sec) -training >> step=1059800, episode=177 reward=0.7651609 (443.19 it/sec) -training >> step=1059900, episode=177 reward=0.7313996 (470.03 it/sec) -training >> step=1060000, episode=177 reward=0.7654459 (522.63 it/sec) -training >> step=1060100, episode=177 reward=0.7620086 (486.23 it/sec) -training >> step=1060200, episode=177 reward=0.7668297 (507.48 it/sec) -training >> step=1060300, episode=177 reward=0.7355196 (487.73 it/sec) -training >> step=1060400, episode=177 reward=0.7626265 (486.45 it/sec) -training >> step=1060500, episode=177 reward=0.7346929 (538.62 it/sec) -training >> step=1060600, episode=177 reward=0.7429171 (581.74 it/sec) -training >> step=1060700, episode=177 reward=0.7454765 (535.41 it/sec) -training >> step=1060800, episode=177 reward=0.7411454 (573.42 it/sec) -training >> step=1060900, episode=177 reward=0.7466276 (553.24 it/sec) -training >> step=1061000, episode=177 reward=0.7505965 (566.91 it/sec) -training >> step=1061100, episode=177 reward=0.7346839 (566.37 it/sec) -training >> step=1061200, episode=177 reward=0.7537296 (533.05 it/sec) -training >> step=1061300, episode=178 reward=0.7534607 (43.95 it/sec) -training >> step=1061400, episode=178 reward=0.7859038 (440.86 it/sec) -training >> step=1061500, episode=178 reward=0.766681 (543.06 it/sec) -training >> step=1061600, episode=178 reward=0.7727593 (412.42 it/sec) -training >> step=1061700, episode=178 reward=0.7689924 (457.47 it/sec) -training >> step=1061800, episode=178 reward=0.7468161 (516.55 it/sec) -training >> step=1061900, episode=178 reward=0.7636058 (429.02 it/sec) -training >> step=1062000, episode=178 reward=0.7523782 (447.87 it/sec) -training >> step=1062100, episode=178 reward=0.7781376 (479.96 it/sec) -training >> step=1062200, episode=178 reward=0.7618539 (458.20 it/sec) -training >> step=1062300, episode=178 reward=0.7535501 (463.72 it/sec) -training >> step=1062400, episode=178 reward=0.7692116 (443.28 it/sec) -training >> step=1062500, episode=178 reward=0.7810777 (492.57 it/sec) -training >> step=1062600, episode=178 reward=0.726974 (474.20 it/sec) -training >> step=1062700, episode=178 reward=0.7484878 (424.87 it/sec) -training >> step=1062800, episode=178 reward=0.7862191 (435.98 it/sec) -training >> step=1062900, episode=178 reward=0.7701274 (497.47 it/sec) -training >> step=1063000, episode=178 reward=0.7874752 (489.24 it/sec) -training >> step=1063100, episode=178 reward=0.7590725 (511.18 it/sec) -training >> step=1063200, episode=178 reward=0.7459618 (443.00 it/sec) -training >> step=1063300, episode=178 reward=0.7460558 (424.90 it/sec) -training >> step=1063400, episode=178 reward=0.7501697 (478.34 it/sec) -training >> step=1063500, episode=178 reward=0.7675074 (452.35 it/sec) -training >> step=1063600, episode=178 reward=0.7563953 (474.47 it/sec) -training >> step=1063700, episode=178 reward=0.7298655 (461.63 it/sec) -training >> step=1063800, episode=178 reward=0.7564176 (520.88 it/sec) -training >> step=1063900, episode=178 reward=0.7584828 (545.81 it/sec) -training >> step=1064000, episode=178 reward=0.7621279 (442.64 it/sec) -training >> step=1064100, episode=178 reward=0.7551414 (452.97 it/sec) -training >> step=1064200, episode=178 reward=0.7509287 (439.00 it/sec) -training >> step=1064300, episode=178 reward=0.7780484 (494.29 it/sec) -training >> step=1064400, episode=178 reward=0.7598252 (428.37 it/sec) -training >> step=1064500, episode=178 reward=0.7582353 (485.12 it/sec) -training >> step=1064600, episode=178 reward=0.747542 (438.81 it/sec) -training >> step=1064700, episode=178 reward=0.7883339 (472.09 it/sec) -training >> step=1064800, episode=178 reward=0.7572342 (440.32 it/sec) -training >> step=1064900, episode=178 reward=0.7509604 (494.30 it/sec) -training >> step=1065000, episode=178 reward=0.7682467 (503.57 it/sec) -training >> step=1065100, episode=178 reward=0.7561536 (459.07 it/sec) -training >> step=1065200, episode=178 reward=0.759383 (432.86 it/sec) -training >> step=1065300, episode=178 reward=0.7296786 (486.99 it/sec) -training >> step=1065400, episode=178 reward=0.7824459 (461.79 it/sec) -training >> step=1065500, episode=178 reward=0.7573596 (446.38 it/sec) -training >> step=1065600, episode=178 reward=0.7565228 (451.74 it/sec) -training >> step=1065700, episode=178 reward=0.7485067 (430.06 it/sec) -training >> step=1065800, episode=178 reward=0.7455955 (422.22 it/sec) -training >> step=1065900, episode=178 reward=0.7455382 (424.24 it/sec) -training >> step=1066000, episode=178 reward=0.7390438 (449.16 it/sec) -training >> step=1066100, episode=178 reward=0.7503095 (463.12 it/sec) -training >> step=1066200, episode=178 reward=0.7614598 (438.48 it/sec) -training >> step=1066300, episode=178 reward=0.7874021 (445.74 it/sec) -training >> step=1066400, episode=178 reward=0.7561358 (438.26 it/sec) -training >> step=1066500, episode=178 reward=0.7523155 (479.10 it/sec) -training >> step=1066600, episode=178 reward=0.7646098 (426.11 it/sec) -training >> step=1066700, episode=178 reward=0.7555921 (438.34 it/sec) -training >> step=1066800, episode=178 reward=0.738149 (442.02 it/sec) -training >> step=1066900, episode=178 reward=0.7308534 (414.92 it/sec) -training >> step=1067000, episode=178 reward=0.7599254 (388.66 it/sec) -training >> step=1067100, episode=178 reward=0.7645924 (416.08 it/sec) -training >> step=1067200, episode=178 reward=0.7415048 (390.15 it/sec) -training >> step=1067300, episode=179 reward=0.74529 (36.99 it/sec) -training >> step=1067400, episode=179 reward=0.7468867 (497.24 it/sec) -training >> step=1067500, episode=179 reward=0.758255 (505.66 it/sec) -training >> step=1067600, episode=179 reward=0.7650887 (530.85 it/sec) -training >> step=1067700, episode=179 reward=0.764848 (569.26 it/sec) -training >> step=1067800, episode=179 reward=0.7515869 (469.39 it/sec) -training >> step=1067900, episode=179 reward=0.7750536 (503.76 it/sec) -training >> step=1068000, episode=179 reward=0.7630939 (519.48 it/sec) -training >> step=1068100, episode=179 reward=0.7917299 (519.85 it/sec) -training >> step=1068200, episode=179 reward=0.7715956 (537.30 it/sec) -training >> step=1068300, episode=179 reward=0.7449434 (532.95 it/sec) -training >> step=1068400, episode=179 reward=0.7660634 (528.39 it/sec) -training >> step=1068500, episode=179 reward=0.7665409 (528.90 it/sec) -training >> step=1068600, episode=179 reward=0.7804874 (527.33 it/sec) -training >> step=1068700, episode=179 reward=0.7662168 (531.10 it/sec) -training >> step=1068800, episode=179 reward=0.7752031 (497.69 it/sec) -training >> step=1068900, episode=179 reward=0.766374 (532.36 it/sec) -training >> step=1069000, episode=179 reward=0.7899753 (521.59 it/sec) -training >> step=1069100, episode=179 reward=0.7621102 (569.25 it/sec) -training >> step=1069200, episode=179 reward=0.7758446 (541.12 it/sec) -training >> step=1069300, episode=179 reward=0.763159 (446.31 it/sec) -training >> step=1069400, episode=179 reward=0.7489592 (470.16 it/sec) -training >> step=1069500, episode=179 reward=0.753336 (567.83 it/sec) -training >> step=1069600, episode=179 reward=0.7683749 (507.38 it/sec) -training >> step=1069700, episode=179 reward=0.760579 (545.11 it/sec) -training >> step=1069800, episode=179 reward=0.7551227 (536.02 it/sec) -training >> step=1069900, episode=179 reward=0.7626947 (539.81 it/sec) -training >> step=1070000, episode=179 reward=0.7452116 (499.53 it/sec) -training >> step=1070100, episode=179 reward=0.7621899 (497.42 it/sec) -training >> step=1070200, episode=179 reward=0.7404576 (551.94 it/sec) -training >> step=1070300, episode=179 reward=0.7554795 (527.46 it/sec) -training >> step=1070400, episode=179 reward=0.7516351 (505.78 it/sec) -training >> step=1070500, episode=179 reward=0.7682169 (531.00 it/sec) -training >> step=1070600, episode=179 reward=0.7591541 (487.14 it/sec) -training >> step=1070700, episode=179 reward=0.7822195 (492.12 it/sec) -training >> step=1070800, episode=179 reward=0.7354063 (459.04 it/sec) -training >> step=1070900, episode=179 reward=0.747173 (472.90 it/sec) -training >> step=1071000, episode=179 reward=0.754178 (490.77 it/sec) -training >> step=1071100, episode=179 reward=0.7698784 (498.16 it/sec) -training >> step=1071200, episode=179 reward=0.7483143 (478.81 it/sec) -training >> step=1071300, episode=179 reward=0.7553478 (490.40 it/sec) -training >> step=1071400, episode=179 reward=0.763256 (458.34 it/sec) -training >> step=1071500, episode=179 reward=0.761748 (534.70 it/sec) -training >> step=1071600, episode=179 reward=0.7869261 (493.58 it/sec) -training >> step=1071700, episode=179 reward=0.7345639 (499.80 it/sec) -training >> step=1071800, episode=179 reward=0.7493506 (496.74 it/sec) -training >> step=1071900, episode=179 reward=0.7753476 (458.43 it/sec) -training >> step=1072000, episode=179 reward=0.7669084 (443.18 it/sec) -training >> step=1072100, episode=179 reward=0.7662812 (488.75 it/sec) -training >> step=1072200, episode=179 reward=0.7554178 (510.84 it/sec) -training >> step=1072300, episode=179 reward=0.755714 (458.65 it/sec) -training >> step=1072400, episode=179 reward=0.7570284 (457.59 it/sec) -training >> step=1072500, episode=179 reward=0.7527561 (454.52 it/sec) -training >> step=1072600, episode=179 reward=0.7667807 (412.13 it/sec) -training >> step=1072700, episode=179 reward=0.7565669 (443.38 it/sec) -training >> step=1072800, episode=179 reward=0.7557696 (469.75 it/sec) -training >> step=1072900, episode=179 reward=0.7639241 (477.99 it/sec) -training >> step=1073000, episode=179 reward=0.7493066 (500.90 it/sec) -training >> step=1073100, episode=179 reward=0.7450447 (536.40 it/sec) -training >> step=1073200, episode=179 reward=0.7472848 (502.68 it/sec) -training >> step=1073300, episode=180 reward=0.7589973 (102.54 it/sec) -training >> step=1073400, episode=180 reward=0.7700948 (374.12 it/sec) -training >> step=1073500, episode=180 reward=0.7874222 (498.94 it/sec) -training >> step=1073600, episode=180 reward=0.7660503 (524.64 it/sec) -training >> step=1073700, episode=180 reward=0.7834173 (541.85 it/sec) -training >> step=1073800, episode=180 reward=0.7604824 (496.95 it/sec) -training >> step=1073900, episode=180 reward=0.7568938 (575.26 it/sec) -training >> step=1074000, episode=180 reward=0.759806 (545.66 it/sec) -training >> step=1074100, episode=180 reward=0.7735298 (533.05 it/sec) -training >> step=1074200, episode=180 reward=0.7673533 (561.94 it/sec) -training >> step=1074300, episode=180 reward=0.7556133 (581.80 it/sec) -training >> step=1074400, episode=180 reward=0.7556233 (540.54 it/sec) -training >> step=1074500, episode=180 reward=0.7692578 (547.27 it/sec) -training >> step=1074600, episode=180 reward=0.7714496 (500.93 it/sec) -training >> step=1074700, episode=180 reward=0.7790947 (495.91 it/sec) -training >> step=1074800, episode=180 reward=0.7841898 (532.27 it/sec) -training >> step=1074900, episode=180 reward=0.7640772 (561.62 it/sec) -training >> step=1075000, episode=180 reward=0.7569507 (468.91 it/sec) -training >> step=1075100, episode=180 reward=0.7701095 (497.37 it/sec) -training >> step=1075200, episode=180 reward=0.7519009 (505.55 it/sec) -training >> step=1075300, episode=180 reward=0.7727618 (530.08 it/sec) -training >> step=1075400, episode=180 reward=0.7674066 (512.15 it/sec) -training >> step=1075500, episode=180 reward=0.7597327 (525.74 it/sec) -training >> step=1075600, episode=180 reward=0.7638241 (540.96 it/sec) -training >> step=1075700, episode=180 reward=0.774537 (545.95 it/sec) -training >> step=1075800, episode=180 reward=0.7758129 (507.71 it/sec) -training >> step=1075900, episode=180 reward=0.7767237 (540.83 it/sec) -training >> step=1076000, episode=180 reward=0.7479286 (559.59 it/sec) -training >> step=1076100, episode=180 reward=0.7711697 (500.02 it/sec) -training >> step=1076200, episode=180 reward=0.7798675 (499.88 it/sec) -training >> step=1076300, episode=180 reward=0.7844842 (491.63 it/sec) -training >> step=1076400, episode=180 reward=0.742179 (506.23 it/sec) -training >> step=1076500, episode=180 reward=0.7503456 (484.88 it/sec) -training >> step=1076600, episode=180 reward=0.7717662 (506.03 it/sec) -training >> step=1076700, episode=180 reward=0.7820159 (510.49 it/sec) -training >> step=1076800, episode=180 reward=0.7612371 (505.07 it/sec) -training >> step=1076900, episode=180 reward=0.7501318 (487.55 it/sec) -training >> step=1077000, episode=180 reward=0.7578557 (472.50 it/sec) -training >> step=1077100, episode=180 reward=0.7595105 (481.04 it/sec) -training >> step=1077200, episode=180 reward=0.7802169 (506.54 it/sec) -training >> step=1077300, episode=180 reward=0.7575715 (517.53 it/sec) -training >> step=1077400, episode=180 reward=0.7538456 (504.32 it/sec) -training >> step=1077500, episode=180 reward=0.7649639 (532.54 it/sec) -training >> step=1077600, episode=180 reward=0.7494571 (549.85 it/sec) -training >> step=1077700, episode=180 reward=0.7649045 (501.49 it/sec) -training >> step=1077800, episode=180 reward=0.7434663 (498.49 it/sec) -training >> step=1077900, episode=180 reward=0.7693644 (476.42 it/sec) -training >> step=1078000, episode=180 reward=0.7733654 (462.42 it/sec) -training >> step=1078100, episode=180 reward=0.7535344 (503.70 it/sec) -training >> step=1078200, episode=180 reward=0.7660822 (515.60 it/sec) -training >> step=1078300, episode=180 reward=0.7431991 (473.09 it/sec) -training >> step=1078400, episode=180 reward=0.7352663 (485.78 it/sec) -training >> step=1078500, episode=180 reward=0.7666018 (517.80 it/sec) -training >> step=1078600, episode=180 reward=0.7710752 (515.83 it/sec) -training >> step=1078700, episode=180 reward=0.7844861 (538.58 it/sec) -training >> step=1078800, episode=180 reward=0.7455303 (496.64 it/sec) -training >> step=1078900, episode=180 reward=0.7450475 (465.03 it/sec) -training >> step=1079000, episode=180 reward=0.7424553 (517.49 it/sec) -training >> step=1079100, episode=180 reward=0.7512693 (489.76 it/sec) -training >> step=1079200, episode=180 reward=0.7435519 (518.76 it/sec) -training >> step=1079300, episode=181 reward=0.7584242 (156.98 it/sec) -training >> step=1079400, episode=181 reward=0.7594005 (543.62 it/sec) -training >> step=1079500, episode=181 reward=0.7639031 (522.26 it/sec) -training >> step=1079600, episode=181 reward=0.7680835 (554.54 it/sec) -training >> step=1079700, episode=181 reward=0.7608952 (494.31 it/sec) -training >> step=1079800, episode=181 reward=0.7551865 (522.36 it/sec) -training >> step=1079900, episode=181 reward=0.7692802 (474.79 it/sec) -training >> step=1080000, episode=181 reward=0.7652922 (466.35 it/sec) -training >> step=1080100, episode=181 reward=0.756467 (458.02 it/sec) -training >> step=1080200, episode=181 reward=0.7841821 (378.03 it/sec) -training >> step=1080300, episode=181 reward=0.7526768 (468.45 it/sec) -training >> step=1080400, episode=181 reward=0.7834399 (411.39 it/sec) -training >> step=1080500, episode=181 reward=0.7521563 (403.66 it/sec) -training >> step=1080600, episode=181 reward=0.7644594 (382.68 it/sec) -training >> step=1080700, episode=181 reward=0.7772864 (456.06 it/sec) -training >> step=1080800, episode=181 reward=0.774497 (420.26 it/sec) -training >> step=1080900, episode=181 reward=0.7574191 (424.64 it/sec) -training >> step=1081000, episode=181 reward=0.7628658 (501.46 it/sec) -training >> step=1081100, episode=181 reward=0.7478495 (509.86 it/sec) -training >> step=1081200, episode=181 reward=0.7614208 (519.58 it/sec) -training >> step=1081300, episode=181 reward=0.7567506 (497.65 it/sec) -training >> step=1081400, episode=181 reward=0.7428417 (557.00 it/sec) -training >> step=1081500, episode=181 reward=0.7673684 (488.29 it/sec) -training >> step=1081600, episode=181 reward=0.7728777 (556.28 it/sec) -training >> step=1081700, episode=181 reward=0.7682259 (507.60 it/sec) -training >> step=1081800, episode=181 reward=0.7490082 (516.86 it/sec) -training >> step=1081900, episode=181 reward=0.7703648 (507.18 it/sec) -training >> step=1082000, episode=181 reward=0.7815316 (524.62 it/sec) -training >> step=1082100, episode=181 reward=0.7719283 (498.07 it/sec) -training >> step=1082200, episode=181 reward=0.749836 (515.36 it/sec) -training >> step=1082300, episode=181 reward=0.7541202 (488.82 it/sec) -training >> step=1082400, episode=181 reward=0.768443 (530.40 it/sec) -training >> step=1082500, episode=181 reward=0.7845521 (478.54 it/sec) -training >> step=1082600, episode=181 reward=0.7656167 (513.70 it/sec) -training >> step=1082700, episode=181 reward=0.7675573 (579.96 it/sec) -training >> step=1082800, episode=181 reward=0.7621985 (515.02 it/sec) -training >> step=1082900, episode=181 reward=0.7588502 (521.68 it/sec) -training >> step=1083000, episode=181 reward=0.7471182 (515.31 it/sec) -training >> step=1083100, episode=181 reward=0.7750881 (542.83 it/sec) -training >> step=1083200, episode=181 reward=0.7678744 (509.69 it/sec) -training >> step=1083300, episode=181 reward=0.7649497 (475.19 it/sec) -training >> step=1083400, episode=181 reward=0.7617443 (550.19 it/sec) -training >> step=1083500, episode=181 reward=0.7647301 (546.60 it/sec) -training >> step=1083600, episode=181 reward=0.7541005 (481.70 it/sec) -training >> step=1083700, episode=181 reward=0.7271433 (510.21 it/sec) -training >> step=1083800, episode=181 reward=0.7632884 (506.65 it/sec) -training >> step=1083900, episode=181 reward=0.7683293 (514.69 it/sec) -training >> step=1084000, episode=181 reward=0.7454797 (500.51 it/sec) -training >> step=1084100, episode=181 reward=0.7462978 (498.56 it/sec) -training >> step=1084200, episode=181 reward=0.7290023 (474.45 it/sec) -training >> step=1084300, episode=181 reward=0.751818 (449.47 it/sec) -training >> step=1084400, episode=181 reward=0.7425532 (543.85 it/sec) -training >> step=1084500, episode=181 reward=0.7521393 (533.21 it/sec) -training >> step=1084600, episode=181 reward=0.7376246 (545.33 it/sec) -training >> step=1084700, episode=181 reward=0.7521727 (503.14 it/sec) -training >> step=1084800, episode=181 reward=0.7687387 (520.50 it/sec) -training >> step=1084900, episode=181 reward=0.7394841 (494.43 it/sec) -training >> step=1085000, episode=181 reward=0.7605351 (458.08 it/sec) -training >> step=1085100, episode=181 reward=0.7491795 (491.73 it/sec) -training >> step=1085200, episode=181 reward=0.7346464 (516.91 it/sec) -training >> step=1085300, episode=182 reward=0.7652565 (161.33 it/sec) -training >> step=1085400, episode=182 reward=0.7393011 (515.24 it/sec) -training >> step=1085500, episode=182 reward=0.7535259 (500.39 it/sec) -training >> step=1085600, episode=182 reward=0.741038 (523.90 it/sec) -training >> step=1085700, episode=182 reward=0.762822 (499.97 it/sec) -training >> step=1085800, episode=182 reward=0.7550269 (515.28 it/sec) -training >> step=1085900, episode=182 reward=0.7753676 (517.64 it/sec) -training >> step=1086000, episode=182 reward=0.7614794 (540.52 it/sec) -training >> step=1086100, episode=182 reward=0.7738141 (512.92 it/sec) -training >> step=1086200, episode=182 reward=0.774473 (480.50 it/sec) -training >> step=1086300, episode=182 reward=0.7578058 (524.69 it/sec) -training >> step=1086400, episode=182 reward=0.7691016 (511.61 it/sec) -training >> step=1086500, episode=182 reward=0.7608883 (494.99 it/sec) -training >> step=1086600, episode=182 reward=0.7649915 (500.62 it/sec) -training >> step=1086700, episode=182 reward=0.7611057 (475.50 it/sec) -training >> step=1086800, episode=182 reward=0.7624355 (537.27 it/sec) -training >> step=1086900, episode=182 reward=0.7488442 (488.98 it/sec) -training >> step=1087000, episode=182 reward=0.7688097 (469.32 it/sec) -training >> step=1087100, episode=182 reward=0.7435275 (505.54 it/sec) -training >> step=1087200, episode=182 reward=0.7326821 (483.97 it/sec) -training >> step=1087300, episode=182 reward=0.7472759 (487.95 it/sec) -training >> step=1087400, episode=182 reward=0.7625694 (496.87 it/sec) -training >> step=1087500, episode=182 reward=0.7486663 (496.42 it/sec) -training >> step=1087600, episode=182 reward=0.7549788 (521.18 it/sec) -training >> step=1087700, episode=182 reward=0.7567831 (501.65 it/sec) -training >> step=1087800, episode=182 reward=0.7665078 (529.76 it/sec) -training >> step=1087900, episode=182 reward=0.76318 (532.54 it/sec) -training >> step=1088000, episode=182 reward=0.7524454 (541.13 it/sec) -training >> step=1088100, episode=182 reward=0.7702813 (514.06 it/sec) -training >> step=1088200, episode=182 reward=0.7551626 (494.12 it/sec) -training >> step=1088300, episode=182 reward=0.7511567 (528.24 it/sec) -training >> step=1088400, episode=182 reward=0.7737955 (513.85 it/sec) -training >> step=1088500, episode=182 reward=0.7587825 (508.25 it/sec) -training >> step=1088600, episode=182 reward=0.7788659 (533.60 it/sec) -training >> step=1088700, episode=182 reward=0.7494239 (512.19 it/sec) -training >> step=1088800, episode=182 reward=0.7688247 (533.81 it/sec) -training >> step=1088900, episode=182 reward=0.73452 (489.18 it/sec) -training >> step=1089000, episode=182 reward=0.7663552 (521.46 it/sec) -training >> step=1089100, episode=182 reward=0.7299891 (493.22 it/sec) -training >> step=1089200, episode=182 reward=0.7766949 (478.74 it/sec) -training >> step=1089300, episode=182 reward=0.7591183 (489.61 it/sec) -training >> step=1089400, episode=182 reward=0.7395579 (526.00 it/sec) -training >> step=1089500, episode=182 reward=0.7655461 (502.43 it/sec) -training >> step=1089600, episode=182 reward=0.7696791 (518.78 it/sec) -training >> step=1089700, episode=182 reward=0.7550856 (491.52 it/sec) -training >> step=1089800, episode=182 reward=0.7575448 (538.46 it/sec) -training >> step=1089900, episode=182 reward=0.7517723 (560.22 it/sec) -training >> step=1090000, episode=182 reward=0.7620719 (500.70 it/sec) -training >> step=1090100, episode=182 reward=0.7869825 (493.00 it/sec) -training >> step=1090200, episode=182 reward=0.747524 (527.56 it/sec) -training >> step=1090300, episode=182 reward=0.7382079 (509.59 it/sec) -training >> step=1090400, episode=182 reward=0.765237 (539.58 it/sec) -training >> step=1090500, episode=182 reward=0.743829 (544.45 it/sec) -training >> step=1090600, episode=182 reward=0.759032 (565.12 it/sec) -training >> step=1090700, episode=182 reward=0.7537237 (520.11 it/sec) -training >> step=1090800, episode=182 reward=0.7496175 (516.05 it/sec) -training >> step=1090900, episode=182 reward=0.7635499 (502.30 it/sec) -training >> step=1091000, episode=182 reward=0.7257777 (557.38 it/sec) -training >> step=1091100, episode=182 reward=0.7560087 (549.48 it/sec) -training >> step=1091200, episode=182 reward=0.7509034 (513.23 it/sec) -training >> step=1091300, episode=183 reward=0.7562872 (174.48 it/sec) -training >> step=1091400, episode=183 reward=0.7699522 (501.75 it/sec) -training >> step=1091500, episode=183 reward=0.7603008 (518.19 it/sec) -training >> step=1091600, episode=183 reward=0.7424629 (549.90 it/sec) -training >> step=1091700, episode=183 reward=0.7638229 (533.53 it/sec) -training >> step=1091800, episode=183 reward=0.7739273 (521.33 it/sec) -training >> step=1091900, episode=183 reward=0.7611151 (517.64 it/sec) -training >> step=1092000, episode=183 reward=0.7470841 (535.03 it/sec) -training >> step=1092100, episode=183 reward=0.7543517 (519.77 it/sec) -training >> step=1092200, episode=183 reward=0.780602 (484.36 it/sec) -training >> step=1092300, episode=183 reward=0.7736591 (489.85 it/sec) -training >> step=1092400, episode=183 reward=0.7711204 (538.15 it/sec) -training >> step=1092500, episode=183 reward=0.7479326 (570.76 it/sec) -training >> step=1092600, episode=183 reward=0.7746726 (521.42 it/sec) -training >> step=1092700, episode=183 reward=0.7736371 (484.29 it/sec) -training >> step=1092800, episode=183 reward=0.7473657 (519.57 it/sec) -training >> step=1092900, episode=183 reward=0.7528794 (513.41 it/sec) -training >> step=1093000, episode=183 reward=0.7613345 (533.17 it/sec) -training >> step=1093100, episode=183 reward=0.7649651 (533.30 it/sec) -training >> step=1093200, episode=183 reward=0.7448773 (511.77 it/sec) -training >> step=1093300, episode=183 reward=0.7771007 (526.84 it/sec) -training >> step=1093400, episode=183 reward=0.7535039 (532.58 it/sec) -training >> step=1093500, episode=183 reward=0.7455236 (521.82 it/sec) -training >> step=1093600, episode=183 reward=0.7404135 (527.49 it/sec) -training >> step=1093700, episode=183 reward=0.7383682 (546.01 it/sec) -training >> step=1093800, episode=183 reward=0.7666084 (506.00 it/sec) -training >> step=1093900, episode=183 reward=0.7695936 (503.18 it/sec) -training >> step=1094000, episode=183 reward=0.7511062 (520.29 it/sec) -training >> step=1094100, episode=183 reward=0.7544667 (537.13 it/sec) -training >> step=1094200, episode=183 reward=0.7670123 (489.63 it/sec) -training >> step=1094300, episode=183 reward=0.7471276 (507.49 it/sec) -training >> step=1094400, episode=183 reward=0.7703488 (529.38 it/sec) -training >> step=1094500, episode=183 reward=0.743566 (522.73 it/sec) -training >> step=1094600, episode=183 reward=0.7481719 (513.93 it/sec) -training >> step=1094700, episode=183 reward=0.7678411 (508.20 it/sec) -training >> step=1094800, episode=183 reward=0.7547483 (489.90 it/sec) -training >> step=1094900, episode=183 reward=0.7827016 (483.22 it/sec) -training >> step=1095000, episode=183 reward=0.7947242 (527.70 it/sec) -training >> step=1095100, episode=183 reward=0.7723986 (522.35 it/sec) -training >> step=1095200, episode=183 reward=0.7654887 (468.58 it/sec) -training >> step=1095300, episode=183 reward=0.7772511 (429.52 it/sec) -training >> step=1095400, episode=183 reward=0.7686254 (435.34 it/sec) -training >> step=1095500, episode=183 reward=0.7631871 (451.22 it/sec) -training >> step=1095600, episode=183 reward=0.7586293 (471.48 it/sec) -training >> step=1095700, episode=183 reward=0.7568291 (454.27 it/sec) -training >> step=1095800, episode=183 reward=0.7653045 (446.95 it/sec) -training >> step=1095900, episode=183 reward=0.7644029 (537.34 it/sec) -training >> step=1096000, episode=183 reward=0.7409239 (529.85 it/sec) -training >> step=1096100, episode=183 reward=0.7543632 (497.52 it/sec) -training >> step=1096200, episode=183 reward=0.7588938 (508.78 it/sec) -training >> step=1096300, episode=183 reward=0.7547509 (533.58 it/sec) -training >> step=1096400, episode=183 reward=0.7704506 (504.41 it/sec) -training >> step=1096500, episode=183 reward=0.7579584 (526.21 it/sec) -training >> step=1096600, episode=183 reward=0.7663264 (546.97 it/sec) -training >> step=1096700, episode=183 reward=0.7748529 (495.34 it/sec) -training >> step=1096800, episode=183 reward=0.7474431 (542.59 it/sec) -training >> step=1096900, episode=183 reward=0.7313663 (519.93 it/sec) -training >> step=1097000, episode=183 reward=0.764475 (539.71 it/sec) -training >> step=1097100, episode=183 reward=0.7511147 (518.37 it/sec) -training >> step=1097200, episode=183 reward=0.7758542 (486.77 it/sec) -training >> step=1097300, episode=184 reward=0.769138 (182.94 it/sec) -training >> step=1097400, episode=184 reward=0.7580281 (506.42 it/sec) -training >> step=1097500, episode=184 reward=0.7696063 (519.63 it/sec) -training >> step=1097600, episode=184 reward=0.772979 (481.87 it/sec) -training >> step=1097700, episode=184 reward=0.7791593 (466.77 it/sec) -training >> step=1097800, episode=184 reward=0.7615464 (507.37 it/sec) -training >> step=1097900, episode=184 reward=0.7370382 (530.48 it/sec) -training >> step=1098000, episode=184 reward=0.7552145 (532.72 it/sec) -training >> step=1098100, episode=184 reward=0.7446257 (476.62 it/sec) -training >> step=1098200, episode=184 reward=0.7784678 (549.98 it/sec) -training >> step=1098300, episode=184 reward=0.777536 (549.84 it/sec) -training >> step=1098400, episode=184 reward=0.7573319 (559.27 it/sec) -training >> step=1098500, episode=184 reward=0.7833428 (522.86 it/sec) -training >> step=1098600, episode=184 reward=0.7592096 (444.32 it/sec) -training >> step=1098700, episode=184 reward=0.7613494 (502.37 it/sec) -training >> step=1098800, episode=184 reward=0.7474356 (546.13 it/sec) -training >> step=1098900, episode=184 reward=0.7622809 (559.85 it/sec) -training >> step=1099000, episode=184 reward=0.7532158 (545.88 it/sec) -training >> step=1099100, episode=184 reward=0.7626226 (480.64 it/sec) -training >> step=1099200, episode=184 reward=0.7647786 (506.81 it/sec) -training >> step=1099300, episode=184 reward=0.7693172 (438.33 it/sec) -training >> step=1099400, episode=184 reward=0.7562312 (483.78 it/sec) -training >> step=1099500, episode=184 reward=0.7607414 (501.01 it/sec) -training >> step=1099600, episode=184 reward=0.7686262 (410.28 it/sec) -training >> step=1099700, episode=184 reward=0.7634842 (493.19 it/sec) -training >> step=1099800, episode=184 reward=0.7676401 (515.07 it/sec) -training >> step=1099900, episode=184 reward=0.7750931 (499.83 it/sec) -training >> step=1100000, episode=184 reward=0.787194 (523.91 it/sec) -training >> step=1100100, episode=184 reward=0.7568498 (475.75 it/sec) -training >> step=1100200, episode=184 reward=0.7596908 (531.60 it/sec) -training >> step=1100300, episode=184 reward=0.7564605 (483.07 it/sec) -training >> step=1100400, episode=184 reward=0.7614354 (493.42 it/sec) -training >> step=1100500, episode=184 reward=0.7613618 (519.05 it/sec) -training >> step=1100600, episode=184 reward=0.7721814 (496.59 it/sec) -training >> step=1100700, episode=184 reward=0.7641592 (531.82 it/sec) -training >> step=1100800, episode=184 reward=0.7542694 (524.38 it/sec) -training >> step=1100900, episode=184 reward=0.7567969 (498.70 it/sec) -training >> step=1101000, episode=184 reward=0.7593782 (482.36 it/sec) -training >> step=1101100, episode=184 reward=0.7768059 (517.10 it/sec) -training >> step=1101200, episode=184 reward=0.7783687 (539.04 it/sec) -training >> step=1101300, episode=184 reward=0.7763098 (530.62 it/sec) -training >> step=1101400, episode=184 reward=0.7502039 (523.31 it/sec) -training >> step=1101500, episode=184 reward=0.7728457 (546.59 it/sec) -training >> step=1101600, episode=184 reward=0.7603983 (549.74 it/sec) -training >> step=1101700, episode=184 reward=0.7610152 (506.37 it/sec) -training >> step=1101800, episode=184 reward=0.7357642 (528.86 it/sec) -training >> step=1101900, episode=184 reward=0.7503819 (497.09 it/sec) -training >> step=1102000, episode=184 reward=0.7634056 (501.85 it/sec) -training >> step=1102100, episode=184 reward=0.7784222 (497.27 it/sec) -training >> step=1102200, episode=184 reward=0.7480924 (479.18 it/sec) -training >> step=1102300, episode=184 reward=0.7542908 (472.35 it/sec) -training >> step=1102400, episode=184 reward=0.7477275 (539.84 it/sec) -training >> step=1102500, episode=184 reward=0.7511905 (492.76 it/sec) -training >> step=1102600, episode=184 reward=0.7524431 (489.69 it/sec) -training >> step=1102700, episode=184 reward=0.7855151 (511.51 it/sec) -training >> step=1102800, episode=184 reward=0.7252265 (553.98 it/sec) -training >> step=1102900, episode=184 reward=0.7564907 (507.70 it/sec) -training >> step=1103000, episode=184 reward=0.7782153 (518.00 it/sec) -training >> step=1103100, episode=184 reward=0.7749541 (526.00 it/sec) -training >> step=1103200, episode=184 reward=0.736415 (531.15 it/sec) -training >> step=1103300, episode=185 reward=0.7639027 (92.50 it/sec) -training >> step=1103400, episode=185 reward=0.7748278 (496.79 it/sec) -training >> step=1103500, episode=185 reward=0.7625031 (458.41 it/sec) -training >> step=1103600, episode=185 reward=0.7682197 (484.83 it/sec) -training >> step=1103700, episode=185 reward=0.7421247 (517.46 it/sec) -training >> step=1103800, episode=185 reward=0.7320064 (527.57 it/sec) -training >> step=1103900, episode=185 reward=0.7688025 (507.87 it/sec) -training >> step=1104000, episode=185 reward=0.7618669 (493.16 it/sec) -training >> step=1104100, episode=185 reward=0.7662669 (508.20 it/sec) -training >> step=1104200, episode=185 reward=0.7707178 (489.18 it/sec) -training >> step=1104300, episode=185 reward=0.7694556 (490.83 it/sec) -training >> step=1104400, episode=185 reward=0.7453291 (507.82 it/sec) -training >> step=1104500, episode=185 reward=0.7778838 (508.44 it/sec) -training >> step=1104600, episode=185 reward=0.7510949 (524.12 it/sec) -training >> step=1104700, episode=185 reward=0.7754394 (488.05 it/sec) -training >> step=1104800, episode=185 reward=0.7578779 (527.72 it/sec) -training >> step=1104900, episode=185 reward=0.7766428 (504.75 it/sec) -training >> step=1105000, episode=185 reward=0.7724353 (507.47 it/sec) -training >> step=1105100, episode=185 reward=0.7628067 (511.37 it/sec) -training >> step=1105200, episode=185 reward=0.7375935 (506.44 it/sec) -training >> step=1105300, episode=185 reward=0.7648705 (517.93 it/sec) -training >> step=1105400, episode=185 reward=0.770772 (501.51 it/sec) -training >> step=1105500, episode=185 reward=0.7571219 (514.73 it/sec) -training >> step=1105600, episode=185 reward=0.7874396 (536.20 it/sec) -training >> step=1105700, episode=185 reward=0.7601531 (508.60 it/sec) -training >> step=1105800, episode=185 reward=0.7651561 (468.30 it/sec) -training >> step=1105900, episode=185 reward=0.7752187 (480.65 it/sec) -training >> step=1106000, episode=185 reward=0.767962 (554.79 it/sec) -training >> step=1106100, episode=185 reward=0.7677709 (503.68 it/sec) -training >> step=1106200, episode=185 reward=0.7789736 (510.19 it/sec) -training >> step=1106300, episode=185 reward=0.7467558 (463.39 it/sec) -training >> step=1106400, episode=185 reward=0.7550827 (520.44 it/sec) -training >> step=1106500, episode=185 reward=0.7850211 (444.25 it/sec) -training >> step=1106600, episode=185 reward=0.7870789 (508.59 it/sec) -training >> step=1106700, episode=185 reward=0.7642312 (532.07 it/sec) -training >> step=1106800, episode=185 reward=0.7653515 (488.59 it/sec) -training >> step=1106900, episode=185 reward=0.7649297 (481.76 it/sec) -training >> step=1107000, episode=185 reward=0.7620399 (494.57 it/sec) -training >> step=1107100, episode=185 reward=0.7506233 (530.08 it/sec) -training >> step=1107200, episode=185 reward=0.7643233 (506.01 it/sec) -training >> step=1107300, episode=185 reward=0.7483454 (530.61 it/sec) -training >> step=1107400, episode=185 reward=0.7684261 (461.09 it/sec) -training >> step=1107500, episode=185 reward=0.7524067 (534.95 it/sec) -training >> step=1107600, episode=185 reward=0.7775329 (518.92 it/sec) -training >> step=1107700, episode=185 reward=0.7753102 (525.34 it/sec) -training >> step=1107800, episode=185 reward=0.7729673 (508.49 it/sec) -training >> step=1107900, episode=185 reward=0.7713254 (489.96 it/sec) -training >> step=1108000, episode=185 reward=0.7456722 (500.02 it/sec) -training >> step=1108100, episode=185 reward=0.7445394 (534.13 it/sec) -training >> step=1108200, episode=185 reward=0.7573467 (519.52 it/sec) -training >> step=1108300, episode=185 reward=0.7665116 (525.37 it/sec) -training >> step=1108400, episode=185 reward=0.7512388 (499.67 it/sec) -training >> step=1108500, episode=185 reward=0.7679624 (533.02 it/sec) -training >> step=1108600, episode=185 reward=0.7558272 (486.16 it/sec) -training >> step=1108700, episode=185 reward=0.764219 (531.02 it/sec) -training >> step=1108800, episode=185 reward=0.7461926 (512.16 it/sec) -training >> step=1108900, episode=185 reward=0.7533172 (511.34 it/sec) -training >> step=1109000, episode=185 reward=0.7563215 (541.34 it/sec) -training >> step=1109100, episode=185 reward=0.7536438 (463.94 it/sec) -training >> step=1109200, episode=185 reward=0.7692529 (524.85 it/sec) -training >> step=1109300, episode=186 reward=0.7541108 (111.82 it/sec) -training >> step=1109400, episode=186 reward=0.7636561 (471.68 it/sec) -training >> step=1109500, episode=186 reward=0.7536338 (536.01 it/sec) -training >> step=1109600, episode=186 reward=0.7565516 (475.17 it/sec) -training >> step=1109700, episode=186 reward=0.7468353 (415.79 it/sec) -training >> step=1109800, episode=186 reward=0.7532256 (510.28 it/sec) -training >> step=1109900, episode=186 reward=0.75519 (490.71 it/sec) -training >> step=1110000, episode=186 reward=0.7759944 (518.75 it/sec) -training >> step=1110100, episode=186 reward=0.7472759 (526.38 it/sec) -training >> step=1110200, episode=186 reward=0.7618479 (508.65 it/sec) -training >> step=1110300, episode=186 reward=0.7645205 (487.37 it/sec) -training >> step=1110400, episode=186 reward=0.7726391 (510.36 it/sec) -training >> step=1110500, episode=186 reward=0.7881162 (483.00 it/sec) -training >> step=1110600, episode=186 reward=0.7675909 (515.60 it/sec) -training >> step=1110700, episode=186 reward=0.7456593 (550.42 it/sec) -training >> step=1110800, episode=186 reward=0.7721553 (493.28 it/sec) -training >> step=1110900, episode=186 reward=0.7689642 (490.56 it/sec) -training >> step=1111000, episode=186 reward=0.7683283 (481.23 it/sec) -training >> step=1111100, episode=186 reward=0.7685664 (516.28 it/sec) -training >> step=1111200, episode=186 reward=0.7732289 (502.97 it/sec) -training >> step=1111300, episode=186 reward=0.7600574 (481.47 it/sec) -training >> step=1111400, episode=186 reward=0.7633514 (511.18 it/sec) -training >> step=1111500, episode=186 reward=0.7455973 (503.87 it/sec) -training >> step=1111600, episode=186 reward=0.7576765 (493.32 it/sec) -training >> step=1111700, episode=186 reward=0.7296581 (529.44 it/sec) -training >> step=1111800, episode=186 reward=0.7616947 (539.38 it/sec) -training >> step=1111900, episode=186 reward=0.7841127 (477.86 it/sec) -training >> step=1112000, episode=186 reward=0.7707246 (412.52 it/sec) -training >> step=1112100, episode=186 reward=0.778887 (463.66 it/sec) -training >> step=1112200, episode=186 reward=0.7709588 (435.16 it/sec) -training >> step=1112300, episode=186 reward=0.7978027 (437.07 it/sec) -training >> step=1112400, episode=186 reward=0.7683957 (455.62 it/sec) -training >> step=1112500, episode=186 reward=0.7754879 (511.64 it/sec) -training >> step=1112600, episode=186 reward=0.7363399 (503.56 it/sec) -training >> step=1112700, episode=186 reward=0.731958 (503.01 it/sec) -training >> step=1112800, episode=186 reward=0.7770323 (452.07 it/sec) -training >> step=1112900, episode=186 reward=0.759274 (402.16 it/sec) -training >> step=1113000, episode=186 reward=0.7689264 (494.57 it/sec) -training >> step=1113100, episode=186 reward=0.7438593 (506.61 it/sec) -training >> step=1113200, episode=186 reward=0.7632105 (519.37 it/sec) -training >> step=1113300, episode=186 reward=0.7898396 (503.15 it/sec) -training >> step=1113400, episode=186 reward=0.7674959 (512.52 it/sec) -training >> step=1113500, episode=186 reward=0.7734792 (498.77 it/sec) -training >> step=1113600, episode=186 reward=0.7530867 (542.15 it/sec) -training >> step=1113700, episode=186 reward=0.7592289 (504.91 it/sec) -training >> step=1113800, episode=186 reward=0.7576339 (473.26 it/sec) -training >> step=1113900, episode=186 reward=0.7580681 (491.68 it/sec) -training >> step=1114000, episode=186 reward=0.7559406 (492.24 it/sec) -training >> step=1114100, episode=186 reward=0.7285613 (521.06 it/sec) -training >> step=1114200, episode=186 reward=0.7603967 (525.88 it/sec) -training >> step=1114300, episode=186 reward=0.770663 (494.94 it/sec) -training >> step=1114400, episode=186 reward=0.7852192 (519.63 it/sec) -training >> step=1114500, episode=186 reward=0.7502609 (544.18 it/sec) -training >> step=1114600, episode=186 reward=0.7538999 (540.97 it/sec) -training >> step=1114700, episode=186 reward=0.7504876 (544.55 it/sec) -training >> step=1114800, episode=186 reward=0.7542709 (527.21 it/sec) -training >> step=1114900, episode=186 reward=0.7193491 (527.49 it/sec) -training >> step=1115000, episode=186 reward=0.7495818 (529.20 it/sec) -training >> step=1115100, episode=186 reward=0.7423984 (479.27 it/sec) -training >> step=1115200, episode=186 reward=0.7703323 (506.10 it/sec) -training >> step=1115300, episode=187 reward=0.7522861 (117.71 it/sec) -training >> step=1115400, episode=187 reward=0.7636523 (507.00 it/sec) -training >> step=1115500, episode=187 reward=0.7754603 (489.08 it/sec) -training >> step=1115600, episode=187 reward=0.7809306 (504.24 it/sec) -training >> step=1115700, episode=187 reward=0.7452211 (497.78 it/sec) -training >> step=1115800, episode=187 reward=0.7576166 (522.41 it/sec) -training >> step=1115900, episode=187 reward=0.7665688 (494.52 it/sec) -training >> step=1116000, episode=187 reward=0.7493412 (505.48 it/sec) -training >> step=1116100, episode=187 reward=0.7546596 (526.74 it/sec) -training >> step=1116200, episode=187 reward=0.7642167 (521.74 it/sec) -training >> step=1116300, episode=187 reward=0.7784105 (512.02 it/sec) -training >> step=1116400, episode=187 reward=0.7640283 (510.00 it/sec) -training >> step=1116500, episode=187 reward=0.7692292 (501.33 it/sec) -training >> step=1116600, episode=187 reward=0.7547055 (464.60 it/sec) -training >> step=1116700, episode=187 reward=0.7647086 (467.22 it/sec) -training >> step=1116800, episode=187 reward=0.7558495 (463.07 it/sec) -training >> step=1116900, episode=187 reward=0.7673388 (466.08 it/sec) -training >> step=1117000, episode=187 reward=0.7654461 (459.81 it/sec) -training >> step=1117100, episode=187 reward=0.7410184 (461.75 it/sec) -training >> step=1117200, episode=187 reward=0.7660701 (443.85 it/sec) -training >> step=1117300, episode=187 reward=0.7648236 (380.43 it/sec) -training >> step=1117400, episode=187 reward=0.7515168 (458.12 it/sec) -training >> step=1117500, episode=187 reward=0.7508998 (472.72 it/sec) -training >> step=1117600, episode=187 reward=0.764604 (462.14 it/sec) -training >> step=1117700, episode=187 reward=0.7693389 (418.67 it/sec) -training >> step=1117800, episode=187 reward=0.7686632 (434.03 it/sec) -training >> step=1117900, episode=187 reward=0.7486406 (447.90 it/sec) -training >> step=1118000, episode=187 reward=0.7711799 (392.47 it/sec) -training >> step=1118100, episode=187 reward=0.7691521 (390.26 it/sec) -training >> step=1118200, episode=187 reward=0.7423635 (423.53 it/sec) -training >> step=1118300, episode=187 reward=0.7618256 (433.72 it/sec) -training >> step=1118400, episode=187 reward=0.7602102 (467.40 it/sec) -training >> step=1118500, episode=187 reward=0.7788925 (443.29 it/sec) -training >> step=1118600, episode=187 reward=0.750056 (453.28 it/sec) -training >> step=1118700, episode=187 reward=0.7359266 (467.95 it/sec) -training >> step=1118800, episode=187 reward=0.7522882 (478.21 it/sec) -training >> step=1118900, episode=187 reward=0.7594127 (424.83 it/sec) -training >> step=1119000, episode=187 reward=0.7678151 (444.79 it/sec) -training >> step=1119100, episode=187 reward=0.76293 (490.53 it/sec) -training >> step=1119200, episode=187 reward=0.7533152 (420.72 it/sec) -training >> step=1119300, episode=187 reward=0.7695474 (375.28 it/sec) -training >> step=1119400, episode=187 reward=0.7565464 (437.25 it/sec) -training >> step=1119500, episode=187 reward=0.7369453 (418.27 it/sec) -training >> step=1119600, episode=187 reward=0.7644468 (421.87 it/sec) -training >> step=1119700, episode=187 reward=0.7660834 (425.01 it/sec) -training >> step=1119800, episode=187 reward=0.7556135 (456.27 it/sec) -training >> step=1119900, episode=187 reward=0.7582268 (470.94 it/sec) -training >> step=1120000, episode=187 reward=0.7401972 (440.41 it/sec) -training >> step=1120100, episode=187 reward=0.7513917 (444.56 it/sec) -training >> step=1120200, episode=187 reward=0.7640159 (487.78 it/sec) -training >> step=1120300, episode=187 reward=0.753171 (461.46 it/sec) -training >> step=1120400, episode=187 reward=0.7723603 (453.24 it/sec) -training >> step=1120500, episode=187 reward=0.7678362 (448.88 it/sec) -training >> step=1120600, episode=187 reward=0.7593563 (447.53 it/sec) -training >> step=1120700, episode=187 reward=0.7438019 (428.14 it/sec) -training >> step=1120800, episode=187 reward=0.7628002 (432.71 it/sec) -training >> step=1120900, episode=187 reward=0.7769945 (466.22 it/sec) -training >> step=1121000, episode=187 reward=0.7685106 (442.94 it/sec) -training >> step=1121100, episode=187 reward=0.7628554 (491.29 it/sec) -training >> step=1121200, episode=187 reward=0.7469561 (471.39 it/sec) -training >> step=1121300, episode=188 reward=0.765907 (163.36 it/sec) -training >> step=1121400, episode=188 reward=0.7571687 (515.63 it/sec) -training >> step=1121500, episode=188 reward=0.7470833 (518.31 it/sec) -training >> step=1121600, episode=188 reward=0.760391 (401.71 it/sec) -training >> step=1121700, episode=188 reward=0.7626994 (502.93 it/sec) -training >> step=1121800, episode=188 reward=0.7555275 (523.14 it/sec) -training >> step=1121900, episode=188 reward=0.7425599 (479.28 it/sec) -training >> step=1122000, episode=188 reward=0.7581853 (447.72 it/sec) -training >> step=1122100, episode=188 reward=0.7588807 (473.21 it/sec) -training >> step=1122200, episode=188 reward=0.7658668 (486.71 it/sec) -training >> step=1122300, episode=188 reward=0.7750713 (514.66 it/sec) -training >> step=1122400, episode=188 reward=0.7589424 (513.65 it/sec) -training >> step=1122500, episode=188 reward=0.7529763 (468.39 it/sec) -training >> step=1122600, episode=188 reward=0.7653034 (494.20 it/sec) -training >> step=1122700, episode=188 reward=0.762432 (478.83 it/sec) -training >> step=1122800, episode=188 reward=0.7629108 (488.77 it/sec) -training >> step=1122900, episode=188 reward=0.7657121 (413.21 it/sec) -training >> step=1123000, episode=188 reward=0.7637663 (438.49 it/sec) -training >> step=1123100, episode=188 reward=0.7622526 (429.89 it/sec) -training >> step=1123200, episode=188 reward=0.7515541 (476.66 it/sec) -training >> step=1123300, episode=188 reward=0.7399654 (470.23 it/sec) -training >> step=1123400, episode=188 reward=0.7644902 (465.69 it/sec) -training >> step=1123500, episode=188 reward=0.7520582 (494.39 it/sec) -training >> step=1123600, episode=188 reward=0.7678194 (477.38 it/sec) -training >> step=1123700, episode=188 reward=0.7765142 (455.25 it/sec) -training >> step=1123800, episode=188 reward=0.7461575 (437.54 it/sec) -training >> step=1123900, episode=188 reward=0.7399338 (460.12 it/sec) -training >> step=1124000, episode=188 reward=0.7758085 (466.15 it/sec) -training >> step=1124100, episode=188 reward=0.7754157 (475.49 it/sec) -training >> step=1124200, episode=188 reward=0.7677833 (422.80 it/sec) -training >> step=1124300, episode=188 reward=0.7623895 (424.99 it/sec) -training >> step=1124400, episode=188 reward=0.7545334 (462.36 it/sec) -training >> step=1124500, episode=188 reward=0.7573822 (445.03 it/sec) -training >> step=1124600, episode=188 reward=0.7353904 (461.82 it/sec) -training >> step=1124700, episode=188 reward=0.7655356 (486.12 it/sec) -training >> step=1124800, episode=188 reward=0.7571983 (447.17 it/sec) -training >> step=1124900, episode=188 reward=0.7573397 (479.47 it/sec) -training >> step=1125000, episode=188 reward=0.7287436 (494.13 it/sec) -training >> step=1125100, episode=188 reward=0.7565688 (525.90 it/sec) -training >> step=1125200, episode=188 reward=0.7615537 (467.34 it/sec) -training >> step=1125300, episode=188 reward=0.7560892 (470.76 it/sec) -training >> step=1125400, episode=188 reward=0.7804826 (438.99 it/sec) -training >> step=1125500, episode=188 reward=0.7681709 (420.94 it/sec) -training >> step=1125600, episode=188 reward=0.76052 (458.34 it/sec) -training >> step=1125700, episode=188 reward=0.7310708 (414.32 it/sec) -training >> step=1125800, episode=188 reward=0.765321 (454.60 it/sec) -training >> step=1125900, episode=188 reward=0.7742403 (476.69 it/sec) -training >> step=1126000, episode=188 reward=0.7601613 (466.78 it/sec) -training >> step=1126100, episode=188 reward=0.7725363 (495.59 it/sec) -training >> step=1126200, episode=188 reward=0.7519673 (452.57 it/sec) -training >> step=1126300, episode=188 reward=0.784155 (469.07 it/sec) -training >> step=1126400, episode=188 reward=0.7396914 (436.63 it/sec) -training >> step=1126500, episode=188 reward=0.7501072 (394.83 it/sec) -training >> step=1126600, episode=188 reward=0.7534376 (427.01 it/sec) -training >> step=1126700, episode=188 reward=0.7564023 (453.65 it/sec) -training >> step=1126800, episode=188 reward=0.7224875 (456.37 it/sec) -training >> step=1126900, episode=188 reward=0.737649 (445.86 it/sec) -training >> step=1127000, episode=188 reward=0.7639307 (398.52 it/sec) -training >> step=1127100, episode=188 reward=0.7569126 (425.36 it/sec) -training >> step=1127200, episode=188 reward=0.7632356 (431.84 it/sec) -training >> step=1127300, episode=189 reward=0.798335 (164.83 it/sec) -training >> step=1127400, episode=189 reward=0.7484903 (324.74 it/sec) -training >> step=1127500, episode=189 reward=0.7501368 (302.73 it/sec) -training >> step=1127600, episode=189 reward=0.7662092 (308.42 it/sec) -training >> step=1127700, episode=189 reward=0.7555095 (320.99 it/sec) -training >> step=1127800, episode=189 reward=0.7484007 (291.94 it/sec) -training >> step=1127900, episode=189 reward=0.7360781 (441.50 it/sec) -training >> step=1128000, episode=189 reward=0.7614192 (437.03 it/sec) -training >> step=1128100, episode=189 reward=0.7683339 (356.91 it/sec) -training >> step=1128200, episode=189 reward=0.7567303 (357.16 it/sec) -training >> step=1128300, episode=189 reward=0.7610387 (420.87 it/sec) -training >> step=1128400, episode=189 reward=0.7603471 (468.36 it/sec) -training >> step=1128500, episode=189 reward=0.7659496 (413.84 it/sec) -training >> step=1128600, episode=189 reward=0.7660077 (368.58 it/sec) -training >> step=1128700, episode=189 reward=0.7745233 (383.93 it/sec) -training >> step=1128800, episode=189 reward=0.7358609 (428.62 it/sec) -training >> step=1128900, episode=189 reward=0.7556723 (427.38 it/sec) -training >> step=1129000, episode=189 reward=0.74611 (438.27 it/sec) -training >> step=1129100, episode=189 reward=0.7605637 (404.50 it/sec) -training >> step=1129200, episode=189 reward=0.7499182 (477.17 it/sec) -training >> step=1129300, episode=189 reward=0.7606279 (407.53 it/sec) -training >> step=1129400, episode=189 reward=0.7564194 (392.52 it/sec) -training >> step=1129500, episode=189 reward=0.76198 (451.51 it/sec) -training >> step=1129600, episode=189 reward=0.7796728 (469.70 it/sec) -training >> step=1129700, episode=189 reward=0.7450162 (489.95 it/sec) -training >> step=1129800, episode=189 reward=0.7553589 (460.31 it/sec) -training >> step=1129900, episode=189 reward=0.7745018 (465.66 it/sec) -training >> step=1130000, episode=189 reward=0.7568336 (506.01 it/sec) -training >> step=1130100, episode=189 reward=0.7567371 (519.13 it/sec) -training >> step=1130200, episode=189 reward=0.7687811 (451.23 it/sec) -training >> step=1130300, episode=189 reward=0.7617015 (493.03 it/sec) -training >> step=1130400, episode=189 reward=0.8000376 (485.50 it/sec) -training >> step=1130500, episode=189 reward=0.7614058 (521.62 it/sec) -training >> step=1130600, episode=189 reward=0.7715638 (509.53 it/sec) -training >> step=1130700, episode=189 reward=0.7788749 (516.50 it/sec) -training >> step=1130800, episode=189 reward=0.7758883 (502.57 it/sec) -training >> step=1130900, episode=189 reward=0.7815633 (463.62 it/sec) -training >> step=1131000, episode=189 reward=0.754981 (524.22 it/sec) -training >> step=1131100, episode=189 reward=0.774249 (525.85 it/sec) -training >> step=1131200, episode=189 reward=0.7637128 (531.94 it/sec) -training >> step=1131300, episode=189 reward=0.7505897 (519.52 it/sec) -training >> step=1131400, episode=189 reward=0.7657363 (491.13 it/sec) -training >> step=1131500, episode=189 reward=0.7775048 (481.84 it/sec) -training >> step=1131600, episode=189 reward=0.7597852 (426.17 it/sec) -training >> step=1131700, episode=189 reward=0.754704 (451.82 it/sec) -training >> step=1131800, episode=189 reward=0.7604488 (532.15 it/sec) -training >> step=1131900, episode=189 reward=0.7679158 (500.84 it/sec) -training >> step=1132000, episode=189 reward=0.775806 (548.16 it/sec) -training >> step=1132100, episode=189 reward=0.7782 (517.43 it/sec) -training >> step=1132200, episode=189 reward=0.7654625 (521.47 it/sec) -training >> step=1132300, episode=189 reward=0.7542716 (548.58 it/sec) -training >> step=1132400, episode=189 reward=0.7583134 (503.61 it/sec) -training >> step=1132500, episode=189 reward=0.7732782 (461.32 it/sec) -training >> step=1132600, episode=189 reward=0.7697386 (524.19 it/sec) -training >> step=1132700, episode=189 reward=0.7624125 (463.67 it/sec) -training >> step=1132800, episode=189 reward=0.7617922 (528.70 it/sec) -training >> step=1132900, episode=189 reward=0.7670881 (514.75 it/sec) -training >> step=1133000, episode=189 reward=0.7772767 (522.53 it/sec) -training >> step=1133100, episode=189 reward=0.7585157 (515.86 it/sec) -training >> step=1133200, episode=189 reward=0.7523681 (533.65 it/sec) -training >> step=1133300, episode=190 reward=0.7534998 (79.80 it/sec) -training >> step=1133400, episode=190 reward=0.749769 (479.67 it/sec) -training >> step=1133500, episode=190 reward=0.7351252 (485.11 it/sec) -training >> step=1133600, episode=190 reward=0.7392791 (486.08 it/sec) -training >> step=1133700, episode=190 reward=0.7597322 (511.94 it/sec) -training >> step=1133800, episode=190 reward=0.7465158 (528.04 it/sec) -training >> step=1133900, episode=190 reward=0.7875271 (528.04 it/sec) -training >> step=1134000, episode=190 reward=0.749119 (521.37 it/sec) -training >> step=1134100, episode=190 reward=0.7598749 (470.11 it/sec) -training >> step=1134200, episode=190 reward=0.7606825 (541.48 it/sec) -training >> step=1134300, episode=190 reward=0.7643188 (349.93 it/sec) -training >> step=1134400, episode=190 reward=0.7553174 (523.71 it/sec) -training >> step=1134500, episode=190 reward=0.7577787 (524.52 it/sec) -training >> step=1134600, episode=190 reward=0.7635584 (519.15 it/sec) -training >> step=1134700, episode=190 reward=0.7414899 (549.26 it/sec) -training >> step=1134800, episode=190 reward=0.768751 (516.21 it/sec) -training >> step=1134900, episode=190 reward=0.7525268 (522.59 it/sec) -training >> step=1135000, episode=190 reward=0.7721445 (569.34 it/sec) -training >> step=1135100, episode=190 reward=0.7406455 (488.19 it/sec) -training >> step=1135200, episode=190 reward=0.7655364 (507.31 it/sec) -training >> step=1135300, episode=190 reward=0.7538336 (492.70 it/sec) -training >> step=1135400, episode=190 reward=0.7591594 (527.06 it/sec) -training >> step=1135500, episode=190 reward=0.7426866 (509.51 it/sec) -training >> step=1135600, episode=190 reward=0.7723729 (534.94 it/sec) -training >> step=1135700, episode=190 reward=0.7490554 (546.69 it/sec) -training >> step=1135800, episode=190 reward=0.7891551 (517.89 it/sec) -training >> step=1135900, episode=190 reward=0.762863 (524.41 it/sec) -training >> step=1136000, episode=190 reward=0.7567632 (536.57 it/sec) -training >> step=1136100, episode=190 reward=0.7864667 (491.19 it/sec) -training >> step=1136200, episode=190 reward=0.7428483 (504.10 it/sec) -training >> step=1136300, episode=190 reward=0.7739108 (509.92 it/sec) -training >> step=1136400, episode=190 reward=0.7541774 (539.33 it/sec) -training >> step=1136500, episode=190 reward=0.7526824 (533.96 it/sec) -training >> step=1136600, episode=190 reward=0.7558028 (505.37 it/sec) -training >> step=1136700, episode=190 reward=0.7620833 (482.54 it/sec) -training >> step=1136800, episode=190 reward=0.7260639 (551.19 it/sec) -training >> step=1136900, episode=190 reward=0.7652427 (526.58 it/sec) -training >> step=1137000, episode=190 reward=0.7607397 (535.62 it/sec) -training >> step=1137100, episode=190 reward=0.7472038 (532.80 it/sec) -training >> step=1137200, episode=190 reward=0.7482685 (543.50 it/sec) -training >> step=1137300, episode=190 reward=0.7591128 (509.37 it/sec) -training >> step=1137400, episode=190 reward=0.7531137 (516.19 it/sec) -training >> step=1137500, episode=190 reward=0.7400544 (507.88 it/sec) -training >> step=1137600, episode=190 reward=0.7378715 (502.37 it/sec) -training >> step=1137700, episode=190 reward=0.7646509 (542.05 it/sec) -training >> step=1137800, episode=190 reward=0.7528498 (514.10 it/sec) -training >> step=1137900, episode=190 reward=0.7745667 (500.08 it/sec) -training >> step=1138000, episode=190 reward=0.7606939 (527.65 it/sec) -training >> step=1138100, episode=190 reward=0.7655993 (524.28 it/sec) -training >> step=1138200, episode=190 reward=0.7461186 (514.18 it/sec) -training >> step=1138300, episode=190 reward=0.7636446 (506.97 it/sec) -training >> step=1138400, episode=190 reward=0.7776623 (527.69 it/sec) -training >> step=1138500, episode=190 reward=0.7853754 (525.57 it/sec) -training >> step=1138600, episode=190 reward=0.7622356 (542.68 it/sec) -training >> step=1138700, episode=190 reward=0.7470804 (525.79 it/sec) -training >> step=1138800, episode=190 reward=0.7441241 (462.77 it/sec) -training >> step=1138900, episode=190 reward=0.7727117 (520.86 it/sec) -training >> step=1139000, episode=190 reward=0.750341 (521.86 it/sec) -training >> step=1139100, episode=190 reward=0.7849514 (557.88 it/sec) -training >> step=1139200, episode=190 reward=0.7469999 (518.33 it/sec) -training >> step=1139300, episode=191 reward=0.7677013 (115.92 it/sec) -training >> step=1139400, episode=191 reward=0.7602597 (494.87 it/sec) -training >> step=1139500, episode=191 reward=0.7477336 (500.75 it/sec) -training >> step=1139600, episode=191 reward=0.7441521 (507.39 it/sec) -training >> step=1139700, episode=191 reward=0.7670399 (481.68 it/sec) -training >> step=1139800, episode=191 reward=0.7823315 (531.11 it/sec) -training >> step=1139900, episode=191 reward=0.7717698 (488.09 it/sec) -training >> step=1140000, episode=191 reward=0.7791434 (538.32 it/sec) -training >> step=1140100, episode=191 reward=0.7610617 (497.31 it/sec) -training >> step=1140200, episode=191 reward=0.774882 (507.44 it/sec) -training >> step=1140300, episode=191 reward=0.7473325 (513.86 it/sec) -training >> step=1140400, episode=191 reward=0.7324053 (364.20 it/sec) -training >> step=1140500, episode=191 reward=0.7441665 (527.97 it/sec) -training >> step=1140600, episode=191 reward=0.7727101 (554.63 it/sec) -training >> step=1140700, episode=191 reward=0.751789 (492.62 it/sec) -training >> step=1140800, episode=191 reward=0.774909 (553.52 it/sec) -training >> step=1140900, episode=191 reward=0.7667844 (483.84 it/sec) -training >> step=1141000, episode=191 reward=0.7390869 (447.79 it/sec) -training >> step=1141100, episode=191 reward=0.7699752 (522.02 it/sec) -training >> step=1141200, episode=191 reward=0.7370074 (542.14 it/sec) -training >> step=1141300, episode=191 reward=0.7707547 (551.57 it/sec) -training >> step=1141400, episode=191 reward=0.7338932 (506.66 it/sec) -training >> step=1141500, episode=191 reward=0.7475288 (479.88 it/sec) -training >> step=1141600, episode=191 reward=0.7721707 (539.29 it/sec) -training >> step=1141700, episode=191 reward=0.7405414 (569.21 it/sec) -training >> step=1141800, episode=191 reward=0.7432896 (540.38 it/sec) -training >> step=1141900, episode=191 reward=0.7703332 (486.87 it/sec) -training >> step=1142000, episode=191 reward=0.7675873 (517.84 it/sec) -training >> step=1142100, episode=191 reward=0.7508813 (545.26 it/sec) -training >> step=1142200, episode=191 reward=0.7468731 (537.26 it/sec) -training >> step=1142300, episode=191 reward=0.7754164 (552.20 it/sec) -training >> step=1142400, episode=191 reward=0.7945203 (551.27 it/sec) -training >> step=1142500, episode=191 reward=0.7587826 (545.00 it/sec) -training >> step=1142600, episode=191 reward=0.7619296 (493.73 it/sec) -training >> step=1142700, episode=191 reward=0.7842613 (511.74 it/sec) -training >> step=1142800, episode=191 reward=0.7396801 (541.16 it/sec) -training >> step=1142900, episode=191 reward=0.7425998 (551.19 it/sec) -training >> step=1143000, episode=191 reward=0.7724407 (506.74 it/sec) -training >> step=1143100, episode=191 reward=0.7681432 (529.20 it/sec) -training >> step=1143200, episode=191 reward=0.7587192 (516.03 it/sec) -training >> step=1143300, episode=191 reward=0.7771754 (482.92 it/sec) -training >> step=1143400, episode=191 reward=0.7731031 (528.39 it/sec) -training >> step=1143500, episode=191 reward=0.7502739 (521.35 it/sec) -training >> step=1143600, episode=191 reward=0.7610125 (533.90 it/sec) -training >> step=1143700, episode=191 reward=0.7656808 (510.91 it/sec) -training >> step=1143800, episode=191 reward=0.7347695 (524.57 it/sec) -training >> step=1143900, episode=191 reward=0.7471991 (500.99 it/sec) -training >> step=1144000, episode=191 reward=0.7458338 (494.81 it/sec) -training >> step=1144100, episode=191 reward=0.7468632 (529.72 it/sec) -training >> step=1144200, episode=191 reward=0.7583302 (523.04 it/sec) -training >> step=1144300, episode=191 reward=0.7552984 (569.13 it/sec) -training >> step=1144400, episode=191 reward=0.7470332 (537.57 it/sec) -training >> step=1144500, episode=191 reward=0.7560006 (540.74 it/sec) -training >> step=1144600, episode=191 reward=0.7777885 (568.05 it/sec) -training >> step=1144700, episode=191 reward=0.7443645 (505.52 it/sec) -training >> step=1144800, episode=191 reward=0.726302 (524.85 it/sec) -training >> step=1144900, episode=191 reward=0.7485028 (519.13 it/sec) -training >> step=1145000, episode=191 reward=0.7496856 (485.89 it/sec) -training >> step=1145100, episode=191 reward=0.7781165 (518.67 it/sec) -training >> step=1145200, episode=191 reward=0.740168 (498.73 it/sec) -training >> step=1145300, episode=192 reward=0.7649652 (120.45 it/sec) -training >> step=1145400, episode=192 reward=0.7344509 (515.06 it/sec) -training >> step=1145500, episode=192 reward=0.7587661 (507.83 it/sec) -training >> step=1145600, episode=192 reward=0.7398096 (493.01 it/sec) -training >> step=1145700, episode=192 reward=0.7524879 (548.20 it/sec) -training >> step=1145800, episode=192 reward=0.7385017 (503.57 it/sec) -training >> step=1145900, episode=192 reward=0.756016 (501.97 it/sec) -training >> step=1146000, episode=192 reward=0.7422777 (481.68 it/sec) -training >> step=1146100, episode=192 reward=0.7595128 (502.21 it/sec) -training >> step=1146200, episode=192 reward=0.7573385 (548.93 it/sec) -training >> step=1146300, episode=192 reward=0.7660555 (558.19 it/sec) -training >> step=1146400, episode=192 reward=0.7645776 (474.32 it/sec) -training >> step=1146500, episode=192 reward=0.7630081 (381.82 it/sec) -training >> step=1146600, episode=192 reward=0.756703 (523.26 it/sec) -training >> step=1146700, episode=192 reward=0.762641 (519.60 it/sec) -training >> step=1146800, episode=192 reward=0.7636498 (527.64 it/sec) -training >> step=1146900, episode=192 reward=0.7501739 (502.35 it/sec) -training >> step=1147000, episode=192 reward=0.7686816 (535.16 it/sec) -training >> step=1147100, episode=192 reward=0.7687141 (550.96 it/sec) -training >> step=1147200, episode=192 reward=0.7690123 (519.00 it/sec) -training >> step=1147300, episode=192 reward=0.7721618 (549.78 it/sec) -training >> step=1147400, episode=192 reward=0.7412794 (522.11 it/sec) -training >> step=1147500, episode=192 reward=0.7588731 (547.62 it/sec) -training >> step=1147600, episode=192 reward=0.7598999 (520.25 it/sec) -training >> step=1147700, episode=192 reward=0.7518861 (515.08 it/sec) -training >> step=1147800, episode=192 reward=0.7490306 (511.88 it/sec) -training >> step=1147900, episode=192 reward=0.7640473 (496.73 it/sec) -training >> step=1148000, episode=192 reward=0.7759907 (553.29 it/sec) -training >> step=1148100, episode=192 reward=0.7501754 (514.75 it/sec) -training >> step=1148200, episode=192 reward=0.7679493 (551.20 it/sec) -training >> step=1148300, episode=192 reward=0.7610558 (547.39 it/sec) -training >> step=1148400, episode=192 reward=0.7578219 (487.60 it/sec) -training >> step=1148500, episode=192 reward=0.7567717 (512.15 it/sec) -training >> step=1148600, episode=192 reward=0.7644821 (550.11 it/sec) -training >> step=1148700, episode=192 reward=0.7676513 (534.69 it/sec) -training >> step=1148800, episode=192 reward=0.769092 (531.91 it/sec) -training >> step=1148900, episode=192 reward=0.7826586 (558.78 it/sec) -training >> step=1149000, episode=192 reward=0.7314019 (517.81 it/sec) -training >> step=1149100, episode=192 reward=0.7642123 (543.53 it/sec) -training >> step=1149200, episode=192 reward=0.7528871 (545.07 it/sec) -training >> step=1149300, episode=192 reward=0.7605283 (509.99 it/sec) -training >> step=1149400, episode=192 reward=0.7610022 (532.94 it/sec) -training >> step=1149500, episode=192 reward=0.7646364 (523.01 it/sec) -training >> step=1149600, episode=192 reward=0.7568091 (484.03 it/sec) -training >> step=1149700, episode=192 reward=0.7453726 (552.51 it/sec) -training >> step=1149800, episode=192 reward=0.7619125 (527.42 it/sec) -training >> step=1149900, episode=192 reward=0.758218 (559.25 it/sec) -training >> step=1150000, episode=192 reward=0.7706293 (531.74 it/sec) -training >> step=1150100, episode=192 reward=0.721083 (505.47 it/sec) -training >> step=1150200, episode=192 reward=0.751431 (558.24 it/sec) -training >> step=1150300, episode=192 reward=0.7486665 (524.71 it/sec) -training >> step=1150400, episode=192 reward=0.7774285 (506.36 it/sec) -training >> step=1150500, episode=192 reward=0.7730487 (543.08 it/sec) -training >> step=1150600, episode=192 reward=0.7619416 (528.29 it/sec) -training >> step=1150700, episode=192 reward=0.7444927 (546.84 it/sec) -training >> step=1150800, episode=192 reward=0.7398754 (537.41 it/sec) -training >> step=1150900, episode=192 reward=0.7592635 (508.40 it/sec) -training >> step=1151000, episode=192 reward=0.771437 (522.64 it/sec) -training >> step=1151100, episode=192 reward=0.7556294 (515.73 it/sec) -training >> step=1151200, episode=192 reward=0.7764947 (505.07 it/sec) -training >> step=1151300, episode=193 reward=0.7695124 (118.51 it/sec) -training >> step=1151400, episode=193 reward=0.7611749 (490.21 it/sec) -training >> step=1151500, episode=193 reward=0.7463312 (495.09 it/sec) -training >> step=1151600, episode=193 reward=0.7529939 (521.91 it/sec) -training >> step=1151700, episode=193 reward=0.7707878 (520.41 it/sec) -training >> step=1151800, episode=193 reward=0.7445243 (561.65 it/sec) -training >> step=1151900, episode=193 reward=0.7476255 (535.70 it/sec) -training >> step=1152000, episode=193 reward=0.7608664 (553.06 it/sec) -training >> step=1152100, episode=193 reward=0.7567212 (548.48 it/sec) -training >> step=1152200, episode=193 reward=0.7670134 (540.97 it/sec) -training >> step=1152300, episode=193 reward=0.7795702 (494.28 it/sec) -training >> step=1152400, episode=193 reward=0.755528 (544.10 it/sec) -training >> step=1152500, episode=193 reward=0.7736157 (520.91 it/sec) -training >> step=1152600, episode=193 reward=0.7470574 (355.12 it/sec) -training >> step=1152700, episode=193 reward=0.7555364 (502.15 it/sec) -training >> step=1152800, episode=193 reward=0.7675568 (494.80 it/sec) -training >> step=1152900, episode=193 reward=0.7605909 (555.67 it/sec) -training >> step=1153000, episode=193 reward=0.7501217 (497.25 it/sec) -training >> step=1153100, episode=193 reward=0.7589496 (534.66 it/sec) -training >> step=1153200, episode=193 reward=0.7593116 (537.41 it/sec) -training >> step=1153300, episode=193 reward=0.7685119 (531.78 it/sec) -training >> step=1153400, episode=193 reward=0.7561925 (508.00 it/sec) -training >> step=1153500, episode=193 reward=0.7667769 (553.31 it/sec) -training >> step=1153600, episode=193 reward=0.7793213 (497.81 it/sec) -training >> step=1153700, episode=193 reward=0.7477457 (553.70 it/sec) -training >> step=1153800, episode=193 reward=0.7372262 (549.46 it/sec) -training >> step=1153900, episode=193 reward=0.7815098 (470.67 it/sec) -training >> step=1154000, episode=193 reward=0.7751291 (490.28 it/sec) -training >> step=1154100, episode=193 reward=0.7585179 (487.75 it/sec) -training >> step=1154200, episode=193 reward=0.7436152 (540.32 it/sec) -training >> step=1154300, episode=193 reward=0.7467748 (552.57 it/sec) -training >> step=1154400, episode=193 reward=0.7741517 (483.35 it/sec) -training >> step=1154500, episode=193 reward=0.7525921 (514.16 it/sec) -training >> step=1154600, episode=193 reward=0.7661831 (541.25 it/sec) -training >> step=1154700, episode=193 reward=0.7643298 (467.22 it/sec) -training >> step=1154800, episode=193 reward=0.7366916 (536.40 it/sec) -training >> step=1154900, episode=193 reward=0.7682097 (565.17 it/sec) -training >> step=1155000, episode=193 reward=0.7534595 (502.06 it/sec) -training >> step=1155100, episode=193 reward=0.7581512 (499.51 it/sec) -training >> step=1155200, episode=193 reward=0.7452841 (542.92 it/sec) -training >> step=1155300, episode=193 reward=0.7632517 (534.25 it/sec) -training >> step=1155400, episode=193 reward=0.7418419 (538.11 it/sec) -training >> step=1155500, episode=193 reward=0.7432867 (511.27 it/sec) -training >> step=1155600, episode=193 reward=0.7582089 (546.42 it/sec) -training >> step=1155700, episode=193 reward=0.7715034 (566.95 it/sec) -training >> step=1155800, episode=193 reward=0.7549039 (513.91 it/sec) -training >> step=1155900, episode=193 reward=0.7475183 (492.95 it/sec) -training >> step=1156000, episode=193 reward=0.7440987 (532.66 it/sec) -training >> step=1156100, episode=193 reward=0.7544421 (559.57 it/sec) -training >> step=1156200, episode=193 reward=0.7466252 (551.55 it/sec) -training >> step=1156300, episode=193 reward=0.7525719 (529.33 it/sec) -training >> step=1156400, episode=193 reward=0.7369576 (549.60 it/sec) -training >> step=1156500, episode=193 reward=0.742869 (516.51 it/sec) -training >> step=1156600, episode=193 reward=0.763171 (524.71 it/sec) -training >> step=1156700, episode=193 reward=0.7486528 (556.22 it/sec) -training >> step=1156800, episode=193 reward=0.7651588 (523.66 it/sec) -training >> step=1156900, episode=193 reward=0.7384285 (529.97 it/sec) -training >> step=1157000, episode=193 reward=0.7389762 (542.40 it/sec) -training >> step=1157100, episode=193 reward=0.7720998 (507.92 it/sec) -training >> step=1157200, episode=193 reward=0.7377176 (523.22 it/sec) -training >> step=1157300, episode=194 reward=0.7760097 (96.44 it/sec) -training >> step=1157400, episode=194 reward=0.7745125 (503.81 it/sec) -training >> step=1157500, episode=194 reward=0.7683842 (532.31 it/sec) -training >> step=1157600, episode=194 reward=0.7679411 (538.61 it/sec) -training >> step=1157700, episode=194 reward=0.7491853 (477.81 it/sec) -training >> step=1157800, episode=194 reward=0.7519407 (538.15 it/sec) -training >> step=1157900, episode=194 reward=0.7579475 (534.01 it/sec) -training >> step=1158000, episode=194 reward=0.7732282 (525.26 it/sec) -training >> step=1158100, episode=194 reward=0.7584251 (540.09 it/sec) -training >> step=1158200, episode=194 reward=0.7511692 (535.90 it/sec) -training >> step=1158300, episode=194 reward=0.7756194 (531.05 it/sec) -training >> step=1158400, episode=194 reward=0.7596066 (523.43 it/sec) -training >> step=1158500, episode=194 reward=0.7540525 (539.12 it/sec) -training >> step=1158600, episode=194 reward=0.7569406 (510.92 it/sec) -training >> step=1158700, episode=194 reward=0.7816231 (390.18 it/sec) -training >> step=1158800, episode=194 reward=0.7516845 (494.03 it/sec) -training >> step=1158900, episode=194 reward=0.7567331 (508.91 it/sec) -training >> step=1159000, episode=194 reward=0.766852 (541.18 it/sec) -training >> step=1159100, episode=194 reward=0.7842294 (499.84 it/sec) -training >> step=1159200, episode=194 reward=0.7495077 (499.20 it/sec) -training >> step=1159300, episode=194 reward=0.76323 (562.29 it/sec) -training >> step=1159400, episode=194 reward=0.7718234 (516.08 it/sec) -training >> step=1159500, episode=194 reward=0.7696203 (526.67 it/sec) -training >> step=1159600, episode=194 reward=0.7406256 (506.06 it/sec) -training >> step=1159700, episode=194 reward=0.7704679 (532.36 it/sec) -training >> step=1159800, episode=194 reward=0.7487416 (539.42 it/sec) -training >> step=1159900, episode=194 reward=0.7273448 (527.24 it/sec) -training >> step=1160000, episode=194 reward=0.8028993 (529.48 it/sec) -training >> step=1160100, episode=194 reward=0.7657197 (494.45 it/sec) -training >> step=1160200, episode=194 reward=0.7637983 (524.77 it/sec) -training >> step=1160300, episode=194 reward=0.7642292 (525.68 it/sec) -training >> step=1160400, episode=194 reward=0.7714075 (516.61 it/sec) -training >> step=1160500, episode=194 reward=0.751614 (475.29 it/sec) -training >> step=1160600, episode=194 reward=0.7538393 (536.24 it/sec) -training >> step=1160700, episode=194 reward=0.7564142 (487.26 it/sec) -training >> step=1160800, episode=194 reward=0.7567742 (510.19 it/sec) -training >> step=1160900, episode=194 reward=0.7741699 (507.76 it/sec) -training >> step=1161000, episode=194 reward=0.7665769 (510.21 it/sec) -training >> step=1161100, episode=194 reward=0.7821127 (534.18 it/sec) -training >> step=1161200, episode=194 reward=0.7764123 (571.13 it/sec) -training >> step=1161300, episode=194 reward=0.7781082 (491.24 it/sec) -training >> step=1161400, episode=194 reward=0.7648631 (512.69 it/sec) -training >> step=1161500, episode=194 reward=0.7555575 (502.50 it/sec) -training >> step=1161600, episode=194 reward=0.7617452 (555.03 it/sec) -training >> step=1161700, episode=194 reward=0.7394833 (532.25 it/sec) -training >> step=1161800, episode=194 reward=0.735809 (512.10 it/sec) -training >> step=1161900, episode=194 reward=0.7991256 (497.50 it/sec) -training >> step=1162000, episode=194 reward=0.7501675 (502.93 it/sec) -training >> step=1162100, episode=194 reward=0.726892 (498.83 it/sec) -training >> step=1162200, episode=194 reward=0.7483864 (504.38 it/sec) -training >> step=1162300, episode=194 reward=0.7575678 (498.11 it/sec) -training >> step=1162400, episode=194 reward=0.7670012 (517.23 it/sec) -training >> step=1162500, episode=194 reward=0.7171687 (549.81 it/sec) -training >> step=1162600, episode=194 reward=0.7663685 (512.76 it/sec) -training >> step=1162700, episode=194 reward=0.775533 (559.13 it/sec) -training >> step=1162800, episode=194 reward=0.7657079 (517.02 it/sec) -training >> step=1162900, episode=194 reward=0.7692067 (568.83 it/sec) -training >> step=1163000, episode=194 reward=0.7488753 (555.97 it/sec) -training >> step=1163100, episode=194 reward=0.7652715 (527.69 it/sec) -training >> step=1163200, episode=194 reward=0.7339678 (530.53 it/sec) -training >> step=1163300, episode=195 reward=0.7419141 (65.40 it/sec) -training >> step=1163400, episode=195 reward=0.7578188 (492.01 it/sec) -training >> step=1163500, episode=195 reward=0.7515524 (509.08 it/sec) -training >> step=1163600, episode=195 reward=0.7459674 (527.84 it/sec) -training >> step=1163700, episode=195 reward=0.7471458 (532.95 it/sec) -training >> step=1163800, episode=195 reward=0.7581307 (499.49 it/sec) -training >> step=1163900, episode=195 reward=0.7596716 (584.40 it/sec) -training >> step=1164000, episode=195 reward=0.75896 (525.59 it/sec) -training >> step=1164100, episode=195 reward=0.7564912 (507.99 it/sec) -training >> step=1164200, episode=195 reward=0.7531711 (519.84 it/sec) -training >> step=1164300, episode=195 reward=0.7616153 (499.42 it/sec) -training >> step=1164400, episode=195 reward=0.7788453 (485.16 it/sec) -training >> step=1164500, episode=195 reward=0.7569551 (532.26 it/sec) -training >> step=1164600, episode=195 reward=0.7446485 (552.79 it/sec) -training >> step=1164700, episode=195 reward=0.7603058 (504.43 it/sec) -training >> step=1164800, episode=195 reward=0.7817824 (493.59 it/sec) -training >> step=1164900, episode=195 reward=0.7660963 (382.30 it/sec) -training >> step=1165000, episode=195 reward=0.77061 (546.96 it/sec) -training >> step=1165100, episode=195 reward=0.7649879 (537.72 it/sec) -training >> step=1165200, episode=195 reward=0.7771574 (542.46 it/sec) -training >> step=1165300, episode=195 reward=0.7780164 (562.67 it/sec) -training >> step=1165400, episode=195 reward=0.7618095 (513.43 it/sec) -training >> step=1165500, episode=195 reward=0.7496553 (526.90 it/sec) -training >> step=1165600, episode=195 reward=0.7323551 (548.15 it/sec) -training >> step=1165700, episode=195 reward=0.7834735 (525.56 it/sec) -training >> step=1165800, episode=195 reward=0.7708529 (544.14 it/sec) -training >> step=1165900, episode=195 reward=0.7434433 (516.35 it/sec) -training >> step=1166000, episode=195 reward=0.7680283 (514.40 it/sec) -training >> step=1166100, episode=195 reward=0.7633219 (557.65 it/sec) -training >> step=1166200, episode=195 reward=0.7278904 (477.90 it/sec) -training >> step=1166300, episode=195 reward=0.7626585 (548.74 it/sec) -training >> step=1166400, episode=195 reward=0.7622353 (540.42 it/sec) -training >> step=1166500, episode=195 reward=0.7572067 (554.99 it/sec) -training >> step=1166600, episode=195 reward=0.7545373 (522.45 it/sec) -training >> step=1166700, episode=195 reward=0.7685145 (504.14 it/sec) -training >> step=1166800, episode=195 reward=0.7406298 (451.50 it/sec) -training >> step=1166900, episode=195 reward=0.7520195 (379.51 it/sec) -training >> step=1167000, episode=195 reward=0.7642335 (378.75 it/sec) -training >> step=1167100, episode=195 reward=0.7663108 (423.18 it/sec) -training >> step=1167200, episode=195 reward=0.7768459 (425.05 it/sec) -training >> step=1167300, episode=195 reward=0.7379377 (388.92 it/sec) -training >> step=1167400, episode=195 reward=0.7549905 (336.60 it/sec) -training >> step=1167500, episode=195 reward=0.7590679 (356.11 it/sec) -training >> step=1167600, episode=195 reward=0.7354265 (323.13 it/sec) -training >> step=1167700, episode=195 reward=0.7564435 (306.73 it/sec) -training >> step=1167800, episode=195 reward=0.7571101 (351.69 it/sec) -training >> step=1167900, episode=195 reward=0.7432605 (398.79 it/sec) -training >> step=1168000, episode=195 reward=0.7510329 (430.92 it/sec) -training >> step=1168100, episode=195 reward=0.7733051 (441.09 it/sec) -training >> step=1168200, episode=195 reward=0.761178 (501.07 it/sec) -training >> step=1168300, episode=195 reward=0.7639112 (463.08 it/sec) -training >> step=1168400, episode=195 reward=0.7752105 (543.25 it/sec) -training >> step=1168500, episode=195 reward=0.7506645 (498.47 it/sec) -training >> step=1168600, episode=195 reward=0.7661389 (411.68 it/sec) -training >> step=1168700, episode=195 reward=0.7572544 (440.07 it/sec) -training >> step=1168800, episode=195 reward=0.7509356 (474.19 it/sec) -training >> step=1168900, episode=195 reward=0.7718605 (436.48 it/sec) -training >> step=1169000, episode=195 reward=0.7589397 (430.78 it/sec) -training >> step=1169100, episode=195 reward=0.7366191 (450.03 it/sec) -training >> step=1169200, episode=195 reward=0.7484525 (491.62 it/sec) -training >> step=1169300, episode=196 reward=0.745098 (93.87 it/sec) -training >> step=1169400, episode=196 reward=0.7852449 (492.59 it/sec) -training >> step=1169500, episode=196 reward=0.7635466 (477.16 it/sec) -training >> step=1169600, episode=196 reward=0.7736905 (519.78 it/sec) -training >> step=1169700, episode=196 reward=0.7417695 (506.11 it/sec) -training >> step=1169800, episode=196 reward=0.7656918 (506.55 it/sec) -training >> step=1169900, episode=196 reward=0.7639596 (525.13 it/sec) -training >> step=1170000, episode=196 reward=0.7436051 (500.41 it/sec) -training >> step=1170100, episode=196 reward=0.7629218 (505.05 it/sec) -training >> step=1170200, episode=196 reward=0.7539504 (546.06 it/sec) -training >> step=1170300, episode=196 reward=0.7510006 (525.51 it/sec) -training >> step=1170400, episode=196 reward=0.7558503 (508.84 it/sec) -training >> step=1170500, episode=196 reward=0.7657704 (529.94 it/sec) -training >> step=1170600, episode=196 reward=0.7745467 (502.61 it/sec) -training >> step=1170700, episode=196 reward=0.7773266 (536.25 it/sec) -training >> step=1170800, episode=196 reward=0.7752563 (512.70 it/sec) -training >> step=1170900, episode=196 reward=0.7398499 (494.59 it/sec) -training >> step=1171000, episode=196 reward=0.7612603 (564.16 it/sec) -training >> step=1171100, episode=196 reward=0.7692894 (372.98 it/sec) -training >> step=1171200, episode=196 reward=0.7775143 (496.11 it/sec) -training >> step=1171300, episode=196 reward=0.7498487 (518.05 it/sec) -training >> step=1171400, episode=196 reward=0.7744389 (529.75 it/sec) -training >> step=1171500, episode=196 reward=0.7718028 (480.64 it/sec) -training >> step=1171600, episode=196 reward=0.753566 (490.57 it/sec) -training >> step=1171700, episode=196 reward=0.7619057 (514.09 it/sec) -training >> step=1171800, episode=196 reward=0.7820527 (495.71 it/sec) -training >> step=1171900, episode=196 reward=0.7534742 (508.45 it/sec) -training >> step=1172000, episode=196 reward=0.763879 (528.83 it/sec) -training >> step=1172100, episode=196 reward=0.76013 (521.74 it/sec) -training >> step=1172200, episode=196 reward=0.748877 (502.34 it/sec) -training >> step=1172300, episode=196 reward=0.7677808 (504.41 it/sec) -training >> step=1172400, episode=196 reward=0.7810003 (451.89 it/sec) -training >> step=1172500, episode=196 reward=0.7665136 (548.99 it/sec) -training >> step=1172600, episode=196 reward=0.7664442 (492.54 it/sec) -training >> step=1172700, episode=196 reward=0.7572174 (487.55 it/sec) -training >> step=1172800, episode=196 reward=0.7692705 (502.15 it/sec) -training >> step=1172900, episode=196 reward=0.7345312 (428.17 it/sec) -training >> step=1173000, episode=196 reward=0.7653763 (431.76 it/sec) -training >> step=1173100, episode=196 reward=0.7612684 (440.80 it/sec) -training >> step=1173200, episode=196 reward=0.7510018 (432.64 it/sec) -training >> step=1173300, episode=196 reward=0.7350693 (444.75 it/sec) -training >> step=1173400, episode=196 reward=0.7709321 (468.56 it/sec) -training >> step=1173500, episode=196 reward=0.748692 (486.11 it/sec) -training >> step=1173600, episode=196 reward=0.7605543 (495.98 it/sec) -training >> step=1173700, episode=196 reward=0.7724013 (505.80 it/sec) -training >> step=1173800, episode=196 reward=0.7665907 (495.34 it/sec) -training >> step=1173900, episode=196 reward=0.7473002 (446.51 it/sec) -training >> step=1174000, episode=196 reward=0.7704924 (524.38 it/sec) -training >> step=1174100, episode=196 reward=0.7508341 (496.40 it/sec) -training >> step=1174200, episode=196 reward=0.7789264 (482.21 it/sec) -training >> step=1174300, episode=196 reward=0.7868526 (487.23 it/sec) -training >> step=1174400, episode=196 reward=0.7539433 (484.17 it/sec) -training >> step=1174500, episode=196 reward=0.7686474 (480.05 it/sec) -training >> step=1174600, episode=196 reward=0.7586659 (484.88 it/sec) -training >> step=1174700, episode=196 reward=0.7548923 (494.12 it/sec) -training >> step=1174800, episode=196 reward=0.7502373 (481.71 it/sec) -training >> step=1174900, episode=196 reward=0.75406 (531.08 it/sec) -training >> step=1175000, episode=196 reward=0.7691957 (548.80 it/sec) -training >> step=1175100, episode=196 reward=0.7537302 (522.88 it/sec) -training >> step=1175200, episode=196 reward=0.7638817 (484.32 it/sec) -training >> step=1175300, episode=197 reward=0.750627 (162.64 it/sec) -training >> step=1175400, episode=197 reward=0.744496 (494.18 it/sec) -training >> step=1175500, episode=197 reward=0.73954 (509.38 it/sec) -training >> step=1175600, episode=197 reward=0.7578004 (536.33 it/sec) -training >> step=1175700, episode=197 reward=0.7531813 (516.19 it/sec) -training >> step=1175800, episode=197 reward=0.7639245 (498.49 it/sec) -training >> step=1175900, episode=197 reward=0.7733178 (472.15 it/sec) -training >> step=1176000, episode=197 reward=0.7700579 (512.79 it/sec) -training >> step=1176100, episode=197 reward=0.7725788 (521.97 it/sec) -training >> step=1176200, episode=197 reward=0.7771862 (488.46 it/sec) -training >> step=1176300, episode=197 reward=0.7668482 (542.97 it/sec) -training >> step=1176400, episode=197 reward=0.7540288 (542.72 it/sec) -training >> step=1176500, episode=197 reward=0.7409944 (524.88 it/sec) -training >> step=1176600, episode=197 reward=0.7606155 (516.02 it/sec) -training >> step=1176700, episode=197 reward=0.7650918 (510.55 it/sec) -training >> step=1176800, episode=197 reward=0.7574728 (560.31 it/sec) -training >> step=1176900, episode=197 reward=0.7619421 (512.51 it/sec) -training >> step=1177000, episode=197 reward=0.7417063 (526.94 it/sec) -training >> step=1177100, episode=197 reward=0.7429516 (488.87 it/sec) -training >> step=1177200, episode=197 reward=0.7547 (474.68 it/sec) -training >> step=1177300, episode=197 reward=0.7427229 (488.32 it/sec) -training >> step=1177400, episode=197 reward=0.751654 (339.38 it/sec) -training >> step=1177500, episode=197 reward=0.7523297 (540.04 it/sec) -training >> step=1177600, episode=197 reward=0.7625874 (485.92 it/sec) -training >> step=1177700, episode=197 reward=0.7829214 (492.74 it/sec) -training >> step=1177800, episode=197 reward=0.7321981 (539.20 it/sec) -training >> step=1177900, episode=197 reward=0.7782202 (529.40 it/sec) -training >> step=1178000, episode=197 reward=0.7469729 (533.58 it/sec) -training >> step=1178100, episode=197 reward=0.7540082 (518.02 it/sec) -training >> step=1178200, episode=197 reward=0.7739689 (504.69 it/sec) -training >> step=1178300, episode=197 reward=0.7603832 (524.55 it/sec) -training >> step=1178400, episode=197 reward=0.7658243 (533.13 it/sec) -training >> step=1178500, episode=197 reward=0.7571268 (512.48 it/sec) -training >> step=1178600, episode=197 reward=0.784055 (473.09 it/sec) -training >> step=1178700, episode=197 reward=0.7651097 (495.25 it/sec) -training >> step=1178800, episode=197 reward=0.752319 (506.36 it/sec) -training >> step=1178900, episode=197 reward=0.7597973 (543.63 it/sec) -training >> step=1179000, episode=197 reward=0.7752399 (532.78 it/sec) -training >> step=1179100, episode=197 reward=0.7375043 (523.62 it/sec) -training >> step=1179200, episode=197 reward=0.7740555 (469.73 it/sec) -training >> step=1179300, episode=197 reward=0.7571329 (513.80 it/sec) -training >> step=1179400, episode=197 reward=0.7416214 (523.34 it/sec) -training >> step=1179500, episode=197 reward=0.7519842 (506.77 it/sec) -training >> step=1179600, episode=197 reward=0.7664135 (489.53 it/sec) -training >> step=1179700, episode=197 reward=0.7695257 (542.51 it/sec) -training >> step=1179800, episode=197 reward=0.7617271 (512.17 it/sec) -training >> step=1179900, episode=197 reward=0.7578899 (478.80 it/sec) -training >> step=1180000, episode=197 reward=0.7451041 (426.94 it/sec) -training >> step=1180100, episode=197 reward=0.7570988 (361.12 it/sec) -training >> step=1180200, episode=197 reward=0.7289285 (322.36 it/sec) -training >> step=1180300, episode=197 reward=0.7621413 (358.23 it/sec) -training >> step=1180400, episode=197 reward=0.7670349 (361.35 it/sec) -training >> step=1180500, episode=197 reward=0.7627335 (396.67 it/sec) -training >> step=1180600, episode=197 reward=0.7445918 (470.81 it/sec) -training >> step=1180700, episode=197 reward=0.7646084 (502.41 it/sec) -training >> step=1180800, episode=197 reward=0.7536203 (511.80 it/sec) -training >> step=1180900, episode=197 reward=0.7815796 (508.43 it/sec) -training >> step=1181000, episode=197 reward=0.7564089 (536.12 it/sec) -training >> step=1181100, episode=197 reward=0.7524181 (496.04 it/sec) -training >> step=1181200, episode=197 reward=0.7657154 (510.43 it/sec) -training >> step=1181300, episode=198 reward=0.7505299 (142.63 it/sec) -training >> step=1181400, episode=198 reward=0.7650193 (478.58 it/sec) -training >> step=1181500, episode=198 reward=0.7736513 (538.59 it/sec) -training >> step=1181600, episode=198 reward=0.7792989 (514.80 it/sec) -training >> step=1181700, episode=198 reward=0.7428246 (508.88 it/sec) -training >> step=1181800, episode=198 reward=0.7575738 (543.18 it/sec) -training >> step=1181900, episode=198 reward=0.7698469 (493.56 it/sec) -training >> step=1182000, episode=198 reward=0.7537649 (525.14 it/sec) -training >> step=1182100, episode=198 reward=0.7571555 (533.02 it/sec) -training >> step=1182200, episode=198 reward=0.7567167 (559.92 it/sec) -training >> step=1182300, episode=198 reward=0.7515665 (501.06 it/sec) -training >> step=1182400, episode=198 reward=0.7666475 (514.79 it/sec) -training >> step=1182500, episode=198 reward=0.7616978 (535.29 it/sec) -training >> step=1182600, episode=198 reward=0.7530861 (573.57 it/sec) -training >> step=1182700, episode=198 reward=0.7383602 (520.85 it/sec) -training >> step=1182800, episode=198 reward=0.7637645 (502.49 it/sec) -training >> step=1182900, episode=198 reward=0.77996 (464.94 it/sec) -training >> step=1183000, episode=198 reward=0.7277502 (457.05 it/sec) -training >> step=1183100, episode=198 reward=0.7477561 (489.35 it/sec) -training >> step=1183200, episode=198 reward=0.7408109 (485.89 it/sec) -training >> step=1183300, episode=198 reward=0.7651846 (470.05 it/sec) -training >> step=1183400, episode=198 reward=0.7631437 (487.74 it/sec) -training >> step=1183500, episode=198 reward=0.7770864 (337.35 it/sec) -training >> step=1183600, episode=198 reward=0.7620247 (498.73 it/sec) -training >> step=1183700, episode=198 reward=0.7634017 (482.12 it/sec) -training >> step=1183800, episode=198 reward=0.7541837 (491.85 it/sec) -training >> step=1183900, episode=198 reward=0.770476 (485.65 it/sec) -training >> step=1184000, episode=198 reward=0.7451911 (528.69 it/sec) -training >> step=1184100, episode=198 reward=0.76375 (536.74 it/sec) -training >> step=1184200, episode=198 reward=0.7535068 (485.66 it/sec) -training >> step=1184300, episode=198 reward=0.769939 (487.52 it/sec) -training >> step=1184400, episode=198 reward=0.7641906 (433.48 it/sec) -training >> step=1184500, episode=198 reward=0.7700593 (425.12 it/sec) -training >> step=1184600, episode=198 reward=0.7523848 (416.54 it/sec) -training >> step=1184700, episode=198 reward=0.7536156 (425.60 it/sec) -training >> step=1184800, episode=198 reward=0.7431118 (436.08 it/sec) -training >> step=1184900, episode=198 reward=0.759379 (432.93 it/sec) -training >> step=1185000, episode=198 reward=0.7562835 (482.27 it/sec) -training >> step=1185100, episode=198 reward=0.7495539 (502.19 it/sec) -training >> step=1185200, episode=198 reward=0.7751893 (414.42 it/sec) -training >> step=1185300, episode=198 reward=0.7275471 (450.44 it/sec) -training >> step=1185400, episode=198 reward=0.7611589 (440.42 it/sec) -training >> step=1185500, episode=198 reward=0.7484917 (417.13 it/sec) -training >> step=1185600, episode=198 reward=0.7527403 (378.76 it/sec) -training >> step=1185700, episode=198 reward=0.7479627 (426.23 it/sec) -training >> step=1185800, episode=198 reward=0.7658493 (453.98 it/sec) -training >> step=1185900, episode=198 reward=0.7604703 (507.90 it/sec) -training >> step=1186000, episode=198 reward=0.7627675 (499.06 it/sec) -training >> step=1186100, episode=198 reward=0.7470267 (484.12 it/sec) -training >> step=1186200, episode=198 reward=0.7724322 (503.56 it/sec) -training >> step=1186300, episode=198 reward=0.7640345 (421.55 it/sec) -training >> step=1186400, episode=198 reward=0.7546955 (345.03 it/sec) -training >> step=1186500, episode=198 reward=0.7663452 (375.28 it/sec) -training >> step=1186600, episode=198 reward=0.7661752 (436.34 it/sec) -training >> step=1186700, episode=198 reward=0.7512153 (447.04 it/sec) -training >> step=1186800, episode=198 reward=0.7712272 (474.38 it/sec) -training >> step=1186900, episode=198 reward=0.7581476 (435.11 it/sec) -training >> step=1187000, episode=198 reward=0.7495464 (481.60 it/sec) -training >> step=1187100, episode=198 reward=0.7600421 (447.40 it/sec) -training >> step=1187200, episode=198 reward=0.7337918 (515.33 it/sec) -training >> step=1187300, episode=199 reward=0.7503024 (131.03 it/sec) -training >> step=1187400, episode=199 reward=0.7582095 (491.23 it/sec) -training >> step=1187500, episode=199 reward=0.7665539 (505.67 it/sec) -training >> step=1187600, episode=199 reward=0.7429506 (447.85 it/sec) -training >> step=1187700, episode=199 reward=0.7585971 (423.22 it/sec) -training >> step=1187800, episode=199 reward=0.7614595 (434.63 it/sec) -training >> step=1187900, episode=199 reward=0.7437363 (449.59 it/sec) -training >> step=1188000, episode=199 reward=0.7493451 (410.05 it/sec) -training >> step=1188100, episode=199 reward=0.7390505 (400.89 it/sec) -training >> step=1188200, episode=199 reward=0.7645587 (462.27 it/sec) -training >> step=1188300, episode=199 reward=0.7543314 (435.53 it/sec) -training >> step=1188400, episode=199 reward=0.7798516 (426.80 it/sec) -training >> step=1188500, episode=199 reward=0.7802848 (457.06 it/sec) -training >> step=1188600, episode=199 reward=0.7750914 (457.21 it/sec) -training >> step=1188700, episode=199 reward=0.7456853 (451.11 it/sec) -training >> step=1188800, episode=199 reward=0.7613779 (450.75 it/sec) -training >> step=1188900, episode=199 reward=0.7544648 (442.83 it/sec) -training >> step=1189000, episode=199 reward=0.7670366 (492.07 it/sec) -training >> step=1189100, episode=199 reward=0.7510853 (533.44 it/sec) -training >> step=1189200, episode=199 reward=0.7639549 (465.15 it/sec) -training >> step=1189300, episode=199 reward=0.743134 (499.16 it/sec) -training >> step=1189400, episode=199 reward=0.7341928 (509.03 it/sec) -training >> step=1189500, episode=199 reward=0.785871 (471.93 it/sec) -training >> step=1189600, episode=199 reward=0.7638695 (512.83 it/sec) -training >> step=1189700, episode=199 reward=0.7627466 (352.15 it/sec) -training >> step=1189800, episode=199 reward=0.7577252 (508.35 it/sec) -training >> step=1189900, episode=199 reward=0.7671512 (510.52 it/sec) -training >> step=1190000, episode=199 reward=0.7653607 (498.20 it/sec) -training >> step=1190100, episode=199 reward=0.7368509 (517.89 it/sec) -training >> step=1190200, episode=199 reward=0.7596462 (479.40 it/sec) -training >> step=1190300, episode=199 reward=0.7722808 (485.76 it/sec) -training >> step=1190400, episode=199 reward=0.7857071 (412.82 it/sec) -training >> step=1190500, episode=199 reward=0.7520133 (497.75 it/sec) -training >> step=1190600, episode=199 reward=0.741253 (467.62 it/sec) -training >> step=1190700, episode=199 reward=0.7239841 (470.67 it/sec) -training >> step=1190800, episode=199 reward=0.7575491 (516.59 it/sec) -training >> step=1190900, episode=199 reward=0.7758123 (494.06 it/sec) -training >> step=1191000, episode=199 reward=0.7766276 (467.23 it/sec) -training >> step=1191100, episode=199 reward=0.772931 (391.25 it/sec) -training >> step=1191200, episode=199 reward=0.7746585 (463.13 it/sec) -training >> step=1191300, episode=199 reward=0.7756037 (503.01 it/sec) -training >> step=1191400, episode=199 reward=0.7633173 (457.30 it/sec) -training >> step=1191500, episode=199 reward=0.7705197 (484.28 it/sec) -training >> step=1191600, episode=199 reward=0.7680291 (468.64 it/sec) -training >> step=1191700, episode=199 reward=0.7609004 (520.57 it/sec) -training >> step=1191800, episode=199 reward=0.7716876 (539.03 it/sec) -training >> step=1191900, episode=199 reward=0.7554793 (474.33 it/sec) -training >> step=1192000, episode=199 reward=0.7488313 (537.87 it/sec) -training >> step=1192100, episode=199 reward=0.7477823 (501.73 it/sec) -training >> step=1192200, episode=199 reward=0.7737033 (525.31 it/sec) -training >> step=1192300, episode=199 reward=0.7500271 (493.54 it/sec) -training >> step=1192400, episode=199 reward=0.7708336 (484.68 it/sec) -training >> step=1192500, episode=199 reward=0.7660505 (507.33 it/sec) -training >> step=1192600, episode=199 reward=0.7450003 (472.58 it/sec) -training >> step=1192700, episode=199 reward=0.7607434 (520.15 it/sec) -training >> step=1192800, episode=199 reward=0.7519382 (499.99 it/sec) -training >> step=1192900, episode=199 reward=0.757457 (493.52 it/sec) -training >> step=1193000, episode=199 reward=0.7591757 (506.24 it/sec) -training >> step=1193100, episode=199 reward=0.7317235 (482.33 it/sec) -training >> step=1193200, episode=199 reward=0.7688979 (551.04 it/sec) -training >> step=1193300, episode=200 reward=0.7682056 (169.13 it/sec) -training >> step=1193400, episode=200 reward=0.7395839 (346.46 it/sec) -training >> step=1193500, episode=200 reward=0.7576796 (378.34 it/sec) -training >> step=1193600, episode=200 reward=0.7674733 (417.38 it/sec) -training >> step=1193700, episode=200 reward=0.7531888 (477.16 it/sec) -training >> step=1193800, episode=200 reward=0.7715993 (474.24 it/sec) -training >> step=1193900, episode=200 reward=0.7595084 (510.03 it/sec) -training >> step=1194000, episode=200 reward=0.7351028 (513.30 it/sec) -training >> step=1194100, episode=200 reward=0.7501297 (468.07 it/sec) -training >> step=1194200, episode=200 reward=0.7431672 (440.56 it/sec) -training >> step=1194300, episode=200 reward=0.7991908 (334.37 it/sec) -training >> step=1194400, episode=200 reward=0.788792 (369.30 it/sec) -training >> step=1194500, episode=200 reward=0.754029 (431.43 it/sec) -training >> step=1194600, episode=200 reward=0.7394772 (365.56 it/sec) -training >> step=1194700, episode=200 reward=0.758382 (391.84 it/sec) -training >> step=1194800, episode=200 reward=0.7641826 (425.59 it/sec) -training >> step=1194900, episode=200 reward=0.7474859 (395.43 it/sec) -training >> step=1195000, episode=200 reward=0.7603694 (420.01 it/sec) -training >> step=1195100, episode=200 reward=0.7756682 (371.52 it/sec) -training >> step=1195200, episode=200 reward=0.7603807 (444.95 it/sec) -training >> step=1195300, episode=200 reward=0.7658845 (479.64 it/sec) -training >> step=1195400, episode=200 reward=0.7733486 (490.00 it/sec) -training >> step=1195500, episode=200 reward=0.7596834 (486.99 it/sec) -training >> step=1195600, episode=200 reward=0.7376069 (447.72 it/sec) -training >> step=1195700, episode=200 reward=0.7539237 (449.43 it/sec) -training >> step=1195800, episode=200 reward=0.7607774 (289.02 it/sec) -training >> step=1195900, episode=200 reward=0.7634943 (407.63 it/sec) -training >> step=1196000, episode=200 reward=0.7681776 (453.59 it/sec) -training >> step=1196100, episode=200 reward=0.7701705 (474.99 it/sec) -training >> step=1196200, episode=200 reward=0.7762801 (495.27 it/sec) -training >> step=1196300, episode=200 reward=0.7633265 (480.42 it/sec) -training >> step=1196400, episode=200 reward=0.7587202 (511.97 it/sec) -training >> step=1196500, episode=200 reward=0.7399645 (544.99 it/sec) -training >> step=1196600, episode=200 reward=0.7615947 (520.99 it/sec) -training >> step=1196700, episode=200 reward=0.7723231 (523.09 it/sec) -training >> step=1196800, episode=200 reward=0.7542259 (513.81 it/sec) -training >> step=1196900, episode=200 reward=0.7535698 (539.96 it/sec) -training >> step=1197000, episode=200 reward=0.7573714 (519.63 it/sec) -training >> step=1197100, episode=200 reward=0.7795565 (526.00 it/sec) -training >> step=1197200, episode=200 reward=0.7448847 (494.81 it/sec) -training >> step=1197300, episode=200 reward=0.771006 (490.31 it/sec) -training >> step=1197400, episode=200 reward=0.7403453 (482.75 it/sec) -training >> step=1197500, episode=200 reward=0.7721938 (521.62 it/sec) -training >> step=1197600, episode=200 reward=0.7533277 (543.80 it/sec) -training >> step=1197700, episode=200 reward=0.757683 (576.83 it/sec) -training >> step=1197800, episode=200 reward=0.7539501 (498.77 it/sec) -training >> step=1197900, episode=200 reward=0.7354673 (460.90 it/sec) -training >> step=1198000, episode=200 reward=0.7519525 (552.97 it/sec) -training >> step=1198100, episode=200 reward=0.76019 (488.07 it/sec) -training >> step=1198200, episode=200 reward=0.7422981 (507.29 it/sec) -training >> step=1198300, episode=200 reward=0.7663393 (468.26 it/sec) -training >> step=1198400, episode=200 reward=0.7395595 (427.63 it/sec) -training >> step=1198500, episode=200 reward=0.766175 (480.60 it/sec) -training >> step=1198600, episode=200 reward=0.7272747 (456.21 it/sec) -training >> step=1198700, episode=200 reward=0.7507564 (470.81 it/sec) -training >> step=1198800, episode=200 reward=0.7382929 (488.15 it/sec) -training >> step=1198900, episode=200 reward=0.7567136 (426.80 it/sec) -training >> step=1199000, episode=200 reward=0.7468532 (488.13 it/sec) -training >> step=1199100, episode=200 reward=0.7520862 (473.69 it/sec) -training >> step=1199200, episode=200 reward=0.7587454 (472.01 it/sec) -training >> step=1199300, episode=201 reward=0.7441929 (150.74 it/sec) -training >> step=1199400, episode=201 reward=0.7582545 (467.09 it/sec) -training >> step=1199500, episode=201 reward=0.7553505 (462.88 it/sec) -training >> step=1199600, episode=201 reward=0.7469884 (410.01 it/sec) -training >> step=1199700, episode=201 reward=0.7390463 (473.79 it/sec) -training >> step=1199800, episode=201 reward=0.7562972 (456.66 it/sec) -training >> step=1199900, episode=201 reward=0.760152 (485.37 it/sec) -training >> step=1200000, episode=201 reward=0.7648504 (489.70 it/sec) -training >> step=1200100, episode=201 reward=0.7513354 (355.86 it/sec) -training >> step=1200200, episode=201 reward=0.7726596 (469.36 it/sec) -training >> step=1200300, episode=201 reward=0.7527137 (502.35 it/sec) -training >> step=1200400, episode=201 reward=0.7629025 (504.22 it/sec) -training >> step=1200500, episode=201 reward=0.7682943 (440.15 it/sec) -training >> step=1200600, episode=201 reward=0.7648491 (437.40 it/sec) -training >> step=1200700, episode=201 reward=0.7631437 (460.18 it/sec) -training >> step=1200800, episode=201 reward=0.759194 (489.48 it/sec) -training >> step=1200900, episode=201 reward=0.7777896 (463.21 it/sec) -training >> step=1201000, episode=201 reward=0.7651886 (425.29 it/sec) -training >> step=1201100, episode=201 reward=0.7597013 (436.00 it/sec) -training >> step=1201200, episode=201 reward=0.7566565 (408.94 it/sec) -training >> step=1201300, episode=201 reward=0.7492805 (417.72 it/sec) -training >> step=1201400, episode=201 reward=0.7542354 (402.14 it/sec) -training >> step=1201500, episode=201 reward=0.7702209 (369.88 it/sec) -training >> step=1201600, episode=201 reward=0.730249 (428.98 it/sec) -training >> step=1201700, episode=201 reward=0.7544384 (398.86 it/sec) -training >> step=1201800, episode=201 reward=0.7575788 (400.08 it/sec) -training >> step=1201900, episode=201 reward=0.75638 (431.34 it/sec) -training >> step=1202000, episode=201 reward=0.7672372 (299.68 it/sec) -training >> step=1202100, episode=201 reward=0.7505704 (424.57 it/sec) -training >> step=1202200, episode=201 reward=0.773647 (446.79 it/sec) -training >> step=1202300, episode=201 reward=0.7714221 (446.39 it/sec) -training >> step=1202400, episode=201 reward=0.7706928 (466.08 it/sec) -training >> step=1202500, episode=201 reward=0.7440105 (476.69 it/sec) -training >> step=1202600, episode=201 reward=0.7684566 (455.83 it/sec) -training >> step=1202700, episode=201 reward=0.7607302 (417.46 it/sec) -training >> step=1202800, episode=201 reward=0.7315702 (444.98 it/sec) -training >> step=1202900, episode=201 reward=0.7630446 (490.26 it/sec) -training >> step=1203000, episode=201 reward=0.7586039 (523.01 it/sec) -training >> step=1203100, episode=201 reward=0.7563968 (538.58 it/sec) -training >> step=1203200, episode=201 reward=0.7689816 (507.28 it/sec) -training >> step=1203300, episode=201 reward=0.7518315 (498.17 it/sec) -training >> step=1203400, episode=201 reward=0.7555748 (511.66 it/sec) -training >> step=1203500, episode=201 reward=0.7430525 (486.77 it/sec) -training >> step=1203600, episode=201 reward=0.7730508 (503.21 it/sec) -training >> step=1203700, episode=201 reward=0.7470229 (528.15 it/sec) -training >> step=1203800, episode=201 reward=0.7406384 (516.89 it/sec) -training >> step=1203900, episode=201 reward=0.7548584 (520.81 it/sec) -training >> step=1204000, episode=201 reward=0.7826581 (493.79 it/sec) -training >> step=1204100, episode=201 reward=0.76936 (483.49 it/sec) -training >> step=1204200, episode=201 reward=0.7654499 (534.60 it/sec) -training >> step=1204300, episode=201 reward=0.7522572 (509.83 it/sec) -training >> step=1204400, episode=201 reward=0.7721369 (485.97 it/sec) -training >> step=1204500, episode=201 reward=0.7535958 (483.39 it/sec) -training >> step=1204600, episode=201 reward=0.7531355 (537.96 it/sec) -training >> step=1204700, episode=201 reward=0.7686234 (464.61 it/sec) -training >> step=1204800, episode=201 reward=0.7570255 (480.25 it/sec) -training >> step=1204900, episode=201 reward=0.7569656 (468.44 it/sec) -training >> step=1205000, episode=201 reward=0.7510851 (445.04 it/sec) -training >> step=1205100, episode=201 reward=0.7524101 (464.43 it/sec) -training >> step=1205200, episode=201 reward=0.7642123 (468.79 it/sec) -training >> step=1205300, episode=202 reward=0.7662739 (155.79 it/sec) -training >> step=1205400, episode=202 reward=0.7774838 (512.63 it/sec) -training >> step=1205500, episode=202 reward=0.7590962 (512.42 it/sec) -training >> step=1205600, episode=202 reward=0.75225 (536.83 it/sec) -training >> step=1205700, episode=202 reward=0.7466368 (512.81 it/sec) -training >> step=1205800, episode=202 reward=0.7484708 (531.09 it/sec) -training >> step=1205900, episode=202 reward=0.7558656 (525.43 it/sec) -training >> step=1206000, episode=202 reward=0.7638435 (505.44 it/sec) -training >> step=1206100, episode=202 reward=0.7593575 (486.79 it/sec) -training >> step=1206200, episode=202 reward=0.753696 (492.05 it/sec) -training >> step=1206300, episode=202 reward=0.7759297 (514.62 it/sec) -training >> step=1206400, episode=202 reward=0.7549911 (486.47 it/sec) -training >> step=1206500, episode=202 reward=0.7732183 (510.12 it/sec) -training >> step=1206600, episode=202 reward=0.7801985 (570.42 it/sec) -training >> step=1206700, episode=202 reward=0.7763123 (521.38 it/sec) -training >> step=1206800, episode=202 reward=0.7591599 (491.87 it/sec) -training >> step=1206900, episode=202 reward=0.7721007 (476.01 it/sec) -training >> step=1207000, episode=202 reward=0.7476606 (463.31 it/sec) -training >> step=1207100, episode=202 reward=0.7651553 (414.82 it/sec) -training >> step=1207200, episode=202 reward=0.7683808 (405.02 it/sec) -training >> step=1207300, episode=202 reward=0.8004672 (361.16 it/sec) -training >> step=1207400, episode=202 reward=0.7759183 (418.54 it/sec) -training >> step=1207500, episode=202 reward=0.7671787 (357.99 it/sec) -training >> step=1207600, episode=202 reward=0.7552665 (350.95 it/sec) -training >> step=1207700, episode=202 reward=0.7555184 (394.81 it/sec) -training >> step=1207800, episode=202 reward=0.750083 (427.84 it/sec) -training >> step=1207900, episode=202 reward=0.7580903 (446.68 it/sec) -training >> step=1208000, episode=202 reward=0.7578842 (455.46 it/sec) -training >> step=1208100, episode=202 reward=0.7706979 (466.99 it/sec) -training >> step=1208200, episode=202 reward=0.7416487 (308.04 it/sec) -training >> step=1208300, episode=202 reward=0.7328804 (470.70 it/sec) -training >> step=1208400, episode=202 reward=0.7572477 (511.98 it/sec) -training >> step=1208500, episode=202 reward=0.7684503 (506.23 it/sec) -training >> step=1208600, episode=202 reward=0.7645658 (535.20 it/sec) -training >> step=1208700, episode=202 reward=0.7530915 (517.92 it/sec) -training >> step=1208800, episode=202 reward=0.7545133 (560.60 it/sec) -training >> step=1208900, episode=202 reward=0.7316054 (506.95 it/sec) -training >> step=1209000, episode=202 reward=0.7336491 (542.92 it/sec) -training >> step=1209100, episode=202 reward=0.7646703 (516.59 it/sec) -training >> step=1209200, episode=202 reward=0.7566289 (550.09 it/sec) -training >> step=1209300, episode=202 reward=0.7418432 (534.36 it/sec) -training >> step=1209400, episode=202 reward=0.7661126 (507.41 it/sec) -training >> step=1209500, episode=202 reward=0.7821285 (393.31 it/sec) -training >> step=1209600, episode=202 reward=0.7660413 (471.84 it/sec) -training >> step=1209700, episode=202 reward=0.7692139 (539.86 it/sec) -training >> step=1209800, episode=202 reward=0.7805546 (534.27 it/sec) -training >> step=1209900, episode=202 reward=0.7668243 (544.97 it/sec) -training >> step=1210000, episode=202 reward=0.7711546 (504.60 it/sec) -training >> step=1210100, episode=202 reward=0.7395199 (416.17 it/sec) -training >> step=1210200, episode=202 reward=0.7408163 (487.61 it/sec) -training >> step=1210300, episode=202 reward=0.7708641 (504.93 it/sec) -training >> step=1210400, episode=202 reward=0.7616602 (536.09 it/sec) -training >> step=1210500, episode=202 reward=0.7544867 (494.80 it/sec) -training >> step=1210600, episode=202 reward=0.7641239 (511.07 it/sec) -training >> step=1210700, episode=202 reward=0.7352878 (551.69 it/sec) -training >> step=1210800, episode=202 reward=0.741832 (548.60 it/sec) -training >> step=1210900, episode=202 reward=0.7484921 (497.63 it/sec) -training >> step=1211000, episode=202 reward=0.7503308 (516.16 it/sec) -training >> step=1211100, episode=202 reward=0.7444242 (518.28 it/sec) -training >> step=1211200, episode=202 reward=0.7578719 (542.59 it/sec) -training >> step=1211300, episode=203 reward=0.7568755 (66.68 it/sec) -training >> step=1211400, episode=203 reward=0.7398555 (482.73 it/sec) -training >> step=1211500, episode=203 reward=0.7508298 (505.13 it/sec) -training >> step=1211600, episode=203 reward=0.7862444 (509.43 it/sec) -training >> step=1211700, episode=203 reward=0.7439425 (487.03 it/sec) -training >> step=1211800, episode=203 reward=0.782464 (503.34 it/sec) -training >> step=1211900, episode=203 reward=0.7750042 (457.56 it/sec) -training >> step=1212000, episode=203 reward=0.7747875 (536.50 it/sec) -training >> step=1212100, episode=203 reward=0.7766255 (559.65 it/sec) -training >> step=1212200, episode=203 reward=0.7455447 (499.12 it/sec) -training >> step=1212300, episode=203 reward=0.7466181 (508.55 it/sec) -training >> step=1212400, episode=203 reward=0.7487832 (478.37 it/sec) -training >> step=1212500, episode=203 reward=0.7423407 (547.86 it/sec) -training >> step=1212600, episode=203 reward=0.7582079 (508.80 it/sec) -training >> step=1212700, episode=203 reward=0.7659335 (488.11 it/sec) -training >> step=1212800, episode=203 reward=0.7625216 (510.86 it/sec) -training >> step=1212900, episode=203 reward=0.7682389 (509.46 it/sec) -training >> step=1213000, episode=203 reward=0.7685987 (516.61 it/sec) -training >> step=1213100, episode=203 reward=0.7492706 (485.76 it/sec) -training >> step=1213200, episode=203 reward=0.760063 (468.89 it/sec) -training >> step=1213300, episode=203 reward=0.7553365 (488.42 it/sec) -training >> step=1213400, episode=203 reward=0.769506 (509.17 it/sec) -training >> step=1213500, episode=203 reward=0.7720854 (520.38 it/sec) -training >> step=1213600, episode=203 reward=0.7651023 (479.17 it/sec) -training >> step=1213700, episode=203 reward=0.7373152 (489.00 it/sec) -training >> step=1213800, episode=203 reward=0.7703908 (453.20 it/sec) -training >> step=1213900, episode=203 reward=0.7755437 (483.99 it/sec) -training >> step=1214000, episode=203 reward=0.7634244 (431.48 it/sec) -training >> step=1214100, episode=203 reward=0.7489615 (436.86 it/sec) -training >> step=1214200, episode=203 reward=0.7576859 (351.73 it/sec) -training >> step=1214300, episode=203 reward=0.7452248 (441.54 it/sec) -training >> step=1214400, episode=203 reward=0.7662277 (506.94 it/sec) -training >> step=1214500, episode=203 reward=0.7485514 (471.55 it/sec) -training >> step=1214600, episode=203 reward=0.7651365 (442.68 it/sec) -training >> step=1214700, episode=203 reward=0.7535367 (470.91 it/sec) -training >> step=1214800, episode=203 reward=0.7483799 (445.83 it/sec) -training >> step=1214900, episode=203 reward=0.7563283 (473.99 it/sec) -training >> step=1215000, episode=203 reward=0.7636955 (512.10 it/sec) -training >> step=1215100, episode=203 reward=0.7610919 (521.88 it/sec) -training >> step=1215200, episode=203 reward=0.7467147 (426.68 it/sec) -training >> step=1215300, episode=203 reward=0.7484362 (457.30 it/sec) -training >> step=1215400, episode=203 reward=0.7558566 (471.48 it/sec) -training >> step=1215500, episode=203 reward=0.7649607 (432.08 it/sec) -training >> step=1215600, episode=203 reward=0.7699345 (468.93 it/sec) -training >> step=1215700, episode=203 reward=0.7737088 (454.22 it/sec) -training >> step=1215800, episode=203 reward=0.7607108 (493.89 it/sec) -training >> step=1215900, episode=203 reward=0.7588888 (492.70 it/sec) -training >> step=1216000, episode=203 reward=0.7369132 (467.49 it/sec) -training >> step=1216100, episode=203 reward=0.7669926 (444.17 it/sec) -training >> step=1216200, episode=203 reward=0.7693207 (393.72 it/sec) -training >> step=1216300, episode=203 reward=0.737467 (416.16 it/sec) -training >> step=1216400, episode=203 reward=0.7577724 (422.31 it/sec) -training >> step=1216500, episode=203 reward=0.7354859 (477.16 it/sec) -training >> step=1216600, episode=203 reward=0.7337596 (402.78 it/sec) -training >> step=1216700, episode=203 reward=0.7702046 (398.94 it/sec) -training >> step=1216800, episode=203 reward=0.7538 (414.70 it/sec) -training >> step=1216900, episode=203 reward=0.7814246 (490.69 it/sec) -training >> step=1217000, episode=203 reward=0.7822546 (465.38 it/sec) -training >> step=1217100, episode=203 reward=0.7747793 (427.00 it/sec) -training >> step=1217200, episode=203 reward=0.7588931 (419.63 it/sec) -training >> step=1217300, episode=204 reward=0.7340702 (217.45 it/sec) -training >> step=1217400, episode=204 reward=0.7506649 (487.13 it/sec) -training >> step=1217500, episode=204 reward=0.763985 (499.79 it/sec) -training >> step=1217600, episode=204 reward=0.7603715 (486.88 it/sec) -training >> step=1217700, episode=204 reward=0.7184957 (527.16 it/sec) -training >> step=1217800, episode=204 reward=0.7533414 (539.72 it/sec) -training >> step=1217900, episode=204 reward=0.7535813 (519.41 it/sec) -training >> step=1218000, episode=204 reward=0.7609611 (517.49 it/sec) -training >> step=1218100, episode=204 reward=0.7840145 (523.72 it/sec) -training >> step=1218200, episode=204 reward=0.7527037 (467.24 it/sec) -training >> step=1218300, episode=204 reward=0.7631881 (489.03 it/sec) -training >> step=1218400, episode=204 reward=0.75772 (457.21 it/sec) -training >> step=1218500, episode=204 reward=0.7508824 (528.74 it/sec) -training >> step=1218600, episode=204 reward=0.7368789 (478.76 it/sec) -training >> step=1218700, episode=204 reward=0.7780784 (512.63 it/sec) -training >> step=1218800, episode=204 reward=0.7560842 (553.82 it/sec) -training >> step=1218900, episode=204 reward=0.7809612 (539.27 it/sec) -training >> step=1219000, episode=204 reward=0.7839123 (543.01 it/sec) -training >> step=1219100, episode=204 reward=0.7605799 (521.21 it/sec) -training >> step=1219200, episode=204 reward=0.7541521 (519.20 it/sec) -training >> step=1219300, episode=204 reward=0.752507 (475.59 it/sec) -training >> step=1219400, episode=204 reward=0.7753507 (460.83 it/sec) -training >> step=1219500, episode=204 reward=0.7629141 (518.42 it/sec) -training >> step=1219600, episode=204 reward=0.7651815 (501.08 it/sec) -training >> step=1219700, episode=204 reward=0.7530898 (535.55 it/sec) -training >> step=1219800, episode=204 reward=0.7762824 (476.05 it/sec) -training >> step=1219900, episode=204 reward=0.7636336 (378.59 it/sec) -training >> step=1220000, episode=204 reward=0.7556049 (435.25 it/sec) -training >> step=1220100, episode=204 reward=0.754716 (524.58 it/sec) -training >> step=1220200, episode=204 reward=0.7696012 (445.83 it/sec) -training >> step=1220300, episode=204 reward=0.7796686 (459.72 it/sec) -training >> step=1220400, episode=204 reward=0.7659041 (440.49 it/sec) -training >> step=1220500, episode=204 reward=0.7608724 (375.33 it/sec) -training >> step=1220600, episode=204 reward=0.7432138 (503.32 it/sec) -training >> step=1220700, episode=204 reward=0.7670385 (430.29 it/sec) -training >> step=1220800, episode=204 reward=0.7551696 (466.56 it/sec) -training >> step=1220900, episode=204 reward=0.772506 (418.53 it/sec) -training >> step=1221000, episode=204 reward=0.7442855 (499.67 it/sec) -training >> step=1221100, episode=204 reward=0.7578866 (541.80 it/sec) -training >> step=1221200, episode=204 reward=0.774569 (560.23 it/sec) -training >> step=1221300, episode=204 reward=0.7527421 (551.79 it/sec) -training >> step=1221400, episode=204 reward=0.7838369 (478.83 it/sec) -training >> step=1221500, episode=204 reward=0.7797253 (528.82 it/sec) -training >> step=1221600, episode=204 reward=0.7665074 (565.68 it/sec) -training >> step=1221700, episode=204 reward=0.741118 (561.82 it/sec) -training >> step=1221800, episode=204 reward=0.7583444 (505.07 it/sec) -training >> step=1221900, episode=204 reward=0.7876541 (544.89 it/sec) -training >> step=1222000, episode=204 reward=0.7669618 (510.90 it/sec) -training >> step=1222100, episode=204 reward=0.7663115 (501.06 it/sec) -training >> step=1222200, episode=204 reward=0.765668 (561.27 it/sec) -training >> step=1222300, episode=204 reward=0.7704036 (504.36 it/sec) -training >> step=1222400, episode=204 reward=0.7628705 (527.66 it/sec) -training >> step=1222500, episode=204 reward=0.7585832 (531.54 it/sec) -training >> step=1222600, episode=204 reward=0.74582 (520.15 it/sec) -training >> step=1222700, episode=204 reward=0.7524211 (529.95 it/sec) -training >> step=1222800, episode=204 reward=0.74958 (520.50 it/sec) -training >> step=1222900, episode=204 reward=0.7363964 (555.61 it/sec) -training >> step=1223000, episode=204 reward=0.7695901 (534.79 it/sec) -training >> step=1223100, episode=204 reward=0.758759 (530.64 it/sec) -training >> step=1223200, episode=204 reward=0.7618853 (542.23 it/sec) -training >> step=1223300, episode=205 reward=0.7720752 (61.68 it/sec) -training >> step=1223400, episode=205 reward=0.746843 (496.63 it/sec) -training >> step=1223500, episode=205 reward=0.7537974 (513.41 it/sec) -training >> step=1223600, episode=205 reward=0.7461193 (564.75 it/sec) -training >> step=1223700, episode=205 reward=0.7660733 (515.59 it/sec) -training >> step=1223800, episode=205 reward=0.7709819 (516.38 it/sec) -training >> step=1223900, episode=205 reward=0.7525778 (544.20 it/sec) -training >> step=1224000, episode=205 reward=0.7674971 (577.62 it/sec) -training >> step=1224100, episode=205 reward=0.769154 (526.92 it/sec) -training >> step=1224200, episode=205 reward=0.7453607 (521.35 it/sec) -training >> step=1224300, episode=205 reward=0.772296 (557.37 it/sec) -training >> step=1224400, episode=205 reward=0.7730454 (527.60 it/sec) -training >> step=1224500, episode=205 reward=0.7569689 (529.99 it/sec) -training >> step=1224600, episode=205 reward=0.7624178 (523.93 it/sec) -training >> step=1224700, episode=205 reward=0.7545364 (550.09 it/sec) -training >> step=1224800, episode=205 reward=0.7579851 (568.16 it/sec) -training >> step=1224900, episode=205 reward=0.7482519 (542.13 it/sec) -training >> step=1225000, episode=205 reward=0.7527297 (553.47 it/sec) -training >> step=1225100, episode=205 reward=0.7677958 (560.23 it/sec) -training >> step=1225200, episode=205 reward=0.7510826 (521.09 it/sec) -training >> step=1225300, episode=205 reward=0.7624918 (569.45 it/sec) -training >> step=1225400, episode=205 reward=0.7636279 (538.44 it/sec) -training >> step=1225500, episode=205 reward=0.7760965 (516.88 it/sec) -training >> step=1225600, episode=205 reward=0.7662318 (489.94 it/sec) -training >> step=1225700, episode=205 reward=0.766739 (536.98 it/sec) -training >> step=1225800, episode=205 reward=0.7921236 (549.51 it/sec) -training >> step=1225900, episode=205 reward=0.7667351 (524.83 it/sec) -training >> step=1226000, episode=205 reward=0.7568932 (567.79 it/sec) -training >> step=1226100, episode=205 reward=0.7741997 (473.61 it/sec) -training >> step=1226200, episode=205 reward=0.7398623 (526.98 it/sec) -training >> step=1226300, episode=205 reward=0.7657932 (562.34 it/sec) -training >> step=1226400, episode=205 reward=0.7677442 (541.39 it/sec) -training >> step=1226500, episode=205 reward=0.7584416 (490.10 it/sec) -training >> step=1226600, episode=205 reward=0.7721531 (505.09 it/sec) -training >> step=1226700, episode=205 reward=0.7741606 (392.70 it/sec) -training >> step=1226800, episode=205 reward=0.7576694 (568.03 it/sec) -training >> step=1226900, episode=205 reward=0.7499768 (523.56 it/sec) -training >> step=1227000, episode=205 reward=0.7555107 (502.68 it/sec) -training >> step=1227100, episode=205 reward=0.7479794 (569.74 it/sec) -training >> step=1227200, episode=205 reward=0.7470313 (520.74 it/sec) -training >> step=1227300, episode=205 reward=0.770288 (543.31 it/sec) -training >> step=1227400, episode=205 reward=0.7567836 (556.05 it/sec) -training >> step=1227500, episode=205 reward=0.759627 (545.11 it/sec) -training >> step=1227600, episode=205 reward=0.7583241 (547.07 it/sec) -training >> step=1227700, episode=205 reward=0.7456194 (554.28 it/sec) -training >> step=1227800, episode=205 reward=0.7558243 (538.04 it/sec) -training >> step=1227900, episode=205 reward=0.7778149 (561.68 it/sec) -training >> step=1228000, episode=205 reward=0.7449145 (541.20 it/sec) -training >> step=1228100, episode=205 reward=0.7623364 (545.17 it/sec) -training >> step=1228200, episode=205 reward=0.7718861 (519.52 it/sec) -training >> step=1228300, episode=205 reward=0.7625663 (522.94 it/sec) -training >> step=1228400, episode=205 reward=0.7540405 (519.62 it/sec) -training >> step=1228500, episode=205 reward=0.7639228 (545.34 it/sec) -training >> step=1228600, episode=205 reward=0.7304157 (486.38 it/sec) -training >> step=1228700, episode=205 reward=0.762009 (545.72 it/sec) -training >> step=1228800, episode=205 reward=0.779893 (532.93 it/sec) -training >> step=1228900, episode=205 reward=0.7627501 (538.44 it/sec) -training >> step=1229000, episode=205 reward=0.739495 (541.64 it/sec) -training >> step=1229100, episode=205 reward=0.7561647 (537.84 it/sec) -training >> step=1229200, episode=205 reward=0.7514139 (538.45 it/sec) -training >> step=1229300, episode=206 reward=0.7758023 (51.76 it/sec) -training >> step=1229400, episode=206 reward=0.763826 (480.05 it/sec) -training >> step=1229500, episode=206 reward=0.7551599 (492.23 it/sec) -training >> step=1229600, episode=206 reward=0.7527835 (543.65 it/sec) -training >> step=1229700, episode=206 reward=0.7586777 (533.39 it/sec) -training >> step=1229800, episode=206 reward=0.7401891 (495.08 it/sec) -training >> step=1229900, episode=206 reward=0.7729471 (530.86 it/sec) -training >> step=1230000, episode=206 reward=0.7414929 (537.32 it/sec) -training >> step=1230100, episode=206 reward=0.7730545 (521.02 it/sec) -training >> step=1230200, episode=206 reward=0.7328396 (553.93 it/sec) -training >> step=1230300, episode=206 reward=0.7597269 (562.96 it/sec) -training >> step=1230400, episode=206 reward=0.7440571 (550.00 it/sec) -training >> step=1230500, episode=206 reward=0.7492546 (487.16 it/sec) -training >> step=1230600, episode=206 reward=0.7560325 (547.72 it/sec) -training >> step=1230700, episode=206 reward=0.7417585 (548.01 it/sec) -training >> step=1230800, episode=206 reward=0.7519325 (542.22 it/sec) -training >> step=1230900, episode=206 reward=0.7512149 (547.10 it/sec) -training >> step=1231000, episode=206 reward=0.760534 (577.86 it/sec) -training >> step=1231100, episode=206 reward=0.7805635 (548.14 it/sec) -training >> step=1231200, episode=206 reward=0.7561623 (519.69 it/sec) -training >> step=1231300, episode=206 reward=0.769403 (557.48 it/sec) -training >> step=1231400, episode=206 reward=0.7786455 (555.66 it/sec) -training >> step=1231500, episode=206 reward=0.7675384 (563.59 it/sec) -training >> step=1231600, episode=206 reward=0.7687014 (554.93 it/sec) -training >> step=1231700, episode=206 reward=0.7471484 (487.17 it/sec) -training >> step=1231800, episode=206 reward=0.7528529 (570.54 it/sec) -training >> step=1231900, episode=206 reward=0.7691193 (550.96 it/sec) -training >> step=1232000, episode=206 reward=0.7794041 (562.18 it/sec) -training >> step=1232100, episode=206 reward=0.7467532 (539.75 it/sec) -training >> step=1232200, episode=206 reward=0.7738661 (548.55 it/sec) -training >> step=1232300, episode=206 reward=0.7603337 (543.45 it/sec) -training >> step=1232400, episode=206 reward=0.7500265 (568.75 it/sec) -training >> step=1232500, episode=206 reward=0.7786543 (493.50 it/sec) -training >> step=1232600, episode=206 reward=0.7469485 (510.82 it/sec) -training >> step=1232700, episode=206 reward=0.74216 (544.86 it/sec) -training >> step=1232800, episode=206 reward=0.7391448 (443.46 it/sec) -training >> step=1232900, episode=206 reward=0.7520583 (526.43 it/sec) -training >> step=1233000, episode=206 reward=0.768644 (506.43 it/sec) -training >> step=1233100, episode=206 reward=0.7452763 (557.23 it/sec) -training >> step=1233200, episode=206 reward=0.7857724 (559.03 it/sec) -training >> step=1233300, episode=206 reward=0.7613999 (541.43 it/sec) -training >> step=1233400, episode=206 reward=0.7912169 (560.73 it/sec) -training >> step=1233500, episode=206 reward=0.7630547 (555.68 it/sec) -training >> step=1233600, episode=206 reward=0.753464 (553.83 it/sec) -training >> step=1233700, episode=206 reward=0.7592184 (521.41 it/sec) -training >> step=1233800, episode=206 reward=0.7537244 (558.21 it/sec) -training >> step=1233900, episode=206 reward=0.750175 (544.19 it/sec) -training >> step=1234000, episode=206 reward=0.735998 (524.29 it/sec) -training >> step=1234100, episode=206 reward=0.758873 (554.29 it/sec) -training >> step=1234200, episode=206 reward=0.754398 (535.47 it/sec) -training >> step=1234300, episode=206 reward=0.7363306 (501.05 it/sec) -training >> step=1234400, episode=206 reward=0.733979 (307.55 it/sec) -training >> step=1234500, episode=206 reward=0.7681634 (318.85 it/sec) -training >> step=1234600, episode=206 reward=0.7674811 (343.47 it/sec) -training >> step=1234700, episode=206 reward=0.7743828 (499.71 it/sec) -training >> step=1234800, episode=206 reward=0.7452726 (534.22 it/sec) -training >> step=1234900, episode=206 reward=0.7492081 (456.35 it/sec) -training >> step=1235000, episode=206 reward=0.7273272 (488.04 it/sec) -training >> step=1235100, episode=206 reward=0.7497016 (540.54 it/sec) -training >> step=1235200, episode=206 reward=0.7441691 (544.95 it/sec) -training >> step=1235300, episode=207 reward=0.7563979 (73.43 it/sec) -training >> step=1235400, episode=207 reward=0.7711174 (498.27 it/sec) -training >> step=1235500, episode=207 reward=0.7557706 (504.09 it/sec) -training >> step=1235600, episode=207 reward=0.7627591 (453.22 it/sec) -training >> step=1235700, episode=207 reward=0.7368822 (525.77 it/sec) -training >> step=1235800, episode=207 reward=0.7707173 (521.89 it/sec) -training >> step=1235900, episode=207 reward=0.7800068 (555.31 it/sec) -training >> step=1236000, episode=207 reward=0.7549462 (546.78 it/sec) -training >> step=1236100, episode=207 reward=0.7636424 (552.16 it/sec) -training >> step=1236200, episode=207 reward=0.7730221 (518.54 it/sec) -training >> step=1236300, episode=207 reward=0.7802209 (524.45 it/sec) -training >> step=1236400, episode=207 reward=0.7559921 (469.83 it/sec) -training >> step=1236500, episode=207 reward=0.7646899 (545.48 it/sec) -training >> step=1236600, episode=207 reward=0.7681192 (538.54 it/sec) -training >> step=1236700, episode=207 reward=0.7542047 (585.99 it/sec) -training >> step=1236800, episode=207 reward=0.7429224 (510.74 it/sec) -training >> step=1236900, episode=207 reward=0.7913567 (495.08 it/sec) -training >> step=1237000, episode=207 reward=0.7715389 (541.25 it/sec) -training >> step=1237100, episode=207 reward=0.7442867 (559.35 it/sec) -training >> step=1237200, episode=207 reward=0.7572333 (558.75 it/sec) -training >> step=1237300, episode=207 reward=0.7643694 (531.72 it/sec) -training >> step=1237400, episode=207 reward=0.7766523 (511.24 it/sec) -training >> step=1237500, episode=207 reward=0.7426739 (525.00 it/sec) -training >> step=1237600, episode=207 reward=0.7708727 (512.87 it/sec) -training >> step=1237700, episode=207 reward=0.7507086 (564.29 it/sec) -training >> step=1237800, episode=207 reward=0.7523723 (536.82 it/sec) -training >> step=1237900, episode=207 reward=0.7464769 (518.67 it/sec) -training >> step=1238000, episode=207 reward=0.7678948 (516.35 it/sec) -training >> step=1238100, episode=207 reward=0.7722088 (525.12 it/sec) -training >> step=1238200, episode=207 reward=0.7417365 (536.25 it/sec) -training >> step=1238300, episode=207 reward=0.7529563 (543.17 it/sec) -training >> step=1238400, episode=207 reward=0.7373471 (480.79 it/sec) -training >> step=1238500, episode=207 reward=0.7692067 (508.19 it/sec) -training >> step=1238600, episode=207 reward=0.7652659 (513.56 it/sec) -training >> step=1238700, episode=207 reward=0.7593208 (513.58 it/sec) -training >> step=1238800, episode=207 reward=0.7651179 (395.83 it/sec) -training >> step=1238900, episode=207 reward=0.767167 (516.82 it/sec) -training >> step=1239000, episode=207 reward=0.7556358 (483.69 it/sec) -training >> step=1239100, episode=207 reward=0.786025 (535.42 it/sec) -training >> step=1239200, episode=207 reward=0.7544411 (548.19 it/sec) -training >> step=1239300, episode=207 reward=0.7648726 (523.31 it/sec) -training >> step=1239400, episode=207 reward=0.7726274 (478.03 it/sec) -training >> step=1239500, episode=207 reward=0.7411439 (528.70 it/sec) -training >> step=1239600, episode=207 reward=0.7246812 (554.56 it/sec) -training >> step=1239700, episode=207 reward=0.7775795 (500.13 it/sec) -training >> step=1239800, episode=207 reward=0.7471038 (531.90 it/sec) -training >> step=1239900, episode=207 reward=0.7916563 (542.41 it/sec) -training >> step=1240000, episode=207 reward=0.7490991 (497.56 it/sec) -training >> step=1240100, episode=207 reward=0.7835625 (511.55 it/sec) -training >> step=1240200, episode=207 reward=0.7717816 (529.77 it/sec) -training >> step=1240300, episode=207 reward=0.7760779 (535.21 it/sec) -training >> step=1240400, episode=207 reward=0.7335662 (516.78 it/sec) -training >> step=1240500, episode=207 reward=0.7499994 (483.56 it/sec) -training >> step=1240600, episode=207 reward=0.7415231 (503.60 it/sec) -training >> step=1240700, episode=207 reward=0.7429929 (529.12 it/sec) -training >> step=1240800, episode=207 reward=0.7616223 (514.32 it/sec) -training >> step=1240900, episode=207 reward=0.7571774 (491.86 it/sec) -training >> step=1241000, episode=207 reward=0.762931 (513.83 it/sec) -training >> step=1241100, episode=207 reward=0.7367328 (507.85 it/sec) -training >> step=1241200, episode=207 reward=0.7578011 (528.42 it/sec) -training >> step=1241300, episode=208 reward=0.7541555 (226.13 it/sec) -training >> step=1241400, episode=208 reward=0.7517473 (500.88 it/sec) -training >> step=1241500, episode=208 reward=0.7710876 (507.36 it/sec) -training >> step=1241600, episode=208 reward=0.7600399 (542.84 it/sec) -training >> step=1241700, episode=208 reward=0.7553495 (520.45 it/sec) -training >> step=1241800, episode=208 reward=0.7494953 (529.53 it/sec) -training >> step=1241900, episode=208 reward=0.7707285 (459.58 it/sec) -training >> step=1242000, episode=208 reward=0.7557808 (505.98 it/sec) -training >> step=1242100, episode=208 reward=0.7594806 (543.30 it/sec) -training >> step=1242200, episode=208 reward=0.7542604 (568.23 it/sec) -training >> step=1242300, episode=208 reward=0.7458912 (521.52 it/sec) -training >> step=1242400, episode=208 reward=0.7623206 (500.66 it/sec) -training >> step=1242500, episode=208 reward=0.7616274 (551.15 it/sec) -training >> step=1242600, episode=208 reward=0.7558922 (515.79 it/sec) -training >> step=1242700, episode=208 reward=0.7397725 (528.32 it/sec) -training >> step=1242800, episode=208 reward=0.757807 (536.50 it/sec) -training >> step=1242900, episode=208 reward=0.7618463 (533.96 it/sec) -training >> step=1243000, episode=208 reward=0.7680044 (493.73 it/sec) -training >> step=1243100, episode=208 reward=0.7561131 (519.76 it/sec) -training >> step=1243200, episode=208 reward=0.7705464 (503.16 it/sec) -training >> step=1243300, episode=208 reward=0.7482558 (532.11 it/sec) -training >> step=1243400, episode=208 reward=0.7551334 (521.33 it/sec) -training >> step=1243500, episode=208 reward=0.7452663 (467.07 it/sec) -training >> step=1243600, episode=208 reward=0.7541253 (473.01 it/sec) -training >> step=1243700, episode=208 reward=0.7732835 (530.41 it/sec) -training >> step=1243800, episode=208 reward=0.7743466 (532.92 it/sec) -training >> step=1243900, episode=208 reward=0.7897959 (496.89 it/sec) -training >> step=1244000, episode=208 reward=0.7542178 (513.30 it/sec) -training >> step=1244100, episode=208 reward=0.7534149 (513.57 it/sec) -training >> step=1244200, episode=208 reward=0.7755003 (530.80 it/sec) -training >> step=1244300, episode=208 reward=0.7783751 (519.75 it/sec) -training >> step=1244400, episode=208 reward=0.769181 (552.37 it/sec) -training >> step=1244500, episode=208 reward=0.7529202 (513.75 it/sec) -training >> step=1244600, episode=208 reward=0.7827495 (499.57 it/sec) -training >> step=1244700, episode=208 reward=0.7614985 (528.98 it/sec) -training >> step=1244800, episode=208 reward=0.7506854 (516.33 it/sec) -training >> step=1244900, episode=208 reward=0.7658963 (535.28 it/sec) -training >> step=1245000, episode=208 reward=0.7391671 (503.72 it/sec) -training >> step=1245100, episode=208 reward=0.7681018 (542.97 it/sec) -training >> step=1245200, episode=208 reward=0.7488889 (512.44 it/sec) -training >> step=1245300, episode=208 reward=0.7539219 (535.84 it/sec) -training >> step=1245400, episode=208 reward=0.7737328 (427.93 it/sec) -training >> step=1245500, episode=208 reward=0.7629242 (499.22 it/sec) -training >> step=1245600, episode=208 reward=0.7632467 (520.35 it/sec) -training >> step=1245700, episode=208 reward=0.7463083 (501.00 it/sec) -training >> step=1245800, episode=208 reward=0.7649468 (550.55 it/sec) -training >> step=1245900, episode=208 reward=0.7529145 (530.99 it/sec) -training >> step=1246000, episode=208 reward=0.7595356 (502.50 it/sec) -training >> step=1246100, episode=208 reward=0.7448959 (521.73 it/sec) -training >> step=1246200, episode=208 reward=0.7542247 (537.98 it/sec) -training >> step=1246300, episode=208 reward=0.753018 (548.25 it/sec) -training >> step=1246400, episode=208 reward=0.7463505 (529.73 it/sec) -training >> step=1246500, episode=208 reward=0.7586603 (538.84 it/sec) -training >> step=1246600, episode=208 reward=0.7623408 (541.06 it/sec) -training >> step=1246700, episode=208 reward=0.7382637 (487.69 it/sec) -training >> step=1246800, episode=208 reward=0.7648963 (509.92 it/sec) -training >> step=1246900, episode=208 reward=0.7606792 (544.65 it/sec) -training >> step=1247000, episode=208 reward=0.7603502 (549.55 it/sec) -training >> step=1247100, episode=208 reward=0.7344832 (480.87 it/sec) -training >> step=1247200, episode=208 reward=0.7462691 (491.03 it/sec) -training >> step=1247300, episode=209 reward=0.7534945 (134.26 it/sec) -training >> step=1247400, episode=209 reward=0.7452163 (507.35 it/sec) -training >> step=1247500, episode=209 reward=0.7586092 (491.09 it/sec) -training >> step=1247600, episode=209 reward=0.7557951 (519.99 it/sec) -training >> step=1247700, episode=209 reward=0.7706224 (510.43 it/sec) -training >> step=1247800, episode=209 reward=0.7396566 (557.77 it/sec) -training >> step=1247900, episode=209 reward=0.7530729 (493.58 it/sec) -training >> step=1248000, episode=209 reward=0.7674025 (514.57 it/sec) -training >> step=1248100, episode=209 reward=0.7622119 (529.93 it/sec) -training >> step=1248200, episode=209 reward=0.7746238 (482.84 it/sec) -training >> step=1248300, episode=209 reward=0.7387429 (503.53 it/sec) -training >> step=1248400, episode=209 reward=0.7680349 (457.80 it/sec) -training >> step=1248500, episode=209 reward=0.7567306 (469.43 it/sec) -training >> step=1248600, episode=209 reward=0.7600093 (485.69 it/sec) -training >> step=1248700, episode=209 reward=0.7768933 (479.16 it/sec) -training >> step=1248800, episode=209 reward=0.7530097 (500.15 it/sec) -training >> step=1248900, episode=209 reward=0.7756332 (494.42 it/sec) -training >> step=1249000, episode=209 reward=0.7738433 (492.73 it/sec) -training >> step=1249100, episode=209 reward=0.7507471 (500.06 it/sec) -training >> step=1249200, episode=209 reward=0.7615383 (460.59 it/sec) -training >> step=1249300, episode=209 reward=0.7691793 (398.17 it/sec) -training >> step=1249400, episode=209 reward=0.7695483 (415.55 it/sec) -training >> step=1249500, episode=209 reward=0.7581606 (480.92 it/sec) -training >> step=1249600, episode=209 reward=0.7695833 (453.99 it/sec) -training >> step=1249700, episode=209 reward=0.7531726 (470.45 it/sec) -training >> step=1249800, episode=209 reward=0.7669978 (506.93 it/sec) -training >> step=1249900, episode=209 reward=0.7611027 (442.77 it/sec) -training >> step=1250000, episode=209 reward=0.7621864 (510.15 it/sec) -training >> step=1250100, episode=209 reward=0.7573306 (525.47 it/sec) -training >> step=1250200, episode=209 reward=0.7489215 (497.20 it/sec) -training >> step=1250300, episode=209 reward=0.7556854 (465.60 it/sec) -training >> step=1250400, episode=209 reward=0.7461722 (554.37 it/sec) -training >> step=1250500, episode=209 reward=0.7576699 (475.85 it/sec) -training >> step=1250600, episode=209 reward=0.7682292 (517.81 it/sec) -training >> step=1250700, episode=209 reward=0.764527 (528.34 it/sec) -training >> step=1250800, episode=209 reward=0.7706388 (533.83 it/sec) -training >> step=1250900, episode=209 reward=0.7473615 (440.54 it/sec) -training >> step=1251000, episode=209 reward=0.7674901 (494.06 it/sec) -training >> step=1251100, episode=209 reward=0.7905139 (528.27 it/sec) -training >> step=1251200, episode=209 reward=0.7612464 (528.80 it/sec) -training >> step=1251300, episode=209 reward=0.7634341 (528.83 it/sec) -training >> step=1251400, episode=209 reward=0.7553905 (543.98 it/sec) -training >> step=1251500, episode=209 reward=0.7777653 (538.45 it/sec) -training >> step=1251600, episode=209 reward=0.7495717 (380.12 it/sec) -training >> step=1251700, episode=209 reward=0.7548322 (535.12 it/sec) -training >> step=1251800, episode=209 reward=0.7711002 (518.60 it/sec) -training >> step=1251900, episode=209 reward=0.7498224 (519.17 it/sec) -training >> step=1252000, episode=209 reward=0.7790862 (550.21 it/sec) -training >> step=1252100, episode=209 reward=0.7646589 (533.56 it/sec) -training >> step=1252200, episode=209 reward=0.7670054 (503.17 it/sec) -training >> step=1252300, episode=209 reward=0.7592282 (568.26 it/sec) -training >> step=1252400, episode=209 reward=0.7622859 (541.87 it/sec) -training >> step=1252500, episode=209 reward=0.7645868 (411.77 it/sec) -training >> step=1252600, episode=209 reward=0.7687747 (524.64 it/sec) -training >> step=1252700, episode=209 reward=0.7502965 (527.68 it/sec) -training >> step=1252800, episode=209 reward=0.762572 (503.90 it/sec) -training >> step=1252900, episode=209 reward=0.7734186 (506.22 it/sec) -training >> step=1253000, episode=209 reward=0.7501227 (505.11 it/sec) -training >> step=1253100, episode=209 reward=0.7510055 (532.10 it/sec) -training >> step=1253200, episode=209 reward=0.743365 (531.42 it/sec) -training >> step=1253300, episode=210 reward=0.7639453 (127.63 it/sec) -training >> step=1253400, episode=210 reward=0.7407652 (486.23 it/sec) -training >> step=1253500, episode=210 reward=0.7315306 (542.07 it/sec) -training >> step=1253600, episode=210 reward=0.7720211 (504.40 it/sec) -training >> step=1253700, episode=210 reward=0.7463169 (495.37 it/sec) -training >> step=1253800, episode=210 reward=0.7619006 (477.54 it/sec) -training >> step=1253900, episode=210 reward=0.7510315 (515.11 it/sec) -training >> step=1254000, episode=210 reward=0.7542669 (504.52 it/sec) -training >> step=1254100, episode=210 reward=0.7578052 (506.15 it/sec) -training >> step=1254200, episode=210 reward=0.7606014 (523.29 it/sec) -training >> step=1254300, episode=210 reward=0.7698299 (535.76 it/sec) -training >> step=1254400, episode=210 reward=0.7550901 (511.59 it/sec) -training >> step=1254500, episode=210 reward=0.7610246 (529.02 it/sec) -training >> step=1254600, episode=210 reward=0.7675004 (501.88 it/sec) -training >> step=1254700, episode=210 reward=0.763018 (551.51 it/sec) -training >> step=1254800, episode=210 reward=0.7654875 (538.35 it/sec) -training >> step=1254900, episode=210 reward=0.7553448 (517.30 it/sec) -training >> step=1255000, episode=210 reward=0.7448488 (510.51 it/sec) -training >> step=1255100, episode=210 reward=0.7492794 (539.43 it/sec) -training >> step=1255200, episode=210 reward=0.7509258 (503.84 it/sec) -training >> step=1255300, episode=210 reward=0.7846963 (523.97 it/sec) -training >> step=1255400, episode=210 reward=0.7626186 (552.67 it/sec) -training >> step=1255500, episode=210 reward=0.7495159 (542.88 it/sec) -training >> step=1255600, episode=210 reward=0.7621459 (478.48 it/sec) -training >> step=1255700, episode=210 reward=0.7657545 (506.12 it/sec) -training >> step=1255800, episode=210 reward=0.7511374 (507.40 it/sec) -training >> step=1255900, episode=210 reward=0.7495381 (470.88 it/sec) -training >> step=1256000, episode=210 reward=0.7566378 (517.21 it/sec) -training >> step=1256100, episode=210 reward=0.7592153 (500.03 it/sec) -training >> step=1256200, episode=210 reward=0.7700841 (496.19 it/sec) -training >> step=1256300, episode=210 reward=0.7556488 (468.73 it/sec) -training >> step=1256400, episode=210 reward=0.742367 (508.71 it/sec) -training >> step=1256500, episode=210 reward=0.7537028 (521.08 it/sec) -training >> step=1256600, episode=210 reward=0.7680367 (547.31 it/sec) -training >> step=1256700, episode=210 reward=0.7568427 (507.22 it/sec) -training >> step=1256800, episode=210 reward=0.7818776 (487.52 it/sec) -training >> step=1256900, episode=210 reward=0.7492655 (460.59 it/sec) -training >> step=1257000, episode=210 reward=0.7832016 (510.64 it/sec) -training >> step=1257100, episode=210 reward=0.7498139 (514.85 it/sec) -training >> step=1257200, episode=210 reward=0.7487635 (498.42 it/sec) -training >> step=1257300, episode=210 reward=0.7534645 (530.93 it/sec) -training >> step=1257400, episode=210 reward=0.7479215 (515.48 it/sec) -training >> step=1257500, episode=210 reward=0.7727644 (499.96 it/sec) -training >> step=1257600, episode=210 reward=0.7423604 (500.92 it/sec) -training >> step=1257700, episode=210 reward=0.7729719 (517.25 it/sec) -training >> step=1257800, episode=210 reward=0.7401779 (364.32 it/sec) -training >> step=1257900, episode=210 reward=0.7811559 (531.85 it/sec) -training >> step=1258000, episode=210 reward=0.7632139 (508.29 it/sec) -training >> step=1258100, episode=210 reward=0.7652878 (501.25 it/sec) -training >> step=1258200, episode=210 reward=0.7628671 (483.84 it/sec) -training >> step=1258300, episode=210 reward=0.7609913 (520.08 it/sec) -training >> step=1258400, episode=210 reward=0.7441553 (516.79 it/sec) -training >> step=1258500, episode=210 reward=0.7676901 (501.84 it/sec) -training >> step=1258600, episode=210 reward=0.7462888 (517.99 it/sec) -training >> step=1258700, episode=210 reward=0.7699467 (435.29 it/sec) -training >> step=1258800, episode=210 reward=0.7778064 (486.11 it/sec) -training >> step=1258900, episode=210 reward=0.7533509 (525.04 it/sec) -training >> step=1259000, episode=210 reward=0.7726642 (497.27 it/sec) -training >> step=1259100, episode=210 reward=0.7528887 (536.71 it/sec) -training >> step=1259200, episode=210 reward=0.7370704 (477.77 it/sec) -training >> step=1259300, episode=211 reward=0.7457243 (213.56 it/sec) -training >> step=1259400, episode=211 reward=0.7702023 (551.73 it/sec) -training >> step=1259500, episode=211 reward=0.7561819 (526.79 it/sec) -training >> step=1259600, episode=211 reward=0.7333747 (508.75 it/sec) -training >> step=1259700, episode=211 reward=0.774034 (509.17 it/sec) -training >> step=1259800, episode=211 reward=0.7541643 (530.76 it/sec) -training >> step=1259900, episode=211 reward=0.7611338 (475.42 it/sec) -training >> step=1260000, episode=211 reward=0.7775183 (496.73 it/sec) -training >> step=1260100, episode=211 reward=0.7656574 (518.07 it/sec) -training >> step=1260200, episode=211 reward=0.7497226 (590.92 it/sec) -training >> step=1260300, episode=211 reward=0.7682925 (513.49 it/sec) -training >> step=1260400, episode=211 reward=0.7730638 (525.92 it/sec) -training >> step=1260500, episode=211 reward=0.7511349 (536.42 it/sec) -training >> step=1260600, episode=211 reward=0.7591196 (503.94 it/sec) -training >> step=1260700, episode=211 reward=0.7791876 (544.37 it/sec) -training >> step=1260800, episode=211 reward=0.7556843 (530.53 it/sec) -training >> step=1260900, episode=211 reward=0.7699519 (524.99 it/sec) -training >> step=1261000, episode=211 reward=0.7155676 (546.74 it/sec) -training >> step=1261100, episode=211 reward=0.7569057 (472.64 it/sec) -training >> step=1261200, episode=211 reward=0.7632833 (533.12 it/sec) -training >> step=1261300, episode=211 reward=0.7586733 (508.83 it/sec) -training >> step=1261400, episode=211 reward=0.7631899 (534.74 it/sec) -training >> step=1261500, episode=211 reward=0.7735539 (516.01 it/sec) -training >> step=1261600, episode=211 reward=0.7746035 (478.57 it/sec) -training >> step=1261700, episode=211 reward=0.7576471 (492.65 it/sec) -training >> step=1261800, episode=211 reward=0.7516999 (502.82 it/sec) -training >> step=1261900, episode=211 reward=0.7629645 (539.89 it/sec) -training >> step=1262000, episode=211 reward=0.7526467 (526.43 it/sec) -training >> step=1262100, episode=211 reward=0.7431881 (491.40 it/sec) -training >> step=1262200, episode=211 reward=0.7706172 (503.87 it/sec) -training >> step=1262300, episode=211 reward=0.7905976 (495.08 it/sec) -training >> step=1262400, episode=211 reward=0.76191 (505.53 it/sec) -training >> step=1262500, episode=211 reward=0.7550721 (560.43 it/sec) -training >> step=1262600, episode=211 reward=0.7787312 (524.46 it/sec) -training >> step=1262700, episode=211 reward=0.7741854 (517.84 it/sec) -training >> step=1262800, episode=211 reward=0.745645 (545.46 it/sec) -training >> step=1262900, episode=211 reward=0.7545698 (517.48 it/sec) -training >> step=1263000, episode=211 reward=0.7629949 (545.61 it/sec) -training >> step=1263100, episode=211 reward=0.7426769 (527.21 it/sec) -training >> step=1263200, episode=211 reward=0.7699057 (488.68 it/sec) -training >> step=1263300, episode=211 reward=0.7396333 (549.55 it/sec) -training >> step=1263400, episode=211 reward=0.7487458 (513.86 it/sec) -training >> step=1263500, episode=211 reward=0.7712876 (478.62 it/sec) -training >> step=1263600, episode=211 reward=0.7400576 (470.48 it/sec) -training >> step=1263700, episode=211 reward=0.7364783 (508.44 it/sec) -training >> step=1263800, episode=211 reward=0.7604495 (496.68 it/sec) -training >> step=1263900, episode=211 reward=0.7573229 (504.60 it/sec) -training >> step=1264000, episode=211 reward=0.7660292 (495.21 it/sec) -training >> step=1264100, episode=211 reward=0.751276 (420.05 it/sec) -training >> step=1264200, episode=211 reward=0.7487695 (390.87 it/sec) -training >> step=1264300, episode=211 reward=0.7486718 (547.64 it/sec) -training >> step=1264400, episode=211 reward=0.7653646 (545.92 it/sec) -training >> step=1264500, episode=211 reward=0.7445815 (521.03 it/sec) -training >> step=1264600, episode=211 reward=0.7553009 (510.55 it/sec) -training >> step=1264700, episode=211 reward=0.7562523 (437.33 it/sec) -training >> step=1264800, episode=211 reward=0.7597929 (520.13 it/sec) -training >> step=1264900, episode=211 reward=0.7865675 (495.39 it/sec) -training >> step=1265000, episode=211 reward=0.7577775 (522.37 it/sec) -training >> step=1265100, episode=211 reward=0.7573495 (530.21 it/sec) -training >> step=1265200, episode=211 reward=0.7639795 (495.54 it/sec) -training >> step=1265300, episode=212 reward=0.7840495 (166.65 it/sec) -training >> step=1265400, episode=212 reward=0.761058 (505.70 it/sec) -training >> step=1265500, episode=212 reward=0.755367 (468.95 it/sec) -training >> step=1265600, episode=212 reward=0.7799771 (517.65 it/sec) -training >> step=1265700, episode=212 reward=0.7607499 (540.23 it/sec) -training >> step=1265800, episode=212 reward=0.7403366 (514.38 it/sec) -training >> step=1265900, episode=212 reward=0.775058 (520.42 it/sec) -training >> step=1266000, episode=212 reward=0.743961 (531.07 it/sec) -training >> step=1266100, episode=212 reward=0.78596 (536.03 it/sec) -training >> step=1266200, episode=212 reward=0.7649769 (514.17 it/sec) -training >> step=1266300, episode=212 reward=0.7592022 (530.30 it/sec) -training >> step=1266400, episode=212 reward=0.7642574 (538.86 it/sec) -training >> step=1266500, episode=212 reward=0.7603345 (478.33 it/sec) -training >> step=1266600, episode=212 reward=0.785091 (503.28 it/sec) -training >> step=1266700, episode=212 reward=0.7908764 (514.35 it/sec) -training >> step=1266800, episode=212 reward=0.7400751 (502.98 it/sec) -training >> step=1266900, episode=212 reward=0.7628664 (505.38 it/sec) -training >> step=1267000, episode=212 reward=0.7666594 (530.95 it/sec) -training >> step=1267100, episode=212 reward=0.7505742 (471.07 it/sec) -training >> step=1267200, episode=212 reward=0.757485 (528.55 it/sec) -training >> step=1267300, episode=212 reward=0.7805182 (548.18 it/sec) -training >> step=1267400, episode=212 reward=0.7749323 (471.34 it/sec) -training >> step=1267500, episode=212 reward=0.7581562 (550.88 it/sec) -training >> step=1267600, episode=212 reward=0.7509543 (476.17 it/sec) -training >> step=1267700, episode=212 reward=0.7673374 (517.93 it/sec) -training >> step=1267800, episode=212 reward=0.7651019 (543.67 it/sec) -training >> step=1267900, episode=212 reward=0.7380341 (516.05 it/sec) -training >> step=1268000, episode=212 reward=0.7636819 (520.29 it/sec) -training >> step=1268100, episode=212 reward=0.7656511 (440.73 it/sec) -training >> step=1268200, episode=212 reward=0.7443671 (488.77 it/sec) -training >> step=1268300, episode=212 reward=0.7556182 (508.64 it/sec) -training >> step=1268400, episode=212 reward=0.756259 (487.31 it/sec) -training >> step=1268500, episode=212 reward=0.7495818 (530.50 it/sec) -training >> step=1268600, episode=212 reward=0.7514783 (474.08 it/sec) -training >> step=1268700, episode=212 reward=0.7589537 (560.26 it/sec) -training >> step=1268800, episode=212 reward=0.7727568 (482.87 it/sec) -training >> step=1268900, episode=212 reward=0.7663628 (491.44 it/sec) -training >> step=1269000, episode=212 reward=0.7680324 (526.42 it/sec) -training >> step=1269100, episode=212 reward=0.7590176 (489.95 it/sec) -training >> step=1269200, episode=212 reward=0.7416583 (511.82 it/sec) -training >> step=1269300, episode=212 reward=0.7320253 (531.28 it/sec) -training >> step=1269400, episode=212 reward=0.7634574 (528.54 it/sec) -training >> step=1269500, episode=212 reward=0.7747525 (513.78 it/sec) -training >> step=1269600, episode=212 reward=0.7624941 (509.59 it/sec) -training >> step=1269700, episode=212 reward=0.7580278 (509.26 it/sec) -training >> step=1269800, episode=212 reward=0.7457771 (542.26 it/sec) -training >> step=1269900, episode=212 reward=0.7485608 (497.11 it/sec) -training >> step=1270000, episode=212 reward=0.7581769 (484.54 it/sec) -training >> step=1270100, episode=212 reward=0.7380158 (505.85 it/sec) -training >> step=1270200, episode=212 reward=0.7638134 (472.02 it/sec) -training >> step=1270300, episode=212 reward=0.7534472 (532.03 it/sec) -training >> step=1270400, episode=212 reward=0.7643681 (377.44 it/sec) -training >> step=1270500, episode=212 reward=0.7450942 (533.54 it/sec) -training >> step=1270600, episode=212 reward=0.740927 (503.10 it/sec) -training >> step=1270700, episode=212 reward=0.7525563 (503.05 it/sec) -training >> step=1270800, episode=212 reward=0.7519147 (468.53 it/sec) -training >> step=1270900, episode=212 reward=0.7628675 (555.38 it/sec) -training >> step=1271000, episode=212 reward=0.7551616 (509.21 it/sec) -training >> step=1271100, episode=212 reward=0.7615248 (513.58 it/sec) -training >> step=1271200, episode=212 reward=0.7640863 (465.64 it/sec) -training >> step=1271300, episode=213 reward=0.7551438 (197.13 it/sec) -training >> step=1271400, episode=213 reward=0.7613737 (518.89 it/sec) -training >> step=1271500, episode=213 reward=0.7643028 (497.10 it/sec) -training >> step=1271600, episode=213 reward=0.7562873 (507.96 it/sec) -training >> step=1271700, episode=213 reward=0.7626134 (533.48 it/sec) -training >> step=1271800, episode=213 reward=0.7530194 (528.79 it/sec) -training >> step=1271900, episode=213 reward=0.7595159 (506.05 it/sec) -training >> step=1272000, episode=213 reward=0.7279003 (506.66 it/sec) -training >> step=1272100, episode=213 reward=0.7645053 (497.89 it/sec) -training >> step=1272200, episode=213 reward=0.7539215 (532.02 it/sec) -training >> step=1272300, episode=213 reward=0.7801578 (534.63 it/sec) -training >> step=1272400, episode=213 reward=0.7664385 (495.35 it/sec) -training >> step=1272500, episode=213 reward=0.7556635 (533.46 it/sec) -training >> step=1272600, episode=213 reward=0.767459 (534.66 it/sec) -training >> step=1272700, episode=213 reward=0.7720788 (507.12 it/sec) -training >> step=1272800, episode=213 reward=0.7551668 (503.54 it/sec) -training >> step=1272900, episode=213 reward=0.7715158 (537.17 it/sec) -training >> step=1273000, episode=213 reward=0.7592449 (515.29 it/sec) -training >> step=1273100, episode=213 reward=0.7795759 (524.00 it/sec) -training >> step=1273200, episode=213 reward=0.7465085 (524.59 it/sec) -training >> step=1273300, episode=213 reward=0.7752869 (530.46 it/sec) -training >> step=1273400, episode=213 reward=0.7785102 (507.83 it/sec) -training >> step=1273500, episode=213 reward=0.750324 (500.81 it/sec) -training >> step=1273600, episode=213 reward=0.7496662 (493.11 it/sec) -training >> step=1273700, episode=213 reward=0.7697439 (503.90 it/sec) -training >> step=1273800, episode=213 reward=0.7665098 (498.59 it/sec) -training >> step=1273900, episode=213 reward=0.7634923 (528.40 it/sec) -training >> step=1274000, episode=213 reward=0.7521306 (520.68 it/sec) -training >> step=1274100, episode=213 reward=0.7777833 (495.95 it/sec) -training >> step=1274200, episode=213 reward=0.7620187 (513.80 it/sec) -training >> step=1274300, episode=213 reward=0.7602764 (545.38 it/sec) -training >> step=1274400, episode=213 reward=0.7690505 (511.26 it/sec) -training >> step=1274500, episode=213 reward=0.7469057 (490.57 it/sec) -training >> step=1274600, episode=213 reward=0.7424663 (530.47 it/sec) -training >> step=1274700, episode=213 reward=0.7491291 (532.70 it/sec) -training >> step=1274800, episode=213 reward=0.7515057 (526.67 it/sec) -training >> step=1274900, episode=213 reward=0.7477258 (528.10 it/sec) -training >> step=1275000, episode=213 reward=0.7420933 (473.25 it/sec) -training >> step=1275100, episode=213 reward=0.7561762 (473.04 it/sec) -training >> step=1275200, episode=213 reward=0.7617428 (553.42 it/sec) -training >> step=1275300, episode=213 reward=0.7617638 (515.35 it/sec) -training >> step=1275400, episode=213 reward=0.7509496 (484.41 it/sec) -training >> step=1275500, episode=213 reward=0.7530146 (522.89 it/sec) -training >> step=1275600, episode=213 reward=0.774691 (481.95 it/sec) -training >> step=1275700, episode=213 reward=0.7500425 (516.34 it/sec) -training >> step=1275800, episode=213 reward=0.7552397 (488.45 it/sec) -training >> step=1275900, episode=213 reward=0.7626854 (508.63 it/sec) -training >> step=1276000, episode=213 reward=0.7577203 (539.03 it/sec) -training >> step=1276100, episode=213 reward=0.7621312 (496.04 it/sec) -training >> step=1276200, episode=213 reward=0.75969 (505.18 it/sec) -training >> step=1276300, episode=213 reward=0.7633684 (477.98 it/sec) -training >> step=1276400, episode=213 reward=0.7438457 (499.35 it/sec) -training >> step=1276500, episode=213 reward=0.7356015 (512.14 it/sec) -training >> step=1276600, episode=213 reward=0.7619595 (514.44 it/sec) -training >> step=1276700, episode=213 reward=0.7767477 (546.27 it/sec) -training >> step=1276800, episode=213 reward=0.7436418 (419.36 it/sec) -training >> step=1276900, episode=213 reward=0.7540724 (488.78 it/sec) -training >> step=1277000, episode=213 reward=0.7236423 (523.16 it/sec) -training >> step=1277100, episode=213 reward=0.7382435 (511.94 it/sec) -training >> step=1277200, episode=213 reward=0.7629286 (488.60 it/sec) -training >> step=1277300, episode=214 reward=0.7560705 (177.06 it/sec) -training >> step=1277400, episode=214 reward=0.759105 (493.72 it/sec) -training >> step=1277500, episode=214 reward=0.7463526 (503.99 it/sec) -training >> step=1277600, episode=214 reward=0.7341387 (523.19 it/sec) -training >> step=1277700, episode=214 reward=0.7505074 (536.47 it/sec) -training >> step=1277800, episode=214 reward=0.7659231 (511.60 it/sec) -training >> step=1277900, episode=214 reward=0.7419817 (497.87 it/sec) -training >> step=1278000, episode=214 reward=0.7593376 (536.90 it/sec) -training >> step=1278100, episode=214 reward=0.7472622 (492.69 it/sec) -training >> step=1278200, episode=214 reward=0.7428058 (475.50 it/sec) -training >> step=1278300, episode=214 reward=0.7756773 (547.06 it/sec) -training >> step=1278400, episode=214 reward=0.7644957 (518.69 it/sec) -training >> step=1278500, episode=214 reward=0.7689555 (500.83 it/sec) -training >> step=1278600, episode=214 reward=0.7549072 (534.81 it/sec) -training >> step=1278700, episode=214 reward=0.7607133 (415.94 it/sec) -training >> step=1278800, episode=214 reward=0.7525169 (486.05 it/sec) -training >> step=1278900, episode=214 reward=0.7627366 (506.68 it/sec) -training >> step=1279000, episode=214 reward=0.7751397 (502.89 it/sec) -training >> step=1279100, episode=214 reward=0.7584379 (505.47 it/sec) -training >> step=1279200, episode=214 reward=0.7658756 (503.83 it/sec) -training >> step=1279300, episode=214 reward=0.7487699 (501.36 it/sec) -training >> step=1279400, episode=214 reward=0.7506964 (543.65 it/sec) -training >> step=1279500, episode=214 reward=0.77306 (528.02 it/sec) -training >> step=1279600, episode=214 reward=0.7612244 (487.99 it/sec) -training >> step=1279700, episode=214 reward=0.7722309 (542.28 it/sec) -training >> step=1279800, episode=214 reward=0.7690616 (524.86 it/sec) -training >> step=1279900, episode=214 reward=0.7530308 (514.15 it/sec) -training >> step=1280000, episode=214 reward=0.756882 (519.09 it/sec) -training >> step=1280100, episode=214 reward=0.7751682 (530.38 it/sec) -training >> step=1280200, episode=214 reward=0.766621 (548.87 it/sec) -training >> step=1280300, episode=214 reward=0.7508464 (493.35 it/sec) -training >> step=1280400, episode=214 reward=0.7620396 (525.06 it/sec) -training >> step=1280500, episode=214 reward=0.7546405 (502.14 it/sec) -training >> step=1280600, episode=214 reward=0.7527476 (537.53 it/sec) -training >> step=1280700, episode=214 reward=0.7564886 (502.91 it/sec) -training >> step=1280800, episode=214 reward=0.7692822 (547.98 it/sec) -training >> step=1280900, episode=214 reward=0.776548 (477.84 it/sec) -training >> step=1281000, episode=214 reward=0.7550611 (514.64 it/sec) -training >> step=1281100, episode=214 reward=0.7815994 (533.61 it/sec) -training >> step=1281200, episode=214 reward=0.7622324 (530.73 it/sec) -training >> step=1281300, episode=214 reward=0.7520657 (522.36 it/sec) -training >> step=1281400, episode=214 reward=0.7541144 (471.79 it/sec) -training >> step=1281500, episode=214 reward=0.7639049 (490.32 it/sec) -training >> step=1281600, episode=214 reward=0.7643275 (519.56 it/sec) -training >> step=1281700, episode=214 reward=0.7501317 (535.56 it/sec) -training >> step=1281800, episode=214 reward=0.7645822 (521.71 it/sec) -training >> step=1281900, episode=214 reward=0.7555346 (482.67 it/sec) -training >> step=1282000, episode=214 reward=0.7937807 (492.22 it/sec) -training >> step=1282100, episode=214 reward=0.7535743 (528.94 it/sec) -training >> step=1282200, episode=214 reward=0.7497629 (521.61 it/sec) -training >> step=1282300, episode=214 reward=0.7643953 (532.88 it/sec) -training >> step=1282400, episode=214 reward=0.7612203 (540.94 it/sec) -training >> step=1282500, episode=214 reward=0.737332 (522.79 it/sec) -training >> step=1282600, episode=214 reward=0.758631 (511.91 it/sec) -training >> step=1282700, episode=214 reward=0.7670097 (513.42 it/sec) -training >> step=1282800, episode=214 reward=0.7717554 (518.40 it/sec) -training >> step=1282900, episode=214 reward=0.7375385 (343.18 it/sec) -training >> step=1283000, episode=214 reward=0.7741868 (507.43 it/sec) -training >> step=1283100, episode=214 reward=0.7609995 (508.46 it/sec) -training >> step=1283200, episode=214 reward=0.7246362 (534.70 it/sec) -training >> step=1283300, episode=215 reward=0.7564155 (177.37 it/sec) -training >> step=1283400, episode=215 reward=0.7410268 (479.24 it/sec) -training >> step=1283500, episode=215 reward=0.7693777 (510.86 it/sec) -training >> step=1283600, episode=215 reward=0.745014 (537.75 it/sec) -training >> step=1283700, episode=215 reward=0.7673777 (510.60 it/sec) -training >> step=1283800, episode=215 reward=0.761153 (507.77 it/sec) -training >> step=1283900, episode=215 reward=0.7563988 (512.59 it/sec) -training >> step=1284000, episode=215 reward=0.7471891 (567.32 it/sec) -training >> step=1284100, episode=215 reward=0.731506 (501.89 it/sec) -training >> step=1284200, episode=215 reward=0.7663988 (553.25 it/sec) -training >> step=1284300, episode=215 reward=0.7541538 (490.95 it/sec) -training >> step=1284400, episode=215 reward=0.7659571 (525.12 it/sec) -training >> step=1284500, episode=215 reward=0.7582626 (518.77 it/sec) -training >> step=1284600, episode=215 reward=0.7613379 (504.82 it/sec) -training >> step=1284700, episode=215 reward=0.7714899 (539.39 it/sec) -training >> step=1284800, episode=215 reward=0.758925 (539.21 it/sec) -training >> step=1284900, episode=215 reward=0.7658991 (487.66 it/sec) -training >> step=1285000, episode=215 reward=0.7699553 (513.02 it/sec) -training >> step=1285100, episode=215 reward=0.785534 (520.45 it/sec) -training >> step=1285200, episode=215 reward=0.7737316 (505.37 it/sec) -training >> step=1285300, episode=215 reward=0.7659351 (506.89 it/sec) -training >> step=1285400, episode=215 reward=0.7712201 (519.55 it/sec) -training >> step=1285500, episode=215 reward=0.7698348 (506.92 it/sec) -training >> step=1285600, episode=215 reward=0.772324 (542.64 it/sec) -training >> step=1285700, episode=215 reward=0.752753 (460.61 it/sec) -training >> step=1285800, episode=215 reward=0.7845442 (535.72 it/sec) -training >> step=1285900, episode=215 reward=0.7771908 (453.76 it/sec) -training >> step=1286000, episode=215 reward=0.7547197 (508.82 it/sec) -training >> step=1286100, episode=215 reward=0.7799351 (528.85 it/sec) -training >> step=1286200, episode=215 reward=0.7512651 (505.99 it/sec) -training >> step=1286300, episode=215 reward=0.7662738 (508.70 it/sec) -training >> step=1286400, episode=215 reward=0.7854202 (494.16 it/sec) -training >> step=1286500, episode=215 reward=0.7571045 (492.02 it/sec) -training >> step=1286600, episode=215 reward=0.7653442 (535.95 it/sec) -training >> step=1286700, episode=215 reward=0.7769391 (501.96 it/sec) -training >> step=1286800, episode=215 reward=0.7662758 (520.18 it/sec) -training >> step=1286900, episode=215 reward=0.7754412 (532.44 it/sec) -training >> step=1287000, episode=215 reward=0.7486851 (474.93 it/sec) -training >> step=1287100, episode=215 reward=0.731512 (499.02 it/sec) -training >> step=1287200, episode=215 reward=0.7432519 (516.86 it/sec) -training >> step=1287300, episode=215 reward=0.7788584 (520.60 it/sec) -training >> step=1287400, episode=215 reward=0.7819893 (545.72 it/sec) -training >> step=1287500, episode=215 reward=0.7556086 (486.95 it/sec) -training >> step=1287600, episode=215 reward=0.7667604 (515.64 it/sec) -training >> step=1287700, episode=215 reward=0.7632331 (538.98 it/sec) -training >> step=1287800, episode=215 reward=0.7497482 (516.17 it/sec) -training >> step=1287900, episode=215 reward=0.7616726 (514.12 it/sec) -training >> step=1288000, episode=215 reward=0.7481439 (483.96 it/sec) -training >> step=1288100, episode=215 reward=0.7795769 (505.61 it/sec) -training >> step=1288200, episode=215 reward=0.7476395 (536.52 it/sec) -training >> step=1288300, episode=215 reward=0.7419197 (502.62 it/sec) -training >> step=1288400, episode=215 reward=0.7541348 (506.88 it/sec) -training >> step=1288500, episode=215 reward=0.7666501 (534.63 it/sec) -training >> step=1288600, episode=215 reward=0.7606118 (496.08 it/sec) -training >> step=1288700, episode=215 reward=0.7380915 (525.68 it/sec) -training >> step=1288800, episode=215 reward=0.7515491 (522.76 it/sec) -training >> step=1288900, episode=215 reward=0.7747809 (502.80 it/sec) -training >> step=1289000, episode=215 reward=0.7536736 (491.70 it/sec) -training >> step=1289100, episode=215 reward=0.7382827 (378.15 it/sec) -training >> step=1289200, episode=215 reward=0.7498459 (468.94 it/sec) -training >> step=1289300, episode=216 reward=0.7685277 (193.39 it/sec) -training >> step=1289400, episode=216 reward=0.7557217 (482.21 it/sec) -training >> step=1289500, episode=216 reward=0.7438884 (473.51 it/sec) -training >> step=1289600, episode=216 reward=0.7476715 (484.08 it/sec) -training >> step=1289700, episode=216 reward=0.7633528 (517.83 it/sec) -training >> step=1289800, episode=216 reward=0.7652416 (531.86 it/sec) -training >> step=1289900, episode=216 reward=0.7465248 (500.55 it/sec) -training >> step=1290000, episode=216 reward=0.74959 (545.20 it/sec) -training >> step=1290100, episode=216 reward=0.7534632 (538.81 it/sec) -training >> step=1290200, episode=216 reward=0.7536718 (538.15 it/sec) -training >> step=1290300, episode=216 reward=0.7660376 (517.38 it/sec) -training >> step=1290400, episode=216 reward=0.7507441 (481.61 it/sec) -training >> step=1290500, episode=216 reward=0.7528243 (514.12 it/sec) -training >> step=1290600, episode=216 reward=0.7595297 (530.98 it/sec) -training >> step=1290700, episode=216 reward=0.7672587 (524.92 it/sec) -training >> step=1290800, episode=216 reward=0.7712756 (540.75 it/sec) -training >> step=1290900, episode=216 reward=0.7811548 (483.47 it/sec) -training >> step=1291000, episode=216 reward=0.7503723 (499.84 it/sec) -training >> step=1291100, episode=216 reward=0.7683902 (494.73 it/sec) -training >> step=1291200, episode=216 reward=0.7779268 (518.24 it/sec) -training >> step=1291300, episode=216 reward=0.766158 (505.93 it/sec) -training >> step=1291400, episode=216 reward=0.7697117 (493.69 it/sec) -training >> step=1291500, episode=216 reward=0.7571913 (544.81 it/sec) -training >> step=1291600, episode=216 reward=0.7577565 (531.80 it/sec) -training >> step=1291700, episode=216 reward=0.7922506 (555.36 it/sec) -training >> step=1291800, episode=216 reward=0.7436466 (504.06 it/sec) -training >> step=1291900, episode=216 reward=0.7556741 (547.68 it/sec) -training >> step=1292000, episode=216 reward=0.770779 (496.25 it/sec) -training >> step=1292100, episode=216 reward=0.7843205 (530.52 it/sec) -training >> step=1292200, episode=216 reward=0.7625487 (519.91 it/sec) -training >> step=1292300, episode=216 reward=0.7392845 (547.84 it/sec) -training >> step=1292400, episode=216 reward=0.7559894 (534.76 it/sec) -training >> step=1292500, episode=216 reward=0.7591429 (497.58 it/sec) -training >> step=1292600, episode=216 reward=0.7628756 (526.52 it/sec) -training >> step=1292700, episode=216 reward=0.773238 (555.34 it/sec) -training >> step=1292800, episode=216 reward=0.7515895 (555.40 it/sec) -training >> step=1292900, episode=216 reward=0.7499047 (488.06 it/sec) -training >> step=1293000, episode=216 reward=0.7715807 (488.18 it/sec) -training >> step=1293100, episode=216 reward=0.7647099 (466.41 it/sec) -training >> step=1293200, episode=216 reward=0.7691895 (525.27 it/sec) -training >> step=1293300, episode=216 reward=0.7738076 (507.17 it/sec) -training >> step=1293400, episode=216 reward=0.779007 (515.27 it/sec) -training >> step=1293500, episode=216 reward=0.7701554 (497.41 it/sec) -training >> step=1293600, episode=216 reward=0.7610748 (454.61 it/sec) -training >> step=1293700, episode=216 reward=0.7263804 (487.32 it/sec) -training >> step=1293800, episode=216 reward=0.764062 (492.17 it/sec) -training >> step=1293900, episode=216 reward=0.7424583 (578.59 it/sec) -training >> step=1294000, episode=216 reward=0.7342815 (483.52 it/sec) -training >> step=1294100, episode=216 reward=0.7277313 (546.63 it/sec) -training >> step=1294200, episode=216 reward=0.7839208 (512.64 it/sec) -training >> step=1294300, episode=216 reward=0.732336 (521.25 it/sec) -training >> step=1294400, episode=216 reward=0.7485167 (531.45 it/sec) -training >> step=1294500, episode=216 reward=0.7658511 (541.34 it/sec) -training >> step=1294600, episode=216 reward=0.7373175 (522.70 it/sec) -training >> step=1294700, episode=216 reward=0.7514725 (525.23 it/sec) -training >> step=1294800, episode=216 reward=0.7406955 (561.39 it/sec) -training >> step=1294900, episode=216 reward=0.7714776 (533.74 it/sec) -training >> step=1295000, episode=216 reward=0.7790461 (497.40 it/sec) -training >> step=1295100, episode=216 reward=0.7450755 (489.74 it/sec) -training >> step=1295200, episode=216 reward=0.746262 (482.62 it/sec) -training >> step=1295300, episode=217 reward=0.7528148 (145.24 it/sec) -training >> step=1295400, episode=217 reward=0.7170451 (473.04 it/sec) -training >> step=1295500, episode=217 reward=0.7686843 (471.24 it/sec) -training >> step=1295600, episode=217 reward=0.7598492 (519.08 it/sec) -training >> step=1295700, episode=217 reward=0.7490543 (520.83 it/sec) -training >> step=1295800, episode=217 reward=0.7586628 (525.33 it/sec) -training >> step=1295900, episode=217 reward=0.7682709 (506.62 it/sec) -training >> step=1296000, episode=217 reward=0.7665893 (495.46 it/sec) -training >> step=1296100, episode=217 reward=0.742578 (489.32 it/sec) -training >> step=1296200, episode=217 reward=0.7742645 (543.37 it/sec) -training >> step=1296300, episode=217 reward=0.7863632 (512.55 it/sec) -training >> step=1296400, episode=217 reward=0.7751741 (508.49 it/sec) -training >> step=1296500, episode=217 reward=0.7694364 (505.67 it/sec) -training >> step=1296600, episode=217 reward=0.7684225 (550.62 it/sec) -training >> step=1296700, episode=217 reward=0.7732374 (510.79 it/sec) -training >> step=1296800, episode=217 reward=0.7653255 (494.27 it/sec) -training >> step=1296900, episode=217 reward=0.7607808 (486.06 it/sec) -training >> step=1297000, episode=217 reward=0.7534049 (520.11 it/sec) -training >> step=1297100, episode=217 reward=0.7546874 (505.42 it/sec) -training >> step=1297200, episode=217 reward=0.7451838 (490.94 it/sec) -training >> step=1297300, episode=217 reward=0.7550007 (504.28 it/sec) -training >> step=1297400, episode=217 reward=0.7654391 (487.99 it/sec) -training >> step=1297500, episode=217 reward=0.756341 (489.14 it/sec) -training >> step=1297600, episode=217 reward=0.7574562 (519.91 it/sec) -training >> step=1297700, episode=217 reward=0.7454893 (506.69 it/sec) -training >> step=1297800, episode=217 reward=0.7503268 (522.51 it/sec) -training >> step=1297900, episode=217 reward=0.7613305 (497.65 it/sec) -training >> step=1298000, episode=217 reward=0.7709685 (462.84 it/sec) -training >> step=1298100, episode=217 reward=0.7506732 (490.34 it/sec) -training >> step=1298200, episode=217 reward=0.7658466 (515.95 it/sec) -training >> step=1298300, episode=217 reward=0.7701954 (511.14 it/sec) -training >> step=1298400, episode=217 reward=0.7772788 (499.37 it/sec) -training >> step=1298500, episode=217 reward=0.8000404 (488.07 it/sec) -training >> step=1298600, episode=217 reward=0.7411012 (526.91 it/sec) -training >> step=1298700, episode=217 reward=0.7833731 (489.83 it/sec) -training >> step=1298800, episode=217 reward=0.7554258 (491.70 it/sec) -training >> step=1298900, episode=217 reward=0.7402955 (492.75 it/sec) -training >> step=1299000, episode=217 reward=0.7786631 (513.22 it/sec) -training >> step=1299100, episode=217 reward=0.7703534 (495.36 it/sec) -training >> step=1299200, episode=217 reward=0.7486865 (509.94 it/sec) -training >> step=1299300, episode=217 reward=0.7778806 (533.08 it/sec) -training >> step=1299400, episode=217 reward=0.750958 (524.96 it/sec) -training >> step=1299500, episode=217 reward=0.7707534 (487.69 it/sec) -training >> step=1299600, episode=217 reward=0.781038 (506.64 it/sec) -training >> step=1299700, episode=217 reward=0.7816868 (502.21 it/sec) -training >> step=1299800, episode=217 reward=0.7635067 (525.05 it/sec) -training >> step=1299900, episode=217 reward=0.7955124 (516.55 it/sec) -training >> step=1300000, episode=217 reward=0.7692618 (496.08 it/sec) -training >> step=1300100, episode=217 reward=0.7575216 (521.91 it/sec) -training >> step=1300200, episode=217 reward=0.7626747 (488.41 it/sec) -training >> step=1300300, episode=217 reward=0.7457637 (488.47 it/sec) -training >> step=1300400, episode=217 reward=0.7778239 (477.75 it/sec) -training >> step=1300500, episode=217 reward=0.7757467 (526.21 it/sec) -training >> step=1300600, episode=217 reward=0.754932 (506.61 it/sec) -training >> step=1300700, episode=217 reward=0.7454033 (498.34 it/sec) -training >> step=1300800, episode=217 reward=0.7556361 (536.35 it/sec) -training >> step=1300900, episode=217 reward=0.7525272 (562.39 it/sec) -training >> step=1301000, episode=217 reward=0.7580857 (493.55 it/sec) -training >> step=1301100, episode=217 reward=0.7627634 (506.84 it/sec) -training >> step=1301200, episode=217 reward=0.7613268 (384.18 it/sec) -training >> step=1301300, episode=218 reward=0.7835888 (195.63 it/sec) -training >> step=1301400, episode=218 reward=0.7740946 (522.10 it/sec) -training >> step=1301500, episode=218 reward=0.7579678 (510.96 it/sec) -training >> step=1301600, episode=218 reward=0.8017522 (507.30 it/sec) -training >> step=1301700, episode=218 reward=0.7497073 (551.50 it/sec) -training >> step=1301800, episode=218 reward=0.7793036 (525.21 it/sec) -training >> step=1301900, episode=218 reward=0.7728401 (489.26 it/sec) -training >> step=1302000, episode=218 reward=0.7711315 (501.25 it/sec) -training >> step=1302100, episode=218 reward=0.7507684 (525.84 it/sec) -training >> step=1302200, episode=218 reward=0.768651 (552.09 it/sec) -training >> step=1302300, episode=218 reward=0.7658325 (489.42 it/sec) -training >> step=1302400, episode=218 reward=0.7221969 (489.64 it/sec) -training >> step=1302500, episode=218 reward=0.7534107 (551.33 it/sec) -training >> step=1302600, episode=218 reward=0.7856598 (505.07 it/sec) -training >> step=1302700, episode=218 reward=0.7821265 (474.93 it/sec) -training >> step=1302800, episode=218 reward=0.748206 (481.64 it/sec) -training >> step=1302900, episode=218 reward=0.7772874 (523.33 it/sec) -training >> step=1303000, episode=218 reward=0.7719917 (493.76 it/sec) -training >> step=1303100, episode=218 reward=0.7543102 (514.26 it/sec) -training >> step=1303200, episode=218 reward=0.7680048 (480.14 it/sec) -training >> step=1303300, episode=218 reward=0.7908682 (547.37 it/sec) -training >> step=1303400, episode=218 reward=0.7771994 (519.67 it/sec) -training >> step=1303500, episode=218 reward=0.7615371 (506.04 it/sec) -training >> step=1303600, episode=218 reward=0.7575768 (550.56 it/sec) -training >> step=1303700, episode=218 reward=0.7875383 (527.57 it/sec) -training >> step=1303800, episode=218 reward=0.7846894 (505.61 it/sec) -training >> step=1303900, episode=218 reward=0.7482435 (511.07 it/sec) -training >> step=1304000, episode=218 reward=0.7588779 (497.70 it/sec) -training >> step=1304100, episode=218 reward=0.761431 (514.66 it/sec) -training >> step=1304200, episode=218 reward=0.7749808 (489.84 it/sec) -training >> step=1304300, episode=218 reward=0.756145 (522.83 it/sec) -training >> step=1304400, episode=218 reward=0.7367945 (522.26 it/sec) -training >> step=1304500, episode=218 reward=0.7510053 (473.25 it/sec) -training >> step=1304600, episode=218 reward=0.7455068 (517.81 it/sec) -training >> step=1304700, episode=218 reward=0.7450896 (483.00 it/sec) -training >> step=1304800, episode=218 reward=0.7596959 (525.33 it/sec) -training >> step=1304900, episode=218 reward=0.775567 (519.28 it/sec) -training >> step=1305000, episode=218 reward=0.7582821 (516.27 it/sec) -training >> step=1305100, episode=218 reward=0.7696447 (559.80 it/sec) -training >> step=1305200, episode=218 reward=0.7685447 (505.48 it/sec) -training >> step=1305300, episode=218 reward=0.746667 (491.46 it/sec) -training >> step=1305400, episode=218 reward=0.7683501 (533.91 it/sec) -training >> step=1305500, episode=218 reward=0.7529234 (509.70 it/sec) -training >> step=1305600, episode=218 reward=0.7706624 (554.51 it/sec) -training >> step=1305700, episode=218 reward=0.7478593 (532.31 it/sec) -training >> step=1305800, episode=218 reward=0.7435926 (535.61 it/sec) -training >> step=1305900, episode=218 reward=0.7619458 (472.76 it/sec) -training >> step=1306000, episode=218 reward=0.7756437 (484.70 it/sec) -training >> step=1306100, episode=218 reward=0.766424 (533.99 it/sec) -training >> step=1306200, episode=218 reward=0.7522455 (521.30 it/sec) -training >> step=1306300, episode=218 reward=0.7555325 (531.99 it/sec) -training >> step=1306400, episode=218 reward=0.7497779 (547.71 it/sec) -training >> step=1306500, episode=218 reward=0.7544749 (507.08 it/sec) -training >> step=1306600, episode=218 reward=0.7643898 (431.73 it/sec) -training >> step=1306700, episode=218 reward=0.7656844 (545.17 it/sec) -training >> step=1306800, episode=218 reward=0.7450032 (514.63 it/sec) -training >> step=1306900, episode=218 reward=0.733586 (515.83 it/sec) -training >> step=1307000, episode=218 reward=0.7607981 (514.95 it/sec) -training >> step=1307100, episode=218 reward=0.7536477 (493.99 it/sec) -training >> step=1307200, episode=218 reward=0.7535123 (485.04 it/sec) -training >> step=1307300, episode=219 reward=0.7533841 (122.39 it/sec) -training >> step=1307400, episode=219 reward=0.7682495 (465.22 it/sec) -training >> step=1307500, episode=219 reward=0.7758065 (448.90 it/sec) -training >> step=1307600, episode=219 reward=0.7471896 (494.95 it/sec) -training >> step=1307700, episode=219 reward=0.7752146 (459.30 it/sec) -training >> step=1307800, episode=219 reward=0.7492991 (512.99 it/sec) -training >> step=1307900, episode=219 reward=0.7661778 (473.21 it/sec) -training >> step=1308000, episode=219 reward=0.7615664 (534.46 it/sec) -training >> step=1308100, episode=219 reward=0.762408 (418.56 it/sec) -training >> step=1308200, episode=219 reward=0.7678552 (474.09 it/sec) -training >> step=1308300, episode=219 reward=0.7653763 (463.49 it/sec) -training >> step=1308400, episode=219 reward=0.7570522 (488.48 it/sec) -training >> step=1308500, episode=219 reward=0.7767715 (462.81 it/sec) -training >> step=1308600, episode=219 reward=0.7662166 (473.58 it/sec) -training >> step=1308700, episode=219 reward=0.7598979 (532.66 it/sec) -training >> step=1308800, episode=219 reward=0.7429294 (494.21 it/sec) -training >> step=1308900, episode=219 reward=0.7755474 (497.02 it/sec) -training >> step=1309000, episode=219 reward=0.7540159 (453.90 it/sec) -training >> step=1309100, episode=219 reward=0.7538692 (514.51 it/sec) -training >> step=1309200, episode=219 reward=0.7679586 (528.33 it/sec) -training >> step=1309300, episode=219 reward=0.7795025 (471.97 it/sec) -training >> step=1309400, episode=219 reward=0.751143 (518.84 it/sec) -training >> step=1309500, episode=219 reward=0.7632346 (460.34 it/sec) -training >> step=1309600, episode=219 reward=0.7687207 (513.95 it/sec) -training >> step=1309700, episode=219 reward=0.7396073 (539.62 it/sec) -training >> step=1309800, episode=219 reward=0.765916 (519.63 it/sec) -training >> step=1309900, episode=219 reward=0.7517256 (525.88 it/sec) -training >> step=1310000, episode=219 reward=0.7871869 (461.82 it/sec) -training >> step=1310100, episode=219 reward=0.7340639 (420.76 it/sec) -training >> step=1310200, episode=219 reward=0.7549933 (565.80 it/sec) -training >> step=1310300, episode=219 reward=0.7568727 (492.10 it/sec) -training >> step=1310400, episode=219 reward=0.7606366 (509.71 it/sec) -training >> step=1310500, episode=219 reward=0.7702151 (509.87 it/sec) -training >> step=1310600, episode=219 reward=0.7545958 (496.72 it/sec) -training >> step=1310700, episode=219 reward=0.777487 (526.00 it/sec) -training >> step=1310800, episode=219 reward=0.7546438 (469.76 it/sec) -training >> step=1310900, episode=219 reward=0.7620748 (542.72 it/sec) -training >> step=1311000, episode=219 reward=0.7651761 (522.79 it/sec) -training >> step=1311100, episode=219 reward=0.7620398 (504.25 it/sec) -training >> step=1311200, episode=219 reward=0.7724235 (548.02 it/sec) -training >> step=1311300, episode=219 reward=0.768399 (553.46 it/sec) -training >> step=1311400, episode=219 reward=0.7512268 (558.35 it/sec) -training >> step=1311500, episode=219 reward=0.7547354 (545.31 it/sec) -training >> step=1311600, episode=219 reward=0.7715527 (487.19 it/sec) -training >> step=1311700, episode=219 reward=0.7734831 (544.15 it/sec) -training >> step=1311800, episode=219 reward=0.7731351 (509.41 it/sec) -training >> step=1311900, episode=219 reward=0.7748836 (439.64 it/sec) -training >> step=1312000, episode=219 reward=0.7384063 (485.92 it/sec) -training >> step=1312100, episode=219 reward=0.7643332 (458.60 it/sec) -training >> step=1312200, episode=219 reward=0.771715 (549.73 it/sec) -training >> step=1312300, episode=219 reward=0.7565061 (530.91 it/sec) -training >> step=1312400, episode=219 reward=0.7543231 (468.09 it/sec) -training >> step=1312500, episode=219 reward=0.761753 (473.26 it/sec) -training >> step=1312600, episode=219 reward=0.7702678 (438.61 it/sec) -training >> step=1312700, episode=219 reward=0.7425443 (463.53 it/sec) -training >> step=1312800, episode=219 reward=0.7792162 (442.77 it/sec) -training >> step=1312900, episode=219 reward=0.750491 (354.43 it/sec) -training >> step=1313000, episode=219 reward=0.7616104 (342.74 it/sec) -training >> step=1313100, episode=219 reward=0.7551728 (402.19 it/sec) -training >> step=1313200, episode=219 reward=0.7586873 (520.24 it/sec) -training >> step=1313300, episode=220 reward=0.7511352 (144.52 it/sec) -training >> step=1313400, episode=220 reward=0.7532519 (521.05 it/sec) -training >> step=1313500, episode=220 reward=0.7580091 (521.39 it/sec) -training >> step=1313600, episode=220 reward=0.7594327 (442.13 it/sec) -training >> step=1313700, episode=220 reward=0.7680702 (507.23 it/sec) -training >> step=1313800, episode=220 reward=0.7465166 (533.54 it/sec) -training >> step=1313900, episode=220 reward=0.7560158 (600.60 it/sec) -training >> step=1314000, episode=220 reward=0.7453471 (562.74 it/sec) -training >> step=1314100, episode=220 reward=0.7440419 (536.53 it/sec) -training >> step=1314200, episode=220 reward=0.7662398 (579.51 it/sec) -training >> step=1314300, episode=220 reward=0.7773877 (573.18 it/sec) -training >> step=1314400, episode=220 reward=0.7601081 (553.71 it/sec) -training >> step=1314500, episode=220 reward=0.7566747 (589.32 it/sec) -training >> step=1314600, episode=220 reward=0.7706012 (607.94 it/sec) -training >> step=1314700, episode=220 reward=0.7659892 (553.82 it/sec) -training >> step=1314800, episode=220 reward=0.7816905 (547.64 it/sec) -training >> step=1314900, episode=220 reward=0.7508527 (579.59 it/sec) -training >> step=1315000, episode=220 reward=0.753944 (580.01 it/sec) -training >> step=1315100, episode=220 reward=0.7765851 (532.10 it/sec) -training >> step=1315200, episode=220 reward=0.7414172 (547.88 it/sec) -training >> step=1315300, episode=220 reward=0.7713763 (554.42 it/sec) -training >> step=1315400, episode=220 reward=0.7704532 (553.78 it/sec) -training >> step=1315500, episode=220 reward=0.7742281 (510.42 it/sec) -training >> step=1315600, episode=220 reward=0.7423317 (508.90 it/sec) -training >> step=1315700, episode=220 reward=0.7710879 (529.23 it/sec) -training >> step=1315800, episode=220 reward=0.7686159 (575.23 it/sec) -training >> step=1315900, episode=220 reward=0.7731667 (495.28 it/sec) -training >> step=1316000, episode=220 reward=0.7597755 (520.55 it/sec) -training >> step=1316100, episode=220 reward=0.7642201 (570.62 it/sec) -training >> step=1316200, episode=220 reward=0.7737143 (568.48 it/sec) -training >> step=1316300, episode=220 reward=0.7696878 (539.36 it/sec) -training >> step=1316400, episode=220 reward=0.7534453 (526.97 it/sec) -training >> step=1316500, episode=220 reward=0.7549701 (521.41 it/sec) -training >> step=1316600, episode=220 reward=0.7553629 (556.24 it/sec) -training >> step=1316700, episode=220 reward=0.7623521 (569.20 it/sec) -training >> step=1316800, episode=220 reward=0.7478667 (577.62 it/sec) -training >> step=1316900, episode=220 reward=0.7580177 (567.25 it/sec) -training >> step=1317000, episode=220 reward=0.7436922 (594.95 it/sec) -training >> step=1317100, episode=220 reward=0.7566233 (537.64 it/sec) -training >> step=1317200, episode=220 reward=0.76207 (516.36 it/sec) -training >> step=1317300, episode=220 reward=0.7566814 (507.22 it/sec) -training >> step=1317400, episode=220 reward=0.7398623 (508.32 it/sec) -training >> step=1317500, episode=220 reward=0.7547922 (504.86 it/sec) -training >> step=1317600, episode=220 reward=0.7631016 (509.92 it/sec) -training >> step=1317700, episode=220 reward=0.7608562 (499.04 it/sec) -training >> step=1317800, episode=220 reward=0.7629589 (512.37 it/sec) -training >> step=1317900, episode=220 reward=0.7506263 (533.36 it/sec) -training >> step=1318000, episode=220 reward=0.7701053 (523.86 it/sec) -training >> step=1318100, episode=220 reward=0.772731 (534.55 it/sec) -training >> step=1318200, episode=220 reward=0.788275 (550.98 it/sec) -training >> step=1318300, episode=220 reward=0.7803527 (481.73 it/sec) -training >> step=1318400, episode=220 reward=0.7518926 (555.64 it/sec) -training >> step=1318500, episode=220 reward=0.767741 (566.45 it/sec) -training >> step=1318600, episode=220 reward=0.745603 (523.91 it/sec) -training >> step=1318700, episode=220 reward=0.7705098 (529.95 it/sec) -training >> step=1318800, episode=220 reward=0.7592921 (498.49 it/sec) -training >> step=1318900, episode=220 reward=0.7624916 (443.45 it/sec) -training >> step=1319000, episode=220 reward=0.7677703 (477.99 it/sec) -training >> step=1319100, episode=220 reward=0.7620723 (475.81 it/sec) -training >> step=1319200, episode=220 reward=0.7589862 (455.11 it/sec) -training >> step=1319300, episode=221 reward=0.7572579 (110.37 it/sec) -training >> step=1319400, episode=221 reward=0.7611479 (260.88 it/sec) -training >> step=1319500, episode=221 reward=0.777945 (436.64 it/sec) -training >> step=1319600, episode=221 reward=0.7490395 (437.19 it/sec) -training >> step=1319700, episode=221 reward=0.7501079 (507.59 it/sec) -training >> step=1319800, episode=221 reward=0.746317 (493.94 it/sec) -training >> step=1319900, episode=221 reward=0.7505094 (519.13 it/sec) -training >> step=1320000, episode=221 reward=0.7671286 (515.22 it/sec) -training >> step=1320100, episode=221 reward=0.7641038 (571.57 it/sec) -training >> step=1320200, episode=221 reward=0.752209 (510.49 it/sec) -training >> step=1320300, episode=221 reward=0.7680031 (479.40 it/sec) -training >> step=1320400, episode=221 reward=0.7423818 (460.03 it/sec) -training >> step=1320500, episode=221 reward=0.7455795 (514.86 it/sec) -training >> step=1320600, episode=221 reward=0.7708936 (453.97 it/sec) -training >> step=1320700, episode=221 reward=0.7510885 (502.77 it/sec) -training >> step=1320800, episode=221 reward=0.7379131 (452.97 it/sec) -training >> step=1320900, episode=221 reward=0.7814721 (525.33 it/sec) -training >> step=1321000, episode=221 reward=0.7770556 (564.81 it/sec) -training >> step=1321100, episode=221 reward=0.7676898 (532.01 it/sec) -training >> step=1321200, episode=221 reward=0.7795675 (534.10 it/sec) -training >> step=1321300, episode=221 reward=0.7702397 (579.98 it/sec) -training >> step=1321400, episode=221 reward=0.7399901 (499.05 it/sec) -training >> step=1321500, episode=221 reward=0.7893214 (509.90 it/sec) -training >> step=1321600, episode=221 reward=0.7579856 (538.72 it/sec) -training >> step=1321700, episode=221 reward=0.7549251 (567.64 it/sec) -training >> step=1321800, episode=221 reward=0.773167 (552.33 it/sec) -training >> step=1321900, episode=221 reward=0.7444797 (532.27 it/sec) -training >> step=1322000, episode=221 reward=0.7308908 (477.20 it/sec) -training >> step=1322100, episode=221 reward=0.7603508 (335.28 it/sec) -training >> step=1322200, episode=221 reward=0.7659917 (347.02 it/sec) -training >> step=1322300, episode=221 reward=0.7767972 (349.34 it/sec) -training >> step=1322400, episode=221 reward=0.7727343 (441.26 it/sec) -training >> step=1322500, episode=221 reward=0.7488943 (473.84 it/sec) -training >> step=1322600, episode=221 reward=0.7629551 (512.97 it/sec) -training >> step=1322700, episode=221 reward=0.7434568 (535.90 it/sec) -training >> step=1322800, episode=221 reward=0.7734677 (466.67 it/sec) -training >> step=1322900, episode=221 reward=0.7635649 (514.19 it/sec) -training >> step=1323000, episode=221 reward=0.7778388 (539.91 it/sec) -training >> step=1323100, episode=221 reward=0.7634264 (474.50 it/sec) -training >> step=1323200, episode=221 reward=0.7704617 (562.47 it/sec) -training >> step=1323300, episode=221 reward=0.7348812 (578.69 it/sec) -training >> step=1323400, episode=221 reward=0.7718273 (445.35 it/sec) -training >> step=1323500, episode=221 reward=0.7572792 (537.49 it/sec) -training >> step=1323600, episode=221 reward=0.7589986 (546.63 it/sec) -training >> step=1323700, episode=221 reward=0.7699296 (528.69 it/sec) -training >> step=1323800, episode=221 reward=0.7431371 (548.74 it/sec) -training >> step=1323900, episode=221 reward=0.7663044 (493.12 it/sec) -training >> step=1324000, episode=221 reward=0.7628099 (540.01 it/sec) -training >> step=1324100, episode=221 reward=0.7467667 (593.19 it/sec) -training >> step=1324200, episode=221 reward=0.7514403 (531.10 it/sec) -training >> step=1324300, episode=221 reward=0.756556 (540.44 it/sec) -training >> step=1324400, episode=221 reward=0.7650905 (578.89 it/sec) -training >> step=1324500, episode=221 reward=0.7588899 (504.64 it/sec) -training >> step=1324600, episode=221 reward=0.7484891 (526.75 it/sec) -training >> step=1324700, episode=221 reward=0.7532531 (551.56 it/sec) -training >> step=1324800, episode=221 reward=0.7566858 (580.52 it/sec) -training >> step=1324900, episode=221 reward=0.7646827 (546.19 it/sec) -training >> step=1325000, episode=221 reward=0.7641655 (542.62 it/sec) -training >> step=1325100, episode=221 reward=0.7505571 (512.07 it/sec) -training >> step=1325200, episode=221 reward=0.7886056 (570.79 it/sec) -training >> step=1325300, episode=222 reward=0.7670449 (133.68 it/sec) -training >> step=1325400, episode=222 reward=0.7392804 (519.04 it/sec) -training >> step=1325500, episode=222 reward=0.756358 (509.98 it/sec) -training >> step=1325600, episode=222 reward=0.7660456 (574.12 it/sec) -training >> step=1325700, episode=222 reward=0.7487741 (568.54 it/sec) -training >> step=1325800, episode=222 reward=0.780381 (568.10 it/sec) -training >> step=1325900, episode=222 reward=0.738264 (523.61 it/sec) -training >> step=1326000, episode=222 reward=0.7452334 (586.63 it/sec) -training >> step=1326100, episode=222 reward=0.7661105 (566.24 it/sec) -training >> step=1326200, episode=222 reward=0.7718081 (578.16 it/sec) -training >> step=1326300, episode=222 reward=0.7396439 (597.44 it/sec) -training >> step=1326400, episode=222 reward=0.7370628 (527.35 it/sec) -training >> step=1326500, episode=222 reward=0.7683268 (592.59 it/sec) -training >> step=1326600, episode=222 reward=0.7856213 (541.08 it/sec) -training >> step=1326700, episode=222 reward=0.7500185 (546.43 it/sec) -training >> step=1326800, episode=222 reward=0.7679111 (558.35 it/sec) -training >> step=1326900, episode=222 reward=0.7341707 (600.70 it/sec) -training >> step=1327000, episode=222 reward=0.7720919 (461.08 it/sec) -training >> step=1327100, episode=222 reward=0.7574441 (491.91 it/sec) -training >> step=1327200, episode=222 reward=0.7486624 (533.75 it/sec) -training >> step=1327300, episode=222 reward=0.7481478 (578.57 it/sec) -training >> step=1327400, episode=222 reward=0.7593241 (556.99 it/sec) -training >> step=1327500, episode=222 reward=0.7721593 (544.77 it/sec) -training >> step=1327600, episode=222 reward=0.7715708 (499.56 it/sec) -training >> step=1327700, episode=222 reward=0.7849661 (522.53 it/sec) -training >> step=1327800, episode=222 reward=0.7692103 (521.03 it/sec) -training >> step=1327900, episode=222 reward=0.7567787 (571.07 it/sec) -training >> step=1328000, episode=222 reward=0.7505513 (589.66 it/sec) -training >> step=1328100, episode=222 reward=0.7660398 (451.34 it/sec) -training >> step=1328200, episode=222 reward=0.7666272 (508.87 it/sec) -training >> step=1328300, episode=222 reward=0.7515761 (494.50 it/sec) -training >> step=1328400, episode=222 reward=0.7855127 (558.83 it/sec) -training >> step=1328500, episode=222 reward=0.7448283 (550.42 it/sec) -training >> step=1328600, episode=222 reward=0.7852275 (527.73 it/sec) -training >> step=1328700, episode=222 reward=0.7581746 (530.79 it/sec) -training >> step=1328800, episode=222 reward=0.757758 (562.04 it/sec) -training >> step=1328900, episode=222 reward=0.7436957 (551.33 it/sec) -training >> step=1329000, episode=222 reward=0.7645833 (537.54 it/sec) -training >> step=1329100, episode=222 reward=0.7577932 (557.89 it/sec) -training >> step=1329200, episode=222 reward=0.7654136 (517.46 it/sec) -training >> step=1329300, episode=222 reward=0.7790607 (513.27 it/sec) -training >> step=1329400, episode=222 reward=0.7559854 (539.78 it/sec) -training >> step=1329500, episode=222 reward=0.7474657 (589.00 it/sec) -training >> step=1329600, episode=222 reward=0.7449365 (492.83 it/sec) -training >> step=1329700, episode=222 reward=0.7813832 (490.66 it/sec) -training >> step=1329800, episode=222 reward=0.7453241 (500.59 it/sec) -training >> step=1329900, episode=222 reward=0.7649402 (577.69 it/sec) -training >> step=1330000, episode=222 reward=0.7452261 (539.48 it/sec) -training >> step=1330100, episode=222 reward=0.7667007 (579.06 it/sec) -training >> step=1330200, episode=222 reward=0.770628 (548.18 it/sec) -training >> step=1330300, episode=222 reward=0.7577142 (491.90 it/sec) -training >> step=1330400, episode=222 reward=0.7483212 (541.75 it/sec) -training >> step=1330500, episode=222 reward=0.7631546 (520.33 it/sec) -training >> step=1330600, episode=222 reward=0.736928 (586.37 it/sec) -training >> step=1330700, episode=222 reward=0.7657007 (552.10 it/sec) -training >> step=1330800, episode=222 reward=0.7428972 (487.36 it/sec) -training >> step=1330900, episode=222 reward=0.7588702 (531.22 it/sec) -training >> step=1331000, episode=222 reward=0.7297314 (580.92 it/sec) -training >> step=1331100, episode=222 reward=0.7814023 (542.58 it/sec) -training >> step=1331200, episode=222 reward=0.7473022 (552.42 it/sec) -training >> step=1331300, episode=223 reward=0.7605842 (130.49 it/sec) -training >> step=1331400, episode=223 reward=0.7683969 (610.00 it/sec) -training >> step=1331500, episode=223 reward=0.7562391 (575.80 it/sec) -training >> step=1331600, episode=223 reward=0.7311938 (547.02 it/sec) -training >> step=1331700, episode=223 reward=0.7757874 (522.26 it/sec) -training >> step=1331800, episode=223 reward=0.7568244 (524.09 it/sec) -training >> step=1331900, episode=223 reward=0.7715744 (565.92 it/sec) -training >> step=1332000, episode=223 reward=0.7551686 (537.90 it/sec) -training >> step=1332100, episode=223 reward=0.762064 (516.50 it/sec) -training >> step=1332200, episode=223 reward=0.7662153 (540.09 it/sec) -training >> step=1332300, episode=223 reward=0.7781491 (606.86 it/sec) -training >> step=1332400, episode=223 reward=0.7663256 (550.44 it/sec) -training >> step=1332500, episode=223 reward=0.7779021 (556.00 it/sec) -training >> step=1332600, episode=223 reward=0.7622369 (582.09 it/sec) -training >> step=1332700, episode=223 reward=0.7386238 (561.56 it/sec) -training >> step=1332800, episode=223 reward=0.7526008 (552.81 it/sec) -training >> step=1332900, episode=223 reward=0.7812405 (546.64 it/sec) -training >> step=1333000, episode=223 reward=0.7479571 (552.11 it/sec) -training >> step=1333100, episode=223 reward=0.7663352 (493.11 it/sec) -training >> step=1333200, episode=223 reward=0.7414083 (501.33 it/sec) -training >> step=1333300, episode=223 reward=0.7902576 (570.52 it/sec) -training >> step=1333400, episode=223 reward=0.7611792 (543.58 it/sec) -training >> step=1333500, episode=223 reward=0.7694024 (536.09 it/sec) -training >> step=1333600, episode=223 reward=0.7549528 (577.08 it/sec) -training >> step=1333700, episode=223 reward=0.7485804 (555.32 it/sec) -training >> step=1333800, episode=223 reward=0.7768852 (533.22 it/sec) -training >> step=1333900, episode=223 reward=0.7549519 (528.12 it/sec) -training >> step=1334000, episode=223 reward=0.7349066 (518.00 it/sec) -training >> step=1334100, episode=223 reward=0.7541596 (549.93 it/sec) -training >> step=1334200, episode=223 reward=0.753868 (582.19 it/sec) -training >> step=1334300, episode=223 reward=0.7505101 (481.35 it/sec) -training >> step=1334400, episode=223 reward=0.7726107 (540.16 it/sec) -training >> step=1334500, episode=223 reward=0.7604725 (528.94 it/sec) -training >> step=1334600, episode=223 reward=0.7355947 (563.21 it/sec) -training >> step=1334700, episode=223 reward=0.7409963 (562.07 it/sec) -training >> step=1334800, episode=223 reward=0.7700673 (558.30 it/sec) -training >> step=1334900, episode=223 reward=0.7526277 (542.79 it/sec) -training >> step=1335000, episode=223 reward=0.7831011 (544.27 it/sec) -training >> step=1335100, episode=223 reward=0.7472107 (502.58 it/sec) -training >> step=1335200, episode=223 reward=0.7599947 (540.63 it/sec) -training >> step=1335300, episode=223 reward=0.7686462 (560.71 it/sec) -training >> step=1335400, episode=223 reward=0.7388081 (537.68 it/sec) -training >> step=1335500, episode=223 reward=0.7471567 (521.73 it/sec) -training >> step=1335600, episode=223 reward=0.7682491 (533.42 it/sec) -training >> step=1335700, episode=223 reward=0.7573813 (547.24 it/sec) -training >> step=1335800, episode=223 reward=0.7441772 (529.19 it/sec) -training >> step=1335900, episode=223 reward=0.7393317 (498.39 it/sec) -training >> step=1336000, episode=223 reward=0.7611783 (518.86 it/sec) -training >> step=1336100, episode=223 reward=0.7746571 (571.65 it/sec) -training >> step=1336200, episode=223 reward=0.7442759 (493.81 it/sec) -training >> step=1336300, episode=223 reward=0.7915716 (546.63 it/sec) -training >> step=1336400, episode=223 reward=0.7577463 (582.43 it/sec) -training >> step=1336500, episode=223 reward=0.743521 (526.23 it/sec) -training >> step=1336600, episode=223 reward=0.7551738 (524.52 it/sec) -training >> step=1336700, episode=223 reward=0.7495191 (561.08 it/sec) -training >> step=1336800, episode=223 reward=0.7668549 (508.11 it/sec) -training >> step=1336900, episode=223 reward=0.7550543 (552.53 it/sec) -training >> step=1337000, episode=223 reward=0.7737936 (502.96 it/sec) -training >> step=1337100, episode=223 reward=0.7804566 (522.22 it/sec) -training >> step=1337200, episode=223 reward=0.7665019 (552.68 it/sec) -training >> step=1337300, episode=224 reward=0.7623366 (89.73 it/sec) -training >> step=1337400, episode=224 reward=0.7618427 (405.26 it/sec) -training >> step=1337500, episode=224 reward=0.7821998 (465.47 it/sec) -training >> step=1337600, episode=224 reward=0.7336773 (463.70 it/sec) -training >> step=1337700, episode=224 reward=0.758444 (502.48 it/sec) -training >> step=1337800, episode=224 reward=0.7795431 (499.39 it/sec) -training >> step=1337900, episode=224 reward=0.7654055 (531.32 it/sec) -training >> step=1338000, episode=224 reward=0.7515488 (551.41 it/sec) -training >> step=1338100, episode=224 reward=0.7768804 (479.52 it/sec) -training >> step=1338200, episode=224 reward=0.7704433 (439.09 it/sec) -training >> step=1338300, episode=224 reward=0.7765883 (435.04 it/sec) -training >> step=1338400, episode=224 reward=0.7378613 (504.51 it/sec) -training >> step=1338500, episode=224 reward=0.7267324 (491.31 it/sec) -training >> step=1338600, episode=224 reward=0.769326 (527.89 it/sec) -training >> step=1338700, episode=224 reward=0.7579331 (515.54 it/sec) -training >> step=1338800, episode=224 reward=0.7501555 (475.25 it/sec) -training >> step=1338900, episode=224 reward=0.7514522 (525.10 it/sec) -training >> step=1339000, episode=224 reward=0.7662413 (487.77 it/sec) -training >> step=1339100, episode=224 reward=0.7743673 (474.14 it/sec) -training >> step=1339200, episode=224 reward=0.787779 (510.29 it/sec) -training >> step=1339300, episode=224 reward=0.7637631 (528.26 it/sec) -training >> step=1339400, episode=224 reward=0.7626978 (528.86 it/sec) -training >> step=1339500, episode=224 reward=0.766982 (562.19 it/sec) -training >> step=1339600, episode=224 reward=0.7570077 (549.06 it/sec) -training >> step=1339700, episode=224 reward=0.7590011 (530.83 it/sec) -training >> step=1339800, episode=224 reward=0.7689759 (527.69 it/sec) -training >> step=1339900, episode=224 reward=0.7547952 (519.18 it/sec) -training >> step=1340000, episode=224 reward=0.7389677 (558.45 it/sec) -training >> step=1340100, episode=224 reward=0.7646698 (565.77 it/sec) -training >> step=1340200, episode=224 reward=0.7506835 (530.11 it/sec) -training >> step=1340300, episode=224 reward=0.7624484 (452.08 it/sec) -training >> step=1340400, episode=224 reward=0.7626909 (525.24 it/sec) -training >> step=1340500, episode=224 reward=0.763391 (529.86 it/sec) -training >> step=1340600, episode=224 reward=0.7497019 (557.55 it/sec) -training >> step=1340700, episode=224 reward=0.7327233 (585.49 it/sec) -training >> step=1340800, episode=224 reward=0.7597486 (546.28 it/sec) -training >> step=1340900, episode=224 reward=0.7331554 (532.51 it/sec) -training >> step=1341000, episode=224 reward=0.7450115 (545.13 it/sec) -training >> step=1341100, episode=224 reward=0.7701072 (524.01 it/sec) -training >> step=1341200, episode=224 reward=0.7532104 (555.43 it/sec) -training >> step=1341300, episode=224 reward=0.7688979 (541.55 it/sec) -training >> step=1341400, episode=224 reward=0.7625204 (544.82 it/sec) -training >> step=1341500, episode=224 reward=0.7602488 (498.08 it/sec) -training >> step=1341600, episode=224 reward=0.7404889 (530.65 it/sec) -training >> step=1341700, episode=224 reward=0.7587082 (509.23 it/sec) -training >> step=1341800, episode=224 reward=0.7557189 (567.86 it/sec) -training >> step=1341900, episode=224 reward=0.7650294 (519.78 it/sec) -training >> step=1342000, episode=224 reward=0.753333 (481.01 it/sec) -training >> step=1342100, episode=224 reward=0.7449983 (483.02 it/sec) -training >> step=1342200, episode=224 reward=0.7339463 (533.33 it/sec) -training >> step=1342300, episode=224 reward=0.7562613 (504.24 it/sec) -training >> step=1342400, episode=224 reward=0.7582307 (548.68 it/sec) -training >> step=1342500, episode=224 reward=0.7554379 (535.41 it/sec) -training >> step=1342600, episode=224 reward=0.7555833 (516.33 it/sec) -training >> step=1342700, episode=224 reward=0.7407194 (544.32 it/sec) -training >> step=1342800, episode=224 reward=0.7395841 (531.25 it/sec) -training >> step=1342900, episode=224 reward=0.7523437 (571.09 it/sec) -training >> step=1343000, episode=224 reward=0.7501498 (539.64 it/sec) -training >> step=1343100, episode=224 reward=0.7524208 (464.51 it/sec) -training >> step=1343200, episode=224 reward=0.7722667 (500.76 it/sec) -training >> step=1343300, episode=225 reward=0.7575629 (130.61 it/sec) -training >> step=1343400, episode=225 reward=0.7443328 (492.98 it/sec) -training >> step=1343500, episode=225 reward=0.7463276 (553.98 it/sec) -training >> step=1343600, episode=225 reward=0.7539109 (521.27 it/sec) -training >> step=1343700, episode=225 reward=0.7426208 (518.27 it/sec) -training >> step=1343800, episode=225 reward=0.7627925 (582.16 it/sec) -training >> step=1343900, episode=225 reward=0.7466474 (485.63 it/sec) -training >> step=1344000, episode=225 reward=0.7286972 (548.17 it/sec) -training >> step=1344100, episode=225 reward=0.7549696 (557.78 it/sec) -training >> step=1344200, episode=225 reward=0.7492476 (570.87 it/sec) -training >> step=1344300, episode=225 reward=0.7561443 (580.27 it/sec) -training >> step=1344400, episode=225 reward=0.7588161 (542.14 it/sec) -training >> step=1344500, episode=225 reward=0.7475571 (531.54 it/sec) -training >> step=1344600, episode=225 reward=0.7672274 (570.27 it/sec) -training >> step=1344700, episode=225 reward=0.7597961 (548.25 it/sec) -training >> step=1344800, episode=225 reward=0.7684403 (561.65 it/sec) -training >> step=1344900, episode=225 reward=0.7516933 (575.28 it/sec) -training >> step=1345000, episode=225 reward=0.7353957 (551.28 it/sec) -training >> step=1345100, episode=225 reward=0.774664 (505.02 it/sec) -training >> step=1345200, episode=225 reward=0.7633857 (569.78 it/sec) -training >> step=1345300, episode=225 reward=0.7273299 (569.39 it/sec) -training >> step=1345400, episode=225 reward=0.7752119 (576.64 it/sec) -training >> step=1345500, episode=225 reward=0.787183 (562.14 it/sec) -training >> step=1345600, episode=225 reward=0.733122 (532.14 it/sec) -training >> step=1345700, episode=225 reward=0.7617554 (552.60 it/sec) -training >> step=1345800, episode=225 reward=0.7439085 (587.71 it/sec) -training >> step=1345900, episode=225 reward=0.7817573 (561.65 it/sec) -training >> step=1346000, episode=225 reward=0.7585728 (552.45 it/sec) -training >> step=1346100, episode=225 reward=0.7602879 (524.83 it/sec) -training >> step=1346200, episode=225 reward=0.7582235 (462.29 it/sec) -training >> step=1346300, episode=225 reward=0.7708516 (556.52 it/sec) -training >> step=1346400, episode=225 reward=0.7502961 (590.00 it/sec) -training >> step=1346500, episode=225 reward=0.7350428 (546.38 it/sec) -training >> step=1346600, episode=225 reward=0.7607607 (532.82 it/sec) -training >> step=1346700, episode=225 reward=0.7552211 (514.34 it/sec) -training >> step=1346800, episode=225 reward=0.759827 (522.99 it/sec) -training >> step=1346900, episode=225 reward=0.7711681 (531.14 it/sec) -training >> step=1347000, episode=225 reward=0.7353448 (543.28 it/sec) -training >> step=1347100, episode=225 reward=0.7775657 (570.79 it/sec) -training >> step=1347200, episode=225 reward=0.7678322 (510.44 it/sec) -training >> step=1347300, episode=225 reward=0.7749751 (492.53 it/sec) -training >> step=1347400, episode=225 reward=0.7468085 (513.25 it/sec) -training >> step=1347500, episode=225 reward=0.7591812 (574.45 it/sec) -training >> step=1347600, episode=225 reward=0.7555221 (536.41 it/sec) -training >> step=1347700, episode=225 reward=0.7520437 (508.07 it/sec) -training >> step=1347800, episode=225 reward=0.750604 (587.54 it/sec) -training >> step=1347900, episode=225 reward=0.7531516 (502.09 it/sec) -training >> step=1348000, episode=225 reward=0.7628102 (531.84 it/sec) -training >> step=1348100, episode=225 reward=0.750688 (574.34 it/sec) -training >> step=1348200, episode=225 reward=0.7593775 (581.35 it/sec) -training >> step=1348300, episode=225 reward=0.7700008 (529.68 it/sec) -training >> step=1348400, episode=225 reward=0.7618279 (532.45 it/sec) -training >> step=1348500, episode=225 reward=0.7501329 (532.32 it/sec) -training >> step=1348600, episode=225 reward=0.7296268 (560.28 it/sec) -training >> step=1348700, episode=225 reward=0.7300968 (558.33 it/sec) -training >> step=1348800, episode=225 reward=0.7597908 (452.38 it/sec) -training >> step=1348900, episode=225 reward=0.7408731 (553.17 it/sec) -training >> step=1349000, episode=225 reward=0.7616444 (514.29 it/sec) -training >> step=1349100, episode=225 reward=0.731055 (539.06 it/sec) -training >> step=1349200, episode=225 reward=0.7601873 (568.90 it/sec) -training >> step=1349300, episode=226 reward=0.7547542 (145.41 it/sec) -training >> step=1349400, episode=226 reward=0.7531215 (528.93 it/sec) -training >> step=1349500, episode=226 reward=0.7654942 (529.66 it/sec) -training >> step=1349600, episode=226 reward=0.771722 (535.66 it/sec) -training >> step=1349700, episode=226 reward=0.7724403 (545.03 it/sec) -training >> step=1349800, episode=226 reward=0.7684091 (517.87 it/sec) -training >> step=1349900, episode=226 reward=0.7509483 (544.97 it/sec) -training >> step=1350000, episode=226 reward=0.7667855 (557.40 it/sec) -training >> step=1350100, episode=226 reward=0.7490979 (561.59 it/sec) -training >> step=1350200, episode=226 reward=0.7841576 (558.89 it/sec) -training >> step=1350300, episode=226 reward=0.7489402 (558.59 it/sec) -training >> step=1350400, episode=226 reward=0.7378467 (566.62 it/sec) -training >> step=1350500, episode=226 reward=0.762053 (578.78 it/sec) -training >> step=1350600, episode=226 reward=0.7661592 (531.33 it/sec) -training >> step=1350700, episode=226 reward=0.7696586 (539.73 it/sec) -training >> step=1350800, episode=226 reward=0.7464545 (525.53 it/sec) -training >> step=1350900, episode=226 reward=0.7622844 (591.55 it/sec) -training >> step=1351000, episode=226 reward=0.7456155 (547.67 it/sec) -training >> step=1351100, episode=226 reward=0.7626734 (545.61 it/sec) -training >> step=1351200, episode=226 reward=0.7327271 (562.82 it/sec) -training >> step=1351300, episode=226 reward=0.765126 (508.82 it/sec) -training >> step=1351400, episode=226 reward=0.7719979 (502.51 it/sec) -training >> step=1351500, episode=226 reward=0.7558808 (484.96 it/sec) -training >> step=1351600, episode=226 reward=0.7620387 (518.62 it/sec) -training >> step=1351700, episode=226 reward=0.7356478 (515.26 it/sec) -training >> step=1351800, episode=226 reward=0.7426165 (390.19 it/sec) -training >> step=1351900, episode=226 reward=0.7664407 (393.97 it/sec) -training >> step=1352000, episode=226 reward=0.7503381 (369.26 it/sec) -training >> step=1352100, episode=226 reward=0.748894 (374.12 it/sec) -training >> step=1352200, episode=226 reward=0.7499853 (367.46 it/sec) -training >> step=1352300, episode=226 reward=0.7370096 (389.44 it/sec) -training >> step=1352400, episode=226 reward=0.7911531 (404.24 it/sec) -training >> step=1352500, episode=226 reward=0.7842021 (491.89 it/sec) -training >> step=1352600, episode=226 reward=0.7535222 (486.48 it/sec) -training >> step=1352700, episode=226 reward=0.7522162 (472.96 it/sec) -training >> step=1352800, episode=226 reward=0.7562409 (536.84 it/sec) -training >> step=1352900, episode=226 reward=0.7672317 (515.41 it/sec) -training >> step=1353000, episode=226 reward=0.7693343 (524.01 it/sec) -training >> step=1353100, episode=226 reward=0.7524453 (522.74 it/sec) -training >> step=1353200, episode=226 reward=0.775855 (538.22 it/sec) -training >> step=1353300, episode=226 reward=0.7605938 (558.94 it/sec) -training >> step=1353400, episode=226 reward=0.7227494 (521.09 it/sec) -training >> step=1353500, episode=226 reward=0.7781814 (516.52 it/sec) -training >> step=1353600, episode=226 reward=0.7381216 (532.50 it/sec) -training >> step=1353700, episode=226 reward=0.7579619 (424.61 it/sec) -training >> step=1353800, episode=226 reward=0.7394148 (418.86 it/sec) -training >> step=1353900, episode=226 reward=0.7289751 (363.69 it/sec) -training >> step=1354000, episode=226 reward=0.7530962 (488.55 it/sec) -training >> step=1354100, episode=226 reward=0.7588586 (491.48 it/sec) -training >> step=1354200, episode=226 reward=0.7472711 (482.32 it/sec) -training >> step=1354300, episode=226 reward=0.7363681 (545.22 it/sec) -training >> step=1354400, episode=226 reward=0.7357717 (503.53 it/sec) -training >> step=1354500, episode=226 reward=0.7544863 (528.04 it/sec) -training >> step=1354600, episode=226 reward=0.7286593 (545.93 it/sec) -training >> step=1354700, episode=226 reward=0.7630176 (530.95 it/sec) -training >> step=1354800, episode=226 reward=0.7443807 (513.22 it/sec) -training >> step=1354900, episode=226 reward=0.7555881 (472.70 it/sec) -training >> step=1355000, episode=226 reward=0.7422063 (474.85 it/sec) -training >> step=1355100, episode=226 reward=0.7138948 (470.68 it/sec) -training >> step=1355200, episode=226 reward=0.7627264 (442.56 it/sec) -training >> step=1355300, episode=227 reward=0.7169121 (114.12 it/sec) -training >> step=1355400, episode=227 reward=0.7655352 (492.47 it/sec) -training >> step=1355500, episode=227 reward=0.7585141 (507.72 it/sec) -training >> step=1355600, episode=227 reward=0.7461588 (443.36 it/sec) -training >> step=1355700, episode=227 reward=0.7503556 (471.09 it/sec) -training >> step=1355800, episode=227 reward=0.7521145 (439.47 it/sec) -training >> step=1355900, episode=227 reward=0.7601243 (463.66 it/sec) -training >> step=1356000, episode=227 reward=0.7536601 (441.49 it/sec) -training >> step=1356100, episode=227 reward=0.7612556 (472.52 it/sec) -training >> step=1356200, episode=227 reward=0.7601687 (500.32 it/sec) -training >> step=1356300, episode=227 reward=0.7709622 (444.97 it/sec) -training >> step=1356400, episode=227 reward=0.7629413 (439.39 it/sec) -training >> step=1356500, episode=227 reward=0.7495589 (447.56 it/sec) -training >> step=1356600, episode=227 reward=0.7519284 (442.46 it/sec) -training >> step=1356700, episode=227 reward=0.7579752 (342.81 it/sec) -training >> step=1356800, episode=227 reward=0.768078 (358.28 it/sec) -training >> step=1356900, episode=227 reward=0.7542081 (342.08 it/sec) -training >> step=1357000, episode=227 reward=0.7729048 (424.58 it/sec) -training >> step=1357100, episode=227 reward=0.7451199 (406.34 it/sec) -training >> step=1357200, episode=227 reward=0.764491 (427.48 it/sec) -training >> step=1357300, episode=227 reward=0.7546476 (448.77 it/sec) -training >> step=1357400, episode=227 reward=0.7661048 (444.29 it/sec) -training >> step=1357500, episode=227 reward=0.7671646 (461.70 it/sec) -training >> step=1357600, episode=227 reward=0.7498206 (422.28 it/sec) -training >> step=1357700, episode=227 reward=0.7453415 (472.84 it/sec) -training >> step=1357800, episode=227 reward=0.7723331 (430.18 it/sec) -training >> step=1357900, episode=227 reward=0.7387522 (441.52 it/sec) -training >> step=1358000, episode=227 reward=0.7594957 (436.59 it/sec) -training >> step=1358100, episode=227 reward=0.7716178 (460.06 it/sec) -training >> step=1358200, episode=227 reward=0.7686756 (474.71 it/sec) -training >> step=1358300, episode=227 reward=0.7485215 (446.13 it/sec) -training >> step=1358400, episode=227 reward=0.7580806 (462.07 it/sec) -training >> step=1358500, episode=227 reward=0.7578454 (409.77 it/sec) -training >> step=1358600, episode=227 reward=0.7523722 (450.36 it/sec) -training >> step=1358700, episode=227 reward=0.7633325 (467.33 it/sec) -training >> step=1358800, episode=227 reward=0.756856 (471.13 it/sec) -training >> step=1358900, episode=227 reward=0.753279 (481.48 it/sec) -training >> step=1359000, episode=227 reward=0.7617511 (407.40 it/sec) -training >> step=1359100, episode=227 reward=0.7318143 (466.48 it/sec) -training >> step=1359200, episode=227 reward=0.7500541 (516.68 it/sec) -training >> step=1359300, episode=227 reward=0.7408739 (495.37 it/sec) -training >> step=1359400, episode=227 reward=0.7640077 (410.79 it/sec) -training >> step=1359500, episode=227 reward=0.7737866 (407.12 it/sec) -training >> step=1359600, episode=227 reward=0.764959 (387.15 it/sec) -training >> step=1359700, episode=227 reward=0.7507703 (398.37 it/sec) -training >> step=1359800, episode=227 reward=0.7706017 (456.90 it/sec) -training >> step=1359900, episode=227 reward=0.7738151 (432.70 it/sec) -training >> step=1360000, episode=227 reward=0.7450709 (531.79 it/sec) -training >> step=1360100, episode=227 reward=0.7725114 (523.23 it/sec) -training >> step=1360200, episode=227 reward=0.7441908 (542.37 it/sec) -training >> step=1360300, episode=227 reward=0.7549332 (477.15 it/sec) -training >> step=1360400, episode=227 reward=0.7572743 (516.85 it/sec) -training >> step=1360500, episode=227 reward=0.7697715 (528.42 it/sec) -training >> step=1360600, episode=227 reward=0.7660428 (541.82 it/sec) -training >> step=1360700, episode=227 reward=0.7318354 (493.08 it/sec) -training >> step=1360800, episode=227 reward=0.7832354 (520.99 it/sec) -training >> step=1360900, episode=227 reward=0.7564636 (492.60 it/sec) -training >> step=1361000, episode=227 reward=0.7442367 (512.11 it/sec) -training >> step=1361100, episode=227 reward=0.7535271 (543.87 it/sec) -training >> step=1361200, episode=227 reward=0.749588 (501.67 it/sec) -training >> step=1361300, episode=228 reward=0.7660823 (74.71 it/sec) -training >> step=1361400, episode=228 reward=0.7679681 (484.80 it/sec) -training >> step=1361500, episode=228 reward=0.7531242 (469.96 it/sec) -training >> step=1361600, episode=228 reward=0.7528116 (471.32 it/sec) -training >> step=1361700, episode=228 reward=0.740287 (506.32 it/sec) -training >> step=1361800, episode=228 reward=0.7853103 (523.89 it/sec) -training >> step=1361900, episode=228 reward=0.7696567 (550.10 it/sec) -training >> step=1362000, episode=228 reward=0.7319166 (532.98 it/sec) -training >> step=1362100, episode=228 reward=0.7833676 (511.79 it/sec) -training >> step=1362200, episode=228 reward=0.7866148 (506.65 it/sec) -training >> step=1362300, episode=228 reward=0.7745529 (472.71 it/sec) -training >> step=1362400, episode=228 reward=0.7686884 (507.04 it/sec) -training >> step=1362500, episode=228 reward=0.7695205 (525.66 it/sec) -training >> step=1362600, episode=228 reward=0.7494251 (511.30 it/sec) -training >> step=1362700, episode=228 reward=0.7624998 (528.18 it/sec) -training >> step=1362800, episode=228 reward=0.758191 (478.90 it/sec) -training >> step=1362900, episode=228 reward=0.7748287 (543.35 it/sec) -training >> step=1363000, episode=228 reward=0.7457249 (520.45 it/sec) -training >> step=1363100, episode=228 reward=0.743656 (484.36 it/sec) -training >> step=1363200, episode=228 reward=0.7633358 (473.82 it/sec) -training >> step=1363300, episode=228 reward=0.7839733 (532.74 it/sec) -training >> step=1363400, episode=228 reward=0.7590659 (497.76 it/sec) -training >> step=1363500, episode=228 reward=0.776594 (533.21 it/sec) -training >> step=1363600, episode=228 reward=0.7880048 (485.61 it/sec) -training >> step=1363700, episode=228 reward=0.7637517 (475.97 it/sec) -training >> step=1363800, episode=228 reward=0.7429476 (516.98 it/sec) -training >> step=1363900, episode=228 reward=0.7837352 (509.02 it/sec) -training >> step=1364000, episode=228 reward=0.7851351 (509.87 it/sec) -training >> step=1364100, episode=228 reward=0.7600921 (540.80 it/sec) -training >> step=1364200, episode=228 reward=0.7486174 (493.06 it/sec) -training >> step=1364300, episode=228 reward=0.759405 (490.20 it/sec) -training >> step=1364400, episode=228 reward=0.7873832 (510.52 it/sec) -training >> step=1364500, episode=228 reward=0.7666884 (561.87 it/sec) -training >> step=1364600, episode=228 reward=0.7503672 (522.78 it/sec) -training >> step=1364700, episode=228 reward=0.7692544 (508.43 it/sec) -training >> step=1364800, episode=228 reward=0.7769763 (507.16 it/sec) -training >> step=1364900, episode=228 reward=0.7521431 (514.89 it/sec) -training >> step=1365000, episode=228 reward=0.7452736 (506.29 it/sec) -training >> step=1365100, episode=228 reward=0.7330629 (490.36 it/sec) -training >> step=1365200, episode=228 reward=0.7551392 (442.84 it/sec) -training >> step=1365300, episode=228 reward=0.7710968 (466.86 it/sec) -training >> step=1365400, episode=228 reward=0.7656987 (479.90 it/sec) -training >> step=1365500, episode=228 reward=0.7493261 (386.85 it/sec) -training >> step=1365600, episode=228 reward=0.7577929 (334.96 it/sec) -training >> step=1365700, episode=228 reward=0.752312 (309.39 it/sec) -training >> step=1365800, episode=228 reward=0.7761674 (329.53 it/sec) -training >> step=1365900, episode=228 reward=0.7738948 (364.96 it/sec) -training >> step=1366000, episode=228 reward=0.749163 (465.23 it/sec) -training >> step=1366100, episode=228 reward=0.7658275 (457.34 it/sec) -training >> step=1366200, episode=228 reward=0.7541164 (520.77 it/sec) -training >> step=1366300, episode=228 reward=0.7866117 (484.69 it/sec) -training >> step=1366400, episode=228 reward=0.7627328 (548.62 it/sec) -training >> step=1366500, episode=228 reward=0.7679312 (495.65 it/sec) -training >> step=1366600, episode=228 reward=0.7705941 (460.76 it/sec) -training >> step=1366700, episode=228 reward=0.7622371 (427.93 it/sec) -training >> step=1366800, episode=228 reward=0.7872999 (512.53 it/sec) -training >> step=1366900, episode=228 reward=0.7676247 (471.64 it/sec) -training >> step=1367000, episode=228 reward=0.7708606 (553.76 it/sec) -training >> step=1367100, episode=228 reward=0.7565057 (490.39 it/sec) -training >> step=1367200, episode=228 reward=0.7606104 (554.52 it/sec) -training >> step=1367300, episode=229 reward=0.7594661 (139.32 it/sec) -training >> step=1367400, episode=229 reward=0.77693 (467.26 it/sec) -training >> step=1367500, episode=229 reward=0.7438194 (501.78 it/sec) -training >> step=1367600, episode=229 reward=0.7485809 (508.95 it/sec) -training >> step=1367700, episode=229 reward=0.754311 (522.93 it/sec) -training >> step=1367800, episode=229 reward=0.7766716 (524.03 it/sec) -training >> step=1367900, episode=229 reward=0.7453165 (467.95 it/sec) -training >> step=1368000, episode=229 reward=0.7749591 (501.58 it/sec) -training >> step=1368100, episode=229 reward=0.7628899 (513.77 it/sec) -training >> step=1368200, episode=229 reward=0.7643148 (502.30 it/sec) -training >> step=1368300, episode=229 reward=0.7588427 (504.31 it/sec) -training >> step=1368400, episode=229 reward=0.7478163 (528.14 it/sec) -training >> step=1368500, episode=229 reward=0.7558411 (539.53 it/sec) -training >> step=1368600, episode=229 reward=0.7542709 (518.63 it/sec) -training >> step=1368700, episode=229 reward=0.7564729 (504.59 it/sec) -training >> step=1368800, episode=229 reward=0.7784603 (515.74 it/sec) -training >> step=1368900, episode=229 reward=0.7351109 (494.08 it/sec) -training >> step=1369000, episode=229 reward=0.7776597 (520.71 it/sec) -training >> step=1369100, episode=229 reward=0.7450168 (555.52 it/sec) -training >> step=1369200, episode=229 reward=0.7873408 (500.69 it/sec) -training >> step=1369300, episode=229 reward=0.7684758 (502.16 it/sec) -training >> step=1369400, episode=229 reward=0.7677358 (498.72 it/sec) -training >> step=1369500, episode=229 reward=0.7762582 (558.66 it/sec) -training >> step=1369600, episode=229 reward=0.7514378 (525.74 it/sec) -training >> step=1369700, episode=229 reward=0.744231 (496.89 it/sec) -training >> step=1369800, episode=229 reward=0.7819301 (547.39 it/sec) -training >> step=1369900, episode=229 reward=0.7688128 (515.13 it/sec) -training >> step=1370000, episode=229 reward=0.7629058 (458.25 it/sec) -training >> step=1370100, episode=229 reward=0.7667991 (538.45 it/sec) -training >> step=1370200, episode=229 reward=0.7636572 (538.33 it/sec) -training >> step=1370300, episode=229 reward=0.7752993 (501.04 it/sec) -training >> step=1370400, episode=229 reward=0.7640606 (490.04 it/sec) -training >> step=1370500, episode=229 reward=0.7652548 (472.31 it/sec) -training >> step=1370600, episode=229 reward=0.7634075 (525.52 it/sec) -training >> step=1370700, episode=229 reward=0.7515637 (529.81 it/sec) -training >> step=1370800, episode=229 reward=0.7556866 (514.54 it/sec) -training >> step=1370900, episode=229 reward=0.7553198 (523.78 it/sec) -training >> step=1371000, episode=229 reward=0.7469946 (550.23 it/sec) -training >> step=1371100, episode=229 reward=0.7533239 (495.28 it/sec) -training >> step=1371200, episode=229 reward=0.7684037 (534.43 it/sec) -training >> step=1371300, episode=229 reward=0.7676732 (519.70 it/sec) -training >> step=1371400, episode=229 reward=0.772643 (535.12 it/sec) -training >> step=1371500, episode=229 reward=0.7779922 (504.96 it/sec) -training >> step=1371600, episode=229 reward=0.748866 (477.81 it/sec) -training >> step=1371700, episode=229 reward=0.7568884 (530.33 it/sec) -training >> step=1371800, episode=229 reward=0.7641427 (467.20 it/sec) -training >> step=1371900, episode=229 reward=0.7739409 (434.09 it/sec) -training >> step=1372000, episode=229 reward=0.7498557 (369.90 it/sec) -training >> step=1372100, episode=229 reward=0.762528 (398.66 it/sec) -training >> step=1372200, episode=229 reward=0.7456259 (370.19 it/sec) -training >> step=1372300, episode=229 reward=0.7555572 (519.67 it/sec) -training >> step=1372400, episode=229 reward=0.789682 (492.22 it/sec) -training >> step=1372500, episode=229 reward=0.7565713 (477.39 it/sec) -training >> step=1372600, episode=229 reward=0.7566057 (519.06 it/sec) -training >> step=1372700, episode=229 reward=0.7713119 (518.58 it/sec) -training >> step=1372800, episode=229 reward=0.7673178 (473.34 it/sec) -training >> step=1372900, episode=229 reward=0.7491232 (533.94 it/sec) -training >> step=1373000, episode=229 reward=0.7388954 (534.79 it/sec) -training >> step=1373100, episode=229 reward=0.7640021 (543.72 it/sec) -training >> step=1373200, episode=229 reward=0.744951 (544.04 it/sec) -training >> step=1373300, episode=230 reward=0.7686344 (124.95 it/sec) -training >> step=1373400, episode=230 reward=0.7395426 (499.26 it/sec) -training >> step=1373500, episode=230 reward=0.7469329 (492.43 it/sec) -training >> step=1373600, episode=230 reward=0.7315185 (468.09 it/sec) -training >> step=1373700, episode=230 reward=0.7626752 (460.10 it/sec) -training >> step=1373800, episode=230 reward=0.7541627 (558.22 it/sec) -training >> step=1373900, episode=230 reward=0.7680544 (514.17 it/sec) -training >> step=1374000, episode=230 reward=0.746303 (530.86 it/sec) -training >> step=1374100, episode=230 reward=0.762154 (587.59 it/sec) -training >> step=1374200, episode=230 reward=0.7519265 (564.22 it/sec) -training >> step=1374300, episode=230 reward=0.7533781 (553.67 it/sec) -training >> step=1374400, episode=230 reward=0.7451332 (515.34 it/sec) -training >> step=1374500, episode=230 reward=0.7576009 (568.72 it/sec) -training >> step=1374600, episode=230 reward=0.7355112 (538.08 it/sec) -training >> step=1374700, episode=230 reward=0.7729769 (558.36 it/sec) -training >> step=1374800, episode=230 reward=0.7602887 (556.72 it/sec) -training >> step=1374900, episode=230 reward=0.7572932 (569.64 it/sec) -training >> step=1375000, episode=230 reward=0.7675107 (486.55 it/sec) -training >> step=1375100, episode=230 reward=0.7688417 (501.06 it/sec) -training >> step=1375200, episode=230 reward=0.7533389 (528.83 it/sec) -training >> step=1375300, episode=230 reward=0.7632016 (543.16 it/sec) -training >> step=1375400, episode=230 reward=0.7571824 (560.30 it/sec) -training >> step=1375500, episode=230 reward=0.7655017 (527.93 it/sec) -training >> step=1375600, episode=230 reward=0.7631298 (540.72 it/sec) -training >> step=1375700, episode=230 reward=0.7342408 (505.79 it/sec) -training >> step=1375800, episode=230 reward=0.7756255 (529.54 it/sec) -training >> step=1375900, episode=230 reward=0.7416422 (542.47 it/sec) -training >> step=1376000, episode=230 reward=0.7616088 (541.72 it/sec) -training >> step=1376100, episode=230 reward=0.7537405 (557.34 it/sec) -training >> step=1376200, episode=230 reward=0.7451483 (543.30 it/sec) -training >> step=1376300, episode=230 reward=0.7685376 (468.29 it/sec) -training >> step=1376400, episode=230 reward=0.7790062 (591.34 it/sec) -training >> step=1376500, episode=230 reward=0.7494163 (553.82 it/sec) -training >> step=1376600, episode=230 reward=0.7679719 (507.84 it/sec) -training >> step=1376700, episode=230 reward=0.7484815 (522.81 it/sec) -training >> step=1376800, episode=230 reward=0.7411259 (511.54 it/sec) -training >> step=1376900, episode=230 reward=0.7475653 (517.46 it/sec) -training >> step=1377000, episode=230 reward=0.7657172 (520.28 it/sec) -training >> step=1377100, episode=230 reward=0.7599106 (503.37 it/sec) -training >> step=1377200, episode=230 reward=0.768366 (555.37 it/sec) -training >> step=1377300, episode=230 reward=0.783197 (548.37 it/sec) -training >> step=1377400, episode=230 reward=0.7550728 (473.34 it/sec) -training >> step=1377500, episode=230 reward=0.7748823 (486.22 it/sec) -training >> step=1377600, episode=230 reward=0.7604629 (455.16 it/sec) -training >> step=1377700, episode=230 reward=0.748875 (373.23 it/sec) -training >> step=1377800, episode=230 reward=0.7726688 (368.02 it/sec) -training >> step=1377900, episode=230 reward=0.7522486 (488.22 it/sec) -training >> step=1378000, episode=230 reward=0.7551625 (482.50 it/sec) -training >> step=1378100, episode=230 reward=0.759899 (478.73 it/sec) -training >> step=1378200, episode=230 reward=0.7826555 (363.51 it/sec) -training >> step=1378300, episode=230 reward=0.7689831 (449.65 it/sec) -training >> step=1378400, episode=230 reward=0.7282461 (541.77 it/sec) -training >> step=1378500, episode=230 reward=0.7636483 (481.06 it/sec) -training >> step=1378600, episode=230 reward=0.7648854 (542.40 it/sec) -training >> step=1378700, episode=230 reward=0.7449018 (547.86 it/sec) -training >> step=1378800, episode=230 reward=0.740478 (436.68 it/sec) -training >> step=1378900, episode=230 reward=0.7482057 (377.23 it/sec) -training >> step=1379000, episode=230 reward=0.7369969 (425.61 it/sec) -training >> step=1379100, episode=230 reward=0.7420624 (432.18 it/sec) -training >> step=1379200, episode=230 reward=0.7396342 (449.79 it/sec) -training >> step=1379300, episode=231 reward=0.7433417 (41.61 it/sec) -training >> step=1379400, episode=231 reward=0.7640036 (500.88 it/sec) -training >> step=1379500, episode=231 reward=0.7447752 (515.15 it/sec) -training >> step=1379600, episode=231 reward=0.7450119 (539.66 it/sec) -training >> step=1379700, episode=231 reward=0.7560028 (406.89 it/sec) -training >> step=1379800, episode=231 reward=0.755164 (376.85 it/sec) -training >> step=1379900, episode=231 reward=0.761632 (353.14 it/sec) -training >> step=1380000, episode=231 reward=0.7703899 (364.39 it/sec) -training >> step=1380100, episode=231 reward=0.7514045 (340.66 it/sec) -training >> step=1380200, episode=231 reward=0.7604872 (391.62 it/sec) -training >> step=1380300, episode=231 reward=0.7490986 (415.62 it/sec) -training >> step=1380400, episode=231 reward=0.7471395 (486.65 it/sec) -training >> step=1380500, episode=231 reward=0.7465854 (485.69 it/sec) -training >> step=1380600, episode=231 reward=0.767292 (411.77 it/sec) -training >> step=1380700, episode=231 reward=0.7620859 (457.87 it/sec) -training >> step=1380800, episode=231 reward=0.7693213 (549.04 it/sec) -training >> step=1380900, episode=231 reward=0.7567256 (525.16 it/sec) -training >> step=1381000, episode=231 reward=0.7667345 (457.96 it/sec) -training >> step=1381100, episode=231 reward=0.7713398 (417.64 it/sec) -training >> step=1381200, episode=231 reward=0.7598801 (491.95 it/sec) -training >> step=1381300, episode=231 reward=0.7770362 (481.22 it/sec) -training >> step=1381400, episode=231 reward=0.7546706 (473.56 it/sec) -training >> step=1381500, episode=231 reward=0.7725196 (452.64 it/sec) -training >> step=1381600, episode=231 reward=0.7750018 (426.93 it/sec) -training >> step=1381700, episode=231 reward=0.7410769 (470.09 it/sec) -training >> step=1381800, episode=231 reward=0.7636867 (447.02 it/sec) -training >> step=1381900, episode=231 reward=0.7665162 (477.13 it/sec) -training >> step=1382000, episode=231 reward=0.7600815 (459.58 it/sec) -training >> step=1382100, episode=231 reward=0.7674758 (519.06 it/sec) -training >> step=1382200, episode=231 reward=0.7726375 (452.28 it/sec) -training >> step=1382300, episode=231 reward=0.7671931 (498.99 it/sec) -training >> step=1382400, episode=231 reward=0.7598398 (431.50 it/sec) -training >> step=1382500, episode=231 reward=0.7677367 (442.60 it/sec) -training >> step=1382600, episode=231 reward=0.7735013 (509.93 it/sec) -training >> step=1382700, episode=231 reward=0.754949 (453.49 it/sec) -training >> step=1382800, episode=231 reward=0.7583827 (412.91 it/sec) -training >> step=1382900, episode=231 reward=0.7654393 (395.24 it/sec) -training >> step=1383000, episode=231 reward=0.7787219 (464.63 it/sec) -training >> step=1383100, episode=231 reward=0.7593256 (430.76 it/sec) -training >> step=1383200, episode=231 reward=0.757269 (510.71 it/sec) -training >> step=1383300, episode=231 reward=0.7701405 (484.20 it/sec) -training >> step=1383400, episode=231 reward=0.7400164 (428.56 it/sec) -training >> step=1383500, episode=231 reward=0.7502877 (440.81 it/sec) -training >> step=1383600, episode=231 reward=0.7656933 (452.78 it/sec) -training >> step=1383700, episode=231 reward=0.7661574 (527.13 it/sec) -training >> step=1383800, episode=231 reward=0.7734668 (423.62 it/sec) -training >> step=1383900, episode=231 reward=0.7826757 (473.58 it/sec) -training >> step=1384000, episode=231 reward=0.7752164 (508.71 it/sec) -training >> step=1384100, episode=231 reward=0.7589658 (458.60 it/sec) -training >> step=1384200, episode=231 reward=0.7815108 (487.76 it/sec) -training >> step=1384300, episode=231 reward=0.7364713 (461.61 it/sec) -training >> step=1384400, episode=231 reward=0.7506538 (490.29 it/sec) -training >> step=1384500, episode=231 reward=0.7432052 (416.72 it/sec) -training >> step=1384600, episode=231 reward=0.755159 (422.57 it/sec) -training >> step=1384700, episode=231 reward=0.7829905 (419.52 it/sec) -training >> step=1384800, episode=231 reward=0.7776266 (427.85 it/sec) -training >> step=1384900, episode=231 reward=0.7555549 (489.84 it/sec) -training >> step=1385000, episode=231 reward=0.7260895 (482.36 it/sec) -training >> step=1385100, episode=231 reward=0.7401092 (556.98 it/sec) -training >> step=1385200, episode=231 reward=0.7641527 (514.07 it/sec) -training >> step=1385300, episode=232 reward=0.7910554 (95.95 it/sec) -training >> step=1385400, episode=232 reward=0.7646868 (465.15 it/sec) -training >> step=1385500, episode=232 reward=0.7569333 (500.97 it/sec) -training >> step=1385600, episode=232 reward=0.7808329 (504.09 it/sec) -training >> step=1385700, episode=232 reward=0.7385293 (485.36 it/sec) -training >> step=1385800, episode=232 reward=0.7579454 (495.14 it/sec) -training >> step=1385900, episode=232 reward=0.7471777 (516.60 it/sec) -training >> step=1386000, episode=232 reward=0.7613025 (527.72 it/sec) -training >> step=1386100, episode=232 reward=0.7707598 (494.82 it/sec) -training >> step=1386200, episode=232 reward=0.7206031 (515.91 it/sec) -training >> step=1386300, episode=232 reward=0.7594298 (526.79 it/sec) -training >> step=1386400, episode=232 reward=0.743823 (508.28 it/sec) -training >> step=1386500, episode=232 reward=0.7449681 (498.94 it/sec) -training >> step=1386600, episode=232 reward=0.7537048 (512.06 it/sec) -training >> step=1386700, episode=232 reward=0.7515196 (507.30 it/sec) -training >> step=1386800, episode=232 reward=0.7673532 (523.50 it/sec) -training >> step=1386900, episode=232 reward=0.7493417 (470.54 it/sec) -training >> step=1387000, episode=232 reward=0.7713225 (525.88 it/sec) -training >> step=1387100, episode=232 reward=0.7719253 (461.91 it/sec) -training >> step=1387200, episode=232 reward=0.781247 (512.58 it/sec) -training >> step=1387300, episode=232 reward=0.7421684 (539.59 it/sec) -training >> step=1387400, episode=232 reward=0.7651802 (472.41 it/sec) -training >> step=1387500, episode=232 reward=0.76259 (544.16 it/sec) -training >> step=1387600, episode=232 reward=0.7659772 (506.19 it/sec) -training >> step=1387700, episode=232 reward=0.7634884 (425.25 it/sec) -training >> step=1387800, episode=232 reward=0.7619605 (518.51 it/sec) -training >> step=1387900, episode=232 reward=0.7726195 (489.11 it/sec) -training >> step=1388000, episode=232 reward=0.7716103 (501.65 it/sec) -training >> step=1388100, episode=232 reward=0.7655183 (518.56 it/sec) -training >> step=1388200, episode=232 reward=0.7594245 (492.87 it/sec) -training >> step=1388300, episode=232 reward=0.7143058 (503.70 it/sec) -training >> step=1388400, episode=232 reward=0.7740393 (498.43 it/sec) -training >> step=1388500, episode=232 reward=0.7628911 (529.44 it/sec) -training >> step=1388600, episode=232 reward=0.7761292 (533.43 it/sec) -training >> step=1388700, episode=232 reward=0.7653428 (496.24 it/sec) -training >> step=1388800, episode=232 reward=0.7663398 (517.83 it/sec) -training >> step=1388900, episode=232 reward=0.7836669 (518.18 it/sec) -training >> step=1389000, episode=232 reward=0.7763585 (483.71 it/sec) -training >> step=1389100, episode=232 reward=0.7533955 (525.61 it/sec) -training >> step=1389200, episode=232 reward=0.781386 (516.13 it/sec) -training >> step=1389300, episode=232 reward=0.7482547 (519.54 it/sec) -training >> step=1389400, episode=232 reward=0.7628852 (505.91 it/sec) -training >> step=1389500, episode=232 reward=0.7571353 (515.68 it/sec) -training >> step=1389600, episode=232 reward=0.7671924 (511.06 it/sec) -training >> step=1389700, episode=232 reward=0.7524763 (511.68 it/sec) -training >> step=1389800, episode=232 reward=0.7566336 (548.36 it/sec) -training >> step=1389900, episode=232 reward=0.7908588 (485.36 it/sec) -training >> step=1390000, episode=232 reward=0.7403018 (458.32 it/sec) -training >> step=1390100, episode=232 reward=0.7424441 (519.62 it/sec) -training >> step=1390200, episode=232 reward=0.7644494 (512.43 it/sec) -training >> step=1390300, episode=232 reward=0.7513375 (536.48 it/sec) -training >> step=1390400, episode=232 reward=0.7513701 (500.69 it/sec) -training >> step=1390500, episode=232 reward=0.7557486 (497.36 it/sec) -training >> step=1390600, episode=232 reward=0.7476028 (504.72 it/sec) -training >> step=1390700, episode=232 reward=0.7707324 (507.07 it/sec) -training >> step=1390800, episode=232 reward=0.7495977 (515.12 it/sec) -training >> step=1390900, episode=232 reward=0.7660376 (498.47 it/sec) -training >> step=1391000, episode=232 reward=0.7564788 (533.41 it/sec) -training >> step=1391100, episode=232 reward=0.7504194 (512.47 it/sec) -training >> step=1391200, episode=232 reward=0.7564843 (510.73 it/sec) -training >> step=1391300, episode=233 reward=0.7447624 (141.20 it/sec) -training >> step=1391400, episode=233 reward=0.7721153 (504.13 it/sec) -training >> step=1391500, episode=233 reward=0.7551153 (528.44 it/sec) -training >> step=1391600, episode=233 reward=0.7368182 (511.38 it/sec) -training >> step=1391700, episode=233 reward=0.7741042 (514.02 it/sec) -training >> step=1391800, episode=233 reward=0.7381276 (544.38 it/sec) -training >> step=1391900, episode=233 reward=0.7525177 (479.55 it/sec) -training >> step=1392000, episode=233 reward=0.7518596 (494.11 it/sec) -training >> step=1392100, episode=233 reward=0.7498019 (518.16 it/sec) -training >> step=1392200, episode=233 reward=0.7556527 (555.59 it/sec) -training >> step=1392300, episode=233 reward=0.7749023 (509.13 it/sec) -training >> step=1392400, episode=233 reward=0.770023 (409.99 it/sec) -training >> step=1392500, episode=233 reward=0.7549856 (378.63 it/sec) -training >> step=1392600, episode=233 reward=0.7472379 (431.71 it/sec) -training >> step=1392700, episode=233 reward=0.748606 (413.00 it/sec) -training >> step=1392800, episode=233 reward=0.7442423 (443.09 it/sec) -training >> step=1392900, episode=233 reward=0.7566457 (495.72 it/sec) -training >> step=1393000, episode=233 reward=0.7702078 (522.29 it/sec) -training >> step=1393100, episode=233 reward=0.7637244 (507.10 it/sec) -training >> step=1393200, episode=233 reward=0.7620568 (528.47 it/sec) -training >> step=1393300, episode=233 reward=0.7685623 (494.53 it/sec) -training >> step=1393400, episode=233 reward=0.7756634 (520.56 it/sec) -training >> step=1393500, episode=233 reward=0.7721078 (507.00 it/sec) -training >> step=1393600, episode=233 reward=0.7691793 (529.63 it/sec) -training >> step=1393700, episode=233 reward=0.7705696 (528.52 it/sec) -training >> step=1393800, episode=233 reward=0.7644362 (535.99 it/sec) -training >> step=1393900, episode=233 reward=0.776047 (495.08 it/sec) -training >> step=1394000, episode=233 reward=0.7403359 (524.97 it/sec) -training >> step=1394100, episode=233 reward=0.7732905 (505.06 it/sec) -training >> step=1394200, episode=233 reward=0.751725 (536.10 it/sec) -training >> step=1394300, episode=233 reward=0.7407023 (526.03 it/sec) -training >> step=1394400, episode=233 reward=0.7554797 (479.15 it/sec) -training >> step=1394500, episode=233 reward=0.7728212 (543.27 it/sec) -training >> step=1394600, episode=233 reward=0.7509505 (507.77 it/sec) -training >> step=1394700, episode=233 reward=0.7828809 (526.69 it/sec) -training >> step=1394800, episode=233 reward=0.7639749 (523.78 it/sec) -training >> step=1394900, episode=233 reward=0.7596045 (462.93 it/sec) -training >> step=1395000, episode=233 reward=0.7484542 (524.87 it/sec) -training >> step=1395100, episode=233 reward=0.7643449 (509.97 it/sec) -training >> step=1395200, episode=233 reward=0.7510852 (525.95 it/sec) -training >> step=1395300, episode=233 reward=0.7691223 (506.99 it/sec) -training >> step=1395400, episode=233 reward=0.7559599 (461.94 it/sec) -training >> step=1395500, episode=233 reward=0.764388 (506.45 it/sec) -training >> step=1395600, episode=233 reward=0.7600238 (545.62 it/sec) -training >> step=1395700, episode=233 reward=0.7776808 (465.50 it/sec) -training >> step=1395800, episode=233 reward=0.7514368 (508.13 it/sec) -training >> step=1395900, episode=233 reward=0.7741542 (519.61 it/sec) -training >> step=1396000, episode=233 reward=0.7630942 (502.55 it/sec) -training >> step=1396100, episode=233 reward=0.7677247 (505.18 it/sec) -training >> step=1396200, episode=233 reward=0.7914968 (511.72 it/sec) -training >> step=1396300, episode=233 reward=0.7535573 (566.70 it/sec) -training >> step=1396400, episode=233 reward=0.7149469 (504.67 it/sec) -training >> step=1396500, episode=233 reward=0.7649121 (494.55 it/sec) -training >> step=1396600, episode=233 reward=0.7645519 (516.43 it/sec) -training >> step=1396700, episode=233 reward=0.7551016 (493.32 it/sec) -training >> step=1396800, episode=233 reward=0.773193 (546.52 it/sec) -training >> step=1396900, episode=233 reward=0.7652182 (522.90 it/sec) -training >> step=1397000, episode=233 reward=0.7552511 (459.86 it/sec) -training >> step=1397100, episode=233 reward=0.7675965 (532.14 it/sec) -training >> step=1397200, episode=233 reward=0.7596011 (494.16 it/sec) -training >> step=1397300, episode=234 reward=0.7723435 (127.31 it/sec) -training >> step=1397400, episode=234 reward=0.7637515 (507.36 it/sec) -training >> step=1397500, episode=234 reward=0.7422369 (522.52 it/sec) -training >> step=1397600, episode=234 reward=0.7569734 (474.57 it/sec) -training >> step=1397700, episode=234 reward=0.762207 (498.63 it/sec) -training >> step=1397800, episode=234 reward=0.7628583 (471.94 it/sec) -training >> step=1397900, episode=234 reward=0.7565837 (463.08 it/sec) -training >> step=1398000, episode=234 reward=0.7515587 (521.61 it/sec) -training >> step=1398100, episode=234 reward=0.7705976 (540.95 it/sec) -training >> step=1398200, episode=234 reward=0.7321587 (492.70 it/sec) -training >> step=1398300, episode=234 reward=0.7546768 (511.95 it/sec) -training >> step=1398400, episode=234 reward=0.7383434 (472.21 it/sec) -training >> step=1398500, episode=234 reward=0.7765245 (524.57 it/sec) -training >> step=1398600, episode=234 reward=0.7662575 (522.45 it/sec) -training >> step=1398700, episode=234 reward=0.7428694 (554.59 it/sec) -training >> step=1398800, episode=234 reward=0.7458006 (492.43 it/sec) -training >> step=1398900, episode=234 reward=0.7587568 (500.21 it/sec) -training >> step=1399000, episode=234 reward=0.752694 (540.32 it/sec) -training >> step=1399100, episode=234 reward=0.7730135 (533.23 it/sec) -training >> step=1399200, episode=234 reward=0.7627563 (506.39 it/sec) -training >> step=1399300, episode=234 reward=0.7650248 (516.77 it/sec) -training >> step=1399400, episode=234 reward=0.7726705 (494.24 it/sec) -training >> step=1399500, episode=234 reward=0.7415777 (555.44 it/sec) -training >> step=1399600, episode=234 reward=0.7525797 (509.88 it/sec) -training >> step=1399700, episode=234 reward=0.770202 (527.29 it/sec) -training >> step=1399800, episode=234 reward=0.7442523 (494.62 it/sec) -training >> step=1399900, episode=234 reward=0.7536561 (493.24 it/sec) -training >> step=1400000, episode=234 reward=0.7716237 (502.70 it/sec) -training >> step=1400100, episode=234 reward=0.7581104 (530.82 it/sec) -training >> step=1400200, episode=234 reward=0.7470965 (529.26 it/sec) -training >> step=1400300, episode=234 reward=0.7603392 (504.40 it/sec) -training >> step=1400400, episode=234 reward=0.7652397 (473.92 it/sec) -training >> step=1400500, episode=234 reward=0.7485464 (493.81 it/sec) -training >> step=1400600, episode=234 reward=0.7762175 (555.26 it/sec) -training >> step=1400700, episode=234 reward=0.7418561 (518.80 it/sec) -training >> step=1400800, episode=234 reward=0.7419434 (495.70 it/sec) -training >> step=1400900, episode=234 reward=0.7562345 (469.21 it/sec) -training >> step=1401000, episode=234 reward=0.7519157 (514.13 it/sec) -training >> step=1401100, episode=234 reward=0.7769379 (505.74 it/sec) -training >> step=1401200, episode=234 reward=0.7504066 (536.33 it/sec) -training >> step=1401300, episode=234 reward=0.7682974 (521.86 it/sec) -training >> step=1401400, episode=234 reward=0.7363924 (514.76 it/sec) -training >> step=1401500, episode=234 reward=0.7673075 (491.28 it/sec) -training >> step=1401600, episode=234 reward=0.7624892 (515.61 it/sec) -training >> step=1401700, episode=234 reward=0.7769135 (476.26 it/sec) -training >> step=1401800, episode=234 reward=0.7601084 (482.46 it/sec) -training >> step=1401900, episode=234 reward=0.7767445 (513.52 it/sec) -training >> step=1402000, episode=234 reward=0.7422008 (477.97 it/sec) -training >> step=1402100, episode=234 reward=0.7448307 (505.23 it/sec) -training >> step=1402200, episode=234 reward=0.7626196 (511.37 it/sec) -training >> step=1402300, episode=234 reward=0.7529273 (514.79 it/sec) -training >> step=1402400, episode=234 reward=0.735669 (545.75 it/sec) -training >> step=1402500, episode=234 reward=0.7789344 (473.41 it/sec) -training >> step=1402600, episode=234 reward=0.7467907 (462.01 it/sec) -training >> step=1402700, episode=234 reward=0.7472299 (472.08 it/sec) -training >> step=1402800, episode=234 reward=0.7457206 (507.65 it/sec) -training >> step=1402900, episode=234 reward=0.7371005 (534.61 it/sec) -training >> step=1403000, episode=234 reward=0.7496139 (472.19 it/sec) -training >> step=1403100, episode=234 reward=0.7524523 (550.42 it/sec) -training >> step=1403200, episode=234 reward=0.7492496 (546.80 it/sec) -training >> step=1403300, episode=235 reward=0.7541161 (126.97 it/sec) -training >> step=1403400, episode=235 reward=0.7640072 (474.64 it/sec) -training >> step=1403500, episode=235 reward=0.7543046 (521.65 it/sec) -training >> step=1403600, episode=235 reward=0.7621719 (471.75 it/sec) -training >> step=1403700, episode=235 reward=0.7520835 (467.44 it/sec) -training >> step=1403800, episode=235 reward=0.7677838 (459.67 it/sec) -training >> step=1403900, episode=235 reward=0.7540075 (530.65 it/sec) -training >> step=1404000, episode=235 reward=0.7612489 (545.10 it/sec) -training >> step=1404100, episode=235 reward=0.7431175 (508.73 it/sec) -training >> step=1404200, episode=235 reward=0.7488726 (512.18 it/sec) -training >> step=1404300, episode=235 reward=0.768232 (476.16 it/sec) -training >> step=1404400, episode=235 reward=0.7633083 (480.21 it/sec) -training >> step=1404500, episode=235 reward=0.7598341 (518.28 it/sec) -training >> step=1404600, episode=235 reward=0.7598505 (524.12 it/sec) -training >> step=1404700, episode=235 reward=0.7674826 (554.44 it/sec) -training >> step=1404800, episode=235 reward=0.756642 (495.97 it/sec) -training >> step=1404900, episode=235 reward=0.769357 (495.20 it/sec) -training >> step=1405000, episode=235 reward=0.7470439 (518.32 it/sec) -training >> step=1405100, episode=235 reward=0.7890739 (528.30 it/sec) -training >> step=1405200, episode=235 reward=0.770902 (529.02 it/sec) -training >> step=1405300, episode=235 reward=0.7553808 (482.67 it/sec) -training >> step=1405400, episode=235 reward=0.7281049 (508.13 it/sec) -training >> step=1405500, episode=235 reward=0.7572663 (515.79 it/sec) -training >> step=1405600, episode=235 reward=0.7439197 (504.20 it/sec) -training >> step=1405700, episode=235 reward=0.7654568 (549.60 it/sec) -training >> step=1405800, episode=235 reward=0.759398 (545.92 it/sec) -training >> step=1405900, episode=235 reward=0.768237 (479.77 it/sec) -training >> step=1406000, episode=235 reward=0.7752913 (492.72 it/sec) -training >> step=1406100, episode=235 reward=0.7646994 (520.73 it/sec) -training >> step=1406200, episode=235 reward=0.7530329 (477.44 it/sec) -training >> step=1406300, episode=235 reward=0.7721027 (500.12 it/sec) -training >> step=1406400, episode=235 reward=0.7597845 (432.72 it/sec) -training >> step=1406500, episode=235 reward=0.7679363 (536.82 it/sec) -training >> step=1406600, episode=235 reward=0.7640491 (545.24 it/sec) -training >> step=1406700, episode=235 reward=0.7337407 (521.88 it/sec) -training >> step=1406800, episode=235 reward=0.7667745 (510.03 it/sec) -training >> step=1406900, episode=235 reward=0.754611 (443.92 it/sec) -training >> step=1407000, episode=235 reward=0.7671999 (392.55 it/sec) -training >> step=1407100, episode=235 reward=0.7591547 (398.92 it/sec) -training >> step=1407200, episode=235 reward=0.7446368 (457.68 it/sec) -training >> step=1407300, episode=235 reward=0.7904702 (431.47 it/sec) -training >> step=1407400, episode=235 reward=0.757829 (449.78 it/sec) -training >> step=1407500, episode=235 reward=0.7478733 (486.78 it/sec) -training >> step=1407600, episode=235 reward=0.7143942 (474.86 it/sec) -training >> step=1407700, episode=235 reward=0.7734237 (517.92 it/sec) -training >> step=1407800, episode=235 reward=0.7650512 (426.25 it/sec) -training >> step=1407900, episode=235 reward=0.7768063 (506.29 it/sec) -training >> step=1408000, episode=235 reward=0.7478972 (560.35 it/sec) -training >> step=1408100, episode=235 reward=0.7579724 (504.47 it/sec) -training >> step=1408200, episode=235 reward=0.7584537 (497.72 it/sec) -training >> step=1408300, episode=235 reward=0.7419186 (485.35 it/sec) -training >> step=1408400, episode=235 reward=0.7777525 (527.05 it/sec) -training >> step=1408500, episode=235 reward=0.7533423 (522.29 it/sec) -training >> step=1408600, episode=235 reward=0.7466642 (470.95 it/sec) -training >> step=1408700, episode=235 reward=0.766771 (518.06 it/sec) -training >> step=1408800, episode=235 reward=0.7545177 (500.71 it/sec) -training >> step=1408900, episode=235 reward=0.7531269 (474.62 it/sec) -training >> step=1409000, episode=235 reward=0.7563587 (531.88 it/sec) -training >> step=1409100, episode=235 reward=0.7620342 (521.62 it/sec) -training >> step=1409200, episode=235 reward=0.7777435 (525.38 it/sec) -training >> step=1409300, episode=236 reward=0.757352 (148.79 it/sec) -training >> step=1409400, episode=236 reward=0.7306922 (532.84 it/sec) -training >> step=1409500, episode=236 reward=0.7428969 (571.64 it/sec) -training >> step=1409600, episode=236 reward=0.742817 (502.96 it/sec) -training >> step=1409700, episode=236 reward=0.740107 (453.49 it/sec) -training >> step=1409800, episode=236 reward=0.7469158 (493.07 it/sec) -training >> step=1409900, episode=236 reward=0.7513934 (503.44 it/sec) -training >> step=1410000, episode=236 reward=0.7515089 (538.34 it/sec) -training >> step=1410100, episode=236 reward=0.7549725 (477.74 it/sec) -training >> step=1410200, episode=236 reward=0.7329072 (468.69 it/sec) -training >> step=1410300, episode=236 reward=0.7765048 (516.87 it/sec) -training >> step=1410400, episode=236 reward=0.7668734 (552.73 it/sec) -training >> step=1410500, episode=236 reward=0.7507095 (500.54 it/sec) -training >> step=1410600, episode=236 reward=0.762906 (502.53 it/sec) -training >> step=1410700, episode=236 reward=0.7574466 (545.72 it/sec) -training >> step=1410800, episode=236 reward=0.7689913 (502.08 it/sec) -training >> step=1410900, episode=236 reward=0.7742264 (526.70 it/sec) -training >> step=1411000, episode=236 reward=0.7472056 (489.31 it/sec) -training >> step=1411100, episode=236 reward=0.7575015 (503.94 it/sec) -training >> step=1411200, episode=236 reward=0.7826106 (508.26 it/sec) -training >> step=1411300, episode=236 reward=0.770161 (461.88 it/sec) -training >> step=1411400, episode=236 reward=0.7679084 (534.60 it/sec) -training >> step=1411500, episode=236 reward=0.7812525 (530.08 it/sec) -training >> step=1411600, episode=236 reward=0.7726288 (536.24 it/sec) -training >> step=1411700, episode=236 reward=0.7606223 (479.53 it/sec) -training >> step=1411800, episode=236 reward=0.774707 (446.38 it/sec) -training >> step=1411900, episode=236 reward=0.7394611 (546.16 it/sec) -training >> step=1412000, episode=236 reward=0.7678437 (519.69 it/sec) -training >> step=1412100, episode=236 reward=0.7479638 (521.73 it/sec) -training >> step=1412200, episode=236 reward=0.7474436 (461.82 it/sec) -training >> step=1412300, episode=236 reward=0.7534736 (522.85 it/sec) -training >> step=1412400, episode=236 reward=0.7716774 (503.57 it/sec) -training >> step=1412500, episode=236 reward=0.7375327 (529.61 it/sec) -training >> step=1412600, episode=236 reward=0.7343435 (527.70 it/sec) -training >> step=1412700, episode=236 reward=0.7655513 (503.76 it/sec) -training >> step=1412800, episode=236 reward=0.765837 (549.70 it/sec) -training >> step=1412900, episode=236 reward=0.7560184 (446.30 it/sec) -training >> step=1413000, episode=236 reward=0.7633085 (526.67 it/sec) -training >> step=1413100, episode=236 reward=0.7415208 (545.69 it/sec) -training >> step=1413200, episode=236 reward=0.7607138 (490.11 it/sec) -training >> step=1413300, episode=236 reward=0.7655711 (517.06 it/sec) -training >> step=1413400, episode=236 reward=0.7583542 (489.32 it/sec) -training >> step=1413500, episode=236 reward=0.754147 (523.14 it/sec) -training >> step=1413600, episode=236 reward=0.754068 (495.00 it/sec) -training >> step=1413700, episode=236 reward=0.7591796 (492.29 it/sec) -training >> step=1413800, episode=236 reward=0.782712 (565.34 it/sec) -training >> step=1413900, episode=236 reward=0.7838823 (504.08 it/sec) -training >> step=1414000, episode=236 reward=0.7507581 (526.94 it/sec) -training >> step=1414100, episode=236 reward=0.7462903 (511.02 it/sec) -training >> step=1414200, episode=236 reward=0.7780445 (516.15 it/sec) -training >> step=1414300, episode=236 reward=0.8022626 (499.95 it/sec) -training >> step=1414400, episode=236 reward=0.759935 (536.73 it/sec) -training >> step=1414500, episode=236 reward=0.7500749 (496.17 it/sec) -training >> step=1414600, episode=236 reward=0.7335918 (535.83 it/sec) -training >> step=1414700, episode=236 reward=0.7547799 (500.39 it/sec) -training >> step=1414800, episode=236 reward=0.7485099 (514.90 it/sec) -training >> step=1414900, episode=236 reward=0.7732238 (563.59 it/sec) -training >> step=1415000, episode=236 reward=0.7457934 (517.71 it/sec) -training >> step=1415100, episode=236 reward=0.7325208 (494.43 it/sec) -training >> step=1415200, episode=236 reward=0.7636309 (531.80 it/sec) -training >> step=1415300, episode=237 reward=0.760724 (141.32 it/sec) -training >> step=1415400, episode=237 reward=0.7417417 (521.05 it/sec) -training >> step=1415500, episode=237 reward=0.7496729 (503.51 it/sec) -training >> step=1415600, episode=237 reward=0.7230502 (494.82 it/sec) -training >> step=1415700, episode=237 reward=0.7314022 (525.54 it/sec) -training >> step=1415800, episode=237 reward=0.749415 (493.08 it/sec) -training >> step=1415900, episode=237 reward=0.7428305 (506.39 it/sec) -training >> step=1416000, episode=237 reward=0.7703514 (460.04 it/sec) -training >> step=1416100, episode=237 reward=0.756705 (531.05 it/sec) -training >> step=1416200, episode=237 reward=0.7694863 (545.58 it/sec) -training >> step=1416300, episode=237 reward=0.7754179 (508.21 it/sec) -training >> step=1416400, episode=237 reward=0.7530298 (507.22 it/sec) -training >> step=1416500, episode=237 reward=0.7651286 (483.87 it/sec) -training >> step=1416600, episode=237 reward=0.7397216 (553.22 it/sec) -training >> step=1416700, episode=237 reward=0.7434425 (529.22 it/sec) -training >> step=1416800, episode=237 reward=0.7381346 (512.95 it/sec) -training >> step=1416900, episode=237 reward=0.7629791 (477.19 it/sec) -training >> step=1417000, episode=237 reward=0.7675669 (514.29 it/sec) -training >> step=1417100, episode=237 reward=0.7785861 (471.29 it/sec) -training >> step=1417200, episode=237 reward=0.7493339 (534.33 it/sec) -training >> step=1417300, episode=237 reward=0.7579739 (538.03 it/sec) -training >> step=1417400, episode=237 reward=0.7650363 (504.13 it/sec) -training >> step=1417500, episode=237 reward=0.7600452 (491.40 it/sec) -training >> step=1417600, episode=237 reward=0.7767004 (511.34 it/sec) -training >> step=1417700, episode=237 reward=0.7678707 (537.12 it/sec) -training >> step=1417800, episode=237 reward=0.7432767 (534.31 it/sec) -training >> step=1417900, episode=237 reward=0.8009965 (498.92 it/sec) -training >> step=1418000, episode=237 reward=0.7770682 (505.31 it/sec) -training >> step=1418100, episode=237 reward=0.751207 (513.02 it/sec) -training >> step=1418200, episode=237 reward=0.7636883 (499.61 it/sec) -training >> step=1418300, episode=237 reward=0.7770003 (503.77 it/sec) -training >> step=1418400, episode=237 reward=0.7471437 (541.69 it/sec) -training >> step=1418500, episode=237 reward=0.7536863 (476.05 it/sec) -training >> step=1418600, episode=237 reward=0.7772782 (502.48 it/sec) -training >> step=1418700, episode=237 reward=0.7552319 (494.48 it/sec) -training >> step=1418800, episode=237 reward=0.7594193 (555.82 it/sec) -training >> step=1418900, episode=237 reward=0.7633528 (500.72 it/sec) -training >> step=1419000, episode=237 reward=0.7506807 (466.41 it/sec) -training >> step=1419100, episode=237 reward=0.7752423 (476.98 it/sec) -training >> step=1419200, episode=237 reward=0.7465797 (544.96 it/sec) -training >> step=1419300, episode=237 reward=0.7718033 (533.12 it/sec) -training >> step=1419400, episode=237 reward=0.7603182 (511.21 it/sec) -training >> step=1419500, episode=237 reward=0.7471133 (452.19 it/sec) -training >> step=1419600, episode=237 reward=0.7394683 (500.41 it/sec) -training >> step=1419700, episode=237 reward=0.745022 (514.48 it/sec) -training >> step=1419800, episode=237 reward=0.7735333 (534.91 it/sec) -training >> step=1419900, episode=237 reward=0.7741057 (525.52 it/sec) -training >> step=1420000, episode=237 reward=0.775836 (526.05 it/sec) -training >> step=1420100, episode=237 reward=0.7573433 (455.80 it/sec) -training >> step=1420200, episode=237 reward=0.7632859 (512.97 it/sec) -training >> step=1420300, episode=237 reward=0.7750638 (530.33 it/sec) -training >> step=1420400, episode=237 reward=0.7828574 (572.66 it/sec) -training >> step=1420500, episode=237 reward=0.7615504 (537.02 it/sec) -training >> step=1420600, episode=237 reward=0.7433866 (460.13 it/sec) -training >> step=1420700, episode=237 reward=0.7467548 (509.04 it/sec) -training >> step=1420800, episode=237 reward=0.7416085 (507.49 it/sec) -training >> step=1420900, episode=237 reward=0.7444752 (512.38 it/sec) -training >> step=1421000, episode=237 reward=0.7664613 (532.61 it/sec) -training >> step=1421100, episode=237 reward=0.7588542 (463.40 it/sec) -training >> step=1421200, episode=237 reward=0.7621005 (544.49 it/sec) -training >> step=1421300, episode=238 reward=0.7435369 (136.45 it/sec) -training >> step=1421400, episode=238 reward=0.7498819 (336.89 it/sec) -training >> step=1421500, episode=238 reward=0.7750592 (446.48 it/sec) -training >> step=1421600, episode=238 reward=0.7635468 (474.58 it/sec) -training >> step=1421700, episode=238 reward=0.7605147 (509.87 it/sec) -training >> step=1421800, episode=238 reward=0.7398612 (402.02 it/sec) -training >> step=1421900, episode=238 reward=0.7522638 (482.41 it/sec) -training >> step=1422000, episode=238 reward=0.7580748 (537.43 it/sec) -training >> step=1422100, episode=238 reward=0.7438806 (538.59 it/sec) -training >> step=1422200, episode=238 reward=0.7711788 (532.51 it/sec) -training >> step=1422300, episode=238 reward=0.7680035 (491.29 it/sec) -training >> step=1422400, episode=238 reward=0.7682217 (503.98 it/sec) -training >> step=1422500, episode=238 reward=0.7466798 (510.97 it/sec) -training >> step=1422600, episode=238 reward=0.7742996 (522.10 it/sec) -training >> step=1422700, episode=238 reward=0.7694006 (525.24 it/sec) -training >> step=1422800, episode=238 reward=0.7678434 (516.45 it/sec) -training >> step=1422900, episode=238 reward=0.7674727 (457.76 it/sec) -training >> step=1423000, episode=238 reward=0.7713971 (507.19 it/sec) -training >> step=1423100, episode=238 reward=0.753091 (535.07 it/sec) -training >> step=1423200, episode=238 reward=0.755053 (526.28 it/sec) -training >> step=1423300, episode=238 reward=0.7570755 (495.66 it/sec) -training >> step=1423400, episode=238 reward=0.7827558 (451.38 it/sec) -training >> step=1423500, episode=238 reward=0.7579531 (531.91 it/sec) -training >> step=1423600, episode=238 reward=0.7711061 (533.78 it/sec) -training >> step=1423700, episode=238 reward=0.7480718 (530.88 it/sec) -training >> step=1423800, episode=238 reward=0.7423054 (495.71 it/sec) -training >> step=1423900, episode=238 reward=0.7477226 (481.47 it/sec) -training >> step=1424000, episode=238 reward=0.75784 (485.47 it/sec) -training >> step=1424100, episode=238 reward=0.749676 (535.24 it/sec) -training >> step=1424200, episode=238 reward=0.7517884 (495.94 it/sec) -training >> step=1424300, episode=238 reward=0.786471 (512.17 it/sec) -training >> step=1424400, episode=238 reward=0.7706736 (511.85 it/sec) -training >> step=1424500, episode=238 reward=0.7407519 (502.55 it/sec) -training >> step=1424600, episode=238 reward=0.7443566 (478.65 it/sec) -training >> step=1424700, episode=238 reward=0.7471242 (543.03 it/sec) -training >> step=1424800, episode=238 reward=0.7576657 (522.57 it/sec) -training >> step=1424900, episode=238 reward=0.7471464 (502.44 it/sec) -training >> step=1425000, episode=238 reward=0.7489372 (486.87 it/sec) -training >> step=1425100, episode=238 reward=0.7657113 (527.72 it/sec) -training >> step=1425200, episode=238 reward=0.7630815 (476.85 it/sec) -training >> step=1425300, episode=238 reward=0.7375457 (494.63 it/sec) -training >> step=1425400, episode=238 reward=0.7458559 (473.32 it/sec) -training >> step=1425500, episode=238 reward=0.7751126 (495.99 it/sec) -training >> step=1425600, episode=238 reward=0.7674287 (541.86 it/sec) -training >> step=1425700, episode=238 reward=0.748795 (410.54 it/sec) -training >> step=1425800, episode=238 reward=0.7491727 (403.70 it/sec) -training >> step=1425900, episode=238 reward=0.7536514 (430.19 it/sec) -training >> step=1426000, episode=238 reward=0.7657375 (405.52 it/sec) -training >> step=1426100, episode=238 reward=0.7567579 (499.50 it/sec) -training >> step=1426200, episode=238 reward=0.7585091 (489.05 it/sec) -training >> step=1426300, episode=238 reward=0.7477852 (475.73 it/sec) -training >> step=1426400, episode=238 reward=0.7646312 (480.76 it/sec) -training >> step=1426500, episode=238 reward=0.7539448 (432.23 it/sec) -training >> step=1426600, episode=238 reward=0.7578275 (477.90 it/sec) -training >> step=1426700, episode=238 reward=0.7547981 (469.55 it/sec) -training >> step=1426800, episode=238 reward=0.7561828 (443.14 it/sec) -training >> step=1426900, episode=238 reward=0.7431003 (453.61 it/sec) -training >> step=1427000, episode=238 reward=0.7384802 (464.52 it/sec) -training >> step=1427100, episode=238 reward=0.7512939 (482.64 it/sec) -training >> step=1427200, episode=238 reward=0.7377909 (450.53 it/sec) -training >> step=1427300, episode=239 reward=0.7640442 (128.92 it/sec) -training >> step=1427400, episode=239 reward=0.7299207 (498.80 it/sec) -training >> step=1427500, episode=239 reward=0.7314161 (494.60 it/sec) -training >> step=1427600, episode=239 reward=0.7436134 (499.49 it/sec) -training >> step=1427700, episode=239 reward=0.7531328 (480.37 it/sec) -training >> step=1427800, episode=239 reward=0.7456451 (531.32 it/sec) -training >> step=1427900, episode=239 reward=0.7678933 (539.51 it/sec) -training >> step=1428000, episode=239 reward=0.7497821 (501.55 it/sec) -training >> step=1428100, episode=239 reward=0.748831 (548.03 it/sec) -training >> step=1428200, episode=239 reward=0.7423586 (475.46 it/sec) -training >> step=1428300, episode=239 reward=0.7513794 (470.49 it/sec) -training >> step=1428400, episode=239 reward=0.7579219 (509.72 it/sec) -training >> step=1428500, episode=239 reward=0.7375553 (514.35 it/sec) -training >> step=1428600, episode=239 reward=0.7587939 (516.06 it/sec) -training >> step=1428700, episode=239 reward=0.7694022 (497.77 it/sec) -training >> step=1428800, episode=239 reward=0.7630204 (529.92 it/sec) -training >> step=1428900, episode=239 reward=0.7597218 (538.98 it/sec) -training >> step=1429000, episode=239 reward=0.7519795 (475.12 it/sec) -training >> step=1429100, episode=239 reward=0.7587041 (511.52 it/sec) -training >> step=1429200, episode=239 reward=0.7683344 (512.80 it/sec) -training >> step=1429300, episode=239 reward=0.7749598 (467.13 it/sec) -training >> step=1429400, episode=239 reward=0.7486407 (497.28 it/sec) -training >> step=1429500, episode=239 reward=0.7702623 (505.43 it/sec) -training >> step=1429600, episode=239 reward=0.7802139 (522.16 it/sec) -training >> step=1429700, episode=239 reward=0.7602061 (521.52 it/sec) -training >> step=1429800, episode=239 reward=0.7760158 (474.99 it/sec) -training >> step=1429900, episode=239 reward=0.7620196 (490.91 it/sec) -training >> step=1430000, episode=239 reward=0.7666681 (536.48 it/sec) -training >> step=1430100, episode=239 reward=0.7820299 (517.07 it/sec) -training >> step=1430200, episode=239 reward=0.7528067 (510.90 it/sec) -training >> step=1430300, episode=239 reward=0.7682104 (460.87 it/sec) -training >> step=1430400, episode=239 reward=0.7581245 (517.49 it/sec) -training >> step=1430500, episode=239 reward=0.7648287 (554.87 it/sec) -training >> step=1430600, episode=239 reward=0.7733557 (512.34 it/sec) -training >> step=1430700, episode=239 reward=0.7579103 (523.56 it/sec) -training >> step=1430800, episode=239 reward=0.7627819 (515.17 it/sec) -training >> step=1430900, episode=239 reward=0.7639409 (500.73 it/sec) -training >> step=1431000, episode=239 reward=0.7780699 (521.62 it/sec) -training >> step=1431100, episode=239 reward=0.765285 (467.45 it/sec) -training >> step=1431200, episode=239 reward=0.7638411 (521.66 it/sec) -training >> step=1431300, episode=239 reward=0.7598445 (527.73 it/sec) -training >> step=1431400, episode=239 reward=0.7492226 (497.68 it/sec) -training >> step=1431500, episode=239 reward=0.7528834 (475.83 it/sec) -training >> step=1431600, episode=239 reward=0.7600811 (505.75 it/sec) -training >> step=1431700, episode=239 reward=0.7568455 (498.07 it/sec) -training >> step=1431800, episode=239 reward=0.763828 (523.40 it/sec) -training >> step=1431900, episode=239 reward=0.761762 (483.38 it/sec) -training >> step=1432000, episode=239 reward=0.7678295 (523.74 it/sec) -training >> step=1432100, episode=239 reward=0.7592756 (528.29 it/sec) -training >> step=1432200, episode=239 reward=0.7729235 (512.57 it/sec) -training >> step=1432300, episode=239 reward=0.7622358 (537.20 it/sec) -training >> step=1432400, episode=239 reward=0.7420543 (541.19 it/sec) -training >> step=1432500, episode=239 reward=0.7733414 (461.59 it/sec) -training >> step=1432600, episode=239 reward=0.7745538 (532.93 it/sec) -training >> step=1432700, episode=239 reward=0.7683164 (534.31 it/sec) -training >> step=1432800, episode=239 reward=0.7601727 (571.64 it/sec) -training >> step=1432900, episode=239 reward=0.7757023 (518.02 it/sec) -training >> step=1433000, episode=239 reward=0.7440234 (478.86 it/sec) -training >> step=1433100, episode=239 reward=0.7635019 (553.45 it/sec) -training >> step=1433200, episode=239 reward=0.75041 (482.32 it/sec) -training >> step=1433300, episode=240 reward=0.763501 (144.67 it/sec) -training >> step=1433400, episode=240 reward=0.7523148 (526.46 it/sec) -training >> step=1433500, episode=240 reward=0.7449677 (485.93 it/sec) -training >> step=1433600, episode=240 reward=0.7427311 (528.33 it/sec) -training >> step=1433700, episode=240 reward=0.7441675 (532.38 it/sec) -training >> step=1433800, episode=240 reward=0.7409673 (482.34 it/sec) -training >> step=1433900, episode=240 reward=0.7358298 (535.81 it/sec) -training >> step=1434000, episode=240 reward=0.7607847 (546.60 it/sec) -training >> step=1434100, episode=240 reward=0.7523149 (535.92 it/sec) -training >> step=1434200, episode=240 reward=0.7487807 (532.91 it/sec) -training >> step=1434300, episode=240 reward=0.7749895 (549.62 it/sec) -training >> step=1434400, episode=240 reward=0.7552789 (454.63 it/sec) -training >> step=1434500, episode=240 reward=0.7710934 (518.99 it/sec) -training >> step=1434600, episode=240 reward=0.7569672 (524.50 it/sec) -training >> step=1434700, episode=240 reward=0.7366715 (503.17 it/sec) -training >> step=1434800, episode=240 reward=0.7777008 (517.14 it/sec) -training >> step=1434900, episode=240 reward=0.7815531 (454.79 it/sec) -training >> step=1435000, episode=240 reward=0.7653556 (506.89 it/sec) -training >> step=1435100, episode=240 reward=0.7711641 (529.45 it/sec) -training >> step=1435200, episode=240 reward=0.7595069 (572.83 it/sec) -training >> step=1435300, episode=240 reward=0.7665306 (552.80 it/sec) -training >> step=1435400, episode=240 reward=0.773617 (492.78 it/sec) -training >> step=1435500, episode=240 reward=0.7514772 (517.69 it/sec) -training >> step=1435600, episode=240 reward=0.7378527 (504.91 it/sec) -training >> step=1435700, episode=240 reward=0.7694159 (502.81 it/sec) -training >> step=1435800, episode=240 reward=0.7484189 (428.64 it/sec) -training >> step=1435900, episode=240 reward=0.7562664 (360.83 it/sec) -training >> step=1436000, episode=240 reward=0.7832571 (407.02 it/sec) -training >> step=1436100, episode=240 reward=0.7656087 (507.27 it/sec) -training >> step=1436200, episode=240 reward=0.7695326 (523.16 it/sec) -training >> step=1436300, episode=240 reward=0.7576634 (523.94 it/sec) -training >> step=1436400, episode=240 reward=0.7667696 (465.10 it/sec) -training >> step=1436500, episode=240 reward=0.7346767 (506.65 it/sec) -training >> step=1436600, episode=240 reward=0.7633462 (522.86 it/sec) -training >> step=1436700, episode=240 reward=0.7694237 (520.83 it/sec) -training >> step=1436800, episode=240 reward=0.7672132 (548.44 it/sec) -training >> step=1436900, episode=240 reward=0.7509118 (495.90 it/sec) -training >> step=1437000, episode=240 reward=0.759227 (506.36 it/sec) -training >> step=1437100, episode=240 reward=0.776436 (512.40 it/sec) -training >> step=1437200, episode=240 reward=0.7699844 (500.75 it/sec) -training >> step=1437300, episode=240 reward=0.7607978 (530.27 it/sec) -training >> step=1437400, episode=240 reward=0.7507637 (572.02 it/sec) -training >> step=1437500, episode=240 reward=0.7668164 (425.47 it/sec) -training >> step=1437600, episode=240 reward=0.7562659 (515.20 it/sec) -training >> step=1437700, episode=240 reward=0.7747869 (555.23 it/sec) -training >> step=1437800, episode=240 reward=0.7650502 (573.59 it/sec) -training >> step=1437900, episode=240 reward=0.7537026 (542.30 it/sec) -training >> step=1438000, episode=240 reward=0.7569592 (492.22 it/sec) -training >> step=1438100, episode=240 reward=0.7548895 (550.50 it/sec) -training >> step=1438200, episode=240 reward=0.7626044 (524.37 it/sec) -training >> step=1438300, episode=240 reward=0.7719871 (542.06 it/sec) -training >> step=1438400, episode=240 reward=0.7585126 (509.50 it/sec) -training >> step=1438500, episode=240 reward=0.7491297 (559.78 it/sec) -training >> step=1438600, episode=240 reward=0.7393997 (488.60 it/sec) -training >> step=1438700, episode=240 reward=0.7509242 (516.37 it/sec) -training >> step=1438800, episode=240 reward=0.7517639 (551.11 it/sec) -training >> step=1438900, episode=240 reward=0.7917264 (559.59 it/sec) -training >> step=1439000, episode=240 reward=0.7574147 (478.94 it/sec) -training >> step=1439100, episode=240 reward=0.7662141 (455.58 it/sec) -training >> step=1439200, episode=240 reward=0.757748 (515.69 it/sec) -training >> step=1439300, episode=241 reward=0.7363735 (157.92 it/sec) -training >> step=1439400, episode=241 reward=0.7552955 (472.19 it/sec) -training >> step=1439500, episode=241 reward=0.7440987 (489.38 it/sec) -training >> step=1439600, episode=241 reward=0.7357351 (482.92 it/sec) -training >> step=1439700, episode=241 reward=0.7593942 (559.06 it/sec) -training >> step=1439800, episode=241 reward=0.7622954 (553.75 it/sec) -training >> step=1439900, episode=241 reward=0.7599167 (475.44 it/sec) -training >> step=1440000, episode=241 reward=0.7378914 (510.24 it/sec) -training >> step=1440100, episode=241 reward=0.7509527 (576.21 it/sec) -training >> step=1440200, episode=241 reward=0.7509782 (556.48 it/sec) -training >> step=1440300, episode=241 reward=0.7753302 (508.85 it/sec) -training >> step=1440400, episode=241 reward=0.7696655 (487.37 it/sec) -training >> step=1440500, episode=241 reward=0.7658408 (533.84 it/sec) -training >> step=1440600, episode=241 reward=0.7636639 (522.18 it/sec) -training >> step=1440700, episode=241 reward=0.7365301 (538.85 it/sec) -training >> step=1440800, episode=241 reward=0.765888 (533.71 it/sec) -training >> step=1440900, episode=241 reward=0.7315208 (557.05 it/sec) -training >> step=1441000, episode=241 reward=0.7584185 (509.97 it/sec) -training >> step=1441100, episode=241 reward=0.7773467 (468.22 it/sec) -training >> step=1441200, episode=241 reward=0.7776828 (521.65 it/sec) -training >> step=1441300, episode=241 reward=0.7691953 (490.38 it/sec) -training >> step=1441400, episode=241 reward=0.756727 (492.27 it/sec) -training >> step=1441500, episode=241 reward=0.7647698 (483.03 it/sec) -training >> step=1441600, episode=241 reward=0.7523291 (543.42 it/sec) -training >> step=1441700, episode=241 reward=0.7631417 (514.70 it/sec) -training >> step=1441800, episode=241 reward=0.7588337 (513.87 it/sec) -training >> step=1441900, episode=241 reward=0.7413869 (555.37 it/sec) -training >> step=1442000, episode=241 reward=0.7567484 (523.31 it/sec) -training >> step=1442100, episode=241 reward=0.7411295 (501.49 it/sec) -training >> step=1442200, episode=241 reward=0.7581891 (540.01 it/sec) -training >> step=1442300, episode=241 reward=0.7562214 (548.71 it/sec) -training >> step=1442400, episode=241 reward=0.7771455 (529.63 it/sec) -training >> step=1442500, episode=241 reward=0.7429999 (499.10 it/sec) -training >> step=1442600, episode=241 reward=0.7389306 (441.63 it/sec) -training >> step=1442700, episode=241 reward=0.7622533 (546.78 it/sec) -training >> step=1442800, episode=241 reward=0.7460461 (554.46 it/sec) -training >> step=1442900, episode=241 reward=0.7511804 (524.74 it/sec) -training >> step=1443000, episode=241 reward=0.766737 (508.99 it/sec) -training >> step=1443100, episode=241 reward=0.7413148 (489.03 it/sec) -training >> step=1443200, episode=241 reward=0.7560663 (528.09 it/sec) -training >> step=1443300, episode=241 reward=0.7749041 (538.01 it/sec) -training >> step=1443400, episode=241 reward=0.7429317 (529.08 it/sec) -training >> step=1443500, episode=241 reward=0.7261631 (502.65 it/sec) -training >> step=1443600, episode=241 reward=0.7378148 (541.60 it/sec) -training >> step=1443700, episode=241 reward=0.7343537 (495.19 it/sec) -training >> step=1443800, episode=241 reward=0.7687226 (556.04 it/sec) -training >> step=1443900, episode=241 reward=0.7502558 (548.95 it/sec) -training >> step=1444000, episode=241 reward=0.7604514 (526.55 it/sec) -training >> step=1444100, episode=241 reward=0.7511504 (524.69 it/sec) -training >> step=1444200, episode=241 reward=0.7490165 (519.26 it/sec) -training >> step=1444300, episode=241 reward=0.7663169 (559.66 it/sec) -training >> step=1444400, episode=241 reward=0.7774078 (541.48 it/sec) -training >> step=1444500, episode=241 reward=0.7596241 (546.99 it/sec) -training >> step=1444600, episode=241 reward=0.7631591 (532.75 it/sec) -training >> step=1444700, episode=241 reward=0.7619404 (542.50 it/sec) -training >> step=1444800, episode=241 reward=0.7773227 (489.11 it/sec) -training >> step=1444900, episode=241 reward=0.7640522 (522.16 it/sec) -training >> step=1445000, episode=241 reward=0.7493604 (552.78 it/sec) -training >> step=1445100, episode=241 reward=0.762004 (539.05 it/sec) -training >> step=1445200, episode=241 reward=0.7609213 (512.65 it/sec) -training >> step=1445300, episode=242 reward=0.7490807 (96.97 it/sec) -training >> step=1445400, episode=242 reward=0.7364122 (433.41 it/sec) -training >> step=1445500, episode=242 reward=0.7562419 (430.19 it/sec) -training >> step=1445600, episode=242 reward=0.7283447 (485.72 it/sec) -training >> step=1445700, episode=242 reward=0.7660005 (476.74 it/sec) -training >> step=1445800, episode=242 reward=0.7505556 (476.54 it/sec) -training >> step=1445900, episode=242 reward=0.7759287 (471.42 it/sec) -training >> step=1446000, episode=242 reward=0.7333758 (470.77 it/sec) -training >> step=1446100, episode=242 reward=0.7330958 (456.93 it/sec) -training >> step=1446200, episode=242 reward=0.7554969 (518.85 it/sec) -training >> step=1446300, episode=242 reward=0.7669005 (473.65 it/sec) -training >> step=1446400, episode=242 reward=0.7654068 (444.85 it/sec) -training >> step=1446500, episode=242 reward=0.7547094 (501.83 it/sec) -training >> step=1446600, episode=242 reward=0.7532528 (500.64 it/sec) -training >> step=1446700, episode=242 reward=0.7476681 (522.93 it/sec) -training >> step=1446800, episode=242 reward=0.7659611 (541.00 it/sec) -training >> step=1446900, episode=242 reward=0.7694243 (522.26 it/sec) -training >> step=1447000, episode=242 reward=0.7655245 (509.90 it/sec) -training >> step=1447100, episode=242 reward=0.7608101 (553.51 it/sec) -training >> step=1447200, episode=242 reward=0.7652436 (543.03 it/sec) -training >> step=1447300, episode=242 reward=0.749273 (490.31 it/sec) -training >> step=1447400, episode=242 reward=0.7661219 (533.25 it/sec) -training >> step=1447500, episode=242 reward=0.7657148 (528.98 it/sec) -training >> step=1447600, episode=242 reward=0.7521722 (509.02 it/sec) -training >> step=1447700, episode=242 reward=0.7746921 (542.85 it/sec) -training >> step=1447800, episode=242 reward=0.7478925 (513.80 it/sec) -training >> step=1447900, episode=242 reward=0.7746192 (541.68 it/sec) -training >> step=1448000, episode=242 reward=0.743443 (489.73 it/sec) -training >> step=1448100, episode=242 reward=0.7550725 (527.46 it/sec) -training >> step=1448200, episode=242 reward=0.7643828 (520.99 it/sec) -training >> step=1448300, episode=242 reward=0.7606729 (516.08 it/sec) -training >> step=1448400, episode=242 reward=0.7542349 (539.29 it/sec) -training >> step=1448500, episode=242 reward=0.7590164 (503.34 it/sec) -training >> step=1448600, episode=242 reward=0.7564414 (549.45 it/sec) -training >> step=1448700, episode=242 reward=0.7777217 (511.33 it/sec) -training >> step=1448800, episode=242 reward=0.7533413 (538.79 it/sec) -training >> step=1448900, episode=242 reward=0.7863105 (538.76 it/sec) -training >> step=1449000, episode=242 reward=0.7725305 (470.93 it/sec) -training >> step=1449100, episode=242 reward=0.772408 (483.98 it/sec) -training >> step=1449200, episode=242 reward=0.769186 (508.69 it/sec) -training >> step=1449300, episode=242 reward=0.7705804 (524.15 it/sec) -training >> step=1449400, episode=242 reward=0.7678337 (497.63 it/sec) -training >> step=1449500, episode=242 reward=0.7550539 (547.80 it/sec) -training >> step=1449600, episode=242 reward=0.7472219 (526.05 it/sec) -training >> step=1449700, episode=242 reward=0.7950593 (528.76 it/sec) -training >> step=1449800, episode=242 reward=0.7493521 (548.80 it/sec) -training >> step=1449900, episode=242 reward=0.7604761 (469.01 it/sec) -training >> step=1450000, episode=242 reward=0.7794193 (516.98 it/sec) -training >> step=1450100, episode=242 reward=0.7530752 (475.57 it/sec) -training >> step=1450200, episode=242 reward=0.7450303 (544.01 it/sec) -training >> step=1450300, episode=242 reward=0.7506897 (556.59 it/sec) -training >> step=1450400, episode=242 reward=0.7570997 (436.77 it/sec) -training >> step=1450500, episode=242 reward=0.7603703 (417.29 it/sec) -training >> step=1450600, episode=242 reward=0.7526875 (391.87 it/sec) -training >> step=1450700, episode=242 reward=0.7591235 (415.41 it/sec) -training >> step=1450800, episode=242 reward=0.7900002 (495.34 it/sec) -training >> step=1450900, episode=242 reward=0.7665983 (509.41 it/sec) -training >> step=1451000, episode=242 reward=0.7511478 (523.90 it/sec) -training >> step=1451100, episode=242 reward=0.7665622 (440.91 it/sec) -training >> step=1451200, episode=242 reward=0.7729585 (522.31 it/sec) -training >> step=1451300, episode=243 reward=0.7653167 (159.20 it/sec) -training >> step=1451400, episode=243 reward=0.7324684 (397.78 it/sec) -training >> step=1451500, episode=243 reward=0.7361844 (557.55 it/sec) -training >> step=1451600, episode=243 reward=0.7398586 (535.72 it/sec) -training >> step=1451700, episode=243 reward=0.7453631 (498.36 it/sec) -training >> step=1451800, episode=243 reward=0.7243532 (548.69 it/sec) -training >> step=1451900, episode=243 reward=0.7551222 (484.09 it/sec) -training >> step=1452000, episode=243 reward=0.7361537 (554.13 it/sec) -training >> step=1452100, episode=243 reward=0.7443497 (519.05 it/sec) -training >> step=1452200, episode=243 reward=0.7316206 (503.06 it/sec) -training >> step=1452300, episode=243 reward=0.7537249 (581.55 it/sec) -training >> step=1452400, episode=243 reward=0.7692416 (476.83 it/sec) -training >> step=1452500, episode=243 reward=0.7668615 (542.58 it/sec) -training >> step=1452600, episode=243 reward=0.7505702 (562.44 it/sec) -training >> step=1452700, episode=243 reward=0.7765217 (549.10 it/sec) -training >> step=1452800, episode=243 reward=0.759317 (549.95 it/sec) -training >> step=1452900, episode=243 reward=0.7433742 (536.19 it/sec) -training >> step=1453000, episode=243 reward=0.7964556 (493.94 it/sec) -training >> step=1453100, episode=243 reward=0.7447832 (554.98 it/sec) -training >> step=1453200, episode=243 reward=0.7849898 (527.12 it/sec) -training >> step=1453300, episode=243 reward=0.7874282 (528.94 it/sec) -training >> step=1453400, episode=243 reward=0.7559243 (567.50 it/sec) -training >> step=1453500, episode=243 reward=0.7474128 (515.37 it/sec) -training >> step=1453600, episode=243 reward=0.7687141 (465.09 it/sec) -training >> step=1453700, episode=243 reward=0.7693479 (521.77 it/sec) -training >> step=1453800, episode=243 reward=0.7618795 (500.52 it/sec) -training >> step=1453900, episode=243 reward=0.7702136 (529.03 it/sec) -training >> step=1454000, episode=243 reward=0.7571089 (553.40 it/sec) -training >> step=1454100, episode=243 reward=0.7770963 (542.33 it/sec) -training >> step=1454200, episode=243 reward=0.7501574 (548.93 it/sec) -training >> step=1454300, episode=243 reward=0.7855615 (521.50 it/sec) -training >> step=1454400, episode=243 reward=0.7469858 (533.58 it/sec) -training >> step=1454500, episode=243 reward=0.7771178 (544.72 it/sec) -training >> step=1454600, episode=243 reward=0.7685947 (499.62 it/sec) -training >> step=1454700, episode=243 reward=0.7436942 (542.42 it/sec) -training >> step=1454800, episode=243 reward=0.7565641 (547.91 it/sec) -training >> step=1454900, episode=243 reward=0.7758165 (509.93 it/sec) -training >> step=1455000, episode=243 reward=0.7603206 (545.50 it/sec) -training >> step=1455100, episode=243 reward=0.7477893 (533.41 it/sec) -training >> step=1455200, episode=243 reward=0.7650034 (494.25 it/sec) -training >> step=1455300, episode=243 reward=0.7617077 (500.01 it/sec) -training >> step=1455400, episode=243 reward=0.7426546 (507.20 it/sec) -training >> step=1455500, episode=243 reward=0.7649366 (542.68 it/sec) -training >> step=1455600, episode=243 reward=0.761232 (534.64 it/sec) -training >> step=1455700, episode=243 reward=0.7781715 (487.43 it/sec) -training >> step=1455800, episode=243 reward=0.7776415 (426.51 it/sec) -training >> step=1455900, episode=243 reward=0.7880921 (408.91 it/sec) -training >> step=1456000, episode=243 reward=0.7427781 (446.89 it/sec) -training >> step=1456100, episode=243 reward=0.7659571 (403.08 it/sec) -training >> step=1456200, episode=243 reward=0.7637433 (464.84 it/sec) -training >> step=1456300, episode=243 reward=0.7585983 (413.73 it/sec) -training >> step=1456400, episode=243 reward=0.7730628 (387.04 it/sec) -training >> step=1456500, episode=243 reward=0.7834966 (432.30 it/sec) -training >> step=1456600, episode=243 reward=0.7498771 (478.32 it/sec) -training >> step=1456700, episode=243 reward=0.7517296 (520.05 it/sec) -training >> step=1456800, episode=243 reward=0.7616396 (495.29 it/sec) -training >> step=1456900, episode=243 reward=0.7446757 (521.70 it/sec) -training >> step=1457000, episode=243 reward=0.7575521 (506.81 it/sec) -training >> step=1457100, episode=243 reward=0.7743956 (522.92 it/sec) -training >> step=1457200, episode=243 reward=0.7549839 (552.70 it/sec) -training >> step=1457300, episode=244 reward=0.7493426 (159.25 it/sec) -training >> step=1457400, episode=244 reward=0.7586877 (469.64 it/sec) -training >> step=1457500, episode=244 reward=0.7441974 (376.71 it/sec) -training >> step=1457600, episode=244 reward=0.7664596 (513.17 it/sec) -training >> step=1457700, episode=244 reward=0.7429564 (509.52 it/sec) -training >> step=1457800, episode=244 reward=0.7547913 (519.09 it/sec) -training >> step=1457900, episode=244 reward=0.7572349 (494.05 it/sec) -training >> step=1458000, episode=244 reward=0.7821226 (516.25 it/sec) -training >> step=1458100, episode=244 reward=0.7685191 (521.10 it/sec) -training >> step=1458200, episode=244 reward=0.7496513 (529.70 it/sec) -training >> step=1458300, episode=244 reward=0.7597481 (537.00 it/sec) -training >> step=1458400, episode=244 reward=0.7492368 (526.05 it/sec) -training >> step=1458500, episode=244 reward=0.7498163 (505.02 it/sec) -training >> step=1458600, episode=244 reward=0.7346181 (535.30 it/sec) -training >> step=1458700, episode=244 reward=0.751744 (503.40 it/sec) -training >> step=1458800, episode=244 reward=0.7805241 (550.93 it/sec) -training >> step=1458900, episode=244 reward=0.7493157 (512.08 it/sec) -training >> step=1459000, episode=244 reward=0.7510268 (521.54 it/sec) -training >> step=1459100, episode=244 reward=0.7688929 (563.12 it/sec) -training >> step=1459200, episode=244 reward=0.7579118 (514.54 it/sec) -training >> step=1459300, episode=244 reward=0.7489866 (541.34 it/sec) -training >> step=1459400, episode=244 reward=0.7555482 (552.86 it/sec) -training >> step=1459500, episode=244 reward=0.7532343 (534.23 it/sec) -training >> step=1459600, episode=244 reward=0.7487144 (557.57 it/sec) -training >> step=1459700, episode=244 reward=0.7755502 (530.78 it/sec) -training >> step=1459800, episode=244 reward=0.7701523 (528.94 it/sec) -training >> step=1459900, episode=244 reward=0.7664142 (549.62 it/sec) -training >> step=1460000, episode=244 reward=0.7415341 (501.11 it/sec) -training >> step=1460100, episode=244 reward=0.7593343 (508.79 it/sec) -training >> step=1460200, episode=244 reward=0.7578872 (584.81 it/sec) -training >> step=1460300, episode=244 reward=0.7422502 (496.75 it/sec) -training >> step=1460400, episode=244 reward=0.7481716 (546.99 it/sec) -training >> step=1460500, episode=244 reward=0.7620848 (551.10 it/sec) -training >> step=1460600, episode=244 reward=0.7726281 (504.35 it/sec) -training >> step=1460700, episode=244 reward=0.7584001 (516.45 it/sec) -training >> step=1460800, episode=244 reward=0.7353609 (538.85 it/sec) -training >> step=1460900, episode=244 reward=0.7730261 (472.64 it/sec) -training >> step=1461000, episode=244 reward=0.7544066 (513.92 it/sec) -training >> step=1461100, episode=244 reward=0.7590936 (473.27 it/sec) -training >> step=1461200, episode=244 reward=0.7787047 (525.89 it/sec) -training >> step=1461300, episode=244 reward=0.7810759 (537.36 it/sec) -training >> step=1461400, episode=244 reward=0.7727258 (571.64 it/sec) -training >> step=1461500, episode=244 reward=0.7749493 (564.57 it/sec) -training >> step=1461600, episode=244 reward=0.7562178 (488.85 it/sec) -training >> step=1461700, episode=244 reward=0.7679757 (443.51 it/sec) -training >> step=1461800, episode=244 reward=0.7760248 (513.77 it/sec) -training >> step=1461900, episode=244 reward=0.7749841 (487.92 it/sec) -training >> step=1462000, episode=244 reward=0.7646657 (475.10 it/sec) -training >> step=1462100, episode=244 reward=0.7529073 (430.21 it/sec) -training >> step=1462200, episode=244 reward=0.7583076 (452.10 it/sec) -training >> step=1462300, episode=244 reward=0.7591064 (485.81 it/sec) -training >> step=1462400, episode=244 reward=0.7094885 (427.34 it/sec) -training >> step=1462500, episode=244 reward=0.7526929 (430.80 it/sec) -training >> step=1462600, episode=244 reward=0.7559201 (457.26 it/sec) -training >> step=1462700, episode=244 reward=0.7720988 (493.88 it/sec) -training >> step=1462800, episode=244 reward=0.7436865 (491.17 it/sec) -training >> step=1462900, episode=244 reward=0.7754845 (542.78 it/sec) -training >> step=1463000, episode=244 reward=0.744363 (522.33 it/sec) -training >> step=1463100, episode=244 reward=0.7493631 (471.78 it/sec) -training >> step=1463200, episode=244 reward=0.7318273 (542.38 it/sec) -training >> step=1463300, episode=245 reward=0.7696296 (144.83 it/sec) -training >> step=1463400, episode=245 reward=0.7733915 (465.51 it/sec) -training >> step=1463500, episode=245 reward=0.7356272 (486.71 it/sec) -training >> step=1463600, episode=245 reward=0.7535982 (355.41 it/sec) -training >> step=1463700, episode=245 reward=0.7594609 (513.57 it/sec) -training >> step=1463800, episode=245 reward=0.7416878 (438.98 it/sec) -training >> step=1463900, episode=245 reward=0.7516468 (450.23 it/sec) -training >> step=1464000, episode=245 reward=0.7541684 (483.38 it/sec) -training >> step=1464100, episode=245 reward=0.7556261 (517.98 it/sec) -training >> step=1464200, episode=245 reward=0.7344744 (483.31 it/sec) -training >> step=1464300, episode=245 reward=0.768056 (465.05 it/sec) -training >> step=1464400, episode=245 reward=0.7582581 (515.69 it/sec) -training >> step=1464500, episode=245 reward=0.7499788 (507.57 it/sec) -training >> step=1464600, episode=245 reward=0.7553563 (534.42 it/sec) -training >> step=1464700, episode=245 reward=0.7577374 (509.39 it/sec) -training >> step=1464800, episode=245 reward=0.7618629 (381.93 it/sec) -training >> step=1464900, episode=245 reward=0.7311659 (434.74 it/sec) -training >> step=1465000, episode=245 reward=0.7732717 (398.53 it/sec) -training >> step=1465100, episode=245 reward=0.7513579 (437.02 it/sec) -training >> step=1465200, episode=245 reward=0.746165 (396.23 it/sec) -training >> step=1465300, episode=245 reward=0.7519563 (419.90 it/sec) -training >> step=1465400, episode=245 reward=0.7636276 (480.98 it/sec) -training >> step=1465500, episode=245 reward=0.7506083 (447.70 it/sec) -training >> step=1465600, episode=245 reward=0.7684204 (534.42 it/sec) -training >> step=1465700, episode=245 reward=0.7530533 (469.67 it/sec) -training >> step=1465800, episode=245 reward=0.7673882 (471.03 it/sec) -training >> step=1465900, episode=245 reward=0.7724677 (493.31 it/sec) -training >> step=1466000, episode=245 reward=0.766495 (510.28 it/sec) -training >> step=1466100, episode=245 reward=0.769832 (520.18 it/sec) -training >> step=1466200, episode=245 reward=0.7592386 (421.68 it/sec) -training >> step=1466300, episode=245 reward=0.7631229 (354.12 it/sec) -training >> step=1466400, episode=245 reward=0.7582287 (416.80 it/sec) -training >> step=1466500, episode=245 reward=0.7615487 (534.65 it/sec) -training >> step=1466600, episode=245 reward=0.7258191 (499.24 it/sec) -training >> step=1466700, episode=245 reward=0.757408 (462.93 it/sec) -training >> step=1466800, episode=245 reward=0.7632518 (544.12 it/sec) -training >> step=1466900, episode=245 reward=0.7608538 (539.68 it/sec) -training >> step=1467000, episode=245 reward=0.757081 (532.97 it/sec) -training >> step=1467100, episode=245 reward=0.769367 (534.12 it/sec) -training >> step=1467200, episode=245 reward=0.760603 (503.65 it/sec) -training >> step=1467300, episode=245 reward=0.7586361 (530.49 it/sec) -training >> step=1467400, episode=245 reward=0.7657287 (454.52 it/sec) -training >> step=1467500, episode=245 reward=0.7481397 (454.83 it/sec) -training >> step=1467600, episode=245 reward=0.7668333 (385.67 it/sec) -training >> step=1467700, episode=245 reward=0.7668971 (455.43 it/sec) -training >> step=1467800, episode=245 reward=0.7544907 (514.93 it/sec) -training >> step=1467900, episode=245 reward=0.7640455 (534.66 it/sec) -training >> step=1468000, episode=245 reward=0.7859771 (485.40 it/sec) -training >> step=1468100, episode=245 reward=0.7513455 (476.74 it/sec) -training >> step=1468200, episode=245 reward=0.7378991 (456.99 it/sec) -training >> step=1468300, episode=245 reward=0.7462012 (509.97 it/sec) -training >> step=1468400, episode=245 reward=0.7604698 (527.62 it/sec) -training >> step=1468500, episode=245 reward=0.7340299 (535.99 it/sec) -training >> step=1468600, episode=245 reward=0.7508513 (517.14 it/sec) -training >> step=1468700, episode=245 reward=0.7366031 (533.17 it/sec) -training >> step=1468800, episode=245 reward=0.7630273 (475.06 it/sec) -training >> step=1468900, episode=245 reward=0.7676925 (510.34 it/sec) -training >> step=1469000, episode=245 reward=0.76989 (484.51 it/sec) -training >> step=1469100, episode=245 reward=0.7652183 (495.46 it/sec) -training >> step=1469200, episode=245 reward=0.7699845 (504.02 it/sec) -training >> step=1469300, episode=246 reward=0.7405724 (170.20 it/sec) -training >> step=1469400, episode=246 reward=0.7571437 (482.55 it/sec) -training >> step=1469500, episode=246 reward=0.7441027 (481.73 it/sec) -training >> step=1469600, episode=246 reward=0.7647434 (514.64 it/sec) -training >> step=1469700, episode=246 reward=0.7308512 (486.76 it/sec) -training >> step=1469800, episode=246 reward=0.7569925 (496.91 it/sec) -training >> step=1469900, episode=246 reward=0.7449121 (350.00 it/sec) -training >> step=1470000, episode=246 reward=0.7369468 (544.47 it/sec) -training >> step=1470100, episode=246 reward=0.7597026 (487.34 it/sec) -training >> step=1470200, episode=246 reward=0.7543054 (484.46 it/sec) -training >> step=1470300, episode=246 reward=0.7600247 (511.98 it/sec) -training >> step=1470400, episode=246 reward=0.7372867 (510.78 it/sec) -training >> step=1470500, episode=246 reward=0.7632309 (522.23 it/sec) -training >> step=1470600, episode=246 reward=0.7667935 (493.44 it/sec) -training >> step=1470700, episode=246 reward=0.7672628 (505.01 it/sec) -training >> step=1470800, episode=246 reward=0.7811162 (535.19 it/sec) -training >> step=1470900, episode=246 reward=0.7632747 (531.12 it/sec) -training >> step=1471000, episode=246 reward=0.748745 (508.90 it/sec) -training >> step=1471100, episode=246 reward=0.7526002 (485.06 it/sec) -training >> step=1471200, episode=246 reward=0.77443 (481.42 it/sec) -training >> step=1471300, episode=246 reward=0.7699208 (495.10 it/sec) -training >> step=1471400, episode=246 reward=0.7412609 (503.47 it/sec) -training >> step=1471500, episode=246 reward=0.7704263 (505.53 it/sec) -training >> step=1471600, episode=246 reward=0.762075 (532.56 it/sec) -training >> step=1471700, episode=246 reward=0.7523409 (497.32 it/sec) -training >> step=1471800, episode=246 reward=0.7581803 (506.37 it/sec) -training >> step=1471900, episode=246 reward=0.7760667 (455.92 it/sec) -training >> step=1472000, episode=246 reward=0.7341326 (443.86 it/sec) -training >> step=1472100, episode=246 reward=0.765887 (386.92 it/sec) -training >> step=1472200, episode=246 reward=0.7558791 (374.56 it/sec) -training >> step=1472300, episode=246 reward=0.7858537 (420.90 it/sec) -training >> step=1472400, episode=246 reward=0.7652416 (473.08 it/sec) -training >> step=1472500, episode=246 reward=0.775897 (529.81 it/sec) -training >> step=1472600, episode=246 reward=0.760318 (505.77 it/sec) -training >> step=1472700, episode=246 reward=0.7676 (500.72 it/sec) -training >> step=1472800, episode=246 reward=0.7686742 (497.79 it/sec) -training >> step=1472900, episode=246 reward=0.7882226 (508.67 it/sec) -training >> step=1473000, episode=246 reward=0.7301124 (517.23 it/sec) -training >> step=1473100, episode=246 reward=0.7731328 (498.88 it/sec) -training >> step=1473200, episode=246 reward=0.7781552 (529.76 it/sec) -training >> step=1473300, episode=246 reward=0.7571596 (514.99 it/sec) -training >> step=1473400, episode=246 reward=0.7618635 (510.86 it/sec) -training >> step=1473500, episode=246 reward=0.7626276 (522.09 it/sec) -training >> step=1473600, episode=246 reward=0.7448881 (531.00 it/sec) -training >> step=1473700, episode=246 reward=0.7780093 (535.56 it/sec) -training >> step=1473800, episode=246 reward=0.7628291 (503.79 it/sec) -training >> step=1473900, episode=246 reward=0.7720374 (555.12 it/sec) -training >> step=1474000, episode=246 reward=0.7659883 (542.30 it/sec) -training >> step=1474100, episode=246 reward=0.7845898 (521.06 it/sec) -training >> step=1474200, episode=246 reward=0.7855108 (497.83 it/sec) -training >> step=1474300, episode=246 reward=0.7626698 (497.00 it/sec) -training >> step=1474400, episode=246 reward=0.735626 (564.69 it/sec) -training >> step=1474500, episode=246 reward=0.7412118 (514.11 it/sec) -training >> step=1474600, episode=246 reward=0.7545062 (533.07 it/sec) -training >> step=1474700, episode=246 reward=0.7518137 (471.58 it/sec) -training >> step=1474800, episode=246 reward=0.7468141 (545.99 it/sec) -training >> step=1474900, episode=246 reward=0.7499868 (521.77 it/sec) -training >> step=1475000, episode=246 reward=0.7540593 (505.48 it/sec) -training >> step=1475100, episode=246 reward=0.7441787 (499.94 it/sec) -training >> step=1475200, episode=246 reward=0.7549176 (470.06 it/sec) -training >> step=1475300, episode=247 reward=0.7510329 (169.80 it/sec) -training >> step=1475400, episode=247 reward=0.7522146 (519.89 it/sec) -training >> step=1475500, episode=247 reward=0.7635873 (488.35 it/sec) -training >> step=1475600, episode=247 reward=0.759617 (545.85 it/sec) -training >> step=1475700, episode=247 reward=0.7592711 (525.49 it/sec) -training >> step=1475800, episode=247 reward=0.7587969 (475.42 it/sec) -training >> step=1475900, episode=247 reward=0.7429081 (487.74 it/sec) -training >> step=1476000, episode=247 reward=0.7405871 (499.78 it/sec) -training >> step=1476100, episode=247 reward=0.7588884 (434.44 it/sec) -training >> step=1476200, episode=247 reward=0.7505016 (501.32 it/sec) -training >> step=1476300, episode=247 reward=0.7535776 (546.55 it/sec) -training >> step=1476400, episode=247 reward=0.7791613 (512.12 it/sec) -training >> step=1476500, episode=247 reward=0.7622128 (506.49 it/sec) -training >> step=1476600, episode=247 reward=0.7466362 (511.12 it/sec) -training >> step=1476700, episode=247 reward=0.7488863 (500.33 it/sec) -training >> step=1476800, episode=247 reward=0.7305544 (478.35 it/sec) -training >> step=1476900, episode=247 reward=0.7851803 (519.86 it/sec) -training >> step=1477000, episode=247 reward=0.7454311 (529.90 it/sec) -training >> step=1477100, episode=247 reward=0.7762871 (467.41 it/sec) -training >> step=1477200, episode=247 reward=0.7731405 (503.34 it/sec) -training >> step=1477300, episode=247 reward=0.768268 (528.30 it/sec) -training >> step=1477400, episode=247 reward=0.7834316 (523.32 it/sec) -training >> step=1477500, episode=247 reward=0.7592844 (541.40 it/sec) -training >> step=1477600, episode=247 reward=0.7503125 (515.83 it/sec) -training >> step=1477700, episode=247 reward=0.7556646 (516.72 it/sec) -training >> step=1477800, episode=247 reward=0.7495676 (522.79 it/sec) -training >> step=1477900, episode=247 reward=0.7727284 (506.82 it/sec) -training >> step=1478000, episode=247 reward=0.7667968 (550.40 it/sec) -training >> step=1478100, episode=247 reward=0.7571014 (537.62 it/sec) -training >> step=1478200, episode=247 reward=0.7545224 (505.97 it/sec) -training >> step=1478300, episode=247 reward=0.7606329 (505.17 it/sec) -training >> step=1478400, episode=247 reward=0.7566899 (540.61 it/sec) -training >> step=1478500, episode=247 reward=0.7509173 (521.75 it/sec) -training >> step=1478600, episode=247 reward=0.7469437 (535.62 it/sec) -training >> step=1478700, episode=247 reward=0.7716116 (484.09 it/sec) -training >> step=1478800, episode=247 reward=0.7520763 (507.47 it/sec) -training >> step=1478900, episode=247 reward=0.7586396 (545.57 it/sec) -training >> step=1479000, episode=247 reward=0.776087 (521.63 it/sec) -training >> step=1479100, episode=247 reward=0.7675633 (521.18 it/sec) -training >> step=1479200, episode=247 reward=0.7575247 (500.71 it/sec) -training >> step=1479300, episode=247 reward=0.7733486 (481.17 it/sec) -training >> step=1479400, episode=247 reward=0.7588636 (502.41 it/sec) -training >> step=1479500, episode=247 reward=0.7674133 (491.57 it/sec) -training >> step=1479600, episode=247 reward=0.7665814 (548.60 it/sec) -training >> step=1479700, episode=247 reward=0.7345035 (491.22 it/sec) -training >> step=1479800, episode=247 reward=0.7493262 (503.80 it/sec) -training >> step=1479900, episode=247 reward=0.7515039 (535.93 it/sec) -training >> step=1480000, episode=247 reward=0.7336215 (507.69 it/sec) -training >> step=1480100, episode=247 reward=0.759618 (458.62 it/sec) -training >> step=1480200, episode=247 reward=0.7701781 (514.15 it/sec) -training >> step=1480300, episode=247 reward=0.7696449 (515.77 it/sec) -training >> step=1480400, episode=247 reward=0.7904784 (540.26 it/sec) -training >> step=1480500, episode=247 reward=0.7497488 (538.78 it/sec) -training >> step=1480600, episode=247 reward=0.7547208 (521.18 it/sec) -training >> step=1480700, episode=247 reward=0.7640125 (527.95 it/sec) -training >> step=1480800, episode=247 reward=0.7500374 (464.50 it/sec) -training >> step=1480900, episode=247 reward=0.7468637 (528.24 it/sec) -training >> step=1481000, episode=247 reward=0.7563114 (542.83 it/sec) -training >> step=1481100, episode=247 reward=0.718653 (532.74 it/sec) -training >> step=1481200, episode=247 reward=0.7580538 (535.99 it/sec) -training >> step=1481300, episode=248 reward=0.753833 (148.77 it/sec) -training >> step=1481400, episode=248 reward=0.7637115 (464.22 it/sec) -training >> step=1481500, episode=248 reward=0.7065238 (554.17 it/sec) -training >> step=1481600, episode=248 reward=0.7592629 (485.97 it/sec) -training >> step=1481700, episode=248 reward=0.7568673 (532.78 it/sec) -training >> step=1481800, episode=248 reward=0.7450892 (497.57 it/sec) -training >> step=1481900, episode=248 reward=0.7372183 (571.59 it/sec) -training >> step=1482000, episode=248 reward=0.7657844 (539.37 it/sec) -training >> step=1482100, episode=248 reward=0.7433997 (358.06 it/sec) -training >> step=1482200, episode=248 reward=0.7327884 (544.08 it/sec) -training >> step=1482300, episode=248 reward=0.776492 (555.14 it/sec) -training >> step=1482400, episode=248 reward=0.7540153 (474.58 it/sec) -training >> step=1482500, episode=248 reward=0.7646998 (534.45 it/sec) -training >> step=1482600, episode=248 reward=0.759489 (480.04 it/sec) -training >> step=1482700, episode=248 reward=0.7552572 (575.89 it/sec) -training >> step=1482800, episode=248 reward=0.7321395 (539.43 it/sec) -training >> step=1482900, episode=248 reward=0.7467896 (537.70 it/sec) -training >> step=1483000, episode=248 reward=0.7519584 (537.49 it/sec) -training >> step=1483100, episode=248 reward=0.7512594 (483.41 it/sec) -training >> step=1483200, episode=248 reward=0.7364433 (520.04 it/sec) -training >> step=1483300, episode=248 reward=0.7363416 (543.61 it/sec) -training >> step=1483400, episode=248 reward=0.7591329 (538.82 it/sec) -training >> step=1483500, episode=248 reward=0.7598276 (526.52 it/sec) -training >> step=1483600, episode=248 reward=0.7539561 (540.72 it/sec) -training >> step=1483700, episode=248 reward=0.7710164 (468.74 it/sec) -training >> step=1483800, episode=248 reward=0.7674403 (560.12 it/sec) -training >> step=1483900, episode=248 reward=0.7694857 (566.11 it/sec) -training >> step=1484000, episode=248 reward=0.7489348 (530.40 it/sec) -training >> step=1484100, episode=248 reward=0.762534 (533.60 it/sec) -training >> step=1484200, episode=248 reward=0.7393838 (495.88 it/sec) -training >> step=1484300, episode=248 reward=0.7721857 (514.73 it/sec) -training >> step=1484400, episode=248 reward=0.7457218 (548.59 it/sec) -training >> step=1484500, episode=248 reward=0.7770067 (508.71 it/sec) -training >> step=1484600, episode=248 reward=0.7665873 (560.29 it/sec) -training >> step=1484700, episode=248 reward=0.7630856 (506.13 it/sec) -training >> step=1484800, episode=248 reward=0.7603812 (509.36 it/sec) -training >> step=1484900, episode=248 reward=0.7707828 (542.65 it/sec) -training >> step=1485000, episode=248 reward=0.7688392 (493.31 it/sec) -training >> step=1485100, episode=248 reward=0.7363461 (481.64 it/sec) -training >> step=1485200, episode=248 reward=0.7865869 (510.27 it/sec) -training >> step=1485300, episode=248 reward=0.767749 (508.34 it/sec) -training >> step=1485400, episode=248 reward=0.7640437 (547.35 it/sec) -training >> step=1485500, episode=248 reward=0.7299305 (514.13 it/sec) -training >> step=1485600, episode=248 reward=0.7763618 (519.90 it/sec) -training >> step=1485700, episode=248 reward=0.7570516 (518.07 it/sec) -training >> step=1485800, episode=248 reward=0.7723051 (555.31 it/sec) -training >> step=1485900, episode=248 reward=0.7762944 (504.95 it/sec) -training >> step=1486000, episode=248 reward=0.7676401 (552.64 it/sec) -training >> step=1486100, episode=248 reward=0.7741287 (523.89 it/sec) -training >> step=1486200, episode=248 reward=0.7861329 (528.38 it/sec) -training >> step=1486300, episode=248 reward=0.7516508 (514.25 it/sec) -training >> step=1486400, episode=248 reward=0.7611561 (499.51 it/sec) -training >> step=1486500, episode=248 reward=0.7692066 (521.72 it/sec) -training >> step=1486600, episode=248 reward=0.7265263 (517.50 it/sec) -training >> step=1486700, episode=248 reward=0.7548255 (519.39 it/sec) -training >> step=1486800, episode=248 reward=0.7761382 (486.39 it/sec) -training >> step=1486900, episode=248 reward=0.7708143 (529.57 it/sec) -training >> step=1487000, episode=248 reward=0.7456937 (512.21 it/sec) -training >> step=1487100, episode=248 reward=0.7592332 (533.24 it/sec) -training >> step=1487200, episode=248 reward=0.7547275 (523.42 it/sec) -training >> step=1487300, episode=249 reward=0.7436525 (150.04 it/sec) -training >> step=1487400, episode=249 reward=0.7517752 (557.13 it/sec) -training >> step=1487500, episode=249 reward=0.7592986 (537.60 it/sec) -training >> step=1487600, episode=249 reward=0.7677302 (525.67 it/sec) -training >> step=1487700, episode=249 reward=0.756301 (529.62 it/sec) -training >> step=1487800, episode=249 reward=0.7594265 (480.14 it/sec) -training >> step=1487900, episode=249 reward=0.7586754 (556.20 it/sec) -training >> step=1488000, episode=249 reward=0.7392976 (518.66 it/sec) -training >> step=1488100, episode=249 reward=0.79342 (430.78 it/sec) -training >> step=1488200, episode=249 reward=0.7592249 (525.70 it/sec) -training >> step=1488300, episode=249 reward=0.7492149 (477.79 it/sec) -training >> step=1488400, episode=249 reward=0.7537593 (530.47 it/sec) -training >> step=1488500, episode=249 reward=0.7513046 (515.80 it/sec) -training >> step=1488600, episode=249 reward=0.7496867 (487.08 it/sec) -training >> step=1488700, episode=249 reward=0.7479633 (514.60 it/sec) -training >> step=1488800, episode=249 reward=0.7675498 (500.98 it/sec) -training >> step=1488900, episode=249 reward=0.7463657 (569.43 it/sec) -training >> step=1489000, episode=249 reward=0.7675574 (546.87 it/sec) -training >> step=1489100, episode=249 reward=0.7697391 (483.16 it/sec) -training >> step=1489200, episode=249 reward=0.7472534 (544.60 it/sec) -training >> step=1489300, episode=249 reward=0.7671807 (505.18 it/sec) -training >> step=1489400, episode=249 reward=0.7558683 (529.35 it/sec) -training >> step=1489500, episode=249 reward=0.7534368 (518.80 it/sec) -training >> step=1489600, episode=249 reward=0.7552304 (557.55 it/sec) -training >> step=1489700, episode=249 reward=0.7772695 (487.98 it/sec) -training >> step=1489800, episode=249 reward=0.7679994 (524.58 it/sec) -training >> step=1489900, episode=249 reward=0.7681095 (522.59 it/sec) -training >> step=1490000, episode=249 reward=0.7283421 (505.07 it/sec) -training >> step=1490100, episode=249 reward=0.7718437 (535.45 it/sec) -training >> step=1490200, episode=249 reward=0.7518578 (481.73 it/sec) -training >> step=1490300, episode=249 reward=0.7589183 (574.72 it/sec) -training >> step=1490400, episode=249 reward=0.755595 (485.43 it/sec) -training >> step=1490500, episode=249 reward=0.767197 (524.54 it/sec) -training >> step=1490600, episode=249 reward=0.7528443 (527.23 it/sec) -training >> step=1490700, episode=249 reward=0.7653614 (526.35 it/sec) -training >> step=1490800, episode=249 reward=0.7462012 (542.23 it/sec) -training >> step=1490900, episode=249 reward=0.7618659 (502.60 it/sec) -training >> step=1491000, episode=249 reward=0.772402 (544.86 it/sec) -training >> step=1491100, episode=249 reward=0.7486718 (570.13 it/sec) -training >> step=1491200, episode=249 reward=0.765065 (554.60 it/sec) -training >> step=1491300, episode=249 reward=0.785504 (488.05 it/sec) -training >> step=1491400, episode=249 reward=0.7657818 (532.04 it/sec) -training >> step=1491500, episode=249 reward=0.7807603 (505.41 it/sec) -training >> step=1491600, episode=249 reward=0.7820026 (527.29 it/sec) -training >> step=1491700, episode=249 reward=0.7655497 (541.09 it/sec) -training >> step=1491800, episode=249 reward=0.7477863 (539.93 it/sec) -training >> step=1491900, episode=249 reward=0.7691828 (497.20 it/sec) -training >> step=1492000, episode=249 reward=0.7615797 (480.48 it/sec) -training >> step=1492100, episode=249 reward=0.7534579 (535.04 it/sec) -training >> step=1492200, episode=249 reward=0.7537472 (560.61 it/sec) -training >> step=1492300, episode=249 reward=0.7520654 (507.80 it/sec) -training >> step=1492400, episode=249 reward=0.7540918 (526.38 it/sec) -training >> step=1492500, episode=249 reward=0.7646213 (545.73 it/sec) -training >> step=1492600, episode=249 reward=0.7620847 (525.35 it/sec) -training >> step=1492700, episode=249 reward=0.7503771 (545.58 it/sec) -training >> step=1492800, episode=249 reward=0.7345044 (484.22 it/sec) -training >> step=1492900, episode=249 reward=0.7484617 (531.22 it/sec) -training >> step=1493000, episode=249 reward=0.7447757 (569.47 it/sec) -training >> step=1493100, episode=249 reward=0.7518772 (492.70 it/sec) -training >> step=1493200, episode=249 reward=0.7474851 (534.15 it/sec) -training >> step=1493300, episode=250 reward=0.7857431 (152.96 it/sec) -training >> step=1493400, episode=250 reward=0.7424627 (544.75 it/sec) -training >> step=1493500, episode=250 reward=0.7533885 (542.17 it/sec) -training >> step=1493600, episode=250 reward=0.7818918 (552.04 it/sec) -training >> step=1493700, episode=250 reward=0.746555 (510.15 it/sec) -training >> step=1493800, episode=250 reward=0.7540839 (532.29 it/sec) -training >> step=1493900, episode=250 reward=0.7655501 (506.49 it/sec) -training >> step=1494000, episode=250 reward=0.7687866 (506.45 it/sec) -training >> step=1494100, episode=250 reward=0.7700003 (490.87 it/sec) -training >> step=1494200, episode=250 reward=0.746857 (507.76 it/sec) -training >> step=1494300, episode=250 reward=0.7649487 (326.56 it/sec) -training >> step=1494400, episode=250 reward=0.7424258 (505.29 it/sec) -training >> step=1494500, episode=250 reward=0.7379668 (532.33 it/sec) -training >> step=1494600, episode=250 reward=0.7528794 (525.39 it/sec) -training >> step=1494700, episode=250 reward=0.7502853 (515.84 it/sec) -training >> step=1494800, episode=250 reward=0.771194 (506.68 it/sec) -training >> step=1494900, episode=250 reward=0.7595953 (500.21 it/sec) -training >> step=1495000, episode=250 reward=0.7765326 (530.20 it/sec) -training >> step=1495100, episode=250 reward=0.7686518 (524.70 it/sec) -training >> step=1495200, episode=250 reward=0.7948887 (541.78 it/sec) -training >> step=1495300, episode=250 reward=0.7554803 (543.71 it/sec) -training >> step=1495400, episode=250 reward=0.7651645 (551.19 it/sec) -training >> step=1495500, episode=250 reward=0.7595815 (499.99 it/sec) -training >> step=1495600, episode=250 reward=0.7618197 (554.44 it/sec) -training >> step=1495700, episode=250 reward=0.7728112 (545.56 it/sec) -training >> step=1495800, episode=250 reward=0.7703847 (520.54 it/sec) -training >> step=1495900, episode=250 reward=0.7580234 (522.34 it/sec) -training >> step=1496000, episode=250 reward=0.7537766 (499.68 it/sec) -training >> step=1496100, episode=250 reward=0.7674345 (547.53 it/sec) -training >> step=1496200, episode=250 reward=0.7697118 (525.90 it/sec) -training >> step=1496300, episode=250 reward=0.760148 (513.00 it/sec) -training >> step=1496400, episode=250 reward=0.7521436 (534.77 it/sec) -training >> step=1496500, episode=250 reward=0.7791288 (541.79 it/sec) -training >> step=1496600, episode=250 reward=0.767697 (506.02 it/sec) -training >> step=1496700, episode=250 reward=0.7647308 (534.10 it/sec) -training >> step=1496800, episode=250 reward=0.742624 (536.21 it/sec) -training >> step=1496900, episode=250 reward=0.7697899 (543.58 it/sec) -training >> step=1497000, episode=250 reward=0.7514466 (528.01 it/sec) -training >> step=1497100, episode=250 reward=0.7594779 (522.59 it/sec) -training >> step=1497200, episode=250 reward=0.7628825 (538.22 it/sec) -training >> step=1497300, episode=250 reward=0.7348325 (547.32 it/sec) -training >> step=1497400, episode=250 reward=0.7455277 (524.13 it/sec) -training >> step=1497500, episode=250 reward=0.7669519 (518.62 it/sec) -training >> step=1497600, episode=250 reward=0.7530118 (520.46 it/sec) -training >> step=1497700, episode=250 reward=0.751533 (492.27 it/sec) -training >> step=1497800, episode=250 reward=0.7434289 (504.55 it/sec) -training >> step=1497900, episode=250 reward=0.7907478 (529.54 it/sec) -training >> step=1498000, episode=250 reward=0.7668739 (563.91 it/sec) -training >> step=1498100, episode=250 reward=0.7741088 (521.66 it/sec) -training >> step=1498200, episode=250 reward=0.7600645 (499.16 it/sec) -training >> step=1498300, episode=250 reward=0.756291 (544.36 it/sec) -training >> step=1498400, episode=250 reward=0.768299 (576.20 it/sec) -training >> step=1498500, episode=250 reward=0.7484081 (443.81 it/sec) -training >> step=1498600, episode=250 reward=0.7577853 (536.28 it/sec) -training >> step=1498700, episode=250 reward=0.7566913 (519.53 it/sec) -training >> step=1498800, episode=250 reward=0.77218 (464.19 it/sec) -training >> step=1498900, episode=250 reward=0.7504035 (500.79 it/sec) -training >> step=1499000, episode=250 reward=0.753691 (493.23 it/sec) -training >> step=1499100, episode=250 reward=0.7509619 (537.95 it/sec) -training >> step=1499200, episode=250 reward=0.7700953 (529.94 it/sec) -training >> step=1499300, episode=251 reward=0.7525631 (153.41 it/sec) -training >> step=1499400, episode=251 reward=0.7719566 (538.29 it/sec) -training >> step=1499500, episode=251 reward=0.7733294 (562.91 it/sec) -training >> step=1499600, episode=251 reward=0.7515526 (488.55 it/sec) -training >> step=1499700, episode=251 reward=0.7567167 (530.71 it/sec) -training >> step=1499800, episode=251 reward=0.761602 (490.87 it/sec) -training >> step=1499900, episode=251 reward=0.744949 (534.28 it/sec) -training >> step=1500000, episode=251 reward=0.7677846 (495.77 it/sec) -training >> step=1500100, episode=251 reward=0.7498406 (471.50 it/sec) -training >> step=1500200, episode=251 reward=0.7750786 (536.12 it/sec) -training >> step=1500300, episode=251 reward=0.7534845 (565.70 it/sec) -training >> step=1500400, episode=251 reward=0.7562031 (541.84 it/sec) -training >> step=1500500, episode=251 reward=0.7655644 (429.80 it/sec) -training >> step=1500600, episode=251 reward=0.7423115 (498.59 it/sec) -training >> step=1500700, episode=251 reward=0.7609114 (536.70 it/sec) -training >> step=1500800, episode=251 reward=0.7613216 (532.71 it/sec) -training >> step=1500900, episode=251 reward=0.7665207 (492.55 it/sec) -training >> step=1501000, episode=251 reward=0.7552519 (559.88 it/sec) -training >> step=1501100, episode=251 reward=0.77562 (549.45 it/sec) -training >> step=1501200, episode=251 reward=0.763314 (524.70 it/sec) -training >> step=1501300, episode=251 reward=0.7604687 (522.07 it/sec) -training >> step=1501400, episode=251 reward=0.7747992 (559.70 it/sec) -training >> step=1501500, episode=251 reward=0.7366914 (523.75 it/sec) -training >> step=1501600, episode=251 reward=0.7593657 (530.39 it/sec) -training >> step=1501700, episode=251 reward=0.7615442 (494.25 it/sec) -training >> step=1501800, episode=251 reward=0.7631159 (549.47 it/sec) -training >> step=1501900, episode=251 reward=0.7631215 (553.04 it/sec) -training >> step=1502000, episode=251 reward=0.7516795 (526.43 it/sec) -training >> step=1502100, episode=251 reward=0.7687821 (556.07 it/sec) -training >> step=1502200, episode=251 reward=0.7645947 (481.38 it/sec) -training >> step=1502300, episode=251 reward=0.7616836 (533.24 it/sec) -training >> step=1502400, episode=251 reward=0.7636095 (524.37 it/sec) -training >> step=1502500, episode=251 reward=0.7652342 (512.41 it/sec) -training >> step=1502600, episode=251 reward=0.7388691 (539.59 it/sec) -training >> step=1502700, episode=251 reward=0.7555938 (552.76 it/sec) -training >> step=1502800, episode=251 reward=0.754508 (505.81 it/sec) -training >> step=1502900, episode=251 reward=0.7669239 (536.44 it/sec) -training >> step=1503000, episode=251 reward=0.7817404 (538.90 it/sec) -training >> step=1503100, episode=251 reward=0.7507922 (526.69 it/sec) -training >> step=1503200, episode=251 reward=0.7532954 (515.07 it/sec) -training >> step=1503300, episode=251 reward=0.7704867 (494.26 it/sec) -training >> step=1503400, episode=251 reward=0.7653915 (559.91 it/sec) -training >> step=1503500, episode=251 reward=0.7521858 (521.79 it/sec) -training >> step=1503600, episode=251 reward=0.7531902 (509.53 it/sec) -training >> step=1503700, episode=251 reward=0.7542595 (503.60 it/sec) -training >> step=1503800, episode=251 reward=0.7458687 (519.68 it/sec) -training >> step=1503900, episode=251 reward=0.7720159 (528.03 it/sec) -training >> step=1504000, episode=251 reward=0.7635304 (534.72 it/sec) -training >> step=1504100, episode=251 reward=0.7737091 (530.34 it/sec) -training >> step=1504200, episode=251 reward=0.7688858 (568.08 it/sec) -training >> step=1504300, episode=251 reward=0.7558294 (538.95 it/sec) -training >> step=1504400, episode=251 reward=0.7824714 (497.27 it/sec) -training >> step=1504500, episode=251 reward=0.7465723 (549.06 it/sec) -training >> step=1504600, episode=251 reward=0.7538989 (526.91 it/sec) -training >> step=1504700, episode=251 reward=0.7316501 (522.12 it/sec) -training >> step=1504800, episode=251 reward=0.7607365 (501.62 it/sec) -training >> step=1504900, episode=251 reward=0.7739859 (462.62 it/sec) -training >> step=1505000, episode=251 reward=0.7594105 (553.26 it/sec) -training >> step=1505100, episode=251 reward=0.7629856 (544.35 it/sec) -training >> step=1505200, episode=251 reward=0.7410809 (502.23 it/sec) -training >> step=1505300, episode=252 reward=0.7596033 (128.92 it/sec) -training >> step=1505400, episode=252 reward=0.7259701 (505.71 it/sec) -training >> step=1505500, episode=252 reward=0.7347714 (509.90 it/sec) -training >> step=1505600, episode=252 reward=0.7453249 (517.78 it/sec) -training >> step=1505700, episode=252 reward=0.7443821 (494.77 it/sec) -training >> step=1505800, episode=252 reward=0.743866 (575.08 it/sec) -training >> step=1505900, episode=252 reward=0.760933 (525.89 it/sec) -training >> step=1506000, episode=252 reward=0.7481443 (500.97 it/sec) -training >> step=1506100, episode=252 reward=0.7444404 (522.53 it/sec) -training >> step=1506200, episode=252 reward=0.7403479 (532.50 it/sec) -training >> step=1506300, episode=252 reward=0.7423074 (481.68 it/sec) -training >> step=1506400, episode=252 reward=0.7870001 (377.17 it/sec) -training >> step=1506500, episode=252 reward=0.7530912 (555.24 it/sec) -training >> step=1506600, episode=252 reward=0.7610545 (553.30 it/sec) -training >> step=1506700, episode=252 reward=0.7432644 (520.07 it/sec) -training >> step=1506800, episode=252 reward=0.7747766 (530.29 it/sec) -training >> step=1506900, episode=252 reward=0.7239378 (520.89 it/sec) -training >> step=1507000, episode=252 reward=0.7573382 (500.14 it/sec) -training >> step=1507100, episode=252 reward=0.7445279 (531.76 it/sec) -training >> step=1507200, episode=252 reward=0.7512557 (528.23 it/sec) -training >> step=1507300, episode=252 reward=0.7471443 (535.18 it/sec) -training >> step=1507400, episode=252 reward=0.7637722 (535.00 it/sec) -training >> step=1507500, episode=252 reward=0.7386382 (481.73 it/sec) -training >> step=1507600, episode=252 reward=0.7479383 (525.18 it/sec) -training >> step=1507700, episode=252 reward=0.7450252 (500.38 it/sec) -training >> step=1507800, episode=252 reward=0.7581804 (495.66 it/sec) -training >> step=1507900, episode=252 reward=0.7580883 (528.23 it/sec) -training >> step=1508000, episode=252 reward=0.7646961 (515.47 it/sec) -training >> step=1508100, episode=252 reward=0.7610278 (492.54 it/sec) -training >> step=1508200, episode=252 reward=0.7795935 (543.11 it/sec) -training >> step=1508300, episode=252 reward=0.7757745 (498.04 it/sec) -training >> step=1508400, episode=252 reward=0.7793103 (572.65 it/sec) -training >> step=1508500, episode=252 reward=0.7610259 (479.64 it/sec) -training >> step=1508600, episode=252 reward=0.7599705 (501.06 it/sec) -training >> step=1508700, episode=252 reward=0.735888 (539.97 it/sec) -training >> step=1508800, episode=252 reward=0.7572325 (493.68 it/sec) -training >> step=1508900, episode=252 reward=0.7675204 (526.05 it/sec) -training >> step=1509000, episode=252 reward=0.7467355 (480.86 it/sec) -training >> step=1509100, episode=252 reward=0.7611047 (448.41 it/sec) -training >> step=1509200, episode=252 reward=0.7716424 (502.32 it/sec) -training >> step=1509300, episode=252 reward=0.751212 (454.41 it/sec) -training >> step=1509400, episode=252 reward=0.7676135 (538.71 it/sec) -training >> step=1509500, episode=252 reward=0.7319698 (510.89 it/sec) -training >> step=1509600, episode=252 reward=0.7572142 (521.49 it/sec) -training >> step=1509700, episode=252 reward=0.7784658 (486.31 it/sec) -training >> step=1509800, episode=252 reward=0.7227485 (548.29 it/sec) -training >> step=1509900, episode=252 reward=0.7616837 (490.08 it/sec) -training >> step=1510000, episode=252 reward=0.7653127 (486.71 it/sec) -training >> step=1510100, episode=252 reward=0.7799542 (485.51 it/sec) -training >> step=1510200, episode=252 reward=0.7445213 (531.80 it/sec) -training >> step=1510300, episode=252 reward=0.7670246 (564.61 it/sec) -training >> step=1510400, episode=252 reward=0.7379163 (464.08 it/sec) -training >> step=1510500, episode=252 reward=0.7512659 (483.68 it/sec) -training >> step=1510600, episode=252 reward=0.7468091 (517.66 it/sec) -training >> step=1510700, episode=252 reward=0.7592533 (485.92 it/sec) -training >> step=1510800, episode=252 reward=0.7635334 (489.52 it/sec) -training >> step=1510900, episode=252 reward=0.7661095 (450.70 it/sec) -training >> step=1511000, episode=252 reward=0.7748935 (394.80 it/sec) -training >> step=1511100, episode=252 reward=0.7637269 (414.89 it/sec) -training >> step=1511200, episode=252 reward=0.7607237 (359.36 it/sec) -training >> step=1511300, episode=253 reward=0.7605273 (112.48 it/sec) -training >> step=1511400, episode=253 reward=0.7389244 (524.13 it/sec) -training >> step=1511500, episode=253 reward=0.7529951 (496.91 it/sec) -training >> step=1511600, episode=253 reward=0.7469436 (500.53 it/sec) -training >> step=1511700, episode=253 reward=0.7391547 (507.17 it/sec) -training >> step=1511800, episode=253 reward=0.7544093 (503.18 it/sec) -training >> step=1511900, episode=253 reward=0.7430598 (511.97 it/sec) -training >> step=1512000, episode=253 reward=0.7641432 (488.30 it/sec) -training >> step=1512100, episode=253 reward=0.7642012 (503.72 it/sec) -training >> step=1512200, episode=253 reward=0.7532502 (506.55 it/sec) -training >> step=1512300, episode=253 reward=0.762387 (534.81 it/sec) -training >> step=1512400, episode=253 reward=0.7480843 (539.79 it/sec) -training >> step=1512500, episode=253 reward=0.7514114 (519.20 it/sec) -training >> step=1512600, episode=253 reward=0.7502722 (392.46 it/sec) -training >> step=1512700, episode=253 reward=0.7813157 (573.89 it/sec) -training >> step=1512800, episode=253 reward=0.7717751 (529.14 it/sec) -training >> step=1512900, episode=253 reward=0.7748184 (522.07 it/sec) -training >> step=1513000, episode=253 reward=0.7693213 (523.59 it/sec) -training >> step=1513100, episode=253 reward=0.7905521 (515.81 it/sec) -training >> step=1513200, episode=253 reward=0.7697158 (511.15 it/sec) -training >> step=1513300, episode=253 reward=0.7882839 (500.55 it/sec) -training >> step=1513400, episode=253 reward=0.7637671 (501.82 it/sec) -training >> step=1513500, episode=253 reward=0.7618043 (498.64 it/sec) -training >> step=1513600, episode=253 reward=0.776742 (456.29 it/sec) -training >> step=1513700, episode=253 reward=0.7749908 (496.38 it/sec) -training >> step=1513800, episode=253 reward=0.751591 (522.14 it/sec) -training >> step=1513900, episode=253 reward=0.7748439 (481.65 it/sec) -training >> step=1514000, episode=253 reward=0.7577447 (454.25 it/sec) -training >> step=1514100, episode=253 reward=0.7852772 (372.82 it/sec) -training >> step=1514200, episode=253 reward=0.7474695 (393.48 it/sec) -training >> step=1514300, episode=253 reward=0.7722429 (362.62 it/sec) -training >> step=1514400, episode=253 reward=0.795176 (479.03 it/sec) -training >> step=1514500, episode=253 reward=0.7778445 (441.61 it/sec) -training >> step=1514600, episode=253 reward=0.782056 (517.37 it/sec) -training >> step=1514700, episode=253 reward=0.775354 (505.48 it/sec) -training >> step=1514800, episode=253 reward=0.7621011 (469.89 it/sec) -training >> step=1514900, episode=253 reward=0.7735041 (450.42 it/sec) -training >> step=1515000, episode=253 reward=0.7572455 (452.91 it/sec) -training >> step=1515100, episode=253 reward=0.7659563 (509.92 it/sec) -training >> step=1515200, episode=253 reward=0.7650536 (463.78 it/sec) -training >> step=1515300, episode=253 reward=0.7541946 (509.34 it/sec) -training >> step=1515400, episode=253 reward=0.7662616 (522.76 it/sec) -training >> step=1515500, episode=253 reward=0.764414 (473.54 it/sec) -training >> step=1515600, episode=253 reward=0.7744961 (543.94 it/sec) -training >> step=1515700, episode=253 reward=0.7420575 (538.14 it/sec) -training >> step=1515800, episode=253 reward=0.7644439 (557.31 it/sec) -training >> step=1515900, episode=253 reward=0.7691404 (534.35 it/sec) -training >> step=1516000, episode=253 reward=0.7659393 (505.78 it/sec) -training >> step=1516100, episode=253 reward=0.753375 (547.46 it/sec) -training >> step=1516200, episode=253 reward=0.745977 (516.11 it/sec) -training >> step=1516300, episode=253 reward=0.7521017 (422.02 it/sec) -training >> step=1516400, episode=253 reward=0.7618877 (506.48 it/sec) -training >> step=1516500, episode=253 reward=0.7343657 (466.51 it/sec) -training >> step=1516600, episode=253 reward=0.7449504 (529.93 it/sec) -training >> step=1516700, episode=253 reward=0.7533584 (521.52 it/sec) -training >> step=1516800, episode=253 reward=0.7750327 (546.29 it/sec) -training >> step=1516900, episode=253 reward=0.7529101 (483.00 it/sec) -training >> step=1517000, episode=253 reward=0.7280027 (515.10 it/sec) -training >> step=1517100, episode=253 reward=0.7539412 (485.10 it/sec) -training >> step=1517200, episode=253 reward=0.7272612 (548.45 it/sec) -training >> step=1517300, episode=254 reward=0.7792259 (180.80 it/sec) -training >> step=1517400, episode=254 reward=0.7360691 (488.16 it/sec) -training >> step=1517500, episode=254 reward=0.7470514 (523.76 it/sec) -training >> step=1517600, episode=254 reward=0.7404678 (526.78 it/sec) -training >> step=1517700, episode=254 reward=0.7560213 (524.78 it/sec) -training >> step=1517800, episode=254 reward=0.7394223 (540.74 it/sec) -training >> step=1517900, episode=254 reward=0.7620659 (466.66 it/sec) -training >> step=1518000, episode=254 reward=0.7541155 (532.57 it/sec) -training >> step=1518100, episode=254 reward=0.7585073 (537.52 it/sec) -training >> step=1518200, episode=254 reward=0.7586271 (499.56 it/sec) -training >> step=1518300, episode=254 reward=0.7672771 (526.68 it/sec) -training >> step=1518400, episode=254 reward=0.7467573 (508.31 it/sec) -training >> step=1518500, episode=254 reward=0.7576595 (474.13 it/sec) -training >> step=1518600, episode=254 reward=0.754857 (535.91 it/sec) -training >> step=1518700, episode=254 reward=0.749531 (547.18 it/sec) -training >> step=1518800, episode=254 reward=0.7831226 (380.08 it/sec) -training >> step=1518900, episode=254 reward=0.7535639 (540.41 it/sec) -training >> step=1519000, episode=254 reward=0.7720544 (475.03 it/sec) -training >> step=1519100, episode=254 reward=0.7511344 (525.81 it/sec) -training >> step=1519200, episode=254 reward=0.7783303 (526.83 it/sec) -training >> step=1519300, episode=254 reward=0.7822259 (510.32 it/sec) -training >> step=1519400, episode=254 reward=0.7451907 (506.25 it/sec) -training >> step=1519500, episode=254 reward=0.7506875 (444.80 it/sec) -training >> step=1519600, episode=254 reward=0.7374322 (532.14 it/sec) -training >> step=1519700, episode=254 reward=0.7740805 (546.22 it/sec) -training >> step=1519800, episode=254 reward=0.7525221 (544.32 it/sec) -training >> step=1519900, episode=254 reward=0.7606508 (547.32 it/sec) -training >> step=1520000, episode=254 reward=0.7679126 (504.45 it/sec) -training >> step=1520100, episode=254 reward=0.7447006 (530.44 it/sec) -training >> step=1520200, episode=254 reward=0.7607882 (558.74 it/sec) -training >> step=1520300, episode=254 reward=0.7710117 (522.54 it/sec) -training >> step=1520400, episode=254 reward=0.7597823 (528.21 it/sec) -training >> step=1520500, episode=254 reward=0.7616307 (550.83 it/sec) -training >> step=1520600, episode=254 reward=0.7506117 (498.94 it/sec) -training >> step=1520700, episode=254 reward=0.7580791 (540.04 it/sec) -training >> step=1520800, episode=254 reward=0.7687401 (494.70 it/sec) -training >> step=1520900, episode=254 reward=0.7657 (569.83 it/sec) -training >> step=1521000, episode=254 reward=0.7535789 (535.44 it/sec) -training >> step=1521100, episode=254 reward=0.7755261 (496.49 it/sec) -training >> step=1521200, episode=254 reward=0.7521052 (522.43 it/sec) -training >> step=1521300, episode=254 reward=0.7457717 (555.32 it/sec) -training >> step=1521400, episode=254 reward=0.7691317 (525.38 it/sec) -training >> step=1521500, episode=254 reward=0.7483893 (535.54 it/sec) -training >> step=1521600, episode=254 reward=0.7663595 (552.65 it/sec) -training >> step=1521700, episode=254 reward=0.7730305 (515.62 it/sec) -training >> step=1521800, episode=254 reward=0.7695836 (550.18 it/sec) -training >> step=1521900, episode=254 reward=0.7521305 (493.81 it/sec) -training >> step=1522000, episode=254 reward=0.7709708 (499.43 it/sec) -training >> step=1522100, episode=254 reward=0.7661748 (535.13 it/sec) -training >> step=1522200, episode=254 reward=0.7896467 (502.74 it/sec) -training >> step=1522300, episode=254 reward=0.7555552 (542.06 it/sec) -training >> step=1522400, episode=254 reward=0.7581317 (550.34 it/sec) -training >> step=1522500, episode=254 reward=0.7496505 (522.20 it/sec) -training >> step=1522600, episode=254 reward=0.7834226 (541.86 it/sec) -training >> step=1522700, episode=254 reward=0.7550634 (528.65 it/sec) -training >> step=1522800, episode=254 reward=0.7510378 (474.92 it/sec) -training >> step=1522900, episode=254 reward=0.7564817 (534.69 it/sec) -training >> step=1523000, episode=254 reward=0.7424573 (529.46 it/sec) -training >> step=1523100, episode=254 reward=0.7499801 (500.53 it/sec) -training >> step=1523200, episode=254 reward=0.7478138 (530.61 it/sec) -training >> step=1523300, episode=255 reward=0.7350441 (224.85 it/sec) -training >> step=1523400, episode=255 reward=0.7430078 (502.21 it/sec) -training >> step=1523500, episode=255 reward=0.762736 (508.81 it/sec) -training >> step=1523600, episode=255 reward=0.7501028 (532.63 it/sec) -training >> step=1523700, episode=255 reward=0.7479405 (487.04 it/sec) -training >> step=1523800, episode=255 reward=0.7842247 (525.34 it/sec) -training >> step=1523900, episode=255 reward=0.7547505 (525.09 it/sec) -training >> step=1524000, episode=255 reward=0.7598535 (485.39 it/sec) -training >> step=1524100, episode=255 reward=0.7684448 (543.89 it/sec) -training >> step=1524200, episode=255 reward=0.7473963 (509.33 it/sec) -training >> step=1524300, episode=255 reward=0.7731275 (532.88 it/sec) -training >> step=1524400, episode=255 reward=0.7614117 (550.50 it/sec) -training >> step=1524500, episode=255 reward=0.7744619 (498.55 it/sec) -training >> step=1524600, episode=255 reward=0.7603224 (485.59 it/sec) -training >> step=1524700, episode=255 reward=0.7758865 (505.00 it/sec) -training >> step=1524800, episode=255 reward=0.7812094 (543.41 it/sec) -training >> step=1524900, episode=255 reward=0.762455 (554.70 it/sec) -training >> step=1525000, episode=255 reward=0.7400219 (505.37 it/sec) -training >> step=1525100, episode=255 reward=0.7571178 (383.93 it/sec) -training >> step=1525200, episode=255 reward=0.7547361 (546.37 it/sec) -training >> step=1525300, episode=255 reward=0.7611299 (488.72 it/sec) -training >> step=1525400, episode=255 reward=0.7717015 (506.53 it/sec) -training >> step=1525500, episode=255 reward=0.7643583 (515.42 it/sec) -training >> step=1525600, episode=255 reward=0.7544554 (566.31 it/sec) -training >> step=1525700, episode=255 reward=0.7792189 (529.04 it/sec) -training >> step=1525800, episode=255 reward=0.7812965 (501.83 it/sec) -training >> step=1525900, episode=255 reward=0.7603332 (529.25 it/sec) -training >> step=1526000, episode=255 reward=0.7763878 (520.94 it/sec) -training >> step=1526100, episode=255 reward=0.743988 (542.49 it/sec) -training >> step=1526200, episode=255 reward=0.7478611 (521.97 it/sec) -training >> step=1526300, episode=255 reward=0.7724259 (517.76 it/sec) -training >> step=1526400, episode=255 reward=0.7702615 (502.22 it/sec) -training >> step=1526500, episode=255 reward=0.7713253 (502.22 it/sec) -training >> step=1526600, episode=255 reward=0.7607572 (499.06 it/sec) -training >> step=1526700, episode=255 reward=0.7478849 (573.81 it/sec) -training >> step=1526800, episode=255 reward=0.7631291 (487.65 it/sec) -training >> step=1526900, episode=255 reward=0.7736591 (527.42 it/sec) -training >> step=1527000, episode=255 reward=0.7530978 (560.09 it/sec) -training >> step=1527100, episode=255 reward=0.7818432 (488.69 it/sec) -training >> step=1527200, episode=255 reward=0.7759595 (520.76 it/sec) -training >> step=1527300, episode=255 reward=0.7687734 (526.40 it/sec) -training >> step=1527400, episode=255 reward=0.7767239 (505.59 it/sec) -training >> step=1527500, episode=255 reward=0.7692425 (543.07 it/sec) -training >> step=1527600, episode=255 reward=0.7563274 (490.10 it/sec) -training >> step=1527700, episode=255 reward=0.7645724 (537.39 it/sec) -training >> step=1527800, episode=255 reward=0.747391 (556.88 it/sec) -training >> step=1527900, episode=255 reward=0.7268131 (518.63 it/sec) -training >> step=1528000, episode=255 reward=0.7331489 (522.80 it/sec) -training >> step=1528100, episode=255 reward=0.7733477 (493.16 it/sec) -training >> step=1528200, episode=255 reward=0.7459414 (522.06 it/sec) -training >> step=1528300, episode=255 reward=0.7587301 (516.93 it/sec) -training >> step=1528400, episode=255 reward=0.7483578 (520.78 it/sec) -training >> step=1528500, episode=255 reward=0.7597581 (515.64 it/sec) -training >> step=1528600, episode=255 reward=0.757247 (563.21 it/sec) -training >> step=1528700, episode=255 reward=0.7766486 (528.01 it/sec) -training >> step=1528800, episode=255 reward=0.764901 (534.10 it/sec) -training >> step=1528900, episode=255 reward=0.7528358 (548.33 it/sec) -training >> step=1529000, episode=255 reward=0.7699159 (544.02 it/sec) -training >> step=1529100, episode=255 reward=0.7690437 (476.55 it/sec) -training >> step=1529200, episode=255 reward=0.7407935 (485.73 it/sec) -training >> step=1529300, episode=256 reward=0.7440645 (229.19 it/sec) -training >> step=1529400, episode=256 reward=0.7413766 (541.10 it/sec) -training >> step=1529500, episode=256 reward=0.7317353 (485.11 it/sec) -training >> step=1529600, episode=256 reward=0.7665087 (466.25 it/sec) -training >> step=1529700, episode=256 reward=0.7482486 (493.21 it/sec) -training >> step=1529800, episode=256 reward=0.7578476 (524.79 it/sec) -training >> step=1529900, episode=256 reward=0.7631303 (558.18 it/sec) -training >> step=1530000, episode=256 reward=0.7565601 (500.74 it/sec) -training >> step=1530100, episode=256 reward=0.7627289 (492.18 it/sec) -training >> step=1530200, episode=256 reward=0.7701104 (543.17 it/sec) -training >> step=1530300, episode=256 reward=0.7688141 (540.17 it/sec) -training >> step=1530400, episode=256 reward=0.7631975 (527.64 it/sec) -training >> step=1530500, episode=256 reward=0.7689326 (513.15 it/sec) -training >> step=1530600, episode=256 reward=0.7793007 (520.32 it/sec) -training >> step=1530700, episode=256 reward=0.7781734 (510.65 it/sec) -training >> step=1530800, episode=256 reward=0.7630632 (536.43 it/sec) -training >> step=1530900, episode=256 reward=0.750483 (520.94 it/sec) -training >> step=1531000, episode=256 reward=0.7514675 (577.89 it/sec) -training >> step=1531100, episode=256 reward=0.7624843 (522.28 it/sec) -training >> step=1531200, episode=256 reward=0.744024 (518.05 it/sec) -training >> step=1531300, episode=256 reward=0.7766147 (396.52 it/sec) -training >> step=1531400, episode=256 reward=0.7645006 (489.88 it/sec) -training >> step=1531500, episode=256 reward=0.7634371 (552.81 it/sec) -training >> step=1531600, episode=256 reward=0.7704667 (471.17 it/sec) -training >> step=1531700, episode=256 reward=0.7955 (520.05 it/sec) -training >> step=1531800, episode=256 reward=0.7598186 (534.95 it/sec) -training >> step=1531900, episode=256 reward=0.7671862 (440.39 it/sec) -training >> step=1532000, episode=256 reward=0.7501484 (571.86 it/sec) -training >> step=1532100, episode=256 reward=0.7532588 (498.68 it/sec) -training >> step=1532200, episode=256 reward=0.7437523 (520.20 it/sec) -training >> step=1532300, episode=256 reward=0.760648 (523.93 it/sec) -training >> step=1532400, episode=256 reward=0.7756387 (553.85 it/sec) -training >> step=1532500, episode=256 reward=0.7457825 (513.83 it/sec) -training >> step=1532600, episode=256 reward=0.7452801 (512.35 it/sec) -training >> step=1532700, episode=256 reward=0.7614555 (535.92 it/sec) -training >> step=1532800, episode=256 reward=0.7442651 (577.69 it/sec) -training >> step=1532900, episode=256 reward=0.7664517 (528.20 it/sec) -training >> step=1533000, episode=256 reward=0.7843168 (526.16 it/sec) -training >> step=1533100, episode=256 reward=0.7106591 (522.28 it/sec) -training >> step=1533200, episode=256 reward=0.7670767 (500.75 it/sec) -training >> step=1533300, episode=256 reward=0.77408 (515.91 it/sec) -training >> step=1533400, episode=256 reward=0.7650778 (519.05 it/sec) -training >> step=1533500, episode=256 reward=0.7651311 (528.20 it/sec) -training >> step=1533600, episode=256 reward=0.7692419 (595.25 it/sec) -training >> step=1533700, episode=256 reward=0.7560325 (483.65 it/sec) -training >> step=1533800, episode=256 reward=0.7506717 (543.69 it/sec) -training >> step=1533900, episode=256 reward=0.7432046 (537.45 it/sec) -training >> step=1534000, episode=256 reward=0.7404628 (547.78 it/sec) -training >> step=1534100, episode=256 reward=0.7453347 (521.71 it/sec) -training >> step=1534200, episode=256 reward=0.7522545 (511.01 it/sec) -training >> step=1534300, episode=256 reward=0.7495408 (556.60 it/sec) -training >> step=1534400, episode=256 reward=0.7499038 (526.53 it/sec) -training >> step=1534500, episode=256 reward=0.7683366 (531.79 it/sec) -training >> step=1534600, episode=256 reward=0.7452264 (522.95 it/sec) -training >> step=1534700, episode=256 reward=0.7564598 (536.93 it/sec) -training >> step=1534800, episode=256 reward=0.7706382 (455.68 it/sec) -training >> step=1534900, episode=256 reward=0.7312061 (464.06 it/sec) -training >> step=1535000, episode=256 reward=0.7754201 (518.75 it/sec) -training >> step=1535100, episode=256 reward=0.7428553 (529.59 it/sec) -training >> step=1535200, episode=256 reward=0.7422729 (518.09 it/sec) -training >> step=1535300, episode=257 reward=0.7620716 (198.33 it/sec) -training >> step=1535400, episode=257 reward=0.7427849 (527.54 it/sec) -training >> step=1535500, episode=257 reward=0.7406279 (524.75 it/sec) -training >> step=1535600, episode=257 reward=0.737833 (518.04 it/sec) -training >> step=1535700, episode=257 reward=0.7490042 (461.82 it/sec) -training >> step=1535800, episode=257 reward=0.7623453 (501.46 it/sec) -training >> step=1535900, episode=257 reward=0.7607989 (540.18 it/sec) -training >> step=1536000, episode=257 reward=0.7359538 (519.35 it/sec) -training >> step=1536100, episode=257 reward=0.7670051 (518.69 it/sec) -training >> step=1536200, episode=257 reward=0.7441272 (492.39 it/sec) -training >> step=1536300, episode=257 reward=0.7509229 (528.87 it/sec) -training >> step=1536400, episode=257 reward=0.7331319 (533.88 it/sec) -training >> step=1536500, episode=257 reward=0.7540342 (505.81 it/sec) -training >> step=1536600, episode=257 reward=0.7566358 (540.83 it/sec) -training >> step=1536700, episode=257 reward=0.767187 (499.18 it/sec) -training >> step=1536800, episode=257 reward=0.7695117 (493.12 it/sec) -training >> step=1536900, episode=257 reward=0.7525319 (529.20 it/sec) -training >> step=1537000, episode=257 reward=0.7549818 (557.15 it/sec) -training >> step=1537100, episode=257 reward=0.7556904 (523.05 it/sec) -training >> step=1537200, episode=257 reward=0.7553244 (528.74 it/sec) -training >> step=1537300, episode=257 reward=0.7586135 (462.56 it/sec) -training >> step=1537400, episode=257 reward=0.7791885 (378.29 it/sec) -training >> step=1537500, episode=257 reward=0.7862356 (518.73 it/sec) -training >> step=1537600, episode=257 reward=0.7613991 (505.81 it/sec) -training >> step=1537700, episode=257 reward=0.7396227 (548.93 it/sec) -training >> step=1537800, episode=257 reward=0.7559254 (527.73 it/sec) -training >> step=1537900, episode=257 reward=0.7683707 (503.66 it/sec) -training >> step=1538000, episode=257 reward=0.7824507 (525.52 it/sec) -training >> step=1538100, episode=257 reward=0.7578133 (527.27 it/sec) -training >> step=1538200, episode=257 reward=0.7773307 (521.41 it/sec) -training >> step=1538300, episode=257 reward=0.7489431 (526.66 it/sec) -training >> step=1538400, episode=257 reward=0.7606393 (512.51 it/sec) -training >> step=1538500, episode=257 reward=0.7541695 (583.19 it/sec) -training >> step=1538600, episode=257 reward=0.75668 (509.77 it/sec) -training >> step=1538700, episode=257 reward=0.7527974 (499.60 it/sec) -training >> step=1538800, episode=257 reward=0.7454421 (528.86 it/sec) -training >> step=1538900, episode=257 reward=0.7854695 (523.39 it/sec) -training >> step=1539000, episode=257 reward=0.7572753 (509.06 it/sec) -training >> step=1539100, episode=257 reward=0.7593863 (511.44 it/sec) -training >> step=1539200, episode=257 reward=0.7636117 (499.06 it/sec) -training >> step=1539300, episode=257 reward=0.7690361 (548.09 it/sec) -training >> step=1539400, episode=257 reward=0.7642432 (510.11 it/sec) -training >> step=1539500, episode=257 reward=0.7780455 (535.27 it/sec) -training >> step=1539600, episode=257 reward=0.7648942 (522.79 it/sec) -training >> step=1539700, episode=257 reward=0.7686934 (551.09 it/sec) -training >> step=1539800, episode=257 reward=0.7486548 (488.08 it/sec) -training >> step=1539900, episode=257 reward=0.7648937 (475.06 it/sec) -training >> step=1540000, episode=257 reward=0.766783 (500.64 it/sec) -training >> step=1540100, episode=257 reward=0.7402131 (534.89 it/sec) -training >> step=1540200, episode=257 reward=0.7706077 (511.54 it/sec) -training >> step=1540300, episode=257 reward=0.7658912 (512.96 it/sec) -training >> step=1540400, episode=257 reward=0.7500532 (490.83 it/sec) -training >> step=1540500, episode=257 reward=0.7352806 (504.85 it/sec) -training >> step=1540600, episode=257 reward=0.7657046 (550.38 it/sec) -training >> step=1540700, episode=257 reward=0.7333045 (517.41 it/sec) -training >> step=1540800, episode=257 reward=0.7420558 (520.61 it/sec) -training >> step=1540900, episode=257 reward=0.7567654 (532.87 it/sec) -training >> step=1541000, episode=257 reward=0.7379122 (474.20 it/sec) -training >> step=1541100, episode=257 reward=0.764816 (537.62 it/sec) -training >> step=1541200, episode=257 reward=0.7565678 (502.84 it/sec) -training >> step=1541300, episode=258 reward=0.7528093 (199.01 it/sec) -training >> step=1541400, episode=258 reward=0.7610148 (524.77 it/sec) -training >> step=1541500, episode=258 reward=0.7592466 (533.59 it/sec) -training >> step=1541600, episode=258 reward=0.7629173 (550.32 it/sec) -training >> step=1541700, episode=258 reward=0.7454029 (520.70 it/sec) -training >> step=1541800, episode=258 reward=0.768879 (507.92 it/sec) -training >> step=1541900, episode=258 reward=0.7791505 (524.18 it/sec) -training >> step=1542000, episode=258 reward=0.770246 (515.01 it/sec) -training >> step=1542100, episode=258 reward=0.7679402 (509.11 it/sec) -training >> step=1542200, episode=258 reward=0.7664649 (478.19 it/sec) -training >> step=1542300, episode=258 reward=0.749743 (541.01 it/sec) -training >> step=1542400, episode=258 reward=0.7591839 (517.12 it/sec) -training >> step=1542500, episode=258 reward=0.7542133 (536.65 it/sec) -training >> step=1542600, episode=258 reward=0.7613405 (498.03 it/sec) -training >> step=1542700, episode=258 reward=0.753208 (505.60 it/sec) -training >> step=1542800, episode=258 reward=0.741205 (504.46 it/sec) -training >> step=1542900, episode=258 reward=0.7756004 (506.47 it/sec) -training >> step=1543000, episode=258 reward=0.7754616 (514.59 it/sec) -training >> step=1543100, episode=258 reward=0.747481 (542.67 it/sec) -training >> step=1543200, episode=258 reward=0.7606714 (503.40 it/sec) -training >> step=1543300, episode=258 reward=0.7695428 (536.21 it/sec) -training >> step=1543400, episode=258 reward=0.7858487 (499.91 it/sec) -training >> step=1543500, episode=258 reward=0.7480108 (388.73 it/sec) -training >> step=1543600, episode=258 reward=0.7400318 (547.06 it/sec) -training >> step=1543700, episode=258 reward=0.7649227 (498.28 it/sec) -training >> step=1543800, episode=258 reward=0.7348962 (537.04 it/sec) -training >> step=1543900, episode=258 reward=0.7418258 (541.47 it/sec) -training >> step=1544000, episode=258 reward=0.7622919 (500.64 it/sec) -training >> step=1544100, episode=258 reward=0.7616267 (525.97 it/sec) -training >> step=1544200, episode=258 reward=0.7385879 (498.51 it/sec) -training >> step=1544300, episode=258 reward=0.7436151 (535.13 it/sec) -training >> step=1544400, episode=258 reward=0.7646681 (522.01 it/sec) -training >> step=1544500, episode=258 reward=0.739689 (483.65 it/sec) -training >> step=1544600, episode=258 reward=0.78281 (543.99 it/sec) -training >> step=1544700, episode=258 reward=0.7790321 (538.91 it/sec) -training >> step=1544800, episode=258 reward=0.7710633 (455.34 it/sec) -training >> step=1544900, episode=258 reward=0.7540136 (514.59 it/sec) -training >> step=1545000, episode=258 reward=0.775791 (507.04 it/sec) -training >> step=1545100, episode=258 reward=0.7567865 (560.94 it/sec) -training >> step=1545200, episode=258 reward=0.7657729 (483.70 it/sec) -training >> step=1545300, episode=258 reward=0.7629892 (499.13 it/sec) -training >> step=1545400, episode=258 reward=0.7619036 (561.14 it/sec) -training >> step=1545500, episode=258 reward=0.7410533 (551.19 it/sec) -training >> step=1545600, episode=258 reward=0.7614393 (510.23 it/sec) -training >> step=1545700, episode=258 reward=0.7539773 (495.22 it/sec) -training >> step=1545800, episode=258 reward=0.7825448 (471.91 it/sec) -training >> step=1545900, episode=258 reward=0.769465 (518.61 it/sec) -training >> step=1546000, episode=258 reward=0.7552599 (490.10 it/sec) -training >> step=1546100, episode=258 reward=0.7532071 (508.85 it/sec) -training >> step=1546200, episode=258 reward=0.734064 (510.87 it/sec) -training >> step=1546300, episode=258 reward=0.788259 (524.10 it/sec) -training >> step=1546400, episode=258 reward=0.7743831 (501.18 it/sec) -training >> step=1546500, episode=258 reward=0.772002 (510.11 it/sec) -training >> step=1546600, episode=258 reward=0.7517146 (512.90 it/sec) -training >> step=1546700, episode=258 reward=0.7538905 (527.71 it/sec) -training >> step=1546800, episode=258 reward=0.7629929 (478.30 it/sec) -training >> step=1546900, episode=258 reward=0.7471646 (511.73 it/sec) -training >> step=1547000, episode=258 reward=0.746287 (532.29 it/sec) -training >> step=1547100, episode=258 reward=0.7625204 (521.53 it/sec) -training >> step=1547200, episode=258 reward=0.7412354 (520.41 it/sec) -training >> step=1547300, episode=259 reward=0.7664376 (197.19 it/sec) -training >> step=1547400, episode=259 reward=0.7425216 (536.54 it/sec) -training >> step=1547500, episode=259 reward=0.7628286 (510.16 it/sec) -training >> step=1547600, episode=259 reward=0.7679757 (544.00 it/sec) -training >> step=1547700, episode=259 reward=0.7501523 (510.78 it/sec) -training >> step=1547800, episode=259 reward=0.7348226 (520.98 it/sec) -training >> step=1547900, episode=259 reward=0.7421931 (562.16 it/sec) -training >> step=1548000, episode=259 reward=0.7644311 (512.63 it/sec) -training >> step=1548100, episode=259 reward=0.7490379 (527.99 it/sec) -training >> step=1548200, episode=259 reward=0.7478723 (507.10 it/sec) -training >> step=1548300, episode=259 reward=0.7581531 (545.19 it/sec) -training >> step=1548400, episode=259 reward=0.779313 (531.85 it/sec) -training >> step=1548500, episode=259 reward=0.7620448 (539.49 it/sec) -training >> step=1548600, episode=259 reward=0.780855 (523.62 it/sec) -training >> step=1548700, episode=259 reward=0.7790679 (548.96 it/sec) -training >> step=1548800, episode=259 reward=0.752641 (502.92 it/sec) -training >> step=1548900, episode=259 reward=0.7687137 (501.71 it/sec) -training >> step=1549000, episode=259 reward=0.7377351 (531.76 it/sec) -training >> step=1549100, episode=259 reward=0.7561426 (522.80 it/sec) -training >> step=1549200, episode=259 reward=0.7889867 (515.06 it/sec) -training >> step=1549300, episode=259 reward=0.769695 (472.47 it/sec) -training >> step=1549400, episode=259 reward=0.7641695 (536.94 it/sec) -training >> step=1549500, episode=259 reward=0.765398 (506.25 it/sec) -training >> step=1549600, episode=259 reward=0.742802 (523.01 it/sec) -training >> step=1549700, episode=259 reward=0.7669323 (522.95 it/sec) -training >> step=1549800, episode=259 reward=0.7662951 (356.46 it/sec) -training >> step=1549900, episode=259 reward=0.7688686 (480.20 it/sec) -training >> step=1550000, episode=259 reward=0.7603075 (517.59 it/sec) -training >> step=1550100, episode=259 reward=0.7594646 (505.36 it/sec) -training >> step=1550200, episode=259 reward=0.750963 (559.10 it/sec) -training >> step=1550300, episode=259 reward=0.7534142 (505.93 it/sec) -training >> step=1550400, episode=259 reward=0.7374803 (529.76 it/sec) -training >> step=1550500, episode=259 reward=0.7738145 (522.11 it/sec) -training >> step=1550600, episode=259 reward=0.7592898 (560.36 it/sec) -training >> step=1550700, episode=259 reward=0.7586242 (523.40 it/sec) -training >> step=1550800, episode=259 reward=0.7847394 (529.20 it/sec) -training >> step=1550900, episode=259 reward=0.7536924 (528.07 it/sec) -training >> step=1551000, episode=259 reward=0.7645115 (516.45 it/sec) -training >> step=1551100, episode=259 reward=0.7475245 (525.18 it/sec) -training >> step=1551200, episode=259 reward=0.7701351 (511.27 it/sec) -training >> step=1551300, episode=259 reward=0.7541994 (531.04 it/sec) -training >> step=1551400, episode=259 reward=0.7673126 (532.80 it/sec) -training >> step=1551500, episode=259 reward=0.777306 (507.40 it/sec) -training >> step=1551600, episode=259 reward=0.7624893 (532.94 it/sec) -training >> step=1551700, episode=259 reward=0.778986 (499.42 it/sec) -training >> step=1551800, episode=259 reward=0.7875661 (548.06 it/sec) -training >> step=1551900, episode=259 reward=0.755796 (507.34 it/sec) -training >> step=1552000, episode=259 reward=0.7628924 (494.67 it/sec) -training >> step=1552100, episode=259 reward=0.7601978 (475.89 it/sec) -training >> step=1552200, episode=259 reward=0.7724732 (547.93 it/sec) -training >> step=1552300, episode=259 reward=0.7359452 (504.85 it/sec) -training >> step=1552400, episode=259 reward=0.7579535 (509.01 it/sec) -training >> step=1552500, episode=259 reward=0.7536855 (518.66 it/sec) -training >> step=1552600, episode=259 reward=0.7584431 (544.31 it/sec) -training >> step=1552700, episode=259 reward=0.7341228 (493.36 it/sec) -training >> step=1552800, episode=259 reward=0.7799405 (446.41 it/sec) -training >> step=1552900, episode=259 reward=0.7686878 (494.76 it/sec) -training >> step=1553000, episode=259 reward=0.7512777 (520.41 it/sec) -training >> step=1553100, episode=259 reward=0.7399613 (557.68 it/sec) -training >> step=1553200, episode=259 reward=0.7490655 (537.69 it/sec) -training >> step=1553300, episode=260 reward=0.7630118 (190.57 it/sec) -training >> step=1553400, episode=260 reward=0.7514548 (475.61 it/sec) -training >> step=1553500, episode=260 reward=0.7385357 (501.43 it/sec) -training >> step=1553600, episode=260 reward=0.7559438 (535.44 it/sec) -training >> step=1553700, episode=260 reward=0.7603301 (493.96 it/sec) -training >> step=1553800, episode=260 reward=0.7445721 (502.73 it/sec) -training >> step=1553900, episode=260 reward=0.7685907 (489.64 it/sec) -training >> step=1554000, episode=260 reward=0.7788855 (531.99 it/sec) -training >> step=1554100, episode=260 reward=0.7691522 (494.93 it/sec) -training >> step=1554200, episode=260 reward=0.7436402 (486.19 it/sec) -training >> step=1554300, episode=260 reward=0.7667106 (510.14 it/sec) -training >> step=1554400, episode=260 reward=0.7701395 (485.60 it/sec) -training >> step=1554500, episode=260 reward=0.7635208 (517.68 it/sec) -training >> step=1554600, episode=260 reward=0.7499871 (495.60 it/sec) -training >> step=1554700, episode=260 reward=0.7652873 (484.76 it/sec) -training >> step=1554800, episode=260 reward=0.74948 (488.99 it/sec) -training >> step=1554900, episode=260 reward=0.7672479 (502.82 it/sec) -training >> step=1555000, episode=260 reward=0.7667288 (543.32 it/sec) -training >> step=1555100, episode=260 reward=0.782658 (536.67 it/sec) -training >> step=1555200, episode=260 reward=0.7530218 (502.89 it/sec) -training >> step=1555300, episode=260 reward=0.7745035 (533.01 it/sec) -training >> step=1555400, episode=260 reward=0.7642447 (515.58 it/sec) -training >> step=1555500, episode=260 reward=0.7459691 (540.13 it/sec) -training >> step=1555600, episode=260 reward=0.7673684 (544.01 it/sec) -training >> step=1555700, episode=260 reward=0.7517056 (512.30 it/sec) -training >> step=1555800, episode=260 reward=0.7624105 (484.33 it/sec) -training >> step=1555900, episode=260 reward=0.7634775 (536.03 it/sec) -training >> step=1556000, episode=260 reward=0.7731044 (548.68 it/sec) -training >> step=1556100, episode=260 reward=0.7433849 (373.75 it/sec) -training >> step=1556200, episode=260 reward=0.7375255 (540.73 it/sec) -training >> step=1556300, episode=260 reward=0.7504271 (474.09 it/sec) -training >> step=1556400, episode=260 reward=0.7510653 (497.72 it/sec) -training >> step=1556500, episode=260 reward=0.7281745 (501.72 it/sec) -training >> step=1556600, episode=260 reward=0.727685 (531.06 it/sec) -training >> step=1556700, episode=260 reward=0.7788437 (523.97 it/sec) -training >> step=1556800, episode=260 reward=0.739602 (504.42 it/sec) -training >> step=1556900, episode=260 reward=0.7483551 (520.95 it/sec) -training >> step=1557000, episode=260 reward=0.7726969 (522.52 it/sec) -training >> step=1557100, episode=260 reward=0.7916188 (521.69 it/sec) -training >> step=1557200, episode=260 reward=0.7631502 (546.54 it/sec) -training >> step=1557300, episode=260 reward=0.7430043 (463.74 it/sec) -training >> step=1557400, episode=260 reward=0.768869 (527.66 it/sec) -training >> step=1557500, episode=260 reward=0.7625788 (530.41 it/sec) -training >> step=1557600, episode=260 reward=0.7541823 (494.50 it/sec) -training >> step=1557700, episode=260 reward=0.750098 (543.08 it/sec) -training >> step=1557800, episode=260 reward=0.7667222 (489.08 it/sec) -training >> step=1557900, episode=260 reward=0.7633664 (546.85 it/sec) -training >> step=1558000, episode=260 reward=0.7901662 (525.35 it/sec) -training >> step=1558100, episode=260 reward=0.7628665 (528.13 it/sec) -training >> step=1558200, episode=260 reward=0.7613171 (542.03 it/sec) -training >> step=1558300, episode=260 reward=0.7840378 (538.76 it/sec) -training >> step=1558400, episode=260 reward=0.7715048 (487.54 it/sec) -training >> step=1558500, episode=260 reward=0.7404368 (524.72 it/sec) -training >> step=1558600, episode=260 reward=0.7275573 (576.56 it/sec) -training >> step=1558700, episode=260 reward=0.7595729 (510.90 it/sec) -training >> step=1558800, episode=260 reward=0.7438231 (533.45 it/sec) -training >> step=1558900, episode=260 reward=0.7546333 (415.56 it/sec) -training >> step=1559000, episode=260 reward=0.7387816 (544.40 it/sec) -training >> step=1559100, episode=260 reward=0.763841 (527.44 it/sec) -training >> step=1559200, episode=260 reward=0.7500225 (490.05 it/sec) -training >> step=1559300, episode=261 reward=0.7673577 (189.79 it/sec) -training >> step=1559400, episode=261 reward=0.7772487 (494.62 it/sec) -training >> step=1559500, episode=261 reward=0.7559335 (501.00 it/sec) -training >> step=1559600, episode=261 reward=0.7505763 (532.26 it/sec) -training >> step=1559700, episode=261 reward=0.7505711 (508.20 it/sec) -training >> step=1559800, episode=261 reward=0.7511473 (470.27 it/sec) -training >> step=1559900, episode=261 reward=0.761945 (509.85 it/sec) -training >> step=1560000, episode=261 reward=0.7538235 (502.61 it/sec) -training >> step=1560100, episode=261 reward=0.7405106 (505.45 it/sec) -training >> step=1560200, episode=261 reward=0.7581718 (530.94 it/sec) -training >> step=1560300, episode=261 reward=0.7650294 (575.81 it/sec) -training >> step=1560400, episode=261 reward=0.7766223 (525.50 it/sec) -training >> step=1560500, episode=261 reward=0.7392756 (554.91 it/sec) -training >> step=1560600, episode=261 reward=0.7495576 (528.98 it/sec) -training >> step=1560700, episode=261 reward=0.7572121 (498.91 it/sec) -training >> step=1560800, episode=261 reward=0.7516631 (539.92 it/sec) -training >> step=1560900, episode=261 reward=0.7518672 (476.46 it/sec) -training >> step=1561000, episode=261 reward=0.7645779 (536.88 it/sec) -training >> step=1561100, episode=261 reward=0.7596852 (532.75 it/sec) -training >> step=1561200, episode=261 reward=0.7678998 (531.04 it/sec) -training >> step=1561300, episode=261 reward=0.7602509 (505.54 it/sec) -training >> step=1561400, episode=261 reward=0.773947 (528.72 it/sec) -training >> step=1561500, episode=261 reward=0.774017 (520.91 it/sec) -training >> step=1561600, episode=261 reward=0.7566454 (543.31 it/sec) -training >> step=1561700, episode=261 reward=0.7644787 (515.56 it/sec) -training >> step=1561800, episode=261 reward=0.7530842 (529.86 it/sec) -training >> step=1561900, episode=261 reward=0.7659298 (553.28 it/sec) -training >> step=1562000, episode=261 reward=0.7658231 (488.45 it/sec) -training >> step=1562100, episode=261 reward=0.7794392 (529.70 it/sec) -training >> step=1562200, episode=261 reward=0.746376 (556.25 it/sec) -training >> step=1562300, episode=261 reward=0.7708864 (431.08 it/sec) -training >> step=1562400, episode=261 reward=0.7282982 (522.09 it/sec) -training >> step=1562500, episode=261 reward=0.7454512 (520.56 it/sec) -training >> step=1562600, episode=261 reward=0.776237 (529.82 it/sec) -training >> step=1562700, episode=261 reward=0.748702 (534.82 it/sec) -training >> step=1562800, episode=261 reward=0.7584469 (529.29 it/sec) -training >> step=1562900, episode=261 reward=0.7570307 (547.06 it/sec) -training >> step=1563000, episode=261 reward=0.7541819 (510.18 it/sec) -training >> step=1563100, episode=261 reward=0.7542683 (528.49 it/sec) -training >> step=1563200, episode=261 reward=0.759531 (490.32 it/sec) -training >> step=1563300, episode=261 reward=0.7819631 (483.44 it/sec) -training >> step=1563400, episode=261 reward=0.751858 (567.97 it/sec) -training >> step=1563500, episode=261 reward=0.7466126 (528.15 it/sec) -training >> step=1563600, episode=261 reward=0.7806974 (518.76 it/sec) -training >> step=1563700, episode=261 reward=0.7717978 (492.66 it/sec) -training >> step=1563800, episode=261 reward=0.76875 (529.81 it/sec) -training >> step=1563900, episode=261 reward=0.7688739 (482.36 it/sec) -training >> step=1564000, episode=261 reward=0.7383668 (505.18 it/sec) -training >> step=1564100, episode=261 reward=0.7575411 (487.15 it/sec) -training >> step=1564200, episode=261 reward=0.7598243 (544.31 it/sec) -training >> step=1564300, episode=261 reward=0.746501 (506.74 it/sec) -training >> step=1564400, episode=261 reward=0.7544338 (508.76 it/sec) -training >> step=1564500, episode=261 reward=0.7746699 (544.05 it/sec) -training >> step=1564600, episode=261 reward=0.7609119 (456.16 it/sec) -training >> step=1564700, episode=261 reward=0.7514372 (460.84 it/sec) -training >> step=1564800, episode=261 reward=0.7677231 (437.14 it/sec) -training >> step=1564900, episode=261 reward=0.7600812 (422.40 it/sec) -training >> step=1565000, episode=261 reward=0.7683313 (424.85 it/sec) -training >> step=1565100, episode=261 reward=0.7425954 (349.54 it/sec) -training >> step=1565200, episode=261 reward=0.7641762 (416.19 it/sec) -training >> step=1565300, episode=262 reward=0.765193 (147.70 it/sec) -training >> step=1565400, episode=262 reward=0.770177 (489.50 it/sec) -training >> step=1565500, episode=262 reward=0.7471298 (506.11 it/sec) -training >> step=1565600, episode=262 reward=0.7365738 (536.02 it/sec) -training >> step=1565700, episode=262 reward=0.7595484 (500.33 it/sec) -training >> step=1565800, episode=262 reward=0.7401935 (525.53 it/sec) -training >> step=1565900, episode=262 reward=0.7620753 (580.25 it/sec) -training >> step=1566000, episode=262 reward=0.7639114 (521.98 it/sec) -training >> step=1566100, episode=262 reward=0.7806221 (489.20 it/sec) -training >> step=1566200, episode=262 reward=0.7626441 (538.10 it/sec) -training >> step=1566300, episode=262 reward=0.7497518 (538.52 it/sec) -training >> step=1566400, episode=262 reward=0.7524286 (530.80 it/sec) -training >> step=1566500, episode=262 reward=0.7758077 (545.92 it/sec) -training >> step=1566600, episode=262 reward=0.7502 (526.78 it/sec) -training >> step=1566700, episode=262 reward=0.7665814 (537.99 it/sec) -training >> step=1566800, episode=262 reward=0.789682 (544.97 it/sec) -training >> step=1566900, episode=262 reward=0.7342675 (546.23 it/sec) -training >> step=1567000, episode=262 reward=0.7657179 (503.00 it/sec) -training >> step=1567100, episode=262 reward=0.7744818 (480.29 it/sec) -training >> step=1567200, episode=262 reward=0.7522786 (553.07 it/sec) -training >> step=1567300, episode=262 reward=0.7413099 (557.43 it/sec) -training >> step=1567400, episode=262 reward=0.747963 (570.36 it/sec) -training >> step=1567500, episode=262 reward=0.7656351 (557.69 it/sec) -training >> step=1567600, episode=262 reward=0.7681352 (519.78 it/sec) -training >> step=1567700, episode=262 reward=0.7557133 (549.52 it/sec) -training >> step=1567800, episode=262 reward=0.76306 (572.61 it/sec) -training >> step=1567900, episode=262 reward=0.7719427 (534.54 it/sec) -training >> step=1568000, episode=262 reward=0.7739929 (562.90 it/sec) -training >> step=1568100, episode=262 reward=0.7590186 (596.68 it/sec) -training >> step=1568200, episode=262 reward=0.7670565 (502.86 it/sec) -training >> step=1568300, episode=262 reward=0.7627069 (547.09 it/sec) -training >> step=1568400, episode=262 reward=0.7378117 (424.76 it/sec) -training >> step=1568500, episode=262 reward=0.7484717 (574.63 it/sec) -training >> step=1568600, episode=262 reward=0.748982 (546.64 it/sec) -training >> step=1568700, episode=262 reward=0.7758108 (525.51 it/sec) -training >> step=1568800, episode=262 reward=0.7835457 (528.79 it/sec) -training >> step=1568900, episode=262 reward=0.7752082 (496.86 it/sec) -training >> step=1569000, episode=262 reward=0.7446868 (563.44 it/sec) -training >> step=1569100, episode=262 reward=0.7772228 (520.57 it/sec) -training >> step=1569200, episode=262 reward=0.7296798 (562.23 it/sec) -training >> step=1569300, episode=262 reward=0.7476138 (558.69 it/sec) -training >> step=1569400, episode=262 reward=0.7252381 (538.81 it/sec) -training >> step=1569500, episode=262 reward=0.746139 (582.73 it/sec) -training >> step=1569600, episode=262 reward=0.7649016 (541.43 it/sec) -training >> step=1569700, episode=262 reward=0.7566368 (517.98 it/sec) -training >> step=1569800, episode=262 reward=0.7735525 (489.88 it/sec) -training >> step=1569900, episode=262 reward=0.7598913 (478.76 it/sec) -training >> step=1570000, episode=262 reward=0.7590309 (505.12 it/sec) -training >> step=1570100, episode=262 reward=0.7521629 (497.28 it/sec) -training >> step=1570200, episode=262 reward=0.7440181 (487.46 it/sec) -training >> step=1570300, episode=262 reward=0.7370568 (536.91 it/sec) -training >> step=1570400, episode=262 reward=0.7679127 (564.64 it/sec) -training >> step=1570500, episode=262 reward=0.7760636 (534.57 it/sec) -training >> step=1570600, episode=262 reward=0.7712329 (536.47 it/sec) -training >> step=1570700, episode=262 reward=0.7755455 (519.68 it/sec) -training >> step=1570800, episode=262 reward=0.7622244 (523.85 it/sec) -training >> step=1570900, episode=262 reward=0.7539933 (538.01 it/sec) -training >> step=1571000, episode=262 reward=0.7621329 (569.86 it/sec) -training >> step=1571100, episode=262 reward=0.7668363 (582.92 it/sec) -training >> step=1571200, episode=262 reward=0.7385966 (527.76 it/sec) -training >> step=1571300, episode=263 reward=0.7463796 (170.74 it/sec) -training >> step=1571400, episode=263 reward=0.7514305 (537.07 it/sec) -training >> step=1571500, episode=263 reward=0.7613716 (525.50 it/sec) -training >> step=1571600, episode=263 reward=0.7265639 (508.09 it/sec) -training >> step=1571700, episode=263 reward=0.7646844 (424.63 it/sec) -training >> step=1571800, episode=263 reward=0.7469811 (500.84 it/sec) -training >> step=1571900, episode=263 reward=0.7652055 (505.67 it/sec) -training >> step=1572000, episode=263 reward=0.7773641 (532.89 it/sec) -training >> step=1572100, episode=263 reward=0.7774355 (492.88 it/sec) -training >> step=1572200, episode=263 reward=0.7419763 (512.22 it/sec) -training >> step=1572300, episode=263 reward=0.7538036 (559.15 it/sec) -training >> step=1572400, episode=263 reward=0.7672408 (540.72 it/sec) -training >> step=1572500, episode=263 reward=0.7457833 (513.24 it/sec) -training >> step=1572600, episode=263 reward=0.7362815 (520.76 it/sec) -training >> step=1572700, episode=263 reward=0.7520013 (498.38 it/sec) -training >> step=1572800, episode=263 reward=0.7564085 (549.17 it/sec) -training >> step=1572900, episode=263 reward=0.7469723 (533.64 it/sec) -training >> step=1573000, episode=263 reward=0.7639875 (523.02 it/sec) -training >> step=1573100, episode=263 reward=0.7485616 (529.17 it/sec) -training >> step=1573200, episode=263 reward=0.7686332 (508.49 it/sec) -training >> step=1573300, episode=263 reward=0.757088 (540.85 it/sec) -training >> step=1573400, episode=263 reward=0.762051 (570.52 it/sec) -training >> step=1573500, episode=263 reward=0.7745967 (542.07 it/sec) -training >> step=1573600, episode=263 reward=0.7537034 (514.10 it/sec) -training >> step=1573700, episode=263 reward=0.7579932 (529.03 it/sec) -training >> step=1573800, episode=263 reward=0.7596989 (534.60 it/sec) -training >> step=1573900, episode=263 reward=0.74685 (548.54 it/sec) -training >> step=1574000, episode=263 reward=0.7720209 (547.19 it/sec) -training >> step=1574100, episode=263 reward=0.7741109 (540.47 it/sec) -training >> step=1574200, episode=263 reward=0.7500218 (505.97 it/sec) -training >> step=1574300, episode=263 reward=0.7532275 (506.05 it/sec) -training >> step=1574400, episode=263 reward=0.7690852 (430.37 it/sec) -training >> step=1574500, episode=263 reward=0.7777067 (590.42 it/sec) -training >> step=1574600, episode=263 reward=0.7950889 (543.23 it/sec) -training >> step=1574700, episode=263 reward=0.7249919 (466.25 it/sec) -training >> step=1574800, episode=263 reward=0.7671284 (415.25 it/sec) -training >> step=1574900, episode=263 reward=0.7563146 (434.49 it/sec) -training >> step=1575000, episode=263 reward=0.7604327 (499.07 it/sec) -training >> step=1575100, episode=263 reward=0.7640572 (471.83 it/sec) -training >> step=1575200, episode=263 reward=0.7515413 (443.77 it/sec) -training >> step=1575300, episode=263 reward=0.7745298 (429.99 it/sec) -training >> step=1575400, episode=263 reward=0.7675369 (522.61 it/sec) -training >> step=1575500, episode=263 reward=0.7453834 (538.54 it/sec) -training >> step=1575600, episode=263 reward=0.7616736 (542.79 it/sec) -training >> step=1575700, episode=263 reward=0.7446319 (554.55 it/sec) -training >> step=1575800, episode=263 reward=0.7544236 (517.77 it/sec) -training >> step=1575900, episode=263 reward=0.7727261 (558.73 it/sec) -training >> step=1576000, episode=263 reward=0.7656634 (548.79 it/sec) -training >> step=1576100, episode=263 reward=0.7519615 (511.26 it/sec) -training >> step=1576200, episode=263 reward=0.7349039 (510.01 it/sec) -training >> step=1576300, episode=263 reward=0.7557703 (428.69 it/sec) -training >> step=1576400, episode=263 reward=0.7241284 (589.42 it/sec) -training >> step=1576500, episode=263 reward=0.7670572 (554.78 it/sec) -training >> step=1576600, episode=263 reward=0.7359895 (510.13 it/sec) -training >> step=1576700, episode=263 reward=0.7414824 (564.61 it/sec) -training >> step=1576800, episode=263 reward=0.7475438 (543.36 it/sec) -training >> step=1576900, episode=263 reward=0.7546359 (489.44 it/sec) -training >> step=1577000, episode=263 reward=0.7578939 (565.01 it/sec) -training >> step=1577100, episode=263 reward=0.7540786 (563.03 it/sec) -training >> step=1577200, episode=263 reward=0.7297533 (572.85 it/sec) -training >> step=1577300, episode=264 reward=0.7312607 (183.94 it/sec) -training >> step=1577400, episode=264 reward=0.762462 (533.97 it/sec) -training >> step=1577500, episode=264 reward=0.7608952 (528.67 it/sec) -training >> step=1577600, episode=264 reward=0.7548119 (596.03 it/sec) -training >> step=1577700, episode=264 reward=0.7404311 (552.41 it/sec) -training >> step=1577800, episode=264 reward=0.7643769 (512.56 it/sec) -training >> step=1577900, episode=264 reward=0.7517054 (535.69 it/sec) -training >> step=1578000, episode=264 reward=0.7563535 (579.37 it/sec) -training >> step=1578100, episode=264 reward=0.7748423 (559.11 it/sec) -training >> step=1578200, episode=264 reward=0.7917889 (561.00 it/sec) -training >> step=1578300, episode=264 reward=0.7588912 (565.80 it/sec) -training >> step=1578400, episode=264 reward=0.7615066 (520.33 it/sec) -training >> step=1578500, episode=264 reward=0.7419068 (521.44 it/sec) -training >> step=1578600, episode=264 reward=0.7707822 (560.92 it/sec) -training >> step=1578700, episode=264 reward=0.7547857 (580.32 it/sec) -training >> step=1578800, episode=264 reward=0.7599117 (537.96 it/sec) -training >> step=1578900, episode=264 reward=0.7551079 (539.83 it/sec) -training >> step=1579000, episode=264 reward=0.7613533 (512.22 it/sec) -training >> step=1579100, episode=264 reward=0.7716014 (542.11 it/sec) -training >> step=1579200, episode=264 reward=0.7331797 (550.16 it/sec) -training >> step=1579300, episode=264 reward=0.7576247 (468.08 it/sec) -training >> step=1579400, episode=264 reward=0.7795057 (527.30 it/sec) -training >> step=1579500, episode=264 reward=0.7667949 (438.17 it/sec) -training >> step=1579600, episode=264 reward=0.7483265 (421.20 it/sec) -training >> step=1579700, episode=264 reward=0.7718243 (488.38 it/sec) -training >> step=1579800, episode=264 reward=0.7637072 (557.62 it/sec) -training >> step=1579900, episode=264 reward=0.7591215 (547.57 it/sec) -training >> step=1580000, episode=264 reward=0.7600721 (449.22 it/sec) -training >> step=1580100, episode=264 reward=0.7818279 (501.50 it/sec) -training >> step=1580200, episode=264 reward=0.7694007 (549.12 it/sec) -training >> step=1580300, episode=264 reward=0.7556 (543.88 it/sec) -training >> step=1580400, episode=264 reward=0.7588032 (524.94 it/sec) -training >> step=1580500, episode=264 reward=0.7696413 (407.64 it/sec) -training >> step=1580600, episode=264 reward=0.7662407 (553.04 it/sec) -training >> step=1580700, episode=264 reward=0.75482 (535.38 it/sec) -training >> step=1580800, episode=264 reward=0.7422352 (511.29 it/sec) -training >> step=1580900, episode=264 reward=0.7655292 (551.57 it/sec) -training >> step=1581000, episode=264 reward=0.7620919 (500.09 it/sec) -training >> step=1581100, episode=264 reward=0.7838666 (520.49 it/sec) -training >> step=1581200, episode=264 reward=0.7499211 (566.29 it/sec) -training >> step=1581300, episode=264 reward=0.7572329 (587.57 it/sec) -training >> step=1581400, episode=264 reward=0.7837567 (530.94 it/sec) -training >> step=1581500, episode=264 reward=0.7568972 (531.80 it/sec) -training >> step=1581600, episode=264 reward=0.7672977 (499.29 it/sec) -training >> step=1581700, episode=264 reward=0.7653572 (572.15 it/sec) -training >> step=1581800, episode=264 reward=0.7490265 (528.38 it/sec) -training >> step=1581900, episode=264 reward=0.7480708 (519.97 it/sec) -training >> step=1582000, episode=264 reward=0.7688707 (488.69 it/sec) -training >> step=1582100, episode=264 reward=0.7647446 (469.29 it/sec) -training >> step=1582200, episode=264 reward=0.7544617 (517.91 it/sec) -training >> step=1582300, episode=264 reward=0.7490072 (545.79 it/sec) -training >> step=1582400, episode=264 reward=0.7619673 (526.69 it/sec) -training >> step=1582500, episode=264 reward=0.7593281 (556.51 it/sec) -training >> step=1582600, episode=264 reward=0.7562509 (517.31 it/sec) -training >> step=1582700, episode=264 reward=0.7512073 (497.85 it/sec) -training >> step=1582800, episode=264 reward=0.7603549 (551.96 it/sec) -training >> step=1582900, episode=264 reward=0.7562217 (527.93 it/sec) -training >> step=1583000, episode=264 reward=0.7377092 (516.26 it/sec) -training >> step=1583100, episode=264 reward=0.7569886 (497.80 it/sec) -training >> step=1583200, episode=264 reward=0.7506213 (451.35 it/sec) -training >> step=1583300, episode=265 reward=0.7606425 (172.44 it/sec) -training >> step=1583400, episode=265 reward=0.7580088 (521.72 it/sec) -training >> step=1583500, episode=265 reward=0.7744591 (477.70 it/sec) -training >> step=1583600, episode=265 reward=0.7666126 (492.84 it/sec) -training >> step=1583700, episode=265 reward=0.7496393 (412.68 it/sec) -training >> step=1583800, episode=265 reward=0.7722157 (456.85 it/sec) -training >> step=1583900, episode=265 reward=0.7549626 (488.14 it/sec) -training >> step=1584000, episode=265 reward=0.7431572 (471.72 it/sec) -training >> step=1584100, episode=265 reward=0.7689266 (493.55 it/sec) -training >> step=1584200, episode=265 reward=0.761925 (493.49 it/sec) -training >> step=1584300, episode=265 reward=0.7602472 (492.66 it/sec) -training >> step=1584400, episode=265 reward=0.7621773 (479.57 it/sec) -training >> step=1584500, episode=265 reward=0.7362638 (480.73 it/sec) -training >> step=1584600, episode=265 reward=0.7709116 (504.76 it/sec) -training >> step=1584700, episode=265 reward=0.7717954 (487.40 it/sec) -training >> step=1584800, episode=265 reward=0.7644431 (514.90 it/sec) -training >> step=1584900, episode=265 reward=0.7458954 (457.07 it/sec) -training >> step=1585000, episode=265 reward=0.7704105 (423.12 it/sec) -training >> step=1585100, episode=265 reward=0.7636951 (502.66 it/sec) -training >> step=1585200, episode=265 reward=0.7529132 (524.84 it/sec) -training >> step=1585300, episode=265 reward=0.7588168 (503.41 it/sec) -training >> step=1585400, episode=265 reward=0.7463011 (476.01 it/sec) -training >> step=1585500, episode=265 reward=0.7564095 (471.11 it/sec) -training >> step=1585600, episode=265 reward=0.751316 (459.97 it/sec) -training >> step=1585700, episode=265 reward=0.7592407 (519.73 it/sec) -training >> step=1585800, episode=265 reward=0.7506049 (508.03 it/sec) -training >> step=1585900, episode=265 reward=0.7570271 (460.71 it/sec) -training >> step=1586000, episode=265 reward=0.7590867 (469.98 it/sec) -training >> step=1586100, episode=265 reward=0.7630175 (490.00 it/sec) -training >> step=1586200, episode=265 reward=0.7446596 (509.58 it/sec) -training >> step=1586300, episode=265 reward=0.7508565 (510.39 it/sec) -training >> step=1586400, episode=265 reward=0.7798834 (492.24 it/sec) -training >> step=1586500, episode=265 reward=0.760318 (485.21 it/sec) -training >> step=1586600, episode=265 reward=0.7507208 (486.57 it/sec) -training >> step=1586700, episode=265 reward=0.7610811 (421.87 it/sec) -training >> step=1586800, episode=265 reward=0.7673328 (505.20 it/sec) -training >> step=1586900, episode=265 reward=0.776745 (540.77 it/sec) -training >> step=1587000, episode=265 reward=0.7597854 (516.98 it/sec) -training >> step=1587100, episode=265 reward=0.7479247 (553.10 it/sec) -training >> step=1587200, episode=265 reward=0.7385158 (493.59 it/sec) -training >> step=1587300, episode=265 reward=0.7446848 (438.06 it/sec) -training >> step=1587400, episode=265 reward=0.7432649 (456.61 it/sec) -training >> step=1587500, episode=265 reward=0.7727467 (431.84 it/sec) -training >> step=1587600, episode=265 reward=0.7484947 (498.12 it/sec) -training >> step=1587700, episode=265 reward=0.7698882 (516.23 it/sec) -training >> step=1587800, episode=265 reward=0.7559503 (488.80 it/sec) -training >> step=1587900, episode=265 reward=0.7426476 (444.01 it/sec) -training >> step=1588000, episode=265 reward=0.7697362 (420.46 it/sec) -training >> step=1588100, episode=265 reward=0.7464605 (482.70 it/sec) -training >> step=1588200, episode=265 reward=0.7620379 (495.76 it/sec) -training >> step=1588300, episode=265 reward=0.7669476 (484.32 it/sec) -training >> step=1588400, episode=265 reward=0.7569165 (509.65 it/sec) -training >> step=1588500, episode=265 reward=0.7567719 (445.82 it/sec) -training >> step=1588600, episode=265 reward=0.7502891 (499.20 it/sec) -training >> step=1588700, episode=265 reward=0.7612181 (496.11 it/sec) -training >> step=1588800, episode=265 reward=0.7513863 (496.87 it/sec) -training >> step=1588900, episode=265 reward=0.745556 (547.19 it/sec) -training >> step=1589000, episode=265 reward=0.765687 (515.16 it/sec) -training >> step=1589100, episode=265 reward=0.7621781 (501.29 it/sec) -training >> step=1589200, episode=265 reward=0.7531887 (458.79 it/sec) -training >> step=1589300, episode=266 reward=0.7614194 (205.24 it/sec) -training >> step=1589400, episode=266 reward=0.7660394 (533.26 it/sec) -training >> step=1589500, episode=266 reward=0.7986547 (526.41 it/sec) -training >> step=1589600, episode=266 reward=0.7597656 (515.85 it/sec) -training >> step=1589700, episode=266 reward=0.7512116 (486.51 it/sec) -training >> step=1589800, episode=266 reward=0.7662501 (471.83 it/sec) -training >> step=1589900, episode=266 reward=0.7641559 (505.20 it/sec) -training >> step=1590000, episode=266 reward=0.7446644 (533.96 it/sec) -training >> step=1590100, episode=266 reward=0.742882 (536.36 it/sec) -training >> step=1590200, episode=266 reward=0.746545 (507.07 it/sec) -training >> step=1590300, episode=266 reward=0.7690436 (525.32 it/sec) -training >> step=1590400, episode=266 reward=0.7531252 (487.62 it/sec) -training >> step=1590500, episode=266 reward=0.7573228 (528.22 it/sec) -training >> step=1590600, episode=266 reward=0.756034 (579.43 it/sec) -training >> step=1590700, episode=266 reward=0.7549353 (521.31 it/sec) -training >> step=1590800, episode=266 reward=0.760536 (516.70 it/sec) -training >> step=1590900, episode=266 reward=0.7521293 (497.08 it/sec) -training >> step=1591000, episode=266 reward=0.7478365 (509.65 it/sec) -training >> step=1591100, episode=266 reward=0.7540187 (532.33 it/sec) -training >> step=1591200, episode=266 reward=0.7574628 (503.29 it/sec) -training >> step=1591300, episode=266 reward=0.7685751 (512.86 it/sec) -training >> step=1591400, episode=266 reward=0.7628959 (527.93 it/sec) -training >> step=1591500, episode=266 reward=0.7522546 (488.74 it/sec) -training >> step=1591600, episode=266 reward=0.7662845 (517.66 it/sec) -training >> step=1591700, episode=266 reward=0.764966 (498.39 it/sec) -training >> step=1591800, episode=266 reward=0.7735916 (539.68 it/sec) -training >> step=1591900, episode=266 reward=0.7424544 (536.30 it/sec) -training >> step=1592000, episode=266 reward=0.7425991 (486.22 it/sec) -training >> step=1592100, episode=266 reward=0.7772153 (532.78 it/sec) -training >> step=1592200, episode=266 reward=0.7793087 (500.92 it/sec) -training >> step=1592300, episode=266 reward=0.7537425 (536.96 it/sec) -training >> step=1592400, episode=266 reward=0.7518681 (544.59 it/sec) -training >> step=1592500, episode=266 reward=0.7719717 (553.10 it/sec) -training >> step=1592600, episode=266 reward=0.7675084 (505.54 it/sec) -training >> step=1592700, episode=266 reward=0.7740723 (521.79 it/sec) -training >> step=1592800, episode=266 reward=0.7637157 (380.06 it/sec) -training >> step=1592900, episode=266 reward=0.7373451 (562.82 it/sec) -training >> step=1593000, episode=266 reward=0.7553629 (535.64 it/sec) -training >> step=1593100, episode=266 reward=0.7576628 (510.89 it/sec) -training >> step=1593200, episode=266 reward=0.7638299 (521.50 it/sec) -training >> step=1593300, episode=266 reward=0.7745556 (551.25 it/sec) -training >> step=1593400, episode=266 reward=0.7611043 (505.41 it/sec) -training >> step=1593500, episode=266 reward=0.7492542 (524.15 it/sec) -training >> step=1593600, episode=266 reward=0.7506254 (477.13 it/sec) -training >> step=1593700, episode=266 reward=0.7576057 (559.09 it/sec) -training >> step=1593800, episode=266 reward=0.7616602 (500.13 it/sec) -training >> step=1593900, episode=266 reward=0.776723 (520.93 it/sec) -training >> step=1594000, episode=266 reward=0.7475039 (555.85 it/sec) -training >> step=1594100, episode=266 reward=0.7595423 (524.22 it/sec) -training >> step=1594200, episode=266 reward=0.7619328 (501.87 it/sec) -training >> step=1594300, episode=266 reward=0.7496637 (507.17 it/sec) -training >> step=1594400, episode=266 reward=0.7278251 (539.63 it/sec) -training >> step=1594500, episode=266 reward=0.7503767 (511.61 it/sec) -training >> step=1594600, episode=266 reward=0.7506027 (508.24 it/sec) -training >> step=1594700, episode=266 reward=0.7358668 (527.89 it/sec) -training >> step=1594800, episode=266 reward=0.7563431 (439.94 it/sec) -training >> step=1594900, episode=266 reward=0.7557868 (412.12 it/sec) -training >> step=1595000, episode=266 reward=0.7441442 (508.09 it/sec) -training >> step=1595100, episode=266 reward=0.7381497 (527.46 it/sec) -training >> step=1595200, episode=266 reward=0.7508731 (483.72 it/sec) -training >> step=1595300, episode=267 reward=0.7461818 (233.41 it/sec) -training >> step=1595400, episode=267 reward=0.7482417 (532.99 it/sec) -training >> step=1595500, episode=267 reward=0.7436353 (545.69 it/sec) -training >> step=1595600, episode=267 reward=0.7480216 (564.11 it/sec) -training >> step=1595700, episode=267 reward=0.7584108 (522.79 it/sec) -training >> step=1595800, episode=267 reward=0.7540067 (476.38 it/sec) -training >> step=1595900, episode=267 reward=0.7516378 (515.58 it/sec) -training >> step=1596000, episode=267 reward=0.7522211 (565.34 it/sec) -training >> step=1596100, episode=267 reward=0.7523174 (525.88 it/sec) -training >> step=1596200, episode=267 reward=0.7325844 (513.51 it/sec) -training >> step=1596300, episode=267 reward=0.7635324 (540.43 it/sec) -training >> step=1596400, episode=267 reward=0.7485037 (553.66 it/sec) -training >> step=1596500, episode=267 reward=0.7478378 (531.45 it/sec) -training >> step=1596600, episode=267 reward=0.7677076 (542.59 it/sec) -training >> step=1596700, episode=267 reward=0.760552 (507.32 it/sec) -training >> step=1596800, episode=267 reward=0.744165 (538.24 it/sec) -training >> step=1596900, episode=267 reward=0.7707058 (532.82 it/sec) -training >> step=1597000, episode=267 reward=0.7562902 (495.77 it/sec) -training >> step=1597100, episode=267 reward=0.7542663 (537.14 it/sec) -training >> step=1597200, episode=267 reward=0.767086 (512.44 it/sec) -training >> step=1597300, episode=267 reward=0.7794489 (508.00 it/sec) -training >> step=1597400, episode=267 reward=0.7653627 (530.49 it/sec) -training >> step=1597500, episode=267 reward=0.7534879 (525.16 it/sec) -training >> step=1597600, episode=267 reward=0.7725824 (551.70 it/sec) -training >> step=1597700, episode=267 reward=0.7707233 (497.15 it/sec) -training >> step=1597800, episode=267 reward=0.751687 (518.02 it/sec) -training >> step=1597900, episode=267 reward=0.7542148 (553.90 it/sec) -training >> step=1598000, episode=267 reward=0.7786345 (536.28 it/sec) -training >> step=1598100, episode=267 reward=0.7594697 (551.91 it/sec) -training >> step=1598200, episode=267 reward=0.770676 (519.00 it/sec) -training >> step=1598300, episode=267 reward=0.7653336 (514.91 it/sec) -training >> step=1598400, episode=267 reward=0.7578143 (519.95 it/sec) -training >> step=1598500, episode=267 reward=0.7654624 (501.72 it/sec) -training >> step=1598600, episode=267 reward=0.7749842 (518.82 it/sec) -training >> step=1598700, episode=267 reward=0.7490556 (567.00 it/sec) -training >> step=1598800, episode=267 reward=0.7643626 (497.95 it/sec) -training >> step=1598900, episode=267 reward=0.7463415 (512.46 it/sec) -training >> step=1599000, episode=267 reward=0.7741938 (541.74 it/sec) -training >> step=1599100, episode=267 reward=0.7513182 (541.68 it/sec) -training >> step=1599200, episode=267 reward=0.7673295 (369.38 it/sec) -training >> step=1599300, episode=267 reward=0.7798001 (523.86 it/sec) -training >> step=1599400, episode=267 reward=0.7789875 (517.38 it/sec) -training >> step=1599500, episode=267 reward=0.7652099 (543.75 it/sec) -training >> step=1599600, episode=267 reward=0.7519987 (504.96 it/sec) -training >> step=1599700, episode=267 reward=0.7534878 (557.23 it/sec) -training >> step=1599800, episode=267 reward=0.7587038 (539.71 it/sec) -training >> step=1599900, episode=267 reward=0.7510851 (477.00 it/sec) -training >> step=1600000, episode=267 reward=0.7459346 (501.38 it/sec) -training >> step=1600100, episode=267 reward=0.7560486 (512.92 it/sec) -training >> step=1600200, episode=267 reward=0.7702639 (535.33 it/sec) -training >> step=1600300, episode=267 reward=0.759993 (403.81 it/sec) -training >> step=1600400, episode=267 reward=0.7638179 (415.29 it/sec) -training >> step=1600500, episode=267 reward=0.7674094 (403.70 it/sec) -training >> step=1600600, episode=267 reward=0.7789907 (490.41 it/sec) -training >> step=1600700, episode=267 reward=0.7353727 (466.25 it/sec) -training >> step=1600800, episode=267 reward=0.7322602 (313.69 it/sec) -training >> step=1600900, episode=267 reward=0.7158102 (446.12 it/sec) -training >> step=1601000, episode=267 reward=0.7578818 (484.35 it/sec) -training >> step=1601100, episode=267 reward=0.7469141 (479.05 it/sec) -training >> step=1601200, episode=267 reward=0.7468454 (432.39 it/sec) -training >> step=1601300, episode=268 reward=0.7252258 (221.99 it/sec) -training >> step=1601400, episode=268 reward=0.7428699 (536.23 it/sec) -training >> step=1601500, episode=268 reward=0.7426822 (504.00 it/sec) -training >> step=1601600, episode=268 reward=0.7524782 (479.36 it/sec) -training >> step=1601700, episode=268 reward=0.7398626 (478.47 it/sec) -training >> step=1601800, episode=268 reward=0.7487192 (498.39 it/sec) -training >> step=1601900, episode=268 reward=0.7535557 (460.75 it/sec) -training >> step=1602000, episode=268 reward=0.7712718 (503.89 it/sec) -training >> step=1602100, episode=268 reward=0.7572086 (457.86 it/sec) -training >> step=1602200, episode=268 reward=0.7608861 (496.29 it/sec) -training >> step=1602300, episode=268 reward=0.7545274 (500.99 it/sec) -training >> step=1602400, episode=268 reward=0.7674781 (490.80 it/sec) -training >> step=1602500, episode=268 reward=0.7654632 (448.81 it/sec) -training >> step=1602600, episode=268 reward=0.7408656 (427.65 it/sec) -training >> step=1602700, episode=268 reward=0.7447667 (471.80 it/sec) -training >> step=1602800, episode=268 reward=0.7562783 (444.10 it/sec) -training >> step=1602900, episode=268 reward=0.7677354 (461.23 it/sec) -training >> step=1603000, episode=268 reward=0.7697389 (453.44 it/sec) -training >> step=1603100, episode=268 reward=0.7720264 (409.18 it/sec) -training >> step=1603200, episode=268 reward=0.7879777 (445.79 it/sec) -training >> step=1603300, episode=268 reward=0.7567513 (453.96 it/sec) -training >> step=1603400, episode=268 reward=0.7735732 (491.77 it/sec) -training >> step=1603500, episode=268 reward=0.7459917 (440.60 it/sec) -training >> step=1603600, episode=268 reward=0.7536699 (490.47 it/sec) -training >> step=1603700, episode=268 reward=0.7604997 (438.38 it/sec) -training >> step=1603800, episode=268 reward=0.7657359 (435.96 it/sec) -training >> step=1603900, episode=268 reward=0.7658358 (481.79 it/sec) -training >> step=1604000, episode=268 reward=0.7571349 (432.60 it/sec) -training >> step=1604100, episode=268 reward=0.7574497 (477.59 it/sec) -training >> step=1604200, episode=268 reward=0.7674373 (507.53 it/sec) -training >> step=1604300, episode=268 reward=0.7716569 (513.94 it/sec) -training >> step=1604400, episode=268 reward=0.7646638 (483.01 it/sec) -training >> step=1604500, episode=268 reward=0.7520573 (465.01 it/sec) -training >> step=1604600, episode=268 reward=0.7386293 (454.72 it/sec) -training >> step=1604700, episode=268 reward=0.762233 (487.75 it/sec) -training >> step=1604800, episode=268 reward=0.7739679 (506.67 it/sec) -training >> step=1604900, episode=268 reward=0.7730164 (517.84 it/sec) -training >> step=1605000, episode=268 reward=0.7454671 (490.27 it/sec) -training >> step=1605100, episode=268 reward=0.7462602 (489.64 it/sec) -training >> step=1605200, episode=268 reward=0.7711936 (426.75 it/sec) -training >> step=1605300, episode=268 reward=0.7497159 (453.41 it/sec) -training >> step=1605400, episode=268 reward=0.7423542 (478.68 it/sec) -training >> step=1605500, episode=268 reward=0.7624931 (474.00 it/sec) -training >> step=1605600, episode=268 reward=0.7532501 (493.27 it/sec) -training >> step=1605700, episode=268 reward=0.7568907 (426.02 it/sec) -training >> step=1605800, episode=268 reward=0.774559 (510.48 it/sec) -training >> step=1605900, episode=268 reward=0.7701641 (463.03 it/sec) -training >> step=1606000, episode=268 reward=0.7398674 (424.08 it/sec) -training >> step=1606100, episode=268 reward=0.753324 (502.88 it/sec) -training >> step=1606200, episode=268 reward=0.7534249 (432.73 it/sec) -training >> step=1606300, episode=268 reward=0.7554179 (404.29 it/sec) -training >> step=1606400, episode=268 reward=0.7305186 (438.99 it/sec) -training >> step=1606500, episode=268 reward=0.7283721 (493.21 it/sec) -training >> step=1606600, episode=268 reward=0.7796431 (435.53 it/sec) -training >> step=1606700, episode=268 reward=0.7429199 (482.99 it/sec) -training >> step=1606800, episode=268 reward=0.7494344 (483.34 it/sec) -training >> step=1606900, episode=268 reward=0.7354533 (490.00 it/sec) -training >> step=1607000, episode=268 reward=0.7609205 (504.26 it/sec) -training >> step=1607100, episode=268 reward=0.7514559 (470.32 it/sec) -training >> step=1607200, episode=268 reward=0.7664559 (454.15 it/sec) -training >> step=1607300, episode=269 reward=0.7470914 (95.79 it/sec) -training >> step=1607400, episode=269 reward=0.7503706 (497.63 it/sec) -training >> step=1607500, episode=269 reward=0.7450483 (462.81 it/sec) -training >> step=1607600, episode=269 reward=0.7472261 (523.73 it/sec) -training >> step=1607700, episode=269 reward=0.7474212 (503.25 it/sec) -training >> step=1607800, episode=269 reward=0.7511548 (424.20 it/sec) -training >> step=1607900, episode=269 reward=0.7452133 (457.08 it/sec) -training >> step=1608000, episode=269 reward=0.7493832 (458.68 it/sec) -training >> step=1608100, episode=269 reward=0.7843931 (528.98 it/sec) -training >> step=1608200, episode=269 reward=0.7613585 (534.26 it/sec) -training >> step=1608300, episode=269 reward=0.7763726 (507.32 it/sec) -training >> step=1608400, episode=269 reward=0.756097 (532.70 it/sec) -training >> step=1608500, episode=269 reward=0.7460197 (528.78 it/sec) -training >> step=1608600, episode=269 reward=0.7724189 (490.07 it/sec) -training >> step=1608700, episode=269 reward=0.7561589 (490.38 it/sec) -training >> step=1608800, episode=269 reward=0.7637099 (456.95 it/sec) -training >> step=1608900, episode=269 reward=0.7455003 (483.72 it/sec) -training >> step=1609000, episode=269 reward=0.7488474 (406.91 it/sec) -training >> step=1609100, episode=269 reward=0.7562159 (387.27 it/sec) -training >> step=1609200, episode=269 reward=0.7563097 (467.20 it/sec) -training >> step=1609300, episode=269 reward=0.7554827 (460.92 it/sec) -training >> step=1609400, episode=269 reward=0.7481595 (480.41 it/sec) -training >> step=1609500, episode=269 reward=0.7477523 (447.80 it/sec) -training >> step=1609600, episode=269 reward=0.7604513 (539.16 it/sec) -training >> step=1609700, episode=269 reward=0.774146 (524.69 it/sec) -training >> step=1609800, episode=269 reward=0.7700636 (480.96 it/sec) -training >> step=1609900, episode=269 reward=0.7943562 (469.94 it/sec) -training >> step=1610000, episode=269 reward=0.7583696 (515.74 it/sec) -training >> step=1610100, episode=269 reward=0.7491761 (459.37 it/sec) -training >> step=1610200, episode=269 reward=0.7676549 (475.14 it/sec) -training >> step=1610300, episode=269 reward=0.770368 (451.93 it/sec) -training >> step=1610400, episode=269 reward=0.769453 (465.49 it/sec) -training >> step=1610500, episode=269 reward=0.7568147 (433.71 it/sec) -training >> step=1610600, episode=269 reward=0.7754466 (455.24 it/sec) -training >> step=1610700, episode=269 reward=0.7536807 (463.01 it/sec) -training >> step=1610800, episode=269 reward=0.7580184 (430.41 it/sec) -training >> step=1610900, episode=269 reward=0.7630596 (476.89 it/sec) -training >> step=1611000, episode=269 reward=0.7828394 (426.96 it/sec) -training >> step=1611100, episode=269 reward=0.7776444 (471.60 it/sec) -training >> step=1611200, episode=269 reward=0.7318312 (441.45 it/sec) -training >> step=1611300, episode=269 reward=0.748807 (397.43 it/sec) -training >> step=1611400, episode=269 reward=0.7589378 (441.88 it/sec) -training >> step=1611500, episode=269 reward=0.7453512 (463.50 it/sec) -training >> step=1611600, episode=269 reward=0.7524455 (448.44 it/sec) -training >> step=1611700, episode=269 reward=0.7706916 (442.83 it/sec) -training >> step=1611800, episode=269 reward=0.7796544 (311.07 it/sec) -training >> step=1611900, episode=269 reward=0.7643524 (463.98 it/sec) -training >> step=1612000, episode=269 reward=0.7708489 (463.11 it/sec) -training >> step=1612100, episode=269 reward=0.7356587 (446.14 it/sec) -training >> step=1612200, episode=269 reward=0.7169347 (452.32 it/sec) -training >> step=1612300, episode=269 reward=0.7561963 (442.16 it/sec) -training >> step=1612400, episode=269 reward=0.7395444 (434.70 it/sec) -training >> step=1612500, episode=269 reward=0.7778286 (432.53 it/sec) -training >> step=1612600, episode=269 reward=0.7488934 (408.58 it/sec) -training >> step=1612700, episode=269 reward=0.7374168 (403.38 it/sec) -training >> step=1612800, episode=269 reward=0.7529678 (416.94 it/sec) -training >> step=1612900, episode=269 reward=0.7433074 (407.36 it/sec) -training >> step=1613000, episode=269 reward=0.7556461 (477.76 it/sec) -training >> step=1613100, episode=269 reward=0.7370746 (434.27 it/sec) -training >> step=1613200, episode=269 reward=0.7422653 (416.59 it/sec) -training >> step=1613300, episode=270 reward=0.7327026 (179.89 it/sec) -training >> step=1613400, episode=270 reward=0.7456268 (475.61 it/sec) -training >> step=1613500, episode=270 reward=0.7685515 (454.54 it/sec) -training >> step=1613600, episode=270 reward=0.7642657 (426.60 it/sec) -training >> step=1613700, episode=270 reward=0.7596154 (422.04 it/sec) -training >> step=1613800, episode=270 reward=0.7563463 (455.43 it/sec) -training >> step=1613900, episode=270 reward=0.7521122 (440.10 it/sec) -training >> step=1614000, episode=270 reward=0.7519698 (463.74 it/sec) -training >> step=1614100, episode=270 reward=0.7553508 (433.01 it/sec) -training >> step=1614200, episode=270 reward=0.7668353 (466.65 it/sec) -training >> step=1614300, episode=270 reward=0.7551713 (431.25 it/sec) -training >> step=1614400, episode=270 reward=0.7312335 (474.31 it/sec) -training >> step=1614500, episode=270 reward=0.7738398 (425.13 it/sec) -training >> step=1614600, episode=270 reward=0.7906637 (451.07 it/sec) -training >> step=1614700, episode=270 reward=0.7641338 (481.13 it/sec) -training >> step=1614800, episode=270 reward=0.7639641 (435.98 it/sec) -training >> step=1614900, episode=270 reward=0.7739383 (425.85 it/sec) -training >> step=1615000, episode=270 reward=0.752745 (473.43 it/sec) -training >> step=1615100, episode=270 reward=0.7384833 (473.54 it/sec) -training >> step=1615200, episode=270 reward=0.7678975 (478.37 it/sec) -training >> step=1615300, episode=270 reward=0.7700251 (436.44 it/sec) -training >> step=1615400, episode=270 reward=0.7629659 (421.89 it/sec) -training >> step=1615500, episode=270 reward=0.7543764 (460.10 it/sec) -training >> step=1615600, episode=270 reward=0.780039 (456.84 it/sec) -training >> step=1615700, episode=270 reward=0.7433972 (425.33 it/sec) -training >> step=1615800, episode=270 reward=0.7706748 (456.88 it/sec) -training >> step=1615900, episode=270 reward=0.7654669 (406.28 it/sec) -training >> step=1616000, episode=270 reward=0.7499835 (455.49 it/sec) -training >> step=1616100, episode=270 reward=0.7734951 (468.29 it/sec) -training >> step=1616200, episode=270 reward=0.7434341 (411.40 it/sec) -training >> step=1616300, episode=270 reward=0.7509666 (428.09 it/sec) -training >> step=1616400, episode=270 reward=0.7637049 (459.08 it/sec) -training >> step=1616500, episode=270 reward=0.7751396 (449.51 it/sec) -training >> step=1616600, episode=270 reward=0.774088 (478.44 it/sec) -training >> step=1616700, episode=270 reward=0.7654091 (420.59 it/sec) -training >> step=1616800, episode=270 reward=0.7491239 (449.29 it/sec) -training >> step=1616900, episode=270 reward=0.7670694 (461.17 it/sec) -training >> step=1617000, episode=270 reward=0.7299007 (488.48 it/sec) -training >> step=1617100, episode=270 reward=0.7500998 (457.89 it/sec) -training >> step=1617200, episode=270 reward=0.7649692 (436.03 it/sec) -training >> step=1617300, episode=270 reward=0.7588704 (455.18 it/sec) -training >> step=1617400, episode=270 reward=0.7677947 (436.09 it/sec) -training >> step=1617500, episode=270 reward=0.7786751 (432.17 it/sec) -training >> step=1617600, episode=270 reward=0.7603234 (445.78 it/sec) -training >> step=1617700, episode=270 reward=0.7762523 (426.88 it/sec) -training >> step=1617800, episode=270 reward=0.7537418 (479.70 it/sec) -training >> step=1617900, episode=270 reward=0.7647288 (446.29 it/sec) -training >> step=1618000, episode=270 reward=0.7700074 (385.36 it/sec) -training >> step=1618100, episode=270 reward=0.7683963 (327.08 it/sec) -training >> step=1618200, episode=270 reward=0.7402902 (470.66 it/sec) -training >> step=1618300, episode=270 reward=0.7612849 (471.14 it/sec) -training >> step=1618400, episode=270 reward=0.753793 (418.03 it/sec) -training >> step=1618500, episode=270 reward=0.7386649 (411.65 it/sec) -training >> step=1618600, episode=270 reward=0.7502521 (470.41 it/sec) -training >> step=1618700, episode=270 reward=0.7297525 (448.07 it/sec) -training >> step=1618800, episode=270 reward=0.7562115 (457.39 it/sec) -training >> step=1618900, episode=270 reward=0.7523937 (449.70 it/sec) -training >> step=1619000, episode=270 reward=0.7707846 (456.68 it/sec) -training >> step=1619100, episode=270 reward=0.7591127 (453.06 it/sec) -training >> step=1619200, episode=270 reward=0.7677776 (458.16 it/sec) -training >> step=1619300, episode=271 reward=0.7543467 (164.26 it/sec) -training >> step=1619400, episode=271 reward=0.7533602 (516.16 it/sec) -training >> step=1619500, episode=271 reward=0.7523459 (464.65 it/sec) -training >> step=1619600, episode=271 reward=0.7532676 (445.61 it/sec) -training >> step=1619700, episode=271 reward=0.7724266 (410.86 it/sec) -training >> step=1619800, episode=271 reward=0.7496379 (461.56 it/sec) -training >> step=1619900, episode=271 reward=0.7767928 (444.42 it/sec) -training >> step=1620000, episode=271 reward=0.7631419 (400.34 it/sec) -training >> step=1620100, episode=271 reward=0.7590773 (454.84 it/sec) -training >> step=1620200, episode=271 reward=0.7676275 (461.48 it/sec) -training >> step=1620300, episode=271 reward=0.7580658 (449.13 it/sec) -training >> step=1620400, episode=271 reward=0.7973545 (422.16 it/sec) -training >> step=1620500, episode=271 reward=0.7457728 (433.53 it/sec) -training >> step=1620600, episode=271 reward=0.7650265 (494.99 it/sec) -training >> step=1620700, episode=271 reward=0.7427845 (446.84 it/sec) -training >> step=1620800, episode=271 reward=0.7479211 (460.36 it/sec) -training >> step=1620900, episode=271 reward=0.7716576 (462.26 it/sec) -training >> step=1621000, episode=271 reward=0.7493418 (439.08 it/sec) -training >> step=1621100, episode=271 reward=0.7507812 (450.83 it/sec) -training >> step=1621200, episode=271 reward=0.7552949 (460.82 it/sec) -training >> step=1621300, episode=271 reward=0.7529672 (452.38 it/sec) -training >> step=1621400, episode=271 reward=0.76732 (474.52 it/sec) -training >> step=1621500, episode=271 reward=0.7772035 (430.15 it/sec) -training >> step=1621600, episode=271 reward=0.7722493 (432.87 it/sec) -training >> step=1621700, episode=271 reward=0.7596611 (470.05 it/sec) -training >> step=1621800, episode=271 reward=0.7575737 (504.23 it/sec) -training >> step=1621900, episode=271 reward=0.7528245 (444.54 it/sec) -training >> step=1622000, episode=271 reward=0.7520579 (404.02 it/sec) -training >> step=1622100, episode=271 reward=0.7479167 (426.55 it/sec) -training >> step=1622200, episode=271 reward=0.7320716 (441.59 it/sec) -training >> step=1622300, episode=271 reward=0.7452348 (502.27 it/sec) -training >> step=1622400, episode=271 reward=0.7484147 (445.49 it/sec) -training >> step=1622500, episode=271 reward=0.7423551 (462.12 it/sec) -training >> step=1622600, episode=271 reward=0.7724487 (433.91 it/sec) -training >> step=1622700, episode=271 reward=0.7589493 (487.76 it/sec) -training >> step=1622800, episode=271 reward=0.7667816 (480.64 it/sec) -training >> step=1622900, episode=271 reward=0.7439656 (442.04 it/sec) -training >> step=1623000, episode=271 reward=0.772348 (424.12 it/sec) -training >> step=1623100, episode=271 reward=0.7768283 (474.29 it/sec) -training >> step=1623200, episode=271 reward=0.7890096 (446.14 it/sec) -training >> step=1623300, episode=271 reward=0.7494632 (442.66 it/sec) -training >> step=1623400, episode=271 reward=0.7527974 (482.85 it/sec) -training >> step=1623500, episode=271 reward=0.7559599 (486.57 it/sec) -training >> step=1623600, episode=271 reward=0.7632261 (498.12 it/sec) -training >> step=1623700, episode=271 reward=0.7663491 (473.44 it/sec) -training >> step=1623800, episode=271 reward=0.7510506 (455.78 it/sec) -training >> step=1623900, episode=271 reward=0.7627596 (451.16 it/sec) -training >> step=1624000, episode=271 reward=0.7319419 (423.62 it/sec) -training >> step=1624100, episode=271 reward=0.7467574 (479.71 it/sec) -training >> step=1624200, episode=271 reward=0.7598053 (466.65 it/sec) -training >> step=1624300, episode=271 reward=0.7665187 (309.78 it/sec) -training >> step=1624400, episode=271 reward=0.7822494 (458.97 it/sec) -training >> step=1624500, episode=271 reward=0.7522141 (450.96 it/sec) -training >> step=1624600, episode=271 reward=0.7648882 (499.91 it/sec) -training >> step=1624700, episode=271 reward=0.7538579 (448.71 it/sec) -training >> step=1624800, episode=271 reward=0.7714016 (437.67 it/sec) -training >> step=1624900, episode=271 reward=0.7675807 (455.76 it/sec) -training >> step=1625000, episode=271 reward=0.7447332 (463.73 it/sec) -training >> step=1625100, episode=271 reward=0.7411996 (501.48 it/sec) -training >> step=1625200, episode=271 reward=0.7581643 (410.99 it/sec) -training >> step=1625300, episode=272 reward=0.7483821 (152.86 it/sec) -training >> step=1625400, episode=272 reward=0.7574071 (454.33 it/sec) -training >> step=1625500, episode=272 reward=0.768731 (499.36 it/sec) -training >> step=1625600, episode=272 reward=0.7656178 (466.78 it/sec) -training >> step=1625700, episode=272 reward=0.7740936 (446.20 it/sec) -training >> step=1625800, episode=272 reward=0.7457295 (423.39 it/sec) -training >> step=1625900, episode=272 reward=0.7542325 (444.76 it/sec) -training >> step=1626000, episode=272 reward=0.7540775 (465.83 it/sec) -training >> step=1626100, episode=272 reward=0.7876602 (468.62 it/sec) -training >> step=1626200, episode=272 reward=0.7599829 (480.18 it/sec) -training >> step=1626300, episode=272 reward=0.747665 (458.70 it/sec) -training >> step=1626400, episode=272 reward=0.7533138 (359.13 it/sec) -training >> step=1626500, episode=272 reward=0.7662829 (418.79 it/sec) -training >> step=1626600, episode=272 reward=0.7723923 (441.78 it/sec) -training >> step=1626700, episode=272 reward=0.7667854 (448.19 it/sec) -training >> step=1626800, episode=272 reward=0.7557935 (459.23 it/sec) -training >> step=1626900, episode=272 reward=0.7770724 (441.14 it/sec) -training >> step=1627000, episode=272 reward=0.7787101 (425.34 it/sec) -training >> step=1627100, episode=272 reward=0.7585497 (500.39 it/sec) -training >> step=1627200, episode=272 reward=0.7470726 (480.46 it/sec) -training >> step=1627300, episode=272 reward=0.7603233 (441.44 it/sec) -training >> step=1627400, episode=272 reward=0.7525191 (440.99 it/sec) -training >> step=1627500, episode=272 reward=0.7472331 (441.64 it/sec) -training >> step=1627600, episode=272 reward=0.7801918 (494.28 it/sec) -training >> step=1627700, episode=272 reward=0.7558169 (464.29 it/sec) -training >> step=1627800, episode=272 reward=0.7453145 (428.77 it/sec) -training >> step=1627900, episode=272 reward=0.781648 (459.47 it/sec) -training >> step=1628000, episode=272 reward=0.7550477 (457.47 it/sec) -training >> step=1628100, episode=272 reward=0.7848997 (477.51 it/sec) -training >> step=1628200, episode=272 reward=0.7650445 (452.09 it/sec) -training >> step=1628300, episode=272 reward=0.7527324 (411.45 it/sec) -training >> step=1628400, episode=272 reward=0.7607535 (467.20 it/sec) -training >> step=1628500, episode=272 reward=0.7536631 (463.74 it/sec) -training >> step=1628600, episode=272 reward=0.7672728 (449.17 it/sec) -training >> step=1628700, episode=272 reward=0.7762711 (438.79 it/sec) -training >> step=1628800, episode=272 reward=0.7642096 (471.04 it/sec) -training >> step=1628900, episode=272 reward=0.760456 (471.80 it/sec) -training >> step=1629000, episode=272 reward=0.7670826 (470.01 it/sec) -training >> step=1629100, episode=272 reward=0.7699107 (483.33 it/sec) -training >> step=1629200, episode=272 reward=0.7531634 (429.33 it/sec) -training >> step=1629300, episode=272 reward=0.7543616 (477.03 it/sec) -training >> step=1629400, episode=272 reward=0.7682325 (471.89 it/sec) -training >> step=1629500, episode=272 reward=0.7653449 (456.80 it/sec) -training >> step=1629600, episode=272 reward=0.7477273 (445.24 it/sec) -training >> step=1629700, episode=272 reward=0.762261 (381.56 it/sec) -training >> step=1629800, episode=272 reward=0.776158 (467.36 it/sec) -training >> step=1629900, episode=272 reward=0.7432365 (455.86 it/sec) -training >> step=1630000, episode=272 reward=0.7650814 (466.30 it/sec) -training >> step=1630100, episode=272 reward=0.7495562 (435.39 it/sec) -training >> step=1630200, episode=272 reward=0.7406943 (449.57 it/sec) -training >> step=1630300, episode=272 reward=0.7397778 (491.49 it/sec) -training >> step=1630400, episode=272 reward=0.7761831 (498.91 it/sec) -training >> step=1630500, episode=272 reward=0.7681396 (319.05 it/sec) -training >> step=1630600, episode=272 reward=0.7615269 (416.28 it/sec) -training >> step=1630700, episode=272 reward=0.7545555 (448.00 it/sec) -training >> step=1630800, episode=272 reward=0.7430657 (480.17 it/sec) -training >> step=1630900, episode=272 reward=0.7532465 (407.06 it/sec) -training >> step=1631000, episode=272 reward=0.7519937 (418.06 it/sec) -training >> step=1631100, episode=272 reward=0.7577465 (452.40 it/sec) -training >> step=1631200, episode=272 reward=0.7770957 (448.32 it/sec) -training >> step=1631300, episode=273 reward=0.7759352 (147.92 it/sec) -training >> step=1631400, episode=273 reward=0.7671227 (496.00 it/sec) -training >> step=1631500, episode=273 reward=0.7627863 (432.33 it/sec) -training >> step=1631600, episode=273 reward=0.7628659 (447.82 it/sec) -training >> step=1631700, episode=273 reward=0.7912502 (470.63 it/sec) -training >> step=1631800, episode=273 reward=0.7508045 (470.28 it/sec) -training >> step=1631900, episode=273 reward=0.7480857 (457.14 it/sec) -training >> step=1632000, episode=273 reward=0.7711154 (489.91 it/sec) -training >> step=1632100, episode=273 reward=0.7756726 (422.09 it/sec) -training >> step=1632200, episode=273 reward=0.7600265 (434.26 it/sec) -training >> step=1632300, episode=273 reward=0.7593172 (466.53 it/sec) -training >> step=1632400, episode=273 reward=0.7578738 (458.24 it/sec) -training >> step=1632500, episode=273 reward=0.7673994 (474.35 it/sec) -training >> step=1632600, episode=273 reward=0.7610945 (451.69 it/sec) -training >> step=1632700, episode=273 reward=0.7669853 (415.67 it/sec) -training >> step=1632800, episode=273 reward=0.758523 (467.93 it/sec) -training >> step=1632900, episode=273 reward=0.740384 (480.39 it/sec) -training >> step=1633000, episode=273 reward=0.7655996 (460.57 it/sec) -training >> step=1633100, episode=273 reward=0.763523 (420.27 it/sec) -training >> step=1633200, episode=273 reward=0.7466605 (438.37 it/sec) -training >> step=1633300, episode=273 reward=0.7501655 (438.82 it/sec) -training >> step=1633400, episode=273 reward=0.7685866 (480.27 it/sec) -training >> step=1633500, episode=273 reward=0.7611918 (475.09 it/sec) -training >> step=1633600, episode=273 reward=0.7688356 (450.17 it/sec) -training >> step=1633700, episode=273 reward=0.7539182 (454.35 it/sec) -training >> step=1633800, episode=273 reward=0.7633024 (425.46 it/sec) -training >> step=1633900, episode=273 reward=0.7763817 (420.98 it/sec) -training >> step=1634000, episode=273 reward=0.7737284 (436.58 it/sec) -training >> step=1634100, episode=273 reward=0.7665371 (453.30 it/sec) -training >> step=1634200, episode=273 reward=0.7518027 (477.84 it/sec) -training >> step=1634300, episode=273 reward=0.7439277 (471.52 it/sec) -training >> step=1634400, episode=273 reward=0.7602094 (499.65 it/sec) -training >> step=1634500, episode=273 reward=0.7760445 (488.72 it/sec) -training >> step=1634600, episode=273 reward=0.7506375 (489.08 it/sec) -training >> step=1634700, episode=273 reward=0.7673158 (497.21 it/sec) -training >> step=1634800, episode=273 reward=0.7685142 (506.08 it/sec) -training >> step=1634900, episode=273 reward=0.7717931 (517.28 it/sec) -training >> step=1635000, episode=273 reward=0.7500572 (484.28 it/sec) -training >> step=1635100, episode=273 reward=0.7479704 (468.31 it/sec) -training >> step=1635200, episode=273 reward=0.7574585 (501.98 it/sec) -training >> step=1635300, episode=273 reward=0.7687556 (483.95 it/sec) -training >> step=1635400, episode=273 reward=0.7651767 (503.58 it/sec) -training >> step=1635500, episode=273 reward=0.760496 (482.33 it/sec) -training >> step=1635600, episode=273 reward=0.7673308 (439.22 it/sec) -training >> step=1635700, episode=273 reward=0.7670601 (505.27 it/sec) -training >> step=1635800, episode=273 reward=0.7593919 (471.60 it/sec) -training >> step=1635900, episode=273 reward=0.7533255 (511.76 it/sec) -training >> step=1636000, episode=273 reward=0.7701354 (538.51 it/sec) -training >> step=1636100, episode=273 reward=0.7461057 (478.22 it/sec) -training >> step=1636200, episode=273 reward=0.7732237 (503.56 it/sec) -training >> step=1636300, episode=273 reward=0.7550829 (522.95 it/sec) -training >> step=1636400, episode=273 reward=0.7610876 (506.87 it/sec) -training >> step=1636500, episode=273 reward=0.7577912 (508.06 it/sec) -training >> step=1636600, episode=273 reward=0.7436329 (482.56 it/sec) -training >> step=1636700, episode=273 reward=0.7674698 (439.10 it/sec) -training >> step=1636800, episode=273 reward=0.7620679 (517.28 it/sec) -training >> step=1636900, episode=273 reward=0.7732675 (509.28 it/sec) -training >> step=1637000, episode=273 reward=0.7653095 (503.10 it/sec) -training >> step=1637100, episode=273 reward=0.75433 (460.23 it/sec) -training >> step=1637200, episode=273 reward=0.7833229 (553.61 it/sec) -training >> step=1637300, episode=274 reward=0.7401929 (137.09 it/sec) -training >> step=1637400, episode=274 reward=0.7628959 (492.48 it/sec) -training >> step=1637500, episode=274 reward=0.7441788 (506.81 it/sec) -training >> step=1637600, episode=274 reward=0.7714432 (500.96 it/sec) -training >> step=1637700, episode=274 reward=0.7747403 (490.21 it/sec) -training >> step=1637800, episode=274 reward=0.7413399 (512.77 it/sec) -training >> step=1637900, episode=274 reward=0.7456505 (472.20 it/sec) -training >> step=1638000, episode=274 reward=0.7533012 (515.34 it/sec) -training >> step=1638100, episode=274 reward=0.7596733 (503.39 it/sec) -training >> step=1638200, episode=274 reward=0.7801572 (489.63 it/sec) -training >> step=1638300, episode=274 reward=0.7587624 (518.42 it/sec) -training >> step=1638400, episode=274 reward=0.7621789 (508.96 it/sec) -training >> step=1638500, episode=274 reward=0.7585239 (527.57 it/sec) -training >> step=1638600, episode=274 reward=0.7686968 (506.64 it/sec) -training >> step=1638700, episode=274 reward=0.7705425 (494.97 it/sec) -training >> step=1638800, episode=274 reward=0.7774741 (512.26 it/sec) -training >> step=1638900, episode=274 reward=0.7599117 (457.25 it/sec) -training >> step=1639000, episode=274 reward=0.7862487 (515.57 it/sec) -training >> step=1639100, episode=274 reward=0.7579182 (503.63 it/sec) -training >> step=1639200, episode=274 reward=0.7662641 (533.94 it/sec) -training >> step=1639300, episode=274 reward=0.7435358 (514.23 it/sec) -training >> step=1639400, episode=274 reward=0.7618598 (481.16 it/sec) -training >> step=1639500, episode=274 reward=0.7687082 (550.41 it/sec) -training >> step=1639600, episode=274 reward=0.7813369 (485.81 it/sec) -training >> step=1639700, episode=274 reward=0.7715213 (491.04 it/sec) -training >> step=1639800, episode=274 reward=0.767768 (524.76 it/sec) -training >> step=1639900, episode=274 reward=0.7768754 (498.72 it/sec) -training >> step=1640000, episode=274 reward=0.7685363 (477.76 it/sec) -training >> step=1640100, episode=274 reward=0.779606 (482.07 it/sec) -training >> step=1640200, episode=274 reward=0.7656572 (514.21 it/sec) -training >> step=1640300, episode=274 reward=0.7755104 (477.68 it/sec) -training >> step=1640400, episode=274 reward=0.7652893 (466.44 it/sec) -training >> step=1640500, episode=274 reward=0.7380589 (508.07 it/sec) -training >> step=1640600, episode=274 reward=0.7591749 (520.19 it/sec) -training >> step=1640700, episode=274 reward=0.7513383 (517.43 it/sec) -training >> step=1640800, episode=274 reward=0.7435835 (428.85 it/sec) -training >> step=1640900, episode=274 reward=0.7719914 (444.51 it/sec) -training >> step=1641000, episode=274 reward=0.772889 (480.26 it/sec) -training >> step=1641100, episode=274 reward=0.7475309 (506.02 it/sec) -training >> step=1641200, episode=274 reward=0.7698779 (518.70 it/sec) -training >> step=1641300, episode=274 reward=0.7458166 (510.56 it/sec) -training >> step=1641400, episode=274 reward=0.7857832 (475.32 it/sec) -training >> step=1641500, episode=274 reward=0.7424987 (490.01 it/sec) -training >> step=1641600, episode=274 reward=0.7724005 (509.63 it/sec) -training >> step=1641700, episode=274 reward=0.783454 (517.01 it/sec) -training >> step=1641800, episode=274 reward=0.758095 (544.04 it/sec) -training >> step=1641900, episode=274 reward=0.7788211 (456.85 it/sec) -training >> step=1642000, episode=274 reward=0.7595435 (405.12 it/sec) -training >> step=1642100, episode=274 reward=0.7502119 (464.60 it/sec) -training >> step=1642200, episode=274 reward=0.7782151 (508.82 it/sec) -training >> step=1642300, episode=274 reward=0.7513234 (519.62 it/sec) -training >> step=1642400, episode=274 reward=0.7880661 (467.77 it/sec) -training >> step=1642500, episode=274 reward=0.7554377 (488.26 it/sec) -training >> step=1642600, episode=274 reward=0.7557482 (481.32 it/sec) -training >> step=1642700, episode=274 reward=0.7493149 (461.33 it/sec) -training >> step=1642800, episode=274 reward=0.7422853 (342.08 it/sec) -training >> step=1642900, episode=274 reward=0.7537463 (406.67 it/sec) -training >> step=1643000, episode=274 reward=0.7500175 (476.28 it/sec) -training >> step=1643100, episode=274 reward=0.7721931 (479.42 it/sec) -training >> step=1643200, episode=274 reward=0.7543959 (513.57 it/sec) -training >> step=1643300, episode=275 reward=0.7626125 (134.85 it/sec) -training >> step=1643400, episode=275 reward=0.7445447 (487.80 it/sec) -training >> step=1643500, episode=275 reward=0.7501225 (476.74 it/sec) -training >> step=1643600, episode=275 reward=0.7626007 (474.37 it/sec) -training >> step=1643700, episode=275 reward=0.7623502 (475.14 it/sec) -training >> step=1643800, episode=275 reward=0.7780507 (488.72 it/sec) -training >> step=1643900, episode=275 reward=0.786886 (501.44 it/sec) -training >> step=1644000, episode=275 reward=0.7444254 (427.85 it/sec) -training >> step=1644100, episode=275 reward=0.7569543 (443.27 it/sec) -training >> step=1644200, episode=275 reward=0.7542082 (510.02 it/sec) -training >> step=1644300, episode=275 reward=0.742673 (512.43 it/sec) -training >> step=1644400, episode=275 reward=0.7584169 (476.29 it/sec) -training >> step=1644500, episode=275 reward=0.7515337 (443.55 it/sec) -training >> step=1644600, episode=275 reward=0.746669 (433.40 it/sec) -training >> step=1644700, episode=275 reward=0.7715569 (465.76 it/sec) -training >> step=1644800, episode=275 reward=0.7559324 (505.30 it/sec) -training >> step=1644900, episode=275 reward=0.766825 (517.85 it/sec) -training >> step=1645000, episode=275 reward=0.7568394 (499.66 it/sec) -training >> step=1645100, episode=275 reward=0.7834834 (492.62 it/sec) -training >> step=1645200, episode=275 reward=0.7593288 (535.70 it/sec) -training >> step=1645300, episode=275 reward=0.7446819 (532.21 it/sec) -training >> step=1645400, episode=275 reward=0.7743294 (508.09 it/sec) -training >> step=1645500, episode=275 reward=0.7735215 (525.47 it/sec) -training >> step=1645600, episode=275 reward=0.7624338 (479.34 it/sec) -training >> step=1645700, episode=275 reward=0.7519349 (518.31 it/sec) -training >> step=1645800, episode=275 reward=0.7656854 (544.28 it/sec) -training >> step=1645900, episode=275 reward=0.7734786 (522.19 it/sec) -training >> step=1646000, episode=275 reward=0.7736858 (521.29 it/sec) -training >> step=1646100, episode=275 reward=0.7581692 (547.31 it/sec) -training >> step=1646200, episode=275 reward=0.7613727 (509.45 it/sec) -training >> step=1646300, episode=275 reward=0.7823194 (520.14 it/sec) -training >> step=1646400, episode=275 reward=0.761978 (528.94 it/sec) -training >> step=1646500, episode=275 reward=0.7848681 (538.86 it/sec) -training >> step=1646600, episode=275 reward=0.7553181 (529.77 it/sec) -training >> step=1646700, episode=275 reward=0.7588475 (507.74 it/sec) -training >> step=1646800, episode=275 reward=0.7574226 (526.36 it/sec) -training >> step=1646900, episode=275 reward=0.7757409 (528.36 it/sec) -training >> step=1647000, episode=275 reward=0.7640569 (506.77 it/sec) -training >> step=1647100, episode=275 reward=0.7718579 (525.44 it/sec) -training >> step=1647200, episode=275 reward=0.7535552 (523.97 it/sec) -training >> step=1647300, episode=275 reward=0.7600544 (532.08 it/sec) -training >> step=1647400, episode=275 reward=0.7656796 (535.61 it/sec) -training >> step=1647500, episode=275 reward=0.7641767 (481.33 it/sec) -training >> step=1647600, episode=275 reward=0.7440017 (529.48 it/sec) -training >> step=1647700, episode=275 reward=0.7643475 (528.62 it/sec) -training >> step=1647800, episode=275 reward=0.7633209 (506.49 it/sec) -training >> step=1647900, episode=275 reward=0.7561997 (512.28 it/sec) -training >> step=1648000, episode=275 reward=0.7626325 (494.11 it/sec) -training >> step=1648100, episode=275 reward=0.758419 (551.07 it/sec) -training >> step=1648200, episode=275 reward=0.7541113 (509.42 it/sec) -training >> step=1648300, episode=275 reward=0.7509162 (526.74 it/sec) -training >> step=1648400, episode=275 reward=0.7729497 (524.05 it/sec) -training >> step=1648500, episode=275 reward=0.7613673 (540.73 it/sec) -training >> step=1648600, episode=275 reward=0.7703145 (507.18 it/sec) -training >> step=1648700, episode=275 reward=0.7491591 (498.87 it/sec) -training >> step=1648800, episode=275 reward=0.7573074 (420.25 it/sec) -training >> step=1648900, episode=275 reward=0.7589692 (550.09 it/sec) -training >> step=1649000, episode=275 reward=0.7305517 (506.30 it/sec) -training >> step=1649100, episode=275 reward=0.7605991 (490.54 it/sec) -training >> step=1649200, episode=275 reward=0.758451 (518.30 it/sec) -training >> step=1649300, episode=276 reward=0.756144 (178.05 it/sec) -training >> step=1649400, episode=276 reward=0.7535971 (500.99 it/sec) -training >> step=1649500, episode=276 reward=0.7506155 (470.28 it/sec) -training >> step=1649600, episode=276 reward=0.7353733 (482.62 it/sec) -training >> step=1649700, episode=276 reward=0.7649499 (499.36 it/sec) -training >> step=1649800, episode=276 reward=0.7645849 (573.04 it/sec) -training >> step=1649900, episode=276 reward=0.7611171 (498.27 it/sec) -training >> step=1650000, episode=276 reward=0.7615604 (524.59 it/sec) -training >> step=1650100, episode=276 reward=0.7549108 (515.09 it/sec) -training >> step=1650200, episode=276 reward=0.7710104 (517.58 it/sec) -training >> step=1650300, episode=276 reward=0.7486055 (536.03 it/sec) -training >> step=1650400, episode=276 reward=0.7648607 (492.94 it/sec) -training >> step=1650500, episode=276 reward=0.7944661 (553.77 it/sec) -training >> step=1650600, episode=276 reward=0.745605 (532.57 it/sec) -training >> step=1650700, episode=276 reward=0.7406523 (515.53 it/sec) -training >> step=1650800, episode=276 reward=0.7984308 (516.81 it/sec) -training >> step=1650900, episode=276 reward=0.7653054 (507.05 it/sec) -training >> step=1651000, episode=276 reward=0.7577633 (535.94 it/sec) -training >> step=1651100, episode=276 reward=0.7699673 (516.04 it/sec) -training >> step=1651200, episode=276 reward=0.7542082 (520.93 it/sec) -training >> step=1651300, episode=276 reward=0.7633149 (536.45 it/sec) -training >> step=1651400, episode=276 reward=0.7620993 (522.88 it/sec) -training >> step=1651500, episode=276 reward=0.7588263 (519.94 it/sec) -training >> step=1651600, episode=276 reward=0.7660868 (534.77 it/sec) -training >> step=1651700, episode=276 reward=0.752435 (542.38 it/sec) -training >> step=1651800, episode=276 reward=0.7634827 (500.53 it/sec) -training >> step=1651900, episode=276 reward=0.757674 (534.83 it/sec) -training >> step=1652000, episode=276 reward=0.7705842 (543.58 it/sec) -training >> step=1652100, episode=276 reward=0.7760964 (556.94 it/sec) -training >> step=1652200, episode=276 reward=0.7568956 (521.75 it/sec) -training >> step=1652300, episode=276 reward=0.7801163 (535.40 it/sec) -training >> step=1652400, episode=276 reward=0.7765057 (534.58 it/sec) -training >> step=1652500, episode=276 reward=0.7585446 (540.11 it/sec) -training >> step=1652600, episode=276 reward=0.7726274 (515.10 it/sec) -training >> step=1652700, episode=276 reward=0.7711034 (532.09 it/sec) -training >> step=1652800, episode=276 reward=0.7837288 (513.94 it/sec) -training >> step=1652900, episode=276 reward=0.7769466 (531.13 it/sec) -training >> step=1653000, episode=276 reward=0.761755 (532.81 it/sec) -training >> step=1653100, episode=276 reward=0.7509617 (521.56 it/sec) -training >> step=1653200, episode=276 reward=0.7592877 (497.83 it/sec) -training >> step=1653300, episode=276 reward=0.7632393 (429.45 it/sec) -training >> step=1653400, episode=276 reward=0.7806064 (448.36 it/sec) -training >> step=1653500, episode=276 reward=0.733409 (463.22 it/sec) -training >> step=1653600, episode=276 reward=0.7689517 (445.70 it/sec) -training >> step=1653700, episode=276 reward=0.7606749 (567.51 it/sec) -training >> step=1653800, episode=276 reward=0.7583723 (502.64 it/sec) -training >> step=1653900, episode=276 reward=0.7502843 (504.20 it/sec) -training >> step=1654000, episode=276 reward=0.7480949 (506.51 it/sec) -training >> step=1654100, episode=276 reward=0.7750264 (506.94 it/sec) -training >> step=1654200, episode=276 reward=0.7408797 (541.33 it/sec) -training >> step=1654300, episode=276 reward=0.7490493 (426.51 it/sec) -training >> step=1654400, episode=276 reward=0.7628685 (437.24 it/sec) -training >> step=1654500, episode=276 reward=0.7569957 (506.45 it/sec) -training >> step=1654600, episode=276 reward=0.7629719 (463.84 it/sec) -training >> step=1654700, episode=276 reward=0.7420622 (474.10 it/sec) -training >> step=1654800, episode=276 reward=0.7562156 (397.86 it/sec) -training >> step=1654900, episode=276 reward=0.7570215 (541.81 it/sec) -training >> step=1655000, episode=276 reward=0.7503242 (513.14 it/sec) -training >> step=1655100, episode=276 reward=0.7493404 (520.64 it/sec) -training >> step=1655200, episode=276 reward=0.7713686 (535.68 it/sec) -training >> step=1655300, episode=277 reward=0.7538424 (155.71 it/sec) -training >> step=1655400, episode=277 reward=0.759676 (500.16 it/sec) -training >> step=1655500, episode=277 reward=0.7814631 (527.46 it/sec) -training >> step=1655600, episode=277 reward=0.7636977 (513.09 it/sec) -training >> step=1655700, episode=277 reward=0.7644201 (487.72 it/sec) -training >> step=1655800, episode=277 reward=0.7570588 (571.03 it/sec) -training >> step=1655900, episode=277 reward=0.7680001 (466.68 it/sec) -training >> step=1656000, episode=277 reward=0.7462677 (522.49 it/sec) -training >> step=1656100, episode=277 reward=0.7647907 (517.61 it/sec) -training >> step=1656200, episode=277 reward=0.7703811 (528.81 it/sec) -training >> step=1656300, episode=277 reward=0.7671803 (526.80 it/sec) -training >> step=1656400, episode=277 reward=0.7793993 (496.63 it/sec) -training >> step=1656500, episode=277 reward=0.7610791 (517.35 it/sec) -training >> step=1656600, episode=277 reward=0.769777 (492.23 it/sec) -training >> step=1656700, episode=277 reward=0.7803963 (507.92 it/sec) -training >> step=1656800, episode=277 reward=0.7602744 (463.59 it/sec) -training >> step=1656900, episode=277 reward=0.7575926 (521.35 it/sec) -training >> step=1657000, episode=277 reward=0.7800679 (491.76 it/sec) -training >> step=1657100, episode=277 reward=0.7650871 (484.41 it/sec) -training >> step=1657200, episode=277 reward=0.7530009 (501.13 it/sec) -training >> step=1657300, episode=277 reward=0.7584984 (496.27 it/sec) -training >> step=1657400, episode=277 reward=0.7612926 (506.22 it/sec) -training >> step=1657500, episode=277 reward=0.7802413 (497.42 it/sec) -training >> step=1657600, episode=277 reward=0.7450827 (476.07 it/sec) -training >> step=1657700, episode=277 reward=0.765261 (540.89 it/sec) -training >> step=1657800, episode=277 reward=0.7666085 (486.24 it/sec) -training >> step=1657900, episode=277 reward=0.7616823 (512.24 it/sec) -training >> step=1658000, episode=277 reward=0.767661 (489.82 it/sec) -training >> step=1658100, episode=277 reward=0.7661744 (510.51 it/sec) -training >> step=1658200, episode=277 reward=0.7466822 (531.85 it/sec) -training >> step=1658300, episode=277 reward=0.739023 (514.16 it/sec) -training >> step=1658400, episode=277 reward=0.7512201 (487.19 it/sec) -training >> step=1658500, episode=277 reward=0.7665177 (523.00 it/sec) -training >> step=1658600, episode=277 reward=0.7671568 (532.83 it/sec) -training >> step=1658700, episode=277 reward=0.7673916 (488.65 it/sec) -training >> step=1658800, episode=277 reward=0.7764544 (523.11 it/sec) -training >> step=1658900, episode=277 reward=0.7611055 (519.39 it/sec) -training >> step=1659000, episode=277 reward=0.7536142 (513.29 it/sec) -training >> step=1659100, episode=277 reward=0.7571486 (492.18 it/sec) -training >> step=1659200, episode=277 reward=0.7622657 (523.37 it/sec) -training >> step=1659300, episode=277 reward=0.762569 (562.95 it/sec) -training >> step=1659400, episode=277 reward=0.7851735 (478.03 it/sec) -training >> step=1659500, episode=277 reward=0.7751418 (464.82 it/sec) -training >> step=1659600, episode=277 reward=0.7738197 (495.90 it/sec) -training >> step=1659700, episode=277 reward=0.7416292 (509.93 it/sec) -training >> step=1659800, episode=277 reward=0.7663192 (523.71 it/sec) -training >> step=1659900, episode=277 reward=0.7409938 (512.92 it/sec) -training >> step=1660000, episode=277 reward=0.7656121 (516.35 it/sec) -training >> step=1660100, episode=277 reward=0.7616686 (518.01 it/sec) -training >> step=1660200, episode=277 reward=0.7728081 (530.32 it/sec) -training >> step=1660300, episode=277 reward=0.7388539 (510.08 it/sec) -training >> step=1660400, episode=277 reward=0.7664251 (525.96 it/sec) -training >> step=1660500, episode=277 reward=0.7692757 (500.71 it/sec) -training >> step=1660600, episode=277 reward=0.7487373 (511.18 it/sec) -training >> step=1660700, episode=277 reward=0.7587186 (515.64 it/sec) -training >> step=1660800, episode=277 reward=0.7477515 (534.16 it/sec) -training >> step=1660900, episode=277 reward=0.7621732 (376.97 it/sec) -training >> step=1661000, episode=277 reward=0.764945 (526.86 it/sec) -training >> step=1661100, episode=277 reward=0.7592766 (530.43 it/sec) -training >> step=1661200, episode=277 reward=0.7342088 (500.72 it/sec) -training >> step=1661300, episode=278 reward=0.7425334 (174.96 it/sec) -training >> step=1661400, episode=278 reward=0.7613808 (500.42 it/sec) -training >> step=1661500, episode=278 reward=0.7420028 (489.62 it/sec) -training >> step=1661600, episode=278 reward=0.7615114 (462.53 it/sec) -training >> step=1661700, episode=278 reward=0.7668928 (553.73 it/sec) -training >> step=1661800, episode=278 reward=0.7475209 (490.84 it/sec) -training >> step=1661900, episode=278 reward=0.7496549 (510.51 it/sec) -training >> step=1662000, episode=278 reward=0.7794464 (499.81 it/sec) -training >> step=1662100, episode=278 reward=0.7721167 (507.92 it/sec) -training >> step=1662200, episode=278 reward=0.7861015 (523.24 it/sec) -training >> step=1662300, episode=278 reward=0.7707911 (483.92 it/sec) -training >> step=1662400, episode=278 reward=0.7521118 (503.90 it/sec) -training >> step=1662500, episode=278 reward=0.7603923 (531.10 it/sec) -training >> step=1662600, episode=278 reward=0.7681563 (513.50 it/sec) -training >> step=1662700, episode=278 reward=0.7778722 (491.62 it/sec) -training >> step=1662800, episode=278 reward=0.775113 (490.60 it/sec) -training >> step=1662900, episode=278 reward=0.754629 (504.96 it/sec) -training >> step=1663000, episode=278 reward=0.7538848 (510.06 it/sec) -training >> step=1663100, episode=278 reward=0.7754184 (514.66 it/sec) -training >> step=1663200, episode=278 reward=0.7581311 (526.85 it/sec) -training >> step=1663300, episode=278 reward=0.7533883 (479.39 it/sec) -training >> step=1663400, episode=278 reward=0.7725562 (499.68 it/sec) -training >> step=1663500, episode=278 reward=0.7745888 (523.98 it/sec) -training >> step=1663600, episode=278 reward=0.7501115 (521.27 it/sec) -training >> step=1663700, episode=278 reward=0.7776541 (505.95 it/sec) -training >> step=1663800, episode=278 reward=0.7542086 (494.68 it/sec) -training >> step=1663900, episode=278 reward=0.7650495 (454.60 it/sec) -training >> step=1664000, episode=278 reward=0.7554342 (440.53 it/sec) -training >> step=1664100, episode=278 reward=0.7478631 (480.83 it/sec) -training >> step=1664200, episode=278 reward=0.7588202 (498.47 it/sec) -training >> step=1664300, episode=278 reward=0.7564987 (488.64 it/sec) -training >> step=1664400, episode=278 reward=0.758406 (470.71 it/sec) -training >> step=1664500, episode=278 reward=0.7765219 (510.64 it/sec) -training >> step=1664600, episode=278 reward=0.7695129 (494.11 it/sec) -training >> step=1664700, episode=278 reward=0.7645763 (516.52 it/sec) -training >> step=1664800, episode=278 reward=0.7443489 (498.93 it/sec) -training >> step=1664900, episode=278 reward=0.769455 (522.65 it/sec) -training >> step=1665000, episode=278 reward=0.7576329 (493.05 it/sec) -training >> step=1665100, episode=278 reward=0.7697803 (490.38 it/sec) -training >> step=1665200, episode=278 reward=0.7543991 (535.28 it/sec) -training >> step=1665300, episode=278 reward=0.7626993 (447.96 it/sec) -training >> step=1665400, episode=278 reward=0.7493274 (451.92 it/sec) -training >> step=1665500, episode=278 reward=0.7447445 (473.14 it/sec) -training >> step=1665600, episode=278 reward=0.753924 (446.18 it/sec) -training >> step=1665700, episode=278 reward=0.742036 (530.13 it/sec) -training >> step=1665800, episode=278 reward=0.762741 (476.15 it/sec) -training >> step=1665900, episode=278 reward=0.7750246 (469.07 it/sec) -training >> step=1666000, episode=278 reward=0.7738504 (487.25 it/sec) -training >> step=1666100, episode=278 reward=0.7684997 (482.86 it/sec) -training >> step=1666200, episode=278 reward=0.7728855 (453.18 it/sec) -training >> step=1666300, episode=278 reward=0.7531077 (490.76 it/sec) -training >> step=1666400, episode=278 reward=0.760868 (461.79 it/sec) -training >> step=1666500, episode=278 reward=0.7577307 (503.46 it/sec) -training >> step=1666600, episode=278 reward=0.7570686 (466.64 it/sec) -training >> step=1666700, episode=278 reward=0.7654962 (476.38 it/sec) -training >> step=1666800, episode=278 reward=0.7769809 (457.32 it/sec) -training >> step=1666900, episode=278 reward=0.7375779 (471.44 it/sec) -training >> step=1667000, episode=278 reward=0.778573 (455.23 it/sec) -training >> step=1667100, episode=278 reward=0.7606117 (343.08 it/sec) -training >> step=1667200, episode=278 reward=0.7666977 (442.58 it/sec) -training >> step=1667300, episode=279 reward=0.7467468 (169.29 it/sec) -training >> step=1667400, episode=279 reward=0.7778645 (440.58 it/sec) -training >> step=1667500, episode=279 reward=0.7578959 (463.36 it/sec) -training >> step=1667600, episode=279 reward=0.7631366 (451.99 it/sec) -training >> step=1667700, episode=279 reward=0.7584144 (491.41 it/sec) -training >> step=1667800, episode=279 reward=0.7778015 (466.78 it/sec) -training >> step=1667900, episode=279 reward=0.7565543 (474.78 it/sec) -training >> step=1668000, episode=279 reward=0.7804214 (415.30 it/sec) -training >> step=1668100, episode=279 reward=0.7767853 (487.85 it/sec) -training >> step=1668200, episode=279 reward=0.7357827 (504.85 it/sec) -training >> step=1668300, episode=279 reward=0.7339466 (480.05 it/sec) -training >> step=1668400, episode=279 reward=0.7644225 (492.27 it/sec) -training >> step=1668500, episode=279 reward=0.7568337 (433.09 it/sec) -training >> step=1668600, episode=279 reward=0.7369656 (480.19 it/sec) -training >> step=1668700, episode=279 reward=0.7415071 (457.65 it/sec) -training >> step=1668800, episode=279 reward=0.7743598 (458.08 it/sec) -training >> step=1668900, episode=279 reward=0.7626382 (487.13 it/sec) -training >> step=1669000, episode=279 reward=0.7531773 (457.39 it/sec) -training >> step=1669100, episode=279 reward=0.7564412 (472.33 it/sec) -training >> step=1669200, episode=279 reward=0.7869862 (455.02 it/sec) -training >> step=1669300, episode=279 reward=0.7676989 (466.44 it/sec) -training >> step=1669400, episode=279 reward=0.7748392 (491.42 it/sec) -training >> step=1669500, episode=279 reward=0.7565564 (487.77 it/sec) -training >> step=1669600, episode=279 reward=0.7825742 (489.76 it/sec) -training >> step=1669700, episode=279 reward=0.7693542 (477.72 it/sec) -training >> step=1669800, episode=279 reward=0.7648798 (458.94 it/sec) -training >> step=1669900, episode=279 reward=0.7695239 (411.82 it/sec) -training >> step=1670000, episode=279 reward=0.7767624 (500.62 it/sec) -training >> step=1670100, episode=279 reward=0.7714821 (511.34 it/sec) -training >> step=1670200, episode=279 reward=0.755078 (379.24 it/sec) -training >> step=1670300, episode=279 reward=0.7431664 (442.00 it/sec) -training >> step=1670400, episode=279 reward=0.7660344 (447.71 it/sec) -training >> step=1670500, episode=279 reward=0.775665 (479.93 it/sec) -training >> step=1670600, episode=279 reward=0.7629426 (456.20 it/sec) -training >> step=1670700, episode=279 reward=0.770974 (471.51 it/sec) -training >> step=1670800, episode=279 reward=0.7576904 (472.28 it/sec) -training >> step=1670900, episode=279 reward=0.7721564 (483.60 it/sec) -training >> step=1671000, episode=279 reward=0.7641729 (452.31 it/sec) -training >> step=1671100, episode=279 reward=0.7754015 (509.55 it/sec) -training >> step=1671200, episode=279 reward=0.7721275 (471.70 it/sec) -training >> step=1671300, episode=279 reward=0.7798668 (436.30 it/sec) -training >> step=1671400, episode=279 reward=0.7461211 (478.27 it/sec) -training >> step=1671500, episode=279 reward=0.7539421 (461.97 it/sec) -training >> step=1671600, episode=279 reward=0.780332 (503.66 it/sec) -training >> step=1671700, episode=279 reward=0.7602911 (464.57 it/sec) -training >> step=1671800, episode=279 reward=0.7524793 (442.74 it/sec) -training >> step=1671900, episode=279 reward=0.7448035 (482.44 it/sec) -training >> step=1672000, episode=279 reward=0.7701737 (507.86 it/sec) -training >> step=1672100, episode=279 reward=0.7512214 (452.66 it/sec) -training >> step=1672200, episode=279 reward=0.776572 (453.09 it/sec) -training >> step=1672300, episode=279 reward=0.7568411 (467.26 it/sec) -training >> step=1672400, episode=279 reward=0.752907 (503.44 it/sec) -training >> step=1672500, episode=279 reward=0.7584209 (484.67 it/sec) -training >> step=1672600, episode=279 reward=0.7746696 (473.92 it/sec) -training >> step=1672700, episode=279 reward=0.7490552 (450.99 it/sec) -training >> step=1672800, episode=279 reward=0.7615389 (470.98 it/sec) -training >> step=1672900, episode=279 reward=0.764845 (477.96 it/sec) -training >> step=1673000, episode=279 reward=0.730965 (453.54 it/sec) -training >> step=1673100, episode=279 reward=0.7749742 (420.79 it/sec) -training >> step=1673200, episode=279 reward=0.7528929 (452.64 it/sec) -training >> step=1673300, episode=280 reward=0.7398 (126.08 it/sec) -training >> step=1673400, episode=280 reward=0.7485651 (411.69 it/sec) -training >> step=1673500, episode=280 reward=0.7581035 (445.24 it/sec) -training >> step=1673600, episode=280 reward=0.7570963 (479.79 it/sec) -training >> step=1673700, episode=280 reward=0.7835395 (441.35 it/sec) -training >> step=1673800, episode=280 reward=0.7672124 (492.20 it/sec) -training >> step=1673900, episode=280 reward=0.7540642 (446.58 it/sec) -training >> step=1674000, episode=280 reward=0.7541314 (443.95 it/sec) -training >> step=1674100, episode=280 reward=0.7781396 (496.17 it/sec) -training >> step=1674200, episode=280 reward=0.7637061 (495.06 it/sec) -training >> step=1674300, episode=280 reward=0.7602568 (461.04 it/sec) -training >> step=1674400, episode=280 reward=0.764832 (461.64 it/sec) -training >> step=1674500, episode=280 reward=0.7704728 (501.12 it/sec) -training >> step=1674600, episode=280 reward=0.7411784 (573.22 it/sec) -training >> step=1674700, episode=280 reward=0.7652074 (545.01 it/sec) -training >> step=1674800, episode=280 reward=0.7719808 (519.61 it/sec) -training >> step=1674900, episode=280 reward=0.770794 (545.76 it/sec) -training >> step=1675000, episode=280 reward=0.766573 (439.15 it/sec) -training >> step=1675100, episode=280 reward=0.7628483 (457.46 it/sec) -training >> step=1675200, episode=280 reward=0.7783023 (479.25 it/sec) -training >> step=1675300, episode=280 reward=0.7792954 (488.03 it/sec) -training >> step=1675400, episode=280 reward=0.7467597 (517.76 it/sec) -training >> step=1675500, episode=280 reward=0.7661332 (528.62 it/sec) -training >> step=1675600, episode=280 reward=0.7762046 (562.26 it/sec) -training >> step=1675700, episode=280 reward=0.7570551 (566.53 it/sec) -training >> step=1675800, episode=280 reward=0.7788364 (522.26 it/sec) -training >> step=1675900, episode=280 reward=0.7722039 (521.71 it/sec) -training >> step=1676000, episode=280 reward=0.7704069 (560.98 it/sec) -training >> step=1676100, episode=280 reward=0.7716105 (555.86 it/sec) -training >> step=1676200, episode=280 reward=0.7625767 (522.57 it/sec) -training >> step=1676300, episode=280 reward=0.7771096 (540.99 it/sec) -training >> step=1676400, episode=280 reward=0.762156 (474.91 it/sec) -training >> step=1676500, episode=280 reward=0.7783502 (550.51 it/sec) -training >> step=1676600, episode=280 reward=0.76291 (559.50 it/sec) -training >> step=1676700, episode=280 reward=0.7476489 (544.67 it/sec) -training >> step=1676800, episode=280 reward=0.758981 (501.67 it/sec) -training >> step=1676900, episode=280 reward=0.7676965 (507.65 it/sec) -training >> step=1677000, episode=280 reward=0.7758955 (535.92 it/sec) -training >> step=1677100, episode=280 reward=0.7517051 (532.34 it/sec) -training >> step=1677200, episode=280 reward=0.7604368 (542.41 it/sec) -training >> step=1677300, episode=280 reward=0.786885 (591.08 it/sec) -training >> step=1677400, episode=280 reward=0.7537215 (530.16 it/sec) -training >> step=1677500, episode=280 reward=0.7709134 (518.84 it/sec) -training >> step=1677600, episode=280 reward=0.7582971 (541.43 it/sec) -training >> step=1677700, episode=280 reward=0.7560726 (580.56 it/sec) -training >> step=1677800, episode=280 reward=0.7645391 (568.54 it/sec) -training >> step=1677900, episode=280 reward=0.7659938 (538.56 it/sec) -training >> step=1678000, episode=280 reward=0.7554626 (506.53 it/sec) -training >> step=1678100, episode=280 reward=0.7653702 (552.43 it/sec) -training >> step=1678200, episode=280 reward=0.7412418 (497.83 it/sec) -training >> step=1678300, episode=280 reward=0.7398137 (536.74 it/sec) -training >> step=1678400, episode=280 reward=0.75016 (536.61 it/sec) -training >> step=1678500, episode=280 reward=0.7450208 (520.36 it/sec) -training >> step=1678600, episode=280 reward=0.7740272 (501.32 it/sec) -training >> step=1678700, episode=280 reward=0.7666224 (534.70 it/sec) -training >> step=1678800, episode=280 reward=0.7581697 (573.27 it/sec) -training >> step=1678900, episode=280 reward=0.7518218 (563.93 it/sec) -training >> step=1679000, episode=280 reward=0.7803165 (503.81 it/sec) -training >> step=1679100, episode=280 reward=0.758101 (528.95 it/sec) -training >> step=1679200, episode=280 reward=0.7540847 (376.27 it/sec) -training >> step=1679300, episode=281 reward=0.7381226 (226.93 it/sec) -training >> step=1679400, episode=281 reward=0.7483134 (516.55 it/sec) -training >> step=1679500, episode=281 reward=0.7722508 (489.56 it/sec) -training >> step=1679600, episode=281 reward=0.7584884 (530.14 it/sec) -training >> step=1679700, episode=281 reward=0.7564361 (576.14 it/sec) -training >> step=1679800, episode=281 reward=0.7619482 (540.42 it/sec) -training >> step=1679900, episode=281 reward=0.7302484 (540.61 it/sec) -training >> step=1680000, episode=281 reward=0.7745321 (461.68 it/sec) -training >> step=1680100, episode=281 reward=0.7615628 (463.19 it/sec) -training >> step=1680200, episode=281 reward=0.779359 (484.07 it/sec) -training >> step=1680300, episode=281 reward=0.7755916 (476.69 it/sec) -training >> step=1680400, episode=281 reward=0.7820168 (529.92 it/sec) -training >> step=1680500, episode=281 reward=0.7518944 (472.87 it/sec) -training >> step=1680600, episode=281 reward=0.7652974 (490.09 it/sec) -training >> step=1680700, episode=281 reward=0.7796286 (520.79 it/sec) -training >> step=1680800, episode=281 reward=0.7629616 (524.69 it/sec) -training >> step=1680900, episode=281 reward=0.7510791 (526.76 it/sec) -training >> step=1681000, episode=281 reward=0.7526485 (479.14 it/sec) -training >> step=1681100, episode=281 reward=0.7546749 (487.75 it/sec) -training >> step=1681200, episode=281 reward=0.7364359 (495.99 it/sec) -training >> step=1681300, episode=281 reward=0.7687252 (459.66 it/sec) -training >> step=1681400, episode=281 reward=0.7731211 (499.64 it/sec) -training >> step=1681500, episode=281 reward=0.7525796 (467.89 it/sec) -training >> step=1681600, episode=281 reward=0.7405298 (448.22 it/sec) -training >> step=1681700, episode=281 reward=0.7437546 (549.71 it/sec) -training >> step=1681800, episode=281 reward=0.7592371 (491.20 it/sec) -training >> step=1681900, episode=281 reward=0.7695977 (443.82 it/sec) -training >> step=1682000, episode=281 reward=0.7614486 (450.72 it/sec) -training >> step=1682100, episode=281 reward=0.7503086 (434.20 it/sec) -training >> step=1682200, episode=281 reward=0.7841141 (540.51 it/sec) -training >> step=1682300, episode=281 reward=0.7616272 (502.62 it/sec) -training >> step=1682400, episode=281 reward=0.7732484 (507.54 it/sec) -training >> step=1682500, episode=281 reward=0.7482935 (445.89 it/sec) -training >> step=1682600, episode=281 reward=0.7592332 (510.24 it/sec) -training >> step=1682700, episode=281 reward=0.7622269 (521.58 it/sec) -training >> step=1682800, episode=281 reward=0.7771128 (527.38 it/sec) -training >> step=1682900, episode=281 reward=0.7707761 (463.96 it/sec) -training >> step=1683000, episode=281 reward=0.724007 (513.01 it/sec) -training >> step=1683100, episode=281 reward=0.7380767 (501.21 it/sec) -training >> step=1683200, episode=281 reward=0.7667727 (437.69 it/sec) -training >> step=1683300, episode=281 reward=0.772316 (451.08 it/sec) -training >> step=1683400, episode=281 reward=0.7651436 (501.25 it/sec) -training >> step=1683500, episode=281 reward=0.7764824 (477.26 it/sec) -training >> step=1683600, episode=281 reward=0.7328982 (503.94 it/sec) -training >> step=1683700, episode=281 reward=0.7638931 (509.24 it/sec) -training >> step=1683800, episode=281 reward=0.7672066 (499.60 it/sec) -training >> step=1683900, episode=281 reward=0.7579987 (480.73 it/sec) -training >> step=1684000, episode=281 reward=0.7634119 (450.82 it/sec) -training >> step=1684100, episode=281 reward=0.7444005 (422.11 it/sec) -training >> step=1684200, episode=281 reward=0.7605402 (472.36 it/sec) -training >> step=1684300, episode=281 reward=0.7548335 (476.13 it/sec) -training >> step=1684400, episode=281 reward=0.7495963 (470.95 it/sec) -training >> step=1684500, episode=281 reward=0.7714186 (471.25 it/sec) -training >> step=1684600, episode=281 reward=0.7582205 (569.48 it/sec) -training >> step=1684700, episode=281 reward=0.7581796 (494.42 it/sec) -training >> step=1684800, episode=281 reward=0.7518228 (466.40 it/sec) -training >> step=1684900, episode=281 reward=0.757602 (512.63 it/sec) -training >> step=1685000, episode=281 reward=0.7539347 (460.31 it/sec) -training >> step=1685100, episode=281 reward=0.7589172 (291.10 it/sec) -training >> step=1685200, episode=281 reward=0.7893847 (424.07 it/sec) -training >> step=1685300, episode=282 reward=0.7708675 (220.33 it/sec) -training >> step=1685400, episode=282 reward=0.7385677 (456.96 it/sec) -training >> step=1685500, episode=282 reward=0.7438486 (452.69 it/sec) -training >> step=1685600, episode=282 reward=0.7431753 (472.90 it/sec) -training >> step=1685700, episode=282 reward=0.7289478 (443.96 it/sec) -training >> step=1685800, episode=282 reward=0.7672338 (433.92 it/sec) -training >> step=1685900, episode=282 reward=0.7404377 (461.06 it/sec) -training >> step=1686000, episode=282 reward=0.7586684 (392.98 it/sec) -training >> step=1686100, episode=282 reward=0.7620878 (430.51 it/sec) -training >> step=1686200, episode=282 reward=0.7775202 (440.71 it/sec) -training >> step=1686300, episode=282 reward=0.7734089 (448.70 it/sec) -training >> step=1686400, episode=282 reward=0.7641872 (435.72 it/sec) -training >> step=1686500, episode=282 reward=0.7546093 (445.21 it/sec) -training >> step=1686600, episode=282 reward=0.7560928 (444.24 it/sec) -training >> step=1686700, episode=282 reward=0.7814105 (442.27 it/sec) -training >> step=1686800, episode=282 reward=0.7719072 (435.69 it/sec) -training >> step=1686900, episode=282 reward=0.7510531 (452.38 it/sec) -training >> step=1687000, episode=282 reward=0.7671956 (424.45 it/sec) -training >> step=1687100, episode=282 reward=0.7608325 (495.46 it/sec) -training >> step=1687200, episode=282 reward=0.75329 (429.01 it/sec) -training >> step=1687300, episode=282 reward=0.7785544 (475.51 it/sec) -training >> step=1687400, episode=282 reward=0.7628672 (454.68 it/sec) -training >> step=1687500, episode=282 reward=0.7725141 (449.08 it/sec) -training >> step=1687600, episode=282 reward=0.7684906 (426.19 it/sec) -training >> step=1687700, episode=282 reward=0.7920037 (438.36 it/sec) -training >> step=1687800, episode=282 reward=0.7522587 (450.00 it/sec) -training >> step=1687900, episode=282 reward=0.75121 (372.90 it/sec) -training >> step=1688000, episode=282 reward=0.7566774 (397.37 it/sec) -training >> step=1688100, episode=282 reward=0.7667804 (431.28 it/sec) -training >> step=1688200, episode=282 reward=0.7423192 (487.07 it/sec) -training >> step=1688300, episode=282 reward=0.7561368 (453.04 it/sec) -training >> step=1688400, episode=282 reward=0.7488619 (415.10 it/sec) -training >> step=1688500, episode=282 reward=0.7731775 (436.46 it/sec) -training >> step=1688600, episode=282 reward=0.7610899 (443.21 it/sec) -training >> step=1688700, episode=282 reward=0.7475122 (404.89 it/sec) -training >> step=1688800, episode=282 reward=0.7729834 (433.38 it/sec) -training >> step=1688900, episode=282 reward=0.7590598 (399.80 it/sec) -training >> step=1689000, episode=282 reward=0.7640269 (407.06 it/sec) -training >> step=1689100, episode=282 reward=0.7480265 (448.49 it/sec) -training >> step=1689200, episode=282 reward=0.7644597 (438.75 it/sec) -training >> step=1689300, episode=282 reward=0.7443382 (468.84 it/sec) -training >> step=1689400, episode=282 reward=0.7621582 (455.47 it/sec) -training >> step=1689500, episode=282 reward=0.7600321 (467.35 it/sec) -training >> step=1689600, episode=282 reward=0.7530472 (422.24 it/sec) -training >> step=1689700, episode=282 reward=0.7687863 (448.77 it/sec) -training >> step=1689800, episode=282 reward=0.7690725 (459.91 it/sec) -training >> step=1689900, episode=282 reward=0.7632113 (431.98 it/sec) -training >> step=1690000, episode=282 reward=0.7576511 (447.58 it/sec) -training >> step=1690100, episode=282 reward=0.7637745 (431.10 it/sec) -training >> step=1690200, episode=282 reward=0.7587786 (439.47 it/sec) -training >> step=1690300, episode=282 reward=0.7412059 (474.08 it/sec) -training >> step=1690400, episode=282 reward=0.7550496 (472.16 it/sec) -training >> step=1690500, episode=282 reward=0.7536952 (449.99 it/sec) -training >> step=1690600, episode=282 reward=0.7481781 (460.70 it/sec) -training >> step=1690700, episode=282 reward=0.7437258 (445.05 it/sec) -training >> step=1690800, episode=282 reward=0.7555839 (436.38 it/sec) -training >> step=1690900, episode=282 reward=0.7300168 (464.72 it/sec) -training >> step=1691000, episode=282 reward=0.7434684 (462.70 it/sec) -training >> step=1691100, episode=282 reward=0.7592811 (468.51 it/sec) -training >> step=1691200, episode=282 reward=0.751951 (433.04 it/sec) -training >> step=1691300, episode=283 reward=0.7855414 (153.69 it/sec) -training >> step=1691400, episode=283 reward=0.7566952 (484.31 it/sec) -training >> step=1691500, episode=283 reward=0.7373673 (444.21 it/sec) -training >> step=1691600, episode=283 reward=0.7643822 (449.68 it/sec) -training >> step=1691700, episode=283 reward=0.75552 (484.36 it/sec) -training >> step=1691800, episode=283 reward=0.7422197 (499.52 it/sec) -training >> step=1691900, episode=283 reward=0.7686535 (444.46 it/sec) -training >> step=1692000, episode=283 reward=0.744037 (413.68 it/sec) -training >> step=1692100, episode=283 reward=0.7664722 (428.30 it/sec) -training >> step=1692200, episode=283 reward=0.7914404 (473.04 it/sec) -training >> step=1692300, episode=283 reward=0.7553937 (459.95 it/sec) -training >> step=1692400, episode=283 reward=0.7716422 (442.74 it/sec) -training >> step=1692500, episode=283 reward=0.7511869 (438.88 it/sec) -training >> step=1692600, episode=283 reward=0.7822241 (432.86 it/sec) -training >> step=1692700, episode=283 reward=0.7638122 (465.01 it/sec) -training >> step=1692800, episode=283 reward=0.7389597 (467.66 it/sec) -training >> step=1692900, episode=283 reward=0.7550774 (465.07 it/sec) -training >> step=1693000, episode=283 reward=0.7533792 (433.05 it/sec) -training >> step=1693100, episode=283 reward=0.7704716 (476.91 it/sec) -training >> step=1693200, episode=283 reward=0.7787642 (435.91 it/sec) -training >> step=1693300, episode=283 reward=0.7440928 (435.60 it/sec) -training >> step=1693400, episode=283 reward=0.7741229 (479.84 it/sec) -training >> step=1693500, episode=283 reward=0.7670547 (490.05 it/sec) -training >> step=1693600, episode=283 reward=0.7656876 (481.15 it/sec) -training >> step=1693700, episode=283 reward=0.7534619 (441.43 it/sec) -training >> step=1693800, episode=283 reward=0.763203 (460.83 it/sec) -training >> step=1693900, episode=283 reward=0.7780003 (453.55 it/sec) -training >> step=1694000, episode=283 reward=0.7287609 (460.81 it/sec) -training >> step=1694100, episode=283 reward=0.7548399 (426.29 it/sec) -training >> step=1694200, episode=283 reward=0.7709633 (434.86 it/sec) -training >> step=1694300, episode=283 reward=0.756934 (452.78 it/sec) -training >> step=1694400, episode=283 reward=0.7815171 (458.52 it/sec) -training >> step=1694500, episode=283 reward=0.7478219 (483.99 it/sec) -training >> step=1694600, episode=283 reward=0.7587956 (459.79 it/sec) -training >> step=1694700, episode=283 reward=0.7544588 (491.69 it/sec) -training >> step=1694800, episode=283 reward=0.7653654 (464.27 it/sec) -training >> step=1694900, episode=283 reward=0.7714316 (414.69 it/sec) -training >> step=1695000, episode=283 reward=0.7805358 (464.06 it/sec) -training >> step=1695100, episode=283 reward=0.7619966 (449.35 it/sec) -training >> step=1695200, episode=283 reward=0.7804124 (478.59 it/sec) -training >> step=1695300, episode=283 reward=0.7727333 (453.45 it/sec) -training >> step=1695400, episode=283 reward=0.7775926 (457.18 it/sec) -training >> step=1695500, episode=283 reward=0.7586226 (468.62 it/sec) -training >> step=1695600, episode=283 reward=0.7679867 (492.22 it/sec) -training >> step=1695700, episode=283 reward=0.7518799 (449.56 it/sec) -training >> step=1695800, episode=283 reward=0.7604466 (483.78 it/sec) -training >> step=1695900, episode=283 reward=0.7692572 (475.40 it/sec) -training >> step=1696000, episode=283 reward=0.7497142 (437.40 it/sec) -training >> step=1696100, episode=283 reward=0.7423808 (466.50 it/sec) -training >> step=1696200, episode=283 reward=0.7563171 (472.68 it/sec) -training >> step=1696300, episode=283 reward=0.7555647 (440.26 it/sec) -training >> step=1696400, episode=283 reward=0.7363306 (451.24 it/sec) -training >> step=1696500, episode=283 reward=0.7753991 (444.52 it/sec) -training >> step=1696600, episode=283 reward=0.7567953 (438.75 it/sec) -training >> step=1696700, episode=283 reward=0.7496082 (473.85 it/sec) -training >> step=1696800, episode=283 reward=0.7638023 (440.79 it/sec) -training >> step=1696900, episode=283 reward=0.7725131 (455.31 it/sec) -training >> step=1697000, episode=283 reward=0.7596798 (462.38 it/sec) -training >> step=1697100, episode=283 reward=0.7466416 (465.92 it/sec) -training >> step=1697200, episode=283 reward=0.7395288 (454.26 it/sec) -training >> step=1697300, episode=284 reward=0.768496 (142.02 it/sec) -training >> step=1697400, episode=284 reward=0.769954 (461.51 it/sec) -training >> step=1697500, episode=284 reward=0.7375374 (496.59 it/sec) -training >> step=1697600, episode=284 reward=0.7532656 (465.77 it/sec) -training >> step=1697700, episode=284 reward=0.7627234 (455.64 it/sec) -training >> step=1697800, episode=284 reward=0.7622955 (425.02 it/sec) -training >> step=1697900, episode=284 reward=0.7544973 (425.25 it/sec) -training >> step=1698000, episode=284 reward=0.7661502 (394.07 it/sec) -training >> step=1698100, episode=284 reward=0.7772283 (462.52 it/sec) -training >> step=1698200, episode=284 reward=0.7689541 (467.58 it/sec) -training >> step=1698300, episode=284 reward=0.7612122 (477.41 it/sec) -training >> step=1698400, episode=284 reward=0.755167 (472.32 it/sec) -training >> step=1698500, episode=284 reward=0.7815678 (442.77 it/sec) -training >> step=1698600, episode=284 reward=0.7666391 (441.53 it/sec) -training >> step=1698700, episode=284 reward=0.7672095 (486.88 it/sec) -training >> step=1698800, episode=284 reward=0.7596169 (414.51 it/sec) -training >> step=1698900, episode=284 reward=0.7507834 (453.50 it/sec) -training >> step=1699000, episode=284 reward=0.7378941 (453.02 it/sec) -training >> step=1699100, episode=284 reward=0.7624751 (468.01 it/sec) -training >> step=1699200, episode=284 reward=0.779301 (486.02 it/sec) -training >> step=1699300, episode=284 reward=0.7587895 (457.67 it/sec) -training >> step=1699400, episode=284 reward=0.7650716 (427.91 it/sec) -training >> step=1699500, episode=284 reward=0.7636158 (420.60 it/sec) -training >> step=1699600, episode=284 reward=0.7715879 (488.65 it/sec) -training >> step=1699700, episode=284 reward=0.7458886 (461.53 it/sec) -training >> step=1699800, episode=284 reward=0.7418171 (429.56 it/sec) -training >> step=1699900, episode=284 reward=0.7704573 (463.81 it/sec) -training >> step=1700000, episode=284 reward=0.7637696 (455.40 it/sec) -training >> step=1700100, episode=284 reward=0.775051 (450.85 it/sec) -training >> step=1700200, episode=284 reward=0.7622885 (460.20 it/sec) -training >> step=1700300, episode=284 reward=0.7883202 (468.26 it/sec) -training >> step=1700400, episode=284 reward=0.7557763 (433.14 it/sec) -training >> step=1700500, episode=284 reward=0.7534782 (458.59 it/sec) -training >> step=1700600, episode=284 reward=0.7495606 (447.31 it/sec) -training >> step=1700700, episode=284 reward=0.7845961 (473.96 it/sec) -training >> step=1700800, episode=284 reward=0.7778561 (467.44 it/sec) -training >> step=1700900, episode=284 reward=0.7583742 (435.48 it/sec) -training >> step=1701000, episode=284 reward=0.7742604 (469.51 it/sec) -training >> step=1701100, episode=284 reward=0.7444423 (522.43 it/sec) -training >> step=1701200, episode=284 reward=0.741733 (501.20 it/sec) -training >> step=1701300, episode=284 reward=0.7534443 (492.37 it/sec) -training >> step=1701400, episode=284 reward=0.7683418 (538.61 it/sec) -training >> step=1701500, episode=284 reward=0.7785389 (578.56 it/sec) -training >> step=1701600, episode=284 reward=0.777634 (487.59 it/sec) -training >> step=1701700, episode=284 reward=0.7526507 (504.52 it/sec) -training >> step=1701800, episode=284 reward=0.7732571 (500.68 it/sec) -training >> step=1701900, episode=284 reward=0.7534058 (544.25 it/sec) -training >> step=1702000, episode=284 reward=0.7625836 (537.75 it/sec) -training >> step=1702100, episode=284 reward=0.7667736 (510.39 it/sec) -training >> step=1702200, episode=284 reward=0.7438811 (483.02 it/sec) -training >> step=1702300, episode=284 reward=0.7615407 (471.38 it/sec) -training >> step=1702400, episode=284 reward=0.7434078 (509.60 it/sec) -training >> step=1702500, episode=284 reward=0.7326205 (536.14 it/sec) -training >> step=1702600, episode=284 reward=0.7504845 (524.34 it/sec) -training >> step=1702700, episode=284 reward=0.7284669 (525.54 it/sec) -training >> step=1702800, episode=284 reward=0.7552507 (479.29 it/sec) -training >> step=1702900, episode=284 reward=0.7614415 (520.12 it/sec) -training >> step=1703000, episode=284 reward=0.7474853 (541.81 it/sec) -training >> step=1703100, episode=284 reward=0.7551178 (569.68 it/sec) -training >> step=1703200, episode=284 reward=0.7480589 (526.61 it/sec) -training >> step=1703300, episode=285 reward=0.7652934 (164.47 it/sec) -training >> step=1703400, episode=285 reward=0.753479 (467.44 it/sec) -training >> step=1703500, episode=285 reward=0.7351499 (487.53 it/sec) -training >> step=1703600, episode=285 reward=0.7703563 (444.28 it/sec) -training >> step=1703700, episode=285 reward=0.7604595 (455.96 it/sec) -training >> step=1703800, episode=285 reward=0.7705419 (456.78 it/sec) -training >> step=1703900, episode=285 reward=0.75041 (487.87 it/sec) -training >> step=1704000, episode=285 reward=0.7808596 (426.48 it/sec) -training >> step=1704100, episode=285 reward=0.7768823 (434.04 it/sec) -training >> step=1704200, episode=285 reward=0.7640461 (469.18 it/sec) -training >> step=1704300, episode=285 reward=0.7662326 (500.60 it/sec) -training >> step=1704400, episode=285 reward=0.7818742 (460.84 it/sec) -training >> step=1704500, episode=285 reward=0.7602224 (441.62 it/sec) -training >> step=1704600, episode=285 reward=0.7544035 (442.73 it/sec) -training >> step=1704700, episode=285 reward=0.751559 (451.40 it/sec) -training >> step=1704800, episode=285 reward=0.7654098 (486.56 it/sec) -training >> step=1704900, episode=285 reward=0.7671375 (475.96 it/sec) -training >> step=1705000, episode=285 reward=0.7705725 (425.23 it/sec) -training >> step=1705100, episode=285 reward=0.7692804 (413.31 it/sec) -training >> step=1705200, episode=285 reward=0.741838 (459.68 it/sec) -training >> step=1705300, episode=285 reward=0.7756869 (446.99 it/sec) -training >> step=1705400, episode=285 reward=0.7560707 (452.66 it/sec) -training >> step=1705500, episode=285 reward=0.744801 (463.17 it/sec) -training >> step=1705600, episode=285 reward=0.7634585 (445.50 it/sec) -training >> step=1705700, episode=285 reward=0.7667097 (421.18 it/sec) -training >> step=1705800, episode=285 reward=0.7611408 (435.39 it/sec) -training >> step=1705900, episode=285 reward=0.7568204 (432.84 it/sec) -training >> step=1706000, episode=285 reward=0.7762616 (477.68 it/sec) -training >> step=1706100, episode=285 reward=0.7718214 (457.17 it/sec) -training >> step=1706200, episode=285 reward=0.7635376 (429.69 it/sec) -training >> step=1706300, episode=285 reward=0.7758944 (450.73 it/sec) -training >> step=1706400, episode=285 reward=0.765192 (435.84 it/sec) -training >> step=1706500, episode=285 reward=0.7501908 (450.67 it/sec) -training >> step=1706600, episode=285 reward=0.7644658 (467.95 it/sec) -training >> step=1706700, episode=285 reward=0.7603245 (470.49 it/sec) -training >> step=1706800, episode=285 reward=0.7753268 (419.55 it/sec) -training >> step=1706900, episode=285 reward=0.7444446 (460.44 it/sec) -training >> step=1707000, episode=285 reward=0.7758287 (483.41 it/sec) -training >> step=1707100, episode=285 reward=0.7837089 (460.79 it/sec) -training >> step=1707200, episode=285 reward=0.7442371 (452.57 it/sec) -training >> step=1707300, episode=285 reward=0.7714889 (385.70 it/sec) -training >> step=1707400, episode=285 reward=0.7557598 (444.26 it/sec) -training >> step=1707500, episode=285 reward=0.7492757 (507.96 it/sec) -training >> step=1707600, episode=285 reward=0.7510535 (463.35 it/sec) -training >> step=1707700, episode=285 reward=0.7614615 (416.56 it/sec) -training >> step=1707800, episode=285 reward=0.753764 (431.49 it/sec) -training >> step=1707900, episode=285 reward=0.7517974 (471.39 it/sec) -training >> step=1708000, episode=285 reward=0.785244 (483.99 it/sec) -training >> step=1708100, episode=285 reward=0.7523752 (449.84 it/sec) -training >> step=1708200, episode=285 reward=0.7707729 (416.38 it/sec) -training >> step=1708300, episode=285 reward=0.7602623 (474.70 it/sec) -training >> step=1708400, episode=285 reward=0.7751862 (442.63 it/sec) -training >> step=1708500, episode=285 reward=0.7622547 (456.31 it/sec) -training >> step=1708600, episode=285 reward=0.7510144 (472.40 it/sec) -training >> step=1708700, episode=285 reward=0.7639615 (449.46 it/sec) -training >> step=1708800, episode=285 reward=0.7547035 (441.94 it/sec) -training >> step=1708900, episode=285 reward=0.7501908 (450.50 it/sec) -training >> step=1709000, episode=285 reward=0.7784371 (426.40 it/sec) -training >> step=1709100, episode=285 reward=0.7593977 (462.39 it/sec) -training >> step=1709200, episode=285 reward=0.7474199 (436.60 it/sec) -training >> step=1709300, episode=286 reward=0.7442132 (162.59 it/sec) -training >> step=1709400, episode=286 reward=0.7410519 (461.15 it/sec) -training >> step=1709500, episode=286 reward=0.7636574 (481.34 it/sec) -training >> step=1709600, episode=286 reward=0.7811936 (441.43 it/sec) -training >> step=1709700, episode=286 reward=0.7709523 (483.05 it/sec) -training >> step=1709800, episode=286 reward=0.7402681 (465.78 it/sec) -training >> step=1709900, episode=286 reward=0.7712249 (438.44 it/sec) -training >> step=1710000, episode=286 reward=0.7626995 (449.62 it/sec) -training >> step=1710100, episode=286 reward=0.7625518 (469.62 it/sec) -training >> step=1710200, episode=286 reward=0.767633 (457.96 it/sec) -training >> step=1710300, episode=286 reward=0.7805815 (458.64 it/sec) -training >> step=1710400, episode=286 reward=0.765061 (428.51 it/sec) -training >> step=1710500, episode=286 reward=0.7559091 (464.79 it/sec) -training >> step=1710600, episode=286 reward=0.7584363 (481.52 it/sec) -training >> step=1710700, episode=286 reward=0.755618 (454.01 it/sec) -training >> step=1710800, episode=286 reward=0.7851459 (400.93 it/sec) -training >> step=1710900, episode=286 reward=0.7506759 (409.34 it/sec) -training >> step=1711000, episode=286 reward=0.7590727 (434.72 it/sec) -training >> step=1711100, episode=286 reward=0.7499323 (448.83 it/sec) -training >> step=1711200, episode=286 reward=0.7506592 (452.29 it/sec) -training >> step=1711300, episode=286 reward=0.7813686 (430.53 it/sec) -training >> step=1711400, episode=286 reward=0.7606344 (457.13 it/sec) -training >> step=1711500, episode=286 reward=0.7787343 (461.13 it/sec) -training >> step=1711600, episode=286 reward=0.7632503 (464.31 it/sec) -training >> step=1711700, episode=286 reward=0.7441047 (439.47 it/sec) -training >> step=1711800, episode=286 reward=0.7719799 (454.48 it/sec) -training >> step=1711900, episode=286 reward=0.7548429 (475.39 it/sec) -training >> step=1712000, episode=286 reward=0.7553543 (454.47 it/sec) -training >> step=1712100, episode=286 reward=0.7816119 (442.97 it/sec) -training >> step=1712200, episode=286 reward=0.7588708 (477.21 it/sec) -training >> step=1712300, episode=286 reward=0.7660238 (468.46 it/sec) -training >> step=1712400, episode=286 reward=0.7562811 (476.44 it/sec) -training >> step=1712500, episode=286 reward=0.7630125 (451.35 it/sec) -training >> step=1712600, episode=286 reward=0.7681163 (439.46 it/sec) -training >> step=1712700, episode=286 reward=0.7849703 (471.25 it/sec) -training >> step=1712800, episode=286 reward=0.7564016 (504.69 it/sec) -training >> step=1712900, episode=286 reward=0.7568898 (531.67 it/sec) -training >> step=1713000, episode=286 reward=0.7529924 (540.79 it/sec) -training >> step=1713100, episode=286 reward=0.750386 (502.00 it/sec) -training >> step=1713200, episode=286 reward=0.7595145 (475.01 it/sec) -training >> step=1713300, episode=286 reward=0.7488112 (508.36 it/sec) -training >> step=1713400, episode=286 reward=0.772829 (545.43 it/sec) -training >> step=1713500, episode=286 reward=0.7716321 (529.99 it/sec) -training >> step=1713600, episode=286 reward=0.7616873 (481.73 it/sec) -training >> step=1713700, episode=286 reward=0.7632436 (524.17 it/sec) -training >> step=1713800, episode=286 reward=0.7563953 (504.51 it/sec) -training >> step=1713900, episode=286 reward=0.7609685 (516.61 it/sec) -training >> step=1714000, episode=286 reward=0.7598416 (502.89 it/sec) -training >> step=1714100, episode=286 reward=0.7612539 (500.10 it/sec) -training >> step=1714200, episode=286 reward=0.7510882 (551.65 it/sec) -training >> step=1714300, episode=286 reward=0.7625706 (507.13 it/sec) -training >> step=1714400, episode=286 reward=0.7510971 (505.29 it/sec) -training >> step=1714500, episode=286 reward=0.7646503 (522.60 it/sec) -training >> step=1714600, episode=286 reward=0.7498793 (512.79 it/sec) -training >> step=1714700, episode=286 reward=0.7541329 (551.40 it/sec) -training >> step=1714800, episode=286 reward=0.765277 (472.04 it/sec) -training >> step=1714900, episode=286 reward=0.7797252 (475.06 it/sec) -training >> step=1715000, episode=286 reward=0.7664022 (525.72 it/sec) -training >> step=1715100, episode=286 reward=0.7715088 (500.51 it/sec) -training >> step=1715200, episode=286 reward=0.7536082 (474.93 it/sec) -training >> step=1715300, episode=287 reward=0.7631728 (181.04 it/sec) -training >> step=1715400, episode=287 reward=0.7745019 (518.29 it/sec) -training >> step=1715500, episode=287 reward=0.7717735 (510.20 it/sec) -training >> step=1715600, episode=287 reward=0.7413679 (492.56 it/sec) -training >> step=1715700, episode=287 reward=0.7352344 (564.09 it/sec) -training >> step=1715800, episode=287 reward=0.746038 (545.20 it/sec) -training >> step=1715900, episode=287 reward=0.7542778 (545.66 it/sec) -training >> step=1716000, episode=287 reward=0.7368265 (491.91 it/sec) -training >> step=1716100, episode=287 reward=0.7776634 (511.16 it/sec) -training >> step=1716200, episode=287 reward=0.7912417 (541.64 it/sec) -training >> step=1716300, episode=287 reward=0.7544228 (518.25 it/sec) -training >> step=1716400, episode=287 reward=0.7605054 (522.81 it/sec) -training >> step=1716500, episode=287 reward=0.7644638 (528.03 it/sec) -training >> step=1716600, episode=287 reward=0.7573764 (564.16 it/sec) -training >> step=1716700, episode=287 reward=0.7758837 (538.15 it/sec) -training >> step=1716800, episode=287 reward=0.7598427 (497.62 it/sec) -training >> step=1716900, episode=287 reward=0.7671801 (514.81 it/sec) -training >> step=1717000, episode=287 reward=0.7607602 (525.63 it/sec) -training >> step=1717100, episode=287 reward=0.7436904 (517.23 it/sec) -training >> step=1717200, episode=287 reward=0.7558931 (518.89 it/sec) -training >> step=1717300, episode=287 reward=0.7644559 (518.02 it/sec) -training >> step=1717400, episode=287 reward=0.7695797 (515.76 it/sec) -training >> step=1717500, episode=287 reward=0.763421 (518.14 it/sec) -training >> step=1717600, episode=287 reward=0.7687714 (533.72 it/sec) -training >> step=1717700, episode=287 reward=0.738265 (459.25 it/sec) -training >> step=1717800, episode=287 reward=0.791418 (428.62 it/sec) -training >> step=1717900, episode=287 reward=0.7804059 (422.90 it/sec) -training >> step=1718000, episode=287 reward=0.7541717 (469.83 it/sec) -training >> step=1718100, episode=287 reward=0.7737039 (490.17 it/sec) -training >> step=1718200, episode=287 reward=0.7739167 (471.14 it/sec) -training >> step=1718300, episode=287 reward=0.7687209 (493.99 it/sec) -training >> step=1718400, episode=287 reward=0.7630308 (543.64 it/sec) -training >> step=1718500, episode=287 reward=0.7526771 (546.89 it/sec) -training >> step=1718600, episode=287 reward=0.7693503 (424.37 it/sec) -training >> step=1718700, episode=287 reward=0.7694989 (407.53 it/sec) -training >> step=1718800, episode=287 reward=0.7522781 (426.78 it/sec) -training >> step=1718900, episode=287 reward=0.7510686 (456.10 it/sec) -training >> step=1719000, episode=287 reward=0.7522084 (470.66 it/sec) -training >> step=1719100, episode=287 reward=0.7515606 (477.55 it/sec) -training >> step=1719200, episode=287 reward=0.7778389 (433.33 it/sec) -training >> step=1719300, episode=287 reward=0.7531375 (492.32 it/sec) -training >> step=1719400, episode=287 reward=0.7487022 (474.02 it/sec) -training >> step=1719500, episode=287 reward=0.7288457 (484.82 it/sec) -training >> step=1719600, episode=287 reward=0.7671244 (443.99 it/sec) -training >> step=1719700, episode=287 reward=0.7525411 (442.36 it/sec) -training >> step=1719800, episode=287 reward=0.7729287 (450.39 it/sec) -training >> step=1719900, episode=287 reward=0.7832112 (487.58 it/sec) -training >> step=1720000, episode=287 reward=0.7364834 (504.07 it/sec) -training >> step=1720100, episode=287 reward=0.7394529 (536.68 it/sec) -training >> step=1720200, episode=287 reward=0.7806777 (474.65 it/sec) -training >> step=1720300, episode=287 reward=0.7356997 (508.70 it/sec) -training >> step=1720400, episode=287 reward=0.7825888 (494.99 it/sec) -training >> step=1720500, episode=287 reward=0.7655116 (452.89 it/sec) -training >> step=1720600, episode=287 reward=0.7799796 (531.53 it/sec) -training >> step=1720700, episode=287 reward=0.7631776 (508.27 it/sec) -training >> step=1720800, episode=287 reward=0.7518866 (495.72 it/sec) -training >> step=1720900, episode=287 reward=0.7495961 (467.10 it/sec) -training >> step=1721000, episode=287 reward=0.7458588 (436.54 it/sec) -training >> step=1721100, episode=287 reward=0.7681862 (390.80 it/sec) -training >> step=1721200, episode=287 reward=0.7533448 (437.64 it/sec) -training >> step=1721300, episode=288 reward=0.7508546 (144.05 it/sec) -training >> step=1721400, episode=288 reward=0.7501284 (466.18 it/sec) -training >> step=1721500, episode=288 reward=0.7286128 (506.18 it/sec) -training >> step=1721600, episode=288 reward=0.7601508 (485.13 it/sec) -training >> step=1721700, episode=288 reward=0.7458751 (444.09 it/sec) -training >> step=1721800, episode=288 reward=0.7579054 (434.42 it/sec) -training >> step=1721900, episode=288 reward=0.7554634 (455.69 it/sec) -training >> step=1722000, episode=288 reward=0.7625659 (434.39 it/sec) -training >> step=1722100, episode=288 reward=0.7390892 (471.06 it/sec) -training >> step=1722200, episode=288 reward=0.7700436 (460.34 it/sec) -training >> step=1722300, episode=288 reward=0.7566307 (451.61 it/sec) -training >> step=1722400, episode=288 reward=0.7578934 (417.87 it/sec) -training >> step=1722500, episode=288 reward=0.776663 (440.72 it/sec) -training >> step=1722600, episode=288 reward=0.7747967 (469.37 it/sec) -training >> step=1722700, episode=288 reward=0.7798139 (462.49 it/sec) -training >> step=1722800, episode=288 reward=0.7591392 (455.77 it/sec) -training >> step=1722900, episode=288 reward=0.7636386 (415.73 it/sec) -training >> step=1723000, episode=288 reward=0.7593728 (450.37 it/sec) -training >> step=1723100, episode=288 reward=0.750751 (445.64 it/sec) -training >> step=1723200, episode=288 reward=0.7882916 (476.46 it/sec) -training >> step=1723300, episode=288 reward=0.7710543 (433.76 it/sec) -training >> step=1723400, episode=288 reward=0.7852061 (446.43 it/sec) -training >> step=1723500, episode=288 reward=0.7586361 (469.86 it/sec) -training >> step=1723600, episode=288 reward=0.7601922 (420.37 it/sec) -training >> step=1723700, episode=288 reward=0.7905527 (471.69 it/sec) -training >> step=1723800, episode=288 reward=0.7636622 (390.47 it/sec) -training >> step=1723900, episode=288 reward=0.772166 (448.43 it/sec) -training >> step=1724000, episode=288 reward=0.7516959 (420.97 it/sec) -training >> step=1724100, episode=288 reward=0.7602488 (449.17 it/sec) -training >> step=1724200, episode=288 reward=0.7627089 (411.36 it/sec) -training >> step=1724300, episode=288 reward=0.7470612 (454.99 it/sec) -training >> step=1724400, episode=288 reward=0.7605027 (451.60 it/sec) -training >> step=1724500, episode=288 reward=0.7554489 (442.21 it/sec) -training >> step=1724600, episode=288 reward=0.741641 (437.35 it/sec) -training >> step=1724700, episode=288 reward=0.7580657 (445.72 it/sec) -training >> step=1724800, episode=288 reward=0.7535918 (417.00 it/sec) -training >> step=1724900, episode=288 reward=0.7828359 (466.08 it/sec) -training >> step=1725000, episode=288 reward=0.7646559 (483.41 it/sec) -training >> step=1725100, episode=288 reward=0.7715237 (437.30 it/sec) -training >> step=1725200, episode=288 reward=0.7485275 (434.78 it/sec) -training >> step=1725300, episode=288 reward=0.7662596 (477.74 it/sec) -training >> step=1725400, episode=288 reward=0.7573093 (472.89 it/sec) -training >> step=1725500, episode=288 reward=0.7537313 (440.30 it/sec) -training >> step=1725600, episode=288 reward=0.7534002 (430.65 it/sec) -training >> step=1725700, episode=288 reward=0.7493486 (527.24 it/sec) -training >> step=1725800, episode=288 reward=0.7451284 (560.24 it/sec) -training >> step=1725900, episode=288 reward=0.7680423 (524.17 it/sec) -training >> step=1726000, episode=288 reward=0.7805886 (569.14 it/sec) -training >> step=1726100, episode=288 reward=0.7564825 (518.44 it/sec) -training >> step=1726200, episode=288 reward=0.7557192 (523.85 it/sec) -training >> step=1726300, episode=288 reward=0.7634012 (558.72 it/sec) -training >> step=1726400, episode=288 reward=0.7832416 (546.57 it/sec) -training >> step=1726500, episode=288 reward=0.7550028 (545.28 it/sec) -training >> step=1726600, episode=288 reward=0.7601147 (521.06 it/sec) -training >> step=1726700, episode=288 reward=0.759552 (532.42 it/sec) -training >> step=1726800, episode=288 reward=0.75192 (534.88 it/sec) -training >> step=1726900, episode=288 reward=0.763813 (534.04 it/sec) -training >> step=1727000, episode=288 reward=0.7546879 (570.61 it/sec) -training >> step=1727100, episode=288 reward=0.7647758 (559.70 it/sec) -training >> step=1727200, episode=288 reward=0.73488 (512.01 it/sec) -training >> step=1727300, episode=289 reward=0.7476521 (75.82 it/sec) -training >> step=1727400, episode=289 reward=0.7663383 (478.43 it/sec) -training >> step=1727500, episode=289 reward=0.7607473 (487.64 it/sec) -training >> step=1727600, episode=289 reward=0.7301289 (518.51 it/sec) -training >> step=1727700, episode=289 reward=0.7563739 (524.60 it/sec) -training >> step=1727800, episode=289 reward=0.765009 (477.31 it/sec) -training >> step=1727900, episode=289 reward=0.7549528 (524.01 it/sec) -training >> step=1728000, episode=289 reward=0.7668297 (559.62 it/sec) -training >> step=1728100, episode=289 reward=0.7491878 (447.48 it/sec) -training >> step=1728200, episode=289 reward=0.7634714 (526.06 it/sec) -training >> step=1728300, episode=289 reward=0.7743157 (544.08 it/sec) -training >> step=1728400, episode=289 reward=0.7781445 (523.52 it/sec) -training >> step=1728500, episode=289 reward=0.7558639 (529.43 it/sec) -training >> step=1728600, episode=289 reward=0.7530531 (530.78 it/sec) -training >> step=1728700, episode=289 reward=0.7500116 (535.51 it/sec) -training >> step=1728800, episode=289 reward=0.7654499 (523.21 it/sec) -training >> step=1728900, episode=289 reward=0.7660504 (547.08 it/sec) -training >> step=1729000, episode=289 reward=0.7424448 (478.96 it/sec) -training >> step=1729100, episode=289 reward=0.765696 (528.29 it/sec) -training >> step=1729200, episode=289 reward=0.7484611 (546.88 it/sec) -training >> step=1729300, episode=289 reward=0.7941266 (521.08 it/sec) -training >> step=1729400, episode=289 reward=0.7586502 (558.27 it/sec) -training >> step=1729500, episode=289 reward=0.7562248 (526.09 it/sec) -training >> step=1729600, episode=289 reward=0.7537 (516.51 it/sec) -training >> step=1729700, episode=289 reward=0.7617707 (522.11 it/sec) -training >> step=1729800, episode=289 reward=0.7585575 (521.70 it/sec) -training >> step=1729900, episode=289 reward=0.7575432 (532.43 it/sec) -training >> step=1730000, episode=289 reward=0.7554348 (534.21 it/sec) -training >> step=1730100, episode=289 reward=0.7683145 (500.63 it/sec) -training >> step=1730200, episode=289 reward=0.7448378 (553.78 it/sec) -training >> step=1730300, episode=289 reward=0.7482077 (526.70 it/sec) -training >> step=1730400, episode=289 reward=0.7708651 (501.81 it/sec) -training >> step=1730500, episode=289 reward=0.7740799 (536.96 it/sec) -training >> step=1730600, episode=289 reward=0.7406542 (492.05 it/sec) -training >> step=1730700, episode=289 reward=0.7675572 (564.70 it/sec) -training >> step=1730800, episode=289 reward=0.7452495 (545.41 it/sec) -training >> step=1730900, episode=289 reward=0.7552943 (539.76 it/sec) -training >> step=1731000, episode=289 reward=0.753928 (573.71 it/sec) -training >> step=1731100, episode=289 reward=0.766594 (514.98 it/sec) -training >> step=1731200, episode=289 reward=0.7625331 (555.93 it/sec) -training >> step=1731300, episode=289 reward=0.7532615 (547.12 it/sec) -training >> step=1731400, episode=289 reward=0.7730258 (536.33 it/sec) -training >> step=1731500, episode=289 reward=0.773339 (524.87 it/sec) -training >> step=1731600, episode=289 reward=0.7798199 (539.82 it/sec) -training >> step=1731700, episode=289 reward=0.7665922 (529.79 it/sec) -training >> step=1731800, episode=289 reward=0.7624509 (526.32 it/sec) -training >> step=1731900, episode=289 reward=0.7422192 (530.78 it/sec) -training >> step=1732000, episode=289 reward=0.7539781 (521.64 it/sec) -training >> step=1732100, episode=289 reward=0.7845923 (513.09 it/sec) -training >> step=1732200, episode=289 reward=0.7724626 (520.63 it/sec) -training >> step=1732300, episode=289 reward=0.7497876 (542.96 it/sec) -training >> step=1732400, episode=289 reward=0.7837801 (549.07 it/sec) -training >> step=1732500, episode=289 reward=0.7421945 (521.05 it/sec) -training >> step=1732600, episode=289 reward=0.7464516 (509.88 it/sec) -training >> step=1732700, episode=289 reward=0.7550994 (537.89 it/sec) -training >> step=1732800, episode=289 reward=0.7511162 (535.49 it/sec) -training >> step=1732900, episode=289 reward=0.7733409 (536.76 it/sec) -training >> step=1733000, episode=289 reward=0.7557048 (510.88 it/sec) -training >> step=1733100, episode=289 reward=0.7657505 (498.23 it/sec) -training >> step=1733200, episode=289 reward=0.7555882 (510.43 it/sec) -training >> step=1733300, episode=290 reward=0.7422943 (144.63 it/sec) -training >> step=1733400, episode=290 reward=0.735556 (484.74 it/sec) -training >> step=1733500, episode=290 reward=0.7461199 (494.27 it/sec) -training >> step=1733600, episode=290 reward=0.7477716 (536.03 it/sec) -training >> step=1733700, episode=290 reward=0.7372525 (552.90 it/sec) -training >> step=1733800, episode=290 reward=0.7335408 (524.83 it/sec) -training >> step=1733900, episode=290 reward=0.7668568 (521.49 it/sec) -training >> step=1734000, episode=290 reward=0.7691211 (539.71 it/sec) -training >> step=1734100, episode=290 reward=0.745006 (517.93 it/sec) -training >> step=1734200, episode=290 reward=0.7367411 (527.23 it/sec) -training >> step=1734300, episode=290 reward=0.7497218 (537.34 it/sec) -training >> step=1734400, episode=290 reward=0.7599311 (507.30 it/sec) -training >> step=1734500, episode=290 reward=0.7383679 (475.71 it/sec) -training >> step=1734600, episode=290 reward=0.7661751 (500.17 it/sec) -training >> step=1734700, episode=290 reward=0.7460939 (554.20 it/sec) -training >> step=1734800, episode=290 reward=0.7665601 (456.59 it/sec) -training >> step=1734900, episode=290 reward=0.7787327 (459.00 it/sec) -training >> step=1735000, episode=290 reward=0.7470768 (449.22 it/sec) -training >> step=1735100, episode=290 reward=0.7718428 (525.08 it/sec) -training >> step=1735200, episode=290 reward=0.7700657 (517.04 it/sec) -training >> step=1735300, episode=290 reward=0.750487 (424.76 it/sec) -training >> step=1735400, episode=290 reward=0.7780044 (442.69 it/sec) -training >> step=1735500, episode=290 reward=0.7773204 (484.39 it/sec) -training >> step=1735600, episode=290 reward=0.7406327 (504.43 it/sec) -training >> step=1735700, episode=290 reward=0.7536696 (521.33 it/sec) -training >> step=1735800, episode=290 reward=0.7704346 (492.87 it/sec) -training >> step=1735900, episode=290 reward=0.7674988 (424.54 it/sec) -training >> step=1736000, episode=290 reward=0.7553844 (437.06 it/sec) -training >> step=1736100, episode=290 reward=0.7626167 (496.99 it/sec) -training >> step=1736200, episode=290 reward=0.7495112 (529.39 it/sec) -training >> step=1736300, episode=290 reward=0.7416724 (486.44 it/sec) -training >> step=1736400, episode=290 reward=0.7780118 (481.44 it/sec) -training >> step=1736500, episode=290 reward=0.7753355 (517.86 it/sec) -training >> step=1736600, episode=290 reward=0.7521673 (565.77 it/sec) -training >> step=1736700, episode=290 reward=0.7557923 (540.08 it/sec) -training >> step=1736800, episode=290 reward=0.7746965 (486.06 it/sec) -training >> step=1736900, episode=290 reward=0.7752079 (481.18 it/sec) -training >> step=1737000, episode=290 reward=0.7551075 (486.74 it/sec) -training >> step=1737100, episode=290 reward=0.7812366 (544.17 it/sec) -training >> step=1737200, episode=290 reward=0.7623233 (526.93 it/sec) -training >> step=1737300, episode=290 reward=0.7403253 (501.17 it/sec) -training >> step=1737400, episode=290 reward=0.7515272 (522.84 it/sec) -training >> step=1737500, episode=290 reward=0.7708476 (478.09 it/sec) -training >> step=1737600, episode=290 reward=0.7732022 (511.15 it/sec) -training >> step=1737700, episode=290 reward=0.7427199 (514.93 it/sec) -training >> step=1737800, episode=290 reward=0.7514994 (538.40 it/sec) -training >> step=1737900, episode=290 reward=0.7557374 (496.58 it/sec) -training >> step=1738000, episode=290 reward=0.7707459 (455.06 it/sec) -training >> step=1738100, episode=290 reward=0.7717443 (542.62 it/sec) -training >> step=1738200, episode=290 reward=0.7768844 (547.71 it/sec) -training >> step=1738300, episode=290 reward=0.7782422 (507.58 it/sec) -training >> step=1738400, episode=290 reward=0.7505865 (463.68 it/sec) -training >> step=1738500, episode=290 reward=0.737753 (468.90 it/sec) -training >> step=1738600, episode=290 reward=0.7472628 (469.18 it/sec) -training >> step=1738700, episode=290 reward=0.788386 (504.10 it/sec) -training >> step=1738800, episode=290 reward=0.759263 (495.96 it/sec) -training >> step=1738900, episode=290 reward=0.7405974 (505.22 it/sec) -training >> step=1739000, episode=290 reward=0.7375049 (546.95 it/sec) -training >> step=1739100, episode=290 reward=0.7576194 (471.02 it/sec) -training >> step=1739200, episode=290 reward=0.7714563 (516.72 it/sec) -training >> step=1739300, episode=291 reward=0.747797 (173.70 it/sec) -training >> step=1739400, episode=291 reward=0.7494511 (505.16 it/sec) -training >> step=1739500, episode=291 reward=0.7430597 (539.69 it/sec) -training >> step=1739600, episode=291 reward=0.7579165 (534.67 it/sec) -training >> step=1739700, episode=291 reward=0.7516333 (535.41 it/sec) -training >> step=1739800, episode=291 reward=0.780414 (541.32 it/sec) -training >> step=1739900, episode=291 reward=0.7809508 (483.18 it/sec) -training >> step=1740000, episode=291 reward=0.7559257 (533.69 it/sec) -training >> step=1740100, episode=291 reward=0.7632968 (558.80 it/sec) -training >> step=1740200, episode=291 reward=0.7651691 (503.31 it/sec) -training >> step=1740300, episode=291 reward=0.767853 (550.87 it/sec) -training >> step=1740400, episode=291 reward=0.7688878 (494.13 it/sec) -training >> step=1740500, episode=291 reward=0.7552992 (489.44 it/sec) -training >> step=1740600, episode=291 reward=0.7641798 (574.08 it/sec) -training >> step=1740700, episode=291 reward=0.7696288 (519.00 it/sec) -training >> step=1740800, episode=291 reward=0.7459657 (522.77 it/sec) -training >> step=1740900, episode=291 reward=0.7727778 (547.93 it/sec) -training >> step=1741000, episode=291 reward=0.7591739 (456.31 it/sec) -training >> step=1741100, episode=291 reward=0.7566673 (502.18 it/sec) -training >> step=1741200, episode=291 reward=0.7544525 (545.08 it/sec) -training >> step=1741300, episode=291 reward=0.7717437 (521.91 it/sec) -training >> step=1741400, episode=291 reward=0.7926283 (528.62 it/sec) -training >> step=1741500, episode=291 reward=0.749167 (487.66 it/sec) -training >> step=1741600, episode=291 reward=0.7573229 (528.12 it/sec) -training >> step=1741700, episode=291 reward=0.7496232 (538.49 it/sec) -training >> step=1741800, episode=291 reward=0.7574658 (514.99 it/sec) -training >> step=1741900, episode=291 reward=0.7603197 (525.01 it/sec) -training >> step=1742000, episode=291 reward=0.7546874 (506.81 it/sec) -training >> step=1742100, episode=291 reward=0.7862296 (508.83 it/sec) -training >> step=1742200, episode=291 reward=0.7655783 (510.63 it/sec) -training >> step=1742300, episode=291 reward=0.779547 (496.42 it/sec) -training >> step=1742400, episode=291 reward=0.7550393 (541.37 it/sec) -training >> step=1742500, episode=291 reward=0.7657066 (482.80 it/sec) -training >> step=1742600, episode=291 reward=0.7870607 (493.37 it/sec) -training >> step=1742700, episode=291 reward=0.793903 (541.68 it/sec) -training >> step=1742800, episode=291 reward=0.7935518 (501.13 it/sec) -training >> step=1742900, episode=291 reward=0.7723653 (515.75 it/sec) -training >> step=1743000, episode=291 reward=0.7687787 (482.20 it/sec) -training >> step=1743100, episode=291 reward=0.7549058 (425.77 it/sec) -training >> step=1743200, episode=291 reward=0.7651285 (475.78 it/sec) -training >> step=1743300, episode=291 reward=0.7703362 (490.39 it/sec) -training >> step=1743400, episode=291 reward=0.794016 (459.34 it/sec) -training >> step=1743500, episode=291 reward=0.7560345 (439.10 it/sec) -training >> step=1743600, episode=291 reward=0.7467442 (409.77 it/sec) -training >> step=1743700, episode=291 reward=0.7715541 (397.70 it/sec) -training >> step=1743800, episode=291 reward=0.7499795 (402.56 it/sec) -training >> step=1743900, episode=291 reward=0.7572036 (433.11 it/sec) -training >> step=1744000, episode=291 reward=0.7800377 (408.77 it/sec) -training >> step=1744100, episode=291 reward=0.7692615 (484.55 it/sec) -training >> step=1744200, episode=291 reward=0.7579073 (484.27 it/sec) -training >> step=1744300, episode=291 reward=0.7569721 (499.06 it/sec) -training >> step=1744400, episode=291 reward=0.772086 (430.46 it/sec) -training >> step=1744500, episode=291 reward=0.7634069 (421.87 it/sec) -training >> step=1744600, episode=291 reward=0.7809376 (403.03 it/sec) -training >> step=1744700, episode=291 reward=0.7706155 (487.72 it/sec) -training >> step=1744800, episode=291 reward=0.7577515 (476.62 it/sec) -training >> step=1744900, episode=291 reward=0.7614874 (436.09 it/sec) -training >> step=1745000, episode=291 reward=0.7738625 (471.73 it/sec) -training >> step=1745100, episode=291 reward=0.7365742 (433.02 it/sec) -training >> step=1745200, episode=291 reward=0.7543843 (410.42 it/sec) -training >> step=1745300, episode=292 reward=0.7470145 (121.79 it/sec) -training >> step=1745400, episode=292 reward=0.731741 (268.55 it/sec) -training >> step=1745500, episode=292 reward=0.7541304 (414.06 it/sec) -training >> step=1745600, episode=292 reward=0.7575185 (491.02 it/sec) -training >> step=1745700, episode=292 reward=0.7713566 (472.14 it/sec) -training >> step=1745800, episode=292 reward=0.7475153 (522.39 it/sec) -training >> step=1745900, episode=292 reward=0.7791274 (451.61 it/sec) -training >> step=1746000, episode=292 reward=0.7565766 (508.72 it/sec) -training >> step=1746100, episode=292 reward=0.7436691 (497.46 it/sec) -training >> step=1746200, episode=292 reward=0.746561 (552.77 it/sec) -training >> step=1746300, episode=292 reward=0.7739986 (553.00 it/sec) -training >> step=1746400, episode=292 reward=0.78039 (524.36 it/sec) -training >> step=1746500, episode=292 reward=0.7676584 (505.17 it/sec) -training >> step=1746600, episode=292 reward=0.7379556 (563.14 it/sec) -training >> step=1746700, episode=292 reward=0.7833443 (546.45 it/sec) -training >> step=1746800, episode=292 reward=0.7330265 (506.67 it/sec) -training >> step=1746900, episode=292 reward=0.7507182 (585.74 it/sec) -training >> step=1747000, episode=292 reward=0.761426 (533.64 it/sec) -training >> step=1747100, episode=292 reward=0.7576941 (526.76 it/sec) -training >> step=1747200, episode=292 reward=0.7734057 (552.28 it/sec) -training >> step=1747300, episode=292 reward=0.7596917 (552.90 it/sec) -training >> step=1747400, episode=292 reward=0.7480035 (526.16 it/sec) -training >> step=1747500, episode=292 reward=0.7536919 (550.15 it/sec) -training >> step=1747600, episode=292 reward=0.762633 (514.75 it/sec) -training >> step=1747700, episode=292 reward=0.7806666 (530.04 it/sec) -training >> step=1747800, episode=292 reward=0.768145 (489.85 it/sec) -training >> step=1747900, episode=292 reward=0.7924907 (474.38 it/sec) -training >> step=1748000, episode=292 reward=0.7983305 (497.76 it/sec) -training >> step=1748100, episode=292 reward=0.7514055 (544.25 it/sec) -training >> step=1748200, episode=292 reward=0.7711486 (495.67 it/sec) -training >> step=1748300, episode=292 reward=0.757575 (534.67 it/sec) -training >> step=1748400, episode=292 reward=0.7551302 (527.32 it/sec) -training >> step=1748500, episode=292 reward=0.7806492 (563.15 it/sec) -training >> step=1748600, episode=292 reward=0.7809059 (532.69 it/sec) -training >> step=1748700, episode=292 reward=0.7752866 (559.66 it/sec) -training >> step=1748800, episode=292 reward=0.7738068 (559.15 it/sec) -training >> step=1748900, episode=292 reward=0.7354572 (523.95 it/sec) -training >> step=1749000, episode=292 reward=0.7620883 (535.76 it/sec) -training >> step=1749100, episode=292 reward=0.7602053 (574.22 it/sec) -training >> step=1749200, episode=292 reward=0.7634059 (489.40 it/sec) -training >> step=1749300, episode=292 reward=0.7462093 (548.02 it/sec) -training >> step=1749400, episode=292 reward=0.7603405 (483.62 it/sec) -training >> step=1749500, episode=292 reward=0.7585745 (510.02 it/sec) -training >> step=1749600, episode=292 reward=0.7670984 (576.22 it/sec) -training >> step=1749700, episode=292 reward=0.7470469 (561.19 it/sec) -training >> step=1749800, episode=292 reward=0.7668149 (543.27 it/sec) -training >> step=1749900, episode=292 reward=0.7737334 (535.31 it/sec) -training >> step=1750000, episode=292 reward=0.7406526 (526.35 it/sec) -training >> step=1750100, episode=292 reward=0.7713291 (511.51 it/sec) -training >> step=1750200, episode=292 reward=0.7571177 (509.20 it/sec) -training >> step=1750300, episode=292 reward=0.7712108 (450.65 it/sec) -training >> step=1750400, episode=292 reward=0.7552135 (475.55 it/sec) -training >> step=1750500, episode=292 reward=0.7515506 (487.56 it/sec) -training >> step=1750600, episode=292 reward=0.7439098 (498.32 it/sec) -training >> step=1750700, episode=292 reward=0.7382348 (509.80 it/sec) -training >> step=1750800, episode=292 reward=0.7600058 (541.91 it/sec) -training >> step=1750900, episode=292 reward=0.7642105 (519.25 it/sec) -training >> step=1751000, episode=292 reward=0.7585418 (559.45 it/sec) -training >> step=1751100, episode=292 reward=0.775085 (534.65 it/sec) -training >> step=1751200, episode=292 reward=0.7155184 (550.79 it/sec) -training >> step=1751300, episode=293 reward=0.747956 (118.67 it/sec) -training >> step=1751400, episode=293 reward=0.7602313 (535.76 it/sec) -training >> step=1751500, episode=293 reward=0.7396372 (521.05 it/sec) -training >> step=1751600, episode=293 reward=0.7532986 (507.32 it/sec) -training >> step=1751700, episode=293 reward=0.7535883 (534.45 it/sec) -training >> step=1751800, episode=293 reward=0.7758568 (507.75 it/sec) -training >> step=1751900, episode=293 reward=0.7562345 (542.98 it/sec) -training >> step=1752000, episode=293 reward=0.7438303 (569.71 it/sec) -training >> step=1752100, episode=293 reward=0.7772079 (543.31 it/sec) -training >> step=1752200, episode=293 reward=0.7505652 (476.31 it/sec) -training >> step=1752300, episode=293 reward=0.7619475 (520.15 it/sec) -training >> step=1752400, episode=293 reward=0.7939139 (548.50 it/sec) -training >> step=1752500, episode=293 reward=0.7833686 (476.02 it/sec) -training >> step=1752600, episode=293 reward=0.7601969 (483.00 it/sec) -training >> step=1752700, episode=293 reward=0.7862811 (487.95 it/sec) -training >> step=1752800, episode=293 reward=0.7663335 (464.88 it/sec) -training >> step=1752900, episode=293 reward=0.7422599 (455.32 it/sec) -training >> step=1753000, episode=293 reward=0.7613377 (440.03 it/sec) -training >> step=1753100, episode=293 reward=0.7679739 (509.62 it/sec) -training >> step=1753200, episode=293 reward=0.7682626 (442.98 it/sec) -training >> step=1753300, episode=293 reward=0.779783 (466.84 it/sec) -training >> step=1753400, episode=293 reward=0.7765017 (472.70 it/sec) -training >> step=1753500, episode=293 reward=0.7579759 (478.36 it/sec) -training >> step=1753600, episode=293 reward=0.7573631 (493.00 it/sec) -training >> step=1753700, episode=293 reward=0.7416255 (446.92 it/sec) -training >> step=1753800, episode=293 reward=0.7888039 (501.71 it/sec) -training >> step=1753900, episode=293 reward=0.751561 (512.66 it/sec) -training >> step=1754000, episode=293 reward=0.7562776 (486.79 it/sec) -training >> step=1754100, episode=293 reward=0.7736575 (491.91 it/sec) -training >> step=1754200, episode=293 reward=0.7423227 (511.85 it/sec) -training >> step=1754300, episode=293 reward=0.7757076 (531.82 it/sec) -training >> step=1754400, episode=293 reward=0.742101 (542.53 it/sec) -training >> step=1754500, episode=293 reward=0.7745654 (540.40 it/sec) -training >> step=1754600, episode=293 reward=0.7801934 (535.44 it/sec) -training >> step=1754700, episode=293 reward=0.7337005 (476.29 it/sec) -training >> step=1754800, episode=293 reward=0.7770222 (530.82 it/sec) -training >> step=1754900, episode=293 reward=0.7771341 (477.83 it/sec) -training >> step=1755000, episode=293 reward=0.7551207 (429.32 it/sec) -training >> step=1755100, episode=293 reward=0.7508383 (386.39 it/sec) -training >> step=1755200, episode=293 reward=0.7715714 (395.92 it/sec) -training >> step=1755300, episode=293 reward=0.7654666 (359.49 it/sec) -training >> step=1755400, episode=293 reward=0.7689314 (410.36 it/sec) -training >> step=1755500, episode=293 reward=0.7768053 (481.27 it/sec) -training >> step=1755600, episode=293 reward=0.7667806 (508.05 it/sec) -training >> step=1755700, episode=293 reward=0.7534784 (489.21 it/sec) -training >> step=1755800, episode=293 reward=0.7416662 (531.52 it/sec) -training >> step=1755900, episode=293 reward=0.7542146 (546.47 it/sec) -training >> step=1756000, episode=293 reward=0.7466744 (545.10 it/sec) -training >> step=1756100, episode=293 reward=0.7680899 (518.94 it/sec) -training >> step=1756200, episode=293 reward=0.716227 (527.64 it/sec) -training >> step=1756300, episode=293 reward=0.7465714 (547.59 it/sec) -training >> step=1756400, episode=293 reward=0.77632 (529.77 it/sec) -training >> step=1756500, episode=293 reward=0.786709 (526.59 it/sec) -training >> step=1756600, episode=293 reward=0.7563353 (541.69 it/sec) -training >> step=1756700, episode=293 reward=0.7448627 (537.90 it/sec) -training >> step=1756800, episode=293 reward=0.7592116 (502.02 it/sec) -training >> step=1756900, episode=293 reward=0.7571489 (570.54 it/sec) -training >> step=1757000, episode=293 reward=0.7914274 (576.90 it/sec) -training >> step=1757100, episode=293 reward=0.7528904 (536.51 it/sec) -training >> step=1757200, episode=293 reward=0.7370824 (548.45 it/sec) -training >> step=1757300, episode=294 reward=0.7783911 (132.26 it/sec) -training >> step=1757400, episode=294 reward=0.7606292 (505.09 it/sec) -training >> step=1757500, episode=294 reward=0.7524465 (532.94 it/sec) -training >> step=1757600, episode=294 reward=0.7517236 (505.13 it/sec) -training >> step=1757700, episode=294 reward=0.7575946 (557.09 it/sec) -training >> step=1757800, episode=294 reward=0.7392245 (526.64 it/sec) -training >> step=1757900, episode=294 reward=0.7654587 (554.43 it/sec) -training >> step=1758000, episode=294 reward=0.780717 (524.78 it/sec) -training >> step=1758100, episode=294 reward=0.7499116 (522.76 it/sec) -training >> step=1758200, episode=294 reward=0.7733302 (555.37 it/sec) -training >> step=1758300, episode=294 reward=0.761313 (549.40 it/sec) -training >> step=1758400, episode=294 reward=0.7742059 (507.37 it/sec) -training >> step=1758500, episode=294 reward=0.751514 (553.10 it/sec) -training >> step=1758600, episode=294 reward=0.768437 (545.21 it/sec) -training >> step=1758700, episode=294 reward=0.7370331 (520.47 it/sec) -training >> step=1758800, episode=294 reward=0.7467659 (488.36 it/sec) -training >> step=1758900, episode=294 reward=0.7868371 (466.05 it/sec) -training >> step=1759000, episode=294 reward=0.7481386 (494.66 it/sec) -training >> step=1759100, episode=294 reward=0.774032 (458.05 it/sec) -training >> step=1759200, episode=294 reward=0.7743755 (457.51 it/sec) -training >> step=1759300, episode=294 reward=0.7508149 (428.52 it/sec) -training >> step=1759400, episode=294 reward=0.7660974 (478.15 it/sec) -training >> step=1759500, episode=294 reward=0.7644328 (482.33 it/sec) -training >> step=1759600, episode=294 reward=0.7639332 (439.49 it/sec) -training >> step=1759700, episode=294 reward=0.7694221 (526.08 it/sec) -training >> step=1759800, episode=294 reward=0.751475 (513.34 it/sec) -training >> step=1759900, episode=294 reward=0.7523964 (504.36 it/sec) -training >> step=1760000, episode=294 reward=0.7715659 (551.81 it/sec) -training >> step=1760100, episode=294 reward=0.7687481 (537.16 it/sec) -training >> step=1760200, episode=294 reward=0.7679983 (507.14 it/sec) -training >> step=1760300, episode=294 reward=0.7557926 (513.22 it/sec) -training >> step=1760400, episode=294 reward=0.7555324 (512.61 it/sec) -training >> step=1760500, episode=294 reward=0.7900248 (560.06 it/sec) -training >> step=1760600, episode=294 reward=0.7803685 (481.40 it/sec) -training >> step=1760700, episode=294 reward=0.7626938 (432.87 it/sec) -training >> step=1760800, episode=294 reward=0.7589764 (451.80 it/sec) -training >> step=1760900, episode=294 reward=0.7647788 (492.64 it/sec) -training >> step=1761000, episode=294 reward=0.7540885 (453.17 it/sec) -training >> step=1761100, episode=294 reward=0.788775 (527.54 it/sec) -training >> step=1761200, episode=294 reward=0.7689806 (507.96 it/sec) -training >> step=1761300, episode=294 reward=0.7729245 (545.64 it/sec) -training >> step=1761400, episode=294 reward=0.7563655 (511.91 it/sec) -training >> step=1761500, episode=294 reward=0.7439792 (517.64 it/sec) -training >> step=1761600, episode=294 reward=0.7557123 (544.22 it/sec) -training >> step=1761700, episode=294 reward=0.7573087 (466.17 it/sec) -training >> step=1761800, episode=294 reward=0.7657332 (450.39 it/sec) -training >> step=1761900, episode=294 reward=0.7411005 (488.51 it/sec) -training >> step=1762000, episode=294 reward=0.7814695 (516.69 it/sec) -training >> step=1762100, episode=294 reward=0.7488096 (562.27 it/sec) -training >> step=1762200, episode=294 reward=0.7652166 (402.61 it/sec) -training >> step=1762300, episode=294 reward=0.7330875 (428.36 it/sec) -training >> step=1762400, episode=294 reward=0.7781997 (334.56 it/sec) -training >> step=1762500, episode=294 reward=0.7636037 (385.09 it/sec) -training >> step=1762600, episode=294 reward=0.7612573 (362.91 it/sec) -training >> step=1762700, episode=294 reward=0.7669665 (432.40 it/sec) -training >> step=1762800, episode=294 reward=0.7876905 (461.34 it/sec) -training >> step=1762900, episode=294 reward=0.7534537 (509.56 it/sec) -training >> step=1763000, episode=294 reward=0.7527675 (515.25 it/sec) -training >> step=1763100, episode=294 reward=0.7475516 (507.94 it/sec) -training >> step=1763200, episode=294 reward=0.7579749 (561.26 it/sec) -training >> step=1763300, episode=295 reward=0.7544948 (124.39 it/sec) -training >> step=1763400, episode=295 reward=0.7557671 (508.47 it/sec) -training >> step=1763500, episode=295 reward=0.7532791 (521.35 it/sec) -training >> step=1763600, episode=295 reward=0.7439111 (537.16 it/sec) -training >> step=1763700, episode=295 reward=0.7543967 (502.16 it/sec) -training >> step=1763800, episode=295 reward=0.7597237 (514.31 it/sec) -training >> step=1763900, episode=295 reward=0.752883 (555.15 it/sec) -training >> step=1764000, episode=295 reward=0.7805576 (519.89 it/sec) -training >> step=1764100, episode=295 reward=0.7494604 (495.22 it/sec) -training >> step=1764200, episode=295 reward=0.7637168 (456.70 it/sec) -training >> step=1764300, episode=295 reward=0.7709758 (322.76 it/sec) -training >> step=1764400, episode=295 reward=0.7522156 (338.06 it/sec) -training >> step=1764500, episode=295 reward=0.7634661 (354.61 it/sec) -training >> step=1764600, episode=295 reward=0.7779657 (433.58 it/sec) -training >> step=1764700, episode=295 reward=0.746892 (485.73 it/sec) -training >> step=1764800, episode=295 reward=0.7649858 (500.48 it/sec) -training >> step=1764900, episode=295 reward=0.7572411 (472.89 it/sec) -training >> step=1765000, episode=295 reward=0.7631254 (550.29 it/sec) -training >> step=1765100, episode=295 reward=0.7727261 (569.78 it/sec) -training >> step=1765200, episode=295 reward=0.7580246 (485.68 it/sec) -training >> step=1765300, episode=295 reward=0.7715638 (546.27 it/sec) -training >> step=1765400, episode=295 reward=0.743317 (522.03 it/sec) -training >> step=1765500, episode=295 reward=0.7615447 (536.94 it/sec) -training >> step=1765600, episode=295 reward=0.7808442 (570.77 it/sec) -training >> step=1765700, episode=295 reward=0.7723248 (534.67 it/sec) -training >> step=1765800, episode=295 reward=0.7527215 (501.30 it/sec) -training >> step=1765900, episode=295 reward=0.7774307 (531.38 it/sec) -training >> step=1766000, episode=295 reward=0.7623613 (528.79 it/sec) -training >> step=1766100, episode=295 reward=0.7715294 (549.17 it/sec) -training >> step=1766200, episode=295 reward=0.7570729 (537.82 it/sec) -training >> step=1766300, episode=295 reward=0.7804781 (506.40 it/sec) -training >> step=1766400, episode=295 reward=0.7827116 (542.41 it/sec) -training >> step=1766500, episode=295 reward=0.7836912 (557.45 it/sec) -training >> step=1766600, episode=295 reward=0.7841764 (523.24 it/sec) -training >> step=1766700, episode=295 reward=0.760316 (498.78 it/sec) -training >> step=1766800, episode=295 reward=0.7622826 (459.80 it/sec) -training >> step=1766900, episode=295 reward=0.7590105 (447.87 it/sec) -training >> step=1767000, episode=295 reward=0.779577 (477.56 it/sec) -training >> step=1767100, episode=295 reward=0.7482363 (483.28 it/sec) -training >> step=1767200, episode=295 reward=0.7642817 (457.79 it/sec) -training >> step=1767300, episode=295 reward=0.7575753 (517.77 it/sec) -training >> step=1767400, episode=295 reward=0.7650139 (472.91 it/sec) -training >> step=1767500, episode=295 reward=0.7448893 (485.26 it/sec) -training >> step=1767600, episode=295 reward=0.7666705 (506.30 it/sec) -training >> step=1767700, episode=295 reward=0.7821745 (498.72 it/sec) -training >> step=1767800, episode=295 reward=0.7422987 (355.09 it/sec) -training >> step=1767900, episode=295 reward=0.7499534 (362.90 it/sec) -training >> step=1768000, episode=295 reward=0.7443283 (397.66 it/sec) -training >> step=1768100, episode=295 reward=0.7810616 (424.92 it/sec) -training >> step=1768200, episode=295 reward=0.7703661 (443.37 it/sec) -training >> step=1768300, episode=295 reward=0.7561272 (447.64 it/sec) -training >> step=1768400, episode=295 reward=0.7439512 (464.20 it/sec) -training >> step=1768500, episode=295 reward=0.766816 (447.51 it/sec) -training >> step=1768600, episode=295 reward=0.7417187 (480.16 it/sec) -training >> step=1768700, episode=295 reward=0.7399151 (429.30 it/sec) -training >> step=1768800, episode=295 reward=0.7612197 (457.36 it/sec) -training >> step=1768900, episode=295 reward=0.7554225 (477.06 it/sec) -training >> step=1769000, episode=295 reward=0.7561136 (474.35 it/sec) -training >> step=1769100, episode=295 reward=0.7676394 (506.99 it/sec) -training >> step=1769200, episode=295 reward=0.760439 (453.96 it/sec) -training >> step=1769300, episode=296 reward=0.7674702 (155.36 it/sec) -training >> step=1769400, episode=296 reward=0.7619931 (528.56 it/sec) -training >> step=1769500, episode=296 reward=0.7618608 (522.42 it/sec) -training >> step=1769600, episode=296 reward=0.7467198 (500.84 it/sec) -training >> step=1769700, episode=296 reward=0.7334062 (543.86 it/sec) -training >> step=1769800, episode=296 reward=0.7361646 (578.81 it/sec) -training >> step=1769900, episode=296 reward=0.7399592 (571.15 it/sec) -training >> step=1770000, episode=296 reward=0.749721 (543.54 it/sec) -training >> step=1770100, episode=296 reward=0.7629844 (564.27 it/sec) -training >> step=1770200, episode=296 reward=0.7549236 (567.78 it/sec) -training >> step=1770300, episode=296 reward=0.7686855 (501.45 it/sec) -training >> step=1770400, episode=296 reward=0.7539586 (507.56 it/sec) -training >> step=1770500, episode=296 reward=0.7975237 (468.98 it/sec) -training >> step=1770600, episode=296 reward=0.7570254 (517.17 it/sec) -training >> step=1770700, episode=296 reward=0.7411666 (500.13 it/sec) -training >> step=1770800, episode=296 reward=0.7732928 (514.16 it/sec) -training >> step=1770900, episode=296 reward=0.7797596 (570.03 it/sec) -training >> step=1771000, episode=296 reward=0.7873437 (568.42 it/sec) -training >> step=1771100, episode=296 reward=0.7456563 (485.43 it/sec) -training >> step=1771200, episode=296 reward=0.7543281 (536.73 it/sec) -training >> step=1771300, episode=296 reward=0.7681653 (521.83 it/sec) -training >> step=1771400, episode=296 reward=0.7942168 (556.72 it/sec) -training >> step=1771500, episode=296 reward=0.7571748 (516.18 it/sec) -training >> step=1771600, episode=296 reward=0.7826607 (479.90 it/sec) -training >> step=1771700, episode=296 reward=0.7265121 (510.32 it/sec) -training >> step=1771800, episode=296 reward=0.7615131 (495.00 it/sec) -training >> step=1771900, episode=296 reward=0.7524979 (534.09 it/sec) -training >> step=1772000, episode=296 reward=0.7719809 (570.76 it/sec) -training >> step=1772100, episode=296 reward=0.7829233 (490.23 it/sec) -training >> step=1772200, episode=296 reward=0.783254 (532.93 it/sec) -training >> step=1772300, episode=296 reward=0.7564548 (476.83 it/sec) -training >> step=1772400, episode=296 reward=0.7710214 (459.58 it/sec) -training >> step=1772500, episode=296 reward=0.7567029 (486.33 it/sec) -training >> step=1772600, episode=296 reward=0.7635226 (501.94 it/sec) -training >> step=1772700, episode=296 reward=0.7747611 (466.28 it/sec) -training >> step=1772800, episode=296 reward=0.7677933 (466.36 it/sec) -training >> step=1772900, episode=296 reward=0.7527289 (509.12 it/sec) -training >> step=1773000, episode=296 reward=0.7617993 (509.27 it/sec) -training >> step=1773100, episode=296 reward=0.7660807 (506.20 it/sec) -training >> step=1773200, episode=296 reward=0.7566265 (551.17 it/sec) -training >> step=1773300, episode=296 reward=0.7578614 (493.14 it/sec) -training >> step=1773400, episode=296 reward=0.7483298 (543.35 it/sec) -training >> step=1773500, episode=296 reward=0.7619264 (548.81 it/sec) -training >> step=1773600, episode=296 reward=0.7536873 (544.61 it/sec) -training >> step=1773700, episode=296 reward=0.7612106 (522.02 it/sec) -training >> step=1773800, episode=296 reward=0.7524654 (524.54 it/sec) -training >> step=1773900, episode=296 reward=0.7903413 (531.75 it/sec) -training >> step=1774000, episode=296 reward=0.7823044 (570.69 it/sec) -training >> step=1774100, episode=296 reward=0.7716778 (559.72 it/sec) -training >> step=1774200, episode=296 reward=0.7701803 (560.74 it/sec) -training >> step=1774300, episode=296 reward=0.7531924 (508.08 it/sec) -training >> step=1774400, episode=296 reward=0.7383543 (522.99 it/sec) -training >> step=1774500, episode=296 reward=0.7732083 (548.98 it/sec) -training >> step=1774600, episode=296 reward=0.7404323 (558.96 it/sec) -training >> step=1774700, episode=296 reward=0.7523395 (543.61 it/sec) -training >> step=1774800, episode=296 reward=0.7333183 (511.33 it/sec) -training >> step=1774900, episode=296 reward=0.7582784 (534.86 it/sec) -training >> step=1775000, episode=296 reward=0.7806158 (512.43 it/sec) -training >> step=1775100, episode=296 reward=0.7550744 (549.64 it/sec) -training >> step=1775200, episode=296 reward=0.7605938 (569.16 it/sec) -training >> step=1775300, episode=297 reward=0.7597368 (144.44 it/sec) -training >> step=1775400, episode=297 reward=0.7592704 (591.44 it/sec) -training >> step=1775500, episode=297 reward=0.7685617 (535.62 it/sec) -training >> step=1775600, episode=297 reward=0.7402872 (558.03 it/sec) -training >> step=1775700, episode=297 reward=0.7666951 (526.65 it/sec) -training >> step=1775800, episode=297 reward=0.7750787 (544.10 it/sec) -training >> step=1775900, episode=297 reward=0.7741627 (554.50 it/sec) -training >> step=1776000, episode=297 reward=0.7711448 (569.57 it/sec) -training >> step=1776100, episode=297 reward=0.7767825 (567.22 it/sec) -training >> step=1776200, episode=297 reward=0.7519576 (554.80 it/sec) -training >> step=1776300, episode=297 reward=0.7435433 (542.43 it/sec) -training >> step=1776400, episode=297 reward=0.7523093 (549.60 it/sec) -training >> step=1776500, episode=297 reward=0.7641466 (517.81 it/sec) -training >> step=1776600, episode=297 reward=0.7612004 (552.79 it/sec) -training >> step=1776700, episode=297 reward=0.7530611 (546.58 it/sec) -training >> step=1776800, episode=297 reward=0.7524605 (533.64 it/sec) -training >> step=1776900, episode=297 reward=0.7693241 (557.80 it/sec) -training >> step=1777000, episode=297 reward=0.7475623 (568.33 it/sec) -training >> step=1777100, episode=297 reward=0.7535705 (570.32 it/sec) -training >> step=1777200, episode=297 reward=0.7635078 (550.30 it/sec) -training >> step=1777300, episode=297 reward=0.7686467 (526.61 it/sec) -training >> step=1777400, episode=297 reward=0.7584346 (534.90 it/sec) -training >> step=1777500, episode=297 reward=0.7651045 (568.82 it/sec) -training >> step=1777600, episode=297 reward=0.7574996 (563.80 it/sec) -training >> step=1777700, episode=297 reward=0.7572087 (487.78 it/sec) -training >> step=1777800, episode=297 reward=0.7768596 (554.52 it/sec) -training >> step=1777900, episode=297 reward=0.7812073 (567.29 it/sec) -training >> step=1778000, episode=297 reward=0.7799132 (542.39 it/sec) -training >> step=1778100, episode=297 reward=0.7354745 (546.79 it/sec) -training >> step=1778200, episode=297 reward=0.7716784 (577.32 it/sec) -training >> step=1778300, episode=297 reward=0.7633003 (541.26 it/sec) -training >> step=1778400, episode=297 reward=0.7625802 (556.15 it/sec) -training >> step=1778500, episode=297 reward=0.7655691 (525.39 it/sec) -training >> step=1778600, episode=297 reward=0.7863969 (533.63 it/sec) -training >> step=1778700, episode=297 reward=0.785107 (585.78 it/sec) -training >> step=1778800, episode=297 reward=0.7644579 (527.30 it/sec) -training >> step=1778900, episode=297 reward=0.7516083 (489.27 it/sec) -training >> step=1779000, episode=297 reward=0.756407 (375.81 it/sec) -training >> step=1779100, episode=297 reward=0.7511505 (365.53 it/sec) -training >> step=1779200, episode=297 reward=0.760442 (430.31 it/sec) -training >> step=1779300, episode=297 reward=0.7706553 (438.34 it/sec) -training >> step=1779400, episode=297 reward=0.7590263 (491.02 it/sec) -training >> step=1779500, episode=297 reward=0.779172 (489.81 it/sec) -training >> step=1779600, episode=297 reward=0.7572639 (467.59 it/sec) -training >> step=1779700, episode=297 reward=0.7744794 (459.34 it/sec) -training >> step=1779800, episode=297 reward=0.7472672 (440.88 it/sec) -training >> step=1779900, episode=297 reward=0.7634985 (475.90 it/sec) -training >> step=1780000, episode=297 reward=0.7439494 (498.46 it/sec) -training >> step=1780100, episode=297 reward=0.7832813 (524.90 it/sec) -training >> step=1780200, episode=297 reward=0.7757779 (554.34 it/sec) -training >> step=1780300, episode=297 reward=0.7400293 (555.96 it/sec) -training >> step=1780400, episode=297 reward=0.7586284 (547.54 it/sec) -training >> step=1780500, episode=297 reward=0.7578412 (484.08 it/sec) -training >> step=1780600, episode=297 reward=0.764755 (553.93 it/sec) -training >> step=1780700, episode=297 reward=0.7670954 (553.90 it/sec) -training >> step=1780800, episode=297 reward=0.7767213 (550.39 it/sec) -training >> step=1780900, episode=297 reward=0.7484964 (593.31 it/sec) -training >> step=1781000, episode=297 reward=0.7463735 (536.14 it/sec) -training >> step=1781100, episode=297 reward=0.7730112 (462.65 it/sec) -training >> step=1781200, episode=297 reward=0.7459919 (526.68 it/sec) -training >> step=1781300, episode=298 reward=0.7648307 (154.39 it/sec) -training >> step=1781400, episode=298 reward=0.7440953 (531.40 it/sec) -training >> step=1781500, episode=298 reward=0.730868 (553.81 it/sec) -training >> step=1781600, episode=298 reward=0.7772043 (531.82 it/sec) -training >> step=1781700, episode=298 reward=0.7505386 (528.80 it/sec) -training >> step=1781800, episode=298 reward=0.7599182 (550.82 it/sec) -training >> step=1781900, episode=298 reward=0.7381355 (540.62 it/sec) -training >> step=1782000, episode=298 reward=0.7928233 (566.53 it/sec) -training >> step=1782100, episode=298 reward=0.7473798 (541.09 it/sec) -training >> step=1782200, episode=298 reward=0.7508786 (574.88 it/sec) -training >> step=1782300, episode=298 reward=0.7572072 (569.86 it/sec) -training >> step=1782400, episode=298 reward=0.78258 (561.62 it/sec) -training >> step=1782500, episode=298 reward=0.7615843 (558.89 it/sec) -training >> step=1782600, episode=298 reward=0.7538641 (549.56 it/sec) -training >> step=1782700, episode=298 reward=0.753543 (548.53 it/sec) -training >> step=1782800, episode=298 reward=0.7654797 (579.74 it/sec) -training >> step=1782900, episode=298 reward=0.7702011 (601.96 it/sec) -training >> step=1783000, episode=298 reward=0.7685081 (503.28 it/sec) -training >> step=1783100, episode=298 reward=0.7569497 (556.32 it/sec) -training >> step=1783200, episode=298 reward=0.7814497 (556.51 it/sec) -training >> step=1783300, episode=298 reward=0.7725149 (587.21 it/sec) -training >> step=1783400, episode=298 reward=0.7538079 (571.19 it/sec) -training >> step=1783500, episode=298 reward=0.760424 (565.92 it/sec) -training >> step=1783600, episode=298 reward=0.74914 (534.24 it/sec) -training >> step=1783700, episode=298 reward=0.7712947 (512.15 it/sec) -training >> step=1783800, episode=298 reward=0.7543228 (534.84 it/sec) -training >> step=1783900, episode=298 reward=0.7710772 (572.57 it/sec) -training >> step=1784000, episode=298 reward=0.7766587 (573.67 it/sec) -training >> step=1784100, episode=298 reward=0.7755239 (583.08 it/sec) -training >> step=1784200, episode=298 reward=0.7583718 (432.02 it/sec) -training >> step=1784300, episode=298 reward=0.7550313 (574.09 it/sec) -training >> step=1784400, episode=298 reward=0.7674176 (581.63 it/sec) -training >> step=1784500, episode=298 reward=0.7526305 (560.06 it/sec) -training >> step=1784600, episode=298 reward=0.7632368 (561.74 it/sec) -training >> step=1784700, episode=298 reward=0.7471362 (537.80 it/sec) -training >> step=1784800, episode=298 reward=0.7622863 (487.56 it/sec) -training >> step=1784900, episode=298 reward=0.762251 (528.42 it/sec) -training >> step=1785000, episode=298 reward=0.765714 (544.25 it/sec) -training >> step=1785100, episode=298 reward=0.7783836 (593.96 it/sec) -training >> step=1785200, episode=298 reward=0.7745494 (586.68 it/sec) -training >> step=1785300, episode=298 reward=0.7539324 (509.39 it/sec) -training >> step=1785400, episode=298 reward=0.7578202 (549.33 it/sec) -training >> step=1785500, episode=298 reward=0.7570923 (590.91 it/sec) -training >> step=1785600, episode=298 reward=0.7628102 (564.71 it/sec) -training >> step=1785700, episode=298 reward=0.7854531 (548.28 it/sec) -training >> step=1785800, episode=298 reward=0.7766899 (509.48 it/sec) -training >> step=1785900, episode=298 reward=0.7517039 (554.26 it/sec) -training >> step=1786000, episode=298 reward=0.7735298 (505.82 it/sec) -training >> step=1786100, episode=298 reward=0.7539622 (559.41 it/sec) -training >> step=1786200, episode=298 reward=0.7269409 (579.00 it/sec) -training >> step=1786300, episode=298 reward=0.7414682 (583.17 it/sec) -training >> step=1786400, episode=298 reward=0.7599766 (541.75 it/sec) -training >> step=1786500, episode=298 reward=0.7548053 (505.34 it/sec) -training >> step=1786600, episode=298 reward=0.7752198 (564.51 it/sec) -training >> step=1786700, episode=298 reward=0.7478838 (571.61 it/sec) -training >> step=1786800, episode=298 reward=0.7603936 (536.30 it/sec) -training >> step=1786900, episode=298 reward=0.7482348 (576.79 it/sec) -training >> step=1787000, episode=298 reward=0.7625304 (510.94 it/sec) -training >> step=1787100, episode=298 reward=0.7552552 (486.02 it/sec) -training >> step=1787200, episode=298 reward=0.7517468 (464.69 it/sec) -training >> step=1787300, episode=299 reward=0.7527241 (156.72 it/sec) -training >> step=1787400, episode=299 reward=0.7487974 (536.89 it/sec) -training >> step=1787500, episode=299 reward=0.7475021 (543.75 it/sec) -training >> step=1787600, episode=299 reward=0.749878 (543.38 it/sec) -training >> step=1787700, episode=299 reward=0.77728 (570.59 it/sec) -training >> step=1787800, episode=299 reward=0.7622899 (501.18 it/sec) -training >> step=1787900, episode=299 reward=0.7646092 (506.03 it/sec) -training >> step=1788000, episode=299 reward=0.7549689 (571.49 it/sec) -training >> step=1788100, episode=299 reward=0.7749781 (560.69 it/sec) -training >> step=1788200, episode=299 reward=0.7697782 (552.08 it/sec) -training >> step=1788300, episode=299 reward=0.7891321 (574.92 it/sec) -training >> step=1788400, episode=299 reward=0.757911 (512.68 it/sec) -training >> step=1788500, episode=299 reward=0.7595134 (607.02 it/sec) -training >> step=1788600, episode=299 reward=0.7743006 (550.60 it/sec) -training >> step=1788700, episode=299 reward=0.7777865 (547.44 it/sec) -training >> step=1788800, episode=299 reward=0.757639 (563.17 it/sec) -training >> step=1788900, episode=299 reward=0.7521152 (569.40 it/sec) -training >> step=1789000, episode=299 reward=0.7520337 (489.67 it/sec) -training >> step=1789100, episode=299 reward=0.7668696 (553.63 it/sec) -training >> step=1789200, episode=299 reward=0.7651973 (557.36 it/sec) -training >> step=1789300, episode=299 reward=0.7941203 (563.65 it/sec) -training >> step=1789400, episode=299 reward=0.7595538 (568.73 it/sec) -training >> step=1789500, episode=299 reward=0.7745678 (536.10 it/sec) -training >> step=1789600, episode=299 reward=0.7741596 (520.07 it/sec) -training >> step=1789700, episode=299 reward=0.7535173 (573.21 it/sec) -training >> step=1789800, episode=299 reward=0.7603475 (570.98 it/sec) -training >> step=1789900, episode=299 reward=0.7717545 (546.90 it/sec) -training >> step=1790000, episode=299 reward=0.7618881 (592.92 it/sec) -training >> step=1790100, episode=299 reward=0.7683715 (522.70 it/sec) -training >> step=1790200, episode=299 reward=0.7642046 (540.68 it/sec) -training >> step=1790300, episode=299 reward=0.7718905 (541.72 it/sec) -training >> step=1790400, episode=299 reward=0.7415625 (480.94 it/sec) -training >> step=1790500, episode=299 reward=0.7735994 (513.91 it/sec) -training >> step=1790600, episode=299 reward=0.7489772 (502.41 it/sec) -training >> step=1790700, episode=299 reward=0.7731731 (508.09 it/sec) -training >> step=1790800, episode=299 reward=0.7577027 (488.85 it/sec) -training >> step=1790900, episode=299 reward=0.7602577 (509.90 it/sec) -training >> step=1791000, episode=299 reward=0.7720235 (503.57 it/sec) -training >> step=1791100, episode=299 reward=0.7563294 (475.94 it/sec) -training >> step=1791200, episode=299 reward=0.7758437 (471.56 it/sec) -training >> step=1791300, episode=299 reward=0.7426804 (484.01 it/sec) -training >> step=1791400, episode=299 reward=0.7786021 (518.73 it/sec) -training >> step=1791500, episode=299 reward=0.7753802 (558.58 it/sec) -training >> step=1791600, episode=299 reward=0.7558201 (512.72 it/sec) -training >> step=1791700, episode=299 reward=0.7571344 (464.93 it/sec) -training >> step=1791800, episode=299 reward=0.7688366 (489.02 it/sec) -training >> step=1791900, episode=299 reward=0.7558049 (461.53 it/sec) -training >> step=1792000, episode=299 reward=0.7734969 (418.30 it/sec) -training >> step=1792100, episode=299 reward=0.7659211 (445.30 it/sec) -training >> step=1792200, episode=299 reward=0.7648288 (433.11 it/sec) -training >> step=1792300, episode=299 reward=0.7744613 (490.03 it/sec) -training >> step=1792400, episode=299 reward=0.7728023 (474.82 it/sec) -training >> step=1792500, episode=299 reward=0.763631 (529.52 it/sec) -training >> step=1792600, episode=299 reward=0.7735103 (450.65 it/sec) -training >> step=1792700, episode=299 reward=0.7693627 (443.91 it/sec) -training >> step=1792800, episode=299 reward=0.7427664 (498.07 it/sec) -training >> step=1792900, episode=299 reward=0.7481894 (534.28 it/sec) -training >> step=1793000, episode=299 reward=0.7461246 (522.63 it/sec) -training >> step=1793100, episode=299 reward=0.7508579 (551.12 it/sec) -training >> step=1793200, episode=299 reward=0.7409197 (490.26 it/sec) -training >> step=1793300, episode=300 reward=0.7822388 (145.52 it/sec) -training >> step=1793400, episode=300 reward=0.7720238 (464.90 it/sec) -training >> step=1793500, episode=300 reward=0.7662014 (525.61 it/sec) -training >> step=1793600, episode=300 reward=0.7345042 (534.91 it/sec) -training >> step=1793700, episode=300 reward=0.7506428 (539.22 it/sec) -training >> step=1793800, episode=300 reward=0.7686408 (555.98 it/sec) -training >> step=1793900, episode=300 reward=0.7724724 (347.44 it/sec) -training >> step=1794000, episode=300 reward=0.7785007 (332.11 it/sec) -training >> step=1794100, episode=300 reward=0.7717435 (354.04 it/sec) -training >> step=1794200, episode=300 reward=0.7689077 (340.66 it/sec) -training >> step=1794300, episode=300 reward=0.7566803 (402.77 it/sec) -training >> step=1794400, episode=300 reward=0.7650917 (479.73 it/sec) -training >> step=1794500, episode=300 reward=0.7397397 (490.44 it/sec) -training >> step=1794600, episode=300 reward=0.7412723 (521.42 it/sec) -training >> step=1794700, episode=300 reward=0.7580164 (554.53 it/sec) -training >> step=1794800, episode=300 reward=0.7705679 (465.83 it/sec) -training >> step=1794900, episode=300 reward=0.7637398 (481.85 it/sec) -training >> step=1795000, episode=300 reward=0.7411749 (500.55 it/sec) -training >> step=1795100, episode=300 reward=0.7591127 (496.72 it/sec) -training >> step=1795200, episode=300 reward=0.7737719 (480.05 it/sec) -training >> step=1795300, episode=300 reward=0.7570846 (477.59 it/sec) -training >> step=1795400, episode=300 reward=0.7904459 (494.76 it/sec) -training >> step=1795500, episode=300 reward=0.7657584 (517.81 it/sec) -training >> step=1795600, episode=300 reward=0.7710928 (518.37 it/sec) -training >> step=1795700, episode=300 reward=0.758499 (510.15 it/sec) -training >> step=1795800, episode=300 reward=0.7720166 (540.09 it/sec) -training >> step=1795900, episode=300 reward=0.7380617 (465.45 it/sec) -training >> step=1796000, episode=300 reward=0.7768962 (508.23 it/sec) -training >> step=1796100, episode=300 reward=0.7577375 (507.81 it/sec) -training >> step=1796200, episode=300 reward=0.7533641 (481.12 it/sec) -training >> step=1796300, episode=300 reward=0.7708203 (536.82 it/sec) -training >> step=1796400, episode=300 reward=0.7490168 (424.45 it/sec) -training >> step=1796500, episode=300 reward=0.7383068 (519.61 it/sec) -training >> step=1796600, episode=300 reward=0.7361137 (512.45 it/sec) -training >> step=1796700, episode=300 reward=0.7479428 (490.28 it/sec) -training >> step=1796800, episode=300 reward=0.7484185 (482.03 it/sec) -training >> step=1796900, episode=300 reward=0.7565746 (465.73 it/sec) -training >> step=1797000, episode=300 reward=0.7551826 (502.48 it/sec) -training >> step=1797100, episode=300 reward=0.7592334 (488.13 it/sec) -training >> step=1797200, episode=300 reward=0.7724419 (470.26 it/sec) -training >> step=1797300, episode=300 reward=0.7625921 (454.36 it/sec) -training >> step=1797400, episode=300 reward=0.7413837 (418.50 it/sec) -training >> step=1797500, episode=300 reward=0.7651198 (439.99 it/sec) -training >> step=1797600, episode=300 reward=0.761103 (431.24 it/sec) -training >> step=1797700, episode=300 reward=0.7589726 (442.49 it/sec) -training >> step=1797800, episode=300 reward=0.7618033 (434.89 it/sec) -training >> step=1797900, episode=300 reward=0.7230285 (471.03 it/sec) -training >> step=1798000, episode=300 reward=0.7514685 (443.93 it/sec) -training >> step=1798100, episode=300 reward=0.7647932 (446.67 it/sec) -training >> step=1798200, episode=300 reward=0.7628698 (415.40 it/sec) -training >> step=1798300, episode=300 reward=0.7476242 (439.78 it/sec) -training >> step=1798400, episode=300 reward=0.777454 (440.21 it/sec) -training >> step=1798500, episode=300 reward=0.7423826 (406.95 it/sec) -training >> step=1798600, episode=300 reward=0.7825092 (478.79 it/sec) -training >> step=1798700, episode=300 reward=0.7682776 (481.12 it/sec) -training >> step=1798800, episode=300 reward=0.7733256 (376.91 it/sec) -training >> step=1798900, episode=300 reward=0.7574622 (437.95 it/sec) -training >> step=1799000, episode=300 reward=0.7692173 (384.57 it/sec) -training >> step=1799100, episode=300 reward=0.7707522 (445.22 it/sec) -training >> step=1799200, episode=300 reward=0.7624094 (419.82 it/sec) -training >> step=1799300, episode=301 reward=0.7266824 (145.48 it/sec) -training >> step=1799400, episode=301 reward=0.7459077 (431.17 it/sec) -training >> step=1799500, episode=301 reward=0.7508045 (501.80 it/sec) -training >> step=1799600, episode=301 reward=0.7795424 (494.70 it/sec) -training >> step=1799700, episode=301 reward=0.7682287 (506.40 it/sec) -training >> step=1799800, episode=301 reward=0.7649621 (443.82 it/sec) -training >> step=1799900, episode=301 reward=0.7641114 (393.23 it/sec) -training >> step=1800000, episode=301 reward=0.7564679 (427.18 it/sec) -training >> step=1800100, episode=301 reward=0.7628807 (376.80 it/sec) -training >> step=1800200, episode=301 reward=0.7715632 (439.91 it/sec) -training >> step=1800300, episode=301 reward=0.756511 (344.68 it/sec) -training >> step=1800400, episode=301 reward=0.7493577 (427.79 it/sec) -training >> step=1800500, episode=301 reward=0.7762133 (468.89 it/sec) -training >> step=1800600, episode=301 reward=0.7696452 (445.52 it/sec) -training >> step=1800700, episode=301 reward=0.7762887 (423.26 it/sec) -training >> step=1800800, episode=301 reward=0.753476 (431.61 it/sec) -training >> step=1800900, episode=301 reward=0.7683125 (457.47 it/sec) -training >> step=1801000, episode=301 reward=0.7554515 (459.57 it/sec) -training >> step=1801100, episode=301 reward=0.7679343 (417.88 it/sec) -training >> step=1801200, episode=301 reward=0.76024 (459.52 it/sec) -training >> step=1801300, episode=301 reward=0.7871488 (461.92 it/sec) -training >> step=1801400, episode=301 reward=0.7547518 (464.80 it/sec) -training >> step=1801500, episode=301 reward=0.7751352 (443.16 it/sec) -training >> step=1801600, episode=301 reward=0.7616255 (431.84 it/sec) -training >> step=1801700, episode=301 reward=0.7462807 (385.10 it/sec) -training >> step=1801800, episode=301 reward=0.7421013 (401.71 it/sec) -training >> step=1801900, episode=301 reward=0.7676333 (448.18 it/sec) -training >> step=1802000, episode=301 reward=0.7600515 (470.95 it/sec) -training >> step=1802100, episode=301 reward=0.7664927 (463.24 it/sec) -training >> step=1802200, episode=301 reward=0.784198 (442.15 it/sec) -training >> step=1802300, episode=301 reward=0.7558997 (419.06 it/sec) -training >> step=1802400, episode=301 reward=0.7675739 (445.23 it/sec) -training >> step=1802500, episode=301 reward=0.7535203 (430.78 it/sec) -training >> step=1802600, episode=301 reward=0.7736371 (421.52 it/sec) -training >> step=1802700, episode=301 reward=0.7707201 (465.63 it/sec) -training >> step=1802800, episode=301 reward=0.7621217 (493.34 it/sec) -training >> step=1802900, episode=301 reward=0.7636132 (513.51 it/sec) -training >> step=1803000, episode=301 reward=0.7682887 (447.30 it/sec) -training >> step=1803100, episode=301 reward=0.7521806 (450.17 it/sec) -training >> step=1803200, episode=301 reward=0.7578061 (465.58 it/sec) -training >> step=1803300, episode=301 reward=0.7705039 (503.98 it/sec) -training >> step=1803400, episode=301 reward=0.7525955 (435.53 it/sec) -training >> step=1803500, episode=301 reward=0.7550417 (441.95 it/sec) -training >> step=1803600, episode=301 reward=0.7563956 (441.84 it/sec) -training >> step=1803700, episode=301 reward=0.7472371 (469.38 it/sec) -training >> step=1803800, episode=301 reward=0.7725621 (469.82 it/sec) -training >> step=1803900, episode=301 reward=0.7572711 (436.34 it/sec) -training >> step=1804000, episode=301 reward=0.7903608 (429.96 it/sec) -training >> step=1804100, episode=301 reward=0.7766762 (421.84 it/sec) -training >> step=1804200, episode=301 reward=0.7571447 (477.00 it/sec) -training >> step=1804300, episode=301 reward=0.7587624 (484.40 it/sec) -training >> step=1804400, episode=301 reward=0.7548533 (475.21 it/sec) -training >> step=1804500, episode=301 reward=0.7839766 (453.00 it/sec) -training >> step=1804600, episode=301 reward=0.7423359 (506.36 it/sec) -training >> step=1804700, episode=301 reward=0.7471088 (437.80 it/sec) -training >> step=1804800, episode=301 reward=0.7510759 (454.11 it/sec) -training >> step=1804900, episode=301 reward=0.7621895 (427.88 it/sec) -training >> step=1805000, episode=301 reward=0.7638299 (461.42 it/sec) -training >> step=1805100, episode=301 reward=0.7353408 (480.34 it/sec) -training >> step=1805200, episode=301 reward=0.7610193 (494.11 it/sec) -training >> step=1805300, episode=302 reward=0.7608064 (120.76 it/sec) -training >> step=1805400, episode=302 reward=0.7686685 (391.35 it/sec) -training >> step=1805500, episode=302 reward=0.7738402 (448.69 it/sec) -training >> step=1805600, episode=302 reward=0.7667723 (406.03 it/sec) -training >> step=1805700, episode=302 reward=0.7497888 (463.27 it/sec) -training >> step=1805800, episode=302 reward=0.7745104 (482.57 it/sec) -training >> step=1805900, episode=302 reward=0.7527851 (462.20 it/sec) -training >> step=1806000, episode=302 reward=0.7806108 (507.49 it/sec) -training >> step=1806100, episode=302 reward=0.7696714 (467.92 it/sec) -training >> step=1806200, episode=302 reward=0.7838761 (502.10 it/sec) -training >> step=1806300, episode=302 reward=0.7719597 (513.52 it/sec) -training >> step=1806400, episode=302 reward=0.7741915 (540.70 it/sec) -training >> step=1806500, episode=302 reward=0.7583693 (413.52 it/sec) -training >> step=1806600, episode=302 reward=0.7815793 (393.84 it/sec) -training >> step=1806700, episode=302 reward=0.7654433 (500.10 it/sec) -training >> step=1806800, episode=302 reward=0.766595 (487.40 it/sec) -training >> step=1806900, episode=302 reward=0.7722254 (467.85 it/sec) -training >> step=1807000, episode=302 reward=0.7628467 (409.98 it/sec) -training >> step=1807100, episode=302 reward=0.7818877 (394.39 it/sec) -training >> step=1807200, episode=302 reward=0.7909921 (432.87 it/sec) -training >> step=1807300, episode=302 reward=0.7580472 (365.69 it/sec) -training >> step=1807400, episode=302 reward=0.7627832 (432.45 it/sec) -training >> step=1807500, episode=302 reward=0.7812127 (397.58 it/sec) -training >> step=1807600, episode=302 reward=0.7648855 (363.80 it/sec) -training >> step=1807700, episode=302 reward=0.7635416 (354.55 it/sec) -training >> step=1807800, episode=302 reward=0.7780856 (445.44 it/sec) -training >> step=1807900, episode=302 reward=0.7661223 (403.54 it/sec) -training >> step=1808000, episode=302 reward=0.7679656 (448.99 it/sec) -training >> step=1808100, episode=302 reward=0.7695653 (429.62 it/sec) -training >> step=1808200, episode=302 reward=0.7542949 (444.44 it/sec) -training >> step=1808300, episode=302 reward=0.7818596 (428.40 it/sec) -training >> step=1808400, episode=302 reward=0.7729788 (434.25 it/sec) -training >> step=1808500, episode=302 reward=0.7521151 (495.50 it/sec) -training >> step=1808600, episode=302 reward=0.7564659 (496.21 it/sec) -training >> step=1808700, episode=302 reward=0.7760116 (530.41 it/sec) -training >> step=1808800, episode=302 reward=0.7788591 (533.25 it/sec) -training >> step=1808900, episode=302 reward=0.7663533 (469.54 it/sec) -training >> step=1809000, episode=302 reward=0.7516493 (477.56 it/sec) -training >> step=1809100, episode=302 reward=0.7800674 (529.00 it/sec) -training >> step=1809200, episode=302 reward=0.7622705 (507.33 it/sec) -training >> step=1809300, episode=302 reward=0.7566245 (533.04 it/sec) -training >> step=1809400, episode=302 reward=0.7814949 (503.41 it/sec) -training >> step=1809500, episode=302 reward=0.7501636 (419.11 it/sec) -training >> step=1809600, episode=302 reward=0.7438336 (513.35 it/sec) -training >> step=1809700, episode=302 reward=0.7343731 (506.31 it/sec) -training >> step=1809800, episode=302 reward=0.7607291 (461.20 it/sec) -training >> step=1809900, episode=302 reward=0.7733183 (411.78 it/sec) -training >> step=1810000, episode=302 reward=0.7764778 (449.84 it/sec) -training >> step=1810100, episode=302 reward=0.7750919 (508.62 it/sec) -training >> step=1810200, episode=302 reward=0.7420819 (523.70 it/sec) -training >> step=1810300, episode=302 reward=0.7310196 (499.80 it/sec) -training >> step=1810400, episode=302 reward=0.7632107 (414.30 it/sec) -training >> step=1810500, episode=302 reward=0.7665682 (512.83 it/sec) -training >> step=1810600, episode=302 reward=0.7687452 (507.54 it/sec) -training >> step=1810700, episode=302 reward=0.7869534 (461.81 it/sec) -training >> step=1810800, episode=302 reward=0.7330255 (531.70 it/sec) -training >> step=1810900, episode=302 reward=0.7434474 (474.81 it/sec) -training >> step=1811000, episode=302 reward=0.7658212 (496.28 it/sec) -training >> step=1811100, episode=302 reward=0.7435848 (543.00 it/sec) -training >> step=1811200, episode=302 reward=0.7402246 (523.45 it/sec) -training >> step=1811300, episode=303 reward=0.7720148 (107.65 it/sec) -training >> step=1811400, episode=303 reward=0.7527358 (467.35 it/sec) -training >> step=1811500, episode=303 reward=0.7655092 (502.71 it/sec) -training >> step=1811600, episode=303 reward=0.7486249 (473.42 it/sec) -training >> step=1811700, episode=303 reward=0.7818149 (488.53 it/sec) -training >> step=1811800, episode=303 reward=0.7580014 (502.28 it/sec) -training >> step=1811900, episode=303 reward=0.7550513 (466.75 it/sec) -training >> step=1812000, episode=303 reward=0.75459 (531.28 it/sec) -training >> step=1812100, episode=303 reward=0.7562365 (502.94 it/sec) -training >> step=1812200, episode=303 reward=0.7536539 (516.12 it/sec) -training >> step=1812300, episode=303 reward=0.7699476 (505.95 it/sec) -training >> step=1812400, episode=303 reward=0.7578617 (476.16 it/sec) -training >> step=1812500, episode=303 reward=0.7532144 (497.16 it/sec) -training >> step=1812600, episode=303 reward=0.7605019 (507.11 it/sec) -training >> step=1812700, episode=303 reward=0.763407 (553.27 it/sec) -training >> step=1812800, episode=303 reward=0.7397024 (476.24 it/sec) -training >> step=1812900, episode=303 reward=0.7853257 (514.08 it/sec) -training >> step=1813000, episode=303 reward=0.7470148 (542.77 it/sec) -training >> step=1813100, episode=303 reward=0.7488456 (459.07 it/sec) -training >> step=1813200, episode=303 reward=0.7649832 (512.00 it/sec) -training >> step=1813300, episode=303 reward=0.7529675 (550.29 it/sec) -training >> step=1813400, episode=303 reward=0.7395253 (507.53 it/sec) -training >> step=1813500, episode=303 reward=0.757607 (533.06 it/sec) -training >> step=1813600, episode=303 reward=0.7701007 (532.59 it/sec) -training >> step=1813700, episode=303 reward=0.760875 (506.59 it/sec) -training >> step=1813800, episode=303 reward=0.7757476 (526.12 it/sec) -training >> step=1813900, episode=303 reward=0.7743529 (487.23 it/sec) -training >> step=1814000, episode=303 reward=0.749152 (570.23 it/sec) -training >> step=1814100, episode=303 reward=0.7515544 (474.41 it/sec) -training >> step=1814200, episode=303 reward=0.7593867 (492.56 it/sec) -training >> step=1814300, episode=303 reward=0.7742212 (527.48 it/sec) -training >> step=1814400, episode=303 reward=0.755617 (523.71 it/sec) -training >> step=1814500, episode=303 reward=0.758425 (507.37 it/sec) -training >> step=1814600, episode=303 reward=0.7493727 (506.67 it/sec) -training >> step=1814700, episode=303 reward=0.7669749 (523.07 it/sec) -training >> step=1814800, episode=303 reward=0.7626094 (497.17 it/sec) -training >> step=1814900, episode=303 reward=0.7291442 (486.22 it/sec) -training >> step=1815000, episode=303 reward=0.7502955 (464.18 it/sec) -training >> step=1815100, episode=303 reward=0.7586855 (527.37 it/sec) -training >> step=1815200, episode=303 reward=0.7514706 (469.16 it/sec) -training >> step=1815300, episode=303 reward=0.7412696 (503.50 it/sec) -training >> step=1815400, episode=303 reward=0.7875825 (523.58 it/sec) -training >> step=1815500, episode=303 reward=0.7392287 (479.55 it/sec) -training >> step=1815600, episode=303 reward=0.7741717 (519.66 it/sec) -training >> step=1815700, episode=303 reward=0.7579016 (473.01 it/sec) -training >> step=1815800, episode=303 reward=0.7635896 (472.27 it/sec) -training >> step=1815900, episode=303 reward=0.7645893 (499.82 it/sec) -training >> step=1816000, episode=303 reward=0.7784615 (460.20 it/sec) -training >> step=1816100, episode=303 reward=0.7500399 (456.75 it/sec) -training >> step=1816200, episode=303 reward=0.7637723 (505.67 it/sec) -training >> step=1816300, episode=303 reward=0.7614929 (529.53 it/sec) -training >> step=1816400, episode=303 reward=0.7583467 (489.30 it/sec) -training >> step=1816500, episode=303 reward=0.7509592 (511.02 it/sec) -training >> step=1816600, episode=303 reward=0.7634401 (546.03 it/sec) -training >> step=1816700, episode=303 reward=0.7895306 (504.69 it/sec) -training >> step=1816800, episode=303 reward=0.7619268 (493.87 it/sec) -training >> step=1816900, episode=303 reward=0.7501265 (513.06 it/sec) -training >> step=1817000, episode=303 reward=0.7664953 (509.76 it/sec) -training >> step=1817100, episode=303 reward=0.7515998 (495.23 it/sec) -training >> step=1817200, episode=303 reward=0.7579985 (490.75 it/sec) -training >> step=1817300, episode=304 reward=0.7454712 (91.83 it/sec) -training >> step=1817400, episode=304 reward=0.7349215 (473.72 it/sec) -training >> step=1817500, episode=304 reward=0.7654075 (505.61 it/sec) -training >> step=1817600, episode=304 reward=0.7571177 (493.16 it/sec) -training >> step=1817700, episode=304 reward=0.7559688 (480.76 it/sec) -training >> step=1817800, episode=304 reward=0.7724662 (553.76 it/sec) -training >> step=1817900, episode=304 reward=0.7701024 (505.60 it/sec) -training >> step=1818000, episode=304 reward=0.7786795 (552.61 it/sec) -training >> step=1818100, episode=304 reward=0.7517436 (525.70 it/sec) -training >> step=1818200, episode=304 reward=0.7604576 (494.10 it/sec) -training >> step=1818300, episode=304 reward=0.7602294 (545.96 it/sec) -training >> step=1818400, episode=304 reward=0.7868115 (496.89 it/sec) -training >> step=1818500, episode=304 reward=0.7803963 (481.89 it/sec) -training >> step=1818600, episode=304 reward=0.7585423 (486.58 it/sec) -training >> step=1818700, episode=304 reward=0.7552063 (506.00 it/sec) -training >> step=1818800, episode=304 reward=0.7310804 (529.09 it/sec) -training >> step=1818900, episode=304 reward=0.7572694 (494.38 it/sec) -training >> step=1819000, episode=304 reward=0.7705986 (516.05 it/sec) -training >> step=1819100, episode=304 reward=0.7678686 (519.95 it/sec) -training >> step=1819200, episode=304 reward=0.7692045 (529.01 it/sec) -training >> step=1819300, episode=304 reward=0.7764329 (513.56 it/sec) -training >> step=1819400, episode=304 reward=0.7620129 (465.15 it/sec) -training >> step=1819500, episode=304 reward=0.7588748 (505.88 it/sec) -training >> step=1819600, episode=304 reward=0.7797769 (583.46 it/sec) -training >> step=1819700, episode=304 reward=0.7723767 (494.28 it/sec) -training >> step=1819800, episode=304 reward=0.7617719 (506.93 it/sec) -training >> step=1819900, episode=304 reward=0.7709872 (489.28 it/sec) -training >> step=1820000, episode=304 reward=0.7655987 (488.47 it/sec) -training >> step=1820100, episode=304 reward=0.7476722 (502.49 it/sec) -training >> step=1820200, episode=304 reward=0.7893515 (514.82 it/sec) -training >> step=1820300, episode=304 reward=0.7692176 (471.75 it/sec) -training >> step=1820400, episode=304 reward=0.7575529 (471.80 it/sec) -training >> step=1820500, episode=304 reward=0.7374833 (476.75 it/sec) -training >> step=1820600, episode=304 reward=0.7494782 (493.45 it/sec) -training >> step=1820700, episode=304 reward=0.7656158 (523.64 it/sec) -training >> step=1820800, episode=304 reward=0.7425257 (519.12 it/sec) -training >> step=1820900, episode=304 reward=0.7776516 (505.25 it/sec) -training >> step=1821000, episode=304 reward=0.7820415 (476.35 it/sec) -training >> step=1821100, episode=304 reward=0.7492791 (506.84 it/sec) -training >> step=1821200, episode=304 reward=0.7676057 (554.49 it/sec) -training >> step=1821300, episode=304 reward=0.7561854 (548.57 it/sec) -training >> step=1821400, episode=304 reward=0.7556338 (470.75 it/sec) -training >> step=1821500, episode=304 reward=0.7588068 (455.44 it/sec) -training >> step=1821600, episode=304 reward=0.7460641 (503.93 it/sec) -training >> step=1821700, episode=304 reward=0.76306 (505.34 it/sec) -training >> step=1821800, episode=304 reward=0.754697 (530.13 it/sec) -training >> step=1821900, episode=304 reward=0.7723652 (521.69 it/sec) -training >> step=1822000, episode=304 reward=0.7814685 (470.89 it/sec) -training >> step=1822100, episode=304 reward=0.7801268 (496.98 it/sec) -training >> step=1822200, episode=304 reward=0.7513548 (519.85 it/sec) -training >> step=1822300, episode=304 reward=0.7737385 (537.05 it/sec) -training >> step=1822400, episode=304 reward=0.7709458 (471.69 it/sec) -training >> step=1822500, episode=304 reward=0.7458668 (470.58 it/sec) -training >> step=1822600, episode=304 reward=0.7630379 (510.10 it/sec) -training >> step=1822700, episode=304 reward=0.7498825 (532.04 it/sec) -training >> step=1822800, episode=304 reward=0.7618491 (499.42 it/sec) -training >> step=1822900, episode=304 reward=0.7680777 (497.71 it/sec) -training >> step=1823000, episode=304 reward=0.7450854 (504.32 it/sec) -training >> step=1823100, episode=304 reward=0.7550255 (517.35 it/sec) -training >> step=1823200, episode=304 reward=0.7540516 (508.99 it/sec) -training >> step=1823300, episode=305 reward=0.7698156 (98.30 it/sec) -training >> step=1823400, episode=305 reward=0.7518845 (480.17 it/sec) -training >> step=1823500, episode=305 reward=0.7536429 (364.19 it/sec) -training >> step=1823600, episode=305 reward=0.7491947 (472.68 it/sec) -training >> step=1823700, episode=305 reward=0.7561836 (498.20 it/sec) -training >> step=1823800, episode=305 reward=0.7436749 (515.18 it/sec) -training >> step=1823900, episode=305 reward=0.7525427 (510.74 it/sec) -training >> step=1824000, episode=305 reward=0.7800056 (519.57 it/sec) -training >> step=1824100, episode=305 reward=0.7655602 (471.62 it/sec) -training >> step=1824200, episode=305 reward=0.7684497 (495.32 it/sec) -training >> step=1824300, episode=305 reward=0.7631342 (473.41 it/sec) -training >> step=1824400, episode=305 reward=0.7479411 (529.78 it/sec) -training >> step=1824500, episode=305 reward=0.7437961 (533.80 it/sec) -training >> step=1824600, episode=305 reward=0.7757708 (545.93 it/sec) -training >> step=1824700, episode=305 reward=0.7422719 (539.72 it/sec) -training >> step=1824800, episode=305 reward=0.7718645 (520.92 it/sec) -training >> step=1824900, episode=305 reward=0.7569194 (524.42 it/sec) -training >> step=1825000, episode=305 reward=0.7875956 (553.80 it/sec) -training >> step=1825100, episode=305 reward=0.7530497 (507.84 it/sec) -training >> step=1825200, episode=305 reward=0.7836562 (529.29 it/sec) -training >> step=1825300, episode=305 reward=0.7553952 (542.68 it/sec) -training >> step=1825400, episode=305 reward=0.7439913 (497.84 it/sec) -training >> step=1825500, episode=305 reward=0.7702376 (516.18 it/sec) -training >> step=1825600, episode=305 reward=0.7666777 (532.47 it/sec) -training >> step=1825700, episode=305 reward=0.7586259 (523.23 it/sec) -training >> step=1825800, episode=305 reward=0.7677055 (503.32 it/sec) -training >> step=1825900, episode=305 reward=0.7603418 (488.03 it/sec) -training >> step=1826000, episode=305 reward=0.7560875 (517.49 it/sec) -training >> step=1826100, episode=305 reward=0.7704189 (476.61 it/sec) -training >> step=1826200, episode=305 reward=0.7645674 (520.99 it/sec) -training >> step=1826300, episode=305 reward=0.7527168 (558.19 it/sec) -training >> step=1826400, episode=305 reward=0.7813159 (504.77 it/sec) -training >> step=1826500, episode=305 reward=0.7406338 (519.06 it/sec) -training >> step=1826600, episode=305 reward=0.7810808 (539.30 it/sec) -training >> step=1826700, episode=305 reward=0.7638181 (524.79 it/sec) -training >> step=1826800, episode=305 reward=0.7623608 (558.01 it/sec) -training >> step=1826900, episode=305 reward=0.7558554 (491.23 it/sec) -training >> step=1827000, episode=305 reward=0.7706228 (516.43 it/sec) -training >> step=1827100, episode=305 reward=0.7523303 (510.48 it/sec) -training >> step=1827200, episode=305 reward=0.7851874 (567.70 it/sec) -training >> step=1827300, episode=305 reward=0.7657754 (522.76 it/sec) -training >> step=1827400, episode=305 reward=0.7644439 (519.89 it/sec) -training >> step=1827500, episode=305 reward=0.7553608 (534.09 it/sec) -training >> step=1827600, episode=305 reward=0.7414877 (490.00 it/sec) -training >> step=1827700, episode=305 reward=0.7603128 (523.01 it/sec) -training >> step=1827800, episode=305 reward=0.7450866 (475.47 it/sec) -training >> step=1827900, episode=305 reward=0.7755986 (526.41 it/sec) -training >> step=1828000, episode=305 reward=0.753911 (535.43 it/sec) -training >> step=1828100, episode=305 reward=0.75104 (531.05 it/sec) -training >> step=1828200, episode=305 reward=0.7617012 (517.56 it/sec) -training >> step=1828300, episode=305 reward=0.7589551 (557.75 it/sec) -training >> step=1828400, episode=305 reward=0.7647003 (501.34 it/sec) -training >> step=1828500, episode=305 reward=0.7843564 (474.07 it/sec) -training >> step=1828600, episode=305 reward=0.760986 (545.53 it/sec) -training >> step=1828700, episode=305 reward=0.7799926 (531.15 it/sec) -training >> step=1828800, episode=305 reward=0.7493084 (489.47 it/sec) -training >> step=1828900, episode=305 reward=0.7422578 (515.39 it/sec) -training >> step=1829000, episode=305 reward=0.7472101 (500.04 it/sec) -training >> step=1829100, episode=305 reward=0.7423919 (569.75 it/sec) -training >> step=1829200, episode=305 reward=0.7507033 (510.13 it/sec) -training >> step=1829300, episode=306 reward=0.7392588 (74.02 it/sec) -training >> step=1829400, episode=306 reward=0.7567896 (350.89 it/sec) -training >> step=1829500, episode=306 reward=0.7427722 (506.24 it/sec) -training >> step=1829600, episode=306 reward=0.7458076 (497.56 it/sec) -training >> step=1829700, episode=306 reward=0.7595088 (391.50 it/sec) -training >> step=1829800, episode=306 reward=0.7393544 (500.99 it/sec) -training >> step=1829900, episode=306 reward=0.7569823 (458.60 it/sec) -training >> step=1830000, episode=306 reward=0.7606776 (506.63 it/sec) -training >> step=1830100, episode=306 reward=0.7668778 (516.58 it/sec) -training >> step=1830200, episode=306 reward=0.7519085 (511.93 it/sec) -training >> step=1830300, episode=306 reward=0.7638697 (552.13 it/sec) -training >> step=1830400, episode=306 reward=0.7528778 (493.93 it/sec) -training >> step=1830500, episode=306 reward=0.747973 (497.75 it/sec) -training >> step=1830600, episode=306 reward=0.7687177 (551.13 it/sec) -training >> step=1830700, episode=306 reward=0.7709067 (476.22 it/sec) -training >> step=1830800, episode=306 reward=0.7587384 (537.05 it/sec) -training >> step=1830900, episode=306 reward=0.7458584 (502.16 it/sec) -training >> step=1831000, episode=306 reward=0.7496239 (529.65 it/sec) -training >> step=1831100, episode=306 reward=0.7527435 (523.45 it/sec) -training >> step=1831200, episode=306 reward=0.7594591 (423.01 it/sec) -training >> step=1831300, episode=306 reward=0.7765487 (383.57 it/sec) -training >> step=1831400, episode=306 reward=0.7655525 (500.07 it/sec) -training >> step=1831500, episode=306 reward=0.7602566 (473.01 it/sec) -training >> step=1831600, episode=306 reward=0.7832771 (410.43 it/sec) -training >> step=1831700, episode=306 reward=0.7705665 (409.45 it/sec) -training >> step=1831800, episode=306 reward=0.7700881 (513.28 it/sec) -training >> step=1831900, episode=306 reward=0.766004 (468.95 it/sec) -training >> step=1832000, episode=306 reward=0.764026 (528.96 it/sec) -training >> step=1832100, episode=306 reward=0.7865402 (505.93 it/sec) -training >> step=1832200, episode=306 reward=0.7675217 (472.74 it/sec) -training >> step=1832300, episode=306 reward=0.7759601 (504.34 it/sec) -training >> step=1832400, episode=306 reward=0.7644267 (505.62 it/sec) -training >> step=1832500, episode=306 reward=0.7633473 (518.16 it/sec) -training >> step=1832600, episode=306 reward=0.7644354 (527.24 it/sec) -training >> step=1832700, episode=306 reward=0.7632084 (499.19 it/sec) -training >> step=1832800, episode=306 reward=0.7771514 (542.41 it/sec) -training >> step=1832900, episode=306 reward=0.7624463 (521.75 it/sec) -training >> step=1833000, episode=306 reward=0.7497776 (483.67 it/sec) -training >> step=1833100, episode=306 reward=0.7689226 (504.35 it/sec) -training >> step=1833200, episode=306 reward=0.7524962 (494.81 it/sec) -training >> step=1833300, episode=306 reward=0.7627397 (508.46 it/sec) -training >> step=1833400, episode=306 reward=0.7373394 (493.47 it/sec) -training >> step=1833500, episode=306 reward=0.7388541 (494.13 it/sec) -training >> step=1833600, episode=306 reward=0.7629396 (488.40 it/sec) -training >> step=1833700, episode=306 reward=0.7672461 (497.96 it/sec) -training >> step=1833800, episode=306 reward=0.7821336 (546.73 it/sec) -training >> step=1833900, episode=306 reward=0.7730243 (515.87 it/sec) -training >> step=1834000, episode=306 reward=0.7380971 (512.13 it/sec) -training >> step=1834100, episode=306 reward=0.7510568 (495.21 it/sec) -training >> step=1834200, episode=306 reward=0.7298566 (511.51 it/sec) -training >> step=1834300, episode=306 reward=0.7542809 (516.26 it/sec) -training >> step=1834400, episode=306 reward=0.7736962 (501.08 it/sec) -training >> step=1834500, episode=306 reward=0.7697504 (519.12 it/sec) -training >> step=1834600, episode=306 reward=0.7513734 (521.79 it/sec) -training >> step=1834700, episode=306 reward=0.7628772 (481.49 it/sec) -training >> step=1834800, episode=306 reward=0.7238474 (544.12 it/sec) -training >> step=1834900, episode=306 reward=0.7766958 (476.95 it/sec) -training >> step=1835000, episode=306 reward=0.7698259 (518.72 it/sec) -training >> step=1835100, episode=306 reward=0.7589994 (509.17 it/sec) -training >> step=1835200, episode=306 reward=0.7531651 (479.06 it/sec) -training >> step=1835300, episode=307 reward=0.7329368 (158.79 it/sec) -training >> step=1835400, episode=307 reward=0.7573718 (471.41 it/sec) -training >> step=1835500, episode=307 reward=0.7344403 (475.18 it/sec) -training >> step=1835600, episode=307 reward=0.7476334 (518.22 it/sec) -training >> step=1835700, episode=307 reward=0.7365044 (498.48 it/sec) -training >> step=1835800, episode=307 reward=0.7560618 (411.92 it/sec) -training >> step=1835900, episode=307 reward=0.7475947 (516.63 it/sec) -training >> step=1836000, episode=307 reward=0.7663453 (507.71 it/sec) -training >> step=1836100, episode=307 reward=0.7613172 (535.76 it/sec) -training >> step=1836200, episode=307 reward=0.767036 (549.28 it/sec) -training >> step=1836300, episode=307 reward=0.7603905 (502.69 it/sec) -training >> step=1836400, episode=307 reward=0.7619631 (532.11 it/sec) -training >> step=1836500, episode=307 reward=0.7432945 (471.75 it/sec) -training >> step=1836600, episode=307 reward=0.7690371 (450.16 it/sec) -training >> step=1836700, episode=307 reward=0.7759451 (477.28 it/sec) -training >> step=1836800, episode=307 reward=0.7615454 (430.44 it/sec) -training >> step=1836900, episode=307 reward=0.7573398 (534.45 it/sec) -training >> step=1837000, episode=307 reward=0.7710379 (499.26 it/sec) -training >> step=1837100, episode=307 reward=0.7701469 (484.96 it/sec) -training >> step=1837200, episode=307 reward=0.7595345 (509.13 it/sec) -training >> step=1837300, episode=307 reward=0.7632772 (537.73 it/sec) -training >> step=1837400, episode=307 reward=0.760448 (558.60 it/sec) -training >> step=1837500, episode=307 reward=0.7717479 (541.43 it/sec) -training >> step=1837600, episode=307 reward=0.7590622 (476.50 it/sec) -training >> step=1837700, episode=307 reward=0.7720655 (545.41 it/sec) -training >> step=1837800, episode=307 reward=0.7513316 (508.72 it/sec) -training >> step=1837900, episode=307 reward=0.7682089 (503.04 it/sec) -training >> step=1838000, episode=307 reward=0.7775046 (480.37 it/sec) -training >> step=1838100, episode=307 reward=0.7830228 (512.90 it/sec) -training >> step=1838200, episode=307 reward=0.771521 (506.44 it/sec) -training >> step=1838300, episode=307 reward=0.7429866 (508.10 it/sec) -training >> step=1838400, episode=307 reward=0.7776144 (498.27 it/sec) -training >> step=1838500, episode=307 reward=0.7619563 (482.20 it/sec) -training >> step=1838600, episode=307 reward=0.7619817 (522.51 it/sec) -training >> step=1838700, episode=307 reward=0.7368007 (521.48 it/sec) -training >> step=1838800, episode=307 reward=0.7618046 (489.03 it/sec) -training >> step=1838900, episode=307 reward=0.7452877 (509.53 it/sec) -training >> step=1839000, episode=307 reward=0.7644172 (487.02 it/sec) -training >> step=1839100, episode=307 reward=0.7631226 (483.04 it/sec) -training >> step=1839200, episode=307 reward=0.7572557 (520.97 it/sec) -training >> step=1839300, episode=307 reward=0.782181 (539.61 it/sec) -training >> step=1839400, episode=307 reward=0.7528101 (508.58 it/sec) -training >> step=1839500, episode=307 reward=0.7585045 (535.76 it/sec) -training >> step=1839600, episode=307 reward=0.7431085 (488.22 it/sec) -training >> step=1839700, episode=307 reward=0.7808688 (567.90 it/sec) -training >> step=1839800, episode=307 reward=0.7572165 (511.11 it/sec) -training >> step=1839900, episode=307 reward=0.7686762 (534.42 it/sec) -training >> step=1840000, episode=307 reward=0.7474527 (521.32 it/sec) -training >> step=1840100, episode=307 reward=0.77569 (532.75 it/sec) -training >> step=1840200, episode=307 reward=0.7475318 (523.77 it/sec) -training >> step=1840300, episode=307 reward=0.755801 (520.10 it/sec) -training >> step=1840400, episode=307 reward=0.760883 (521.80 it/sec) -training >> step=1840500, episode=307 reward=0.7534823 (496.12 it/sec) -training >> step=1840600, episode=307 reward=0.753731 (519.41 it/sec) -training >> step=1840700, episode=307 reward=0.7671898 (554.90 it/sec) -training >> step=1840800, episode=307 reward=0.7422472 (550.14 it/sec) -training >> step=1840900, episode=307 reward=0.76124 (528.05 it/sec) -training >> step=1841000, episode=307 reward=0.7252711 (462.35 it/sec) -training >> step=1841100, episode=307 reward=0.7622969 (478.27 it/sec) -training >> step=1841200, episode=307 reward=0.7595297 (537.31 it/sec) -training >> step=1841300, episode=308 reward=0.7731547 (133.36 it/sec) -training >> step=1841400, episode=308 reward=0.7701256 (461.66 it/sec) -training >> step=1841500, episode=308 reward=0.7655683 (528.42 it/sec) -training >> step=1841600, episode=308 reward=0.7546037 (508.59 it/sec) -training >> step=1841700, episode=308 reward=0.7564746 (510.05 it/sec) -training >> step=1841800, episode=308 reward=0.7630941 (491.16 it/sec) -training >> step=1841900, episode=308 reward=0.7279947 (524.01 it/sec) -training >> step=1842000, episode=308 reward=0.7830275 (378.55 it/sec) -training >> step=1842100, episode=308 reward=0.7717255 (520.70 it/sec) -training >> step=1842200, episode=308 reward=0.747681 (550.44 it/sec) -training >> step=1842300, episode=308 reward=0.7508572 (493.26 it/sec) -training >> step=1842400, episode=308 reward=0.7610873 (503.42 it/sec) -training >> step=1842500, episode=308 reward=0.7585761 (529.34 it/sec) -training >> step=1842600, episode=308 reward=0.758015 (524.37 it/sec) -training >> step=1842700, episode=308 reward=0.75072 (498.01 it/sec) -training >> step=1842800, episode=308 reward=0.7755733 (489.88 it/sec) -training >> step=1842900, episode=308 reward=0.7592357 (540.43 it/sec) -training >> step=1843000, episode=308 reward=0.7694663 (492.87 it/sec) -training >> step=1843100, episode=308 reward=0.7347853 (519.15 it/sec) -training >> step=1843200, episode=308 reward=0.7710431 (570.21 it/sec) -training >> step=1843300, episode=308 reward=0.7806252 (553.37 it/sec) -training >> step=1843400, episode=308 reward=0.7617769 (496.58 it/sec) -training >> step=1843500, episode=308 reward=0.7728286 (517.61 it/sec) -training >> step=1843600, episode=308 reward=0.7734195 (554.62 it/sec) -training >> step=1843700, episode=308 reward=0.7942768 (521.03 it/sec) -training >> step=1843800, episode=308 reward=0.7542652 (512.07 it/sec) -training >> step=1843900, episode=308 reward=0.7466027 (506.53 it/sec) -training >> step=1844000, episode=308 reward=0.7679454 (484.19 it/sec) -training >> step=1844100, episode=308 reward=0.762023 (523.58 it/sec) -training >> step=1844200, episode=308 reward=0.7753906 (472.00 it/sec) -training >> step=1844300, episode=308 reward=0.7696434 (490.42 it/sec) -training >> step=1844400, episode=308 reward=0.7534897 (482.65 it/sec) -training >> step=1844500, episode=308 reward=0.7424697 (514.40 it/sec) -training >> step=1844600, episode=308 reward=0.7488415 (528.59 it/sec) -training >> step=1844700, episode=308 reward=0.7508834 (508.32 it/sec) -training >> step=1844800, episode=308 reward=0.7636412 (554.75 it/sec) -training >> step=1844900, episode=308 reward=0.7560204 (529.58 it/sec) -training >> step=1845000, episode=308 reward=0.7738438 (538.30 it/sec) -training >> step=1845100, episode=308 reward=0.7915177 (507.64 it/sec) -training >> step=1845200, episode=308 reward=0.7479572 (534.36 it/sec) -training >> step=1845300, episode=308 reward=0.7845229 (509.45 it/sec) -training >> step=1845400, episode=308 reward=0.7792103 (532.17 it/sec) -training >> step=1845500, episode=308 reward=0.7529231 (473.39 it/sec) -training >> step=1845600, episode=308 reward=0.7502004 (522.13 it/sec) -training >> step=1845700, episode=308 reward=0.7728875 (530.22 it/sec) -training >> step=1845800, episode=308 reward=0.749082 (542.41 it/sec) -training >> step=1845900, episode=308 reward=0.7742568 (520.80 it/sec) -training >> step=1846000, episode=308 reward=0.7793298 (450.07 it/sec) -training >> step=1846100, episode=308 reward=0.76651 (537.22 it/sec) -training >> step=1846200, episode=308 reward=0.7723797 (520.84 it/sec) -training >> step=1846300, episode=308 reward=0.7492988 (499.05 it/sec) -training >> step=1846400, episode=308 reward=0.7547429 (506.68 it/sec) -training >> step=1846500, episode=308 reward=0.7664778 (500.13 it/sec) -training >> step=1846600, episode=308 reward=0.7891614 (494.11 it/sec) -training >> step=1846700, episode=308 reward=0.7494055 (544.69 it/sec) -training >> step=1846800, episode=308 reward=0.7452149 (535.80 it/sec) -training >> step=1846900, episode=308 reward=0.7501848 (528.63 it/sec) -training >> step=1847000, episode=308 reward=0.7645197 (513.17 it/sec) -training >> step=1847100, episode=308 reward=0.7662815 (538.37 it/sec) -training >> step=1847200, episode=308 reward=0.7413133 (516.73 it/sec) -training >> step=1847300, episode=309 reward=0.7357323 (144.98 it/sec) -training >> step=1847400, episode=309 reward=0.7643402 (526.53 it/sec) -training >> step=1847500, episode=309 reward=0.7412278 (510.77 it/sec) -training >> step=1847600, episode=309 reward=0.7422614 (481.58 it/sec) -training >> step=1847700, episode=309 reward=0.7529516 (466.73 it/sec) -training >> step=1847800, episode=309 reward=0.7594353 (527.36 it/sec) -training >> step=1847900, episode=309 reward=0.7563749 (509.84 it/sec) -training >> step=1848000, episode=309 reward=0.7584133 (539.91 it/sec) -training >> step=1848100, episode=309 reward=0.7854714 (541.32 it/sec) -training >> step=1848200, episode=309 reward=0.7709849 (500.07 it/sec) -training >> step=1848300, episode=309 reward=0.7572966 (376.70 it/sec) -training >> step=1848400, episode=309 reward=0.7604754 (512.44 it/sec) -training >> step=1848500, episode=309 reward=0.7282106 (487.36 it/sec) -training >> step=1848600, episode=309 reward=0.7477567 (519.99 it/sec) -training >> step=1848700, episode=309 reward=0.7634342 (546.58 it/sec) -training >> step=1848800, episode=309 reward=0.7772542 (526.11 it/sec) -training >> step=1848900, episode=309 reward=0.7698493 (433.92 it/sec) -training >> step=1849000, episode=309 reward=0.7601371 (529.40 it/sec) -training >> step=1849100, episode=309 reward=0.7579395 (527.25 it/sec) -training >> step=1849200, episode=309 reward=0.7735786 (517.86 it/sec) -training >> step=1849300, episode=309 reward=0.7735069 (481.91 it/sec) -training >> step=1849400, episode=309 reward=0.7572948 (491.23 it/sec) -training >> step=1849500, episode=309 reward=0.7625313 (538.18 it/sec) -training >> step=1849600, episode=309 reward=0.7498941 (496.86 it/sec) -training >> step=1849700, episode=309 reward=0.7694259 (511.09 it/sec) -training >> step=1849800, episode=309 reward=0.7662861 (533.36 it/sec) -training >> step=1849900, episode=309 reward=0.757494 (491.18 it/sec) -training >> step=1850000, episode=309 reward=0.7651632 (543.90 it/sec) -training >> step=1850100, episode=309 reward=0.7621253 (542.24 it/sec) -training >> step=1850200, episode=309 reward=0.7679974 (572.89 it/sec) -training >> step=1850300, episode=309 reward=0.7616993 (495.46 it/sec) -training >> step=1850400, episode=309 reward=0.7595311 (495.17 it/sec) -training >> step=1850500, episode=309 reward=0.7569824 (507.82 it/sec) -training >> step=1850600, episode=309 reward=0.7401783 (532.30 it/sec) -training >> step=1850700, episode=309 reward=0.7492996 (478.94 it/sec) -training >> step=1850800, episode=309 reward=0.7666702 (482.56 it/sec) -training >> step=1850900, episode=309 reward=0.74617 (509.06 it/sec) -training >> step=1851000, episode=309 reward=0.7541804 (503.30 it/sec) -training >> step=1851100, episode=309 reward=0.7835574 (541.89 it/sec) -training >> step=1851200, episode=309 reward=0.7506098 (511.68 it/sec) -training >> step=1851300, episode=309 reward=0.769018 (528.47 it/sec) -training >> step=1851400, episode=309 reward=0.7613955 (489.43 it/sec) -training >> step=1851500, episode=309 reward=0.7715939 (504.88 it/sec) -training >> step=1851600, episode=309 reward=0.7595677 (503.03 it/sec) -training >> step=1851700, episode=309 reward=0.7427096 (508.26 it/sec) -training >> step=1851800, episode=309 reward=0.7664742 (555.02 it/sec) -training >> step=1851900, episode=309 reward=0.7597218 (533.02 it/sec) -training >> step=1852000, episode=309 reward=0.7552056 (488.13 it/sec) -training >> step=1852100, episode=309 reward=0.7645941 (514.31 it/sec) -training >> step=1852200, episode=309 reward=0.7448248 (554.10 it/sec) -training >> step=1852300, episode=309 reward=0.7545964 (508.19 it/sec) -training >> step=1852400, episode=309 reward=0.7792028 (502.67 it/sec) -training >> step=1852500, episode=309 reward=0.7794991 (489.71 it/sec) -training >> step=1852600, episode=309 reward=0.7860425 (516.52 it/sec) -training >> step=1852700, episode=309 reward=0.7466398 (536.13 it/sec) -training >> step=1852800, episode=309 reward=0.7584564 (496.43 it/sec) -training >> step=1852900, episode=309 reward=0.7573833 (469.27 it/sec) -training >> step=1853000, episode=309 reward=0.7645573 (466.29 it/sec) -training >> step=1853100, episode=309 reward=0.7742478 (539.17 it/sec) -training >> step=1853200, episode=309 reward=0.7393631 (501.98 it/sec) -training >> step=1853300, episode=310 reward=0.7566724 (137.08 it/sec) -training >> step=1853400, episode=310 reward=0.7431494 (552.74 it/sec) -training >> step=1853500, episode=310 reward=0.7657441 (494.97 it/sec) -training >> step=1853600, episode=310 reward=0.7476032 (515.15 it/sec) -training >> step=1853700, episode=310 reward=0.7438983 (510.92 it/sec) -training >> step=1853800, episode=310 reward=0.7666245 (538.18 it/sec) -training >> step=1853900, episode=310 reward=0.7385463 (488.96 it/sec) -training >> step=1854000, episode=310 reward=0.7513202 (515.89 it/sec) -training >> step=1854100, episode=310 reward=0.7647851 (516.50 it/sec) -training >> step=1854200, episode=310 reward=0.7817033 (528.98 it/sec) -training >> step=1854300, episode=310 reward=0.7566349 (517.86 it/sec) -training >> step=1854400, episode=310 reward=0.7701868 (360.65 it/sec) -training >> step=1854500, episode=310 reward=0.7597048 (508.28 it/sec) -training >> step=1854600, episode=310 reward=0.7549967 (501.15 it/sec) -training >> step=1854700, episode=310 reward=0.7706615 (542.00 it/sec) -training >> step=1854800, episode=310 reward=0.7566499 (489.53 it/sec) -training >> step=1854900, episode=310 reward=0.7198709 (498.84 it/sec) -training >> step=1855000, episode=310 reward=0.770406 (556.06 it/sec) -training >> step=1855100, episode=310 reward=0.7804359 (498.98 it/sec) -training >> step=1855200, episode=310 reward=0.7490514 (519.78 it/sec) -training >> step=1855300, episode=310 reward=0.7669606 (507.84 it/sec) -training >> step=1855400, episode=310 reward=0.742807 (518.95 it/sec) -training >> step=1855500, episode=310 reward=0.7592694 (487.27 it/sec) -training >> step=1855600, episode=310 reward=0.7617062 (519.09 it/sec) -training >> step=1855700, episode=310 reward=0.7687232 (493.67 it/sec) -training >> step=1855800, episode=310 reward=0.7678144 (529.18 it/sec) -training >> step=1855900, episode=310 reward=0.7581202 (493.63 it/sec) -training >> step=1856000, episode=310 reward=0.764336 (496.45 it/sec) -training >> step=1856100, episode=310 reward=0.7725353 (511.53 it/sec) -training >> step=1856200, episode=310 reward=0.773877 (516.67 it/sec) -training >> step=1856300, episode=310 reward=0.7692283 (506.48 it/sec) -training >> step=1856400, episode=310 reward=0.7669188 (468.20 it/sec) -training >> step=1856500, episode=310 reward=0.7634711 (520.11 it/sec) -training >> step=1856600, episode=310 reward=0.7449442 (536.15 it/sec) -training >> step=1856700, episode=310 reward=0.7818818 (450.70 it/sec) -training >> step=1856800, episode=310 reward=0.7718464 (545.56 it/sec) -training >> step=1856900, episode=310 reward=0.7579343 (506.60 it/sec) -training >> step=1857000, episode=310 reward=0.7836403 (522.47 it/sec) -training >> step=1857100, episode=310 reward=0.7596857 (492.17 it/sec) -training >> step=1857200, episode=310 reward=0.74347 (513.47 it/sec) -training >> step=1857300, episode=310 reward=0.7740518 (506.35 it/sec) -training >> step=1857400, episode=310 reward=0.7676435 (496.39 it/sec) -training >> step=1857500, episode=310 reward=0.7710566 (514.97 it/sec) -training >> step=1857600, episode=310 reward=0.7497775 (514.87 it/sec) -training >> step=1857700, episode=310 reward=0.766577 (445.51 it/sec) -training >> step=1857800, episode=310 reward=0.7622477 (509.82 it/sec) -training >> step=1857900, episode=310 reward=0.7745063 (448.17 it/sec) -training >> step=1858000, episode=310 reward=0.7551403 (501.32 it/sec) -training >> step=1858100, episode=310 reward=0.781251 (510.63 it/sec) -training >> step=1858200, episode=310 reward=0.7605139 (503.94 it/sec) -training >> step=1858300, episode=310 reward=0.7495405 (496.34 it/sec) -training >> step=1858400, episode=310 reward=0.7412727 (497.90 it/sec) -training >> step=1858500, episode=310 reward=0.7478627 (501.35 it/sec) -training >> step=1858600, episode=310 reward=0.7824526 (499.58 it/sec) -training >> step=1858700, episode=310 reward=0.7483935 (505.36 it/sec) -training >> step=1858800, episode=310 reward=0.7659724 (521.30 it/sec) -training >> step=1858900, episode=310 reward=0.7507485 (542.94 it/sec) -training >> step=1859000, episode=310 reward=0.7461821 (531.91 it/sec) -training >> step=1859100, episode=310 reward=0.7670751 (506.15 it/sec) -training >> step=1859200, episode=310 reward=0.7542626 (563.38 it/sec) -training >> step=1859300, episode=311 reward=0.7476704 (132.91 it/sec) -training >> step=1859400, episode=311 reward=0.7597905 (494.78 it/sec) -training >> step=1859500, episode=311 reward=0.7167813 (502.44 it/sec) -training >> step=1859600, episode=311 reward=0.7519363 (498.32 it/sec) -training >> step=1859700, episode=311 reward=0.7392264 (493.13 it/sec) -training >> step=1859800, episode=311 reward=0.7458028 (545.01 it/sec) -training >> step=1859900, episode=311 reward=0.7575945 (495.18 it/sec) -training >> step=1860000, episode=311 reward=0.7472998 (485.71 it/sec) -training >> step=1860100, episode=311 reward=0.7644134 (498.75 it/sec) -training >> step=1860200, episode=311 reward=0.7586694 (547.39 it/sec) -training >> step=1860300, episode=311 reward=0.7385371 (531.51 it/sec) -training >> step=1860400, episode=311 reward=0.7713293 (473.51 it/sec) -training >> step=1860500, episode=311 reward=0.7367298 (491.84 it/sec) -training >> step=1860600, episode=311 reward=0.7412311 (523.85 it/sec) -training >> step=1860700, episode=311 reward=0.767387 (423.38 it/sec) -training >> step=1860800, episode=311 reward=0.7518998 (522.48 it/sec) -training >> step=1860900, episode=311 reward=0.7805533 (516.76 it/sec) -training >> step=1861000, episode=311 reward=0.7715333 (523.88 it/sec) -training >> step=1861100, episode=311 reward=0.7797141 (507.21 it/sec) -training >> step=1861200, episode=311 reward=0.7691789 (516.10 it/sec) -training >> step=1861300, episode=311 reward=0.7771565 (527.69 it/sec) -training >> step=1861400, episode=311 reward=0.788103 (505.69 it/sec) -training >> step=1861500, episode=311 reward=0.7270474 (489.62 it/sec) -training >> step=1861600, episode=311 reward=0.7617769 (513.77 it/sec) -training >> step=1861700, episode=311 reward=0.7522665 (510.85 it/sec) -training >> step=1861800, episode=311 reward=0.7522212 (544.02 it/sec) -training >> step=1861900, episode=311 reward=0.7483429 (514.40 it/sec) -training >> step=1862000, episode=311 reward=0.7929519 (500.79 it/sec) -training >> step=1862100, episode=311 reward=0.772133 (508.75 it/sec) -training >> step=1862200, episode=311 reward=0.7723764 (509.56 it/sec) -training >> step=1862300, episode=311 reward=0.7597668 (509.93 it/sec) -training >> step=1862400, episode=311 reward=0.7399244 (426.59 it/sec) -training >> step=1862500, episode=311 reward=0.7614213 (486.41 it/sec) -training >> step=1862600, episode=311 reward=0.7719191 (483.33 it/sec) -training >> step=1862700, episode=311 reward=0.762778 (433.76 it/sec) -training >> step=1862800, episode=311 reward=0.7707582 (515.09 it/sec) -training >> step=1862900, episode=311 reward=0.7643208 (473.10 it/sec) -training >> step=1863000, episode=311 reward=0.7587634 (509.91 it/sec) -training >> step=1863100, episode=311 reward=0.7718942 (517.45 it/sec) -training >> step=1863200, episode=311 reward=0.7748306 (310.92 it/sec) -training >> step=1863300, episode=311 reward=0.756087 (387.53 it/sec) -training >> step=1863400, episode=311 reward=0.7600182 (368.74 it/sec) -training >> step=1863500, episode=311 reward=0.7618217 (391.35 it/sec) -training >> step=1863600, episode=311 reward=0.7712252 (437.66 it/sec) -training >> step=1863700, episode=311 reward=0.7433106 (461.04 it/sec) -training >> step=1863800, episode=311 reward=0.7659097 (448.36 it/sec) -training >> step=1863900, episode=311 reward=0.7513772 (498.58 it/sec) -training >> step=1864000, episode=311 reward=0.7409616 (522.29 it/sec) -training >> step=1864100, episode=311 reward=0.7707584 (512.53 it/sec) -training >> step=1864200, episode=311 reward=0.7596625 (438.72 it/sec) -training >> step=1864300, episode=311 reward=0.7514599 (425.40 it/sec) -training >> step=1864400, episode=311 reward=0.7669878 (426.75 it/sec) -training >> step=1864500, episode=311 reward=0.7619264 (450.94 it/sec) -training >> step=1864600, episode=311 reward=0.7770426 (428.65 it/sec) -training >> step=1864700, episode=311 reward=0.717481 (359.04 it/sec) -training >> step=1864800, episode=311 reward=0.7529505 (455.18 it/sec) -training >> step=1864900, episode=311 reward=0.7421719 (431.47 it/sec) -training >> step=1865000, episode=311 reward=0.7508415 (471.01 it/sec) -training >> step=1865100, episode=311 reward=0.7358289 (457.68 it/sec) -training >> step=1865200, episode=311 reward=0.7375367 (468.90 it/sec) -training >> step=1865300, episode=312 reward=0.7479566 (154.36 it/sec) -training >> step=1865400, episode=312 reward=0.7625739 (525.22 it/sec) -training >> step=1865500, episode=312 reward=0.7335127 (510.56 it/sec) -training >> step=1865600, episode=312 reward=0.7500432 (509.04 it/sec) -training >> step=1865700, episode=312 reward=0.7403733 (533.41 it/sec) -training >> step=1865800, episode=312 reward=0.7702594 (456.06 it/sec) -training >> step=1865900, episode=312 reward=0.7805291 (437.96 it/sec) -training >> step=1866000, episode=312 reward=0.7477992 (458.49 it/sec) -training >> step=1866100, episode=312 reward=0.7759855 (514.98 it/sec) -training >> step=1866200, episode=312 reward=0.7635558 (491.82 it/sec) -training >> step=1866300, episode=312 reward=0.7685046 (492.23 it/sec) -training >> step=1866400, episode=312 reward=0.7488708 (463.96 it/sec) -training >> step=1866500, episode=312 reward=0.7707688 (443.77 it/sec) -training >> step=1866600, episode=312 reward=0.7524793 (493.61 it/sec) -training >> step=1866700, episode=312 reward=0.7474914 (488.23 it/sec) -training >> step=1866800, episode=312 reward=0.7755039 (330.59 it/sec) -training >> step=1866900, episode=312 reward=0.7282621 (483.79 it/sec) -training >> step=1867000, episode=312 reward=0.7635199 (496.18 it/sec) -training >> step=1867100, episode=312 reward=0.7647087 (470.17 it/sec) -training >> step=1867200, episode=312 reward=0.7662627 (474.71 it/sec) -training >> step=1867300, episode=312 reward=0.7791044 (463.72 it/sec) -training >> step=1867400, episode=312 reward=0.7564659 (472.80 it/sec) -training >> step=1867500, episode=312 reward=0.7581065 (508.54 it/sec) -training >> step=1867600, episode=312 reward=0.7556009 (483.92 it/sec) -training >> step=1867700, episode=312 reward=0.7818612 (503.29 it/sec) -training >> step=1867800, episode=312 reward=0.7505693 (476.81 it/sec) -training >> step=1867900, episode=312 reward=0.7654799 (490.29 it/sec) -training >> step=1868000, episode=312 reward=0.7675267 (518.32 it/sec) -training >> step=1868100, episode=312 reward=0.7716008 (508.44 it/sec) -training >> step=1868200, episode=312 reward=0.7590406 (460.70 it/sec) -training >> step=1868300, episode=312 reward=0.74471 (491.42 it/sec) -training >> step=1868400, episode=312 reward=0.7309691 (492.65 it/sec) -training >> step=1868500, episode=312 reward=0.7604939 (509.28 it/sec) -training >> step=1868600, episode=312 reward=0.7473186 (485.07 it/sec) -training >> step=1868700, episode=312 reward=0.7631854 (513.79 it/sec) -training >> step=1868800, episode=312 reward=0.7637265 (540.89 it/sec) -training >> step=1868900, episode=312 reward=0.7526258 (495.59 it/sec) -training >> step=1869000, episode=312 reward=0.7840153 (498.25 it/sec) -training >> step=1869100, episode=312 reward=0.7378954 (548.94 it/sec) -training >> step=1869200, episode=312 reward=0.7634231 (509.08 it/sec) -training >> step=1869300, episode=312 reward=0.7547088 (505.62 it/sec) -training >> step=1869400, episode=312 reward=0.7575307 (482.39 it/sec) -training >> step=1869500, episode=312 reward=0.7467253 (501.74 it/sec) -training >> step=1869600, episode=312 reward=0.7459696 (481.87 it/sec) -training >> step=1869700, episode=312 reward=0.7740083 (471.18 it/sec) -training >> step=1869800, episode=312 reward=0.7965682 (484.89 it/sec) -training >> step=1869900, episode=312 reward=0.7740439 (490.69 it/sec) -training >> step=1870000, episode=312 reward=0.7524776 (505.16 it/sec) -training >> step=1870100, episode=312 reward=0.7698833 (489.73 it/sec) -training >> step=1870200, episode=312 reward=0.7423429 (514.63 it/sec) -training >> step=1870300, episode=312 reward=0.7572499 (484.40 it/sec) -training >> step=1870400, episode=312 reward=0.7555355 (471.00 it/sec) -training >> step=1870500, episode=312 reward=0.7480351 (490.69 it/sec) -training >> step=1870600, episode=312 reward=0.7528589 (504.84 it/sec) -training >> step=1870700, episode=312 reward=0.7729582 (525.39 it/sec) -training >> step=1870800, episode=312 reward=0.7405905 (501.09 it/sec) -training >> step=1870900, episode=312 reward=0.7639047 (518.15 it/sec) -training >> step=1871000, episode=312 reward=0.7561481 (502.89 it/sec) -training >> step=1871100, episode=312 reward=0.7488809 (531.19 it/sec) -training >> step=1871200, episode=312 reward=0.7405534 (487.05 it/sec) -training >> step=1871300, episode=313 reward=0.7743308 (187.35 it/sec) -training >> step=1871400, episode=313 reward=0.7282501 (389.00 it/sec) -training >> step=1871500, episode=313 reward=0.7265226 (434.23 it/sec) -training >> step=1871600, episode=313 reward=0.7602992 (464.01 it/sec) -training >> step=1871700, episode=313 reward=0.7667369 (456.97 it/sec) -training >> step=1871800, episode=313 reward=0.7419021 (481.39 it/sec) -training >> step=1871900, episode=313 reward=0.7757134 (519.72 it/sec) -training >> step=1872000, episode=313 reward=0.7457922 (503.50 it/sec) -training >> step=1872100, episode=313 reward=0.7954865 (477.81 it/sec) -training >> step=1872200, episode=313 reward=0.7696986 (526.18 it/sec) -training >> step=1872300, episode=313 reward=0.770425 (485.63 it/sec) -training >> step=1872400, episode=313 reward=0.7549416 (495.85 it/sec) -training >> step=1872500, episode=313 reward=0.7615348 (507.46 it/sec) -training >> step=1872600, episode=313 reward=0.7690834 (502.84 it/sec) -training >> step=1872700, episode=313 reward=0.7578319 (347.76 it/sec) -training >> step=1872800, episode=313 reward=0.7662692 (413.75 it/sec) -training >> step=1872900, episode=313 reward=0.7579284 (388.75 it/sec) -training >> step=1873000, episode=313 reward=0.7516589 (317.33 it/sec) -training >> step=1873100, episode=313 reward=0.746072 (481.83 it/sec) -training >> step=1873200, episode=313 reward=0.7526453 (534.30 it/sec) -training >> step=1873300, episode=313 reward=0.7382299 (523.18 it/sec) -training >> step=1873400, episode=313 reward=0.7522427 (554.37 it/sec) -training >> step=1873500, episode=313 reward=0.7558137 (505.83 it/sec) -training >> step=1873600, episode=313 reward=0.7684205 (521.13 it/sec) -training >> step=1873700, episode=313 reward=0.758746 (513.65 it/sec) -training >> step=1873800, episode=313 reward=0.7582409 (520.77 it/sec) -training >> step=1873900, episode=313 reward=0.7757884 (508.44 it/sec) -training >> step=1874000, episode=313 reward=0.752081 (477.60 it/sec) -training >> step=1874100, episode=313 reward=0.7560663 (496.93 it/sec) -training >> step=1874200, episode=313 reward=0.7574282 (484.40 it/sec) -training >> step=1874300, episode=313 reward=0.7580431 (532.25 it/sec) -training >> step=1874400, episode=313 reward=0.7710443 (446.82 it/sec) -training >> step=1874500, episode=313 reward=0.7693022 (438.96 it/sec) -training >> step=1874600, episode=313 reward=0.7893268 (519.68 it/sec) -training >> step=1874700, episode=313 reward=0.7438174 (508.04 it/sec) -training >> step=1874800, episode=313 reward=0.7731287 (529.06 it/sec) -training >> step=1874900, episode=313 reward=0.7424802 (474.88 it/sec) -training >> step=1875000, episode=313 reward=0.7595258 (486.00 it/sec) -training >> step=1875100, episode=313 reward=0.7598343 (500.12 it/sec) -training >> step=1875200, episode=313 reward=0.7505086 (490.89 it/sec) -training >> step=1875300, episode=313 reward=0.7515237 (540.66 it/sec) -training >> step=1875400, episode=313 reward=0.7593725 (548.30 it/sec) -training >> step=1875500, episode=313 reward=0.7686732 (499.63 it/sec) -training >> step=1875600, episode=313 reward=0.7774267 (529.28 it/sec) -training >> step=1875700, episode=313 reward=0.7772452 (512.91 it/sec) -training >> step=1875800, episode=313 reward=0.7796462 (536.15 it/sec) -training >> step=1875900, episode=313 reward=0.7583603 (539.93 it/sec) -training >> step=1876000, episode=313 reward=0.7473994 (504.42 it/sec) -training >> step=1876100, episode=313 reward=0.7528103 (555.84 it/sec) -training >> step=1876200, episode=313 reward=0.7738451 (572.37 it/sec) -training >> step=1876300, episode=313 reward=0.7680443 (482.00 it/sec) -training >> step=1876400, episode=313 reward=0.7574314 (540.02 it/sec) -training >> step=1876500, episode=313 reward=0.7466516 (489.93 it/sec) -training >> step=1876600, episode=313 reward=0.7659793 (552.30 it/sec) -training >> step=1876700, episode=313 reward=0.7352821 (536.87 it/sec) -training >> step=1876800, episode=313 reward=0.735785 (505.29 it/sec) -training >> step=1876900, episode=313 reward=0.7479178 (549.37 it/sec) -training >> step=1877000, episode=313 reward=0.7461124 (511.68 it/sec) -training >> step=1877100, episode=313 reward=0.7553737 (549.80 it/sec) -training >> step=1877200, episode=313 reward=0.7428994 (547.89 it/sec) -training >> step=1877300, episode=314 reward=0.7614655 (184.96 it/sec) -training >> step=1877400, episode=314 reward=0.7390059 (518.10 it/sec) -training >> step=1877500, episode=314 reward=0.7303712 (486.40 it/sec) -training >> step=1877600, episode=314 reward=0.7449391 (547.21 it/sec) -training >> step=1877700, episode=314 reward=0.7487451 (556.75 it/sec) -training >> step=1877800, episode=314 reward=0.7650673 (579.33 it/sec) -training >> step=1877900, episode=314 reward=0.7480129 (527.65 it/sec) -training >> step=1878000, episode=314 reward=0.7552194 (553.06 it/sec) -training >> step=1878100, episode=314 reward=0.7799988 (527.01 it/sec) -training >> step=1878200, episode=314 reward=0.774582 (465.11 it/sec) -training >> step=1878300, episode=314 reward=0.7633268 (503.01 it/sec) -training >> step=1878400, episode=314 reward=0.7695237 (491.43 it/sec) -training >> step=1878500, episode=314 reward=0.7547454 (509.63 it/sec) -training >> step=1878600, episode=314 reward=0.7719335 (598.23 it/sec) -training >> step=1878700, episode=314 reward=0.7608562 (557.35 it/sec) -training >> step=1878800, episode=314 reward=0.737329 (503.55 it/sec) -training >> step=1878900, episode=314 reward=0.7737875 (525.18 it/sec) -training >> step=1879000, episode=314 reward=0.755383 (513.07 it/sec) -training >> step=1879100, episode=314 reward=0.7635409 (468.24 it/sec) -training >> step=1879200, episode=314 reward=0.7713041 (378.87 it/sec) -training >> step=1879300, episode=314 reward=0.7588502 (479.73 it/sec) -training >> step=1879400, episode=314 reward=0.7842109 (481.01 it/sec) -training >> step=1879500, episode=314 reward=0.793391 (464.53 it/sec) -training >> step=1879600, episode=314 reward=0.772436 (505.71 it/sec) -training >> step=1879700, episode=314 reward=0.774093 (531.29 it/sec) -training >> step=1879800, episode=314 reward=0.7866597 (493.49 it/sec) -training >> step=1879900, episode=314 reward=0.7777184 (558.35 it/sec) -training >> step=1880000, episode=314 reward=0.7615179 (504.82 it/sec) -training >> step=1880100, episode=314 reward=0.7572986 (586.83 it/sec) -training >> step=1880200, episode=314 reward=0.7793915 (587.67 it/sec) -training >> step=1880300, episode=314 reward=0.7636154 (558.87 it/sec) -training >> step=1880400, episode=314 reward=0.7667074 (515.74 it/sec) -training >> step=1880500, episode=314 reward=0.7479469 (547.46 it/sec) -training >> step=1880600, episode=314 reward=0.7688489 (580.51 it/sec) -training >> step=1880700, episode=314 reward=0.7526868 (559.96 it/sec) -training >> step=1880800, episode=314 reward=0.7334094 (521.30 it/sec) -training >> step=1880900, episode=314 reward=0.7859632 (522.08 it/sec) -training >> step=1881000, episode=314 reward=0.7622257 (541.07 it/sec) -training >> step=1881100, episode=314 reward=0.7539269 (555.20 it/sec) -training >> step=1881200, episode=314 reward=0.7742873 (562.61 it/sec) -training >> step=1881300, episode=314 reward=0.7618424 (592.06 it/sec) -training >> step=1881400, episode=314 reward=0.7641641 (584.04 it/sec) -training >> step=1881500, episode=314 reward=0.7825969 (487.58 it/sec) -training >> step=1881600, episode=314 reward=0.7612109 (492.08 it/sec) -training >> step=1881700, episode=314 reward=0.7536192 (506.56 it/sec) -training >> step=1881800, episode=314 reward=0.7703714 (560.48 it/sec) -training >> step=1881900, episode=314 reward=0.7896304 (534.46 it/sec) -training >> step=1882000, episode=314 reward=0.7695693 (475.06 it/sec) -training >> step=1882100, episode=314 reward=0.7785529 (511.79 it/sec) -training >> step=1882200, episode=314 reward=0.7798663 (534.38 it/sec) -training >> step=1882300, episode=314 reward=0.7533573 (550.02 it/sec) -training >> step=1882400, episode=314 reward=0.7454938 (586.42 it/sec) -training >> step=1882500, episode=314 reward=0.7558931 (563.61 it/sec) -training >> step=1882600, episode=314 reward=0.7771272 (514.35 it/sec) -training >> step=1882700, episode=314 reward=0.7672959 (517.29 it/sec) -training >> step=1882800, episode=314 reward=0.7408608 (575.57 it/sec) -training >> step=1882900, episode=314 reward=0.7577972 (579.86 it/sec) -training >> step=1883000, episode=314 reward=0.7608149 (539.21 it/sec) -training >> step=1883100, episode=314 reward=0.7413967 (578.10 it/sec) -training >> step=1883200, episode=314 reward=0.7608305 (527.29 it/sec) -training >> step=1883300, episode=315 reward=0.7451377 (167.11 it/sec) -training >> step=1883400, episode=315 reward=0.7624255 (561.94 it/sec) -training >> step=1883500, episode=315 reward=0.7424266 (542.60 it/sec) -training >> step=1883600, episode=315 reward=0.7457836 (552.43 it/sec) -training >> step=1883700, episode=315 reward=0.7643092 (541.81 it/sec) -training >> step=1883800, episode=315 reward=0.7679821 (579.91 it/sec) -training >> step=1883900, episode=315 reward=0.7537228 (565.73 it/sec) -training >> step=1884000, episode=315 reward=0.7510988 (558.13 it/sec) -training >> step=1884100, episode=315 reward=0.7860999 (584.10 it/sec) -training >> step=1884200, episode=315 reward=0.7642267 (555.80 it/sec) -training >> step=1884300, episode=315 reward=0.7457557 (573.31 it/sec) -training >> step=1884400, episode=315 reward=0.7688987 (582.01 it/sec) -training >> step=1884500, episode=315 reward=0.7494785 (583.11 it/sec) -training >> step=1884600, episode=315 reward=0.7621003 (578.02 it/sec) -training >> step=1884700, episode=315 reward=0.7694585 (547.85 it/sec) -training >> step=1884800, episode=315 reward=0.7775871 (492.87 it/sec) -training >> step=1884900, episode=315 reward=0.7589864 (522.13 it/sec) -training >> step=1885000, episode=315 reward=0.7655494 (536.29 it/sec) -training >> step=1885100, episode=315 reward=0.7874753 (564.36 it/sec) -training >> step=1885200, episode=315 reward=0.7659312 (541.36 it/sec) -training >> step=1885300, episode=315 reward=0.7454812 (564.44 it/sec) -training >> step=1885400, episode=315 reward=0.7707712 (381.63 it/sec) -training >> step=1885500, episode=315 reward=0.7753516 (560.80 it/sec) -training >> step=1885600, episode=315 reward=0.7865932 (559.65 it/sec) -training >> step=1885700, episode=315 reward=0.7566503 (555.81 it/sec) -training >> step=1885800, episode=315 reward=0.7859509 (594.95 it/sec) -training >> step=1885900, episode=315 reward=0.7502997 (517.07 it/sec) -training >> step=1886000, episode=315 reward=0.7605708 (576.99 it/sec) -training >> step=1886100, episode=315 reward=0.7847623 (563.71 it/sec) -training >> step=1886200, episode=315 reward=0.7658042 (531.26 it/sec) -training >> step=1886300, episode=315 reward=0.755257 (539.49 it/sec) -training >> step=1886400, episode=315 reward=0.7617052 (575.16 it/sec) -training >> step=1886500, episode=315 reward=0.767693 (595.42 it/sec) -training >> step=1886600, episode=315 reward=0.7783823 (571.95 it/sec) -training >> step=1886700, episode=315 reward=0.7776599 (551.78 it/sec) -training >> step=1886800, episode=315 reward=0.7853266 (496.29 it/sec) -training >> step=1886900, episode=315 reward=0.7843966 (499.90 it/sec) -training >> step=1887000, episode=315 reward=0.7630638 (515.47 it/sec) -training >> step=1887100, episode=315 reward=0.7652173 (524.77 it/sec) -training >> step=1887200, episode=315 reward=0.7803119 (574.63 it/sec) -training >> step=1887300, episode=315 reward=0.7537875 (550.04 it/sec) -training >> step=1887400, episode=315 reward=0.7485588 (526.92 it/sec) -training >> step=1887500, episode=315 reward=0.7667229 (497.19 it/sec) -training >> step=1887600, episode=315 reward=0.756031 (544.80 it/sec) -training >> step=1887700, episode=315 reward=0.7580869 (577.62 it/sec) -training >> step=1887800, episode=315 reward=0.79443 (523.51 it/sec) -training >> step=1887900, episode=315 reward=0.7598359 (522.58 it/sec) -training >> step=1888000, episode=315 reward=0.7611531 (549.02 it/sec) -training >> step=1888100, episode=315 reward=0.7571713 (536.57 it/sec) -training >> step=1888200, episode=315 reward=0.7655891 (575.08 it/sec) -training >> step=1888300, episode=315 reward=0.7511379 (560.73 it/sec) -training >> step=1888400, episode=315 reward=0.7743111 (555.71 it/sec) -training >> step=1888500, episode=315 reward=0.7287926 (551.27 it/sec) -training >> step=1888600, episode=315 reward=0.764923 (547.43 it/sec) -training >> step=1888700, episode=315 reward=0.7450166 (531.63 it/sec) -training >> step=1888800, episode=315 reward=0.7755704 (572.05 it/sec) -training >> step=1888900, episode=315 reward=0.7652274 (525.78 it/sec) -training >> step=1889000, episode=315 reward=0.7509829 (534.53 it/sec) -training >> step=1889100, episode=315 reward=0.7323155 (564.75 it/sec) -training >> step=1889200, episode=315 reward=0.7481442 (519.78 it/sec) -training >> step=1889300, episode=316 reward=0.7453058 (174.45 it/sec) -training >> step=1889400, episode=316 reward=0.7456556 (541.14 it/sec) -training >> step=1889500, episode=316 reward=0.7409006 (536.92 it/sec) -training >> step=1889600, episode=316 reward=0.7609957 (487.05 it/sec) -training >> step=1889700, episode=316 reward=0.7250593 (521.41 it/sec) -training >> step=1889800, episode=316 reward=0.7693385 (563.28 it/sec) -training >> step=1889900, episode=316 reward=0.7748677 (552.96 it/sec) -training >> step=1890000, episode=316 reward=0.7582936 (592.43 it/sec) -training >> step=1890100, episode=316 reward=0.7707454 (567.36 it/sec) -training >> step=1890200, episode=316 reward=0.7489866 (551.81 it/sec) -training >> step=1890300, episode=316 reward=0.7415566 (587.51 it/sec) -training >> step=1890400, episode=316 reward=0.7788823 (542.03 it/sec) -training >> step=1890500, episode=316 reward=0.7722188 (521.27 it/sec) -training >> step=1890600, episode=316 reward=0.7674494 (569.70 it/sec) -training >> step=1890700, episode=316 reward=0.754568 (586.72 it/sec) -training >> step=1890800, episode=316 reward=0.7545241 (558.64 it/sec) -training >> step=1890900, episode=316 reward=0.7703601 (557.14 it/sec) -training >> step=1891000, episode=316 reward=0.7504861 (544.59 it/sec) -training >> step=1891100, episode=316 reward=0.7682242 (610.04 it/sec) -training >> step=1891200, episode=316 reward=0.7603068 (568.35 it/sec) -training >> step=1891300, episode=316 reward=0.7782899 (524.68 it/sec) -training >> step=1891400, episode=316 reward=0.7438419 (545.72 it/sec) -training >> step=1891500, episode=316 reward=0.7703913 (522.98 it/sec) -training >> step=1891600, episode=316 reward=0.7706797 (377.92 it/sec) -training >> step=1891700, episode=316 reward=0.7302634 (559.04 it/sec) -training >> step=1891800, episode=316 reward=0.7382436 (559.31 it/sec) -training >> step=1891900, episode=316 reward=0.7732596 (594.07 it/sec) -training >> step=1892000, episode=316 reward=0.7561181 (487.73 it/sec) -training >> step=1892100, episode=316 reward=0.7673593 (477.01 it/sec) -training >> step=1892200, episode=316 reward=0.7615772 (522.01 it/sec) -training >> step=1892300, episode=316 reward=0.7806636 (562.23 it/sec) -training >> step=1892400, episode=316 reward=0.7845564 (470.52 it/sec) -training >> step=1892500, episode=316 reward=0.7715356 (510.70 it/sec) -training >> step=1892600, episode=316 reward=0.7581011 (549.44 it/sec) -training >> step=1892700, episode=316 reward=0.7627211 (536.08 it/sec) -training >> step=1892800, episode=316 reward=0.7867882 (546.44 it/sec) -training >> step=1892900, episode=316 reward=0.7759023 (503.45 it/sec) -training >> step=1893000, episode=316 reward=0.7435235 (559.61 it/sec) -training >> step=1893100, episode=316 reward=0.7527906 (516.80 it/sec) -training >> step=1893200, episode=316 reward=0.7763802 (523.24 it/sec) -training >> step=1893300, episode=316 reward=0.75581 (537.87 it/sec) -training >> step=1893400, episode=316 reward=0.7622454 (563.62 it/sec) -training >> step=1893500, episode=316 reward=0.7615314 (555.37 it/sec) -training >> step=1893600, episode=316 reward=0.765461 (542.49 it/sec) -training >> step=1893700, episode=316 reward=0.7604446 (536.13 it/sec) -training >> step=1893800, episode=316 reward=0.7732677 (556.14 it/sec) -training >> step=1893900, episode=316 reward=0.7586834 (557.93 it/sec) -training >> step=1894000, episode=316 reward=0.7451584 (531.02 it/sec) -training >> step=1894100, episode=316 reward=0.7726778 (524.40 it/sec) -training >> step=1894200, episode=316 reward=0.7541524 (556.15 it/sec) -training >> step=1894300, episode=316 reward=0.770954 (564.11 it/sec) -training >> step=1894400, episode=316 reward=0.7617332 (529.64 it/sec) -training >> step=1894500, episode=316 reward=0.7526164 (523.88 it/sec) -training >> step=1894600, episode=316 reward=0.7718546 (484.28 it/sec) -training >> step=1894700, episode=316 reward=0.7465292 (461.84 it/sec) -training >> step=1894800, episode=316 reward=0.7760946 (492.48 it/sec) -training >> step=1894900, episode=316 reward=0.7599545 (505.17 it/sec) -training >> step=1895000, episode=316 reward=0.7722901 (509.02 it/sec) -training >> step=1895100, episode=316 reward=0.7729393 (503.49 it/sec) -training >> step=1895200, episode=316 reward=0.76152 (448.10 it/sec) -training >> step=1895300, episode=317 reward=0.7514123 (167.71 it/sec) -training >> step=1895400, episode=317 reward=0.7683005 (559.47 it/sec) -training >> step=1895500, episode=317 reward=0.7452801 (553.76 it/sec) -training >> step=1895600, episode=317 reward=0.7439175 (543.34 it/sec) -training >> step=1895700, episode=317 reward=0.7737865 (559.31 it/sec) -training >> step=1895800, episode=317 reward=0.7399663 (560.28 it/sec) -training >> step=1895900, episode=317 reward=0.7505248 (552.89 it/sec) -training >> step=1896000, episode=317 reward=0.7846768 (577.57 it/sec) -training >> step=1896100, episode=317 reward=0.7552849 (537.64 it/sec) -training >> step=1896200, episode=317 reward=0.7667078 (566.20 it/sec) -training >> step=1896300, episode=317 reward=0.7773404 (527.48 it/sec) -training >> step=1896400, episode=317 reward=0.7636431 (559.59 it/sec) -training >> step=1896500, episode=317 reward=0.7685797 (543.19 it/sec) -training >> step=1896600, episode=317 reward=0.770052 (593.65 it/sec) -training >> step=1896700, episode=317 reward=0.7752402 (573.60 it/sec) -training >> step=1896800, episode=317 reward=0.7500429 (555.37 it/sec) -training >> step=1896900, episode=317 reward=0.7567684 (607.35 it/sec) -training >> step=1897000, episode=317 reward=0.739978 (590.86 it/sec) -training >> step=1897100, episode=317 reward=0.7564186 (537.46 it/sec) -training >> step=1897200, episode=317 reward=0.7518513 (494.77 it/sec) -training >> step=1897300, episode=317 reward=0.7612283 (539.03 it/sec) -training >> step=1897400, episode=317 reward=0.7661663 (526.82 it/sec) -training >> step=1897500, episode=317 reward=0.7779514 (539.24 it/sec) -training >> step=1897600, episode=317 reward=0.7692566 (512.43 it/sec) -training >> step=1897700, episode=317 reward=0.7503883 (389.87 it/sec) -training >> step=1897800, episode=317 reward=0.7495538 (530.83 it/sec) -training >> step=1897900, episode=317 reward=0.7789735 (543.65 it/sec) -training >> step=1898000, episode=317 reward=0.7662228 (541.79 it/sec) -training >> step=1898100, episode=317 reward=0.7705504 (565.98 it/sec) -training >> step=1898200, episode=317 reward=0.7609321 (558.64 it/sec) -training >> step=1898300, episode=317 reward=0.7739173 (526.26 it/sec) -training >> step=1898400, episode=317 reward=0.7874642 (555.95 it/sec) -training >> step=1898500, episode=317 reward=0.7731488 (557.33 it/sec) -training >> step=1898600, episode=317 reward=0.7610334 (541.56 it/sec) -training >> step=1898700, episode=317 reward=0.7486395 (574.57 it/sec) -training >> step=1898800, episode=317 reward=0.7621526 (559.07 it/sec) -training >> step=1898900, episode=317 reward=0.7673876 (566.50 it/sec) -training >> step=1899000, episode=317 reward=0.7605143 (531.97 it/sec) -training >> step=1899100, episode=317 reward=0.7598759 (556.35 it/sec) -training >> step=1899200, episode=317 reward=0.7833968 (471.93 it/sec) -training >> step=1899300, episode=317 reward=0.7592785 (558.09 it/sec) -training >> step=1899400, episode=317 reward=0.7667537 (520.19 it/sec) -training >> step=1899500, episode=317 reward=0.7700376 (521.88 it/sec) -training >> step=1899600, episode=317 reward=0.7686926 (580.23 it/sec) -training >> step=1899700, episode=317 reward=0.7503631 (546.29 it/sec) -training >> step=1899800, episode=317 reward=0.7668539 (526.05 it/sec) -training >> step=1899900, episode=317 reward=0.7612152 (539.68 it/sec) -training >> step=1900000, episode=317 reward=0.7626341 (586.83 it/sec) -training >> step=1900100, episode=317 reward=0.7733539 (568.63 it/sec) -training >> step=1900200, episode=317 reward=0.7575943 (554.65 it/sec) -training >> step=1900300, episode=317 reward=0.7617301 (534.68 it/sec) -training >> step=1900400, episode=317 reward=0.7482894 (543.59 it/sec) -training >> step=1900500, episode=317 reward=0.7620124 (508.63 it/sec) -training >> step=1900600, episode=317 reward=0.7648821 (547.08 it/sec) -training >> step=1900700, episode=317 reward=0.7855182 (564.52 it/sec) -training >> step=1900800, episode=317 reward=0.7455186 (543.59 it/sec) -training >> step=1900900, episode=317 reward=0.7550852 (533.20 it/sec) -training >> step=1901000, episode=317 reward=0.7571075 (500.91 it/sec) -training >> step=1901100, episode=317 reward=0.7288821 (559.12 it/sec) -training >> step=1901200, episode=317 reward=0.761226 (538.11 it/sec) -training >> step=1901300, episode=318 reward=0.7656409 (165.70 it/sec) -training >> step=1901400, episode=318 reward=0.7727296 (547.74 it/sec) -training >> step=1901500, episode=318 reward=0.7359665 (556.48 it/sec) -training >> step=1901600, episode=318 reward=0.7556211 (562.91 it/sec) -training >> step=1901700, episode=318 reward=0.7524869 (552.90 it/sec) -training >> step=1901800, episode=318 reward=0.7635487 (536.26 it/sec) -training >> step=1901900, episode=318 reward=0.7835168 (498.00 it/sec) -training >> step=1902000, episode=318 reward=0.7517977 (611.45 it/sec) -training >> step=1902100, episode=318 reward=0.7466502 (571.39 it/sec) -training >> step=1902200, episode=318 reward=0.7647271 (564.26 it/sec) -training >> step=1902300, episode=318 reward=0.7602237 (569.23 it/sec) -training >> step=1902400, episode=318 reward=0.7737663 (546.59 it/sec) -training >> step=1902500, episode=318 reward=0.765924 (538.06 it/sec) -training >> step=1902600, episode=318 reward=0.7730472 (577.24 it/sec) -training >> step=1902700, episode=318 reward=0.7550279 (602.59 it/sec) -training >> step=1902800, episode=318 reward=0.7430567 (549.48 it/sec) -training >> step=1902900, episode=318 reward=0.7673256 (529.38 it/sec) -training >> step=1903000, episode=318 reward=0.7667711 (518.97 it/sec) -training >> step=1903100, episode=318 reward=0.7500716 (561.45 it/sec) -training >> step=1903200, episode=318 reward=0.7435528 (558.17 it/sec) -training >> step=1903300, episode=318 reward=0.7393687 (541.23 it/sec) -training >> step=1903400, episode=318 reward=0.7386934 (523.58 it/sec) -training >> step=1903500, episode=318 reward=0.7832999 (526.36 it/sec) -training >> step=1903600, episode=318 reward=0.7729625 (528.31 it/sec) -training >> step=1903700, episode=318 reward=0.7666971 (578.20 it/sec) -training >> step=1903800, episode=318 reward=0.7665613 (584.75 it/sec) -training >> step=1903900, episode=318 reward=0.7753323 (573.09 it/sec) -training >> step=1904000, episode=318 reward=0.7464228 (516.01 it/sec) -training >> step=1904100, episode=318 reward=0.7699791 (383.07 it/sec) -training >> step=1904200, episode=318 reward=0.7711778 (590.25 it/sec) -training >> step=1904300, episode=318 reward=0.760437 (555.54 it/sec) -training >> step=1904400, episode=318 reward=0.7602848 (582.57 it/sec) -training >> step=1904500, episode=318 reward=0.7682065 (548.63 it/sec) -training >> step=1904600, episode=318 reward=0.7495917 (595.41 it/sec) -training >> step=1904700, episode=318 reward=0.7483578 (517.91 it/sec) -training >> step=1904800, episode=318 reward=0.7599471 (546.66 it/sec) -training >> step=1904900, episode=318 reward=0.7757675 (544.95 it/sec) -training >> step=1905000, episode=318 reward=0.7572643 (558.09 it/sec) -training >> step=1905100, episode=318 reward=0.7985013 (537.94 it/sec) -training >> step=1905200, episode=318 reward=0.757821 (477.99 it/sec) -training >> step=1905300, episode=318 reward=0.7686654 (559.62 it/sec) -training >> step=1905400, episode=318 reward=0.7619918 (585.70 it/sec) -training >> step=1905500, episode=318 reward=0.7656705 (556.95 it/sec) -training >> step=1905600, episode=318 reward=0.744469 (531.28 it/sec) -training >> step=1905700, episode=318 reward=0.7765132 (503.71 it/sec) -training >> step=1905800, episode=318 reward=0.7582684 (532.50 it/sec) -training >> step=1905900, episode=318 reward=0.7668992 (493.32 it/sec) -training >> step=1906000, episode=318 reward=0.7753744 (453.73 it/sec) -training >> step=1906100, episode=318 reward=0.7697998 (484.99 it/sec) -training >> step=1906200, episode=318 reward=0.7612602 (469.33 it/sec) -training >> step=1906300, episode=318 reward=0.7796637 (470.12 it/sec) -training >> step=1906400, episode=318 reward=0.7692177 (500.62 it/sec) -training >> step=1906500, episode=318 reward=0.7582891 (501.55 it/sec) -training >> step=1906600, episode=318 reward=0.786487 (478.24 it/sec) -training >> step=1906700, episode=318 reward=0.7662641 (461.11 it/sec) -training >> step=1906800, episode=318 reward=0.7639027 (464.75 it/sec) -training >> step=1906900, episode=318 reward=0.7463236 (511.34 it/sec) -training >> step=1907000, episode=318 reward=0.7619802 (498.37 it/sec) -training >> step=1907100, episode=318 reward=0.7524689 (462.44 it/sec) -training >> step=1907200, episode=318 reward=0.76419 (483.46 it/sec) -training >> step=1907300, episode=319 reward=0.767204 (125.95 it/sec) -training >> step=1907400, episode=319 reward=0.7526707 (454.98 it/sec) -training >> step=1907500, episode=319 reward=0.7263096 (476.74 it/sec) -training >> step=1907600, episode=319 reward=0.7483994 (480.25 it/sec) -training >> step=1907700, episode=319 reward=0.7436402 (483.77 it/sec) -training >> step=1907800, episode=319 reward=0.776053 (499.64 it/sec) -training >> step=1907900, episode=319 reward=0.777702 (472.04 it/sec) -training >> step=1908000, episode=319 reward=0.7702313 (485.11 it/sec) -training >> step=1908100, episode=319 reward=0.764119 (490.37 it/sec) -training >> step=1908200, episode=319 reward=0.7707983 (494.42 it/sec) -training >> step=1908300, episode=319 reward=0.77864 (469.69 it/sec) -training >> step=1908400, episode=319 reward=0.7694196 (478.65 it/sec) -training >> step=1908500, episode=319 reward=0.7964572 (496.69 it/sec) -training >> step=1908600, episode=319 reward=0.7550664 (519.32 it/sec) -training >> step=1908700, episode=319 reward=0.7560291 (503.21 it/sec) -training >> step=1908800, episode=319 reward=0.7941725 (493.63 it/sec) -training >> step=1908900, episode=319 reward=0.7553909 (413.86 it/sec) -training >> step=1909000, episode=319 reward=0.757732 (465.88 it/sec) -training >> step=1909100, episode=319 reward=0.7510061 (456.48 it/sec) -training >> step=1909200, episode=319 reward=0.7589422 (488.34 it/sec) -training >> step=1909300, episode=319 reward=0.7645424 (480.02 it/sec) -training >> step=1909400, episode=319 reward=0.7624764 (452.99 it/sec) -training >> step=1909500, episode=319 reward=0.7528312 (467.57 it/sec) -training >> step=1909600, episode=319 reward=0.7746196 (473.12 it/sec) -training >> step=1909700, episode=319 reward=0.76846 (444.68 it/sec) -training >> step=1909800, episode=319 reward=0.7707965 (431.80 it/sec) -training >> step=1909900, episode=319 reward=0.7662798 (450.42 it/sec) -training >> step=1910000, episode=319 reward=0.7738471 (458.54 it/sec) -training >> step=1910100, episode=319 reward=0.7627352 (301.41 it/sec) -training >> step=1910200, episode=319 reward=0.7650885 (426.23 it/sec) -training >> step=1910300, episode=319 reward=0.7658292 (429.65 it/sec) -training >> step=1910400, episode=319 reward=0.7712198 (458.06 it/sec) -training >> step=1910500, episode=319 reward=0.736795 (447.07 it/sec) -training >> step=1910600, episode=319 reward=0.7598264 (397.87 it/sec) -training >> step=1910700, episode=319 reward=0.7547695 (397.17 it/sec) -training >> step=1910800, episode=319 reward=0.791043 (480.36 it/sec) -training >> step=1910900, episode=319 reward=0.779336 (502.79 it/sec) -training >> step=1911000, episode=319 reward=0.7626135 (491.15 it/sec) -training >> step=1911100, episode=319 reward=0.7755386 (452.18 it/sec) -training >> step=1911200, episode=319 reward=0.7664964 (462.21 it/sec) -training >> step=1911300, episode=319 reward=0.7632824 (484.83 it/sec) -training >> step=1911400, episode=319 reward=0.7760435 (468.58 it/sec) -training >> step=1911500, episode=319 reward=0.7716201 (493.07 it/sec) -training >> step=1911600, episode=319 reward=0.7508774 (458.48 it/sec) -training >> step=1911700, episode=319 reward=0.754772 (443.07 it/sec) -training >> step=1911800, episode=319 reward=0.7584585 (490.97 it/sec) -training >> step=1911900, episode=319 reward=0.7719274 (467.59 it/sec) -training >> step=1912000, episode=319 reward=0.7654235 (486.62 it/sec) -training >> step=1912100, episode=319 reward=0.7681256 (506.70 it/sec) -training >> step=1912200, episode=319 reward=0.7350873 (451.32 it/sec) -training >> step=1912300, episode=319 reward=0.7558632 (495.73 it/sec) -training >> step=1912400, episode=319 reward=0.7513925 (523.92 it/sec) -training >> step=1912500, episode=319 reward=0.7579504 (538.69 it/sec) -training >> step=1912600, episode=319 reward=0.7657346 (468.75 it/sec) -training >> step=1912700, episode=319 reward=0.7604918 (497.98 it/sec) -training >> step=1912800, episode=319 reward=0.7503345 (556.64 it/sec) -training >> step=1912900, episode=319 reward=0.7529603 (530.03 it/sec) -training >> step=1913000, episode=319 reward=0.7448031 (526.16 it/sec) -training >> step=1913100, episode=319 reward=0.7448282 (504.46 it/sec) -training >> step=1913200, episode=319 reward=0.7779295 (513.34 it/sec) -training >> step=1913300, episode=320 reward=0.7547304 (138.28 it/sec) -training >> step=1913400, episode=320 reward=0.731684 (536.13 it/sec) -training >> step=1913500, episode=320 reward=0.751973 (540.81 it/sec) -training >> step=1913600, episode=320 reward=0.7527805 (557.27 it/sec) -training >> step=1913700, episode=320 reward=0.748661 (551.82 it/sec) -training >> step=1913800, episode=320 reward=0.7472941 (471.69 it/sec) -training >> step=1913900, episode=320 reward=0.7569031 (442.20 it/sec) -training >> step=1914000, episode=320 reward=0.744257 (497.07 it/sec) -training >> step=1914100, episode=320 reward=0.7465981 (568.47 it/sec) -training >> step=1914200, episode=320 reward=0.7650288 (536.45 it/sec) -training >> step=1914300, episode=320 reward=0.7571117 (534.86 it/sec) -training >> step=1914400, episode=320 reward=0.7519593 (456.30 it/sec) -training >> step=1914500, episode=320 reward=0.7674398 (455.78 it/sec) -training >> step=1914600, episode=320 reward=0.7657689 (443.67 it/sec) -training >> step=1914700, episode=320 reward=0.7782269 (503.24 it/sec) -training >> step=1914800, episode=320 reward=0.7536024 (538.98 it/sec) -training >> step=1914900, episode=320 reward=0.7714128 (507.12 it/sec) -training >> step=1915000, episode=320 reward=0.7757395 (516.64 it/sec) -training >> step=1915100, episode=320 reward=0.7460036 (519.94 it/sec) -training >> step=1915200, episode=320 reward=0.7620656 (511.05 it/sec) -training >> step=1915300, episode=320 reward=0.7515206 (521.05 it/sec) -training >> step=1915400, episode=320 reward=0.7796658 (483.14 it/sec) -training >> step=1915500, episode=320 reward=0.7671471 (474.25 it/sec) -training >> step=1915600, episode=320 reward=0.7540002 (450.12 it/sec) -training >> step=1915700, episode=320 reward=0.7817609 (471.04 it/sec) -training >> step=1915800, episode=320 reward=0.7569526 (476.11 it/sec) -training >> step=1915900, episode=320 reward=0.7761555 (479.30 it/sec) -training >> step=1916000, episode=320 reward=0.7594309 (477.84 it/sec) -training >> step=1916100, episode=320 reward=0.772342 (402.73 it/sec) -training >> step=1916200, episode=320 reward=0.7584782 (446.94 it/sec) -training >> step=1916300, episode=320 reward=0.7536625 (345.71 it/sec) -training >> step=1916400, episode=320 reward=0.794821 (529.37 it/sec) -training >> step=1916500, episode=320 reward=0.7708502 (432.81 it/sec) -training >> step=1916600, episode=320 reward=0.7428832 (408.23 it/sec) -training >> step=1916700, episode=320 reward=0.7596059 (471.28 it/sec) -training >> step=1916800, episode=320 reward=0.750851 (456.53 it/sec) -training >> step=1916900, episode=320 reward=0.7426661 (455.12 it/sec) -training >> step=1917000, episode=320 reward=0.7626332 (525.41 it/sec) -training >> step=1917100, episode=320 reward=0.7463813 (521.04 it/sec) -training >> step=1917200, episode=320 reward=0.7670068 (581.60 it/sec) -training >> step=1917300, episode=320 reward=0.7648065 (493.84 it/sec) -training >> step=1917400, episode=320 reward=0.7674422 (538.28 it/sec) -training >> step=1917500, episode=320 reward=0.7628754 (491.43 it/sec) -training >> step=1917600, episode=320 reward=0.7546841 (569.87 it/sec) -training >> step=1917700, episode=320 reward=0.7675862 (520.26 it/sec) -training >> step=1917800, episode=320 reward=0.7451937 (474.19 it/sec) -training >> step=1917900, episode=320 reward=0.7498409 (427.31 it/sec) -training >> step=1918000, episode=320 reward=0.7758285 (483.09 it/sec) -training >> step=1918100, episode=320 reward=0.767548 (467.04 it/sec) -training >> step=1918200, episode=320 reward=0.7734295 (483.09 it/sec) -training >> step=1918300, episode=320 reward=0.7566119 (510.82 it/sec) -training >> step=1918400, episode=320 reward=0.7652068 (530.94 it/sec) -training >> step=1918500, episode=320 reward=0.7527171 (502.15 it/sec) -training >> step=1918600, episode=320 reward=0.7725546 (553.39 it/sec) -training >> step=1918700, episode=320 reward=0.7509165 (558.47 it/sec) -training >> step=1918800, episode=320 reward=0.752108 (515.48 it/sec) -training >> step=1918900, episode=320 reward=0.7701508 (548.80 it/sec) -training >> step=1919000, episode=320 reward=0.7389771 (526.46 it/sec) -training >> step=1919100, episode=320 reward=0.771526 (605.69 it/sec) -training >> step=1919200, episode=320 reward=0.7455643 (554.58 it/sec) -training >> step=1919300, episode=321 reward=0.7349669 (66.05 it/sec) -training >> step=1919400, episode=321 reward=0.757153 (498.40 it/sec) -training >> step=1919500, episode=321 reward=0.7356113 (516.10 it/sec) -training >> step=1919600, episode=321 reward=0.7497212 (529.11 it/sec) -training >> step=1919700, episode=321 reward=0.7588428 (518.20 it/sec) -training >> step=1919800, episode=321 reward=0.7564263 (507.25 it/sec) -training >> step=1919900, episode=321 reward=0.7630743 (568.40 it/sec) -training >> step=1920000, episode=321 reward=0.7713894 (554.09 it/sec) -training >> step=1920100, episode=321 reward=0.777471 (538.82 it/sec) -training >> step=1920200, episode=321 reward=0.7991326 (573.11 it/sec) -training >> step=1920300, episode=321 reward=0.7514512 (548.00 it/sec) -training >> step=1920400, episode=321 reward=0.7664254 (544.37 it/sec) -training >> step=1920500, episode=321 reward=0.7734618 (576.74 it/sec) -training >> step=1920600, episode=321 reward=0.7600529 (563.63 it/sec) -training >> step=1920700, episode=321 reward=0.7596325 (548.74 it/sec) -training >> step=1920800, episode=321 reward=0.7717699 (570.25 it/sec) -training >> step=1920900, episode=321 reward=0.737931 (492.17 it/sec) -training >> step=1921000, episode=321 reward=0.7682663 (559.06 it/sec) -training >> step=1921100, episode=321 reward=0.7731125 (556.36 it/sec) -training >> step=1921200, episode=321 reward=0.7545517 (555.80 it/sec) -training >> step=1921300, episode=321 reward=0.7791537 (551.56 it/sec) -training >> step=1921400, episode=321 reward=0.7605084 (516.04 it/sec) -training >> step=1921500, episode=321 reward=0.7558455 (544.63 it/sec) -training >> step=1921600, episode=321 reward=0.7669981 (553.16 it/sec) -training >> step=1921700, episode=321 reward=0.7612501 (575.01 it/sec) -training >> step=1921800, episode=321 reward=0.7513375 (548.35 it/sec) -training >> step=1921900, episode=321 reward=0.7562703 (577.69 it/sec) -training >> step=1922000, episode=321 reward=0.7807624 (551.71 it/sec) -training >> step=1922100, episode=321 reward=0.745239 (455.08 it/sec) -training >> step=1922200, episode=321 reward=0.7549242 (538.44 it/sec) -training >> step=1922300, episode=321 reward=0.7790934 (510.93 it/sec) -training >> step=1922400, episode=321 reward=0.7765287 (524.26 it/sec) -training >> step=1922500, episode=321 reward=0.7827031 (534.60 it/sec) -training >> step=1922600, episode=321 reward=0.7676332 (525.45 it/sec) -training >> step=1922700, episode=321 reward=0.7579157 (421.39 it/sec) -training >> step=1922800, episode=321 reward=0.7673864 (523.64 it/sec) -training >> step=1922900, episode=321 reward=0.7678291 (457.16 it/sec) -training >> step=1923000, episode=321 reward=0.7741136 (459.51 it/sec) -training >> step=1923100, episode=321 reward=0.7674186 (560.03 it/sec) -training >> step=1923200, episode=321 reward=0.7810807 (553.21 it/sec) -training >> step=1923300, episode=321 reward=0.7590283 (477.70 it/sec) -training >> step=1923400, episode=321 reward=0.752043 (532.94 it/sec) -training >> step=1923500, episode=321 reward=0.7653751 (507.66 it/sec) -training >> step=1923600, episode=321 reward=0.7721483 (475.17 it/sec) -training >> step=1923700, episode=321 reward=0.7885492 (487.80 it/sec) -training >> step=1923800, episode=321 reward=0.7771245 (517.01 it/sec) -training >> step=1923900, episode=321 reward=0.769464 (557.17 it/sec) -training >> step=1924000, episode=321 reward=0.7657167 (532.70 it/sec) -training >> step=1924100, episode=321 reward=0.7719828 (555.16 it/sec) -training >> step=1924200, episode=321 reward=0.7626943 (553.08 it/sec) -training >> step=1924300, episode=321 reward=0.7576549 (557.20 it/sec) -training >> step=1924400, episode=321 reward=0.7656455 (568.46 it/sec) -training >> step=1924500, episode=321 reward=0.760037 (540.94 it/sec) -training >> step=1924600, episode=321 reward=0.7670482 (540.34 it/sec) -training >> step=1924700, episode=321 reward=0.7432317 (543.62 it/sec) -training >> step=1924800, episode=321 reward=0.7657551 (557.41 it/sec) -training >> step=1924900, episode=321 reward=0.7600582 (538.28 it/sec) -training >> step=1925000, episode=321 reward=0.7531876 (571.74 it/sec) -training >> step=1925100, episode=321 reward=0.756096 (539.25 it/sec) -training >> step=1925200, episode=321 reward=0.7434265 (559.64 it/sec) -training >> step=1925300, episode=322 reward=0.7476676 (52.33 it/sec) -training >> step=1925400, episode=322 reward=0.7401943 (513.36 it/sec) -training >> step=1925500, episode=322 reward=0.754667 (501.99 it/sec) -training >> step=1925600, episode=322 reward=0.7533882 (531.59 it/sec) -training >> step=1925700, episode=322 reward=0.7790399 (521.30 it/sec) -training >> step=1925800, episode=322 reward=0.7753212 (520.50 it/sec) -training >> step=1925900, episode=322 reward=0.7756712 (575.42 it/sec) -training >> step=1926000, episode=322 reward=0.757251 (556.32 it/sec) -training >> step=1926100, episode=322 reward=0.753805 (580.90 it/sec) -training >> step=1926200, episode=322 reward=0.7608647 (544.42 it/sec) -training >> step=1926300, episode=322 reward=0.7716119 (549.05 it/sec) -training >> step=1926400, episode=322 reward=0.7724645 (531.32 it/sec) -training >> step=1926500, episode=322 reward=0.771374 (566.98 it/sec) -training >> step=1926600, episode=322 reward=0.7451849 (530.91 it/sec) -training >> step=1926700, episode=322 reward=0.7580397 (465.41 it/sec) -training >> step=1926800, episode=322 reward=0.7756828 (522.00 it/sec) -training >> step=1926900, episode=322 reward=0.7478802 (411.30 it/sec) -training >> step=1927000, episode=322 reward=0.7674552 (531.93 it/sec) -training >> step=1927100, episode=322 reward=0.7684594 (541.65 it/sec) -training >> step=1927200, episode=322 reward=0.7732741 (560.32 it/sec) -training >> step=1927300, episode=322 reward=0.7584353 (538.90 it/sec) -training >> step=1927400, episode=322 reward=0.7826709 (500.17 it/sec) -training >> step=1927500, episode=322 reward=0.7666389 (504.99 it/sec) -training >> step=1927600, episode=322 reward=0.7897577 (515.28 it/sec) -training >> step=1927700, episode=322 reward=0.764406 (546.68 it/sec) -training >> step=1927800, episode=322 reward=0.7730449 (543.20 it/sec) -training >> step=1927900, episode=322 reward=0.7568843 (501.08 it/sec) -training >> step=1928000, episode=322 reward=0.7727732 (552.56 it/sec) -training >> step=1928100, episode=322 reward=0.7606625 (522.93 it/sec) -training >> step=1928200, episode=322 reward=0.7759352 (443.13 it/sec) -training >> step=1928300, episode=322 reward=0.7846907 (535.14 it/sec) -training >> step=1928400, episode=322 reward=0.7710161 (524.16 it/sec) -training >> step=1928500, episode=322 reward=0.7541198 (456.33 it/sec) -training >> step=1928600, episode=322 reward=0.7473506 (278.32 it/sec) -training >> step=1928700, episode=322 reward=0.775546 (384.06 it/sec) -training >> step=1928800, episode=322 reward=0.7812641 (377.02 it/sec) -training >> step=1928900, episode=322 reward=0.753774 (459.96 it/sec) -training >> step=1929000, episode=322 reward=0.7607098 (480.36 it/sec) -training >> step=1929100, episode=322 reward=0.7737822 (466.66 it/sec) -training >> step=1929200, episode=322 reward=0.7578399 (461.85 it/sec) -training >> step=1929300, episode=322 reward=0.7463719 (394.12 it/sec) -training >> step=1929400, episode=322 reward=0.7504284 (435.03 it/sec) -training >> step=1929500, episode=322 reward=0.7688475 (448.02 it/sec) -training >> step=1929600, episode=322 reward=0.7655302 (489.24 it/sec) -training >> step=1929700, episode=322 reward=0.7409806 (475.56 it/sec) -training >> step=1929800, episode=322 reward=0.7538496 (478.62 it/sec) -training >> step=1929900, episode=322 reward=0.7728132 (534.42 it/sec) -training >> step=1930000, episode=322 reward=0.7729387 (515.43 it/sec) -training >> step=1930100, episode=322 reward=0.7465233 (500.83 it/sec) -training >> step=1930200, episode=322 reward=0.7555858 (518.16 it/sec) -training >> step=1930300, episode=322 reward=0.7548424 (516.10 it/sec) -training >> step=1930400, episode=322 reward=0.7763999 (515.08 it/sec) -training >> step=1930500, episode=322 reward=0.7672244 (458.15 it/sec) -training >> step=1930600, episode=322 reward=0.7599264 (486.24 it/sec) -training >> step=1930700, episode=322 reward=0.7593839 (465.68 it/sec) -training >> step=1930800, episode=322 reward=0.749325 (520.36 it/sec) -training >> step=1930900, episode=322 reward=0.7152857 (527.65 it/sec) -training >> step=1931000, episode=322 reward=0.7562197 (482.66 it/sec) -training >> step=1931100, episode=322 reward=0.7571113 (515.38 it/sec) -training >> step=1931200, episode=322 reward=0.7497024 (488.57 it/sec) -training >> step=1931300, episode=323 reward=0.7532087 (117.11 it/sec) -training >> step=1931400, episode=323 reward=0.7515947 (498.98 it/sec) -training >> step=1931500, episode=323 reward=0.7773639 (527.96 it/sec) -training >> step=1931600, episode=323 reward=0.7586282 (509.95 it/sec) -training >> step=1931700, episode=323 reward=0.7599462 (520.90 it/sec) -training >> step=1931800, episode=323 reward=0.7612166 (552.10 it/sec) -training >> step=1931900, episode=323 reward=0.7731792 (499.56 it/sec) -training >> step=1932000, episode=323 reward=0.7669069 (553.28 it/sec) -training >> step=1932100, episode=323 reward=0.7784978 (510.28 it/sec) -training >> step=1932200, episode=323 reward=0.7726284 (491.82 it/sec) -training >> step=1932300, episode=323 reward=0.7678183 (532.70 it/sec) -training >> step=1932400, episode=323 reward=0.7553003 (520.65 it/sec) -training >> step=1932500, episode=323 reward=0.7722143 (554.73 it/sec) -training >> step=1932600, episode=323 reward=0.7685184 (531.33 it/sec) -training >> step=1932700, episode=323 reward=0.783627 (493.79 it/sec) -training >> step=1932800, episode=323 reward=0.7923305 (547.54 it/sec) -training >> step=1932900, episode=323 reward=0.76223 (495.12 it/sec) -training >> step=1933000, episode=323 reward=0.7532558 (544.83 it/sec) -training >> step=1933100, episode=323 reward=0.7757144 (561.68 it/sec) -training >> step=1933200, episode=323 reward=0.7607379 (516.77 it/sec) -training >> step=1933300, episode=323 reward=0.7525295 (484.89 it/sec) -training >> step=1933400, episode=323 reward=0.7472158 (480.57 it/sec) -training >> step=1933500, episode=323 reward=0.752644 (437.83 it/sec) -training >> step=1933600, episode=323 reward=0.7435835 (438.42 it/sec) -training >> step=1933700, episode=323 reward=0.7463086 (462.20 it/sec) -training >> step=1933800, episode=323 reward=0.7427661 (483.59 it/sec) -training >> step=1933900, episode=323 reward=0.7900562 (464.50 it/sec) -training >> step=1934000, episode=323 reward=0.7716205 (460.63 it/sec) -training >> step=1934100, episode=323 reward=0.7780117 (394.16 it/sec) -training >> step=1934200, episode=323 reward=0.7537646 (417.03 it/sec) -training >> step=1934300, episode=323 reward=0.7614481 (371.09 it/sec) -training >> step=1934400, episode=323 reward=0.752649 (425.22 it/sec) -training >> step=1934500, episode=323 reward=0.7550813 (499.17 it/sec) -training >> step=1934600, episode=323 reward=0.7691253 (547.08 it/sec) -training >> step=1934700, episode=323 reward=0.7667357 (536.45 it/sec) -training >> step=1934800, episode=323 reward=0.7635057 (351.31 it/sec) -training >> step=1934900, episode=323 reward=0.7520735 (539.03 it/sec) -training >> step=1935000, episode=323 reward=0.7498925 (530.55 it/sec) -training >> step=1935100, episode=323 reward=0.7734681 (537.81 it/sec) -training >> step=1935200, episode=323 reward=0.7595095 (503.73 it/sec) -training >> step=1935300, episode=323 reward=0.754891 (515.24 it/sec) -training >> step=1935400, episode=323 reward=0.7751422 (550.28 it/sec) -training >> step=1935500, episode=323 reward=0.7602902 (540.80 it/sec) -training >> step=1935600, episode=323 reward=0.7701988 (529.24 it/sec) -training >> step=1935700, episode=323 reward=0.7671778 (541.52 it/sec) -training >> step=1935800, episode=323 reward=0.7521141 (544.49 it/sec) -training >> step=1935900, episode=323 reward=0.7539684 (532.17 it/sec) -training >> step=1936000, episode=323 reward=0.7425453 (562.39 it/sec) -training >> step=1936100, episode=323 reward=0.7687662 (521.62 it/sec) -training >> step=1936200, episode=323 reward=0.7736705 (586.83 it/sec) -training >> step=1936300, episode=323 reward=0.7534469 (516.11 it/sec) -training >> step=1936400, episode=323 reward=0.7643123 (541.69 it/sec) -training >> step=1936500, episode=323 reward=0.7473142 (561.37 it/sec) -training >> step=1936600, episode=323 reward=0.7283686 (558.41 it/sec) -training >> step=1936700, episode=323 reward=0.7699351 (527.55 it/sec) -training >> step=1936800, episode=323 reward=0.7791486 (568.40 it/sec) -training >> step=1936900, episode=323 reward=0.7542019 (550.47 it/sec) -training >> step=1937000, episode=323 reward=0.7781467 (561.97 it/sec) -training >> step=1937100, episode=323 reward=0.7597418 (558.68 it/sec) -training >> step=1937200, episode=323 reward=0.7224063 (542.25 it/sec) -training >> step=1937300, episode=324 reward=0.7579438 (58.42 it/sec) -training >> step=1937400, episode=324 reward=0.7419398 (520.02 it/sec) -training >> step=1937500, episode=324 reward=0.7647408 (506.96 it/sec) -training >> step=1937600, episode=324 reward=0.7693529 (547.80 it/sec) -training >> step=1937700, episode=324 reward=0.7764872 (511.24 it/sec) -training >> step=1937800, episode=324 reward=0.735589 (468.10 it/sec) -training >> step=1937900, episode=324 reward=0.7490163 (525.15 it/sec) -training >> step=1938000, episode=324 reward=0.7671064 (582.08 it/sec) -training >> step=1938100, episode=324 reward=0.7424076 (553.15 it/sec) -training >> step=1938200, episode=324 reward=0.7717313 (578.92 it/sec) -training >> step=1938300, episode=324 reward=0.7862891 (501.75 it/sec) -training >> step=1938400, episode=324 reward=0.7811235 (560.68 it/sec) -training >> step=1938500, episode=324 reward=0.7424479 (571.36 it/sec) -training >> step=1938600, episode=324 reward=0.7747225 (541.46 it/sec) -training >> step=1938700, episode=324 reward=0.7502566 (587.87 it/sec) -training >> step=1938800, episode=324 reward=0.7622781 (599.10 it/sec) -training >> step=1938900, episode=324 reward=0.7803992 (532.94 it/sec) -training >> step=1939000, episode=324 reward=0.775793 (561.24 it/sec) -training >> step=1939100, episode=324 reward=0.7570634 (551.92 it/sec) -training >> step=1939200, episode=324 reward=0.7624105 (554.78 it/sec) -training >> step=1939300, episode=324 reward=0.7432746 (560.12 it/sec) -training >> step=1939400, episode=324 reward=0.7594191 (558.27 it/sec) -training >> step=1939500, episode=324 reward=0.747514 (556.64 it/sec) -training >> step=1939600, episode=324 reward=0.7405973 (485.93 it/sec) -training >> step=1939700, episode=324 reward=0.7615104 (532.63 it/sec) -training >> step=1939800, episode=324 reward=0.7395898 (555.21 it/sec) -training >> step=1939900, episode=324 reward=0.7329143 (517.26 it/sec) -training >> step=1940000, episode=324 reward=0.7582421 (511.97 it/sec) -training >> step=1940100, episode=324 reward=0.7746504 (577.28 it/sec) -training >> step=1940200, episode=324 reward=0.7683098 (569.00 it/sec) -training >> step=1940300, episode=324 reward=0.764684 (576.73 it/sec) -training >> step=1940400, episode=324 reward=0.7616678 (538.75 it/sec) -training >> step=1940500, episode=324 reward=0.7574404 (569.53 it/sec) -training >> step=1940600, episode=324 reward=0.7696373 (503.15 it/sec) -training >> step=1940700, episode=324 reward=0.762613 (483.80 it/sec) -training >> step=1940800, episode=324 reward=0.7619081 (555.29 it/sec) -training >> step=1940900, episode=324 reward=0.7767345 (606.91 it/sec) -training >> step=1941000, episode=324 reward=0.7926701 (489.61 it/sec) -training >> step=1941100, episode=324 reward=0.777563 (517.05 it/sec) -training >> step=1941200, episode=324 reward=0.7580337 (413.64 it/sec) -training >> step=1941300, episode=324 reward=0.7725083 (537.48 it/sec) -training >> step=1941400, episode=324 reward=0.7382202 (571.14 it/sec) -training >> step=1941500, episode=324 reward=0.7536308 (511.44 it/sec) -training >> step=1941600, episode=324 reward=0.7885037 (524.63 it/sec) -training >> step=1941700, episode=324 reward=0.7710336 (572.52 it/sec) -training >> step=1941800, episode=324 reward=0.7435823 (512.86 it/sec) -training >> step=1941900, episode=324 reward=0.7756605 (551.34 it/sec) -training >> step=1942000, episode=324 reward=0.7795625 (560.13 it/sec) -training >> step=1942100, episode=324 reward=0.7593893 (532.55 it/sec) -training >> step=1942200, episode=324 reward=0.7590669 (551.36 it/sec) -training >> step=1942300, episode=324 reward=0.7268128 (541.20 it/sec) -training >> step=1942400, episode=324 reward=0.7498884 (563.86 it/sec) -training >> step=1942500, episode=324 reward=0.7698668 (551.85 it/sec) -training >> step=1942600, episode=324 reward=0.7675297 (561.40 it/sec) -training >> step=1942700, episode=324 reward=0.7821091 (564.62 it/sec) -training >> step=1942800, episode=324 reward=0.7566143 (567.91 it/sec) -training >> step=1942900, episode=324 reward=0.7592636 (533.56 it/sec) -training >> step=1943000, episode=324 reward=0.7474515 (530.13 it/sec) -training >> step=1943100, episode=324 reward=0.7383314 (580.49 it/sec) -training >> step=1943200, episode=324 reward=0.7635114 (528.47 it/sec) -training >> step=1943300, episode=325 reward=0.7362756 (52.79 it/sec) -training >> step=1943400, episode=325 reward=0.7359607 (519.48 it/sec) -training >> step=1943500, episode=325 reward=0.7509319 (521.23 it/sec) -training >> step=1943600, episode=325 reward=0.7413903 (528.92 it/sec) -training >> step=1943700, episode=325 reward=0.736743 (529.11 it/sec) -training >> step=1943800, episode=325 reward=0.7694469 (483.71 it/sec) -training >> step=1943900, episode=325 reward=0.7573014 (583.58 it/sec) -training >> step=1944000, episode=325 reward=0.769178 (527.81 it/sec) -training >> step=1944100, episode=325 reward=0.7698075 (538.64 it/sec) -training >> step=1944200, episode=325 reward=0.763175 (567.49 it/sec) -training >> step=1944300, episode=325 reward=0.7614545 (535.74 it/sec) -training >> step=1944400, episode=325 reward=0.7464157 (534.95 it/sec) -training >> step=1944500, episode=325 reward=0.7361744 (591.63 it/sec) -training >> step=1944600, episode=325 reward=0.7842659 (542.50 it/sec) -training >> step=1944700, episode=325 reward=0.7537341 (554.83 it/sec) -training >> step=1944800, episode=325 reward=0.7569632 (562.91 it/sec) -training >> step=1944900, episode=325 reward=0.7624158 (520.28 it/sec) -training >> step=1945000, episode=325 reward=0.7531027 (528.55 it/sec) -training >> step=1945100, episode=325 reward=0.7369854 (528.24 it/sec) -training >> step=1945200, episode=325 reward=0.7457012 (522.06 it/sec) -training >> step=1945300, episode=325 reward=0.7624118 (595.50 it/sec) -training >> step=1945400, episode=325 reward=0.7707381 (523.93 it/sec) -training >> step=1945500, episode=325 reward=0.7578908 (466.29 it/sec) -training >> step=1945600, episode=325 reward=0.7745317 (540.31 it/sec) -training >> step=1945700, episode=325 reward=0.7748303 (545.60 it/sec) -training >> step=1945800, episode=325 reward=0.7674935 (533.25 it/sec) -training >> step=1945900, episode=325 reward=0.7724488 (542.59 it/sec) -training >> step=1946000, episode=325 reward=0.761515 (545.89 it/sec) -training >> step=1946100, episode=325 reward=0.7559893 (518.48 it/sec) -training >> step=1946200, episode=325 reward=0.7627602 (523.88 it/sec) -training >> step=1946300, episode=325 reward=0.7793658 (508.65 it/sec) -training >> step=1946400, episode=325 reward=0.753661 (587.57 it/sec) -training >> step=1946500, episode=325 reward=0.7704228 (531.46 it/sec) -training >> step=1946600, episode=325 reward=0.758888 (469.71 it/sec) -training >> step=1946700, episode=325 reward=0.7826216 (532.33 it/sec) -training >> step=1946800, episode=325 reward=0.7724369 (523.78 it/sec) -training >> step=1946900, episode=325 reward=0.7718203 (541.73 it/sec) -training >> step=1947000, episode=325 reward=0.7648982 (553.30 it/sec) -training >> step=1947100, episode=325 reward=0.746755 (556.51 it/sec) -training >> step=1947200, episode=325 reward=0.7802663 (551.12 it/sec) -training >> step=1947300, episode=325 reward=0.7629839 (480.93 it/sec) -training >> step=1947400, episode=325 reward=0.7593393 (361.87 it/sec) -training >> step=1947500, episode=325 reward=0.7582253 (553.97 it/sec) -training >> step=1947600, episode=325 reward=0.7641873 (510.32 it/sec) -training >> step=1947700, episode=325 reward=0.7598413 (527.86 it/sec) -training >> step=1947800, episode=325 reward=0.7639876 (518.51 it/sec) -training >> step=1947900, episode=325 reward=0.7735657 (540.63 it/sec) -training >> step=1948000, episode=325 reward=0.7518371 (463.94 it/sec) -training >> step=1948100, episode=325 reward=0.7615526 (564.40 it/sec) -training >> step=1948200, episode=325 reward=0.7787986 (554.44 it/sec) -training >> step=1948300, episode=325 reward=0.7391269 (562.86 it/sec) -training >> step=1948400, episode=325 reward=0.7790961 (547.42 it/sec) -training >> step=1948500, episode=325 reward=0.7742448 (527.95 it/sec) -training >> step=1948600, episode=325 reward=0.7534072 (392.69 it/sec) -training >> step=1948700, episode=325 reward=0.7336546 (551.47 it/sec) -training >> step=1948800, episode=325 reward=0.7602765 (561.39 it/sec) -training >> step=1948900, episode=325 reward=0.7540154 (468.51 it/sec) -training >> step=1949000, episode=325 reward=0.7543896 (562.49 it/sec) -training >> step=1949100, episode=325 reward=0.7208338 (484.85 it/sec) -training >> step=1949200, episode=325 reward=0.7442085 (535.91 it/sec) -training >> step=1949300, episode=326 reward=0.7529361 (62.27 it/sec) -training >> step=1949400, episode=326 reward=0.7418267 (528.33 it/sec) -training >> step=1949500, episode=326 reward=0.7301337 (492.52 it/sec) -training >> step=1949600, episode=326 reward=0.7463324 (438.44 it/sec) -training >> step=1949700, episode=326 reward=0.742469 (473.91 it/sec) -training >> step=1949800, episode=326 reward=0.7377773 (536.91 it/sec) -training >> step=1949900, episode=326 reward=0.7829953 (526.62 it/sec) -training >> step=1950000, episode=326 reward=0.7613407 (534.44 it/sec) -training >> step=1950100, episode=326 reward=0.7562128 (504.50 it/sec) -training >> step=1950200, episode=326 reward=0.7707364 (562.07 it/sec) -training >> step=1950300, episode=326 reward=0.7672498 (414.67 it/sec) -training >> step=1950400, episode=326 reward=0.7589552 (554.83 it/sec) -training >> step=1950500, episode=326 reward=0.7516028 (526.68 it/sec) -training >> step=1950600, episode=326 reward=0.7327965 (541.19 it/sec) -training >> step=1950700, episode=326 reward=0.7630019 (544.64 it/sec) -training >> step=1950800, episode=326 reward=0.7634599 (441.45 it/sec) -training >> step=1950900, episode=326 reward=0.7565595 (472.36 it/sec) -training >> step=1951000, episode=326 reward=0.7577707 (499.65 it/sec) -training >> step=1951100, episode=326 reward=0.7905566 (483.23 it/sec) -training >> step=1951200, episode=326 reward=0.7529634 (445.56 it/sec) -training >> step=1951300, episode=326 reward=0.7783411 (366.01 it/sec) -training >> step=1951400, episode=326 reward=0.7668705 (323.85 it/sec) -training >> step=1951500, episode=326 reward=0.7767916 (438.06 it/sec) -training >> step=1951600, episode=326 reward=0.7572408 (447.01 it/sec) -training >> step=1951700, episode=326 reward=0.7820527 (339.79 it/sec) -training >> step=1951800, episode=326 reward=0.7870667 (464.28 it/sec) -training >> step=1951900, episode=326 reward=0.767484 (521.84 it/sec) -training >> step=1952000, episode=326 reward=0.7734128 (545.06 it/sec) -training >> step=1952100, episode=326 reward=0.7318622 (489.39 it/sec) -training >> step=1952200, episode=326 reward=0.7685419 (456.31 it/sec) -training >> step=1952300, episode=326 reward=0.7601614 (413.55 it/sec) -training >> step=1952400, episode=326 reward=0.7597739 (546.71 it/sec) -training >> step=1952500, episode=326 reward=0.775304 (528.37 it/sec) -training >> step=1952600, episode=326 reward=0.7836196 (447.63 it/sec) -training >> step=1952700, episode=326 reward=0.7753977 (491.61 it/sec) -training >> step=1952800, episode=326 reward=0.7742658 (539.77 it/sec) -training >> step=1952900, episode=326 reward=0.7558007 (526.99 it/sec) -training >> step=1953000, episode=326 reward=0.7862715 (517.55 it/sec) -training >> step=1953100, episode=326 reward=0.7581961 (518.61 it/sec) -training >> step=1953200, episode=326 reward=0.7724971 (515.03 it/sec) -training >> step=1953300, episode=326 reward=0.7705057 (531.18 it/sec) -training >> step=1953400, episode=326 reward=0.7477264 (540.56 it/sec) -training >> step=1953500, episode=326 reward=0.7581781 (407.77 it/sec) -training >> step=1953600, episode=326 reward=0.7601406 (425.67 it/sec) -training >> step=1953700, episode=326 reward=0.7697605 (480.81 it/sec) -training >> step=1953800, episode=326 reward=0.7681265 (526.34 it/sec) -training >> step=1953900, episode=326 reward=0.749832 (482.81 it/sec) -training >> step=1954000, episode=326 reward=0.747042 (490.48 it/sec) -training >> step=1954100, episode=326 reward=0.7505831 (464.01 it/sec) -training >> step=1954200, episode=326 reward=0.7538952 (459.04 it/sec) -training >> step=1954300, episode=326 reward=0.7430556 (437.29 it/sec) -training >> step=1954400, episode=326 reward=0.7450222 (487.30 it/sec) -training >> step=1954500, episode=326 reward=0.781574 (480.58 it/sec) -training >> step=1954600, episode=326 reward=0.7525596 (486.09 it/sec) -training >> step=1954700, episode=326 reward=0.7607456 (504.33 it/sec) -training >> step=1954800, episode=326 reward=0.7489672 (514.25 it/sec) -training >> step=1954900, episode=326 reward=0.7585914 (507.54 it/sec) -training >> step=1955000, episode=326 reward=0.7416127 (496.32 it/sec) -training >> step=1955100, episode=326 reward=0.7740451 (487.57 it/sec) -training >> step=1955200, episode=326 reward=0.7678761 (447.89 it/sec) -training >> step=1955300, episode=327 reward=0.726172 (152.27 it/sec) -training >> step=1955400, episode=327 reward=0.73559 (506.11 it/sec) -training >> step=1955500, episode=327 reward=0.73487 (500.68 it/sec) -training >> step=1955600, episode=327 reward=0.7545988 (530.53 it/sec) -training >> step=1955700, episode=327 reward=0.7593976 (503.97 it/sec) -training >> step=1955800, episode=327 reward=0.7602575 (465.52 it/sec) -training >> step=1955900, episode=327 reward=0.7461131 (478.61 it/sec) -training >> step=1956000, episode=327 reward=0.7509937 (463.94 it/sec) -training >> step=1956100, episode=327 reward=0.7588024 (478.34 it/sec) -training >> step=1956200, episode=327 reward=0.7654922 (497.85 it/sec) -training >> step=1956300, episode=327 reward=0.7790208 (496.40 it/sec) -training >> step=1956400, episode=327 reward=0.7652843 (485.06 it/sec) -training >> step=1956500, episode=327 reward=0.7587759 (449.80 it/sec) -training >> step=1956600, episode=327 reward=0.765349 (470.26 it/sec) -training >> step=1956700, episode=327 reward=0.7676879 (533.16 it/sec) -training >> step=1956800, episode=327 reward=0.76769 (512.47 it/sec) -training >> step=1956900, episode=327 reward=0.762342 (472.71 it/sec) -training >> step=1957000, episode=327 reward=0.7620331 (493.60 it/sec) -training >> step=1957100, episode=327 reward=0.7768462 (475.33 it/sec) -training >> step=1957200, episode=327 reward=0.7464921 (502.21 it/sec) -training >> step=1957300, episode=327 reward=0.76824 (463.62 it/sec) -training >> step=1957400, episode=327 reward=0.7585008 (497.50 it/sec) -training >> step=1957500, episode=327 reward=0.7701859 (485.97 it/sec) -training >> step=1957600, episode=327 reward=0.7674288 (473.77 it/sec) -training >> step=1957700, episode=327 reward=0.7624732 (502.72 it/sec) -training >> step=1957800, episode=327 reward=0.7612453 (500.48 it/sec) -training >> step=1957900, episode=327 reward=0.7457515 (513.12 it/sec) -training >> step=1958000, episode=327 reward=0.7736213 (490.74 it/sec) -training >> step=1958100, episode=327 reward=0.7763467 (485.21 it/sec) -training >> step=1958200, episode=327 reward=0.7641889 (501.16 it/sec) -training >> step=1958300, episode=327 reward=0.7459232 (454.12 it/sec) -training >> step=1958400, episode=327 reward=0.7666645 (502.70 it/sec) -training >> step=1958500, episode=327 reward=0.7525862 (475.47 it/sec) -training >> step=1958600, episode=327 reward=0.7818865 (535.63 it/sec) -training >> step=1958700, episode=327 reward=0.740706 (518.80 it/sec) -training >> step=1958800, episode=327 reward=0.7642967 (519.23 it/sec) -training >> step=1958900, episode=327 reward=0.7656379 (577.45 it/sec) -training >> step=1959000, episode=327 reward=0.7559175 (529.79 it/sec) -training >> step=1959100, episode=327 reward=0.7555953 (497.97 it/sec) -training >> step=1959200, episode=327 reward=0.7519741 (503.82 it/sec) -training >> step=1959300, episode=327 reward=0.7464063 (540.82 it/sec) -training >> step=1959400, episode=327 reward=0.7694157 (499.94 it/sec) -training >> step=1959500, episode=327 reward=0.7590786 (512.07 it/sec) -training >> step=1959600, episode=327 reward=0.7641442 (438.86 it/sec) -training >> step=1959700, episode=327 reward=0.7235384 (548.86 it/sec) -training >> step=1959800, episode=327 reward=0.7740142 (525.58 it/sec) -training >> step=1959900, episode=327 reward=0.7512713 (529.58 it/sec) -training >> step=1960000, episode=327 reward=0.7789196 (525.13 it/sec) -training >> step=1960100, episode=327 reward=0.7548462 (493.56 it/sec) -training >> step=1960200, episode=327 reward=0.7423542 (512.42 it/sec) -training >> step=1960300, episode=327 reward=0.7413387 (524.09 it/sec) -training >> step=1960400, episode=327 reward=0.7795512 (535.44 it/sec) -training >> step=1960500, episode=327 reward=0.7581729 (543.85 it/sec) -training >> step=1960600, episode=327 reward=0.7880599 (513.72 it/sec) -training >> step=1960700, episode=327 reward=0.7407113 (535.52 it/sec) -training >> step=1960800, episode=327 reward=0.7633033 (540.45 it/sec) -training >> step=1960900, episode=327 reward=0.756367 (521.77 it/sec) -training >> step=1961000, episode=327 reward=0.7699143 (521.27 it/sec) -training >> step=1961100, episode=327 reward=0.7534115 (514.70 it/sec) -training >> step=1961200, episode=327 reward=0.7439279 (515.03 it/sec) -training >> step=1961300, episode=328 reward=0.7609422 (171.83 it/sec) -training >> step=1961400, episode=328 reward=0.7483422 (518.83 it/sec) -training >> step=1961500, episode=328 reward=0.7375182 (505.51 it/sec) -training >> step=1961600, episode=328 reward=0.7674862 (512.02 it/sec) -training >> step=1961700, episode=328 reward=0.7561167 (515.77 it/sec) -training >> step=1961800, episode=328 reward=0.7693458 (508.81 it/sec) -training >> step=1961900, episode=328 reward=0.7757509 (495.43 it/sec) -training >> step=1962000, episode=328 reward=0.7596008 (537.99 it/sec) -training >> step=1962100, episode=328 reward=0.7524158 (468.32 it/sec) -training >> step=1962200, episode=328 reward=0.7669668 (494.85 it/sec) -training >> step=1962300, episode=328 reward=0.727859 (527.57 it/sec) -training >> step=1962400, episode=328 reward=0.7726713 (500.74 it/sec) -training >> step=1962500, episode=328 reward=0.783807 (462.85 it/sec) -training >> step=1962600, episode=328 reward=0.7502538 (469.07 it/sec) -training >> step=1962700, episode=328 reward=0.7439448 (529.96 it/sec) -training >> step=1962800, episode=328 reward=0.7167298 (514.28 it/sec) -training >> step=1962900, episode=328 reward=0.7446506 (510.76 it/sec) -training >> step=1963000, episode=328 reward=0.7847715 (503.41 it/sec) -training >> step=1963100, episode=328 reward=0.7455157 (491.33 it/sec) -training >> step=1963200, episode=328 reward=0.7679991 (461.43 it/sec) -training >> step=1963300, episode=328 reward=0.7714637 (498.37 it/sec) -training >> step=1963400, episode=328 reward=0.7532015 (490.11 it/sec) -training >> step=1963500, episode=328 reward=0.7525744 (514.09 it/sec) -training >> step=1963600, episode=328 reward=0.7488309 (497.51 it/sec) -training >> step=1963700, episode=328 reward=0.7662451 (504.09 it/sec) -training >> step=1963800, episode=328 reward=0.7511113 (558.15 it/sec) -training >> step=1963900, episode=328 reward=0.7495589 (519.97 it/sec) -training >> step=1964000, episode=328 reward=0.782284 (486.25 it/sec) -training >> step=1964100, episode=328 reward=0.7557822 (482.74 it/sec) -training >> step=1964200, episode=328 reward=0.7620549 (517.65 it/sec) -training >> step=1964300, episode=328 reward=0.7834563 (525.44 it/sec) -training >> step=1964400, episode=328 reward=0.7751451 (469.23 it/sec) -training >> step=1964500, episode=328 reward=0.7897912 (480.53 it/sec) -training >> step=1964600, episode=328 reward=0.7648132 (513.94 it/sec) -training >> step=1964700, episode=328 reward=0.7654454 (493.06 it/sec) -training >> step=1964800, episode=328 reward=0.7695949 (454.66 it/sec) -training >> step=1964900, episode=328 reward=0.7748228 (457.86 it/sec) -training >> step=1965000, episode=328 reward=0.7693453 (474.89 it/sec) -training >> step=1965100, episode=328 reward=0.7701342 (502.66 it/sec) -training >> step=1965200, episode=328 reward=0.7706251 (482.55 it/sec) -training >> step=1965300, episode=328 reward=0.773923 (512.26 it/sec) -training >> step=1965400, episode=328 reward=0.7434741 (471.39 it/sec) -training >> step=1965500, episode=328 reward=0.7622716 (473.79 it/sec) -training >> step=1965600, episode=328 reward=0.7591894 (440.15 it/sec) -training >> step=1965700, episode=328 reward=0.7736989 (432.49 it/sec) -training >> step=1965800, episode=328 reward=0.7860194 (376.26 it/sec) -training >> step=1965900, episode=328 reward=0.741828 (497.68 it/sec) -training >> step=1966000, episode=328 reward=0.7608067 (462.89 it/sec) -training >> step=1966100, episode=328 reward=0.7663275 (482.33 it/sec) -training >> step=1966200, episode=328 reward=0.7651526 (553.12 it/sec) -training >> step=1966300, episode=328 reward=0.7490869 (494.61 it/sec) -training >> step=1966400, episode=328 reward=0.7474121 (481.47 it/sec) -training >> step=1966500, episode=328 reward=0.7644579 (493.86 it/sec) -training >> step=1966600, episode=328 reward=0.7467421 (485.34 it/sec) -training >> step=1966700, episode=328 reward=0.7908465 (499.53 it/sec) -training >> step=1966800, episode=328 reward=0.7666327 (499.69 it/sec) -training >> step=1966900, episode=328 reward=0.7595909 (535.91 it/sec) -training >> step=1967000, episode=328 reward=0.7512952 (494.91 it/sec) -training >> step=1967100, episode=328 reward=0.7555482 (482.08 it/sec) -training >> step=1967200, episode=328 reward=0.7608058 (499.53 it/sec) -training >> step=1967300, episode=329 reward=0.7183381 (173.02 it/sec) -training >> step=1967400, episode=329 reward=0.7534165 (536.70 it/sec) -training >> step=1967500, episode=329 reward=0.7535481 (517.63 it/sec) -training >> step=1967600, episode=329 reward=0.7486467 (513.18 it/sec) -training >> step=1967700, episode=329 reward=0.7610888 (496.37 it/sec) -training >> step=1967800, episode=329 reward=0.7683383 (527.51 it/sec) -training >> step=1967900, episode=329 reward=0.7717071 (468.21 it/sec) -training >> step=1968000, episode=329 reward=0.7871377 (476.39 it/sec) -training >> step=1968100, episode=329 reward=0.7782474 (476.12 it/sec) -training >> step=1968200, episode=329 reward=0.7656392 (505.24 it/sec) -training >> step=1968300, episode=329 reward=0.7613006 (481.74 it/sec) -training >> step=1968400, episode=329 reward=0.7679325 (505.63 it/sec) -training >> step=1968500, episode=329 reward=0.7428527 (514.47 it/sec) -training >> step=1968600, episode=329 reward=0.7696099 (529.07 it/sec) -training >> step=1968700, episode=329 reward=0.7803335 (526.51 it/sec) -training >> step=1968800, episode=329 reward=0.7821816 (512.89 it/sec) -training >> step=1968900, episode=329 reward=0.7947519 (515.44 it/sec) -training >> step=1969000, episode=329 reward=0.7696307 (497.19 it/sec) -training >> step=1969100, episode=329 reward=0.7707134 (523.00 it/sec) -training >> step=1969200, episode=329 reward=0.7503489 (511.25 it/sec) -training >> step=1969300, episode=329 reward=0.7361568 (541.89 it/sec) -training >> step=1969400, episode=329 reward=0.7675015 (511.27 it/sec) -training >> step=1969500, episode=329 reward=0.7709519 (452.38 it/sec) -training >> step=1969600, episode=329 reward=0.745526 (539.47 it/sec) -training >> step=1969700, episode=329 reward=0.7768484 (505.18 it/sec) -training >> step=1969800, episode=329 reward=0.7639021 (536.95 it/sec) -training >> step=1969900, episode=329 reward=0.7715299 (491.31 it/sec) -training >> step=1970000, episode=329 reward=0.7651041 (480.95 it/sec) -training >> step=1970100, episode=329 reward=0.7662613 (483.94 it/sec) -training >> step=1970200, episode=329 reward=0.7487781 (477.33 it/sec) -training >> step=1970300, episode=329 reward=0.7423077 (502.00 it/sec) -training >> step=1970400, episode=329 reward=0.7639452 (470.11 it/sec) -training >> step=1970500, episode=329 reward=0.7698257 (540.34 it/sec) -training >> step=1970600, episode=329 reward=0.7508823 (479.14 it/sec) -training >> step=1970700, episode=329 reward=0.7664354 (461.60 it/sec) -training >> step=1970800, episode=329 reward=0.7493429 (525.33 it/sec) -training >> step=1970900, episode=329 reward=0.7827705 (530.22 it/sec) -training >> step=1971000, episode=329 reward=0.7681179 (518.56 it/sec) -training >> step=1971100, episode=329 reward=0.7593529 (491.59 it/sec) -training >> step=1971200, episode=329 reward=0.7748308 (493.56 it/sec) -training >> step=1971300, episode=329 reward=0.7736829 (524.44 it/sec) -training >> step=1971400, episode=329 reward=0.7730856 (471.27 it/sec) -training >> step=1971500, episode=329 reward=0.7467253 (495.49 it/sec) -training >> step=1971600, episode=329 reward=0.7725517 (501.75 it/sec) -training >> step=1971700, episode=329 reward=0.7516882 (491.47 it/sec) -training >> step=1971800, episode=329 reward=0.7556723 (542.43 it/sec) -training >> step=1971900, episode=329 reward=0.7577261 (510.95 it/sec) -training >> step=1972000, episode=329 reward=0.7801122 (409.24 it/sec) -training >> step=1972100, episode=329 reward=0.7498504 (473.13 it/sec) -training >> step=1972200, episode=329 reward=0.7475043 (496.25 it/sec) -training >> step=1972300, episode=329 reward=0.7395303 (523.88 it/sec) -training >> step=1972400, episode=329 reward=0.7588106 (535.88 it/sec) -training >> step=1972500, episode=329 reward=0.7548757 (516.53 it/sec) -training >> step=1972600, episode=329 reward=0.745585 (466.53 it/sec) -training >> step=1972700, episode=329 reward=0.7703341 (477.87 it/sec) -training >> step=1972800, episode=329 reward=0.7583238 (509.65 it/sec) -training >> step=1972900, episode=329 reward=0.737654 (472.68 it/sec) -training >> step=1973000, episode=329 reward=0.7613612 (451.96 it/sec) -training >> step=1973100, episode=329 reward=0.7613356 (447.45 it/sec) -training >> step=1973200, episode=329 reward=0.7618883 (430.14 it/sec) -training >> step=1973300, episode=330 reward=0.7702342 (193.46 it/sec) -training >> step=1973400, episode=330 reward=0.7614062 (489.36 it/sec) -training >> step=1973500, episode=330 reward=0.7491733 (506.53 it/sec) -training >> step=1973600, episode=330 reward=0.7332108 (492.78 it/sec) -training >> step=1973700, episode=330 reward=0.763101 (497.50 it/sec) -training >> step=1973800, episode=330 reward=0.7615909 (526.32 it/sec) -training >> step=1973900, episode=330 reward=0.7447144 (480.17 it/sec) -training >> step=1974000, episode=330 reward=0.7437473 (465.35 it/sec) -training >> step=1974100, episode=330 reward=0.7668321 (449.74 it/sec) -training >> step=1974200, episode=330 reward=0.7567604 (478.12 it/sec) -training >> step=1974300, episode=330 reward=0.7785943 (511.73 it/sec) -training >> step=1974400, episode=330 reward=0.7752684 (507.02 it/sec) -training >> step=1974500, episode=330 reward=0.7879125 (471.73 it/sec) -training >> step=1974600, episode=330 reward=0.7728774 (532.30 it/sec) -training >> step=1974700, episode=330 reward=0.7792134 (486.29 it/sec) -training >> step=1974800, episode=330 reward=0.7599967 (436.03 it/sec) -training >> step=1974900, episode=330 reward=0.762009 (488.17 it/sec) -training >> step=1975000, episode=330 reward=0.7489485 (490.58 it/sec) -training >> step=1975100, episode=330 reward=0.7758697 (529.08 it/sec) -training >> step=1975200, episode=330 reward=0.7639777 (512.48 it/sec) -training >> step=1975300, episode=330 reward=0.7752444 (536.23 it/sec) -training >> step=1975400, episode=330 reward=0.7637651 (517.93 it/sec) -training >> step=1975500, episode=330 reward=0.7831789 (503.47 it/sec) -training >> step=1975600, episode=330 reward=0.768019 (531.80 it/sec) -training >> step=1975700, episode=330 reward=0.7704308 (521.85 it/sec) -training >> step=1975800, episode=330 reward=0.796307 (523.34 it/sec) -training >> step=1975900, episode=330 reward=0.7635015 (503.81 it/sec) -training >> step=1976000, episode=330 reward=0.764282 (489.03 it/sec) -training >> step=1976100, episode=330 reward=0.7658577 (523.91 it/sec) -training >> step=1976200, episode=330 reward=0.7642938 (510.93 it/sec) -training >> step=1976300, episode=330 reward=0.7651919 (514.61 it/sec) -training >> step=1976400, episode=330 reward=0.7737668 (496.66 it/sec) -training >> step=1976500, episode=330 reward=0.7675699 (488.38 it/sec) -training >> step=1976600, episode=330 reward=0.7509235 (517.43 it/sec) -training >> step=1976700, episode=330 reward=0.7651863 (477.37 it/sec) -training >> step=1976800, episode=330 reward=0.7674226 (543.21 it/sec) -training >> step=1976900, episode=330 reward=0.7533888 (479.92 it/sec) -training >> step=1977000, episode=330 reward=0.7606103 (496.23 it/sec) -training >> step=1977100, episode=330 reward=0.767401 (534.89 it/sec) -training >> step=1977200, episode=330 reward=0.7460175 (507.84 it/sec) -training >> step=1977300, episode=330 reward=0.7747084 (429.79 it/sec) -training >> step=1977400, episode=330 reward=0.8009732 (491.39 it/sec) -training >> step=1977500, episode=330 reward=0.7417898 (469.11 it/sec) -training >> step=1977600, episode=330 reward=0.7570608 (543.25 it/sec) -training >> step=1977700, episode=330 reward=0.7689769 (516.87 it/sec) -training >> step=1977800, episode=330 reward=0.7735935 (494.82 it/sec) -training >> step=1977900, episode=330 reward=0.7442135 (534.15 it/sec) -training >> step=1978000, episode=330 reward=0.7442517 (509.11 it/sec) -training >> step=1978100, episode=330 reward=0.7689334 (510.35 it/sec) -training >> step=1978200, episode=330 reward=0.7639641 (536.98 it/sec) -training >> step=1978300, episode=330 reward=0.7476522 (379.95 it/sec) -training >> step=1978400, episode=330 reward=0.7573982 (511.50 it/sec) -training >> step=1978500, episode=330 reward=0.7348709 (493.24 it/sec) -training >> step=1978600, episode=330 reward=0.7564692 (536.95 it/sec) -training >> step=1978700, episode=330 reward=0.748386 (519.50 it/sec) -training >> step=1978800, episode=330 reward=0.7569675 (534.58 it/sec) -training >> step=1978900, episode=330 reward=0.7559695 (484.10 it/sec) -training >> step=1979000, episode=330 reward=0.7626862 (516.12 it/sec) -training >> step=1979100, episode=330 reward=0.7452173 (475.13 it/sec) -training >> step=1979200, episode=330 reward=0.7399701 (517.76 it/sec) -training >> step=1979300, episode=331 reward=0.7297862 (165.53 it/sec) -training >> step=1979400, episode=331 reward=0.7341026 (498.08 it/sec) -training >> step=1979500, episode=331 reward=0.763774 (480.47 it/sec) -training >> step=1979600, episode=331 reward=0.7662168 (494.96 it/sec) -training >> step=1979700, episode=331 reward=0.7435284 (485.39 it/sec) -training >> step=1979800, episode=331 reward=0.7787597 (526.98 it/sec) -training >> step=1979900, episode=331 reward=0.7448892 (476.29 it/sec) -training >> step=1980000, episode=331 reward=0.7653549 (533.49 it/sec) -training >> step=1980100, episode=331 reward=0.7651687 (547.79 it/sec) -training >> step=1980200, episode=331 reward=0.7680674 (503.11 it/sec) -training >> step=1980300, episode=331 reward=0.7780641 (496.71 it/sec) -training >> step=1980400, episode=331 reward=0.7792494 (489.81 it/sec) -training >> step=1980500, episode=331 reward=0.7724567 (514.62 it/sec) -training >> step=1980600, episode=331 reward=0.7560949 (551.02 it/sec) -training >> step=1980700, episode=331 reward=0.7929091 (501.70 it/sec) -training >> step=1980800, episode=331 reward=0.7677779 (550.36 it/sec) -training >> step=1980900, episode=331 reward=0.7636724 (493.20 it/sec) -training >> step=1981000, episode=331 reward=0.7475252 (500.73 it/sec) -training >> step=1981100, episode=331 reward=0.749705 (539.15 it/sec) -training >> step=1981200, episode=331 reward=0.7518508 (551.87 it/sec) -training >> step=1981300, episode=331 reward=0.7662616 (518.69 it/sec) -training >> step=1981400, episode=331 reward=0.7551149 (490.44 it/sec) -training >> step=1981500, episode=331 reward=0.7702563 (491.27 it/sec) -training >> step=1981600, episode=331 reward=0.7821605 (535.22 it/sec) -training >> step=1981700, episode=331 reward=0.776291 (521.94 it/sec) -training >> step=1981800, episode=331 reward=0.7560634 (524.08 it/sec) -training >> step=1981900, episode=331 reward=0.743811 (551.52 it/sec) -training >> step=1982000, episode=331 reward=0.7544768 (507.04 it/sec) -training >> step=1982100, episode=331 reward=0.7506229 (521.98 it/sec) -training >> step=1982200, episode=331 reward=0.7726958 (525.40 it/sec) -training >> step=1982300, episode=331 reward=0.7633866 (511.70 it/sec) -training >> step=1982400, episode=331 reward=0.7672772 (532.50 it/sec) -training >> step=1982500, episode=331 reward=0.740097 (501.29 it/sec) -training >> step=1982600, episode=331 reward=0.7554147 (496.91 it/sec) -training >> step=1982700, episode=331 reward=0.7602432 (487.75 it/sec) -training >> step=1982800, episode=331 reward=0.7636471 (531.04 it/sec) -training >> step=1982900, episode=331 reward=0.7606556 (523.13 it/sec) -training >> step=1983000, episode=331 reward=0.7770038 (517.54 it/sec) -training >> step=1983100, episode=331 reward=0.7679365 (519.77 it/sec) -training >> step=1983200, episode=331 reward=0.7716293 (518.14 it/sec) -training >> step=1983300, episode=331 reward=0.7696183 (510.81 it/sec) -training >> step=1983400, episode=331 reward=0.7688273 (554.69 it/sec) -training >> step=1983500, episode=331 reward=0.7485722 (494.36 it/sec) -training >> step=1983600, episode=331 reward=0.766017 (501.99 it/sec) -training >> step=1983700, episode=331 reward=0.7630792 (536.51 it/sec) -training >> step=1983800, episode=331 reward=0.7834579 (565.36 it/sec) -training >> step=1983900, episode=331 reward=0.7757347 (518.10 it/sec) -training >> step=1984000, episode=331 reward=0.7670688 (532.69 it/sec) -training >> step=1984100, episode=331 reward=0.7547086 (526.69 it/sec) -training >> step=1984200, episode=331 reward=0.7607585 (550.14 it/sec) -training >> step=1984300, episode=331 reward=0.7635409 (535.24 it/sec) -training >> step=1984400, episode=331 reward=0.7660118 (510.50 it/sec) -training >> step=1984500, episode=331 reward=0.7507716 (387.41 it/sec) -training >> step=1984600, episode=331 reward=0.7523016 (501.40 it/sec) -training >> step=1984700, episode=331 reward=0.7607465 (526.72 it/sec) -training >> step=1984800, episode=331 reward=0.7703764 (515.20 it/sec) -training >> step=1984900, episode=331 reward=0.7556481 (540.42 it/sec) -training >> step=1985000, episode=331 reward=0.7448519 (520.78 it/sec) -training >> step=1985100, episode=331 reward=0.7617125 (501.88 it/sec) -training >> step=1985200, episode=331 reward=0.7598974 (512.62 it/sec) -training >> step=1985300, episode=332 reward=0.7451804 (162.77 it/sec) -training >> step=1985400, episode=332 reward=0.7506954 (519.24 it/sec) -training >> step=1985500, episode=332 reward=0.7338791 (478.57 it/sec) -training >> step=1985600, episode=332 reward=0.7409301 (487.41 it/sec) -training >> step=1985700, episode=332 reward=0.7671125 (510.44 it/sec) -training >> step=1985800, episode=332 reward=0.7383102 (544.04 it/sec) -training >> step=1985900, episode=332 reward=0.7529792 (492.82 it/sec) -training >> step=1986000, episode=332 reward=0.7546064 (510.79 it/sec) -training >> step=1986100, episode=332 reward=0.7694255 (538.95 it/sec) -training >> step=1986200, episode=332 reward=0.7524999 (531.10 it/sec) -training >> step=1986300, episode=332 reward=0.773425 (525.95 it/sec) -training >> step=1986400, episode=332 reward=0.7810202 (516.53 it/sec) -training >> step=1986500, episode=332 reward=0.7638571 (531.48 it/sec) -training >> step=1986600, episode=332 reward=0.7623912 (532.53 it/sec) -training >> step=1986700, episode=332 reward=0.7657796 (530.91 it/sec) -training >> step=1986800, episode=332 reward=0.761286 (523.16 it/sec) -training >> step=1986900, episode=332 reward=0.7634887 (499.95 it/sec) -training >> step=1987000, episode=332 reward=0.7523237 (512.80 it/sec) -training >> step=1987100, episode=332 reward=0.7629526 (512.32 it/sec) -training >> step=1987200, episode=332 reward=0.7476423 (488.95 it/sec) -training >> step=1987300, episode=332 reward=0.7663944 (499.84 it/sec) -training >> step=1987400, episode=332 reward=0.762715 (486.32 it/sec) -training >> step=1987500, episode=332 reward=0.7614576 (488.62 it/sec) -training >> step=1987600, episode=332 reward=0.7502311 (443.80 it/sec) -training >> step=1987700, episode=332 reward=0.7518165 (501.09 it/sec) -training >> step=1987800, episode=332 reward=0.7923498 (556.46 it/sec) -training >> step=1987900, episode=332 reward=0.7648664 (551.53 it/sec) -training >> step=1988000, episode=332 reward=0.7620304 (520.66 it/sec) -training >> step=1988100, episode=332 reward=0.7505854 (505.44 it/sec) -training >> step=1988200, episode=332 reward=0.7694049 (483.03 it/sec) -training >> step=1988300, episode=332 reward=0.7904578 (519.95 it/sec) -training >> step=1988400, episode=332 reward=0.7815476 (486.56 it/sec) -training >> step=1988500, episode=332 reward=0.759549 (523.16 it/sec) -training >> step=1988600, episode=332 reward=0.7364181 (503.19 it/sec) -training >> step=1988700, episode=332 reward=0.7507975 (505.30 it/sec) -training >> step=1988800, episode=332 reward=0.7683724 (554.90 it/sec) -training >> step=1988900, episode=332 reward=0.7680797 (489.24 it/sec) -training >> step=1989000, episode=332 reward=0.775463 (488.26 it/sec) -training >> step=1989100, episode=332 reward=0.7644643 (533.04 it/sec) -training >> step=1989200, episode=332 reward=0.7643442 (496.47 it/sec) -training >> step=1989300, episode=332 reward=0.7710074 (525.22 it/sec) -training >> step=1989400, episode=332 reward=0.7791566 (518.89 it/sec) -training >> step=1989500, episode=332 reward=0.7639037 (537.28 it/sec) -training >> step=1989600, episode=332 reward=0.7588297 (543.84 it/sec) -training >> step=1989700, episode=332 reward=0.7772437 (551.09 it/sec) -training >> step=1989800, episode=332 reward=0.7492553 (491.56 it/sec) -training >> step=1989900, episode=332 reward=0.7544076 (411.50 it/sec) -training >> step=1990000, episode=332 reward=0.7639486 (396.37 it/sec) -training >> step=1990100, episode=332 reward=0.7783762 (488.88 it/sec) -training >> step=1990200, episode=332 reward=0.7882575 (438.29 it/sec) -training >> step=1990300, episode=332 reward=0.7631313 (487.65 it/sec) -training >> step=1990400, episode=332 reward=0.7556063 (526.77 it/sec) -training >> step=1990500, episode=332 reward=0.7715514 (550.03 it/sec) -training >> step=1990600, episode=332 reward=0.7546858 (354.35 it/sec) -training >> step=1990700, episode=332 reward=0.7562115 (438.16 it/sec) -training >> step=1990800, episode=332 reward=0.7485707 (357.47 it/sec) -training >> step=1990900, episode=332 reward=0.7467524 (384.49 it/sec) -training >> step=1991000, episode=332 reward=0.7822226 (501.57 it/sec) -training >> step=1991100, episode=332 reward=0.7482818 (493.40 it/sec) -training >> step=1991200, episode=332 reward=0.7576499 (428.72 it/sec) -training >> step=1991300, episode=333 reward=0.7539243 (185.54 it/sec) -training >> step=1991400, episode=333 reward=0.7582443 (450.25 it/sec) -training >> step=1991500, episode=333 reward=0.7451149 (531.60 it/sec) -training >> step=1991600, episode=333 reward=0.7558978 (512.31 it/sec) -training >> step=1991700, episode=333 reward=0.7552088 (479.92 it/sec) -training >> step=1991800, episode=333 reward=0.7703955 (525.03 it/sec) -training >> step=1991900, episode=333 reward=0.7509755 (529.36 it/sec) -training >> step=1992000, episode=333 reward=0.7777389 (546.79 it/sec) -training >> step=1992100, episode=333 reward=0.7472076 (503.26 it/sec) -training >> step=1992200, episode=333 reward=0.7607952 (518.88 it/sec) -training >> step=1992300, episode=333 reward=0.7686563 (548.40 it/sec) -training >> step=1992400, episode=333 reward=0.7647284 (479.60 it/sec) -training >> step=1992500, episode=333 reward=0.7908437 (521.94 it/sec) -training >> step=1992600, episode=333 reward=0.7533481 (546.43 it/sec) -training >> step=1992700, episode=333 reward=0.777248 (547.20 it/sec) -training >> step=1992800, episode=333 reward=0.7508991 (522.95 it/sec) -training >> step=1992900, episode=333 reward=0.747501 (540.25 it/sec) -training >> step=1993000, episode=333 reward=0.7529044 (543.99 it/sec) -training >> step=1993100, episode=333 reward=0.774731 (565.14 it/sec) -training >> step=1993200, episode=333 reward=0.7550333 (549.35 it/sec) -training >> step=1993300, episode=333 reward=0.7757917 (514.40 it/sec) -training >> step=1993400, episode=333 reward=0.7827125 (578.34 it/sec) -training >> step=1993500, episode=333 reward=0.7655895 (555.73 it/sec) -training >> step=1993600, episode=333 reward=0.7733079 (509.82 it/sec) -training >> step=1993700, episode=333 reward=0.7601106 (580.66 it/sec) -training >> step=1993800, episode=333 reward=0.7647865 (557.64 it/sec) -training >> step=1993900, episode=333 reward=0.7748654 (540.27 it/sec) -training >> step=1994000, episode=333 reward=0.7539971 (532.30 it/sec) -training >> step=1994100, episode=333 reward=0.7838012 (553.68 it/sec) -training >> step=1994200, episode=333 reward=0.759458 (545.86 it/sec) -training >> step=1994300, episode=333 reward=0.7714345 (492.59 it/sec) -training >> step=1994400, episode=333 reward=0.7941617 (486.17 it/sec) -training >> step=1994500, episode=333 reward=0.7489274 (385.79 it/sec) -training >> step=1994600, episode=333 reward=0.7604629 (395.83 it/sec) -training >> step=1994700, episode=333 reward=0.763109 (480.45 it/sec) -training >> step=1994800, episode=333 reward=0.7674388 (471.40 it/sec) -training >> step=1994900, episode=333 reward=0.7834784 (514.14 it/sec) -training >> step=1995000, episode=333 reward=0.7574112 (504.65 it/sec) -training >> step=1995100, episode=333 reward=0.7604543 (504.28 it/sec) -training >> step=1995200, episode=333 reward=0.7699758 (494.42 it/sec) -training >> step=1995300, episode=333 reward=0.7786241 (559.40 it/sec) -training >> step=1995400, episode=333 reward=0.7570764 (531.43 it/sec) -training >> step=1995500, episode=333 reward=0.7590367 (543.39 it/sec) -training >> step=1995600, episode=333 reward=0.7813105 (495.65 it/sec) -training >> step=1995700, episode=333 reward=0.7785692 (569.59 it/sec) -training >> step=1995800, episode=333 reward=0.7777058 (520.23 it/sec) -training >> step=1995900, episode=333 reward=0.768876 (517.44 it/sec) -training >> step=1996000, episode=333 reward=0.7704493 (536.38 it/sec) -training >> step=1996100, episode=333 reward=0.7852462 (574.08 it/sec) -training >> step=1996200, episode=333 reward=0.7673213 (504.89 it/sec) -training >> step=1996300, episode=333 reward=0.7543559 (516.13 it/sec) -training >> step=1996400, episode=333 reward=0.7679371 (539.05 it/sec) -training >> step=1996500, episode=333 reward=0.7582672 (557.62 it/sec) -training >> step=1996600, episode=333 reward=0.7730476 (540.07 it/sec) -training >> step=1996700, episode=333 reward=0.7597677 (550.18 it/sec) -training >> step=1996800, episode=333 reward=0.7652953 (506.83 it/sec) -training >> step=1996900, episode=333 reward=0.7730666 (509.56 it/sec) -training >> step=1997000, episode=333 reward=0.7437773 (405.16 it/sec) -training >> step=1997100, episode=333 reward=0.7574705 (522.54 it/sec) -training >> step=1997200, episode=333 reward=0.7582856 (508.36 it/sec) -training >> step=1997300, episode=334 reward=0.7669247 (210.16 it/sec) -training >> step=1997400, episode=334 reward=0.7557298 (518.04 it/sec) -training >> step=1997500, episode=334 reward=0.7557486 (507.25 it/sec) -training >> step=1997600, episode=334 reward=0.7293959 (544.35 it/sec) -training >> step=1997700, episode=334 reward=0.7533913 (537.22 it/sec) -training >> step=1997800, episode=334 reward=0.789147 (484.07 it/sec) -training >> step=1997900, episode=334 reward=0.7590656 (543.74 it/sec) -training >> step=1998000, episode=334 reward=0.7649196 (497.81 it/sec) -training >> step=1998100, episode=334 reward=0.7739965 (539.64 it/sec) -training >> step=1998200, episode=334 reward=0.7650438 (571.43 it/sec) -training >> step=1998300, episode=334 reward=0.7501158 (543.50 it/sec) -training >> step=1998400, episode=334 reward=0.7879955 (518.37 it/sec) -training >> step=1998500, episode=334 reward=0.7570603 (531.96 it/sec) -training >> step=1998600, episode=334 reward=0.7607073 (536.72 it/sec) -training >> step=1998700, episode=334 reward=0.741581 (553.45 it/sec) -training >> step=1998800, episode=334 reward=0.748872 (529.10 it/sec) -training >> step=1998900, episode=334 reward=0.7560802 (533.77 it/sec) -training >> step=1999000, episode=334 reward=0.7665658 (562.80 it/sec) -training >> step=1999100, episode=334 reward=0.7678846 (552.95 it/sec) -training >> step=1999200, episode=334 reward=0.7766699 (537.59 it/sec) -training >> step=1999300, episode=334 reward=0.7578912 (553.04 it/sec) -training >> step=1999400, episode=334 reward=0.7561187 (574.72 it/sec) -training >> step=1999500, episode=334 reward=0.7836056 (524.27 it/sec) -training >> step=1999600, episode=334 reward=0.7799742 (494.39 it/sec) -training >> step=1999700, episode=334 reward=0.7728048 (524.32 it/sec) -training >> step=1999800, episode=334 reward=0.76636 (538.70 it/sec) -training >> step=1999900, episode=334 reward=0.7619962 (528.39 it/sec) -training >> step=2000000, episode=334 reward=0.7530558 (531.61 it/sec) -training >> step=2000100, episode=334 reward=0.7584579 (554.33 it/sec) -training >> step=2000200, episode=334 reward=0.7706257 (543.66 it/sec) -training >> step=2000300, episode=334 reward=0.7755489 (531.02 it/sec) -training >> step=2000400, episode=334 reward=0.7450189 (504.17 it/sec) -training >> step=2000500, episode=334 reward=0.7545674 (591.39 it/sec) -training >> step=2000600, episode=334 reward=0.7324308 (521.91 it/sec) -training >> step=2000700, episode=334 reward=0.766915 (567.71 it/sec) -training >> step=2000800, episode=334 reward=0.742906 (518.05 it/sec) -training >> step=2000900, episode=334 reward=0.7654248 (558.72 it/sec) -training >> step=2001000, episode=334 reward=0.7746609 (531.59 it/sec) -training >> step=2001100, episode=334 reward=0.771184 (523.25 it/sec) -training >> step=2001200, episode=334 reward=0.7553085 (527.90 it/sec) -training >> step=2001300, episode=334 reward=0.7728848 (564.21 it/sec) -training >> step=2001400, episode=334 reward=0.7587209 (507.67 it/sec) -training >> step=2001500, episode=334 reward=0.7587177 (511.07 it/sec) -training >> step=2001600, episode=334 reward=0.7722782 (556.52 it/sec) -training >> step=2001700, episode=334 reward=0.7738624 (565.80 it/sec) -training >> step=2001800, episode=334 reward=0.7652916 (557.47 it/sec) -training >> step=2001900, episode=334 reward=0.7778797 (524.10 it/sec) -training >> step=2002000, episode=334 reward=0.7507825 (485.60 it/sec) -training >> step=2002100, episode=334 reward=0.7604281 (530.80 it/sec) -training >> step=2002200, episode=334 reward=0.7834523 (516.27 it/sec) -training >> step=2002300, episode=334 reward=0.770529 (524.85 it/sec) -training >> step=2002400, episode=334 reward=0.768249 (564.42 it/sec) -training >> step=2002500, episode=334 reward=0.7855049 (540.38 it/sec) -training >> step=2002600, episode=334 reward=0.7539313 (530.99 it/sec) -training >> step=2002700, episode=334 reward=0.7608914 (570.94 it/sec) -training >> step=2002800, episode=334 reward=0.7607228 (533.87 it/sec) -training >> step=2002900, episode=334 reward=0.7706102 (516.04 it/sec) -training >> step=2003000, episode=334 reward=0.7574778 (527.33 it/sec) -training >> step=2003100, episode=334 reward=0.7650244 (536.31 it/sec) -training >> step=2003200, episode=334 reward=0.777101 (375.64 it/sec) -training >> step=2003300, episode=335 reward=0.7700554 (143.13 it/sec) -training >> step=2003400, episode=335 reward=0.7526888 (504.26 it/sec) -training >> step=2003500, episode=335 reward=0.7295638 (534.24 it/sec) -training >> step=2003600, episode=335 reward=0.755835 (552.79 it/sec) -training >> step=2003700, episode=335 reward=0.7525892 (510.45 it/sec) -training >> step=2003800, episode=335 reward=0.7756021 (522.57 it/sec) -training >> step=2003900, episode=335 reward=0.7609475 (531.10 it/sec) -training >> step=2004000, episode=335 reward=0.7991914 (588.90 it/sec) -training >> step=2004100, episode=335 reward=0.7424591 (544.55 it/sec) -training >> step=2004200, episode=335 reward=0.7633809 (536.67 it/sec) -training >> step=2004300, episode=335 reward=0.7683814 (489.10 it/sec) -training >> step=2004400, episode=335 reward=0.7960588 (564.08 it/sec) -training >> step=2004500, episode=335 reward=0.7564148 (519.43 it/sec) -training >> step=2004600, episode=335 reward=0.7584746 (542.51 it/sec) -training >> step=2004700, episode=335 reward=0.7505219 (545.41 it/sec) -training >> step=2004800, episode=335 reward=0.7504144 (535.49 it/sec) -training >> step=2004900, episode=335 reward=0.7760199 (522.47 it/sec) -training >> step=2005000, episode=335 reward=0.7885251 (543.41 it/sec) -training >> step=2005100, episode=335 reward=0.751306 (589.96 it/sec) -training >> step=2005200, episode=335 reward=0.7714617 (542.29 it/sec) -training >> step=2005300, episode=335 reward=0.7618678 (544.99 it/sec) -training >> step=2005400, episode=335 reward=0.7550822 (535.54 it/sec) -training >> step=2005500, episode=335 reward=0.7655683 (542.31 it/sec) -training >> step=2005600, episode=335 reward=0.771782 (538.69 it/sec) -training >> step=2005700, episode=335 reward=0.7811785 (546.53 it/sec) -training >> step=2005800, episode=335 reward=0.7600092 (574.01 it/sec) -training >> step=2005900, episode=335 reward=0.7693726 (491.09 it/sec) -training >> step=2006000, episode=335 reward=0.7550089 (495.37 it/sec) -training >> step=2006100, episode=335 reward=0.768546 (465.93 it/sec) -training >> step=2006200, episode=335 reward=0.7636807 (493.26 it/sec) -training >> step=2006300, episode=335 reward=0.7471626 (538.13 it/sec) -training >> step=2006400, episode=335 reward=0.7601253 (474.47 it/sec) -training >> step=2006500, episode=335 reward=0.7635801 (534.76 it/sec) -training >> step=2006600, episode=335 reward=0.7642584 (557.23 it/sec) -training >> step=2006700, episode=335 reward=0.7493458 (522.20 it/sec) -training >> step=2006800, episode=335 reward=0.7613118 (537.50 it/sec) -training >> step=2006900, episode=335 reward=0.7498182 (570.31 it/sec) -training >> step=2007000, episode=335 reward=0.7556973 (565.89 it/sec) -training >> step=2007100, episode=335 reward=0.7775878 (492.07 it/sec) -training >> step=2007200, episode=335 reward=0.7628953 (472.19 it/sec) -training >> step=2007300, episode=335 reward=0.7720926 (546.14 it/sec) -training >> step=2007400, episode=335 reward=0.771118 (483.43 it/sec) -training >> step=2007500, episode=335 reward=0.7264664 (463.61 it/sec) -training >> step=2007600, episode=335 reward=0.7506179 (531.71 it/sec) -training >> step=2007700, episode=335 reward=0.7594883 (541.66 it/sec) -training >> step=2007800, episode=335 reward=0.7965605 (488.32 it/sec) -training >> step=2007900, episode=335 reward=0.7577795 (504.88 it/sec) -training >> step=2008000, episode=335 reward=0.7620318 (480.25 it/sec) -training >> step=2008100, episode=335 reward=0.7664884 (465.65 it/sec) -training >> step=2008200, episode=335 reward=0.7723397 (542.02 it/sec) -training >> step=2008300, episode=335 reward=0.7546872 (445.49 it/sec) -training >> step=2008400, episode=335 reward=0.7568533 (501.02 it/sec) -training >> step=2008500, episode=335 reward=0.7623915 (506.63 it/sec) -training >> step=2008600, episode=335 reward=0.7697346 (491.90 it/sec) -training >> step=2008700, episode=335 reward=0.775979 (530.74 it/sec) -training >> step=2008800, episode=335 reward=0.7759584 (526.83 it/sec) -training >> step=2008900, episode=335 reward=0.7388648 (522.87 it/sec) -training >> step=2009000, episode=335 reward=0.7756527 (522.18 it/sec) -training >> step=2009100, episode=335 reward=0.7588808 (505.73 it/sec) -training >> step=2009200, episode=335 reward=0.7767356 (313.26 it/sec) -training >> step=2009300, episode=336 reward=0.7553925 (168.09 it/sec) -training >> step=2009400, episode=336 reward=0.7787746 (366.97 it/sec) -training >> step=2009500, episode=336 reward=0.7426444 (330.17 it/sec) -training >> step=2009600, episode=336 reward=0.7326725 (330.64 it/sec) -training >> step=2009700, episode=336 reward=0.7248565 (355.07 it/sec) -training >> step=2009800, episode=336 reward=0.7657077 (350.98 it/sec) -training >> step=2009900, episode=336 reward=0.7759569 (433.73 it/sec) -training >> step=2010000, episode=336 reward=0.7491288 (510.52 it/sec) -training >> step=2010100, episode=336 reward=0.7667096 (519.33 it/sec) -training >> step=2010200, episode=336 reward=0.7426758 (493.43 it/sec) -training >> step=2010300, episode=336 reward=0.7678497 (488.04 it/sec) -training >> step=2010400, episode=336 reward=0.7722353 (518.45 it/sec) -training >> step=2010500, episode=336 reward=0.7653409 (524.83 it/sec) -training >> step=2010600, episode=336 reward=0.7643426 (501.29 it/sec) -training >> step=2010700, episode=336 reward=0.7728536 (505.29 it/sec) -training >> step=2010800, episode=336 reward=0.7634774 (515.90 it/sec) -training >> step=2010900, episode=336 reward=0.7820938 (547.98 it/sec) -training >> step=2011000, episode=336 reward=0.7677089 (514.37 it/sec) -training >> step=2011100, episode=336 reward=0.7530043 (516.63 it/sec) -training >> step=2011200, episode=336 reward=0.7835461 (518.45 it/sec) -training >> step=2011300, episode=336 reward=0.7521918 (550.87 it/sec) -training >> step=2011400, episode=336 reward=0.7553428 (531.68 it/sec) -training >> step=2011500, episode=336 reward=0.7751618 (526.52 it/sec) -training >> step=2011600, episode=336 reward=0.7506461 (574.85 it/sec) -training >> step=2011700, episode=336 reward=0.776498 (483.27 it/sec) -training >> step=2011800, episode=336 reward=0.7698535 (507.31 it/sec) -training >> step=2011900, episode=336 reward=0.8009933 (520.20 it/sec) -training >> step=2012000, episode=336 reward=0.7406548 (537.05 it/sec) -training >> step=2012100, episode=336 reward=0.7538893 (542.58 it/sec) -training >> step=2012200, episode=336 reward=0.751937 (496.88 it/sec) -training >> step=2012300, episode=336 reward=0.7704879 (478.06 it/sec) -training >> step=2012400, episode=336 reward=0.7501895 (466.78 it/sec) -training >> step=2012500, episode=336 reward=0.7587973 (516.53 it/sec) -training >> step=2012600, episode=336 reward=0.7487801 (529.16 it/sec) -training >> step=2012700, episode=336 reward=0.747876 (452.02 it/sec) -training >> step=2012800, episode=336 reward=0.7570152 (524.68 it/sec) -training >> step=2012900, episode=336 reward=0.7605678 (510.47 it/sec) -training >> step=2013000, episode=336 reward=0.7673995 (506.15 it/sec) -training >> step=2013100, episode=336 reward=0.7713851 (515.61 it/sec) -training >> step=2013200, episode=336 reward=0.7498658 (559.27 it/sec) -training >> step=2013300, episode=336 reward=0.767019 (484.89 it/sec) -training >> step=2013400, episode=336 reward=0.7707287 (513.56 it/sec) -training >> step=2013500, episode=336 reward=0.7619335 (509.77 it/sec) -training >> step=2013600, episode=336 reward=0.7725694 (529.90 it/sec) -training >> step=2013700, episode=336 reward=0.7735607 (552.12 it/sec) -training >> step=2013800, episode=336 reward=0.7697587 (523.88 it/sec) -training >> step=2013900, episode=336 reward=0.7658333 (465.88 it/sec) -training >> step=2014000, episode=336 reward=0.7519777 (520.73 it/sec) -training >> step=2014100, episode=336 reward=0.7589948 (562.21 it/sec) -training >> step=2014200, episode=336 reward=0.7639122 (556.99 it/sec) -training >> step=2014300, episode=336 reward=0.7502444 (526.31 it/sec) -training >> step=2014400, episode=336 reward=0.7718123 (535.87 it/sec) -training >> step=2014500, episode=336 reward=0.7723169 (529.36 it/sec) -training >> step=2014600, episode=336 reward=0.7704076 (540.92 it/sec) -training >> step=2014700, episode=336 reward=0.76495 (554.18 it/sec) -training >> step=2014800, episode=336 reward=0.7634776 (520.03 it/sec) -training >> step=2014900, episode=336 reward=0.7596095 (510.88 it/sec) -training >> step=2015000, episode=336 reward=0.7823366 (507.69 it/sec) -training >> step=2015100, episode=336 reward=0.7552095 (545.03 it/sec) -training >> step=2015200, episode=336 reward=0.7678542 (523.90 it/sec) -training >> step=2015300, episode=337 reward=0.7526557 (152.94 it/sec) -training >> step=2015400, episode=337 reward=0.7491221 (560.06 it/sec) -training >> step=2015500, episode=337 reward=0.7429524 (544.52 it/sec) -training >> step=2015600, episode=337 reward=0.7658944 (474.75 it/sec) -training >> step=2015700, episode=337 reward=0.7676001 (461.52 it/sec) -training >> step=2015800, episode=337 reward=0.773181 (535.26 it/sec) -training >> step=2015900, episode=337 reward=0.7744624 (541.72 it/sec) -training >> step=2016000, episode=337 reward=0.7483721 (537.24 it/sec) -training >> step=2016100, episode=337 reward=0.7586989 (557.75 it/sec) -training >> step=2016200, episode=337 reward=0.7924545 (498.93 it/sec) -training >> step=2016300, episode=337 reward=0.7466257 (502.48 it/sec) -training >> step=2016400, episode=337 reward=0.7597504 (562.69 it/sec) -training >> step=2016500, episode=337 reward=0.7496117 (553.29 it/sec) -training >> step=2016600, episode=337 reward=0.7704882 (550.85 it/sec) -training >> step=2016700, episode=337 reward=0.7586872 (567.50 it/sec) -training >> step=2016800, episode=337 reward=0.7691496 (520.53 it/sec) -training >> step=2016900, episode=337 reward=0.7643341 (545.75 it/sec) -training >> step=2017000, episode=337 reward=0.7718174 (589.48 it/sec) -training >> step=2017100, episode=337 reward=0.7362496 (530.42 it/sec) -training >> step=2017200, episode=337 reward=0.7578622 (543.55 it/sec) -training >> step=2017300, episode=337 reward=0.7835416 (542.81 it/sec) -training >> step=2017400, episode=337 reward=0.7605532 (499.51 it/sec) -training >> step=2017500, episode=337 reward=0.753313 (512.87 it/sec) -training >> step=2017600, episode=337 reward=0.7764073 (528.65 it/sec) -training >> step=2017700, episode=337 reward=0.7794036 (532.85 it/sec) -training >> step=2017800, episode=337 reward=0.7541699 (556.32 it/sec) -training >> step=2017900, episode=337 reward=0.7597842 (523.63 it/sec) -training >> step=2018000, episode=337 reward=0.7463426 (513.19 it/sec) -training >> step=2018100, episode=337 reward=0.7798418 (521.43 it/sec) -training >> step=2018200, episode=337 reward=0.7548394 (541.29 it/sec) -training >> step=2018300, episode=337 reward=0.7687652 (511.28 it/sec) -training >> step=2018400, episode=337 reward=0.7491322 (510.20 it/sec) -training >> step=2018500, episode=337 reward=0.772528 (511.36 it/sec) -training >> step=2018600, episode=337 reward=0.7502304 (582.64 it/sec) -training >> step=2018700, episode=337 reward=0.7390966 (500.34 it/sec) -training >> step=2018800, episode=337 reward=0.7634311 (543.51 it/sec) -training >> step=2018900, episode=337 reward=0.7670531 (517.11 it/sec) -training >> step=2019000, episode=337 reward=0.7617859 (412.68 it/sec) -training >> step=2019100, episode=337 reward=0.7682689 (452.40 it/sec) -training >> step=2019200, episode=337 reward=0.7689567 (460.21 it/sec) -training >> step=2019300, episode=337 reward=0.7591531 (496.37 it/sec) -training >> step=2019400, episode=337 reward=0.7818202 (542.20 it/sec) -training >> step=2019500, episode=337 reward=0.7596894 (487.76 it/sec) -training >> step=2019600, episode=337 reward=0.7777556 (533.17 it/sec) -training >> step=2019700, episode=337 reward=0.7717343 (526.64 it/sec) -training >> step=2019800, episode=337 reward=0.7682214 (506.93 it/sec) -training >> step=2019900, episode=337 reward=0.7587375 (517.04 it/sec) -training >> step=2020000, episode=337 reward=0.7808678 (534.39 it/sec) -training >> step=2020100, episode=337 reward=0.7589245 (587.73 it/sec) -training >> step=2020200, episode=337 reward=0.740888 (525.36 it/sec) -training >> step=2020300, episode=337 reward=0.7454831 (567.60 it/sec) -training >> step=2020400, episode=337 reward=0.7519788 (554.58 it/sec) -training >> step=2020500, episode=337 reward=0.7681561 (556.72 it/sec) -training >> step=2020600, episode=337 reward=0.7566064 (524.47 it/sec) -training >> step=2020700, episode=337 reward=0.7694837 (533.83 it/sec) -training >> step=2020800, episode=337 reward=0.7633047 (569.07 it/sec) -training >> step=2020900, episode=337 reward=0.7388256 (541.85 it/sec) -training >> step=2021000, episode=337 reward=0.7644586 (465.07 it/sec) -training >> step=2021100, episode=337 reward=0.7696067 (491.23 it/sec) -training >> step=2021200, episode=337 reward=0.7615964 (501.44 it/sec) -training >> step=2021300, episode=338 reward=0.7459494 (161.66 it/sec) -training >> step=2021400, episode=338 reward=0.7270548 (399.13 it/sec) -training >> step=2021500, episode=338 reward=0.7425135 (446.58 it/sec) -training >> step=2021600, episode=338 reward=0.7238775 (474.00 it/sec) -training >> step=2021700, episode=338 reward=0.7584928 (402.06 it/sec) -training >> step=2021800, episode=338 reward=0.7439486 (385.97 it/sec) -training >> step=2021900, episode=338 reward=0.7673103 (366.57 it/sec) -training >> step=2022000, episode=338 reward=0.7560726 (436.53 it/sec) -training >> step=2022100, episode=338 reward=0.7724906 (504.75 it/sec) -training >> step=2022200, episode=338 reward=0.7728665 (457.56 it/sec) -training >> step=2022300, episode=338 reward=0.7835121 (406.77 it/sec) -training >> step=2022400, episode=338 reward=0.7437131 (520.86 it/sec) -training >> step=2022500, episode=338 reward=0.758856 (486.17 it/sec) -training >> step=2022600, episode=338 reward=0.7645726 (431.78 it/sec) -training >> step=2022700, episode=338 reward=0.758548 (423.34 it/sec) -training >> step=2022800, episode=338 reward=0.7553545 (426.66 it/sec) -training >> step=2022900, episode=338 reward=0.7529206 (440.68 it/sec) -training >> step=2023000, episode=338 reward=0.7699113 (498.54 it/sec) -training >> step=2023100, episode=338 reward=0.750899 (505.94 it/sec) -training >> step=2023200, episode=338 reward=0.7791178 (450.00 it/sec) -training >> step=2023300, episode=338 reward=0.7650848 (511.94 it/sec) -training >> step=2023400, episode=338 reward=0.7478399 (438.92 it/sec) -training >> step=2023500, episode=338 reward=0.7844946 (488.76 it/sec) -training >> step=2023600, episode=338 reward=0.7839732 (503.54 it/sec) -training >> step=2023700, episode=338 reward=0.7795839 (542.63 it/sec) -training >> step=2023800, episode=338 reward=0.7308108 (492.59 it/sec) -training >> step=2023900, episode=338 reward=0.7813376 (441.48 it/sec) -training >> step=2024000, episode=338 reward=0.7612 (428.83 it/sec) -training >> step=2024100, episode=338 reward=0.7571656 (370.95 it/sec) -training >> step=2024200, episode=338 reward=0.7624521 (368.99 it/sec) -training >> step=2024300, episode=338 reward=0.7740055 (417.49 it/sec) -training >> step=2024400, episode=338 reward=0.7862337 (483.87 it/sec) -training >> step=2024500, episode=338 reward=0.7777874 (508.80 it/sec) -training >> step=2024600, episode=338 reward=0.7547659 (465.24 it/sec) -training >> step=2024700, episode=338 reward=0.7736189 (520.48 it/sec) -training >> step=2024800, episode=338 reward=0.754894 (533.65 it/sec) -training >> step=2024900, episode=338 reward=0.7561412 (543.51 it/sec) -training >> step=2025000, episode=338 reward=0.7588541 (489.87 it/sec) -training >> step=2025100, episode=338 reward=0.7808927 (493.15 it/sec) -training >> step=2025200, episode=338 reward=0.7741781 (518.35 it/sec) -training >> step=2025300, episode=338 reward=0.7605088 (500.49 it/sec) -training >> step=2025400, episode=338 reward=0.7727251 (508.27 it/sec) -training >> step=2025500, episode=338 reward=0.7289591 (484.69 it/sec) -training >> step=2025600, episode=338 reward=0.7575257 (515.03 it/sec) -training >> step=2025700, episode=338 reward=0.7584369 (487.08 it/sec) -training >> step=2025800, episode=338 reward=0.7623612 (524.63 it/sec) -training >> step=2025900, episode=338 reward=0.7709438 (548.07 it/sec) -training >> step=2026000, episode=338 reward=0.764176 (547.97 it/sec) -training >> step=2026100, episode=338 reward=0.7519497 (532.40 it/sec) -training >> step=2026200, episode=338 reward=0.742874 (498.51 it/sec) -training >> step=2026300, episode=338 reward=0.7736275 (531.00 it/sec) -training >> step=2026400, episode=338 reward=0.7855895 (566.15 it/sec) -training >> step=2026500, episode=338 reward=0.7649037 (528.35 it/sec) -training >> step=2026600, episode=338 reward=0.7537784 (550.38 it/sec) -training >> step=2026700, episode=338 reward=0.7425629 (519.95 it/sec) -training >> step=2026800, episode=338 reward=0.7568998 (492.84 it/sec) -training >> step=2026900, episode=338 reward=0.76584 (492.38 it/sec) -training >> step=2027000, episode=338 reward=0.7430454 (509.00 it/sec) -training >> step=2027100, episode=338 reward=0.7510492 (465.83 it/sec) -training >> step=2027200, episode=338 reward=0.7584439 (535.56 it/sec) -training >> step=2027300, episode=339 reward=0.750586 (144.90 it/sec) -training >> step=2027400, episode=339 reward=0.7611601 (487.57 it/sec) -training >> step=2027500, episode=339 reward=0.7488671 (505.49 it/sec) -training >> step=2027600, episode=339 reward=0.7493924 (521.14 it/sec) -training >> step=2027700, episode=339 reward=0.7669908 (545.57 it/sec) -training >> step=2027800, episode=339 reward=0.7607618 (546.34 it/sec) -training >> step=2027900, episode=339 reward=0.7711053 (536.19 it/sec) -training >> step=2028000, episode=339 reward=0.7837588 (573.98 it/sec) -training >> step=2028100, episode=339 reward=0.7352262 (555.34 it/sec) -training >> step=2028200, episode=339 reward=0.7716058 (567.48 it/sec) -training >> step=2028300, episode=339 reward=0.7472512 (541.37 it/sec) -training >> step=2028400, episode=339 reward=0.7754606 (526.09 it/sec) -training >> step=2028500, episode=339 reward=0.7630903 (555.09 it/sec) -training >> step=2028600, episode=339 reward=0.7478968 (487.13 it/sec) -training >> step=2028700, episode=339 reward=0.7686753 (469.70 it/sec) -training >> step=2028800, episode=339 reward=0.7524433 (513.57 it/sec) -training >> step=2028900, episode=339 reward=0.7392371 (431.14 it/sec) -training >> step=2029000, episode=339 reward=0.7588585 (415.42 it/sec) -training >> step=2029100, episode=339 reward=0.7374597 (462.21 it/sec) -training >> step=2029200, episode=339 reward=0.7631487 (501.08 it/sec) -training >> step=2029300, episode=339 reward=0.7715337 (492.89 it/sec) -training >> step=2029400, episode=339 reward=0.7551063 (498.73 it/sec) -training >> step=2029500, episode=339 reward=0.7577865 (508.66 it/sec) -training >> step=2029600, episode=339 reward=0.7648123 (518.80 it/sec) -training >> step=2029700, episode=339 reward=0.7589357 (522.84 it/sec) -training >> step=2029800, episode=339 reward=0.741615 (515.53 it/sec) -training >> step=2029900, episode=339 reward=0.7575083 (504.31 it/sec) -training >> step=2030000, episode=339 reward=0.7697673 (570.71 it/sec) -training >> step=2030100, episode=339 reward=0.7685344 (445.88 it/sec) -training >> step=2030200, episode=339 reward=0.7485681 (420.38 it/sec) -training >> step=2030300, episode=339 reward=0.7641147 (438.32 it/sec) -training >> step=2030400, episode=339 reward=0.7618348 (430.34 it/sec) -training >> step=2030500, episode=339 reward=0.7600063 (468.28 it/sec) -training >> step=2030600, episode=339 reward=0.7812083 (485.58 it/sec) -training >> step=2030700, episode=339 reward=0.7575223 (481.52 it/sec) -training >> step=2030800, episode=339 reward=0.763271 (516.52 it/sec) -training >> step=2030900, episode=339 reward=0.7783003 (490.67 it/sec) -training >> step=2031000, episode=339 reward=0.754262 (505.84 it/sec) -training >> step=2031100, episode=339 reward=0.7824092 (385.71 it/sec) -training >> step=2031200, episode=339 reward=0.752961 (458.17 it/sec) -training >> step=2031300, episode=339 reward=0.75271 (462.88 it/sec) -training >> step=2031400, episode=339 reward=0.7550783 (431.79 it/sec) -training >> step=2031500, episode=339 reward=0.767535 (514.70 it/sec) -training >> step=2031600, episode=339 reward=0.7434344 (520.00 it/sec) -training >> step=2031700, episode=339 reward=0.7665898 (481.23 it/sec) -training >> step=2031800, episode=339 reward=0.7533712 (458.42 it/sec) -training >> step=2031900, episode=339 reward=0.7808444 (389.14 it/sec) -training >> step=2032000, episode=339 reward=0.7820886 (424.06 it/sec) -training >> step=2032100, episode=339 reward=0.7412664 (417.97 it/sec) -training >> step=2032200, episode=339 reward=0.7572341 (438.86 it/sec) -training >> step=2032300, episode=339 reward=0.7707807 (424.53 it/sec) -training >> step=2032400, episode=339 reward=0.7587473 (484.71 it/sec) -training >> step=2032500, episode=339 reward=0.7623141 (448.80 it/sec) -training >> step=2032600, episode=339 reward=0.7723994 (389.37 it/sec) -training >> step=2032700, episode=339 reward=0.7637765 (363.94 it/sec) -training >> step=2032800, episode=339 reward=0.7482472 (356.15 it/sec) -training >> step=2032900, episode=339 reward=0.756278 (441.83 it/sec) -training >> step=2033000, episode=339 reward=0.7451557 (407.20 it/sec) -training >> step=2033100, episode=339 reward=0.7686403 (388.91 it/sec) -training >> step=2033200, episode=339 reward=0.7544226 (376.78 it/sec) -training >> step=2033300, episode=340 reward=0.7611195 (146.71 it/sec) -training >> step=2033400, episode=340 reward=0.7624697 (480.05 it/sec) -training >> step=2033500, episode=340 reward=0.7500402 (474.21 it/sec) -training >> step=2033600, episode=340 reward=0.7612083 (473.50 it/sec) -training >> step=2033700, episode=340 reward=0.7448513 (477.96 it/sec) -training >> step=2033800, episode=340 reward=0.7721415 (491.49 it/sec) -training >> step=2033900, episode=340 reward=0.7850376 (464.59 it/sec) -training >> step=2034000, episode=340 reward=0.7815844 (479.78 it/sec) -training >> step=2034100, episode=340 reward=0.7725547 (503.16 it/sec) -training >> step=2034200, episode=340 reward=0.7750706 (498.45 it/sec) -training >> step=2034300, episode=340 reward=0.7981981 (515.64 it/sec) -training >> step=2034400, episode=340 reward=0.7772262 (505.27 it/sec) -training >> step=2034500, episode=340 reward=0.7683253 (558.35 it/sec) -training >> step=2034600, episode=340 reward=0.7814694 (512.34 it/sec) -training >> step=2034700, episode=340 reward=0.7729126 (477.38 it/sec) -training >> step=2034800, episode=340 reward=0.7486239 (526.44 it/sec) -training >> step=2034900, episode=340 reward=0.7644758 (512.74 it/sec) -training >> step=2035000, episode=340 reward=0.7621191 (499.15 it/sec) -training >> step=2035100, episode=340 reward=0.7547582 (457.83 it/sec) -training >> step=2035200, episode=340 reward=0.7591174 (470.18 it/sec) -training >> step=2035300, episode=340 reward=0.7700935 (445.88 it/sec) -training >> step=2035400, episode=340 reward=0.7642553 (462.47 it/sec) -training >> step=2035500, episode=340 reward=0.7731937 (458.22 it/sec) -training >> step=2035600, episode=340 reward=0.7641492 (476.36 it/sec) -training >> step=2035700, episode=340 reward=0.7734405 (467.26 it/sec) -training >> step=2035800, episode=340 reward=0.786407 (459.30 it/sec) -training >> step=2035900, episode=340 reward=0.7563104 (441.84 it/sec) -training >> step=2036000, episode=340 reward=0.7620626 (483.36 it/sec) -training >> step=2036100, episode=340 reward=0.7585941 (482.84 it/sec) -training >> step=2036200, episode=340 reward=0.7674553 (467.15 it/sec) -training >> step=2036300, episode=340 reward=0.7460033 (471.24 it/sec) -training >> step=2036400, episode=340 reward=0.7504426 (447.55 it/sec) -training >> step=2036500, episode=340 reward=0.7596657 (466.50 it/sec) -training >> step=2036600, episode=340 reward=0.771962 (447.96 it/sec) -training >> step=2036700, episode=340 reward=0.7812015 (453.31 it/sec) -training >> step=2036800, episode=340 reward=0.754629 (470.20 it/sec) -training >> step=2036900, episode=340 reward=0.7636767 (468.03 it/sec) -training >> step=2037000, episode=340 reward=0.7524048 (456.87 it/sec) -training >> step=2037100, episode=340 reward=0.7745421 (429.23 it/sec) -training >> step=2037200, episode=340 reward=0.7782643 (457.49 it/sec) -training >> step=2037300, episode=340 reward=0.7820579 (449.79 it/sec) -training >> step=2037400, episode=340 reward=0.7743061 (499.15 it/sec) -training >> step=2037500, episode=340 reward=0.7695296 (446.04 it/sec) -training >> step=2037600, episode=340 reward=0.7618034 (380.88 it/sec) -training >> step=2037700, episode=340 reward=0.7545694 (402.09 it/sec) -training >> step=2037800, episode=340 reward=0.7715898 (447.30 it/sec) -training >> step=2037900, episode=340 reward=0.7636802 (462.80 it/sec) -training >> step=2038000, episode=340 reward=0.7645226 (504.85 it/sec) -training >> step=2038100, episode=340 reward=0.7490744 (436.39 it/sec) -training >> step=2038200, episode=340 reward=0.7423974 (408.97 it/sec) -training >> step=2038300, episode=340 reward=0.7655343 (416.70 it/sec) -training >> step=2038400, episode=340 reward=0.7805023 (471.94 it/sec) -training >> step=2038500, episode=340 reward=0.768358 (471.40 it/sec) -training >> step=2038600, episode=340 reward=0.7612556 (472.63 it/sec) -training >> step=2038700, episode=340 reward=0.7491561 (474.43 it/sec) -training >> step=2038800, episode=340 reward=0.7638395 (495.07 it/sec) -training >> step=2038900, episode=340 reward=0.7546846 (452.18 it/sec) -training >> step=2039000, episode=340 reward=0.7665573 (443.36 it/sec) -training >> step=2039100, episode=340 reward=0.7395752 (466.48 it/sec) -training >> step=2039200, episode=340 reward=0.7552003 (523.82 it/sec) -training >> step=2039300, episode=341 reward=0.7226059 (147.66 it/sec) -training >> step=2039400, episode=341 reward=0.756925 (536.28 it/sec) -training >> step=2039500, episode=341 reward=0.7335659 (520.03 it/sec) -training >> step=2039600, episode=341 reward=0.7645312 (517.84 it/sec) -training >> step=2039700, episode=341 reward=0.7531726 (539.71 it/sec) -training >> step=2039800, episode=341 reward=0.7555063 (540.01 it/sec) -training >> step=2039900, episode=341 reward=0.7847585 (534.52 it/sec) -training >> step=2040000, episode=341 reward=0.7691604 (524.25 it/sec) -training >> step=2040100, episode=341 reward=0.7695506 (523.74 it/sec) -training >> step=2040200, episode=341 reward=0.7758034 (521.20 it/sec) -training >> step=2040300, episode=341 reward=0.7579714 (540.98 it/sec) -training >> step=2040400, episode=341 reward=0.7558619 (584.70 it/sec) -training >> step=2040500, episode=341 reward=0.7887372 (532.19 it/sec) -training >> step=2040600, episode=341 reward=0.7469154 (516.77 it/sec) -training >> step=2040700, episode=341 reward=0.7915307 (538.29 it/sec) -training >> step=2040800, episode=341 reward=0.7818519 (490.11 it/sec) -training >> step=2040900, episode=341 reward=0.7505505 (535.72 it/sec) -training >> step=2041000, episode=341 reward=0.7610295 (546.57 it/sec) -training >> step=2041100, episode=341 reward=0.7775011 (541.70 it/sec) -training >> step=2041200, episode=341 reward=0.7561189 (502.44 it/sec) -training >> step=2041300, episode=341 reward=0.7785172 (512.11 it/sec) -training >> step=2041400, episode=341 reward=0.7709047 (529.18 it/sec) -training >> step=2041500, episode=341 reward=0.7389637 (539.95 it/sec) -training >> step=2041600, episode=341 reward=0.7702625 (544.21 it/sec) -training >> step=2041700, episode=341 reward=0.7572932 (478.79 it/sec) -training >> step=2041800, episode=341 reward=0.756003 (538.55 it/sec) -training >> step=2041900, episode=341 reward=0.751829 (556.05 it/sec) -training >> step=2042000, episode=341 reward=0.7492285 (554.76 it/sec) -training >> step=2042100, episode=341 reward=0.7748087 (535.00 it/sec) -training >> step=2042200, episode=341 reward=0.7645183 (546.90 it/sec) -training >> step=2042300, episode=341 reward=0.7633825 (513.20 it/sec) -training >> step=2042400, episode=341 reward=0.7696885 (524.32 it/sec) -training >> step=2042500, episode=341 reward=0.7460997 (537.48 it/sec) -training >> step=2042600, episode=341 reward=0.7683311 (513.96 it/sec) -training >> step=2042700, episode=341 reward=0.7599621 (568.57 it/sec) -training >> step=2042800, episode=341 reward=0.7586054 (559.73 it/sec) -training >> step=2042900, episode=341 reward=0.7567941 (514.87 it/sec) -training >> step=2043000, episode=341 reward=0.7928015 (497.95 it/sec) -training >> step=2043100, episode=341 reward=0.7840747 (522.01 it/sec) -training >> step=2043200, episode=341 reward=0.7554235 (566.23 it/sec) -training >> step=2043300, episode=341 reward=0.7648582 (570.43 it/sec) -training >> step=2043400, episode=341 reward=0.758365 (550.90 it/sec) -training >> step=2043500, episode=341 reward=0.7843866 (551.63 it/sec) -training >> step=2043600, episode=341 reward=0.7515196 (509.47 it/sec) -training >> step=2043700, episode=341 reward=0.7497378 (567.43 it/sec) -training >> step=2043800, episode=341 reward=0.7593477 (583.50 it/sec) -training >> step=2043900, episode=341 reward=0.748085 (534.20 it/sec) -training >> step=2044000, episode=341 reward=0.7575407 (511.54 it/sec) -training >> step=2044100, episode=341 reward=0.7324077 (558.53 it/sec) -training >> step=2044200, episode=341 reward=0.7480826 (541.60 it/sec) -training >> step=2044300, episode=341 reward=0.756862 (462.16 it/sec) -training >> step=2044400, episode=341 reward=0.775933 (547.15 it/sec) -training >> step=2044500, episode=341 reward=0.7668223 (554.91 it/sec) -training >> step=2044600, episode=341 reward=0.7662296 (544.35 it/sec) -training >> step=2044700, episode=341 reward=0.7664754 (475.65 it/sec) -training >> step=2044800, episode=341 reward=0.7444667 (523.28 it/sec) -training >> step=2044900, episode=341 reward=0.7738384 (541.07 it/sec) -training >> step=2045000, episode=341 reward=0.7806846 (554.29 it/sec) -training >> step=2045100, episode=341 reward=0.763679 (542.03 it/sec) -training >> step=2045200, episode=341 reward=0.7344461 (540.66 it/sec) -training >> step=2045300, episode=342 reward=0.7431504 (152.56 it/sec) -training >> step=2045400, episode=342 reward=0.7524825 (547.07 it/sec) -training >> step=2045500, episode=342 reward=0.7388427 (513.85 it/sec) -training >> step=2045600, episode=342 reward=0.7604851 (483.51 it/sec) -training >> step=2045700, episode=342 reward=0.7557322 (493.47 it/sec) -training >> step=2045800, episode=342 reward=0.7642213 (401.13 it/sec) -training >> step=2045900, episode=342 reward=0.7712128 (522.79 it/sec) -training >> step=2046000, episode=342 reward=0.7325876 (495.27 it/sec) -training >> step=2046100, episode=342 reward=0.7530928 (446.30 it/sec) -training >> step=2046200, episode=342 reward=0.7781066 (529.66 it/sec) -training >> step=2046300, episode=342 reward=0.7591322 (480.58 it/sec) -training >> step=2046400, episode=342 reward=0.7632838 (510.74 it/sec) -training >> step=2046500, episode=342 reward=0.7723399 (507.95 it/sec) -training >> step=2046600, episode=342 reward=0.7748635 (498.70 it/sec) -training >> step=2046700, episode=342 reward=0.7806914 (495.38 it/sec) -training >> step=2046800, episode=342 reward=0.7794096 (454.73 it/sec) -training >> step=2046900, episode=342 reward=0.7768686 (529.74 it/sec) -training >> step=2047000, episode=342 reward=0.761753 (509.50 it/sec) -training >> step=2047100, episode=342 reward=0.734439 (514.17 it/sec) -training >> step=2047200, episode=342 reward=0.7825193 (553.69 it/sec) -training >> step=2047300, episode=342 reward=0.7702726 (495.02 it/sec) -training >> step=2047400, episode=342 reward=0.7472 (528.43 it/sec) -training >> step=2047500, episode=342 reward=0.7472415 (540.80 it/sec) -training >> step=2047600, episode=342 reward=0.7610671 (537.74 it/sec) -training >> step=2047700, episode=342 reward=0.7694497 (550.86 it/sec) -training >> step=2047800, episode=342 reward=0.7653471 (554.21 it/sec) -training >> step=2047900, episode=342 reward=0.757094 (486.47 it/sec) -training >> step=2048000, episode=342 reward=0.7648687 (520.09 it/sec) -training >> step=2048100, episode=342 reward=0.7678598 (488.36 it/sec) -training >> step=2048200, episode=342 reward=0.7619081 (508.09 it/sec) -training >> step=2048300, episode=342 reward=0.7584392 (509.42 it/sec) -training >> step=2048400, episode=342 reward=0.7766536 (528.09 it/sec) -training >> step=2048500, episode=342 reward=0.7553735 (512.56 it/sec) -training >> step=2048600, episode=342 reward=0.7654784 (505.65 it/sec) -training >> step=2048700, episode=342 reward=0.7626974 (473.96 it/sec) -training >> step=2048800, episode=342 reward=0.7808965 (523.45 it/sec) -training >> step=2048900, episode=342 reward=0.7332757 (547.66 it/sec) -training >> step=2049000, episode=342 reward=0.7559147 (520.28 it/sec) -training >> step=2049100, episode=342 reward=0.7666721 (495.80 it/sec) -training >> step=2049200, episode=342 reward=0.7408512 (514.57 it/sec) -training >> step=2049300, episode=342 reward=0.7506025 (531.23 it/sec) -training >> step=2049400, episode=342 reward=0.7745457 (527.60 it/sec) -training >> step=2049500, episode=342 reward=0.7720976 (502.57 it/sec) -training >> step=2049600, episode=342 reward=0.7612869 (466.97 it/sec) -training >> step=2049700, episode=342 reward=0.7518175 (548.11 it/sec) -training >> step=2049800, episode=342 reward=0.7778356 (535.16 it/sec) -training >> step=2049900, episode=342 reward=0.790283 (503.41 it/sec) -training >> step=2050000, episode=342 reward=0.751834 (501.74 it/sec) -training >> step=2050100, episode=342 reward=0.7449594 (507.73 it/sec) -training >> step=2050200, episode=342 reward=0.7663323 (499.93 it/sec) -training >> step=2050300, episode=342 reward=0.7619174 (516.57 it/sec) -training >> step=2050400, episode=342 reward=0.7462963 (567.89 it/sec) -training >> step=2050500, episode=342 reward=0.760251 (508.79 it/sec) -training >> step=2050600, episode=342 reward=0.7565644 (494.61 it/sec) -training >> step=2050700, episode=342 reward=0.7536645 (503.80 it/sec) -training >> step=2050800, episode=342 reward=0.7584642 (556.82 it/sec) -training >> step=2050900, episode=342 reward=0.7415513 (522.24 it/sec) -training >> step=2051000, episode=342 reward=0.7509156 (504.00 it/sec) -training >> step=2051100, episode=342 reward=0.7870466 (499.07 it/sec) -training >> step=2051200, episode=342 reward=0.7493804 (441.06 it/sec) -training >> step=2051300, episode=343 reward=0.7839919 (189.35 it/sec) -training >> step=2051400, episode=343 reward=0.7292601 (525.24 it/sec) -training >> step=2051500, episode=343 reward=0.7453048 (485.63 it/sec) -training >> step=2051600, episode=343 reward=0.7550198 (457.99 it/sec) -training >> step=2051700, episode=343 reward=0.7488874 (517.86 it/sec) -training >> step=2051800, episode=343 reward=0.7507574 (535.22 it/sec) -training >> step=2051900, episode=343 reward=0.7520176 (528.38 it/sec) -training >> step=2052000, episode=343 reward=0.7696488 (486.51 it/sec) -training >> step=2052100, episode=343 reward=0.7604469 (518.78 it/sec) -training >> step=2052200, episode=343 reward=0.7827117 (523.06 it/sec) -training >> step=2052300, episode=343 reward=0.7635511 (508.43 it/sec) -training >> step=2052400, episode=343 reward=0.7682516 (502.48 it/sec) -training >> step=2052500, episode=343 reward=0.7538201 (488.90 it/sec) -training >> step=2052600, episode=343 reward=0.7623903 (507.64 it/sec) -training >> step=2052700, episode=343 reward=0.7897007 (540.52 it/sec) -training >> step=2052800, episode=343 reward=0.7513627 (543.61 it/sec) -training >> step=2052900, episode=343 reward=0.7636505 (536.99 it/sec) -training >> step=2053000, episode=343 reward=0.7565055 (499.57 it/sec) -training >> step=2053100, episode=343 reward=0.7415851 (534.12 it/sec) -training >> step=2053200, episode=343 reward=0.7586082 (546.01 it/sec) -training >> step=2053300, episode=343 reward=0.7649816 (525.02 it/sec) -training >> step=2053400, episode=343 reward=0.8023133 (536.47 it/sec) -training >> step=2053500, episode=343 reward=0.7645335 (522.64 it/sec) -training >> step=2053600, episode=343 reward=0.7655005 (454.97 it/sec) -training >> step=2053700, episode=343 reward=0.7664316 (489.53 it/sec) -training >> step=2053800, episode=343 reward=0.7454325 (532.59 it/sec) -training >> step=2053900, episode=343 reward=0.7650508 (506.65 it/sec) -training >> step=2054000, episode=343 reward=0.7723482 (537.27 it/sec) -training >> step=2054100, episode=343 reward=0.777871 (467.38 it/sec) -training >> step=2054200, episode=343 reward=0.7840929 (551.40 it/sec) -training >> step=2054300, episode=343 reward=0.7178053 (549.34 it/sec) -training >> step=2054400, episode=343 reward=0.7619156 (526.10 it/sec) -training >> step=2054500, episode=343 reward=0.7778452 (530.64 it/sec) -training >> step=2054600, episode=343 reward=0.7478288 (461.29 it/sec) -training >> step=2054700, episode=343 reward=0.7606251 (493.52 it/sec) -training >> step=2054800, episode=343 reward=0.7631747 (530.53 it/sec) -training >> step=2054900, episode=343 reward=0.7708364 (508.27 it/sec) -training >> step=2055000, episode=343 reward=0.7599123 (528.49 it/sec) -training >> step=2055100, episode=343 reward=0.7587537 (506.13 it/sec) -training >> step=2055200, episode=343 reward=0.7721507 (528.75 it/sec) -training >> step=2055300, episode=343 reward=0.7647159 (506.58 it/sec) -training >> step=2055400, episode=343 reward=0.7687885 (521.12 it/sec) -training >> step=2055500, episode=343 reward=0.7751111 (520.36 it/sec) -training >> step=2055600, episode=343 reward=0.7759171 (544.37 it/sec) -training >> step=2055700, episode=343 reward=0.7628281 (464.44 it/sec) -training >> step=2055800, episode=343 reward=0.7847849 (546.76 it/sec) -training >> step=2055900, episode=343 reward=0.7532869 (521.58 it/sec) -training >> step=2056000, episode=343 reward=0.7590738 (515.33 it/sec) -training >> step=2056100, episode=343 reward=0.7571874 (522.74 it/sec) -training >> step=2056200, episode=343 reward=0.7766516 (493.36 it/sec) -training >> step=2056300, episode=343 reward=0.7702587 (556.33 it/sec) -training >> step=2056400, episode=343 reward=0.7417207 (511.95 it/sec) -training >> step=2056500, episode=343 reward=0.753472 (493.53 it/sec) -training >> step=2056600, episode=343 reward=0.7808692 (502.83 it/sec) -training >> step=2056700, episode=343 reward=0.7850984 (493.42 it/sec) -training >> step=2056800, episode=343 reward=0.7520409 (511.88 it/sec) -training >> step=2056900, episode=343 reward=0.7670694 (546.24 it/sec) -training >> step=2057000, episode=343 reward=0.7562443 (503.54 it/sec) -training >> step=2057100, episode=343 reward=0.7641157 (487.02 it/sec) -training >> step=2057200, episode=343 reward=0.7537782 (371.67 it/sec) -training >> step=2057300, episode=344 reward=0.7514104 (275.40 it/sec) -training >> step=2057400, episode=344 reward=0.759944 (507.85 it/sec) -training >> step=2057500, episode=344 reward=0.7408057 (569.65 it/sec) -training >> step=2057600, episode=344 reward=0.7414303 (478.26 it/sec) -training >> step=2057700, episode=344 reward=0.7493416 (507.27 it/sec) -training >> step=2057800, episode=344 reward=0.7657788 (553.05 it/sec) -training >> step=2057900, episode=344 reward=0.7640668 (494.16 it/sec) -training >> step=2058000, episode=344 reward=0.7813166 (536.97 it/sec) -training >> step=2058100, episode=344 reward=0.7729138 (477.68 it/sec) -training >> step=2058200, episode=344 reward=0.7636619 (500.75 it/sec) -training >> step=2058300, episode=344 reward=0.763864 (556.47 it/sec) -training >> step=2058400, episode=344 reward=0.745434 (532.48 it/sec) -training >> step=2058500, episode=344 reward=0.7703968 (514.51 it/sec) -training >> step=2058600, episode=344 reward=0.7839995 (513.54 it/sec) -training >> step=2058700, episode=344 reward=0.7307518 (538.33 it/sec) -training >> step=2058800, episode=344 reward=0.7688645 (483.60 it/sec) -training >> step=2058900, episode=344 reward=0.770694 (507.46 it/sec) -training >> step=2059000, episode=344 reward=0.7650444 (537.41 it/sec) -training >> step=2059100, episode=344 reward=0.7817796 (509.38 it/sec) -training >> step=2059200, episode=344 reward=0.7708427 (524.73 it/sec) -training >> step=2059300, episode=344 reward=0.765606 (506.92 it/sec) -training >> step=2059400, episode=344 reward=0.7697904 (529.04 it/sec) -training >> step=2059500, episode=344 reward=0.775905 (528.27 it/sec) -training >> step=2059600, episode=344 reward=0.7763131 (499.29 it/sec) -training >> step=2059700, episode=344 reward=0.7889555 (505.31 it/sec) -training >> step=2059800, episode=344 reward=0.7825456 (512.10 it/sec) -training >> step=2059900, episode=344 reward=0.7669275 (518.21 it/sec) -training >> step=2060000, episode=344 reward=0.7663935 (512.75 it/sec) -training >> step=2060100, episode=344 reward=0.7468657 (520.86 it/sec) -training >> step=2060200, episode=344 reward=0.7563688 (515.53 it/sec) -training >> step=2060300, episode=344 reward=0.7622858 (493.30 it/sec) -training >> step=2060400, episode=344 reward=0.7751834 (526.93 it/sec) -training >> step=2060500, episode=344 reward=0.75423 (521.75 it/sec) -training >> step=2060600, episode=344 reward=0.7592099 (541.15 it/sec) -training >> step=2060700, episode=344 reward=0.7670004 (466.13 it/sec) -training >> step=2060800, episode=344 reward=0.7827016 (469.06 it/sec) -training >> step=2060900, episode=344 reward=0.7698053 (459.30 it/sec) -training >> step=2061000, episode=344 reward=0.7733167 (517.77 it/sec) -training >> step=2061100, episode=344 reward=0.7546209 (478.40 it/sec) -training >> step=2061200, episode=344 reward=0.7600319 (487.61 it/sec) -training >> step=2061300, episode=344 reward=0.7479951 (483.42 it/sec) -training >> step=2061400, episode=344 reward=0.7624255 (484.27 it/sec) -training >> step=2061500, episode=344 reward=0.7691735 (435.73 it/sec) -training >> step=2061600, episode=344 reward=0.7852246 (496.08 it/sec) -training >> step=2061700, episode=344 reward=0.7623213 (463.79 it/sec) -training >> step=2061800, episode=344 reward=0.7633544 (520.83 it/sec) -training >> step=2061900, episode=344 reward=0.7448598 (477.98 it/sec) -training >> step=2062000, episode=344 reward=0.7588969 (431.13 it/sec) -training >> step=2062100, episode=344 reward=0.7777901 (448.65 it/sec) -training >> step=2062200, episode=344 reward=0.7634666 (483.76 it/sec) -training >> step=2062300, episode=344 reward=0.7841465 (478.37 it/sec) -training >> step=2062400, episode=344 reward=0.7857828 (506.58 it/sec) -training >> step=2062500, episode=344 reward=0.7584227 (487.80 it/sec) -training >> step=2062600, episode=344 reward=0.7963498 (515.54 it/sec) -training >> step=2062700, episode=344 reward=0.7432709 (401.86 it/sec) -training >> step=2062800, episode=344 reward=0.7642791 (432.73 it/sec) -training >> step=2062900, episode=344 reward=0.7463456 (380.26 it/sec) -training >> step=2063000, episode=344 reward=0.7401349 (461.20 it/sec) -training >> step=2063100, episode=344 reward=0.7813436 (477.40 it/sec) -training >> step=2063200, episode=344 reward=0.7441921 (491.05 it/sec) -training >> step=2063300, episode=345 reward=0.7570738 (199.18 it/sec) -training >> step=2063400, episode=345 reward=0.7810469 (455.33 it/sec) -training >> step=2063500, episode=345 reward=0.7324632 (448.34 it/sec) -training >> step=2063600, episode=345 reward=0.7486385 (466.06 it/sec) -training >> step=2063700, episode=345 reward=0.7554479 (523.99 it/sec) -training >> step=2063800, episode=345 reward=0.7599608 (532.74 it/sec) -training >> step=2063900, episode=345 reward=0.7513452 (517.33 it/sec) -training >> step=2064000, episode=345 reward=0.7679597 (493.92 it/sec) -training >> step=2064100, episode=345 reward=0.7639035 (544.70 it/sec) -training >> step=2064200, episode=345 reward=0.759469 (513.67 it/sec) -training >> step=2064300, episode=345 reward=0.7710611 (528.68 it/sec) -training >> step=2064400, episode=345 reward=0.7551561 (509.67 it/sec) -training >> step=2064500, episode=345 reward=0.7508449 (501.21 it/sec) -training >> step=2064600, episode=345 reward=0.7403698 (543.06 it/sec) -training >> step=2064700, episode=345 reward=0.762947 (492.52 it/sec) -training >> step=2064800, episode=345 reward=0.7749823 (516.15 it/sec) -training >> step=2064900, episode=345 reward=0.7792872 (495.05 it/sec) -training >> step=2065000, episode=345 reward=0.7598534 (533.00 it/sec) -training >> step=2065100, episode=345 reward=0.7565292 (581.25 it/sec) -training >> step=2065200, episode=345 reward=0.7609398 (455.97 it/sec) -training >> step=2065300, episode=345 reward=0.7597936 (557.85 it/sec) -training >> step=2065400, episode=345 reward=0.7632982 (521.12 it/sec) -training >> step=2065500, episode=345 reward=0.7594972 (514.54 it/sec) -training >> step=2065600, episode=345 reward=0.7701221 (505.90 it/sec) -training >> step=2065700, episode=345 reward=0.7925869 (501.08 it/sec) -training >> step=2065800, episode=345 reward=0.7698556 (479.86 it/sec) -training >> step=2065900, episode=345 reward=0.7749251 (500.30 it/sec) -training >> step=2066000, episode=345 reward=0.7655741 (440.42 it/sec) -training >> step=2066100, episode=345 reward=0.7715836 (393.03 it/sec) -training >> step=2066200, episode=345 reward=0.7833788 (536.49 it/sec) -training >> step=2066300, episode=345 reward=0.7460833 (494.44 it/sec) -training >> step=2066400, episode=345 reward=0.7781298 (441.57 it/sec) -training >> step=2066500, episode=345 reward=0.7612384 (345.53 it/sec) -training >> step=2066600, episode=345 reward=0.7667372 (327.82 it/sec) -training >> step=2066700, episode=345 reward=0.75769 (450.86 it/sec) -training >> step=2066800, episode=345 reward=0.7645108 (481.54 it/sec) -training >> step=2066900, episode=345 reward=0.7514352 (439.13 it/sec) -training >> step=2067000, episode=345 reward=0.7590476 (377.39 it/sec) -training >> step=2067100, episode=345 reward=0.7756868 (396.08 it/sec) -training >> step=2067200, episode=345 reward=0.7688381 (361.58 it/sec) -training >> step=2067300, episode=345 reward=0.7754271 (337.43 it/sec) -training >> step=2067400, episode=345 reward=0.7517983 (429.09 it/sec) -training >> step=2067500, episode=345 reward=0.7770261 (454.35 it/sec) -training >> step=2067600, episode=345 reward=0.763981 (495.63 it/sec) -training >> step=2067700, episode=345 reward=0.7433116 (496.93 it/sec) -training >> step=2067800, episode=345 reward=0.7761996 (509.19 it/sec) -training >> step=2067900, episode=345 reward=0.7716324 (475.89 it/sec) -training >> step=2068000, episode=345 reward=0.7737297 (499.19 it/sec) -training >> step=2068100, episode=345 reward=0.7510223 (434.56 it/sec) -training >> step=2068200, episode=345 reward=0.7450312 (460.48 it/sec) -training >> step=2068300, episode=345 reward=0.7573173 (440.55 it/sec) -training >> step=2068400, episode=345 reward=0.7810264 (425.15 it/sec) -training >> step=2068500, episode=345 reward=0.7535943 (430.12 it/sec) -training >> step=2068600, episode=345 reward=0.7564996 (456.97 it/sec) -training >> step=2068700, episode=345 reward=0.7552203 (431.33 it/sec) -training >> step=2068800, episode=345 reward=0.7604656 (501.81 it/sec) -training >> step=2068900, episode=345 reward=0.7423232 (529.57 it/sec) -training >> step=2069000, episode=345 reward=0.7557813 (449.68 it/sec) -training >> step=2069100, episode=345 reward=0.7501 (434.72 it/sec) -training >> step=2069200, episode=345 reward=0.746452 (440.91 it/sec) -training >> step=2069300, episode=346 reward=0.7617907 (182.27 it/sec) -training >> step=2069400, episode=346 reward=0.7698382 (491.92 it/sec) -training >> step=2069500, episode=346 reward=0.7224151 (487.14 it/sec) -training >> step=2069600, episode=346 reward=0.757518 (517.26 it/sec) -training >> step=2069700, episode=346 reward=0.7590447 (549.53 it/sec) -training >> step=2069800, episode=346 reward=0.7593005 (534.16 it/sec) -training >> step=2069900, episode=346 reward=0.752655 (519.90 it/sec) -training >> step=2070000, episode=346 reward=0.7603326 (540.95 it/sec) -training >> step=2070100, episode=346 reward=0.7677713 (473.67 it/sec) -training >> step=2070200, episode=346 reward=0.7740638 (481.07 it/sec) -training >> step=2070300, episode=346 reward=0.7703578 (463.52 it/sec) -training >> step=2070400, episode=346 reward=0.7585511 (477.82 it/sec) -training >> step=2070500, episode=346 reward=0.7700551 (488.88 it/sec) -training >> step=2070600, episode=346 reward=0.7771192 (501.74 it/sec) -training >> step=2070700, episode=346 reward=0.7768826 (534.64 it/sec) -training >> step=2070800, episode=346 reward=0.7603327 (479.22 it/sec) -training >> step=2070900, episode=346 reward=0.7704395 (497.56 it/sec) -training >> step=2071000, episode=346 reward=0.7490928 (500.54 it/sec) -training >> step=2071100, episode=346 reward=0.7689268 (517.96 it/sec) -training >> step=2071200, episode=346 reward=0.7567322 (512.86 it/sec) -training >> step=2071300, episode=346 reward=0.7622665 (526.81 it/sec) -training >> step=2071400, episode=346 reward=0.7356272 (489.44 it/sec) -training >> step=2071500, episode=346 reward=0.7405078 (500.85 it/sec) -training >> step=2071600, episode=346 reward=0.7569886 (531.55 it/sec) -training >> step=2071700, episode=346 reward=0.7497325 (519.84 it/sec) -training >> step=2071800, episode=346 reward=0.7560083 (503.69 it/sec) -training >> step=2071900, episode=346 reward=0.7681882 (478.00 it/sec) -training >> step=2072000, episode=346 reward=0.7490889 (530.46 it/sec) -training >> step=2072100, episode=346 reward=0.7582427 (529.81 it/sec) -training >> step=2072200, episode=346 reward=0.7490575 (502.71 it/sec) -training >> step=2072300, episode=346 reward=0.7744771 (517.71 it/sec) -training >> step=2072400, episode=346 reward=0.7662058 (472.09 it/sec) -training >> step=2072500, episode=346 reward=0.7500316 (479.40 it/sec) -training >> step=2072600, episode=346 reward=0.770693 (525.53 it/sec) -training >> step=2072700, episode=346 reward=0.7855537 (528.06 it/sec) -training >> step=2072800, episode=346 reward=0.7732988 (525.27 it/sec) -training >> step=2072900, episode=346 reward=0.7742944 (496.51 it/sec) -training >> step=2073000, episode=346 reward=0.7644967 (505.36 it/sec) -training >> step=2073100, episode=346 reward=0.7504143 (544.70 it/sec) -training >> step=2073200, episode=346 reward=0.7506324 (508.05 it/sec) -training >> step=2073300, episode=346 reward=0.7644486 (534.61 it/sec) -training >> step=2073400, episode=346 reward=0.7646967 (522.63 it/sec) -training >> step=2073500, episode=346 reward=0.7617382 (493.33 it/sec) -training >> step=2073600, episode=346 reward=0.7500699 (541.51 it/sec) -training >> step=2073700, episode=346 reward=0.7740563 (507.13 it/sec) -training >> step=2073800, episode=346 reward=0.7827162 (493.51 it/sec) -training >> step=2073900, episode=346 reward=0.778345 (541.76 it/sec) -training >> step=2074000, episode=346 reward=0.7767549 (492.84 it/sec) -training >> step=2074100, episode=346 reward=0.7753255 (467.86 it/sec) -training >> step=2074200, episode=346 reward=0.7626651 (525.06 it/sec) -training >> step=2074300, episode=346 reward=0.765035 (510.90 it/sec) -training >> step=2074400, episode=346 reward=0.7460542 (527.13 it/sec) -training >> step=2074500, episode=346 reward=0.7668912 (509.40 it/sec) -training >> step=2074600, episode=346 reward=0.7590896 (514.39 it/sec) -training >> step=2074700, episode=346 reward=0.7450103 (531.62 it/sec) -training >> step=2074800, episode=346 reward=0.7521018 (556.38 it/sec) -training >> step=2074900, episode=346 reward=0.7577332 (510.66 it/sec) -training >> step=2075000, episode=346 reward=0.7548443 (515.99 it/sec) -training >> step=2075100, episode=346 reward=0.7493269 (519.32 it/sec) -training >> step=2075200, episode=346 reward=0.745287 (467.28 it/sec) -training >> step=2075300, episode=347 reward=0.7517475 (174.88 it/sec) -training >> step=2075400, episode=347 reward=0.7721723 (503.10 it/sec) -training >> step=2075500, episode=347 reward=0.7692936 (493.04 it/sec) -training >> step=2075600, episode=347 reward=0.7537588 (504.27 it/sec) -training >> step=2075700, episode=347 reward=0.7638459 (564.43 it/sec) -training >> step=2075800, episode=347 reward=0.7544618 (530.54 it/sec) -training >> step=2075900, episode=347 reward=0.7402259 (479.07 it/sec) -training >> step=2076000, episode=347 reward=0.7398217 (529.92 it/sec) -training >> step=2076100, episode=347 reward=0.8058986 (550.94 it/sec) -training >> step=2076200, episode=347 reward=0.765948 (526.34 it/sec) -training >> step=2076300, episode=347 reward=0.7481179 (471.57 it/sec) -training >> step=2076400, episode=347 reward=0.7768548 (516.71 it/sec) -training >> step=2076500, episode=347 reward=0.7482057 (539.13 it/sec) -training >> step=2076600, episode=347 reward=0.7725368 (536.77 it/sec) -training >> step=2076700, episode=347 reward=0.7497837 (536.17 it/sec) -training >> step=2076800, episode=347 reward=0.7572371 (547.69 it/sec) -training >> step=2076900, episode=347 reward=0.7686895 (488.70 it/sec) -training >> step=2077000, episode=347 reward=0.7668715 (487.75 it/sec) -training >> step=2077100, episode=347 reward=0.7815267 (513.57 it/sec) -training >> step=2077200, episode=347 reward=0.7714333 (528.94 it/sec) -training >> step=2077300, episode=347 reward=0.7633353 (518.11 it/sec) -training >> step=2077400, episode=347 reward=0.7677088 (499.09 it/sec) -training >> step=2077500, episode=347 reward=0.7516258 (508.22 it/sec) -training >> step=2077600, episode=347 reward=0.7786592 (524.25 it/sec) -training >> step=2077700, episode=347 reward=0.7536104 (519.19 it/sec) -training >> step=2077800, episode=347 reward=0.7579666 (553.81 it/sec) -training >> step=2077900, episode=347 reward=0.7623613 (536.92 it/sec) -training >> step=2078000, episode=347 reward=0.7840319 (526.10 it/sec) -training >> step=2078100, episode=347 reward=0.77314 (544.74 it/sec) -training >> step=2078200, episode=347 reward=0.7730444 (429.04 it/sec) -training >> step=2078300, episode=347 reward=0.7557161 (494.31 it/sec) -training >> step=2078400, episode=347 reward=0.7932553 (539.08 it/sec) -training >> step=2078500, episode=347 reward=0.7552402 (445.38 it/sec) -training >> step=2078600, episode=347 reward=0.7581056 (378.18 it/sec) -training >> step=2078700, episode=347 reward=0.7357512 (455.39 it/sec) -training >> step=2078800, episode=347 reward=0.7540883 (513.23 it/sec) -training >> step=2078900, episode=347 reward=0.7820896 (456.90 it/sec) -training >> step=2079000, episode=347 reward=0.7620425 (485.09 it/sec) -training >> step=2079100, episode=347 reward=0.7732763 (471.38 it/sec) -training >> step=2079200, episode=347 reward=0.75871 (460.41 it/sec) -training >> step=2079300, episode=347 reward=0.7687877 (483.84 it/sec) -training >> step=2079400, episode=347 reward=0.7386804 (476.97 it/sec) -training >> step=2079500, episode=347 reward=0.7739294 (508.12 it/sec) -training >> step=2079600, episode=347 reward=0.7655845 (531.40 it/sec) -training >> step=2079700, episode=347 reward=0.7439271 (485.62 it/sec) -training >> step=2079800, episode=347 reward=0.7702422 (509.54 it/sec) -training >> step=2079900, episode=347 reward=0.7666451 (490.68 it/sec) -training >> step=2080000, episode=347 reward=0.7611507 (499.23 it/sec) -training >> step=2080100, episode=347 reward=0.7516621 (479.52 it/sec) -training >> step=2080200, episode=347 reward=0.7633176 (433.31 it/sec) -training >> step=2080300, episode=347 reward=0.7522675 (449.17 it/sec) -training >> step=2080400, episode=347 reward=0.776127 (443.44 it/sec) -training >> step=2080500, episode=347 reward=0.7467332 (469.07 it/sec) -training >> step=2080600, episode=347 reward=0.7639433 (443.90 it/sec) -training >> step=2080700, episode=347 reward=0.7652496 (483.96 it/sec) -training >> step=2080800, episode=347 reward=0.7608156 (486.93 it/sec) -training >> step=2080900, episode=347 reward=0.7595819 (394.25 it/sec) -training >> step=2081000, episode=347 reward=0.7720574 (449.60 it/sec) -training >> step=2081100, episode=347 reward=0.774979 (452.30 it/sec) -training >> step=2081200, episode=347 reward=0.7654236 (478.22 it/sec) -training >> step=2081300, episode=348 reward=0.7458998 (118.54 it/sec) -training >> step=2081400, episode=348 reward=0.7651984 (350.71 it/sec) -training >> step=2081500, episode=348 reward=0.741443 (353.98 it/sec) -training >> step=2081600, episode=348 reward=0.7721874 (343.90 it/sec) -training >> step=2081700, episode=348 reward=0.7586687 (440.93 it/sec) -training >> step=2081800, episode=348 reward=0.7596405 (436.33 it/sec) -training >> step=2081900, episode=348 reward=0.7626391 (443.78 it/sec) -training >> step=2082000, episode=348 reward=0.7683616 (449.01 it/sec) -training >> step=2082100, episode=348 reward=0.7632545 (477.24 it/sec) -training >> step=2082200, episode=348 reward=0.7618192 (503.77 it/sec) -training >> step=2082300, episode=348 reward=0.7565691 (430.81 it/sec) -training >> step=2082400, episode=348 reward=0.7595273 (520.57 it/sec) -training >> step=2082500, episode=348 reward=0.7631336 (469.00 it/sec) -training >> step=2082600, episode=348 reward=0.7566326 (532.05 it/sec) -training >> step=2082700, episode=348 reward=0.7477701 (500.21 it/sec) -training >> step=2082800, episode=348 reward=0.7767786 (492.74 it/sec) -training >> step=2082900, episode=348 reward=0.7507253 (503.65 it/sec) -training >> step=2083000, episode=348 reward=0.7448837 (465.50 it/sec) -training >> step=2083100, episode=348 reward=0.7622041 (456.61 it/sec) -training >> step=2083200, episode=348 reward=0.7743402 (470.57 it/sec) -training >> step=2083300, episode=348 reward=0.7583666 (409.50 it/sec) -training >> step=2083400, episode=348 reward=0.7760093 (512.00 it/sec) -training >> step=2083500, episode=348 reward=0.7459384 (447.38 it/sec) -training >> step=2083600, episode=348 reward=0.7619642 (469.10 it/sec) -training >> step=2083700, episode=348 reward=0.7700422 (478.07 it/sec) -training >> step=2083800, episode=348 reward=0.7729772 (450.54 it/sec) -training >> step=2083900, episode=348 reward=0.7490659 (445.82 it/sec) -training >> step=2084000, episode=348 reward=0.7730997 (448.95 it/sec) -training >> step=2084100, episode=348 reward=0.7763649 (474.53 it/sec) -training >> step=2084200, episode=348 reward=0.7720758 (470.17 it/sec) -training >> step=2084300, episode=348 reward=0.764901 (497.43 it/sec) -training >> step=2084400, episode=348 reward=0.7633651 (439.35 it/sec) -training >> step=2084500, episode=348 reward=0.726559 (505.99 it/sec) -training >> step=2084600, episode=348 reward=0.7585135 (500.72 it/sec) -training >> step=2084700, episode=348 reward=0.7680287 (432.98 it/sec) -training >> step=2084800, episode=348 reward=0.7786232 (458.10 it/sec) -training >> step=2084900, episode=348 reward=0.779632 (401.23 it/sec) -training >> step=2085000, episode=348 reward=0.7725654 (431.82 it/sec) -training >> step=2085100, episode=348 reward=0.7364318 (373.56 it/sec) -training >> step=2085200, episode=348 reward=0.7706164 (409.09 it/sec) -training >> step=2085300, episode=348 reward=0.7836865 (378.84 it/sec) -training >> step=2085400, episode=348 reward=0.7909203 (417.05 it/sec) -training >> step=2085500, episode=348 reward=0.7522184 (445.50 it/sec) -training >> step=2085600, episode=348 reward=0.7594824 (443.74 it/sec) -training >> step=2085700, episode=348 reward=0.7800544 (456.33 it/sec) -training >> step=2085800, episode=348 reward=0.7438875 (520.23 it/sec) -training >> step=2085900, episode=348 reward=0.7398178 (445.25 it/sec) -training >> step=2086000, episode=348 reward=0.7526051 (506.12 it/sec) -training >> step=2086100, episode=348 reward=0.760599 (517.40 it/sec) -training >> step=2086200, episode=348 reward=0.7782703 (535.16 it/sec) -training >> step=2086300, episode=348 reward=0.7295549 (509.79 it/sec) -training >> step=2086400, episode=348 reward=0.7495402 (521.52 it/sec) -training >> step=2086500, episode=348 reward=0.7530079 (473.39 it/sec) -training >> step=2086600, episode=348 reward=0.7695353 (526.25 it/sec) -training >> step=2086700, episode=348 reward=0.760065 (540.51 it/sec) -training >> step=2086800, episode=348 reward=0.7625831 (482.98 it/sec) -training >> step=2086900, episode=348 reward=0.7426874 (512.73 it/sec) -training >> step=2087000, episode=348 reward=0.7634075 (501.94 it/sec) -training >> step=2087100, episode=348 reward=0.7534662 (452.26 it/sec) -training >> step=2087200, episode=348 reward=0.7450114 (506.93 it/sec) -training >> step=2087300, episode=349 reward=0.7328088 (114.12 it/sec) -training >> step=2087400, episode=349 reward=0.7622629 (452.37 it/sec) -training >> step=2087500, episode=349 reward=0.7498782 (464.52 it/sec) -training >> step=2087600, episode=349 reward=0.7442278 (495.43 it/sec) -training >> step=2087700, episode=349 reward=0.772658 (499.48 it/sec) -training >> step=2087800, episode=349 reward=0.766782 (491.28 it/sec) -training >> step=2087900, episode=349 reward=0.75711 (423.45 it/sec) -training >> step=2088000, episode=349 reward=0.7707863 (443.47 it/sec) -training >> step=2088100, episode=349 reward=0.7648904 (433.91 it/sec) -training >> step=2088200, episode=349 reward=0.7444114 (439.55 it/sec) -training >> step=2088300, episode=349 reward=0.7431356 (426.05 it/sec) -training >> step=2088400, episode=349 reward=0.7668216 (466.34 it/sec) -training >> step=2088500, episode=349 reward=0.7542109 (454.03 it/sec) -training >> step=2088600, episode=349 reward=0.7900663 (518.87 it/sec) -training >> step=2088700, episode=349 reward=0.7622097 (458.90 it/sec) -training >> step=2088800, episode=349 reward=0.7621863 (458.44 it/sec) -training >> step=2088900, episode=349 reward=0.7692066 (452.99 it/sec) -training >> step=2089000, episode=349 reward=0.8051996 (425.42 it/sec) -training >> step=2089100, episode=349 reward=0.7744473 (490.79 it/sec) -training >> step=2089200, episode=349 reward=0.7409217 (485.97 it/sec) -training >> step=2089300, episode=349 reward=0.780517 (548.66 it/sec) -training >> step=2089400, episode=349 reward=0.7761142 (508.86 it/sec) -training >> step=2089500, episode=349 reward=0.7569413 (473.90 it/sec) -training >> step=2089600, episode=349 reward=0.7749221 (486.52 it/sec) -training >> step=2089700, episode=349 reward=0.7657574 (501.12 it/sec) -training >> step=2089800, episode=349 reward=0.759912 (558.62 it/sec) -training >> step=2089900, episode=349 reward=0.7498572 (513.82 it/sec) -training >> step=2090000, episode=349 reward=0.7560745 (436.08 it/sec) -training >> step=2090100, episode=349 reward=0.7706675 (474.46 it/sec) -training >> step=2090200, episode=349 reward=0.769884 (428.80 it/sec) -training >> step=2090300, episode=349 reward=0.7532604 (421.29 it/sec) -training >> step=2090400, episode=349 reward=0.7536167 (440.16 it/sec) -training >> step=2090500, episode=349 reward=0.7532692 (505.28 it/sec) -training >> step=2090600, episode=349 reward=0.7615736 (468.81 it/sec) -training >> step=2090700, episode=349 reward=0.7550472 (486.76 it/sec) -training >> step=2090800, episode=349 reward=0.7558216 (537.80 it/sec) -training >> step=2090900, episode=349 reward=0.7645441 (517.66 it/sec) -training >> step=2091000, episode=349 reward=0.7561768 (482.62 it/sec) -training >> step=2091100, episode=349 reward=0.7751305 (387.80 it/sec) -training >> step=2091200, episode=349 reward=0.7721377 (437.27 it/sec) -training >> step=2091300, episode=349 reward=0.7699564 (410.57 it/sec) -training >> step=2091400, episode=349 reward=0.7626955 (432.43 it/sec) -training >> step=2091500, episode=349 reward=0.7692924 (376.66 it/sec) -training >> step=2091600, episode=349 reward=0.7532023 (427.57 it/sec) -training >> step=2091700, episode=349 reward=0.7543255 (446.42 it/sec) -training >> step=2091800, episode=349 reward=0.7567538 (469.36 it/sec) -training >> step=2091900, episode=349 reward=0.7792754 (448.89 it/sec) -training >> step=2092000, episode=349 reward=0.7788152 (420.84 it/sec) -training >> step=2092100, episode=349 reward=0.7747846 (443.53 it/sec) -training >> step=2092200, episode=349 reward=0.7617764 (422.29 it/sec) -training >> step=2092300, episode=349 reward=0.7578955 (476.71 it/sec) -training >> step=2092400, episode=349 reward=0.7430815 (472.19 it/sec) -training >> step=2092500, episode=349 reward=0.7573441 (436.99 it/sec) -training >> step=2092600, episode=349 reward=0.7712678 (448.51 it/sec) -training >> step=2092700, episode=349 reward=0.7836122 (452.45 it/sec) -training >> step=2092800, episode=349 reward=0.7770725 (468.69 it/sec) -training >> step=2092900, episode=349 reward=0.7614453 (453.34 it/sec) -training >> step=2093000, episode=349 reward=0.7479718 (504.74 it/sec) -training >> step=2093100, episode=349 reward=0.7575864 (511.19 it/sec) -training >> step=2093200, episode=349 reward=0.7519897 (487.10 it/sec) -training >> step=2093300, episode=350 reward=0.7744216 (124.11 it/sec) -training >> step=2093400, episode=350 reward=0.7480278 (486.51 it/sec) -training >> step=2093500, episode=350 reward=0.7373105 (504.87 it/sec) -training >> step=2093600, episode=350 reward=0.7617348 (518.71 it/sec) -training >> step=2093700, episode=350 reward=0.7528812 (510.20 it/sec) -training >> step=2093800, episode=350 reward=0.7692634 (530.78 it/sec) -training >> step=2093900, episode=350 reward=0.758112 (464.30 it/sec) -training >> step=2094000, episode=350 reward=0.7700447 (527.77 it/sec) -training >> step=2094100, episode=350 reward=0.7622827 (486.94 it/sec) -training >> step=2094200, episode=350 reward=0.7487762 (512.92 it/sec) -training >> step=2094300, episode=350 reward=0.7546993 (536.13 it/sec) -training >> step=2094400, episode=350 reward=0.7393042 (470.16 it/sec) -training >> step=2094500, episode=350 reward=0.7723598 (532.57 it/sec) -training >> step=2094600, episode=350 reward=0.749226 (521.83 it/sec) -training >> step=2094700, episode=350 reward=0.7420546 (566.45 it/sec) -training >> step=2094800, episode=350 reward=0.7595307 (531.65 it/sec) -training >> step=2094900, episode=350 reward=0.7623113 (478.86 it/sec) -training >> step=2095000, episode=350 reward=0.7825701 (468.83 it/sec) -training >> step=2095100, episode=350 reward=0.7726194 (497.44 it/sec) -training >> step=2095200, episode=350 reward=0.767453 (532.84 it/sec) -training >> step=2095300, episode=350 reward=0.7728496 (524.75 it/sec) -training >> step=2095400, episode=350 reward=0.7695096 (507.38 it/sec) -training >> step=2095500, episode=350 reward=0.7682868 (508.72 it/sec) -training >> step=2095600, episode=350 reward=0.7584649 (494.80 it/sec) -training >> step=2095700, episode=350 reward=0.7838608 (466.39 it/sec) -training >> step=2095800, episode=350 reward=0.7559459 (548.09 it/sec) -training >> step=2095900, episode=350 reward=0.7742354 (493.11 it/sec) -training >> step=2096000, episode=350 reward=0.7682328 (511.70 it/sec) -training >> step=2096100, episode=350 reward=0.7798528 (465.23 it/sec) -training >> step=2096200, episode=350 reward=0.7632245 (480.36 it/sec) -training >> step=2096300, episode=350 reward=0.7617043 (478.39 it/sec) -training >> step=2096400, episode=350 reward=0.7634735 (465.74 it/sec) -training >> step=2096500, episode=350 reward=0.7719561 (470.23 it/sec) -training >> step=2096600, episode=350 reward=0.7742186 (517.28 it/sec) -training >> step=2096700, episode=350 reward=0.7539052 (483.86 it/sec) -training >> step=2096800, episode=350 reward=0.7631433 (488.92 it/sec) -training >> step=2096900, episode=350 reward=0.7697871 (499.86 it/sec) -training >> step=2097000, episode=350 reward=0.7573038 (441.83 it/sec) -training >> step=2097100, episode=350 reward=0.7612321 (509.68 it/sec) -training >> step=2097200, episode=350 reward=0.7477809 (484.44 it/sec) -training >> step=2097300, episode=350 reward=0.7479058 (457.27 it/sec) -training >> step=2097400, episode=350 reward=0.7642758 (500.12 it/sec) -training >> step=2097500, episode=350 reward=0.7448118 (444.72 it/sec) -training >> step=2097600, episode=350 reward=0.7892777 (475.00 it/sec) -training >> step=2097700, episode=350 reward=0.7726662 (449.70 it/sec) -training >> step=2097800, episode=350 reward=0.759688 (528.11 it/sec) -training >> step=2097900, episode=350 reward=0.7770445 (459.08 it/sec) -training >> step=2098000, episode=350 reward=0.748962 (471.05 it/sec) -training >> step=2098100, episode=350 reward=0.7636317 (467.12 it/sec) -training >> step=2098200, episode=350 reward=0.7684624 (529.93 it/sec) -training >> step=2098300, episode=350 reward=0.7504354 (473.79 it/sec) -training >> step=2098400, episode=350 reward=0.7859971 (475.51 it/sec) -training >> step=2098500, episode=350 reward=0.7737833 (476.98 it/sec) -training >> step=2098600, episode=350 reward=0.7820883 (468.41 it/sec) -training >> step=2098700, episode=350 reward=0.7630824 (453.25 it/sec) -training >> step=2098800, episode=350 reward=0.7523959 (456.66 it/sec) -training >> step=2098900, episode=350 reward=0.773802 (470.28 it/sec) -training >> step=2099000, episode=350 reward=0.7516054 (462.48 it/sec) -training >> step=2099100, episode=350 reward=0.7465534 (462.43 it/sec) -training >> step=2099200, episode=350 reward=0.7600318 (417.89 it/sec) -training >> step=2099300, episode=351 reward=0.7712586 (184.33 it/sec) -training >> step=2099400, episode=351 reward=0.7548106 (507.41 it/sec) -training >> step=2099500, episode=351 reward=0.754631 (497.97 it/sec) -training >> step=2099600, episode=351 reward=0.758406 (453.19 it/sec) -training >> step=2099700, episode=351 reward=0.7416473 (425.99 it/sec) -training >> step=2099800, episode=351 reward=0.7653474 (457.04 it/sec) -training >> step=2099900, episode=351 reward=0.7740638 (465.82 it/sec) -training >> step=2100000, episode=351 reward=0.7578003 (396.30 it/sec) -training >> step=2100100, episode=351 reward=0.7712293 (420.44 it/sec) -training >> step=2100200, episode=351 reward=0.766564 (426.68 it/sec) -training >> step=2100300, episode=351 reward=0.77638 (468.63 it/sec) -training >> step=2100400, episode=351 reward=0.7568731 (453.58 it/sec) -training >> step=2100500, episode=351 reward=0.752383 (430.51 it/sec) -training >> step=2100600, episode=351 reward=0.7620019 (446.07 it/sec) -training >> step=2100700, episode=351 reward=0.7633276 (456.48 it/sec) -training >> step=2100800, episode=351 reward=0.7587889 (453.41 it/sec) -training >> step=2100900, episode=351 reward=0.7753941 (465.64 it/sec) -training >> step=2101000, episode=351 reward=0.7622798 (463.48 it/sec) -training >> step=2101100, episode=351 reward=0.777243 (449.13 it/sec) -training >> step=2101200, episode=351 reward=0.7547453 (443.09 it/sec) -training >> step=2101300, episode=351 reward=0.7737446 (489.98 it/sec) -training >> step=2101400, episode=351 reward=0.7580481 (504.29 it/sec) -training >> step=2101500, episode=351 reward=0.7640502 (565.84 it/sec) -training >> step=2101600, episode=351 reward=0.7789791 (510.14 it/sec) -training >> step=2101700, episode=351 reward=0.7516235 (488.10 it/sec) -training >> step=2101800, episode=351 reward=0.779125 (551.07 it/sec) -training >> step=2101900, episode=351 reward=0.7682154 (528.04 it/sec) -training >> step=2102000, episode=351 reward=0.7635208 (555.48 it/sec) -training >> step=2102100, episode=351 reward=0.7613723 (550.47 it/sec) -training >> step=2102200, episode=351 reward=0.7645882 (496.17 it/sec) -training >> step=2102300, episode=351 reward=0.7453038 (503.80 it/sec) -training >> step=2102400, episode=351 reward=0.7671789 (525.04 it/sec) -training >> step=2102500, episode=351 reward=0.7468731 (528.49 it/sec) -training >> step=2102600, episode=351 reward=0.774685 (567.42 it/sec) -training >> step=2102700, episode=351 reward=0.7643106 (440.81 it/sec) -training >> step=2102800, episode=351 reward=0.772508 (439.05 it/sec) -training >> step=2102900, episode=351 reward=0.7752241 (495.61 it/sec) -training >> step=2103000, episode=351 reward=0.7422815 (476.35 it/sec) -training >> step=2103100, episode=351 reward=0.7327136 (440.12 it/sec) -training >> step=2103200, episode=351 reward=0.7490709 (414.43 it/sec) -training >> step=2103300, episode=351 reward=0.7820581 (463.77 it/sec) -training >> step=2103400, episode=351 reward=0.7540766 (396.75 it/sec) -training >> step=2103500, episode=351 reward=0.7366617 (456.53 it/sec) -training >> step=2103600, episode=351 reward=0.7622294 (427.64 it/sec) -training >> step=2103700, episode=351 reward=0.7334834 (508.19 it/sec) -training >> step=2103800, episode=351 reward=0.7406768 (491.01 it/sec) -training >> step=2103900, episode=351 reward=0.7537823 (483.13 it/sec) -training >> step=2104000, episode=351 reward=0.7808914 (489.14 it/sec) -training >> step=2104100, episode=351 reward=0.7769043 (488.94 it/sec) -training >> step=2104200, episode=351 reward=0.7594855 (482.31 it/sec) -training >> step=2104300, episode=351 reward=0.775353 (462.15 it/sec) -training >> step=2104400, episode=351 reward=0.7817865 (470.25 it/sec) -training >> step=2104500, episode=351 reward=0.7725238 (488.67 it/sec) -training >> step=2104600, episode=351 reward=0.7465441 (466.61 it/sec) -training >> step=2104700, episode=351 reward=0.7616522 (478.85 it/sec) -training >> step=2104800, episode=351 reward=0.7740563 (502.94 it/sec) -training >> step=2104900, episode=351 reward=0.7707776 (477.04 it/sec) -training >> step=2105000, episode=351 reward=0.7545742 (523.41 it/sec) -training >> step=2105100, episode=351 reward=0.7568061 (501.61 it/sec) -training >> step=2105200, episode=351 reward=0.7615646 (554.83 it/sec) -training >> step=2105300, episode=352 reward=0.762713 (166.56 it/sec) -training >> step=2105400, episode=352 reward=0.7451909 (510.90 it/sec) -training >> step=2105500, episode=352 reward=0.7372394 (513.06 it/sec) -training >> step=2105600, episode=352 reward=0.7498268 (484.51 it/sec) -training >> step=2105700, episode=352 reward=0.7639575 (539.54 it/sec) -training >> step=2105800, episode=352 reward=0.7740399 (501.61 it/sec) -training >> step=2105900, episode=352 reward=0.7920196 (529.60 it/sec) -training >> step=2106000, episode=352 reward=0.7686951 (495.91 it/sec) -training >> step=2106100, episode=352 reward=0.7564126 (515.03 it/sec) -training >> step=2106200, episode=352 reward=0.7659092 (535.35 it/sec) -training >> step=2106300, episode=352 reward=0.7727 (528.11 it/sec) -training >> step=2106400, episode=352 reward=0.7331353 (524.61 it/sec) -training >> step=2106500, episode=352 reward=0.7739777 (545.70 it/sec) -training >> step=2106600, episode=352 reward=0.7632565 (499.97 it/sec) -training >> step=2106700, episode=352 reward=0.7558367 (553.44 it/sec) -training >> step=2106800, episode=352 reward=0.7563344 (529.80 it/sec) -training >> step=2106900, episode=352 reward=0.7701287 (537.06 it/sec) -training >> step=2107000, episode=352 reward=0.7811196 (497.45 it/sec) -training >> step=2107100, episode=352 reward=0.7372033 (530.39 it/sec) -training >> step=2107200, episode=352 reward=0.7658134 (485.46 it/sec) -training >> step=2107300, episode=352 reward=0.7452248 (525.28 it/sec) -training >> step=2107400, episode=352 reward=0.744426 (517.28 it/sec) -training >> step=2107500, episode=352 reward=0.7639881 (517.92 it/sec) -training >> step=2107600, episode=352 reward=0.79017 (546.31 it/sec) -training >> step=2107700, episode=352 reward=0.7380577 (498.35 it/sec) -training >> step=2107800, episode=352 reward=0.768562 (503.61 it/sec) -training >> step=2107900, episode=352 reward=0.7537962 (557.76 it/sec) -training >> step=2108000, episode=352 reward=0.7632693 (543.01 it/sec) -training >> step=2108100, episode=352 reward=0.7772191 (507.34 it/sec) -training >> step=2108200, episode=352 reward=0.7803444 (480.67 it/sec) -training >> step=2108300, episode=352 reward=0.7785791 (512.59 it/sec) -training >> step=2108400, episode=352 reward=0.7687299 (539.12 it/sec) -training >> step=2108500, episode=352 reward=0.7729121 (494.23 it/sec) -training >> step=2108600, episode=352 reward=0.7883356 (509.33 it/sec) -training >> step=2108700, episode=352 reward=0.7653213 (520.21 it/sec) -training >> step=2108800, episode=352 reward=0.7588535 (550.62 it/sec) -training >> step=2108900, episode=352 reward=0.7596447 (512.11 it/sec) -training >> step=2109000, episode=352 reward=0.7756995 (526.40 it/sec) -training >> step=2109100, episode=352 reward=0.7527325 (548.73 it/sec) -training >> step=2109200, episode=352 reward=0.7560937 (463.60 it/sec) -training >> step=2109300, episode=352 reward=0.7872984 (512.05 it/sec) -training >> step=2109400, episode=352 reward=0.7630551 (521.26 it/sec) -training >> step=2109500, episode=352 reward=0.7457173 (528.97 it/sec) -training >> step=2109600, episode=352 reward=0.7533531 (519.92 it/sec) -training >> step=2109700, episode=352 reward=0.7615467 (497.20 it/sec) -training >> step=2109800, episode=352 reward=0.7733386 (501.31 it/sec) -training >> step=2109900, episode=352 reward=0.7692412 (523.43 it/sec) -training >> step=2110000, episode=352 reward=0.7622421 (549.21 it/sec) -training >> step=2110100, episode=352 reward=0.7484303 (493.90 it/sec) -training >> step=2110200, episode=352 reward=0.7688513 (520.65 it/sec) -training >> step=2110300, episode=352 reward=0.7401403 (498.83 it/sec) -training >> step=2110400, episode=352 reward=0.7665929 (511.63 it/sec) -training >> step=2110500, episode=352 reward=0.7801208 (497.93 it/sec) -training >> step=2110600, episode=352 reward=0.7765785 (528.00 it/sec) -training >> step=2110700, episode=352 reward=0.7857266 (547.72 it/sec) -training >> step=2110800, episode=352 reward=0.7446606 (446.28 it/sec) -training >> step=2110900, episode=352 reward=0.7516125 (488.84 it/sec) -training >> step=2111000, episode=352 reward=0.7701852 (443.16 it/sec) -training >> step=2111100, episode=352 reward=0.7636821 (457.46 it/sec) -training >> step=2111200, episode=352 reward=0.7516034 (417.46 it/sec) -training >> step=2111300, episode=353 reward=0.7543775 (316.39 it/sec) -training >> step=2111400, episode=353 reward=0.7729453 (488.71 it/sec) -training >> step=2111500, episode=353 reward=0.7350824 (508.41 it/sec) -training >> step=2111600, episode=353 reward=0.7475979 (528.79 it/sec) -training >> step=2111700, episode=353 reward=0.7537083 (461.29 it/sec) -training >> step=2111800, episode=353 reward=0.752025 (488.08 it/sec) -training >> step=2111900, episode=353 reward=0.7626998 (478.55 it/sec) -training >> step=2112000, episode=353 reward=0.7639562 (475.34 it/sec) -training >> step=2112100, episode=353 reward=0.7648561 (489.09 it/sec) -training >> step=2112200, episode=353 reward=0.7730415 (500.20 it/sec) -training >> step=2112300, episode=353 reward=0.7712932 (499.42 it/sec) -training >> step=2112400, episode=353 reward=0.7998405 (538.86 it/sec) -training >> step=2112500, episode=353 reward=0.7419941 (452.85 it/sec) -training >> step=2112600, episode=353 reward=0.7620229 (505.08 it/sec) -training >> step=2112700, episode=353 reward=0.765462 (447.52 it/sec) -training >> step=2112800, episode=353 reward=0.7727579 (513.14 it/sec) -training >> step=2112900, episode=353 reward=0.7751333 (497.98 it/sec) -training >> step=2113000, episode=353 reward=0.7776722 (529.85 it/sec) -training >> step=2113100, episode=353 reward=0.7688185 (505.57 it/sec) -training >> step=2113200, episode=353 reward=0.7534034 (490.93 it/sec) -training >> step=2113300, episode=353 reward=0.7645572 (574.37 it/sec) -training >> step=2113400, episode=353 reward=0.7476592 (569.67 it/sec) -training >> step=2113500, episode=353 reward=0.7620937 (563.85 it/sec) -training >> step=2113600, episode=353 reward=0.7718619 (554.10 it/sec) -training >> step=2113700, episode=353 reward=0.7655763 (572.47 it/sec) -training >> step=2113800, episode=353 reward=0.7687491 (499.98 it/sec) -training >> step=2113900, episode=353 reward=0.7740781 (532.48 it/sec) -training >> step=2114000, episode=353 reward=0.7687584 (541.39 it/sec) -training >> step=2114100, episode=353 reward=0.7421254 (515.48 it/sec) -training >> step=2114200, episode=353 reward=0.7583368 (500.71 it/sec) -training >> step=2114300, episode=353 reward=0.7778748 (449.79 it/sec) -training >> step=2114400, episode=353 reward=0.7584853 (517.57 it/sec) -training >> step=2114500, episode=353 reward=0.7707304 (551.60 it/sec) -training >> step=2114600, episode=353 reward=0.7706762 (549.31 it/sec) -training >> step=2114700, episode=353 reward=0.7696314 (553.36 it/sec) -training >> step=2114800, episode=353 reward=0.7739725 (461.01 it/sec) -training >> step=2114900, episode=353 reward=0.7646195 (538.14 it/sec) -training >> step=2115000, episode=353 reward=0.7582217 (485.86 it/sec) -training >> step=2115100, episode=353 reward=0.7709298 (469.44 it/sec) -training >> step=2115200, episode=353 reward=0.7458998 (492.58 it/sec) -training >> step=2115300, episode=353 reward=0.7645441 (442.38 it/sec) -training >> step=2115400, episode=353 reward=0.7621054 (521.44 it/sec) -training >> step=2115500, episode=353 reward=0.7617294 (533.40 it/sec) -training >> step=2115600, episode=353 reward=0.7733036 (553.94 it/sec) -training >> step=2115700, episode=353 reward=0.7382265 (531.89 it/sec) -training >> step=2115800, episode=353 reward=0.7500598 (527.98 it/sec) -training >> step=2115900, episode=353 reward=0.7670064 (506.12 it/sec) -training >> step=2116000, episode=353 reward=0.7505104 (554.98 it/sec) -training >> step=2116100, episode=353 reward=0.7880856 (494.04 it/sec) -training >> step=2116200, episode=353 reward=0.771769 (478.43 it/sec) -training >> step=2116300, episode=353 reward=0.7590145 (447.13 it/sec) -training >> step=2116400, episode=353 reward=0.7751928 (386.92 it/sec) -training >> step=2116500, episode=353 reward=0.7360669 (395.77 it/sec) -training >> step=2116600, episode=353 reward=0.7397565 (463.91 it/sec) -training >> step=2116700, episode=353 reward=0.7768188 (484.69 it/sec) -training >> step=2116800, episode=353 reward=0.7581139 (438.16 it/sec) -training >> step=2116900, episode=353 reward=0.756782 (464.76 it/sec) -training >> step=2117000, episode=353 reward=0.7576618 (496.42 it/sec) -training >> step=2117100, episode=353 reward=0.7451231 (530.78 it/sec) -training >> step=2117200, episode=353 reward=0.7515475 (483.33 it/sec) -training >> step=2117300, episode=354 reward=0.7509847 (159.05 it/sec) -training >> step=2117400, episode=354 reward=0.7550031 (551.80 it/sec) -training >> step=2117500, episode=354 reward=0.7379189 (518.64 it/sec) -training >> step=2117600, episode=354 reward=0.7370266 (535.79 it/sec) -training >> step=2117700, episode=354 reward=0.733284 (522.50 it/sec) -training >> step=2117800, episode=354 reward=0.7631814 (492.50 it/sec) -training >> step=2117900, episode=354 reward=0.7687727 (512.72 it/sec) -training >> step=2118000, episode=354 reward=0.7536071 (568.96 it/sec) -training >> step=2118100, episode=354 reward=0.7727839 (536.98 it/sec) -training >> step=2118200, episode=354 reward=0.76503 (454.46 it/sec) -training >> step=2118300, episode=354 reward=0.7600196 (483.40 it/sec) -training >> step=2118400, episode=354 reward=0.7662696 (496.11 it/sec) -training >> step=2118500, episode=354 reward=0.747964 (455.28 it/sec) -training >> step=2118600, episode=354 reward=0.7571607 (473.36 it/sec) -training >> step=2118700, episode=354 reward=0.7762992 (471.30 it/sec) -training >> step=2118800, episode=354 reward=0.7717416 (515.86 it/sec) -training >> step=2118900, episode=354 reward=0.7684059 (525.69 it/sec) -training >> step=2119000, episode=354 reward=0.7597801 (528.03 it/sec) -training >> step=2119100, episode=354 reward=0.7840922 (511.77 it/sec) -training >> step=2119200, episode=354 reward=0.7843541 (528.09 it/sec) -training >> step=2119300, episode=354 reward=0.7751036 (490.17 it/sec) -training >> step=2119400, episode=354 reward=0.7578263 (522.15 it/sec) -training >> step=2119500, episode=354 reward=0.749791 (545.27 it/sec) -training >> step=2119600, episode=354 reward=0.73202 (538.41 it/sec) -training >> step=2119700, episode=354 reward=0.756725 (498.31 it/sec) -training >> step=2119800, episode=354 reward=0.7359278 (484.66 it/sec) -training >> step=2119900, episode=354 reward=0.7519664 (533.03 it/sec) -training >> step=2120000, episode=354 reward=0.7714882 (560.96 it/sec) -training >> step=2120100, episode=354 reward=0.7669722 (509.18 it/sec) -training >> step=2120200, episode=354 reward=0.7657483 (546.09 it/sec) -training >> step=2120300, episode=354 reward=0.7569252 (509.13 it/sec) -training >> step=2120400, episode=354 reward=0.7740564 (573.74 it/sec) -training >> step=2120500, episode=354 reward=0.7722206 (571.46 it/sec) -training >> step=2120600, episode=354 reward=0.7481683 (541.21 it/sec) -training >> step=2120700, episode=354 reward=0.7568098 (545.69 it/sec) -training >> step=2120800, episode=354 reward=0.7617947 (524.52 it/sec) -training >> step=2120900, episode=354 reward=0.7647414 (530.05 it/sec) -training >> step=2121000, episode=354 reward=0.76863 (522.60 it/sec) -training >> step=2121100, episode=354 reward=0.7616087 (567.48 it/sec) -training >> step=2121200, episode=354 reward=0.7745652 (513.73 it/sec) -training >> step=2121300, episode=354 reward=0.7429737 (532.95 it/sec) -training >> step=2121400, episode=354 reward=0.7607582 (509.26 it/sec) -training >> step=2121500, episode=354 reward=0.7496243 (554.58 it/sec) -training >> step=2121600, episode=354 reward=0.7560092 (553.69 it/sec) -training >> step=2121700, episode=354 reward=0.7512138 (529.15 it/sec) -training >> step=2121800, episode=354 reward=0.747215 (530.07 it/sec) -training >> step=2121900, episode=354 reward=0.748541 (523.51 it/sec) -training >> step=2122000, episode=354 reward=0.7447686 (527.31 it/sec) -training >> step=2122100, episode=354 reward=0.7729383 (544.96 it/sec) -training >> step=2122200, episode=354 reward=0.774873 (546.04 it/sec) -training >> step=2122300, episode=354 reward=0.748499 (568.86 it/sec) -training >> step=2122400, episode=354 reward=0.7537445 (541.69 it/sec) -training >> step=2122500, episode=354 reward=0.7839602 (472.27 it/sec) -training >> step=2122600, episode=354 reward=0.7570589 (510.53 it/sec) -training >> step=2122700, episode=354 reward=0.7729723 (522.46 it/sec) -training >> step=2122800, episode=354 reward=0.7752795 (535.95 it/sec) -training >> step=2122900, episode=354 reward=0.7330285 (514.53 it/sec) -training >> step=2123000, episode=354 reward=0.771715 (538.15 it/sec) -training >> step=2123100, episode=354 reward=0.7482584 (525.33 it/sec) -training >> step=2123200, episode=354 reward=0.7511045 (523.56 it/sec) -training >> step=2123300, episode=355 reward=0.761198 (144.79 it/sec) -training >> step=2123400, episode=355 reward=0.7445746 (547.99 it/sec) -training >> step=2123500, episode=355 reward=0.7232417 (526.62 it/sec) -training >> step=2123600, episode=355 reward=0.7557541 (550.96 it/sec) -training >> step=2123700, episode=355 reward=0.7518137 (479.79 it/sec) -training >> step=2123800, episode=355 reward=0.7718593 (499.88 it/sec) -training >> step=2123900, episode=355 reward=0.7769845 (577.24 it/sec) -training >> step=2124000, episode=355 reward=0.7457449 (550.72 it/sec) -training >> step=2124100, episode=355 reward=0.756191 (529.90 it/sec) -training >> step=2124200, episode=355 reward=0.7400106 (532.94 it/sec) -training >> step=2124300, episode=355 reward=0.785701 (538.63 it/sec) -training >> step=2124400, episode=355 reward=0.7690932 (508.48 it/sec) -training >> step=2124500, episode=355 reward=0.7434191 (548.54 it/sec) -training >> step=2124600, episode=355 reward=0.7814273 (564.48 it/sec) -training >> step=2124700, episode=355 reward=0.7505524 (542.90 it/sec) -training >> step=2124800, episode=355 reward=0.7776979 (524.73 it/sec) -training >> step=2124900, episode=355 reward=0.7464352 (520.16 it/sec) -training >> step=2125000, episode=355 reward=0.7717624 (537.83 it/sec) -training >> step=2125100, episode=355 reward=0.7581834 (548.80 it/sec) -training >> step=2125200, episode=355 reward=0.7680414 (536.85 it/sec) -training >> step=2125300, episode=355 reward=0.7713952 (553.53 it/sec) -training >> step=2125400, episode=355 reward=0.7635541 (569.17 it/sec) -training >> step=2125500, episode=355 reward=0.7501582 (516.27 it/sec) -training >> step=2125600, episode=355 reward=0.7660897 (543.14 it/sec) -training >> step=2125700, episode=355 reward=0.7473921 (544.86 it/sec) -training >> step=2125800, episode=355 reward=0.7622977 (547.11 it/sec) -training >> step=2125900, episode=355 reward=0.7734725 (501.64 it/sec) -training >> step=2126000, episode=355 reward=0.7590908 (458.32 it/sec) -training >> step=2126100, episode=355 reward=0.7594306 (463.36 it/sec) -training >> step=2126200, episode=355 reward=0.7492533 (443.74 it/sec) -training >> step=2126300, episode=355 reward=0.7742044 (491.63 it/sec) -training >> step=2126400, episode=355 reward=0.775803 (468.49 it/sec) -training >> step=2126500, episode=355 reward=0.7565258 (439.37 it/sec) -training >> step=2126600, episode=355 reward=0.7469153 (459.37 it/sec) -training >> step=2126700, episode=355 reward=0.7658012 (406.29 it/sec) -training >> step=2126800, episode=355 reward=0.7528478 (400.77 it/sec) -training >> step=2126900, episode=355 reward=0.748972 (410.00 it/sec) -training >> step=2127000, episode=355 reward=0.7676696 (453.16 it/sec) -training >> step=2127100, episode=355 reward=0.7502625 (478.34 it/sec) -training >> step=2127200, episode=355 reward=0.7690608 (488.31 it/sec) -training >> step=2127300, episode=355 reward=0.7825176 (452.99 it/sec) -training >> step=2127400, episode=355 reward=0.7560111 (429.32 it/sec) -training >> step=2127500, episode=355 reward=0.7824724 (472.27 it/sec) -training >> step=2127600, episode=355 reward=0.7673051 (430.03 it/sec) -training >> step=2127700, episode=355 reward=0.7823545 (441.42 it/sec) -training >> step=2127800, episode=355 reward=0.7603616 (464.16 it/sec) -training >> step=2127900, episode=355 reward=0.7486915 (471.35 it/sec) -training >> step=2128000, episode=355 reward=0.7645125 (490.73 it/sec) -training >> step=2128100, episode=355 reward=0.7798572 (477.62 it/sec) -training >> step=2128200, episode=355 reward=0.7533094 (391.04 it/sec) -training >> step=2128300, episode=355 reward=0.7801135 (357.68 it/sec) -training >> step=2128400, episode=355 reward=0.7598567 (410.07 it/sec) -training >> step=2128500, episode=355 reward=0.7534671 (442.39 it/sec) -training >> step=2128600, episode=355 reward=0.7656201 (467.58 it/sec) -training >> step=2128700, episode=355 reward=0.7745615 (495.72 it/sec) -training >> step=2128800, episode=355 reward=0.7676115 (473.46 it/sec) -training >> step=2128900, episode=355 reward=0.778111 (479.07 it/sec) -training >> step=2129000, episode=355 reward=0.7635167 (477.15 it/sec) -training >> step=2129100, episode=355 reward=0.7780257 (441.80 it/sec) -training >> step=2129200, episode=355 reward=0.7731771 (453.72 it/sec) -training >> step=2129300, episode=356 reward=0.7940208 (182.88 it/sec) -training >> step=2129400, episode=356 reward=0.7728614 (360.33 it/sec) -training >> step=2129500, episode=356 reward=0.7706125 (480.00 it/sec) -training >> step=2129600, episode=356 reward=0.7387096 (444.87 it/sec) -training >> step=2129700, episode=356 reward=0.7634156 (518.42 it/sec) -training >> step=2129800, episode=356 reward=0.7808974 (496.86 it/sec) -training >> step=2129900, episode=356 reward=0.7586079 (468.21 it/sec) -training >> step=2130000, episode=356 reward=0.7734411 (508.51 it/sec) -training >> step=2130100, episode=356 reward=0.7620153 (432.22 it/sec) -training >> step=2130200, episode=356 reward=0.7628632 (486.42 it/sec) -training >> step=2130300, episode=356 reward=0.7890689 (506.96 it/sec) -training >> step=2130400, episode=356 reward=0.7634037 (479.05 it/sec) -training >> step=2130500, episode=356 reward=0.7613968 (490.98 it/sec) -training >> step=2130600, episode=356 reward=0.7410098 (443.09 it/sec) -training >> step=2130700, episode=356 reward=0.747506 (444.03 it/sec) -training >> step=2130800, episode=356 reward=0.7872506 (431.92 it/sec) -training >> step=2130900, episode=356 reward=0.7579988 (436.54 it/sec) -training >> step=2131000, episode=356 reward=0.7938967 (411.27 it/sec) -training >> step=2131100, episode=356 reward=0.7488992 (459.33 it/sec) -training >> step=2131200, episode=356 reward=0.7568252 (417.68 it/sec) -training >> step=2131300, episode=356 reward=0.7474629 (402.66 it/sec) -training >> step=2131400, episode=356 reward=0.7434434 (444.77 it/sec) -training >> step=2131500, episode=356 reward=0.7561201 (439.67 it/sec) -training >> step=2131600, episode=356 reward=0.7802147 (452.94 it/sec) -training >> step=2131700, episode=356 reward=0.7245304 (440.22 it/sec) -training >> step=2131800, episode=356 reward=0.75942 (445.55 it/sec) -training >> step=2131900, episode=356 reward=0.7733899 (391.64 it/sec) -training >> step=2132000, episode=356 reward=0.749773 (437.77 it/sec) -training >> step=2132100, episode=356 reward=0.7745382 (449.16 it/sec) -training >> step=2132200, episode=356 reward=0.7583293 (341.82 it/sec) -training >> step=2132300, episode=356 reward=0.7694281 (410.58 it/sec) -training >> step=2132400, episode=356 reward=0.7552204 (414.49 it/sec) -training >> step=2132500, episode=356 reward=0.7685474 (414.41 it/sec) -training >> step=2132600, episode=356 reward=0.7730374 (474.34 it/sec) -training >> step=2132700, episode=356 reward=0.7527989 (476.42 it/sec) -training >> step=2132800, episode=356 reward=0.7633172 (448.91 it/sec) -training >> step=2132900, episode=356 reward=0.7837716 (479.97 it/sec) -training >> step=2133000, episode=356 reward=0.7825084 (491.32 it/sec) -training >> step=2133100, episode=356 reward=0.7651709 (517.96 it/sec) -training >> step=2133200, episode=356 reward=0.7614966 (515.74 it/sec) -training >> step=2133300, episode=356 reward=0.762665 (461.43 it/sec) -training >> step=2133400, episode=356 reward=0.7755821 (479.82 it/sec) -training >> step=2133500, episode=356 reward=0.7604638 (505.87 it/sec) -training >> step=2133600, episode=356 reward=0.7840573 (489.41 it/sec) -training >> step=2133700, episode=356 reward=0.7648055 (421.66 it/sec) -training >> step=2133800, episode=356 reward=0.7699376 (471.78 it/sec) -training >> step=2133900, episode=356 reward=0.7583598 (521.38 it/sec) -training >> step=2134000, episode=356 reward=0.76001 (517.29 it/sec) -training >> step=2134100, episode=356 reward=0.7667529 (463.54 it/sec) -training >> step=2134200, episode=356 reward=0.7846395 (464.39 it/sec) -training >> step=2134300, episode=356 reward=0.7618107 (475.80 it/sec) -training >> step=2134400, episode=356 reward=0.7532582 (512.55 it/sec) -training >> step=2134500, episode=356 reward=0.756157 (503.91 it/sec) -training >> step=2134600, episode=356 reward=0.7518132 (502.25 it/sec) -training >> step=2134700, episode=356 reward=0.7553397 (469.02 it/sec) -training >> step=2134800, episode=356 reward=0.7778435 (456.92 it/sec) -training >> step=2134900, episode=356 reward=0.7822911 (464.56 it/sec) -training >> step=2135000, episode=356 reward=0.7452437 (483.45 it/sec) -training >> step=2135100, episode=356 reward=0.7504011 (453.86 it/sec) -training >> step=2135200, episode=356 reward=0.7730511 (489.83 it/sec) -training >> step=2135300, episode=357 reward=0.7425446 (175.82 it/sec) -training >> step=2135400, episode=357 reward=0.7457154 (481.78 it/sec) -training >> step=2135500, episode=357 reward=0.7383513 (430.72 it/sec) -training >> step=2135600, episode=357 reward=0.7502273 (336.50 it/sec) -training >> step=2135700, episode=357 reward=0.7699756 (472.46 it/sec) -training >> step=2135800, episode=357 reward=0.762282 (480.23 it/sec) -training >> step=2135900, episode=357 reward=0.7769737 (442.24 it/sec) -training >> step=2136000, episode=357 reward=0.773466 (396.05 it/sec) -training >> step=2136100, episode=357 reward=0.7834068 (453.72 it/sec) -training >> step=2136200, episode=357 reward=0.7538669 (396.88 it/sec) -training >> step=2136300, episode=357 reward=0.7889339 (387.54 it/sec) -training >> step=2136400, episode=357 reward=0.7711861 (434.45 it/sec) -training >> step=2136500, episode=357 reward=0.7768011 (490.67 it/sec) -training >> step=2136600, episode=357 reward=0.7840106 (506.93 it/sec) -training >> step=2136700, episode=357 reward=0.7735447 (465.32 it/sec) -training >> step=2136800, episode=357 reward=0.7528197 (451.89 it/sec) -training >> step=2136900, episode=357 reward=0.779297 (443.38 it/sec) -training >> step=2137000, episode=357 reward=0.765471 (448.61 it/sec) -training >> step=2137100, episode=357 reward=0.7815268 (461.45 it/sec) -training >> step=2137200, episode=357 reward=0.7599776 (448.77 it/sec) -training >> step=2137300, episode=357 reward=0.7619135 (478.87 it/sec) -training >> step=2137400, episode=357 reward=0.7618425 (477.84 it/sec) -training >> step=2137500, episode=357 reward=0.7761517 (394.90 it/sec) -training >> step=2137600, episode=357 reward=0.7822348 (419.24 it/sec) -training >> step=2137700, episode=357 reward=0.7653764 (481.22 it/sec) -training >> step=2137800, episode=357 reward=0.7837195 (412.28 it/sec) -training >> step=2137900, episode=357 reward=0.7584128 (412.21 it/sec) -training >> step=2138000, episode=357 reward=0.7679341 (504.92 it/sec) -training >> step=2138100, episode=357 reward=0.7702124 (515.70 it/sec) -training >> step=2138200, episode=357 reward=0.7622269 (474.28 it/sec) -training >> step=2138300, episode=357 reward=0.7747766 (469.89 it/sec) -training >> step=2138400, episode=357 reward=0.7707832 (496.42 it/sec) -training >> step=2138500, episode=357 reward=0.7583219 (532.48 it/sec) -training >> step=2138600, episode=357 reward=0.7703571 (480.27 it/sec) -training >> step=2138700, episode=357 reward=0.7777277 (457.93 it/sec) -training >> step=2138800, episode=357 reward=0.7730269 (486.50 it/sec) -training >> step=2138900, episode=357 reward=0.7608987 (532.72 it/sec) -training >> step=2139000, episode=357 reward=0.7762756 (474.25 it/sec) -training >> step=2139100, episode=357 reward=0.7551469 (483.72 it/sec) -training >> step=2139200, episode=357 reward=0.7506474 (512.02 it/sec) -training >> step=2139300, episode=357 reward=0.7659914 (487.31 it/sec) -training >> step=2139400, episode=357 reward=0.7795174 (480.47 it/sec) -training >> step=2139500, episode=357 reward=0.7564221 (500.89 it/sec) -training >> step=2139600, episode=357 reward=0.7380695 (468.73 it/sec) -training >> step=2139700, episode=357 reward=0.7909774 (450.95 it/sec) -training >> step=2139800, episode=357 reward=0.7355461 (469.77 it/sec) -training >> step=2139900, episode=357 reward=0.7564625 (503.09 it/sec) -training >> step=2140000, episode=357 reward=0.7228522 (521.84 it/sec) -training >> step=2140100, episode=357 reward=0.7594737 (446.87 it/sec) -training >> step=2140200, episode=357 reward=0.7541394 (479.08 it/sec) -training >> step=2140300, episode=357 reward=0.7681104 (483.77 it/sec) -training >> step=2140400, episode=357 reward=0.7773116 (524.75 it/sec) -training >> step=2140500, episode=357 reward=0.7701098 (472.40 it/sec) -training >> step=2140600, episode=357 reward=0.7474934 (478.90 it/sec) -training >> step=2140700, episode=357 reward=0.7723094 (497.27 it/sec) -training >> step=2140800, episode=357 reward=0.756122 (462.85 it/sec) -training >> step=2140900, episode=357 reward=0.7675319 (483.85 it/sec) -training >> step=2141000, episode=357 reward=0.7656866 (453.00 it/sec) -training >> step=2141100, episode=357 reward=0.7678991 (481.72 it/sec) -training >> step=2141200, episode=357 reward=0.7797621 (490.36 it/sec) -training >> step=2141300, episode=358 reward=0.7446682 (154.81 it/sec) -training >> step=2141400, episode=358 reward=0.7490885 (487.68 it/sec) -training >> step=2141500, episode=358 reward=0.7632537 (491.46 it/sec) -training >> step=2141600, episode=358 reward=0.743678 (438.91 it/sec) -training >> step=2141700, episode=358 reward=0.7606916 (512.52 it/sec) -training >> step=2141800, episode=358 reward=0.7428465 (358.76 it/sec) -training >> step=2141900, episode=358 reward=0.7540033 (463.17 it/sec) -training >> step=2142000, episode=358 reward=0.7655101 (470.04 it/sec) -training >> step=2142100, episode=358 reward=0.7587829 (497.45 it/sec) -training >> step=2142200, episode=358 reward=0.7728217 (402.68 it/sec) -training >> step=2142300, episode=358 reward=0.7851754 (434.61 it/sec) -training >> step=2142400, episode=358 reward=0.7580904 (470.75 it/sec) -training >> step=2142500, episode=358 reward=0.7837187 (471.38 it/sec) -training >> step=2142600, episode=358 reward=0.7756188 (479.70 it/sec) -training >> step=2142700, episode=358 reward=0.757825 (464.78 it/sec) -training >> step=2142800, episode=358 reward=0.7768095 (442.47 it/sec) -training >> step=2142900, episode=358 reward=0.7444941 (434.64 it/sec) -training >> step=2143000, episode=358 reward=0.7508018 (484.99 it/sec) -training >> step=2143100, episode=358 reward=0.7739279 (495.90 it/sec) -training >> step=2143200, episode=358 reward=0.7662414 (482.61 it/sec) -training >> step=2143300, episode=358 reward=0.7747781 (500.57 it/sec) -training >> step=2143400, episode=358 reward=0.7426915 (466.90 it/sec) -training >> step=2143500, episode=358 reward=0.7794656 (504.51 it/sec) -training >> step=2143600, episode=358 reward=0.7719734 (525.33 it/sec) -training >> step=2143700, episode=358 reward=0.7520256 (432.23 it/sec) -training >> step=2143800, episode=358 reward=0.7703182 (475.37 it/sec) -training >> step=2143900, episode=358 reward=0.7681301 (501.84 it/sec) -training >> step=2144000, episode=358 reward=0.7981196 (414.75 it/sec) -training >> step=2144100, episode=358 reward=0.7787637 (491.69 it/sec) -training >> step=2144200, episode=358 reward=0.7825632 (435.98 it/sec) -training >> step=2144300, episode=358 reward=0.7619257 (470.93 it/sec) -training >> step=2144400, episode=358 reward=0.7558935 (469.62 it/sec) -training >> step=2144500, episode=358 reward=0.7669253 (486.39 it/sec) -training >> step=2144600, episode=358 reward=0.7791552 (442.20 it/sec) -training >> step=2144700, episode=358 reward=0.7821911 (481.31 it/sec) -training >> step=2144800, episode=358 reward=0.7675344 (504.31 it/sec) -training >> step=2144900, episode=358 reward=0.7608535 (498.15 it/sec) -training >> step=2145000, episode=358 reward=0.7551252 (506.41 it/sec) -training >> step=2145100, episode=358 reward=0.7644325 (484.49 it/sec) -training >> step=2145200, episode=358 reward=0.7542914 (493.81 it/sec) -training >> step=2145300, episode=358 reward=0.7808273 (478.06 it/sec) -training >> step=2145400, episode=358 reward=0.7694455 (445.37 it/sec) -training >> step=2145500, episode=358 reward=0.7616949 (490.34 it/sec) -training >> step=2145600, episode=358 reward=0.7751183 (465.05 it/sec) -training >> step=2145700, episode=358 reward=0.7635008 (494.02 it/sec) -training >> step=2145800, episode=358 reward=0.7619665 (473.42 it/sec) -training >> step=2145900, episode=358 reward=0.7658285 (507.45 it/sec) -training >> step=2146000, episode=358 reward=0.7764981 (522.53 it/sec) -training >> step=2146100, episode=358 reward=0.7516546 (481.18 it/sec) -training >> step=2146200, episode=358 reward=0.7555798 (482.58 it/sec) -training >> step=2146300, episode=358 reward=0.758895 (486.69 it/sec) -training >> step=2146400, episode=358 reward=0.7440847 (494.74 it/sec) -training >> step=2146500, episode=358 reward=0.7711166 (450.60 it/sec) -training >> step=2146600, episode=358 reward=0.7530766 (473.78 it/sec) -training >> step=2146700, episode=358 reward=0.7405353 (514.27 it/sec) -training >> step=2146800, episode=358 reward=0.7700468 (515.23 it/sec) -training >> step=2146900, episode=358 reward=0.7772916 (483.98 it/sec) -training >> step=2147000, episode=358 reward=0.7600043 (458.19 it/sec) -training >> step=2147100, episode=358 reward=0.7491648 (469.05 it/sec) -training >> step=2147200, episode=358 reward=0.7613059 (502.82 it/sec) -training >> step=2147300, episode=359 reward=0.7566894 (149.49 it/sec) -training >> step=2147400, episode=359 reward=0.739046 (492.62 it/sec) -training >> step=2147500, episode=359 reward=0.7562621 (489.03 it/sec) -training >> step=2147600, episode=359 reward=0.7412408 (480.84 it/sec) -training >> step=2147700, episode=359 reward=0.7568226 (452.67 it/sec) -training >> step=2147800, episode=359 reward=0.7667727 (399.47 it/sec) -training >> step=2147900, episode=359 reward=0.7753333 (333.04 it/sec) -training >> step=2148000, episode=359 reward=0.7571729 (493.59 it/sec) -training >> step=2148100, episode=359 reward=0.7681465 (462.17 it/sec) -training >> step=2148200, episode=359 reward=0.7684436 (511.51 it/sec) -training >> step=2148300, episode=359 reward=0.7450281 (428.45 it/sec) -training >> step=2148400, episode=359 reward=0.7807823 (422.91 it/sec) -training >> step=2148500, episode=359 reward=0.7727979 (474.11 it/sec) -training >> step=2148600, episode=359 reward=0.7652954 (498.00 it/sec) -training >> step=2148700, episode=359 reward=0.7806699 (555.72 it/sec) -training >> step=2148800, episode=359 reward=0.7736632 (468.24 it/sec) -training >> step=2148900, episode=359 reward=0.7523722 (522.08 it/sec) -training >> step=2149000, episode=359 reward=0.7588223 (550.16 it/sec) -training >> step=2149100, episode=359 reward=0.7766368 (532.09 it/sec) -training >> step=2149200, episode=359 reward=0.7694433 (508.92 it/sec) -training >> step=2149300, episode=359 reward=0.7507014 (505.18 it/sec) -training >> step=2149400, episode=359 reward=0.7711409 (534.77 it/sec) -training >> step=2149500, episode=359 reward=0.7662324 (534.21 it/sec) -training >> step=2149600, episode=359 reward=0.7769591 (505.40 it/sec) -training >> step=2149700, episode=359 reward=0.7601195 (514.61 it/sec) -training >> step=2149800, episode=359 reward=0.7677131 (513.56 it/sec) -training >> step=2149900, episode=359 reward=0.7540823 (492.84 it/sec) -training >> step=2150000, episode=359 reward=0.7669737 (455.80 it/sec) -training >> step=2150100, episode=359 reward=0.7688916 (444.49 it/sec) -training >> step=2150200, episode=359 reward=0.7643059 (419.22 it/sec) -training >> step=2150300, episode=359 reward=0.7607332 (416.45 it/sec) -training >> step=2150400, episode=359 reward=0.7778939 (478.28 it/sec) -training >> step=2150500, episode=359 reward=0.7769355 (484.10 it/sec) -training >> step=2150600, episode=359 reward=0.7501494 (470.76 it/sec) -training >> step=2150700, episode=359 reward=0.781407 (458.80 it/sec) -training >> step=2150800, episode=359 reward=0.7560844 (443.00 it/sec) -training >> step=2150900, episode=359 reward=0.7864214 (473.24 it/sec) -training >> step=2151000, episode=359 reward=0.7618788 (543.23 it/sec) -training >> step=2151100, episode=359 reward=0.7639369 (504.92 it/sec) -training >> step=2151200, episode=359 reward=0.7502173 (521.20 it/sec) -training >> step=2151300, episode=359 reward=0.7642145 (518.32 it/sec) -training >> step=2151400, episode=359 reward=0.7657672 (548.45 it/sec) -training >> step=2151500, episode=359 reward=0.7319224 (561.18 it/sec) -training >> step=2151600, episode=359 reward=0.7528185 (565.99 it/sec) -training >> step=2151700, episode=359 reward=0.7754312 (415.69 it/sec) -training >> step=2151800, episode=359 reward=0.7612763 (484.01 it/sec) -training >> step=2151900, episode=359 reward=0.7704026 (498.14 it/sec) -training >> step=2152000, episode=359 reward=0.7717729 (465.55 it/sec) -training >> step=2152100, episode=359 reward=0.7714432 (369.91 it/sec) -training >> step=2152200, episode=359 reward=0.7451791 (423.41 it/sec) -training >> step=2152300, episode=359 reward=0.7631306 (443.40 it/sec) -training >> step=2152400, episode=359 reward=0.7592182 (470.72 it/sec) -training >> step=2152500, episode=359 reward=0.7742175 (456.52 it/sec) -training >> step=2152600, episode=359 reward=0.7594705 (421.10 it/sec) -training >> step=2152700, episode=359 reward=0.7723036 (464.10 it/sec) -training >> step=2152800, episode=359 reward=0.7667162 (455.18 it/sec) -training >> step=2152900, episode=359 reward=0.7518097 (422.73 it/sec) -training >> step=2153000, episode=359 reward=0.7880417 (483.69 it/sec) -training >> step=2153100, episode=359 reward=0.7440323 (463.29 it/sec) -training >> step=2153200, episode=359 reward=0.7451979 (465.67 it/sec) -training >> step=2153300, episode=360 reward=0.7816473 (119.45 it/sec) -training >> step=2153400, episode=360 reward=0.7472821 (478.09 it/sec) -training >> step=2153500, episode=360 reward=0.7693455 (441.46 it/sec) -training >> step=2153600, episode=360 reward=0.7658917 (468.90 it/sec) -training >> step=2153700, episode=360 reward=0.7677253 (457.32 it/sec) -training >> step=2153800, episode=360 reward=0.7730991 (481.59 it/sec) -training >> step=2153900, episode=360 reward=0.7482039 (353.26 it/sec) -training >> step=2154000, episode=360 reward=0.763089 (518.13 it/sec) -training >> step=2154100, episode=360 reward=0.7827092 (519.66 it/sec) -training >> step=2154200, episode=360 reward=0.7533208 (492.33 it/sec) -training >> step=2154300, episode=360 reward=0.7561098 (472.22 it/sec) -training >> step=2154400, episode=360 reward=0.7466199 (462.02 it/sec) -training >> step=2154500, episode=360 reward=0.7661822 (484.67 it/sec) -training >> step=2154600, episode=360 reward=0.7882247 (455.65 it/sec) -training >> step=2154700, episode=360 reward=0.7471254 (495.65 it/sec) -training >> step=2154800, episode=360 reward=0.7623295 (494.81 it/sec) -training >> step=2154900, episode=360 reward=0.7792094 (537.34 it/sec) -training >> step=2155000, episode=360 reward=0.7772557 (464.81 it/sec) -training >> step=2155100, episode=360 reward=0.759216 (454.91 it/sec) -training >> step=2155200, episode=360 reward=0.7564284 (450.24 it/sec) -training >> step=2155300, episode=360 reward=0.7721413 (432.75 it/sec) -training >> step=2155400, episode=360 reward=0.768136 (397.31 it/sec) -training >> step=2155500, episode=360 reward=0.7556913 (354.87 it/sec) -training >> step=2155600, episode=360 reward=0.7467104 (351.39 it/sec) -training >> step=2155700, episode=360 reward=0.7607896 (412.26 it/sec) -training >> step=2155800, episode=360 reward=0.7526088 (417.42 it/sec) -training >> step=2155900, episode=360 reward=0.7903301 (474.42 it/sec) -training >> step=2156000, episode=360 reward=0.7805363 (442.56 it/sec) -training >> step=2156100, episode=360 reward=0.7713758 (432.40 it/sec) -training >> step=2156200, episode=360 reward=0.763881 (468.62 it/sec) -training >> step=2156300, episode=360 reward=0.7668437 (488.25 it/sec) -training >> step=2156400, episode=360 reward=0.7603778 (526.00 it/sec) -training >> step=2156500, episode=360 reward=0.7766535 (520.95 it/sec) -training >> step=2156600, episode=360 reward=0.7524906 (488.45 it/sec) -training >> step=2156700, episode=360 reward=0.7562314 (549.28 it/sec) -training >> step=2156800, episode=360 reward=0.7935081 (549.55 it/sec) -training >> step=2156900, episode=360 reward=0.7518883 (506.78 it/sec) -training >> step=2157000, episode=360 reward=0.7747108 (398.72 it/sec) -training >> step=2157100, episode=360 reward=0.7643377 (438.72 it/sec) -training >> step=2157200, episode=360 reward=0.7849995 (490.24 it/sec) -training >> step=2157300, episode=360 reward=0.7777266 (511.55 it/sec) -training >> step=2157400, episode=360 reward=0.7640356 (529.95 it/sec) -training >> step=2157500, episode=360 reward=0.754652 (509.35 it/sec) -training >> step=2157600, episode=360 reward=0.7554233 (497.40 it/sec) -training >> step=2157700, episode=360 reward=0.7670094 (545.33 it/sec) -training >> step=2157800, episode=360 reward=0.764012 (575.51 it/sec) -training >> step=2157900, episode=360 reward=0.7439584 (541.37 it/sec) -training >> step=2158000, episode=360 reward=0.7710481 (543.66 it/sec) -training >> step=2158100, episode=360 reward=0.7540559 (496.60 it/sec) -training >> step=2158200, episode=360 reward=0.7574838 (512.16 it/sec) -training >> step=2158300, episode=360 reward=0.7510478 (537.49 it/sec) -training >> step=2158400, episode=360 reward=0.750028 (559.54 it/sec) -training >> step=2158500, episode=360 reward=0.7502866 (569.44 it/sec) -training >> step=2158600, episode=360 reward=0.7680392 (506.43 it/sec) -training >> step=2158700, episode=360 reward=0.7585711 (487.19 it/sec) -training >> step=2158800, episode=360 reward=0.7413927 (537.63 it/sec) -training >> step=2158900, episode=360 reward=0.7579316 (556.41 it/sec) -training >> step=2159000, episode=360 reward=0.7674584 (517.54 it/sec) -training >> step=2159100, episode=360 reward=0.738666 (524.05 it/sec) -training >> step=2159200, episode=360 reward=0.7714812 (478.23 it/sec) -training >> step=2159300, episode=361 reward=0.7564632 (132.45 it/sec) -training >> step=2159400, episode=361 reward=0.7558239 (504.78 it/sec) -training >> step=2159500, episode=361 reward=0.7689832 (528.24 it/sec) -training >> step=2159600, episode=361 reward=0.7620031 (538.96 it/sec) -training >> step=2159700, episode=361 reward=0.7696267 (573.36 it/sec) -training >> step=2159800, episode=361 reward=0.7463471 (575.63 it/sec) -training >> step=2159900, episode=361 reward=0.7632062 (492.35 it/sec) -training >> step=2160000, episode=361 reward=0.7463281 (388.39 it/sec) -training >> step=2160100, episode=361 reward=0.7647356 (582.00 it/sec) -training >> step=2160200, episode=361 reward=0.752263 (556.76 it/sec) -training >> step=2160300, episode=361 reward=0.7760522 (548.87 it/sec) -training >> step=2160400, episode=361 reward=0.7824414 (524.28 it/sec) -training >> step=2160500, episode=361 reward=0.7401518 (531.53 it/sec) -training >> step=2160600, episode=361 reward=0.7444638 (568.22 it/sec) -training >> step=2160700, episode=361 reward=0.7616046 (550.48 it/sec) -training >> step=2160800, episode=361 reward=0.7624665 (588.24 it/sec) -training >> step=2160900, episode=361 reward=0.7455267 (542.87 it/sec) -training >> step=2161000, episode=361 reward=0.7763714 (531.54 it/sec) -training >> step=2161100, episode=361 reward=0.7750498 (484.46 it/sec) -training >> step=2161200, episode=361 reward=0.7587366 (529.86 it/sec) -training >> step=2161300, episode=361 reward=0.7645082 (545.52 it/sec) -training >> step=2161400, episode=361 reward=0.7593984 (532.02 it/sec) -training >> step=2161500, episode=361 reward=0.7961328 (528.58 it/sec) -training >> step=2161600, episode=361 reward=0.7526584 (509.20 it/sec) -training >> step=2161700, episode=361 reward=0.7779338 (490.45 it/sec) -training >> step=2161800, episode=361 reward=0.755507 (496.64 it/sec) -training >> step=2161900, episode=361 reward=0.7768502 (527.85 it/sec) -training >> step=2162000, episode=361 reward=0.7721181 (475.70 it/sec) -training >> step=2162100, episode=361 reward=0.7641395 (479.55 it/sec) -training >> step=2162200, episode=361 reward=0.759275 (495.90 it/sec) -training >> step=2162300, episode=361 reward=0.770391 (516.19 it/sec) -training >> step=2162400, episode=361 reward=0.7834421 (471.80 it/sec) -training >> step=2162500, episode=361 reward=0.7641922 (478.15 it/sec) -training >> step=2162600, episode=361 reward=0.7770585 (456.97 it/sec) -training >> step=2162700, episode=361 reward=0.775543 (494.17 it/sec) -training >> step=2162800, episode=361 reward=0.7687799 (490.54 it/sec) -training >> step=2162900, episode=361 reward=0.7776634 (470.41 it/sec) -training >> step=2163000, episode=361 reward=0.7548307 (462.17 it/sec) -training >> step=2163100, episode=361 reward=0.7837567 (458.64 it/sec) -training >> step=2163200, episode=361 reward=0.7837498 (491.17 it/sec) -training >> step=2163300, episode=361 reward=0.7886724 (494.39 it/sec) -training >> step=2163400, episode=361 reward=0.7628944 (494.18 it/sec) -training >> step=2163500, episode=361 reward=0.7736639 (514.42 it/sec) -training >> step=2163600, episode=361 reward=0.7421249 (468.91 it/sec) -training >> step=2163700, episode=361 reward=0.748138 (519.57 it/sec) -training >> step=2163800, episode=361 reward=0.7915896 (535.96 it/sec) -training >> step=2163900, episode=361 reward=0.7426881 (513.51 it/sec) -training >> step=2164000, episode=361 reward=0.77855 (487.69 it/sec) -training >> step=2164100, episode=361 reward=0.7770529 (424.18 it/sec) -training >> step=2164200, episode=361 reward=0.7594969 (480.89 it/sec) -training >> step=2164300, episode=361 reward=0.7679079 (511.30 it/sec) -training >> step=2164400, episode=361 reward=0.7529065 (510.21 it/sec) -training >> step=2164500, episode=361 reward=0.7542218 (507.91 it/sec) -training >> step=2164600, episode=361 reward=0.7626128 (478.61 it/sec) -training >> step=2164700, episode=361 reward=0.751974 (522.31 it/sec) -training >> step=2164800, episode=361 reward=0.7682895 (480.54 it/sec) -training >> step=2164900, episode=361 reward=0.7714837 (450.79 it/sec) -training >> step=2165000, episode=361 reward=0.7490081 (421.75 it/sec) -training >> step=2165100, episode=361 reward=0.760241 (452.58 it/sec) -training >> step=2165200, episode=361 reward=0.7766907 (483.41 it/sec) -training >> step=2165300, episode=362 reward=0.7579394 (126.02 it/sec) -training >> step=2165400, episode=362 reward=0.7428766 (509.10 it/sec) -training >> step=2165500, episode=362 reward=0.7577762 (518.69 it/sec) -training >> step=2165600, episode=362 reward=0.7530077 (515.81 it/sec) -training >> step=2165700, episode=362 reward=0.7747023 (528.94 it/sec) -training >> step=2165800, episode=362 reward=0.7761769 (534.93 it/sec) -training >> step=2165900, episode=362 reward=0.7656429 (559.68 it/sec) -training >> step=2166000, episode=362 reward=0.7652516 (554.67 it/sec) -training >> step=2166100, episode=362 reward=0.7665939 (537.07 it/sec) -training >> step=2166200, episode=362 reward=0.7755632 (345.62 it/sec) -training >> step=2166300, episode=362 reward=0.7740644 (572.97 it/sec) -training >> step=2166400, episode=362 reward=0.7771105 (541.53 it/sec) -training >> step=2166500, episode=362 reward=0.7552847 (541.39 it/sec) -training >> step=2166600, episode=362 reward=0.7628245 (527.94 it/sec) -training >> step=2166700, episode=362 reward=0.7615296 (558.98 it/sec) -training >> step=2166800, episode=362 reward=0.7556414 (556.80 it/sec) -training >> step=2166900, episode=362 reward=0.7593662 (509.92 it/sec) -training >> step=2167000, episode=362 reward=0.7590246 (533.50 it/sec) -training >> step=2167100, episode=362 reward=0.764671 (539.36 it/sec) -training >> step=2167200, episode=362 reward=0.7646024 (535.52 it/sec) -training >> step=2167300, episode=362 reward=0.773112 (467.43 it/sec) -training >> step=2167400, episode=362 reward=0.7587802 (494.02 it/sec) -training >> step=2167500, episode=362 reward=0.7737979 (498.46 it/sec) -training >> step=2167600, episode=362 reward=0.7863943 (525.69 it/sec) -training >> step=2167700, episode=362 reward=0.765407 (523.61 it/sec) -training >> step=2167800, episode=362 reward=0.7457776 (489.12 it/sec) -training >> step=2167900, episode=362 reward=0.7604052 (512.38 it/sec) -training >> step=2168000, episode=362 reward=0.7453925 (479.46 it/sec) -training >> step=2168100, episode=362 reward=0.7664466 (532.77 it/sec) -training >> step=2168200, episode=362 reward=0.7590202 (521.86 it/sec) -training >> step=2168300, episode=362 reward=0.7669913 (553.24 it/sec) -training >> step=2168400, episode=362 reward=0.7525883 (512.02 it/sec) -training >> step=2168500, episode=362 reward=0.7579256 (472.36 it/sec) -training >> step=2168600, episode=362 reward=0.7677929 (528.78 it/sec) -training >> step=2168700, episode=362 reward=0.7730622 (540.53 it/sec) -training >> step=2168800, episode=362 reward=0.7817667 (497.52 it/sec) -training >> step=2168900, episode=362 reward=0.7571653 (495.69 it/sec) -training >> step=2169000, episode=362 reward=0.7677318 (499.02 it/sec) -training >> step=2169100, episode=362 reward=0.7716916 (531.89 it/sec) -training >> step=2169200, episode=362 reward=0.7638856 (552.40 it/sec) -training >> step=2169300, episode=362 reward=0.7462052 (526.36 it/sec) -training >> step=2169400, episode=362 reward=0.7755532 (537.68 it/sec) -training >> step=2169500, episode=362 reward=0.7932246 (490.82 it/sec) -training >> step=2169600, episode=362 reward=0.7750038 (504.91 it/sec) -training >> step=2169700, episode=362 reward=0.7810943 (497.20 it/sec) -training >> step=2169800, episode=362 reward=0.7586781 (515.72 it/sec) -training >> step=2169900, episode=362 reward=0.7504644 (520.04 it/sec) -training >> step=2170000, episode=362 reward=0.7775002 (492.00 it/sec) -training >> step=2170100, episode=362 reward=0.7419652 (480.63 it/sec) -training >> step=2170200, episode=362 reward=0.7635627 (523.17 it/sec) -training >> step=2170300, episode=362 reward=0.7642518 (505.53 it/sec) -training >> step=2170400, episode=362 reward=0.7381409 (489.86 it/sec) -training >> step=2170500, episode=362 reward=0.7493722 (537.21 it/sec) -training >> step=2170600, episode=362 reward=0.7646626 (515.71 it/sec) -training >> step=2170700, episode=362 reward=0.774551 (576.81 it/sec) -training >> step=2170800, episode=362 reward=0.7907188 (485.05 it/sec) -training >> step=2170900, episode=362 reward=0.7673209 (505.87 it/sec) -training >> step=2171000, episode=362 reward=0.7476836 (535.16 it/sec) -training >> step=2171100, episode=362 reward=0.773113 (533.65 it/sec) -training >> step=2171200, episode=362 reward=0.7836974 (504.03 it/sec) -training >> step=2171300, episode=363 reward=0.7585609 (190.51 it/sec) -training >> step=2171400, episode=363 reward=0.7616428 (527.76 it/sec) -training >> step=2171500, episode=363 reward=0.7468216 (519.70 it/sec) -training >> step=2171600, episode=363 reward=0.7699952 (526.40 it/sec) -training >> step=2171700, episode=363 reward=0.7639506 (534.99 it/sec) -training >> step=2171800, episode=363 reward=0.7722955 (530.10 it/sec) -training >> step=2171900, episode=363 reward=0.7608368 (544.43 it/sec) -training >> step=2172000, episode=363 reward=0.7620096 (523.34 it/sec) -training >> step=2172100, episode=363 reward=0.7496696 (526.29 it/sec) -training >> step=2172200, episode=363 reward=0.767482 (535.77 it/sec) -training >> step=2172300, episode=363 reward=0.777589 (372.05 it/sec) -training >> step=2172400, episode=363 reward=0.7646042 (524.60 it/sec) -training >> step=2172500, episode=363 reward=0.7563059 (502.75 it/sec) -training >> step=2172600, episode=363 reward=0.7511064 (551.31 it/sec) -training >> step=2172700, episode=363 reward=0.7641024 (552.99 it/sec) -training >> step=2172800, episode=363 reward=0.757724 (505.59 it/sec) -training >> step=2172900, episode=363 reward=0.7558281 (506.52 it/sec) -training >> step=2173000, episode=363 reward=0.7777861 (508.62 it/sec) -training >> step=2173100, episode=363 reward=0.7713331 (527.11 it/sec) -training >> step=2173200, episode=363 reward=0.7683231 (514.72 it/sec) -training >> step=2173300, episode=363 reward=0.7581618 (549.12 it/sec) -training >> step=2173400, episode=363 reward=0.7814101 (555.67 it/sec) -training >> step=2173500, episode=363 reward=0.740128 (544.61 it/sec) -training >> step=2173600, episode=363 reward=0.7737563 (531.40 it/sec) -training >> step=2173700, episode=363 reward=0.7711505 (497.32 it/sec) -training >> step=2173800, episode=363 reward=0.771493 (568.84 it/sec) -training >> step=2173900, episode=363 reward=0.7771295 (498.46 it/sec) -training >> step=2174000, episode=363 reward=0.7657406 (479.03 it/sec) -training >> step=2174100, episode=363 reward=0.7437955 (497.78 it/sec) -training >> step=2174200, episode=363 reward=0.7700399 (511.66 it/sec) -training >> step=2174300, episode=363 reward=0.7455357 (567.24 it/sec) -training >> step=2174400, episode=363 reward=0.7544842 (500.78 it/sec) -training >> step=2174500, episode=363 reward=0.7822227 (538.78 it/sec) -training >> step=2174600, episode=363 reward=0.7682548 (558.35 it/sec) -training >> step=2174700, episode=363 reward=0.7779688 (567.89 it/sec) -training >> step=2174800, episode=363 reward=0.757539 (542.34 it/sec) -training >> step=2174900, episode=363 reward=0.7412242 (533.81 it/sec) -training >> step=2175000, episode=363 reward=0.7911378 (530.33 it/sec) -training >> step=2175100, episode=363 reward=0.7606406 (541.99 it/sec) -training >> step=2175200, episode=363 reward=0.7569461 (531.56 it/sec) -training >> step=2175300, episode=363 reward=0.7519658 (547.49 it/sec) -training >> step=2175400, episode=363 reward=0.73819 (569.44 it/sec) -training >> step=2175500, episode=363 reward=0.7807602 (547.08 it/sec) -training >> step=2175600, episode=363 reward=0.7776756 (516.11 it/sec) -training >> step=2175700, episode=363 reward=0.7766219 (540.57 it/sec) -training >> step=2175800, episode=363 reward=0.7650221 (498.82 it/sec) -training >> step=2175900, episode=363 reward=0.7658881 (529.94 it/sec) -training >> step=2176000, episode=363 reward=0.7604543 (546.35 it/sec) -training >> step=2176100, episode=363 reward=0.7671531 (529.76 it/sec) -training >> step=2176200, episode=363 reward=0.7290335 (515.27 it/sec) -training >> step=2176300, episode=363 reward=0.7483323 (532.74 it/sec) -training >> step=2176400, episode=363 reward=0.7461205 (533.04 it/sec) -training >> step=2176500, episode=363 reward=0.7546787 (536.63 it/sec) -training >> step=2176600, episode=363 reward=0.7568107 (531.72 it/sec) -training >> step=2176700, episode=363 reward=0.7668898 (516.67 it/sec) -training >> step=2176800, episode=363 reward=0.771139 (520.40 it/sec) -training >> step=2176900, episode=363 reward=0.7673415 (520.70 it/sec) -training >> step=2177000, episode=363 reward=0.7722216 (540.78 it/sec) -training >> step=2177100, episode=363 reward=0.7535274 (537.79 it/sec) -training >> step=2177200, episode=363 reward=0.7477066 (531.67 it/sec) -training >> step=2177300, episode=364 reward=0.7598956 (189.97 it/sec) -training >> step=2177400, episode=364 reward=0.7547089 (511.71 it/sec) -training >> step=2177500, episode=364 reward=0.7441909 (556.27 it/sec) -training >> step=2177600, episode=364 reward=0.7733647 (526.46 it/sec) -training >> step=2177700, episode=364 reward=0.7447542 (525.06 it/sec) -training >> step=2177800, episode=364 reward=0.7609149 (551.58 it/sec) -training >> step=2177900, episode=364 reward=0.7739329 (540.35 it/sec) -training >> step=2178000, episode=364 reward=0.7459816 (522.67 it/sec) -training >> step=2178100, episode=364 reward=0.7688436 (553.75 it/sec) -training >> step=2178200, episode=364 reward=0.7688518 (544.25 it/sec) -training >> step=2178300, episode=364 reward=0.754604 (509.27 it/sec) -training >> step=2178400, episode=364 reward=0.7617692 (532.49 it/sec) -training >> step=2178500, episode=364 reward=0.7714804 (560.79 it/sec) -training >> step=2178600, episode=364 reward=0.7614802 (377.13 it/sec) -training >> step=2178700, episode=364 reward=0.7832293 (531.93 it/sec) -training >> step=2178800, episode=364 reward=0.7405861 (531.28 it/sec) -training >> step=2178900, episode=364 reward=0.795997 (562.95 it/sec) -training >> step=2179000, episode=364 reward=0.7634858 (506.47 it/sec) -training >> step=2179100, episode=364 reward=0.7748597 (452.20 it/sec) -training >> step=2179200, episode=364 reward=0.7628205 (525.52 it/sec) -training >> step=2179300, episode=364 reward=0.7702132 (561.44 it/sec) -training >> step=2179400, episode=364 reward=0.765267 (524.47 it/sec) -training >> step=2179500, episode=364 reward=0.7464526 (545.23 it/sec) -training >> step=2179600, episode=364 reward=0.7532896 (547.84 it/sec) -training >> step=2179700, episode=364 reward=0.7524827 (559.32 it/sec) -training >> step=2179800, episode=364 reward=0.7719848 (539.09 it/sec) -training >> step=2179900, episode=364 reward=0.7873624 (510.32 it/sec) -training >> step=2180000, episode=364 reward=0.7802168 (521.56 it/sec) -training >> step=2180100, episode=364 reward=0.7644224 (554.09 it/sec) -training >> step=2180200, episode=364 reward=0.7701825 (515.15 it/sec) -training >> step=2180300, episode=364 reward=0.7610139 (511.78 it/sec) -training >> step=2180400, episode=364 reward=0.7661579 (551.55 it/sec) -training >> step=2180500, episode=364 reward=0.7541071 (528.54 it/sec) -training >> step=2180600, episode=364 reward=0.7609601 (542.07 it/sec) -training >> step=2180700, episode=364 reward=0.7723214 (534.71 it/sec) -training >> step=2180800, episode=364 reward=0.7484478 (544.43 it/sec) -training >> step=2180900, episode=364 reward=0.7604976 (545.95 it/sec) -training >> step=2181000, episode=364 reward=0.7777915 (503.87 it/sec) -training >> step=2181100, episode=364 reward=0.7623388 (532.08 it/sec) -training >> step=2181200, episode=364 reward=0.7832772 (527.25 it/sec) -training >> step=2181300, episode=364 reward=0.7769021 (536.98 it/sec) -training >> step=2181400, episode=364 reward=0.7842768 (541.86 it/sec) -training >> step=2181500, episode=364 reward=0.7642525 (569.49 it/sec) -training >> step=2181600, episode=364 reward=0.7633171 (545.12 it/sec) -training >> step=2181700, episode=364 reward=0.7533268 (543.40 it/sec) -training >> step=2181800, episode=364 reward=0.7782825 (486.88 it/sec) -training >> step=2181900, episode=364 reward=0.7646768 (550.40 it/sec) -training >> step=2182000, episode=364 reward=0.759464 (538.04 it/sec) -training >> step=2182100, episode=364 reward=0.7801467 (495.67 it/sec) -training >> step=2182200, episode=364 reward=0.7797806 (494.75 it/sec) -training >> step=2182300, episode=364 reward=0.7667761 (537.89 it/sec) -training >> step=2182400, episode=364 reward=0.7701346 (571.92 it/sec) -training >> step=2182500, episode=364 reward=0.7550902 (550.49 it/sec) -training >> step=2182600, episode=364 reward=0.7476493 (531.99 it/sec) -training >> step=2182700, episode=364 reward=0.774339 (519.90 it/sec) -training >> step=2182800, episode=364 reward=0.7544448 (559.48 it/sec) -training >> step=2182900, episode=364 reward=0.7759734 (507.81 it/sec) -training >> step=2183000, episode=364 reward=0.7735014 (532.67 it/sec) -training >> step=2183100, episode=364 reward=0.7473854 (537.57 it/sec) -training >> step=2183200, episode=364 reward=0.7642416 (525.26 it/sec) -training >> step=2183300, episode=365 reward=0.7499932 (173.44 it/sec) -training >> step=2183400, episode=365 reward=0.7496044 (493.21 it/sec) -training >> step=2183500, episode=365 reward=0.7697259 (505.71 it/sec) -training >> step=2183600, episode=365 reward=0.7572303 (533.56 it/sec) -training >> step=2183700, episode=365 reward=0.774041 (556.67 it/sec) -training >> step=2183800, episode=365 reward=0.7644389 (541.12 it/sec) -training >> step=2183900, episode=365 reward=0.7708709 (531.17 it/sec) -training >> step=2184000, episode=365 reward=0.7578475 (544.56 it/sec) -training >> step=2184100, episode=365 reward=0.755718 (501.94 it/sec) -training >> step=2184200, episode=365 reward=0.7649055 (489.24 it/sec) -training >> step=2184300, episode=365 reward=0.7529823 (522.77 it/sec) -training >> step=2184400, episode=365 reward=0.7678186 (523.64 it/sec) -training >> step=2184500, episode=365 reward=0.7617533 (564.13 it/sec) -training >> step=2184600, episode=365 reward=0.7775524 (497.73 it/sec) -training >> step=2184700, episode=365 reward=0.7681212 (548.79 it/sec) -training >> step=2184800, episode=365 reward=0.7534291 (375.29 it/sec) -training >> step=2184900, episode=365 reward=0.7590845 (519.64 it/sec) -training >> step=2185000, episode=365 reward=0.7703312 (563.01 it/sec) -training >> step=2185100, episode=365 reward=0.763794 (515.33 it/sec) -training >> step=2185200, episode=365 reward=0.7609422 (559.58 it/sec) -training >> step=2185300, episode=365 reward=0.7669854 (528.53 it/sec) -training >> step=2185400, episode=365 reward=0.7602934 (519.98 it/sec) -training >> step=2185500, episode=365 reward=0.7605473 (551.77 it/sec) -training >> step=2185600, episode=365 reward=0.7737275 (552.12 it/sec) -training >> step=2185700, episode=365 reward=0.7615508 (518.11 it/sec) -training >> step=2185800, episode=365 reward=0.7635053 (519.76 it/sec) -training >> step=2185900, episode=365 reward=0.7929167 (539.67 it/sec) -training >> step=2186000, episode=365 reward=0.757108 (511.24 it/sec) -training >> step=2186100, episode=365 reward=0.7660912 (552.92 it/sec) -training >> step=2186200, episode=365 reward=0.748565 (529.62 it/sec) -training >> step=2186300, episode=365 reward=0.7446936 (519.52 it/sec) -training >> step=2186400, episode=365 reward=0.7660366 (535.46 it/sec) -training >> step=2186500, episode=365 reward=0.7713171 (527.00 it/sec) -training >> step=2186600, episode=365 reward=0.7718836 (494.16 it/sec) -training >> step=2186700, episode=365 reward=0.7583605 (535.63 it/sec) -training >> step=2186800, episode=365 reward=0.7786154 (539.56 it/sec) -training >> step=2186900, episode=365 reward=0.7713495 (533.66 it/sec) -training >> step=2187000, episode=365 reward=0.7859144 (555.42 it/sec) -training >> step=2187100, episode=365 reward=0.7562248 (532.30 it/sec) -training >> step=2187200, episode=365 reward=0.7830139 (546.54 it/sec) -training >> step=2187300, episode=365 reward=0.7746837 (513.96 it/sec) -training >> step=2187400, episode=365 reward=0.7703555 (532.63 it/sec) -training >> step=2187500, episode=365 reward=0.7639824 (558.91 it/sec) -training >> step=2187600, episode=365 reward=0.7753744 (520.63 it/sec) -training >> step=2187700, episode=365 reward=0.7677475 (520.23 it/sec) -training >> step=2187800, episode=365 reward=0.7735598 (562.14 it/sec) -training >> step=2187900, episode=365 reward=0.7628494 (507.92 it/sec) -training >> step=2188000, episode=365 reward=0.7654617 (522.54 it/sec) -training >> step=2188100, episode=365 reward=0.7783638 (458.67 it/sec) -training >> step=2188200, episode=365 reward=0.7695384 (514.00 it/sec) -training >> step=2188300, episode=365 reward=0.7562753 (509.73 it/sec) -training >> step=2188400, episode=365 reward=0.7564903 (507.93 it/sec) -training >> step=2188500, episode=365 reward=0.7500463 (524.82 it/sec) -training >> step=2188600, episode=365 reward=0.7626583 (552.65 it/sec) -training >> step=2188700, episode=365 reward=0.7457207 (547.82 it/sec) -training >> step=2188800, episode=365 reward=0.7671826 (504.16 it/sec) -training >> step=2188900, episode=365 reward=0.7381133 (513.88 it/sec) -training >> step=2189000, episode=365 reward=0.7993726 (545.58 it/sec) -training >> step=2189100, episode=365 reward=0.7140221 (536.23 it/sec) -training >> step=2189200, episode=365 reward=0.759967 (471.02 it/sec) -training >> step=2189300, episode=366 reward=0.7906153 (183.07 it/sec) -training >> step=2189400, episode=366 reward=0.7634466 (541.01 it/sec) -training >> step=2189500, episode=366 reward=0.7512943 (538.47 it/sec) -training >> step=2189600, episode=366 reward=0.7355971 (533.28 it/sec) -training >> step=2189700, episode=366 reward=0.7404916 (523.73 it/sec) -training >> step=2189800, episode=366 reward=0.7809123 (519.58 it/sec) -training >> step=2189900, episode=366 reward=0.7889428 (557.18 it/sec) -training >> step=2190000, episode=366 reward=0.749221 (546.65 it/sec) -training >> step=2190100, episode=366 reward=0.7727636 (541.75 it/sec) -training >> step=2190200, episode=366 reward=0.7456753 (531.26 it/sec) -training >> step=2190300, episode=366 reward=0.776548 (553.03 it/sec) -training >> step=2190400, episode=366 reward=0.7734335 (526.99 it/sec) -training >> step=2190500, episode=366 reward=0.7570733 (503.29 it/sec) -training >> step=2190600, episode=366 reward=0.7313034 (537.55 it/sec) -training >> step=2190700, episode=366 reward=0.7546918 (524.25 it/sec) -training >> step=2190800, episode=366 reward=0.7623549 (543.06 it/sec) -training >> step=2190900, episode=366 reward=0.7584614 (529.03 it/sec) -training >> step=2191000, episode=366 reward=0.7808828 (530.46 it/sec) -training >> step=2191100, episode=366 reward=0.7678767 (375.05 it/sec) -training >> step=2191200, episode=366 reward=0.7661687 (480.57 it/sec) -training >> step=2191300, episode=366 reward=0.761857 (523.32 it/sec) -training >> step=2191400, episode=366 reward=0.7561392 (554.40 it/sec) -training >> step=2191500, episode=366 reward=0.7315538 (553.93 it/sec) -training >> step=2191600, episode=366 reward=0.7527779 (529.54 it/sec) -training >> step=2191700, episode=366 reward=0.7579545 (545.77 it/sec) -training >> step=2191800, episode=366 reward=0.7912784 (548.26 it/sec) -training >> step=2191900, episode=366 reward=0.7727727 (573.10 it/sec) -training >> step=2192000, episode=366 reward=0.7552547 (535.89 it/sec) -training >> step=2192100, episode=366 reward=0.7810118 (530.99 it/sec) -training >> step=2192200, episode=366 reward=0.7697061 (501.88 it/sec) -training >> step=2192300, episode=366 reward=0.7774249 (573.77 it/sec) -training >> step=2192400, episode=366 reward=0.7859181 (507.71 it/sec) -training >> step=2192500, episode=366 reward=0.7520171 (540.82 it/sec) -training >> step=2192600, episode=366 reward=0.740467 (540.75 it/sec) -training >> step=2192700, episode=366 reward=0.7693134 (265.50 it/sec) -training >> step=2192800, episode=366 reward=0.7624676 (535.45 it/sec) -training >> step=2192900, episode=366 reward=0.7573842 (531.82 it/sec) -training >> step=2193000, episode=366 reward=0.7648511 (516.13 it/sec) -training >> step=2193100, episode=366 reward=0.7509377 (414.86 it/sec) -training >> step=2193200, episode=366 reward=0.7700118 (440.56 it/sec) -training >> step=2193300, episode=366 reward=0.7997876 (464.66 it/sec) -training >> step=2193400, episode=366 reward=0.7660593 (515.50 it/sec) -training >> step=2193500, episode=366 reward=0.7690099 (465.88 it/sec) -training >> step=2193600, episode=366 reward=0.7858326 (451.47 it/sec) -training >> step=2193700, episode=366 reward=0.7834619 (513.51 it/sec) -training >> step=2193800, episode=366 reward=0.7816375 (541.55 it/sec) -training >> step=2193900, episode=366 reward=0.7732084 (533.84 it/sec) -training >> step=2194000, episode=366 reward=0.7890448 (495.72 it/sec) -training >> step=2194100, episode=366 reward=0.7493339 (497.80 it/sec) -training >> step=2194200, episode=366 reward=0.767217 (522.98 it/sec) -training >> step=2194300, episode=366 reward=0.7630882 (485.89 it/sec) -training >> step=2194400, episode=366 reward=0.7716104 (495.35 it/sec) -training >> step=2194500, episode=366 reward=0.7769945 (527.52 it/sec) -training >> step=2194600, episode=366 reward=0.7621797 (500.92 it/sec) -training >> step=2194700, episode=366 reward=0.7555496 (563.63 it/sec) -training >> step=2194800, episode=366 reward=0.7697319 (554.59 it/sec) -training >> step=2194900, episode=366 reward=0.752413 (514.94 it/sec) -training >> step=2195000, episode=366 reward=0.7493731 (531.37 it/sec) -training >> step=2195100, episode=366 reward=0.772665 (470.10 it/sec) -training >> step=2195200, episode=366 reward=0.7653833 (444.70 it/sec) -training >> step=2195300, episode=367 reward=0.7720487 (242.52 it/sec) -training >> step=2195400, episode=367 reward=0.7646196 (408.20 it/sec) -training >> step=2195500, episode=367 reward=0.7585777 (223.10 it/sec) -training >> step=2195600, episode=367 reward=0.7571194 (206.96 it/sec) -training >> step=2195700, episode=367 reward=0.7462146 (194.95 it/sec) -training >> step=2195800, episode=367 reward=0.7560446 (224.36 it/sec) -training >> step=2195900, episode=367 reward=0.7804676 (194.58 it/sec) -training >> step=2196000, episode=367 reward=0.7662023 (209.14 it/sec) -training >> step=2196100, episode=367 reward=0.7500774 (217.23 it/sec) -training >> step=2196200, episode=367 reward=0.768174 (224.89 it/sec) -training >> step=2196300, episode=367 reward=0.7397162 (235.34 it/sec) -training >> step=2196400, episode=367 reward=0.7562327 (232.76 it/sec) -training >> step=2196500, episode=367 reward=0.7680909 (228.35 it/sec) -training >> step=2196600, episode=367 reward=0.7657318 (215.25 it/sec) -training >> step=2196700, episode=367 reward=0.7579954 (216.67 it/sec) -training >> step=2196800, episode=367 reward=0.7530594 (234.33 it/sec) -training >> step=2196900, episode=367 reward=0.7522935 (225.44 it/sec) -training >> step=2197000, episode=367 reward=0.7811348 (232.02 it/sec) -training >> step=2197100, episode=367 reward=0.753186 (214.78 it/sec) -training >> step=2197200, episode=367 reward=0.7670387 (211.54 it/sec) -training >> step=2197300, episode=367 reward=0.7714476 (217.19 it/sec) -training >> step=2197400, episode=367 reward=0.7648521 (185.93 it/sec) -training >> step=2197500, episode=367 reward=0.769696 (217.77 it/sec) -training >> step=2197600, episode=367 reward=0.7756994 (235.43 it/sec) -training >> step=2197700, episode=367 reward=0.7646883 (203.61 it/sec) -training >> step=2197800, episode=367 reward=0.7659973 (217.20 it/sec) -training >> step=2197900, episode=367 reward=0.7738912 (234.54 it/sec) -training >> step=2198000, episode=367 reward=0.7723069 (212.44 it/sec) -training >> step=2198100, episode=367 reward=0.7431533 (236.03 it/sec) -training >> step=2198200, episode=367 reward=0.7565485 (231.34 it/sec) -training >> step=2198300, episode=367 reward=0.7551494 (253.92 it/sec) -training >> step=2198400, episode=367 reward=0.7357669 (221.17 it/sec) -training >> step=2198500, episode=367 reward=0.7703258 (227.70 it/sec) -training >> step=2198600, episode=367 reward=0.751942 (209.32 it/sec) -training >> step=2198700, episode=367 reward=0.7829607 (226.61 it/sec) -training >> step=2198800, episode=367 reward=0.7618756 (222.08 it/sec) -training >> step=2198900, episode=367 reward=0.7792325 (227.18 it/sec) -training >> step=2199000, episode=367 reward=0.7641674 (221.25 it/sec) -training >> step=2199100, episode=367 reward=0.7821222 (218.50 it/sec) -training >> step=2199200, episode=367 reward=0.7562423 (198.76 it/sec) -training >> step=2199300, episode=367 reward=0.7453648 (226.39 it/sec) -training >> step=2199400, episode=367 reward=0.7637824 (228.12 it/sec) -training >> step=2199500, episode=367 reward=0.7780868 (229.74 it/sec) -training >> step=2199600, episode=367 reward=0.7562847 (230.25 it/sec) -training >> step=2199700, episode=367 reward=0.7774868 (217.00 it/sec) -training >> step=2199800, episode=367 reward=0.788555 (206.25 it/sec) -training >> step=2199900, episode=367 reward=0.7555056 (215.32 it/sec) -training >> step=2200000, episode=367 reward=0.772404 (210.31 it/sec) -training >> step=2200100, episode=367 reward=0.7578516 (232.56 it/sec) -training >> step=2200200, episode=367 reward=0.7781963 (212.35 it/sec) -training >> step=2200300, episode=367 reward=0.7581699 (196.98 it/sec) -training >> step=2200400, episode=367 reward=0.7636243 (224.62 it/sec) -training >> step=2200500, episode=367 reward=0.7453534 (213.29 it/sec) -training >> step=2200600, episode=367 reward=0.7844418 (219.21 it/sec) -training >> step=2200700, episode=367 reward=0.7612028 (205.64 it/sec) -training >> step=2200800, episode=367 reward=0.7537475 (224.62 it/sec) -training >> step=2200900, episode=367 reward=0.7658606 (242.66 it/sec) -training >> step=2201000, episode=367 reward=0.7483041 (209.37 it/sec) -training >> step=2201100, episode=367 reward=0.7526026 (234.42 it/sec) -training >> step=2201200, episode=367 reward=0.7525551 (232.45 it/sec) -training >> step=2201300, episode=368 reward=0.7679687 (8.12 it/sec) -training >> step=2201400, episode=368 reward=0.776025 (209.14 it/sec) -training >> step=2201500, episode=368 reward=0.756944 (233.92 it/sec) -training >> step=2201600, episode=368 reward=0.7588788 (213.12 it/sec) -training >> step=2201700, episode=368 reward=0.7699283 (214.24 it/sec) -training >> step=2201800, episode=368 reward=0.7734412 (209.47 it/sec) -training >> step=2201900, episode=368 reward=0.7713808 (218.77 it/sec) -training >> step=2202000, episode=368 reward=0.76294 (235.13 it/sec) -training >> step=2202100, episode=368 reward=0.7749357 (226.11 it/sec) -training >> step=2202200, episode=368 reward=0.7732958 (228.62 it/sec) -training >> step=2202300, episode=368 reward=0.7630918 (217.54 it/sec) -training >> step=2202400, episode=368 reward=0.75932 (236.12 it/sec) -training >> step=2202500, episode=368 reward=0.7580191 (222.13 it/sec) -training >> step=2202600, episode=368 reward=0.7487503 (227.79 it/sec) -training >> step=2202700, episode=368 reward=0.7655898 (221.65 it/sec) -training >> step=2202800, episode=368 reward=0.7646456 (219.13 it/sec) -training >> step=2202900, episode=368 reward=0.7544838 (224.44 it/sec) -training >> step=2203000, episode=368 reward=0.7785847 (214.16 it/sec) -training >> step=2203100, episode=368 reward=0.7799087 (213.01 it/sec) -training >> step=2203200, episode=368 reward=0.7515471 (213.89 it/sec) -training >> step=2203300, episode=368 reward=0.759261 (234.45 it/sec) -training >> step=2203400, episode=368 reward=0.7685298 (231.25 it/sec) -training >> step=2203500, episode=368 reward=0.7753549 (218.81 it/sec) -training >> step=2203600, episode=368 reward=0.7722116 (237.84 it/sec) -training >> step=2203700, episode=368 reward=0.7721707 (221.28 it/sec) -training >> step=2203800, episode=368 reward=0.7786944 (227.49 it/sec) -training >> step=2203900, episode=368 reward=0.7627577 (168.07 it/sec) -training >> step=2204000, episode=368 reward=0.7540902 (229.36 it/sec) -training >> step=2204100, episode=368 reward=0.7826968 (235.48 it/sec) -training >> step=2204200, episode=368 reward=0.7453408 (204.50 it/sec) -training >> step=2204300, episode=368 reward=0.7737495 (226.47 it/sec) -training >> step=2204400, episode=368 reward=0.7744032 (220.76 it/sec) -training >> step=2204500, episode=368 reward=0.7555308 (223.45 it/sec) -training >> step=2204600, episode=368 reward=0.7558321 (226.12 it/sec) -training >> step=2204700, episode=368 reward=0.7781064 (217.63 it/sec) -training >> step=2204800, episode=368 reward=0.7506367 (227.55 it/sec) -training >> step=2204900, episode=368 reward=0.7560737 (234.47 it/sec) -training >> step=2205000, episode=368 reward=0.7643808 (229.32 it/sec) -training >> step=2205100, episode=368 reward=0.7628936 (267.71 it/sec) -training >> step=2205200, episode=368 reward=0.7377583 (453.26 it/sec) -training >> step=2205300, episode=368 reward=0.7536424 (466.52 it/sec) -training >> step=2205400, episode=368 reward=0.7532006 (522.53 it/sec) -training >> step=2205500, episode=368 reward=0.7609095 (554.56 it/sec) -training >> step=2205600, episode=368 reward=0.7501984 (539.86 it/sec) -training >> step=2205700, episode=368 reward=0.7585124 (557.25 it/sec) -training >> step=2205800, episode=368 reward=0.7536925 (569.26 it/sec) -training >> step=2205900, episode=368 reward=0.7583331 (591.92 it/sec) -training >> step=2206000, episode=368 reward=0.7681164 (619.76 it/sec) -training >> step=2206100, episode=368 reward=0.7546405 (575.90 it/sec) -training >> step=2206200, episode=368 reward=0.7550479 (587.59 it/sec) -training >> step=2206300, episode=368 reward=0.7421238 (564.78 it/sec) -training >> step=2206400, episode=368 reward=0.7556115 (547.89 it/sec) -training >> step=2206500, episode=368 reward=0.7611236 (586.70 it/sec) -training >> step=2206600, episode=368 reward=0.7659056 (601.71 it/sec) -training >> step=2206700, episode=368 reward=0.7480162 (565.26 it/sec) -training >> step=2206800, episode=368 reward=0.7562511 (593.86 it/sec) -training >> step=2206900, episode=368 reward=0.7635048 (573.91 it/sec) -training >> step=2207000, episode=368 reward=0.7704599 (571.04 it/sec) -training >> step=2207100, episode=368 reward=0.7629931 (601.05 it/sec) -training >> step=2207200, episode=368 reward=0.7610057 (587.06 it/sec) -training >> step=2207300, episode=369 reward=0.7650524 (52.24 it/sec) -training >> step=2207400, episode=369 reward=0.7581857 (573.63 it/sec) -training >> step=2207500, episode=369 reward=0.723761 (543.68 it/sec) -training >> step=2207600, episode=369 reward=0.7661574 (541.02 it/sec) -training >> step=2207700, episode=369 reward=0.7629813 (568.52 it/sec) -training >> step=2207800, episode=369 reward=0.7778587 (569.87 it/sec) -training >> step=2207900, episode=369 reward=0.7663636 (602.49 it/sec) -training >> step=2208000, episode=369 reward=0.7745523 (592.35 it/sec) -training >> step=2208100, episode=369 reward=0.778727 (545.35 it/sec) -training >> step=2208200, episode=369 reward=0.7636388 (614.82 it/sec) -training >> step=2208300, episode=369 reward=0.7611825 (583.32 it/sec) -training >> step=2208400, episode=369 reward=0.756946 (588.71 it/sec) -training >> step=2208500, episode=369 reward=0.7546356 (577.63 it/sec) -training >> step=2208600, episode=369 reward=0.7706866 (604.95 it/sec) -training >> step=2208700, episode=369 reward=0.7536131 (573.25 it/sec) -training >> step=2208800, episode=369 reward=0.7554271 (565.08 it/sec) -training >> step=2208900, episode=369 reward=0.7565094 (588.31 it/sec) -training >> step=2209000, episode=369 reward=0.7753754 (596.68 it/sec) -training >> step=2209100, episode=369 reward=0.7627247 (584.20 it/sec) -training >> step=2209200, episode=369 reward=0.7518567 (563.23 it/sec) -training >> step=2209300, episode=369 reward=0.7745492 (587.82 it/sec) -training >> step=2209400, episode=369 reward=0.7705251 (594.67 it/sec) -training >> step=2209500, episode=369 reward=0.7517934 (596.21 it/sec) -training >> step=2209600, episode=369 reward=0.7815216 (558.50 it/sec) -training >> step=2209700, episode=369 reward=0.7614263 (592.26 it/sec) -training >> step=2209800, episode=369 reward=0.7611673 (594.11 it/sec) -training >> step=2209900, episode=369 reward=0.7622347 (568.73 it/sec) -training >> step=2210000, episode=369 reward=0.763631 (571.42 it/sec) -training >> step=2210100, episode=369 reward=0.7709997 (405.63 it/sec) -training >> step=2210200, episode=369 reward=0.7661465 (591.99 it/sec) -training >> step=2210300, episode=369 reward=0.7651737 (585.85 it/sec) -training >> step=2210400, episode=369 reward=0.7738063 (552.23 it/sec) -training >> step=2210500, episode=369 reward=0.7821212 (587.25 it/sec) -training >> step=2210600, episode=369 reward=0.7683833 (571.20 it/sec) -training >> step=2210700, episode=369 reward=0.730991 (572.72 it/sec) -training >> step=2210800, episode=369 reward=0.7537835 (567.41 it/sec) -training >> step=2210900, episode=369 reward=0.7766986 (538.31 it/sec) -training >> step=2211000, episode=369 reward=0.7675949 (594.44 it/sec) -training >> step=2211100, episode=369 reward=0.7681534 (512.02 it/sec) -training >> step=2211200, episode=369 reward=0.7697777 (557.92 it/sec) -training >> step=2211300, episode=369 reward=0.7656578 (565.04 it/sec) -training >> step=2211400, episode=369 reward=0.7290081 (599.27 it/sec) -training >> step=2211500, episode=369 reward=0.7617234 (558.95 it/sec) -training >> step=2211600, episode=369 reward=0.7651069 (583.98 it/sec) -training >> step=2211700, episode=369 reward=0.7706644 (572.38 it/sec) -training >> step=2211800, episode=369 reward=0.7535547 (584.95 it/sec) -training >> step=2211900, episode=369 reward=0.7609064 (583.02 it/sec) -training >> step=2212000, episode=369 reward=0.7533118 (588.78 it/sec) -training >> step=2212100, episode=369 reward=0.7652041 (593.64 it/sec) -training >> step=2212200, episode=369 reward=0.7482497 (591.67 it/sec) -training >> step=2212300, episode=369 reward=0.7767923 (548.21 it/sec) -training >> step=2212400, episode=369 reward=0.7743849 (592.78 it/sec) -training >> step=2212500, episode=369 reward=0.7704461 (590.95 it/sec) -training >> step=2212600, episode=369 reward=0.7499791 (595.31 it/sec) -training >> step=2212700, episode=369 reward=0.7439863 (567.67 it/sec) -training >> step=2212800, episode=369 reward=0.7578679 (562.99 it/sec) -training >> step=2212900, episode=369 reward=0.750787 (574.04 it/sec) -training >> step=2213000, episode=369 reward=0.7513857 (552.62 it/sec) -training >> step=2213100, episode=369 reward=0.7598408 (586.79 it/sec) -training >> step=2213200, episode=369 reward=0.7548541 (583.62 it/sec) -training >> step=2213300, episode=370 reward=0.7455091 (171.66 it/sec) -training >> step=2213400, episode=370 reward=0.7569391 (543.73 it/sec) -training >> step=2213500, episode=370 reward=0.748472 (545.24 it/sec) -training >> step=2213600, episode=370 reward=0.7844708 (540.13 it/sec) -training >> step=2213700, episode=370 reward=0.7561591 (597.62 it/sec) -training >> step=2213800, episode=370 reward=0.7688978 (568.04 it/sec) -training >> step=2213900, episode=370 reward=0.7764801 (575.60 it/sec) -training >> step=2214000, episode=370 reward=0.7545774 (576.20 it/sec) -training >> step=2214100, episode=370 reward=0.7525114 (561.40 it/sec) -training >> step=2214200, episode=370 reward=0.7708511 (542.20 it/sec) -training >> step=2214300, episode=370 reward=0.769331 (571.80 it/sec) -training >> step=2214400, episode=370 reward=0.7616387 (532.08 it/sec) -training >> step=2214500, episode=370 reward=0.7310709 (551.95 it/sec) -training >> step=2214600, episode=370 reward=0.7596933 (554.32 it/sec) -training >> step=2214700, episode=370 reward=0.7778837 (556.18 it/sec) -training >> step=2214800, episode=370 reward=0.7690797 (554.55 it/sec) -training >> step=2214900, episode=370 reward=0.762629 (530.82 it/sec) -training >> step=2215000, episode=370 reward=0.7733632 (590.30 it/sec) -training >> step=2215100, episode=370 reward=0.7470337 (572.66 it/sec) -training >> step=2215200, episode=370 reward=0.7879921 (624.33 it/sec) -training >> step=2215300, episode=370 reward=0.7849803 (571.39 it/sec) -training >> step=2215400, episode=370 reward=0.7681627 (561.46 it/sec) -training >> step=2215500, episode=370 reward=0.7647384 (560.49 it/sec) -training >> step=2215600, episode=370 reward=0.7904913 (573.75 it/sec) -training >> step=2215700, episode=370 reward=0.7475656 (582.98 it/sec) -training >> step=2215800, episode=370 reward=0.7527837 (599.29 it/sec) -training >> step=2215900, episode=370 reward=0.7637919 (576.29 it/sec) -training >> step=2216000, episode=370 reward=0.7952342 (547.17 it/sec) -training >> step=2216100, episode=370 reward=0.7606213 (559.44 it/sec) -training >> step=2216200, episode=370 reward=0.7707881 (506.16 it/sec) -training >> step=2216300, episode=370 reward=0.7572882 (519.85 it/sec) -training >> step=2216400, episode=370 reward=0.7791705 (422.70 it/sec) -training >> step=2216500, episode=370 reward=0.7667711 (504.27 it/sec) -training >> step=2216600, episode=370 reward=0.7672883 (545.71 it/sec) -training >> step=2216700, episode=370 reward=0.8004715 (579.61 it/sec) -training >> step=2216800, episode=370 reward=0.7909252 (584.65 it/sec) -training >> step=2216900, episode=370 reward=0.7525657 (585.50 it/sec) -training >> step=2217000, episode=370 reward=0.7730674 (522.36 it/sec) -training >> step=2217100, episode=370 reward=0.7535169 (602.24 it/sec) -training >> step=2217200, episode=370 reward=0.749684 (569.53 it/sec) -training >> step=2217300, episode=370 reward=0.7781622 (580.90 it/sec) -training >> step=2217400, episode=370 reward=0.7547028 (617.08 it/sec) -training >> step=2217500, episode=370 reward=0.7470742 (585.00 it/sec) -training >> step=2217600, episode=370 reward=0.742888 (521.08 it/sec) -training >> step=2217700, episode=370 reward=0.7755239 (560.57 it/sec) -training >> step=2217800, episode=370 reward=0.7602538 (546.58 it/sec) -training >> step=2217900, episode=370 reward=0.7556376 (547.82 it/sec) -training >> step=2218000, episode=370 reward=0.7458109 (540.38 it/sec) -training >> step=2218100, episode=370 reward=0.7548103 (559.45 it/sec) -training >> step=2218200, episode=370 reward=0.7518616 (545.44 it/sec) -training >> step=2218300, episode=370 reward=0.7453523 (518.95 it/sec) -training >> step=2218400, episode=370 reward=0.7463201 (526.40 it/sec) -training >> step=2218500, episode=370 reward=0.7641758 (537.69 it/sec) -training >> step=2218600, episode=370 reward=0.77341 (545.32 it/sec) -training >> step=2218700, episode=370 reward=0.771731 (502.43 it/sec) -training >> step=2218800, episode=370 reward=0.7767651 (542.16 it/sec) -training >> step=2218900, episode=370 reward=0.7766122 (529.03 it/sec) -training >> step=2219000, episode=370 reward=0.7585478 (504.15 it/sec) -training >> step=2219100, episode=370 reward=0.7670498 (503.90 it/sec) -training >> step=2219200, episode=370 reward=0.7738994 (551.88 it/sec) -training >> step=2219300, episode=371 reward=0.7615374 (128.44 it/sec) -training >> step=2219400, episode=371 reward=0.7595138 (518.61 it/sec) -training >> step=2219500, episode=371 reward=0.7543519 (509.00 it/sec) -training >> step=2219600, episode=371 reward=0.758493 (555.31 it/sec) -training >> step=2219700, episode=371 reward=0.7757236 (484.88 it/sec) -training >> step=2219800, episode=371 reward=0.766697 (505.55 it/sec) -training >> step=2219900, episode=371 reward=0.7693748 (500.49 it/sec) -training >> step=2220000, episode=371 reward=0.7739695 (571.86 it/sec) -training >> step=2220100, episode=371 reward=0.7783332 (505.61 it/sec) -training >> step=2220200, episode=371 reward=0.7706917 (531.90 it/sec) -training >> step=2220300, episode=371 reward=0.7811696 (545.27 it/sec) -training >> step=2220400, episode=371 reward=0.7740954 (532.62 it/sec) -training >> step=2220500, episode=371 reward=0.7625915 (541.90 it/sec) -training >> step=2220600, episode=371 reward=0.7433677 (535.23 it/sec) -training >> step=2220700, episode=371 reward=0.7841569 (535.99 it/sec) -training >> step=2220800, episode=371 reward=0.7578945 (562.97 it/sec) -training >> step=2220900, episode=371 reward=0.7601476 (505.91 it/sec) -training >> step=2221000, episode=371 reward=0.7688172 (615.85 it/sec) -training >> step=2221100, episode=371 reward=0.7517229 (520.86 it/sec) -training >> step=2221200, episode=371 reward=0.7570299 (528.56 it/sec) -training >> step=2221300, episode=371 reward=0.7908725 (540.02 it/sec) -training >> step=2221400, episode=371 reward=0.7718548 (571.85 it/sec) -training >> step=2221500, episode=371 reward=0.7653125 (495.64 it/sec) -training >> step=2221600, episode=371 reward=0.7482288 (498.91 it/sec) -training >> step=2221700, episode=371 reward=0.7471799 (520.31 it/sec) -training >> step=2221800, episode=371 reward=0.7502233 (553.70 it/sec) -training >> step=2221900, episode=371 reward=0.7701032 (546.01 it/sec) -training >> step=2222000, episode=371 reward=0.7724766 (573.22 it/sec) -training >> step=2222100, episode=371 reward=0.7579948 (534.59 it/sec) -training >> step=2222200, episode=371 reward=0.7701185 (534.74 it/sec) -training >> step=2222300, episode=371 reward=0.7625213 (558.27 it/sec) -training >> step=2222400, episode=371 reward=0.7621233 (572.86 it/sec) -training >> step=2222500, episode=371 reward=0.7483649 (411.43 it/sec) -training >> step=2222600, episode=371 reward=0.7486887 (473.50 it/sec) -training >> step=2222700, episode=371 reward=0.7691302 (469.46 it/sec) -training >> step=2222800, episode=371 reward=0.7693496 (549.01 it/sec) -training >> step=2222900, episode=371 reward=0.7669442 (544.60 it/sec) -training >> step=2223000, episode=371 reward=0.7410333 (536.03 it/sec) -training >> step=2223100, episode=371 reward=0.7653401 (521.92 it/sec) -training >> step=2223200, episode=371 reward=0.7634421 (550.56 it/sec) -training >> step=2223300, episode=371 reward=0.7347587 (554.20 it/sec) -training >> step=2223400, episode=371 reward=0.7642287 (557.97 it/sec) -training >> step=2223500, episode=371 reward=0.7657887 (561.94 it/sec) -training >> step=2223600, episode=371 reward=0.7578955 (528.72 it/sec) -training >> step=2223700, episode=371 reward=0.7630378 (582.63 it/sec) -training >> step=2223800, episode=371 reward=0.7826233 (525.23 it/sec) -training >> step=2223900, episode=371 reward=0.7598301 (536.61 it/sec) -training >> step=2224000, episode=371 reward=0.7607259 (555.70 it/sec) -training >> step=2224100, episode=371 reward=0.7914256 (576.00 it/sec) -training >> step=2224200, episode=371 reward=0.7667617 (551.49 it/sec) -training >> step=2224300, episode=371 reward=0.7780061 (568.82 it/sec) -training >> step=2224400, episode=371 reward=0.7561938 (583.32 it/sec) -training >> step=2224500, episode=371 reward=0.7497115 (541.26 it/sec) -training >> step=2224600, episode=371 reward=0.7833555 (577.52 it/sec) -training >> step=2224700, episode=371 reward=0.7299887 (550.68 it/sec) -training >> step=2224800, episode=371 reward=0.7785962 (504.24 it/sec) -training >> step=2224900, episode=371 reward=0.7540483 (563.22 it/sec) -training >> step=2225000, episode=371 reward=0.7553073 (538.10 it/sec) -training >> step=2225100, episode=371 reward=0.7802531 (585.38 it/sec) -training >> step=2225200, episode=371 reward=0.7791833 (558.33 it/sec) -training >> step=2225300, episode=372 reward=0.7778403 (67.82 it/sec) -training >> step=2225400, episode=372 reward=0.7533612 (536.55 it/sec) -training >> step=2225500, episode=372 reward=0.7266972 (515.53 it/sec) -training >> step=2225600, episode=372 reward=0.7490943 (549.59 it/sec) -training >> step=2225700, episode=372 reward=0.7821098 (551.37 it/sec) -training >> step=2225800, episode=372 reward=0.7798882 (549.08 it/sec) -training >> step=2225900, episode=372 reward=0.7606883 (553.54 it/sec) -training >> step=2226000, episode=372 reward=0.7619836 (567.25 it/sec) -training >> step=2226100, episode=372 reward=0.7657234 (567.53 it/sec) -training >> step=2226200, episode=372 reward=0.766461 (570.45 it/sec) -training >> step=2226300, episode=372 reward=0.7554762 (581.96 it/sec) -training >> step=2226400, episode=372 reward=0.7747895 (548.61 it/sec) -training >> step=2226500, episode=372 reward=0.7313031 (554.59 it/sec) -training >> step=2226600, episode=372 reward=0.782882 (554.60 it/sec) -training >> step=2226700, episode=372 reward=0.7557691 (574.43 it/sec) -training >> step=2226800, episode=372 reward=0.7767491 (528.57 it/sec) -training >> step=2226900, episode=372 reward=0.7727709 (560.74 it/sec) -training >> step=2227000, episode=372 reward=0.7573275 (609.55 it/sec) -training >> step=2227100, episode=372 reward=0.7815652 (532.23 it/sec) -training >> step=2227200, episode=372 reward=0.7693186 (561.14 it/sec) -training >> step=2227300, episode=372 reward=0.7774137 (564.61 it/sec) -training >> step=2227400, episode=372 reward=0.7585226 (583.47 it/sec) -training >> step=2227500, episode=372 reward=0.7461089 (536.76 it/sec) -training >> step=2227600, episode=372 reward=0.764471 (574.74 it/sec) -training >> step=2227700, episode=372 reward=0.7749305 (568.62 it/sec) -training >> step=2227800, episode=372 reward=0.7852947 (554.52 it/sec) -training >> step=2227900, episode=372 reward=0.7569495 (552.31 it/sec) -training >> step=2228000, episode=372 reward=0.7683706 (564.45 it/sec) -training >> step=2228100, episode=372 reward=0.7445403 (584.49 it/sec) -training >> step=2228200, episode=372 reward=0.7653477 (583.04 it/sec) -training >> step=2228300, episode=372 reward=0.7713958 (554.83 it/sec) -training >> step=2228400, episode=372 reward=0.7613031 (564.43 it/sec) -training >> step=2228500, episode=372 reward=0.7615432 (565.70 it/sec) -training >> step=2228600, episode=372 reward=0.7757286 (560.16 it/sec) -training >> step=2228700, episode=372 reward=0.7727692 (520.57 it/sec) -training >> step=2228800, episode=372 reward=0.7554829 (417.84 it/sec) -training >> step=2228900, episode=372 reward=0.7606159 (541.52 it/sec) -training >> step=2229000, episode=372 reward=0.7796437 (560.77 it/sec) -training >> step=2229100, episode=372 reward=0.7386629 (556.75 it/sec) -training >> step=2229200, episode=372 reward=0.7524519 (571.52 it/sec) -training >> step=2229300, episode=372 reward=0.7619061 (556.59 it/sec) -training >> step=2229400, episode=372 reward=0.7632411 (561.19 it/sec) -training >> step=2229500, episode=372 reward=0.7737747 (581.94 it/sec) -training >> step=2229600, episode=372 reward=0.7578756 (584.92 it/sec) -training >> step=2229700, episode=372 reward=0.7604727 (549.60 it/sec) -training >> step=2229800, episode=372 reward=0.7818379 (567.73 it/sec) -training >> step=2229900, episode=372 reward=0.764028 (556.59 it/sec) -training >> step=2230000, episode=372 reward=0.7733468 (552.18 it/sec) -training >> step=2230100, episode=372 reward=0.7717742 (573.60 it/sec) -training >> step=2230200, episode=372 reward=0.7485027 (553.72 it/sec) -training >> step=2230300, episode=372 reward=0.7808112 (573.11 it/sec) -training >> step=2230400, episode=372 reward=0.7709613 (596.97 it/sec) -training >> step=2230500, episode=372 reward=0.7590232 (563.19 it/sec) -training >> step=2230600, episode=372 reward=0.7733397 (556.50 it/sec) -training >> step=2230700, episode=372 reward=0.750602 (587.17 it/sec) -training >> step=2230800, episode=372 reward=0.7585891 (507.51 it/sec) -training >> step=2230900, episode=372 reward=0.7747115 (546.24 it/sec) -training >> step=2231000, episode=372 reward=0.7682146 (573.23 it/sec) -training >> step=2231100, episode=372 reward=0.7534701 (569.97 it/sec) -training >> step=2231200, episode=372 reward=0.7635595 (539.41 it/sec) -training >> step=2231300, episode=373 reward=0.7676243 (50.69 it/sec) -training >> step=2231400, episode=373 reward=0.7412665 (555.42 it/sec) -training >> step=2231500, episode=373 reward=0.7323294 (528.60 it/sec) -training >> step=2231600, episode=373 reward=0.754435 (554.57 it/sec) -training >> step=2231700, episode=373 reward=0.7606899 (542.64 it/sec) -training >> step=2231800, episode=373 reward=0.775966 (528.79 it/sec) -training >> step=2231900, episode=373 reward=0.7547054 (571.67 it/sec) -training >> step=2232000, episode=373 reward=0.7641145 (558.03 it/sec) -training >> step=2232100, episode=373 reward=0.7307003 (584.92 it/sec) -training >> step=2232200, episode=373 reward=0.7689157 (586.18 it/sec) -training >> step=2232300, episode=373 reward=0.7646835 (570.38 it/sec) -training >> step=2232400, episode=373 reward=0.7642974 (581.27 it/sec) -training >> step=2232500, episode=373 reward=0.761414 (553.89 it/sec) -training >> step=2232600, episode=373 reward=0.7533156 (533.49 it/sec) -training >> step=2232700, episode=373 reward=0.766436 (561.94 it/sec) -training >> step=2232800, episode=373 reward=0.7365136 (561.91 it/sec) -training >> step=2232900, episode=373 reward=0.7641587 (562.82 it/sec) -training >> step=2233000, episode=373 reward=0.7729607 (531.49 it/sec) -training >> step=2233100, episode=373 reward=0.7899966 (569.20 it/sec) -training >> step=2233200, episode=373 reward=0.7559968 (547.47 it/sec) -training >> step=2233300, episode=373 reward=0.7647366 (588.41 it/sec) -training >> step=2233400, episode=373 reward=0.7696478 (561.99 it/sec) -training >> step=2233500, episode=373 reward=0.7610922 (541.31 it/sec) -training >> step=2233600, episode=373 reward=0.772413 (554.99 it/sec) -training >> step=2233700, episode=373 reward=0.7516544 (584.94 it/sec) -training >> step=2233800, episode=373 reward=0.7586034 (542.96 it/sec) -training >> step=2233900, episode=373 reward=0.7656725 (562.93 it/sec) -training >> step=2234000, episode=373 reward=0.7655578 (567.33 it/sec) -training >> step=2234100, episode=373 reward=0.7798214 (530.03 it/sec) -training >> step=2234200, episode=373 reward=0.7733815 (515.84 it/sec) -training >> step=2234300, episode=373 reward=0.7461131 (547.58 it/sec) -training >> step=2234400, episode=373 reward=0.7788979 (541.19 it/sec) -training >> step=2234500, episode=373 reward=0.7778844 (545.54 it/sec) -training >> step=2234600, episode=373 reward=0.7718481 (537.19 it/sec) -training >> step=2234700, episode=373 reward=0.7566776 (588.88 it/sec) -training >> step=2234800, episode=373 reward=0.7734981 (402.66 it/sec) -training >> step=2234900, episode=373 reward=0.7511672 (536.18 it/sec) -training >> step=2235000, episode=373 reward=0.760879 (567.28 it/sec) -training >> step=2235100, episode=373 reward=0.7420427 (552.84 it/sec) -training >> step=2235200, episode=373 reward=0.7746552 (560.96 it/sec) -training >> step=2235300, episode=373 reward=0.7563992 (533.70 it/sec) -training >> step=2235400, episode=373 reward=0.7605274 (548.23 it/sec) -training >> step=2235500, episode=373 reward=0.7619383 (555.09 it/sec) -training >> step=2235600, episode=373 reward=0.7730016 (557.85 it/sec) -training >> step=2235700, episode=373 reward=0.7724664 (544.42 it/sec) -training >> step=2235800, episode=373 reward=0.7584568 (555.64 it/sec) -training >> step=2235900, episode=373 reward=0.7602816 (548.36 it/sec) -training >> step=2236000, episode=373 reward=0.7510383 (516.76 it/sec) -training >> step=2236100, episode=373 reward=0.7427796 (561.90 it/sec) -training >> step=2236200, episode=373 reward=0.7698454 (531.44 it/sec) -training >> step=2236300, episode=373 reward=0.7628619 (561.22 it/sec) -training >> step=2236400, episode=373 reward=0.7594151 (569.29 it/sec) -training >> step=2236500, episode=373 reward=0.7533228 (560.70 it/sec) -training >> step=2236600, episode=373 reward=0.7506287 (524.05 it/sec) -training >> step=2236700, episode=373 reward=0.7487838 (555.13 it/sec) -training >> step=2236800, episode=373 reward=0.7465897 (543.89 it/sec) -training >> step=2236900, episode=373 reward=0.7467352 (553.92 it/sec) -training >> step=2237000, episode=373 reward=0.7549262 (560.83 it/sec) -training >> step=2237100, episode=373 reward=0.7589573 (553.30 it/sec) -training >> step=2237200, episode=373 reward=0.7527643 (544.86 it/sec) -training >> step=2237300, episode=374 reward=0.7641543 (127.31 it/sec) -training >> step=2237400, episode=374 reward=0.762699 (547.51 it/sec) -training >> step=2237500, episode=374 reward=0.761049 (558.01 it/sec) -training >> step=2237600, episode=374 reward=0.7653911 (546.27 it/sec) -training >> step=2237700, episode=374 reward=0.7620565 (565.84 it/sec) -training >> step=2237800, episode=374 reward=0.7649966 (564.92 it/sec) -training >> step=2237900, episode=374 reward=0.7480589 (514.14 it/sec) -training >> step=2238000, episode=374 reward=0.7750927 (569.01 it/sec) -training >> step=2238100, episode=374 reward=0.7465222 (518.57 it/sec) -training >> step=2238200, episode=374 reward=0.7500692 (588.66 it/sec) -training >> step=2238300, episode=374 reward=0.7639071 (539.34 it/sec) -training >> step=2238400, episode=374 reward=0.7744925 (563.30 it/sec) -training >> step=2238500, episode=374 reward=0.7764038 (551.41 it/sec) -training >> step=2238600, episode=374 reward=0.76739 (546.67 it/sec) -training >> step=2238700, episode=374 reward=0.7593781 (548.32 it/sec) -training >> step=2238800, episode=374 reward=0.7479368 (559.75 it/sec) -training >> step=2238900, episode=374 reward=0.7536072 (594.71 it/sec) -training >> step=2239000, episode=374 reward=0.7687891 (556.85 it/sec) -training >> step=2239100, episode=374 reward=0.7719719 (532.03 it/sec) -training >> step=2239200, episode=374 reward=0.7610751 (536.16 it/sec) -training >> step=2239300, episode=374 reward=0.7700621 (558.20 it/sec) -training >> step=2239400, episode=374 reward=0.7647155 (562.13 it/sec) -training >> step=2239500, episode=374 reward=0.7626657 (568.45 it/sec) -training >> step=2239600, episode=374 reward=0.7406785 (575.27 it/sec) -training >> step=2239700, episode=374 reward=0.7644943 (564.56 it/sec) -training >> step=2239800, episode=374 reward=0.746531 (529.54 it/sec) -training >> step=2239900, episode=374 reward=0.7500787 (593.66 it/sec) -training >> step=2240000, episode=374 reward=0.7755637 (583.09 it/sec) -training >> step=2240100, episode=374 reward=0.7669693 (568.63 it/sec) -training >> step=2240200, episode=374 reward=0.7632507 (520.83 it/sec) -training >> step=2240300, episode=374 reward=0.7603163 (606.24 it/sec) -training >> step=2240400, episode=374 reward=0.7401517 (554.75 it/sec) -training >> step=2240500, episode=374 reward=0.7542205 (573.78 it/sec) -training >> step=2240600, episode=374 reward=0.7509037 (548.60 it/sec) -training >> step=2240700, episode=374 reward=0.7627901 (580.66 it/sec) -training >> step=2240800, episode=374 reward=0.7513812 (426.59 it/sec) -training >> step=2240900, episode=374 reward=0.7524523 (534.03 it/sec) -training >> step=2241000, episode=374 reward=0.7525287 (557.21 it/sec) -training >> step=2241100, episode=374 reward=0.7708259 (563.60 it/sec) -training >> step=2241200, episode=374 reward=0.7397867 (566.95 it/sec) -training >> step=2241300, episode=374 reward=0.7635772 (555.48 it/sec) -training >> step=2241400, episode=374 reward=0.7563316 (497.81 it/sec) -training >> step=2241500, episode=374 reward=0.7724918 (576.90 it/sec) -training >> step=2241600, episode=374 reward=0.7519065 (526.03 it/sec) -training >> step=2241700, episode=374 reward=0.7665398 (532.24 it/sec) -training >> step=2241800, episode=374 reward=0.7502271 (580.79 it/sec) -training >> step=2241900, episode=374 reward=0.7664437 (508.53 it/sec) -training >> step=2242000, episode=374 reward=0.7768357 (539.13 it/sec) -training >> step=2242100, episode=374 reward=0.7858586 (555.50 it/sec) -training >> step=2242200, episode=374 reward=0.7324325 (569.53 it/sec) -training >> step=2242300, episode=374 reward=0.7675917 (536.13 it/sec) -training >> step=2242400, episode=374 reward=0.7563206 (568.42 it/sec) -training >> step=2242500, episode=374 reward=0.7573255 (566.45 it/sec) -training >> step=2242600, episode=374 reward=0.7692885 (541.47 it/sec) -training >> step=2242700, episode=374 reward=0.7473755 (571.38 it/sec) -training >> step=2242800, episode=374 reward=0.7600772 (534.44 it/sec) -training >> step=2242900, episode=374 reward=0.7564282 (547.48 it/sec) -training >> step=2243000, episode=374 reward=0.7849896 (539.43 it/sec) -training >> step=2243100, episode=374 reward=0.7552194 (552.42 it/sec) -training >> step=2243200, episode=374 reward=0.7432093 (531.75 it/sec) -training >> step=2243300, episode=375 reward=0.7270278 (199.74 it/sec) -training >> step=2243400, episode=375 reward=0.7613574 (533.71 it/sec) -training >> step=2243500, episode=375 reward=0.7262672 (485.04 it/sec) -training >> step=2243600, episode=375 reward=0.7424453 (531.05 it/sec) -training >> step=2243700, episode=375 reward=0.758367 (560.76 it/sec) -training >> step=2243800, episode=375 reward=0.7615738 (562.66 it/sec) -training >> step=2243900, episode=375 reward=0.7577113 (550.84 it/sec) -training >> step=2244000, episode=375 reward=0.7521493 (540.32 it/sec) -training >> step=2244100, episode=375 reward=0.7708316 (540.81 it/sec) -training >> step=2244200, episode=375 reward=0.7735962 (546.49 it/sec) -training >> step=2244300, episode=375 reward=0.7690431 (546.12 it/sec) -training >> step=2244400, episode=375 reward=0.7756876 (566.29 it/sec) -training >> step=2244500, episode=375 reward=0.7782567 (523.66 it/sec) -training >> step=2244600, episode=375 reward=0.7404296 (523.87 it/sec) -training >> step=2244700, episode=375 reward=0.7832848 (531.22 it/sec) -training >> step=2244800, episode=375 reward=0.7769398 (550.71 it/sec) -training >> step=2244900, episode=375 reward=0.7560275 (581.82 it/sec) -training >> step=2245000, episode=375 reward=0.7667822 (542.64 it/sec) -training >> step=2245100, episode=375 reward=0.7781675 (533.54 it/sec) -training >> step=2245200, episode=375 reward=0.7565278 (564.47 it/sec) -training >> step=2245300, episode=375 reward=0.7590927 (537.21 it/sec) -training >> step=2245400, episode=375 reward=0.7616444 (557.68 it/sec) -training >> step=2245500, episode=375 reward=0.7703614 (551.90 it/sec) -training >> step=2245600, episode=375 reward=0.7718767 (586.15 it/sec) -training >> step=2245700, episode=375 reward=0.7768828 (521.23 it/sec) -training >> step=2245800, episode=375 reward=0.7778994 (562.79 it/sec) -training >> step=2245900, episode=375 reward=0.7696539 (532.12 it/sec) -training >> step=2246000, episode=375 reward=0.7783871 (539.51 it/sec) -training >> step=2246100, episode=375 reward=0.7751539 (539.65 it/sec) -training >> step=2246200, episode=375 reward=0.7757673 (531.16 it/sec) -training >> step=2246300, episode=375 reward=0.75315 (541.55 it/sec) -training >> step=2246400, episode=375 reward=0.7514223 (540.69 it/sec) -training >> step=2246500, episode=375 reward=0.7489945 (523.77 it/sec) -training >> step=2246600, episode=375 reward=0.7660449 (536.10 it/sec) -training >> step=2246700, episode=375 reward=0.7401115 (569.63 it/sec) -training >> step=2246800, episode=375 reward=0.7630175 (502.50 it/sec) -training >> step=2246900, episode=375 reward=0.7817837 (561.54 it/sec) -training >> step=2247000, episode=375 reward=0.7546844 (543.60 it/sec) -training >> step=2247100, episode=375 reward=0.764089 (388.93 it/sec) -training >> step=2247200, episode=375 reward=0.7507051 (530.79 it/sec) -training >> step=2247300, episode=375 reward=0.7382446 (509.45 it/sec) -training >> step=2247400, episode=375 reward=0.7645738 (555.05 it/sec) -training >> step=2247500, episode=375 reward=0.7734538 (549.27 it/sec) -training >> step=2247600, episode=375 reward=0.7708656 (520.09 it/sec) -training >> step=2247700, episode=375 reward=0.7559467 (557.08 it/sec) -training >> step=2247800, episode=375 reward=0.7535632 (516.88 it/sec) -training >> step=2247900, episode=375 reward=0.7625763 (557.75 it/sec) -training >> step=2248000, episode=375 reward=0.7586625 (562.70 it/sec) -training >> step=2248100, episode=375 reward=0.735976 (537.75 it/sec) -training >> step=2248200, episode=375 reward=0.7401594 (552.26 it/sec) -training >> step=2248300, episode=375 reward=0.7566117 (540.29 it/sec) -training >> step=2248400, episode=375 reward=0.7609608 (516.88 it/sec) -training >> step=2248500, episode=375 reward=0.7742925 (565.30 it/sec) -training >> step=2248600, episode=375 reward=0.7762725 (559.24 it/sec) -training >> step=2248700, episode=375 reward=0.761207 (545.02 it/sec) -training >> step=2248800, episode=375 reward=0.7709599 (520.25 it/sec) -training >> step=2248900, episode=375 reward=0.7540801 (557.99 it/sec) -training >> step=2249000, episode=375 reward=0.7507082 (492.53 it/sec) -training >> step=2249100, episode=375 reward=0.7600889 (560.95 it/sec) -training >> step=2249200, episode=375 reward=0.7202473 (519.12 it/sec) -training >> step=2249300, episode=376 reward=0.7533587 (201.85 it/sec) -training >> step=2249400, episode=376 reward=0.7556187 (549.39 it/sec) -training >> step=2249500, episode=376 reward=0.750676 (560.57 it/sec) -training >> step=2249600, episode=376 reward=0.7361134 (575.52 it/sec) -training >> step=2249700, episode=376 reward=0.7970188 (621.38 it/sec) -training >> step=2249800, episode=376 reward=0.7634155 (607.37 it/sec) -training >> step=2249900, episode=376 reward=0.7508991 (551.04 it/sec) -training >> step=2250000, episode=376 reward=0.7539995 (576.76 it/sec) -training >> step=2250100, episode=376 reward=0.7579092 (616.68 it/sec) -training >> step=2250200, episode=376 reward=0.7615613 (560.94 it/sec) -training >> step=2250300, episode=376 reward=0.7557691 (591.12 it/sec) -training >> step=2250400, episode=376 reward=0.7473358 (597.16 it/sec) -training >> step=2250500, episode=376 reward=0.7684222 (592.59 it/sec) -training >> step=2250600, episode=376 reward=0.7760003 (570.12 it/sec) -training >> step=2250700, episode=376 reward=0.7660981 (587.90 it/sec) -training >> step=2250800, episode=376 reward=0.7434076 (608.47 it/sec) -training >> step=2250900, episode=376 reward=0.7523429 (593.86 it/sec) -training >> step=2251000, episode=376 reward=0.7654557 (565.34 it/sec) -training >> step=2251100, episode=376 reward=0.7657962 (542.03 it/sec) -training >> step=2251200, episode=376 reward=0.7368938 (581.06 it/sec) -training >> step=2251300, episode=376 reward=0.7606434 (619.08 it/sec) -training >> step=2251400, episode=376 reward=0.7715487 (553.95 it/sec) -training >> step=2251500, episode=376 reward=0.769309 (591.71 it/sec) -training >> step=2251600, episode=376 reward=0.7487058 (596.18 it/sec) -training >> step=2251700, episode=376 reward=0.7734988 (563.09 it/sec) -training >> step=2251800, episode=376 reward=0.7515223 (577.54 it/sec) -training >> step=2251900, episode=376 reward=0.7756087 (578.31 it/sec) -training >> step=2252000, episode=376 reward=0.7559243 (615.11 it/sec) -training >> step=2252100, episode=376 reward=0.7661452 (586.35 it/sec) -training >> step=2252200, episode=376 reward=0.7722555 (596.56 it/sec) -training >> step=2252300, episode=376 reward=0.7923565 (568.21 it/sec) -training >> step=2252400, episode=376 reward=0.7499247 (508.40 it/sec) -training >> step=2252500, episode=376 reward=0.7869528 (550.06 it/sec) -training >> step=2252600, episode=376 reward=0.7746386 (588.08 it/sec) -training >> step=2252700, episode=376 reward=0.7585423 (585.90 it/sec) -training >> step=2252800, episode=376 reward=0.7648364 (610.50 it/sec) -training >> step=2252900, episode=376 reward=0.7679505 (561.94 it/sec) -training >> step=2253000, episode=376 reward=0.7713516 (563.69 it/sec) -training >> step=2253100, episode=376 reward=0.7726685 (612.16 it/sec) -training >> step=2253200, episode=376 reward=0.777698 (413.16 it/sec) -training >> step=2253300, episode=376 reward=0.7719401 (594.91 it/sec) -training >> step=2253400, episode=376 reward=0.7624388 (559.54 it/sec) -training >> step=2253500, episode=376 reward=0.7529909 (553.03 it/sec) -training >> step=2253600, episode=376 reward=0.7683644 (595.41 it/sec) -training >> step=2253700, episode=376 reward=0.7608131 (576.07 it/sec) -training >> step=2253800, episode=376 reward=0.7642412 (582.06 it/sec) -training >> step=2253900, episode=376 reward=0.7591844 (605.31 it/sec) -training >> step=2254000, episode=376 reward=0.788593 (588.94 it/sec) -training >> step=2254100, episode=376 reward=0.7583966 (537.06 it/sec) -training >> step=2254200, episode=376 reward=0.7531086 (569.45 it/sec) -training >> step=2254300, episode=376 reward=0.7444434 (556.53 it/sec) -training >> step=2254400, episode=376 reward=0.7577124 (560.16 it/sec) -training >> step=2254500, episode=376 reward=0.7342544 (534.68 it/sec) -training >> step=2254600, episode=376 reward=0.7809774 (552.84 it/sec) -training >> step=2254700, episode=376 reward=0.7495816 (577.09 it/sec) -training >> step=2254800, episode=376 reward=0.7548971 (554.25 it/sec) -training >> step=2254900, episode=376 reward=0.7762529 (540.51 it/sec) -training >> step=2255000, episode=376 reward=0.7810649 (562.78 it/sec) -training >> step=2255100, episode=376 reward=0.7800215 (528.03 it/sec) -training >> step=2255200, episode=376 reward=0.7619364 (505.29 it/sec) -training >> step=2255300, episode=377 reward=0.7551736 (126.30 it/sec) -training >> step=2255400, episode=377 reward=0.7533124 (460.33 it/sec) -training >> step=2255500, episode=377 reward=0.7434581 (543.86 it/sec) -training >> step=2255600, episode=377 reward=0.7504936 (526.28 it/sec) -training >> step=2255700, episode=377 reward=0.76227 (526.52 it/sec) -training >> step=2255800, episode=377 reward=0.7697212 (546.00 it/sec) -training >> step=2255900, episode=377 reward=0.7555518 (516.78 it/sec) -training >> step=2256000, episode=377 reward=0.756614 (549.56 it/sec) -training >> step=2256100, episode=377 reward=0.8057439 (540.00 it/sec) -training >> step=2256200, episode=377 reward=0.7674541 (543.49 it/sec) -training >> step=2256300, episode=377 reward=0.7628409 (546.78 it/sec) -training >> step=2256400, episode=377 reward=0.7733095 (501.50 it/sec) -training >> step=2256500, episode=377 reward=0.7691998 (519.67 it/sec) -training >> step=2256600, episode=377 reward=0.7467086 (528.42 it/sec) -training >> step=2256700, episode=377 reward=0.7659021 (567.47 it/sec) -training >> step=2256800, episode=377 reward=0.7739338 (559.35 it/sec) -training >> step=2256900, episode=377 reward=0.767407 (542.92 it/sec) -training >> step=2257000, episode=377 reward=0.7833511 (539.64 it/sec) -training >> step=2257100, episode=377 reward=0.7562043 (576.88 it/sec) -training >> step=2257200, episode=377 reward=0.7533172 (547.12 it/sec) -training >> step=2257300, episode=377 reward=0.7660797 (508.98 it/sec) -training >> step=2257400, episode=377 reward=0.7690519 (556.86 it/sec) -training >> step=2257500, episode=377 reward=0.7798264 (542.67 it/sec) -training >> step=2257600, episode=377 reward=0.7863242 (487.88 it/sec) -training >> step=2257700, episode=377 reward=0.7649213 (533.05 it/sec) -training >> step=2257800, episode=377 reward=0.7545367 (536.43 it/sec) -training >> step=2257900, episode=377 reward=0.7791408 (570.80 it/sec) -training >> step=2258000, episode=377 reward=0.7581398 (536.98 it/sec) -training >> step=2258100, episode=377 reward=0.7764627 (544.34 it/sec) -training >> step=2258200, episode=377 reward=0.7617459 (553.31 it/sec) -training >> step=2258300, episode=377 reward=0.7626791 (536.43 it/sec) -training >> step=2258400, episode=377 reward=0.7733328 (550.64 it/sec) -training >> step=2258500, episode=377 reward=0.760602 (564.31 it/sec) -training >> step=2258600, episode=377 reward=0.7771338 (544.21 it/sec) -training >> step=2258700, episode=377 reward=0.7766349 (567.84 it/sec) -training >> step=2258800, episode=377 reward=0.7523004 (538.08 it/sec) -training >> step=2258900, episode=377 reward=0.7860091 (546.16 it/sec) -training >> step=2259000, episode=377 reward=0.7491198 (579.92 it/sec) -training >> step=2259100, episode=377 reward=0.7431096 (524.62 it/sec) -training >> step=2259200, episode=377 reward=0.7682104 (579.73 it/sec) -training >> step=2259300, episode=377 reward=0.7557423 (383.88 it/sec) -training >> step=2259400, episode=377 reward=0.7782553 (564.72 it/sec) -training >> step=2259500, episode=377 reward=0.7656177 (531.65 it/sec) -training >> step=2259600, episode=377 reward=0.7604968 (523.78 it/sec) -training >> step=2259700, episode=377 reward=0.7840175 (535.07 it/sec) -training >> step=2259800, episode=377 reward=0.7479328 (573.66 it/sec) -training >> step=2259900, episode=377 reward=0.7536023 (554.15 it/sec) -training >> step=2260000, episode=377 reward=0.7584532 (556.82 it/sec) -training >> step=2260100, episode=377 reward=0.7930632 (560.67 it/sec) -training >> step=2260200, episode=377 reward=0.7525824 (539.41 it/sec) -training >> step=2260300, episode=377 reward=0.7812891 (549.56 it/sec) -training >> step=2260400, episode=377 reward=0.7580764 (568.55 it/sec) -training >> step=2260500, episode=377 reward=0.7523288 (571.49 it/sec) -training >> step=2260600, episode=377 reward=0.7621374 (573.05 it/sec) -training >> step=2260700, episode=377 reward=0.7512709 (556.13 it/sec) -training >> step=2260800, episode=377 reward=0.7784426 (508.86 it/sec) -training >> step=2260900, episode=377 reward=0.7546244 (548.02 it/sec) -training >> step=2261000, episode=377 reward=0.7544435 (564.71 it/sec) -training >> step=2261100, episode=377 reward=0.7605778 (569.55 it/sec) -training >> step=2261200, episode=377 reward=0.7447311 (482.13 it/sec) -training >> step=2261300, episode=378 reward=0.7675163 (205.39 it/sec) -training >> step=2261400, episode=378 reward=0.7714233 (522.46 it/sec) -training >> step=2261500, episode=378 reward=0.7636779 (552.80 it/sec) -training >> step=2261600, episode=378 reward=0.7557024 (567.84 it/sec) -training >> step=2261700, episode=378 reward=0.7428497 (526.23 it/sec) -training >> step=2261800, episode=378 reward=0.7608425 (559.87 it/sec) -training >> step=2261900, episode=378 reward=0.7536129 (536.88 it/sec) -training >> step=2262000, episode=378 reward=0.7682697 (543.58 it/sec) -training >> step=2262100, episode=378 reward=0.7666944 (585.19 it/sec) -training >> step=2262200, episode=378 reward=0.76377 (550.13 it/sec) -training >> step=2262300, episode=378 reward=0.7706718 (540.40 it/sec) -training >> step=2262400, episode=378 reward=0.7880871 (550.54 it/sec) -training >> step=2262500, episode=378 reward=0.7743454 (546.98 it/sec) -training >> step=2262600, episode=378 reward=0.7519607 (512.86 it/sec) -training >> step=2262700, episode=378 reward=0.7730701 (543.86 it/sec) -training >> step=2262800, episode=378 reward=0.7750835 (534.43 it/sec) -training >> step=2262900, episode=378 reward=0.7600275 (570.85 it/sec) -training >> step=2263000, episode=378 reward=0.7584177 (556.02 it/sec) -training >> step=2263100, episode=378 reward=0.7911243 (485.73 it/sec) -training >> step=2263200, episode=378 reward=0.7582049 (557.68 it/sec) -training >> step=2263300, episode=378 reward=0.7691289 (551.78 it/sec) -training >> step=2263400, episode=378 reward=0.7750911 (537.73 it/sec) -training >> step=2263500, episode=378 reward=0.7626036 (536.87 it/sec) -training >> step=2263600, episode=378 reward=0.7618961 (567.01 it/sec) -training >> step=2263700, episode=378 reward=0.7625216 (537.34 it/sec) -training >> step=2263800, episode=378 reward=0.7479338 (541.27 it/sec) -training >> step=2263900, episode=378 reward=0.7532421 (543.79 it/sec) -training >> step=2264000, episode=378 reward=0.7490203 (553.18 it/sec) -training >> step=2264100, episode=378 reward=0.7804773 (567.55 it/sec) -training >> step=2264200, episode=378 reward=0.7663665 (534.38 it/sec) -training >> step=2264300, episode=378 reward=0.782086 (555.19 it/sec) -training >> step=2264400, episode=378 reward=0.7915975 (543.00 it/sec) -training >> step=2264500, episode=378 reward=0.7700067 (544.35 it/sec) -training >> step=2264600, episode=378 reward=0.7617443 (558.40 it/sec) -training >> step=2264700, episode=378 reward=0.7512927 (556.31 it/sec) -training >> step=2264800, episode=378 reward=0.7709082 (601.30 it/sec) -training >> step=2264900, episode=378 reward=0.7630165 (553.77 it/sec) -training >> step=2265000, episode=378 reward=0.7783359 (575.91 it/sec) -training >> step=2265100, episode=378 reward=0.7592292 (578.00 it/sec) -training >> step=2265200, episode=378 reward=0.7465445 (575.45 it/sec) -training >> step=2265300, episode=378 reward=0.7494586 (559.86 it/sec) -training >> step=2265400, episode=378 reward=0.7758278 (550.24 it/sec) -training >> step=2265500, episode=378 reward=0.7679884 (579.74 it/sec) -training >> step=2265600, episode=378 reward=0.765219 (429.54 it/sec) -training >> step=2265700, episode=378 reward=0.7980571 (565.15 it/sec) -training >> step=2265800, episode=378 reward=0.7788324 (572.63 it/sec) -training >> step=2265900, episode=378 reward=0.7568288 (569.37 it/sec) -training >> step=2266000, episode=378 reward=0.7808049 (548.06 it/sec) -training >> step=2266100, episode=378 reward=0.7542239 (532.81 it/sec) -training >> step=2266200, episode=378 reward=0.7715268 (564.35 it/sec) -training >> step=2266300, episode=378 reward=0.7685521 (580.33 it/sec) -training >> step=2266400, episode=378 reward=0.7615132 (548.28 it/sec) -training >> step=2266500, episode=378 reward=0.7684656 (534.50 it/sec) -training >> step=2266600, episode=378 reward=0.730079 (586.71 it/sec) -training >> step=2266700, episode=378 reward=0.7466401 (546.26 it/sec) -training >> step=2266800, episode=378 reward=0.7677899 (559.10 it/sec) -training >> step=2266900, episode=378 reward=0.7550899 (573.04 it/sec) -training >> step=2267000, episode=378 reward=0.772459 (594.01 it/sec) -training >> step=2267100, episode=378 reward=0.7567218 (553.45 it/sec) -training >> step=2267200, episode=378 reward=0.7594645 (532.16 it/sec) -training >> step=2267300, episode=379 reward=0.7551413 (174.66 it/sec) -training >> step=2267400, episode=379 reward=0.7627314 (528.99 it/sec) -training >> step=2267500, episode=379 reward=0.7657102 (564.34 it/sec) -training >> step=2267600, episode=379 reward=0.7501509 (546.11 it/sec) -training >> step=2267700, episode=379 reward=0.7691948 (534.97 it/sec) -training >> step=2267800, episode=379 reward=0.742658 (545.36 it/sec) -training >> step=2267900, episode=379 reward=0.779856 (585.66 it/sec) -training >> step=2268000, episode=379 reward=0.7812276 (537.07 it/sec) -training >> step=2268100, episode=379 reward=0.7755125 (552.95 it/sec) -training >> step=2268200, episode=379 reward=0.7576741 (541.82 it/sec) -training >> step=2268300, episode=379 reward=0.7651017 (553.65 it/sec) -training >> step=2268400, episode=379 reward=0.7925066 (577.17 it/sec) -training >> step=2268500, episode=379 reward=0.7626773 (531.77 it/sec) -training >> step=2268600, episode=379 reward=0.7687126 (576.39 it/sec) -training >> step=2268700, episode=379 reward=0.7501105 (574.60 it/sec) -training >> step=2268800, episode=379 reward=0.7658493 (566.09 it/sec) -training >> step=2268900, episode=379 reward=0.7601004 (544.55 it/sec) -training >> step=2269000, episode=379 reward=0.7635784 (588.24 it/sec) -training >> step=2269100, episode=379 reward=0.7676432 (513.36 it/sec) -training >> step=2269200, episode=379 reward=0.7845819 (530.23 it/sec) -training >> step=2269300, episode=379 reward=0.7704183 (564.78 it/sec) -training >> step=2269400, episode=379 reward=0.7314212 (596.40 it/sec) -training >> step=2269500, episode=379 reward=0.7742291 (544.46 it/sec) -training >> step=2269600, episode=379 reward=0.7513745 (568.35 it/sec) -training >> step=2269700, episode=379 reward=0.7885233 (547.61 it/sec) -training >> step=2269800, episode=379 reward=0.7908884 (578.50 it/sec) -training >> step=2269900, episode=379 reward=0.7765639 (575.56 it/sec) -training >> step=2270000, episode=379 reward=0.7699569 (560.60 it/sec) -training >> step=2270100, episode=379 reward=0.7544941 (560.95 it/sec) -training >> step=2270200, episode=379 reward=0.7540859 (575.97 it/sec) -training >> step=2270300, episode=379 reward=0.7884666 (517.77 it/sec) -training >> step=2270400, episode=379 reward=0.783811 (561.13 it/sec) -training >> step=2270500, episode=379 reward=0.7533168 (576.63 it/sec) -training >> step=2270600, episode=379 reward=0.7450011 (589.46 it/sec) -training >> step=2270700, episode=379 reward=0.7500791 (554.00 it/sec) -training >> step=2270800, episode=379 reward=0.7691722 (555.48 it/sec) -training >> step=2270900, episode=379 reward=0.7592164 (543.13 it/sec) -training >> step=2271000, episode=379 reward=0.7706054 (559.69 it/sec) -training >> step=2271100, episode=379 reward=0.7606149 (564.13 it/sec) -training >> step=2271200, episode=379 reward=0.7534513 (556.20 it/sec) -training >> step=2271300, episode=379 reward=0.7590678 (562.59 it/sec) -training >> step=2271400, episode=379 reward=0.751799 (553.15 it/sec) -training >> step=2271500, episode=379 reward=0.7677269 (584.57 it/sec) -training >> step=2271600, episode=379 reward=0.765574 (563.36 it/sec) -training >> step=2271700, episode=379 reward=0.7805435 (421.62 it/sec) -training >> step=2271800, episode=379 reward=0.7317543 (552.20 it/sec) -training >> step=2271900, episode=379 reward=0.7563986 (541.07 it/sec) -training >> step=2272000, episode=379 reward=0.7516714 (550.82 it/sec) -training >> step=2272100, episode=379 reward=0.7693436 (561.92 it/sec) -training >> step=2272200, episode=379 reward=0.7721448 (573.54 it/sec) -training >> step=2272300, episode=379 reward=0.7525117 (537.58 it/sec) -training >> step=2272400, episode=379 reward=0.7402096 (592.12 it/sec) -training >> step=2272500, episode=379 reward=0.7820367 (549.40 it/sec) -training >> step=2272600, episode=379 reward=0.7593494 (515.45 it/sec) -training >> step=2272700, episode=379 reward=0.7597778 (555.03 it/sec) -training >> step=2272800, episode=379 reward=0.7669453 (567.55 it/sec) -training >> step=2272900, episode=379 reward=0.760254 (563.13 it/sec) -training >> step=2273000, episode=379 reward=0.7554166 (548.04 it/sec) -training >> step=2273100, episode=379 reward=0.7491344 (528.61 it/sec) -training >> step=2273200, episode=379 reward=0.7689595 (551.46 it/sec) -training >> step=2273300, episode=380 reward=0.7467407 (199.90 it/sec) -training >> step=2273400, episode=380 reward=0.7531866 (558.93 it/sec) -training >> step=2273500, episode=380 reward=0.7650375 (531.54 it/sec) -training >> step=2273600, episode=380 reward=0.7696335 (543.62 it/sec) -training >> step=2273700, episode=380 reward=0.7357121 (536.84 it/sec) -training >> step=2273800, episode=380 reward=0.7702073 (545.97 it/sec) -training >> step=2273900, episode=380 reward=0.7595673 (562.40 it/sec) -training >> step=2274000, episode=380 reward=0.7470486 (568.78 it/sec) -training >> step=2274100, episode=380 reward=0.7305496 (548.27 it/sec) -training >> step=2274200, episode=380 reward=0.7712003 (565.58 it/sec) -training >> step=2274300, episode=380 reward=0.7544094 (559.04 it/sec) -training >> step=2274400, episode=380 reward=0.7583987 (548.65 it/sec) -training >> step=2274500, episode=380 reward=0.7568609 (548.69 it/sec) -training >> step=2274600, episode=380 reward=0.7644174 (553.75 it/sec) -training >> step=2274700, episode=380 reward=0.7439088 (584.64 it/sec) -training >> step=2274800, episode=380 reward=0.7459279 (560.28 it/sec) -training >> step=2274900, episode=380 reward=0.7648705 (555.81 it/sec) -training >> step=2275000, episode=380 reward=0.7639443 (597.21 it/sec) -training >> step=2275100, episode=380 reward=0.7676221 (572.82 it/sec) -training >> step=2275200, episode=380 reward=0.7456872 (540.29 it/sec) -training >> step=2275300, episode=380 reward=0.7304729 (572.09 it/sec) -training >> step=2275400, episode=380 reward=0.7717384 (577.83 it/sec) -training >> step=2275500, episode=380 reward=0.7514759 (562.75 it/sec) -training >> step=2275600, episode=380 reward=0.7623484 (563.85 it/sec) -training >> step=2275700, episode=380 reward=0.7914857 (534.92 it/sec) -training >> step=2275800, episode=380 reward=0.7691976 (527.16 it/sec) -training >> step=2275900, episode=380 reward=0.7636452 (603.36 it/sec) -training >> step=2276000, episode=380 reward=0.7769561 (566.23 it/sec) -training >> step=2276100, episode=380 reward=0.7694539 (545.50 it/sec) -training >> step=2276200, episode=380 reward=0.7737787 (589.26 it/sec) -training >> step=2276300, episode=380 reward=0.7681879 (524.08 it/sec) -training >> step=2276400, episode=380 reward=0.7586409 (510.77 it/sec) -training >> step=2276500, episode=380 reward=0.7488544 (570.32 it/sec) -training >> step=2276600, episode=380 reward=0.7614852 (583.94 it/sec) -training >> step=2276700, episode=380 reward=0.7759539 (540.57 it/sec) -training >> step=2276800, episode=380 reward=0.7410935 (564.61 it/sec) -training >> step=2276900, episode=380 reward=0.7527537 (547.38 it/sec) -training >> step=2277000, episode=380 reward=0.7683367 (597.47 it/sec) -training >> step=2277100, episode=380 reward=0.7657297 (550.46 it/sec) -training >> step=2277200, episode=380 reward=0.7534899 (558.30 it/sec) -training >> step=2277300, episode=380 reward=0.7472226 (574.58 it/sec) -training >> step=2277400, episode=380 reward=0.7494003 (549.99 it/sec) -training >> step=2277500, episode=380 reward=0.7686873 (538.47 it/sec) -training >> step=2277600, episode=380 reward=0.769676 (575.04 it/sec) -training >> step=2277700, episode=380 reward=0.7600803 (573.44 it/sec) -training >> step=2277800, episode=380 reward=0.7782781 (577.76 it/sec) -training >> step=2277900, episode=380 reward=0.7445561 (400.95 it/sec) -training >> step=2278000, episode=380 reward=0.744837 (583.93 it/sec) -training >> step=2278100, episode=380 reward=0.7484939 (581.89 it/sec) -training >> step=2278200, episode=380 reward=0.756734 (576.86 it/sec) -training >> step=2278300, episode=380 reward=0.781821 (572.20 it/sec) -training >> step=2278400, episode=380 reward=0.7760079 (564.07 it/sec) -training >> step=2278500, episode=380 reward=0.7564055 (564.73 it/sec) -training >> step=2278600, episode=380 reward=0.7617444 (567.12 it/sec) -training >> step=2278700, episode=380 reward=0.7691022 (544.52 it/sec) -training >> step=2278800, episode=380 reward=0.7593389 (575.44 it/sec) -training >> step=2278900, episode=380 reward=0.7667518 (584.90 it/sec) -training >> step=2279000, episode=380 reward=0.7793684 (580.23 it/sec) -training >> step=2279100, episode=380 reward=0.7538291 (560.74 it/sec) -training >> step=2279200, episode=380 reward=0.746147 (530.09 it/sec) -training >> step=2279300, episode=381 reward=0.7496454 (177.25 it/sec) -training >> step=2279400, episode=381 reward=0.7498489 (556.77 it/sec) -training >> step=2279500, episode=381 reward=0.7553757 (546.42 it/sec) -training >> step=2279600, episode=381 reward=0.7615694 (525.21 it/sec) -training >> step=2279700, episode=381 reward=0.7386339 (591.95 it/sec) -training >> step=2279800, episode=381 reward=0.7575055 (538.94 it/sec) -training >> step=2279900, episode=381 reward=0.7689934 (570.29 it/sec) -training >> step=2280000, episode=381 reward=0.7795075 (582.04 it/sec) -training >> step=2280100, episode=381 reward=0.7606915 (554.10 it/sec) -training >> step=2280200, episode=381 reward=0.7572486 (556.77 it/sec) -training >> step=2280300, episode=381 reward=0.7710626 (560.73 it/sec) -training >> step=2280400, episode=381 reward=0.7604198 (571.31 it/sec) -training >> step=2280500, episode=381 reward=0.7965823 (564.59 it/sec) -training >> step=2280600, episode=381 reward=0.7596998 (556.62 it/sec) -training >> step=2280700, episode=381 reward=0.7659791 (535.13 it/sec) -training >> step=2280800, episode=381 reward=0.7389953 (578.25 it/sec) -training >> step=2280900, episode=381 reward=0.7836936 (546.13 it/sec) -training >> step=2281000, episode=381 reward=0.7733218 (541.95 it/sec) -training >> step=2281100, episode=381 reward=0.7768511 (528.84 it/sec) -training >> step=2281200, episode=381 reward=0.7729701 (573.31 it/sec) -training >> step=2281300, episode=381 reward=0.763005 (537.03 it/sec) -training >> step=2281400, episode=381 reward=0.7595858 (556.98 it/sec) -training >> step=2281500, episode=381 reward=0.7732243 (575.71 it/sec) -training >> step=2281600, episode=381 reward=0.7747912 (564.83 it/sec) -training >> step=2281700, episode=381 reward=0.781323 (559.21 it/sec) -training >> step=2281800, episode=381 reward=0.7567199 (537.22 it/sec) -training >> step=2281900, episode=381 reward=0.7657437 (578.76 it/sec) -training >> step=2282000, episode=381 reward=0.7441029 (561.90 it/sec) -training >> step=2282100, episode=381 reward=0.7800347 (544.61 it/sec) -training >> step=2282200, episode=381 reward=0.7602604 (555.21 it/sec) -training >> step=2282300, episode=381 reward=0.7493929 (541.14 it/sec) -training >> step=2282400, episode=381 reward=0.7575386 (526.92 it/sec) -training >> step=2282500, episode=381 reward=0.7740401 (561.50 it/sec) -training >> step=2282600, episode=381 reward=0.7586798 (581.92 it/sec) -training >> step=2282700, episode=381 reward=0.7644468 (556.81 it/sec) -training >> step=2282800, episode=381 reward=0.7441414 (572.30 it/sec) -training >> step=2282900, episode=381 reward=0.7652799 (568.56 it/sec) -training >> step=2283000, episode=381 reward=0.7773342 (546.10 it/sec) -training >> step=2283100, episode=381 reward=0.7718436 (554.60 it/sec) -training >> step=2283200, episode=381 reward=0.7477643 (555.88 it/sec) -training >> step=2283300, episode=381 reward=0.7484439 (519.31 it/sec) -training >> step=2283400, episode=381 reward=0.7894338 (549.95 it/sec) -training >> step=2283500, episode=381 reward=0.7425553 (518.14 it/sec) -training >> step=2283600, episode=381 reward=0.7707555 (543.09 it/sec) -training >> step=2283700, episode=381 reward=0.7648259 (552.54 it/sec) -training >> step=2283800, episode=381 reward=0.7533551 (554.32 it/sec) -training >> step=2283900, episode=381 reward=0.7699583 (561.08 it/sec) -training >> step=2284000, episode=381 reward=0.7752951 (530.89 it/sec) -training >> step=2284100, episode=381 reward=0.7508144 (545.77 it/sec) -training >> step=2284200, episode=381 reward=0.7600226 (449.56 it/sec) -training >> step=2284300, episode=381 reward=0.7502551 (555.55 it/sec) -training >> step=2284400, episode=381 reward=0.7854238 (527.78 it/sec) -training >> step=2284500, episode=381 reward=0.7919366 (505.73 it/sec) -training >> step=2284600, episode=381 reward=0.7534073 (530.17 it/sec) -training >> step=2284700, episode=381 reward=0.7512939 (557.68 it/sec) -training >> step=2284800, episode=381 reward=0.7640047 (539.92 it/sec) -training >> step=2284900, episode=381 reward=0.7501962 (541.09 it/sec) -training >> step=2285000, episode=381 reward=0.7654104 (562.29 it/sec) -training >> step=2285100, episode=381 reward=0.737356 (526.09 it/sec) -training >> step=2285200, episode=381 reward=0.7789292 (536.25 it/sec) -training >> step=2285300, episode=382 reward=0.7602009 (160.34 it/sec) -training >> step=2285400, episode=382 reward=0.7651372 (522.69 it/sec) -training >> step=2285500, episode=382 reward=0.7298087 (548.37 it/sec) -training >> step=2285600, episode=382 reward=0.7418203 (531.98 it/sec) -training >> step=2285700, episode=382 reward=0.7701825 (516.39 it/sec) -training >> step=2285800, episode=382 reward=0.7660421 (553.75 it/sec) -training >> step=2285900, episode=382 reward=0.7587504 (560.79 it/sec) -training >> step=2286000, episode=382 reward=0.7545574 (540.47 it/sec) -training >> step=2286100, episode=382 reward=0.7981007 (537.32 it/sec) -training >> step=2286200, episode=382 reward=0.7744433 (551.60 it/sec) -training >> step=2286300, episode=382 reward=0.7831496 (586.67 it/sec) -training >> step=2286400, episode=382 reward=0.7736857 (534.60 it/sec) -training >> step=2286500, episode=382 reward=0.7818404 (523.63 it/sec) -training >> step=2286600, episode=382 reward=0.7712397 (552.69 it/sec) -training >> step=2286700, episode=382 reward=0.7553688 (524.85 it/sec) -training >> step=2286800, episode=382 reward=0.7470567 (559.65 it/sec) -training >> step=2286900, episode=382 reward=0.7735272 (558.85 it/sec) -training >> step=2287000, episode=382 reward=0.7656979 (531.36 it/sec) -training >> step=2287100, episode=382 reward=0.7808898 (512.19 it/sec) -training >> step=2287200, episode=382 reward=0.7502789 (532.33 it/sec) -training >> step=2287300, episode=382 reward=0.7814078 (547.62 it/sec) -training >> step=2287400, episode=382 reward=0.7637864 (559.03 it/sec) -training >> step=2287500, episode=382 reward=0.7682053 (571.44 it/sec) -training >> step=2287600, episode=382 reward=0.7595819 (546.84 it/sec) -training >> step=2287700, episode=382 reward=0.7670097 (573.90 it/sec) -training >> step=2287800, episode=382 reward=0.7740673 (551.68 it/sec) -training >> step=2287900, episode=382 reward=0.7399457 (532.17 it/sec) -training >> step=2288000, episode=382 reward=0.7801996 (558.61 it/sec) -training >> step=2288100, episode=382 reward=0.7633638 (535.26 it/sec) -training >> step=2288200, episode=382 reward=0.751641 (492.43 it/sec) -training >> step=2288300, episode=382 reward=0.7585765 (552.45 it/sec) -training >> step=2288400, episode=382 reward=0.7543396 (554.09 it/sec) -training >> step=2288500, episode=382 reward=0.7878801 (572.83 it/sec) -training >> step=2288600, episode=382 reward=0.7608441 (553.11 it/sec) -training >> step=2288700, episode=382 reward=0.775522 (525.21 it/sec) -training >> step=2288800, episode=382 reward=0.7679122 (538.12 it/sec) -training >> step=2288900, episode=382 reward=0.7674899 (539.07 it/sec) -training >> step=2289000, episode=382 reward=0.7608264 (547.58 it/sec) -training >> step=2289100, episode=382 reward=0.7415878 (581.20 it/sec) -training >> step=2289200, episode=382 reward=0.7664661 (543.41 it/sec) -training >> step=2289300, episode=382 reward=0.7587548 (537.84 it/sec) -training >> step=2289400, episode=382 reward=0.7555866 (529.86 it/sec) -training >> step=2289500, episode=382 reward=0.7534474 (548.92 it/sec) -training >> step=2289600, episode=382 reward=0.7588194 (579.04 it/sec) -training >> step=2289700, episode=382 reward=0.7482095 (538.41 it/sec) -training >> step=2289800, episode=382 reward=0.7853031 (431.89 it/sec) -training >> step=2289900, episode=382 reward=0.7545505 (560.02 it/sec) -training >> step=2290000, episode=382 reward=0.7514061 (551.73 it/sec) -training >> step=2290100, episode=382 reward=0.7701598 (544.46 it/sec) -training >> step=2290200, episode=382 reward=0.7639448 (562.36 it/sec) -training >> step=2290300, episode=382 reward=0.7475353 (379.66 it/sec) -training >> step=2290400, episode=382 reward=0.7645628 (543.46 it/sec) -training >> step=2290500, episode=382 reward=0.7528386 (549.47 it/sec) -training >> step=2290600, episode=382 reward=0.7324833 (547.66 it/sec) -training >> step=2290700, episode=382 reward=0.7740817 (548.07 it/sec) -training >> step=2290800, episode=382 reward=0.7628569 (536.40 it/sec) -training >> step=2290900, episode=382 reward=0.7503903 (528.24 it/sec) -training >> step=2291000, episode=382 reward=0.7605816 (540.48 it/sec) -training >> step=2291100, episode=382 reward=0.7573634 (567.11 it/sec) -training >> step=2291200, episode=382 reward=0.7296947 (517.93 it/sec) -training >> step=2291300, episode=383 reward=0.765322 (193.63 it/sec) -training >> step=2291400, episode=383 reward=0.7448886 (572.11 it/sec) -training >> step=2291500, episode=383 reward=0.7470322 (500.84 it/sec) -training >> step=2291600, episode=383 reward=0.7452809 (545.84 it/sec) -training >> step=2291700, episode=383 reward=0.7643836 (522.84 it/sec) -training >> step=2291800, episode=383 reward=0.7581015 (513.67 it/sec) -training >> step=2291900, episode=383 reward=0.7546352 (540.18 it/sec) -training >> step=2292000, episode=383 reward=0.7805626 (532.91 it/sec) -training >> step=2292100, episode=383 reward=0.7743559 (534.35 it/sec) -training >> step=2292200, episode=383 reward=0.7573645 (541.99 it/sec) -training >> step=2292300, episode=383 reward=0.7602558 (552.86 it/sec) -training >> step=2292400, episode=383 reward=0.761961 (519.87 it/sec) -training >> step=2292500, episode=383 reward=0.7818691 (533.35 it/sec) -training >> step=2292600, episode=383 reward=0.7592565 (533.33 it/sec) -training >> step=2292700, episode=383 reward=0.7643666 (548.80 it/sec) -training >> step=2292800, episode=383 reward=0.7806646 (552.18 it/sec) -training >> step=2292900, episode=383 reward=0.7767687 (535.64 it/sec) -training >> step=2293000, episode=383 reward=0.7780081 (580.39 it/sec) -training >> step=2293100, episode=383 reward=0.7663406 (512.45 it/sec) -training >> step=2293200, episode=383 reward=0.7700896 (545.90 it/sec) -training >> step=2293300, episode=383 reward=0.7538495 (482.26 it/sec) -training >> step=2293400, episode=383 reward=0.774398 (411.93 it/sec) -training >> step=2293500, episode=383 reward=0.7639431 (421.14 it/sec) -training >> step=2293600, episode=383 reward=0.76505 (518.10 it/sec) -training >> step=2293700, episode=383 reward=0.7453376 (522.61 it/sec) -training >> step=2293800, episode=383 reward=0.7822719 (521.69 it/sec) -training >> step=2293900, episode=383 reward=0.7609733 (497.03 it/sec) -training >> step=2294000, episode=383 reward=0.7666437 (528.01 it/sec) -training >> step=2294100, episode=383 reward=0.7628555 (555.54 it/sec) -training >> step=2294200, episode=383 reward=0.7626625 (535.35 it/sec) -training >> step=2294300, episode=383 reward=0.7718301 (529.31 it/sec) -training >> step=2294400, episode=383 reward=0.7785335 (549.54 it/sec) -training >> step=2294500, episode=383 reward=0.7729062 (546.98 it/sec) -training >> step=2294600, episode=383 reward=0.7614812 (563.26 it/sec) -training >> step=2294700, episode=383 reward=0.7624041 (531.84 it/sec) -training >> step=2294800, episode=383 reward=0.7877387 (547.16 it/sec) -training >> step=2294900, episode=383 reward=0.7657508 (534.16 it/sec) -training >> step=2295000, episode=383 reward=0.7683094 (523.16 it/sec) -training >> step=2295100, episode=383 reward=0.7595432 (560.05 it/sec) -training >> step=2295200, episode=383 reward=0.7594245 (564.85 it/sec) -training >> step=2295300, episode=383 reward=0.7617496 (546.57 it/sec) -training >> step=2295400, episode=383 reward=0.7866583 (523.40 it/sec) -training >> step=2295500, episode=383 reward=0.7681075 (547.09 it/sec) -training >> step=2295600, episode=383 reward=0.7612595 (533.92 it/sec) -training >> step=2295700, episode=383 reward=0.7602094 (561.41 it/sec) -training >> step=2295800, episode=383 reward=0.767063 (534.26 it/sec) -training >> step=2295900, episode=383 reward=0.7647189 (546.12 it/sec) -training >> step=2296000, episode=383 reward=0.7616315 (567.69 it/sec) -training >> step=2296100, episode=383 reward=0.7687588 (546.44 it/sec) -training >> step=2296200, episode=383 reward=0.766499 (517.85 it/sec) -training >> step=2296300, episode=383 reward=0.7549801 (536.44 it/sec) -training >> step=2296400, episode=383 reward=0.7766336 (551.11 it/sec) -training >> step=2296500, episode=383 reward=0.7533315 (440.51 it/sec) -training >> step=2296600, episode=383 reward=0.7931365 (528.98 it/sec) -training >> step=2296700, episode=383 reward=0.7613988 (520.15 it/sec) -training >> step=2296800, episode=383 reward=0.7646144 (570.14 it/sec) -training >> step=2296900, episode=383 reward=0.7646922 (554.78 it/sec) -training >> step=2297000, episode=383 reward=0.7524984 (540.95 it/sec) -training >> step=2297100, episode=383 reward=0.7647752 (539.65 it/sec) -training >> step=2297200, episode=383 reward=0.7557086 (519.37 it/sec) -training >> step=2297300, episode=384 reward=0.7516532 (202.06 it/sec) -training >> step=2297400, episode=384 reward=0.7549533 (524.92 it/sec) -training >> step=2297500, episode=384 reward=0.733268 (521.03 it/sec) -training >> step=2297600, episode=384 reward=0.760447 (540.40 it/sec) -training >> step=2297700, episode=384 reward=0.7487454 (559.76 it/sec) -training >> step=2297800, episode=384 reward=0.7769173 (534.92 it/sec) -training >> step=2297900, episode=384 reward=0.7965299 (528.64 it/sec) -training >> step=2298000, episode=384 reward=0.7616692 (565.74 it/sec) -training >> step=2298100, episode=384 reward=0.7853945 (537.81 it/sec) -training >> step=2298200, episode=384 reward=0.7744654 (534.56 it/sec) -training >> step=2298300, episode=384 reward=0.7746286 (558.57 it/sec) -training >> step=2298400, episode=384 reward=0.7521481 (560.26 it/sec) -training >> step=2298500, episode=384 reward=0.7447159 (556.26 it/sec) -training >> step=2298600, episode=384 reward=0.7407012 (529.54 it/sec) -training >> step=2298700, episode=384 reward=0.7258655 (541.56 it/sec) -training >> step=2298800, episode=384 reward=0.7596389 (538.24 it/sec) -training >> step=2298900, episode=384 reward=0.767213 (515.99 it/sec) -training >> step=2299000, episode=384 reward=0.751108 (530.68 it/sec) -training >> step=2299100, episode=384 reward=0.7597045 (550.68 it/sec) -training >> step=2299200, episode=384 reward=0.7699773 (549.44 it/sec) -training >> step=2299300, episode=384 reward=0.7715095 (540.28 it/sec) -training >> step=2299400, episode=384 reward=0.7763759 (549.09 it/sec) -training >> step=2299500, episode=384 reward=0.7851542 (551.18 it/sec) -training >> step=2299600, episode=384 reward=0.7500765 (564.61 it/sec) -training >> step=2299700, episode=384 reward=0.7536769 (525.25 it/sec) -training >> step=2299800, episode=384 reward=0.7549078 (538.12 it/sec) -training >> step=2299900, episode=384 reward=0.7801929 (569.99 it/sec) -training >> step=2300000, episode=384 reward=0.7654072 (535.75 it/sec) -training >> step=2300100, episode=384 reward=0.7788559 (513.87 it/sec) -training >> step=2300200, episode=384 reward=0.7774299 (550.28 it/sec) -training >> step=2300300, episode=384 reward=0.7479107 (554.66 it/sec) -training >> step=2300400, episode=384 reward=0.7513404 (500.29 it/sec) -training >> step=2300500, episode=384 reward=0.7835862 (527.28 it/sec) -training >> step=2300600, episode=384 reward=0.7724498 (531.58 it/sec) -training >> step=2300700, episode=384 reward=0.7855319 (578.82 it/sec) -training >> step=2300800, episode=384 reward=0.760695 (549.46 it/sec) -training >> step=2300900, episode=384 reward=0.7448847 (538.26 it/sec) -training >> step=2301000, episode=384 reward=0.762371 (495.84 it/sec) -training >> step=2301100, episode=384 reward=0.7559714 (548.50 it/sec) -training >> step=2301200, episode=384 reward=0.7704693 (549.02 it/sec) -training >> step=2301300, episode=384 reward=0.7706566 (552.45 it/sec) -training >> step=2301400, episode=384 reward=0.7576959 (550.88 it/sec) -training >> step=2301500, episode=384 reward=0.7796228 (553.76 it/sec) -training >> step=2301600, episode=384 reward=0.7502246 (520.35 it/sec) -training >> step=2301700, episode=384 reward=0.7751923 (506.53 it/sec) -training >> step=2301800, episode=384 reward=0.7332312 (559.39 it/sec) -training >> step=2301900, episode=384 reward=0.7637578 (558.02 it/sec) -training >> step=2302000, episode=384 reward=0.7582427 (529.95 it/sec) -training >> step=2302100, episode=384 reward=0.7476911 (534.46 it/sec) -training >> step=2302200, episode=384 reward=0.7892186 (540.37 it/sec) -training >> step=2302300, episode=384 reward=0.7583157 (559.96 it/sec) -training >> step=2302400, episode=384 reward=0.7625306 (544.23 it/sec) -training >> step=2302500, episode=384 reward=0.7630158 (531.86 it/sec) -training >> step=2302600, episode=384 reward=0.7691779 (549.19 it/sec) -training >> step=2302700, episode=384 reward=0.7585524 (426.01 it/sec) -training >> step=2302800, episode=384 reward=0.7572687 (505.56 it/sec) -training >> step=2302900, episode=384 reward=0.7516494 (564.45 it/sec) -training >> step=2303000, episode=384 reward=0.7590654 (555.88 it/sec) -training >> step=2303100, episode=384 reward=0.7713886 (537.35 it/sec) -training >> step=2303200, episode=384 reward=0.7614555 (507.16 it/sec) -training >> step=2303300, episode=385 reward=0.7489905 (197.04 it/sec) -training >> step=2303400, episode=385 reward=0.7458395 (529.48 it/sec) -training >> step=2303500, episode=385 reward=0.7731532 (543.35 it/sec) -training >> step=2303600, episode=385 reward=0.7525362 (520.41 it/sec) -training >> step=2303700, episode=385 reward=0.7377142 (518.42 it/sec) -training >> step=2303800, episode=385 reward=0.7752761 (514.68 it/sec) -training >> step=2303900, episode=385 reward=0.7716815 (503.70 it/sec) -training >> step=2304000, episode=385 reward=0.7638931 (510.40 it/sec) -training >> step=2304100, episode=385 reward=0.7608506 (527.11 it/sec) -training >> step=2304200, episode=385 reward=0.7734212 (532.68 it/sec) -training >> step=2304300, episode=385 reward=0.762191 (560.22 it/sec) -training >> step=2304400, episode=385 reward=0.7797022 (547.37 it/sec) -training >> step=2304500, episode=385 reward=0.7536123 (573.32 it/sec) -training >> step=2304600, episode=385 reward=0.7463762 (507.39 it/sec) -training >> step=2304700, episode=385 reward=0.7803624 (525.33 it/sec) -training >> step=2304800, episode=385 reward=0.7568792 (546.46 it/sec) -training >> step=2304900, episode=385 reward=0.7671506 (549.29 it/sec) -training >> step=2305000, episode=385 reward=0.7575197 (548.54 it/sec) -training >> step=2305100, episode=385 reward=0.775387 (560.47 it/sec) -training >> step=2305200, episode=385 reward=0.753094 (554.90 it/sec) -training >> step=2305300, episode=385 reward=0.7639475 (517.57 it/sec) -training >> step=2305400, episode=385 reward=0.7608896 (542.48 it/sec) -training >> step=2305500, episode=385 reward=0.7646894 (557.71 it/sec) -training >> step=2305600, episode=385 reward=0.7509774 (537.30 it/sec) -training >> step=2305700, episode=385 reward=0.7571731 (546.19 it/sec) -training >> step=2305800, episode=385 reward=0.7691908 (527.76 it/sec) -training >> step=2305900, episode=385 reward=0.7758237 (537.77 it/sec) -training >> step=2306000, episode=385 reward=0.771259 (553.48 it/sec) -training >> step=2306100, episode=385 reward=0.7742605 (585.28 it/sec) -training >> step=2306200, episode=385 reward=0.7655748 (528.07 it/sec) -training >> step=2306300, episode=385 reward=0.7705083 (515.79 it/sec) -training >> step=2306400, episode=385 reward=0.7488294 (502.46 it/sec) -training >> step=2306500, episode=385 reward=0.7780764 (550.03 it/sec) -training >> step=2306600, episode=385 reward=0.7584466 (559.35 it/sec) -training >> step=2306700, episode=385 reward=0.7553006 (527.46 it/sec) -training >> step=2306800, episode=385 reward=0.77487 (555.47 it/sec) -training >> step=2306900, episode=385 reward=0.791553 (533.27 it/sec) -training >> step=2307000, episode=385 reward=0.7713808 (535.89 it/sec) -training >> step=2307100, episode=385 reward=0.7774177 (556.53 it/sec) -training >> step=2307200, episode=385 reward=0.7741764 (554.54 it/sec) -training >> step=2307300, episode=385 reward=0.7557655 (541.57 it/sec) -training >> step=2307400, episode=385 reward=0.7527787 (520.86 it/sec) -training >> step=2307500, episode=385 reward=0.7388383 (550.76 it/sec) -training >> step=2307600, episode=385 reward=0.7729542 (585.95 it/sec) -training >> step=2307700, episode=385 reward=0.7598902 (505.67 it/sec) -training >> step=2307800, episode=385 reward=0.761719 (577.72 it/sec) -training >> step=2307900, episode=385 reward=0.7716073 (587.62 it/sec) -training >> step=2308000, episode=385 reward=0.7767256 (489.42 it/sec) -training >> step=2308100, episode=385 reward=0.7514667 (561.96 it/sec) -training >> step=2308200, episode=385 reward=0.7421442 (545.17 it/sec) -training >> step=2308300, episode=385 reward=0.7706984 (541.44 it/sec) -training >> step=2308400, episode=385 reward=0.7547079 (533.95 it/sec) -training >> step=2308500, episode=385 reward=0.7500045 (550.21 it/sec) -training >> step=2308600, episode=385 reward=0.7702373 (543.12 it/sec) -training >> step=2308700, episode=385 reward=0.7694314 (570.22 it/sec) -training >> step=2308800, episode=385 reward=0.7689491 (555.39 it/sec) -training >> step=2308900, episode=385 reward=0.7858541 (383.97 it/sec) -training >> step=2309000, episode=385 reward=0.7584127 (549.49 it/sec) -training >> step=2309100, episode=385 reward=0.7548305 (525.87 it/sec) -training >> step=2309200, episode=385 reward=0.7502068 (562.73 it/sec) -training >> step=2309300, episode=386 reward=0.7481837 (203.70 it/sec) -training >> step=2309400, episode=386 reward=0.7485113 (514.78 it/sec) -training >> step=2309500, episode=386 reward=0.7404287 (529.69 it/sec) -training >> step=2309600, episode=386 reward=0.7373233 (543.05 it/sec) -training >> step=2309700, episode=386 reward=0.7556186 (541.26 it/sec) -training >> step=2309800, episode=386 reward=0.7547656 (556.14 it/sec) -training >> step=2309900, episode=386 reward=0.7422633 (541.84 it/sec) -training >> step=2310000, episode=386 reward=0.7597364 (526.89 it/sec) -training >> step=2310100, episode=386 reward=0.7659953 (533.92 it/sec) -training >> step=2310200, episode=386 reward=0.778801 (536.30 it/sec) -training >> step=2310300, episode=386 reward=0.7799726 (561.98 it/sec) -training >> step=2310400, episode=386 reward=0.753433 (551.43 it/sec) -training >> step=2310500, episode=386 reward=0.7809146 (520.47 it/sec) -training >> step=2310600, episode=386 reward=0.7653491 (520.40 it/sec) -training >> step=2310700, episode=386 reward=0.7406804 (596.65 it/sec) -training >> step=2310800, episode=386 reward=0.7667294 (506.81 it/sec) -training >> step=2310900, episode=386 reward=0.7626582 (553.08 it/sec) -training >> step=2311000, episode=386 reward=0.755876 (572.59 it/sec) -training >> step=2311100, episode=386 reward=0.7591572 (507.58 it/sec) -training >> step=2311200, episode=386 reward=0.7849202 (550.06 it/sec) -training >> step=2311300, episode=386 reward=0.7851548 (520.82 it/sec) -training >> step=2311400, episode=386 reward=0.7684479 (572.49 it/sec) -training >> step=2311500, episode=386 reward=0.7713968 (539.74 it/sec) -training >> step=2311600, episode=386 reward=0.7690977 (533.19 it/sec) -training >> step=2311700, episode=386 reward=0.7577834 (525.36 it/sec) -training >> step=2311800, episode=386 reward=0.7771444 (555.26 it/sec) -training >> step=2311900, episode=386 reward=0.7679617 (504.25 it/sec) -training >> step=2312000, episode=386 reward=0.7524627 (555.21 it/sec) -training >> step=2312100, episode=386 reward=0.7517468 (534.56 it/sec) -training >> step=2312200, episode=386 reward=0.7590449 (483.88 it/sec) -training >> step=2312300, episode=386 reward=0.7776266 (562.17 it/sec) -training >> step=2312400, episode=386 reward=0.7525111 (551.90 it/sec) -training >> step=2312500, episode=386 reward=0.7765787 (565.33 it/sec) -training >> step=2312600, episode=386 reward=0.7849259 (547.86 it/sec) -training >> step=2312700, episode=386 reward=0.764313 (511.85 it/sec) -training >> step=2312800, episode=386 reward=0.7603 (464.49 it/sec) -training >> step=2312900, episode=386 reward=0.748323 (541.51 it/sec) -training >> step=2313000, episode=386 reward=0.7505754 (548.32 it/sec) -training >> step=2313100, episode=386 reward=0.7718313 (537.73 it/sec) -training >> step=2313200, episode=386 reward=0.7689933 (507.09 it/sec) -training >> step=2313300, episode=386 reward=0.7505823 (492.45 it/sec) -training >> step=2313400, episode=386 reward=0.7717578 (549.25 it/sec) -training >> step=2313500, episode=386 reward=0.7685598 (477.93 it/sec) -training >> step=2313600, episode=386 reward=0.7637514 (544.87 it/sec) -training >> step=2313700, episode=386 reward=0.7433578 (543.96 it/sec) -training >> step=2313800, episode=386 reward=0.7801382 (513.81 it/sec) -training >> step=2313900, episode=386 reward=0.754642 (501.57 it/sec) -training >> step=2314000, episode=386 reward=0.7651685 (530.87 it/sec) -training >> step=2314100, episode=386 reward=0.7768332 (545.16 it/sec) -training >> step=2314200, episode=386 reward=0.7525929 (489.44 it/sec) -training >> step=2314300, episode=386 reward=0.768547 (466.83 it/sec) -training >> step=2314400, episode=386 reward=0.7617695 (529.44 it/sec) -training >> step=2314500, episode=386 reward=0.767915 (569.39 it/sec) -training >> step=2314600, episode=386 reward=0.7468323 (545.81 it/sec) -training >> step=2314700, episode=386 reward=0.7521526 (546.32 it/sec) -training >> step=2314800, episode=386 reward=0.7534342 (514.04 it/sec) -training >> step=2314900, episode=386 reward=0.7798403 (535.89 it/sec) -training >> step=2315000, episode=386 reward=0.7572861 (392.02 it/sec) -training >> step=2315100, episode=386 reward=0.7701856 (524.84 it/sec) -training >> step=2315200, episode=386 reward=0.7435804 (471.47 it/sec) -training >> step=2315300, episode=387 reward=0.7904413 (220.65 it/sec) -training >> step=2315400, episode=387 reward=0.770281 (533.49 it/sec) -training >> step=2315500, episode=387 reward=0.7552083 (524.69 it/sec) -training >> step=2315600, episode=387 reward=0.7547778 (538.41 it/sec) -training >> step=2315700, episode=387 reward=0.7430227 (512.43 it/sec) -training >> step=2315800, episode=387 reward=0.7624962 (508.06 it/sec) -training >> step=2315900, episode=387 reward=0.7732551 (521.11 it/sec) -training >> step=2316000, episode=387 reward=0.7643223 (521.26 it/sec) -training >> step=2316100, episode=387 reward=0.7758415 (506.78 it/sec) -training >> step=2316200, episode=387 reward=0.7326751 (532.76 it/sec) -training >> step=2316300, episode=387 reward=0.7627977 (520.69 it/sec) -training >> step=2316400, episode=387 reward=0.7870462 (534.06 it/sec) -training >> step=2316500, episode=387 reward=0.7629458 (526.33 it/sec) -training >> step=2316600, episode=387 reward=0.7762877 (524.36 it/sec) -training >> step=2316700, episode=387 reward=0.767082 (498.52 it/sec) -training >> step=2316800, episode=387 reward=0.7695739 (542.34 it/sec) -training >> step=2316900, episode=387 reward=0.7503107 (519.63 it/sec) -training >> step=2317000, episode=387 reward=0.7681382 (583.72 it/sec) -training >> step=2317100, episode=387 reward=0.7820626 (524.13 it/sec) -training >> step=2317200, episode=387 reward=0.779686 (545.31 it/sec) -training >> step=2317300, episode=387 reward=0.7795119 (548.22 it/sec) -training >> step=2317400, episode=387 reward=0.7535868 (525.85 it/sec) -training >> step=2317500, episode=387 reward=0.7705861 (557.73 it/sec) -training >> step=2317600, episode=387 reward=0.7611629 (552.00 it/sec) -training >> step=2317700, episode=387 reward=0.7755785 (541.68 it/sec) -training >> step=2317800, episode=387 reward=0.7591409 (516.20 it/sec) -training >> step=2317900, episode=387 reward=0.7878329 (543.78 it/sec) -training >> step=2318000, episode=387 reward=0.7704211 (559.69 it/sec) -training >> step=2318100, episode=387 reward=0.7791631 (560.06 it/sec) -training >> step=2318200, episode=387 reward=0.7766341 (521.45 it/sec) -training >> step=2318300, episode=387 reward=0.7709619 (573.73 it/sec) -training >> step=2318400, episode=387 reward=0.7672215 (539.60 it/sec) -training >> step=2318500, episode=387 reward=0.7770723 (531.22 it/sec) -training >> step=2318600, episode=387 reward=0.7591227 (547.03 it/sec) -training >> step=2318700, episode=387 reward=0.7628462 (547.38 it/sec) -training >> step=2318800, episode=387 reward=0.7514255 (582.97 it/sec) -training >> step=2318900, episode=387 reward=0.7599083 (527.44 it/sec) -training >> step=2319000, episode=387 reward=0.7692375 (544.77 it/sec) -training >> step=2319100, episode=387 reward=0.7722158 (555.50 it/sec) -training >> step=2319200, episode=387 reward=0.7641646 (567.87 it/sec) -training >> step=2319300, episode=387 reward=0.7611446 (536.90 it/sec) -training >> step=2319400, episode=387 reward=0.7603307 (544.99 it/sec) -training >> step=2319500, episode=387 reward=0.7427579 (561.39 it/sec) -training >> step=2319600, episode=387 reward=0.7579135 (556.44 it/sec) -training >> step=2319700, episode=387 reward=0.7625883 (544.29 it/sec) -training >> step=2319800, episode=387 reward=0.7652817 (555.00 it/sec) -training >> step=2319900, episode=387 reward=0.7590526 (541.25 it/sec) -training >> step=2320000, episode=387 reward=0.7657334 (529.86 it/sec) -training >> step=2320100, episode=387 reward=0.7610999 (543.80 it/sec) -training >> step=2320200, episode=387 reward=0.7447851 (532.18 it/sec) -training >> step=2320300, episode=387 reward=0.768549 (566.85 it/sec) -training >> step=2320400, episode=387 reward=0.7779822 (564.69 it/sec) -training >> step=2320500, episode=387 reward=0.7704478 (538.68 it/sec) -training >> step=2320600, episode=387 reward=0.7939997 (526.69 it/sec) -training >> step=2320700, episode=387 reward=0.7562046 (570.57 it/sec) -training >> step=2320800, episode=387 reward=0.7790399 (567.66 it/sec) -training >> step=2320900, episode=387 reward=0.7501889 (540.97 it/sec) -training >> step=2321000, episode=387 reward=0.7524974 (556.34 it/sec) -training >> step=2321100, episode=387 reward=0.7806944 (495.03 it/sec) -training >> step=2321200, episode=387 reward=0.7894382 (358.92 it/sec) -training >> step=2321300, episode=388 reward=0.7519763 (212.81 it/sec) -training >> step=2321400, episode=388 reward=0.7411635 (520.86 it/sec) -training >> step=2321500, episode=388 reward=0.7419361 (530.02 it/sec) -training >> step=2321600, episode=388 reward=0.7503278 (527.22 it/sec) -training >> step=2321700, episode=388 reward=0.7728011 (532.39 it/sec) -training >> step=2321800, episode=388 reward=0.7520297 (552.11 it/sec) -training >> step=2321900, episode=388 reward=0.7582465 (531.57 it/sec) -training >> step=2322000, episode=388 reward=0.7625574 (527.72 it/sec) -training >> step=2322100, episode=388 reward=0.792205 (536.87 it/sec) -training >> step=2322200, episode=388 reward=0.7658902 (533.35 it/sec) -training >> step=2322300, episode=388 reward=0.7817464 (554.09 it/sec) -training >> step=2322400, episode=388 reward=0.7709394 (553.79 it/sec) -training >> step=2322500, episode=388 reward=0.75881 (526.48 it/sec) -training >> step=2322600, episode=388 reward=0.7525654 (542.81 it/sec) -training >> step=2322700, episode=388 reward=0.7980509 (517.36 it/sec) -training >> step=2322800, episode=388 reward=0.779732 (535.98 it/sec) -training >> step=2322900, episode=388 reward=0.7603632 (563.05 it/sec) -training >> step=2323000, episode=388 reward=0.7733318 (524.34 it/sec) -training >> step=2323100, episode=388 reward=0.783599 (552.15 it/sec) -training >> step=2323200, episode=388 reward=0.7598302 (537.47 it/sec) -training >> step=2323300, episode=388 reward=0.7854649 (518.40 it/sec) -training >> step=2323400, episode=388 reward=0.7813835 (575.87 it/sec) -training >> step=2323500, episode=388 reward=0.7785 (528.25 it/sec) -training >> step=2323600, episode=388 reward=0.8010872 (519.84 it/sec) -training >> step=2323700, episode=388 reward=0.7723732 (541.69 it/sec) -training >> step=2323800, episode=388 reward=0.7737371 (569.69 it/sec) -training >> step=2323900, episode=388 reward=0.7792543 (497.74 it/sec) -training >> step=2324000, episode=388 reward=0.7573 (542.41 it/sec) -training >> step=2324100, episode=388 reward=0.7683403 (509.88 it/sec) -training >> step=2324200, episode=388 reward=0.7668331 (525.57 it/sec) -training >> step=2324300, episode=388 reward=0.7810187 (535.72 it/sec) -training >> step=2324400, episode=388 reward=0.7795278 (535.40 it/sec) -training >> step=2324500, episode=388 reward=0.7581224 (560.94 it/sec) -training >> step=2324600, episode=388 reward=0.7653367 (540.83 it/sec) -training >> step=2324700, episode=388 reward=0.7673078 (501.73 it/sec) -training >> step=2324800, episode=388 reward=0.7768806 (546.89 it/sec) -training >> step=2324900, episode=388 reward=0.7600424 (526.52 it/sec) -training >> step=2325000, episode=388 reward=0.7517329 (561.94 it/sec) -training >> step=2325100, episode=388 reward=0.7603269 (526.47 it/sec) -training >> step=2325200, episode=388 reward=0.7768014 (508.62 it/sec) -training >> step=2325300, episode=388 reward=0.7728885 (546.79 it/sec) -training >> step=2325400, episode=388 reward=0.7696124 (556.94 it/sec) -training >> step=2325500, episode=388 reward=0.7413585 (526.78 it/sec) -training >> step=2325600, episode=388 reward=0.786796 (550.09 it/sec) -training >> step=2325700, episode=388 reward=0.7782241 (529.23 it/sec) -training >> step=2325800, episode=388 reward=0.7667828 (537.10 it/sec) -training >> step=2325900, episode=388 reward=0.7803593 (552.67 it/sec) -training >> step=2326000, episode=388 reward=0.752219 (549.24 it/sec) -training >> step=2326100, episode=388 reward=0.7736692 (553.99 it/sec) -training >> step=2326200, episode=388 reward=0.7478614 (550.45 it/sec) -training >> step=2326300, episode=388 reward=0.7548575 (487.50 it/sec) -training >> step=2326400, episode=388 reward=0.7741882 (531.23 it/sec) -training >> step=2326500, episode=388 reward=0.7847979 (543.90 it/sec) -training >> step=2326600, episode=388 reward=0.7440022 (488.22 it/sec) -training >> step=2326700, episode=388 reward=0.7464243 (561.00 it/sec) -training >> step=2326800, episode=388 reward=0.727266 (528.90 it/sec) -training >> step=2326900, episode=388 reward=0.770858 (557.81 it/sec) -training >> step=2327000, episode=388 reward=0.7638144 (535.24 it/sec) -training >> step=2327100, episode=388 reward=0.7719501 (532.23 it/sec) -training >> step=2327200, episode=388 reward=0.77268 (367.51 it/sec) -training >> step=2327300, episode=389 reward=0.758273 (200.52 it/sec) -training >> step=2327400, episode=389 reward=0.7670083 (546.04 it/sec) -training >> step=2327500, episode=389 reward=0.744739 (545.61 it/sec) -training >> step=2327600, episode=389 reward=0.7635284 (564.61 it/sec) -training >> step=2327700, episode=389 reward=0.75586 (534.37 it/sec) -training >> step=2327800, episode=389 reward=0.7659618 (523.29 it/sec) -training >> step=2327900, episode=389 reward=0.7617134 (535.32 it/sec) -training >> step=2328000, episode=389 reward=0.7919116 (567.36 it/sec) -training >> step=2328100, episode=389 reward=0.7840346 (533.94 it/sec) -training >> step=2328200, episode=389 reward=0.7610936 (521.33 it/sec) -training >> step=2328300, episode=389 reward=0.7633713 (520.23 it/sec) -training >> step=2328400, episode=389 reward=0.7769228 (563.68 it/sec) -training >> step=2328500, episode=389 reward=0.7529305 (537.73 it/sec) -training >> step=2328600, episode=389 reward=0.775474 (541.25 it/sec) -training >> step=2328700, episode=389 reward=0.7756873 (556.29 it/sec) -training >> step=2328800, episode=389 reward=0.7921773 (540.83 it/sec) -training >> step=2328900, episode=389 reward=0.7592994 (551.97 it/sec) -training >> step=2329000, episode=389 reward=0.7786499 (532.88 it/sec) -training >> step=2329100, episode=389 reward=0.7461586 (539.67 it/sec) -training >> step=2329200, episode=389 reward=0.7787445 (554.84 it/sec) -training >> step=2329300, episode=389 reward=0.7560529 (515.89 it/sec) -training >> step=2329400, episode=389 reward=0.7763475 (519.07 it/sec) -training >> step=2329500, episode=389 reward=0.7687805 (552.09 it/sec) -training >> step=2329600, episode=389 reward=0.7702398 (565.30 it/sec) -training >> step=2329700, episode=389 reward=0.7689423 (537.51 it/sec) -training >> step=2329800, episode=389 reward=0.7508722 (526.48 it/sec) -training >> step=2329900, episode=389 reward=0.7705161 (558.55 it/sec) -training >> step=2330000, episode=389 reward=0.7841492 (523.78 it/sec) -training >> step=2330100, episode=389 reward=0.7729579 (529.55 it/sec) -training >> step=2330200, episode=389 reward=0.7654589 (538.26 it/sec) -training >> step=2330300, episode=389 reward=0.7931127 (560.33 it/sec) -training >> step=2330400, episode=389 reward=0.7519329 (480.64 it/sec) -training >> step=2330500, episode=389 reward=0.7629601 (535.72 it/sec) -training >> step=2330600, episode=389 reward=0.7780384 (544.98 it/sec) -training >> step=2330700, episode=389 reward=0.7620375 (555.96 it/sec) -training >> step=2330800, episode=389 reward=0.7661755 (566.46 it/sec) -training >> step=2330900, episode=389 reward=0.7751177 (541.60 it/sec) -training >> step=2331000, episode=389 reward=0.773358 (531.16 it/sec) -training >> step=2331100, episode=389 reward=0.788462 (525.75 it/sec) -training >> step=2331200, episode=389 reward=0.7605912 (535.43 it/sec) -training >> step=2331300, episode=389 reward=0.7538639 (552.78 it/sec) -training >> step=2331400, episode=389 reward=0.7467887 (572.56 it/sec) -training >> step=2331500, episode=389 reward=0.7655405 (561.76 it/sec) -training >> step=2331600, episode=389 reward=0.750555 (553.74 it/sec) -training >> step=2331700, episode=389 reward=0.7817306 (598.70 it/sec) -training >> step=2331800, episode=389 reward=0.7518362 (599.52 it/sec) -training >> step=2331900, episode=389 reward=0.7872118 (564.58 it/sec) -training >> step=2332000, episode=389 reward=0.7606876 (570.39 it/sec) -training >> step=2332100, episode=389 reward=0.7653533 (599.68 it/sec) -training >> step=2332200, episode=389 reward=0.7748395 (567.62 it/sec) -training >> step=2332300, episode=389 reward=0.757464 (571.90 it/sec) -training >> step=2332400, episode=389 reward=0.7660543 (570.44 it/sec) -training >> step=2332500, episode=389 reward=0.7390755 (593.00 it/sec) -training >> step=2332600, episode=389 reward=0.7476558 (552.76 it/sec) -training >> step=2332700, episode=389 reward=0.7634903 (584.07 it/sec) -training >> step=2332800, episode=389 reward=0.7620477 (578.13 it/sec) -training >> step=2332900, episode=389 reward=0.7468645 (602.80 it/sec) -training >> step=2333000, episode=389 reward=0.7630946 (579.86 it/sec) -training >> step=2333100, episode=389 reward=0.7391064 (570.63 it/sec) -training >> step=2333200, episode=389 reward=0.7482546 (568.56 it/sec) -training >> step=2333300, episode=390 reward=0.7403491 (166.40 it/sec) -training >> step=2333400, episode=390 reward=0.7522384 (562.47 it/sec) -training >> step=2333500, episode=390 reward=0.7431259 (560.01 it/sec) -training >> step=2333600, episode=390 reward=0.7668801 (568.11 it/sec) -training >> step=2333700, episode=390 reward=0.7557682 (572.84 it/sec) -training >> step=2333800, episode=390 reward=0.7580101 (584.34 it/sec) -training >> step=2333900, episode=390 reward=0.7488008 (573.40 it/sec) -training >> step=2334000, episode=390 reward=0.7700107 (557.58 it/sec) -training >> step=2334100, episode=390 reward=0.7666944 (550.96 it/sec) -training >> step=2334200, episode=390 reward=0.7546565 (607.40 it/sec) -training >> step=2334300, episode=390 reward=0.7490874 (590.87 it/sec) -training >> step=2334400, episode=390 reward=0.771716 (570.58 it/sec) -training >> step=2334500, episode=390 reward=0.747203 (603.50 it/sec) -training >> step=2334600, episode=390 reward=0.7419762 (562.17 it/sec) -training >> step=2334700, episode=390 reward=0.7621604 (569.51 it/sec) -training >> step=2334800, episode=390 reward=0.7609712 (586.72 it/sec) -training >> step=2334900, episode=390 reward=0.7809264 (589.72 it/sec) -training >> step=2335000, episode=390 reward=0.7751686 (567.92 it/sec) -training >> step=2335100, episode=390 reward=0.7964826 (565.99 it/sec) -training >> step=2335200, episode=390 reward=0.7652347 (577.29 it/sec) -training >> step=2335300, episode=390 reward=0.7691609 (578.96 it/sec) -training >> step=2335400, episode=390 reward=0.7595937 (598.57 it/sec) -training >> step=2335500, episode=390 reward=0.777029 (578.09 it/sec) -training >> step=2335600, episode=390 reward=0.7686468 (596.69 it/sec) -training >> step=2335700, episode=390 reward=0.7759416 (577.27 it/sec) -training >> step=2335800, episode=390 reward=0.7742479 (548.82 it/sec) -training >> step=2335900, episode=390 reward=0.7765796 (602.47 it/sec) -training >> step=2336000, episode=390 reward=0.7729185 (589.88 it/sec) -training >> step=2336100, episode=390 reward=0.7604258 (583.06 it/sec) -training >> step=2336200, episode=390 reward=0.7613369 (579.75 it/sec) -training >> step=2336300, episode=390 reward=0.7872655 (554.65 it/sec) -training >> step=2336400, episode=390 reward=0.755447 (492.69 it/sec) -training >> step=2336500, episode=390 reward=0.7372887 (546.81 it/sec) -training >> step=2336600, episode=390 reward=0.7462236 (540.85 it/sec) -training >> step=2336700, episode=390 reward=0.7627451 (550.86 it/sec) -training >> step=2336800, episode=390 reward=0.759827 (527.85 it/sec) -training >> step=2336900, episode=390 reward=0.7655492 (541.81 it/sec) -training >> step=2337000, episode=390 reward=0.756295 (495.52 it/sec) -training >> step=2337100, episode=390 reward=0.7588704 (529.94 it/sec) -training >> step=2337200, episode=390 reward=0.7650537 (534.06 it/sec) -training >> step=2337300, episode=390 reward=0.756197 (533.81 it/sec) -training >> step=2337400, episode=390 reward=0.7742566 (558.18 it/sec) -training >> step=2337500, episode=390 reward=0.7635183 (551.55 it/sec) -training >> step=2337600, episode=390 reward=0.7367643 (529.00 it/sec) -training >> step=2337700, episode=390 reward=0.7773213 (548.20 it/sec) -training >> step=2337800, episode=390 reward=0.7797234 (554.14 it/sec) -training >> step=2337900, episode=390 reward=0.765076 (551.89 it/sec) -training >> step=2338000, episode=390 reward=0.7540428 (541.14 it/sec) -training >> step=2338100, episode=390 reward=0.7515107 (515.00 it/sec) -training >> step=2338200, episode=390 reward=0.7712416 (545.03 it/sec) -training >> step=2338300, episode=390 reward=0.7649856 (549.72 it/sec) -training >> step=2338400, episode=390 reward=0.765397 (543.35 it/sec) -training >> step=2338500, episode=390 reward=0.7419296 (540.93 it/sec) -training >> step=2338600, episode=390 reward=0.7715555 (552.03 it/sec) -training >> step=2338700, episode=390 reward=0.764642 (537.64 it/sec) -training >> step=2338800, episode=390 reward=0.7399106 (516.92 it/sec) -training >> step=2338900, episode=390 reward=0.7617126 (540.47 it/sec) -training >> step=2339000, episode=390 reward=0.7352369 (552.03 it/sec) -training >> step=2339100, episode=390 reward=0.7730893 (526.11 it/sec) -training >> step=2339200, episode=390 reward=0.7744283 (545.52 it/sec) -training >> step=2339300, episode=391 reward=0.7534732 (150.48 it/sec) -training >> step=2339400, episode=391 reward=0.7460112 (518.93 it/sec) -training >> step=2339500, episode=391 reward=0.759854 (547.17 it/sec) -training >> step=2339600, episode=391 reward=0.7687963 (516.82 it/sec) -training >> step=2339700, episode=391 reward=0.7592697 (548.21 it/sec) -training >> step=2339800, episode=391 reward=0.7679629 (522.48 it/sec) -training >> step=2339900, episode=391 reward=0.7607192 (520.78 it/sec) -training >> step=2340000, episode=391 reward=0.7547721 (547.45 it/sec) -training >> step=2340100, episode=391 reward=0.7513036 (552.29 it/sec) -training >> step=2340200, episode=391 reward=0.7737033 (548.05 it/sec) -training >> step=2340300, episode=391 reward=0.7747596 (550.52 it/sec) -training >> step=2340400, episode=391 reward=0.766834 (536.63 it/sec) -training >> step=2340500, episode=391 reward=0.784134 (524.16 it/sec) -training >> step=2340600, episode=391 reward=0.7686146 (549.50 it/sec) -training >> step=2340700, episode=391 reward=0.7800148 (556.61 it/sec) -training >> step=2340800, episode=391 reward=0.7727638 (542.49 it/sec) -training >> step=2340900, episode=391 reward=0.7877977 (545.15 it/sec) -training >> step=2341000, episode=391 reward=0.7505745 (528.60 it/sec) -training >> step=2341100, episode=391 reward=0.7561101 (552.14 it/sec) -training >> step=2341200, episode=391 reward=0.7623389 (541.50 it/sec) -training >> step=2341300, episode=391 reward=0.7594758 (540.78 it/sec) -training >> step=2341400, episode=391 reward=0.7800457 (569.23 it/sec) -training >> step=2341500, episode=391 reward=0.7648427 (542.78 it/sec) -training >> step=2341600, episode=391 reward=0.7660835 (524.17 it/sec) -training >> step=2341700, episode=391 reward=0.7578832 (553.11 it/sec) -training >> step=2341800, episode=391 reward=0.7762027 (525.62 it/sec) -training >> step=2341900, episode=391 reward=0.7639822 (522.42 it/sec) -training >> step=2342000, episode=391 reward=0.7768373 (538.00 it/sec) -training >> step=2342100, episode=391 reward=0.7792302 (547.27 it/sec) -training >> step=2342200, episode=391 reward=0.7572272 (524.64 it/sec) -training >> step=2342300, episode=391 reward=0.7761496 (556.28 it/sec) -training >> step=2342400, episode=391 reward=0.754168 (510.19 it/sec) -training >> step=2342500, episode=391 reward=0.7723369 (576.70 it/sec) -training >> step=2342600, episode=391 reward=0.7406996 (530.67 it/sec) -training >> step=2342700, episode=391 reward=0.7468334 (532.37 it/sec) -training >> step=2342800, episode=391 reward=0.7416106 (538.36 it/sec) -training >> step=2342900, episode=391 reward=0.792863 (558.77 it/sec) -training >> step=2343000, episode=391 reward=0.7511695 (533.47 it/sec) -training >> step=2343100, episode=391 reward=0.7724145 (554.47 it/sec) -training >> step=2343200, episode=391 reward=0.777894 (531.64 it/sec) -training >> step=2343300, episode=391 reward=0.7621102 (566.88 it/sec) -training >> step=2343400, episode=391 reward=0.7520446 (518.19 it/sec) -training >> step=2343500, episode=391 reward=0.7666301 (550.07 it/sec) -training >> step=2343600, episode=391 reward=0.7312662 (552.56 it/sec) -training >> step=2343700, episode=391 reward=0.7590088 (551.30 it/sec) -training >> step=2343800, episode=391 reward=0.7397462 (531.66 it/sec) -training >> step=2343900, episode=391 reward=0.761941 (560.35 it/sec) -training >> step=2344000, episode=391 reward=0.7656266 (561.36 it/sec) -training >> step=2344100, episode=391 reward=0.7503794 (551.44 it/sec) -training >> step=2344200, episode=391 reward=0.7340953 (515.59 it/sec) -training >> step=2344300, episode=391 reward=0.7637753 (523.19 it/sec) -training >> step=2344400, episode=391 reward=0.7449475 (585.31 it/sec) -training >> step=2344500, episode=391 reward=0.7637221 (538.24 it/sec) -training >> step=2344600, episode=391 reward=0.7401304 (533.63 it/sec) -training >> step=2344700, episode=391 reward=0.7584932 (572.51 it/sec) -training >> step=2344800, episode=391 reward=0.7499873 (549.19 it/sec) -training >> step=2344900, episode=391 reward=0.7750151 (540.33 it/sec) -training >> step=2345000, episode=391 reward=0.7706282 (542.84 it/sec) -training >> step=2345100, episode=391 reward=0.7523934 (561.96 it/sec) -training >> step=2345200, episode=391 reward=0.7465194 (526.04 it/sec) -training >> step=2345300, episode=392 reward=0.7618203 (162.14 it/sec) -training >> step=2345400, episode=392 reward=0.7603371 (525.45 it/sec) -training >> step=2345500, episode=392 reward=0.7807232 (535.34 it/sec) -training >> step=2345600, episode=392 reward=0.7523332 (516.89 it/sec) -training >> step=2345700, episode=392 reward=0.7667599 (498.18 it/sec) -training >> step=2345800, episode=392 reward=0.7307344 (531.29 it/sec) -training >> step=2345900, episode=392 reward=0.7427853 (538.22 it/sec) -training >> step=2346000, episode=392 reward=0.778609 (536.59 it/sec) -training >> step=2346100, episode=392 reward=0.7516116 (531.41 it/sec) -training >> step=2346200, episode=392 reward=0.7795889 (521.43 it/sec) -training >> step=2346300, episode=392 reward=0.7535536 (561.78 it/sec) -training >> step=2346400, episode=392 reward=0.7739956 (566.32 it/sec) -training >> step=2346500, episode=392 reward=0.77518 (526.42 it/sec) -training >> step=2346600, episode=392 reward=0.7818282 (550.29 it/sec) -training >> step=2346700, episode=392 reward=0.7776512 (539.62 it/sec) -training >> step=2346800, episode=392 reward=0.7732837 (527.81 it/sec) -training >> step=2346900, episode=392 reward=0.7754595 (549.05 it/sec) -training >> step=2347000, episode=392 reward=0.7798538 (534.21 it/sec) -training >> step=2347100, episode=392 reward=0.7634396 (541.57 it/sec) -training >> step=2347200, episode=392 reward=0.7738631 (535.27 it/sec) -training >> step=2347300, episode=392 reward=0.7722843 (506.91 it/sec) -training >> step=2347400, episode=392 reward=0.7768753 (486.71 it/sec) -training >> step=2347500, episode=392 reward=0.7576402 (580.11 it/sec) -training >> step=2347600, episode=392 reward=0.7534321 (540.67 it/sec) -training >> step=2347700, episode=392 reward=0.7552718 (528.69 it/sec) -training >> step=2347800, episode=392 reward=0.7610699 (544.64 it/sec) -training >> step=2347900, episode=392 reward=0.772714 (565.46 it/sec) -training >> step=2348000, episode=392 reward=0.7597914 (555.47 it/sec) -training >> step=2348100, episode=392 reward=0.7848144 (536.20 it/sec) -training >> step=2348200, episode=392 reward=0.7950958 (536.44 it/sec) -training >> step=2348300, episode=392 reward=0.7588223 (535.34 it/sec) -training >> step=2348400, episode=392 reward=0.7551625 (520.10 it/sec) -training >> step=2348500, episode=392 reward=0.7764296 (541.98 it/sec) -training >> step=2348600, episode=392 reward=0.7495074 (558.13 it/sec) -training >> step=2348700, episode=392 reward=0.7889628 (570.64 it/sec) -training >> step=2348800, episode=392 reward=0.7516094 (560.59 it/sec) -training >> step=2348900, episode=392 reward=0.7760662 (529.50 it/sec) -training >> step=2349000, episode=392 reward=0.7680144 (551.24 it/sec) -training >> step=2349100, episode=392 reward=0.7703701 (568.64 it/sec) -training >> step=2349200, episode=392 reward=0.7582243 (546.78 it/sec) -training >> step=2349300, episode=392 reward=0.754669 (543.17 it/sec) -training >> step=2349400, episode=392 reward=0.765413 (564.60 it/sec) -training >> step=2349500, episode=392 reward=0.772208 (502.38 it/sec) -training >> step=2349600, episode=392 reward=0.7621357 (549.69 it/sec) -training >> step=2349700, episode=392 reward=0.7999063 (533.22 it/sec) -training >> step=2349800, episode=392 reward=0.7699435 (567.08 it/sec) -training >> step=2349900, episode=392 reward=0.7794635 (545.00 it/sec) -training >> step=2350000, episode=392 reward=0.7698063 (522.16 it/sec) -training >> step=2350100, episode=392 reward=0.7498454 (560.68 it/sec) -training >> step=2350200, episode=392 reward=0.7768435 (554.60 it/sec) -training >> step=2350300, episode=392 reward=0.7691031 (555.57 it/sec) -training >> step=2350400, episode=392 reward=0.749247 (557.10 it/sec) -training >> step=2350500, episode=392 reward=0.7700716 (560.39 it/sec) -training >> step=2350600, episode=392 reward=0.7634656 (563.87 it/sec) -training >> step=2350700, episode=392 reward=0.7698427 (539.99 it/sec) -training >> step=2350800, episode=392 reward=0.7679482 (545.28 it/sec) -training >> step=2350900, episode=392 reward=0.7296392 (536.02 it/sec) -training >> step=2351000, episode=392 reward=0.7663979 (610.93 it/sec) -training >> step=2351100, episode=392 reward=0.7635033 (552.63 it/sec) -training >> step=2351200, episode=392 reward=0.7655395 (559.85 it/sec) -training >> step=2351300, episode=393 reward=0.7691114 (108.86 it/sec) -training >> step=2351400, episode=393 reward=0.7727314 (507.80 it/sec) -training >> step=2351500, episode=393 reward=0.7356957 (511.77 it/sec) -training >> step=2351600, episode=393 reward=0.7599264 (519.83 it/sec) -training >> step=2351700, episode=393 reward=0.7521046 (543.78 it/sec) -training >> step=2351800, episode=393 reward=0.7545006 (539.35 it/sec) -training >> step=2351900, episode=393 reward=0.745538 (522.23 it/sec) -training >> step=2352000, episode=393 reward=0.7841736 (548.17 it/sec) -training >> step=2352100, episode=393 reward=0.755454 (545.06 it/sec) -training >> step=2352200, episode=393 reward=0.7856785 (545.12 it/sec) -training >> step=2352300, episode=393 reward=0.7569594 (559.89 it/sec) -training >> step=2352400, episode=393 reward=0.7609073 (524.97 it/sec) -training >> step=2352500, episode=393 reward=0.7562365 (502.51 it/sec) -training >> step=2352600, episode=393 reward=0.7612136 (569.36 it/sec) -training >> step=2352700, episode=393 reward=0.7479544 (561.72 it/sec) -training >> step=2352800, episode=393 reward=0.7702792 (572.04 it/sec) -training >> step=2352900, episode=393 reward=0.7512446 (537.13 it/sec) -training >> step=2353000, episode=393 reward=0.7604246 (519.84 it/sec) -training >> step=2353100, episode=393 reward=0.7621022 (547.64 it/sec) -training >> step=2353200, episode=393 reward=0.7794833 (548.33 it/sec) -training >> step=2353300, episode=393 reward=0.7600711 (547.76 it/sec) -training >> step=2353400, episode=393 reward=0.7754461 (590.70 it/sec) -training >> step=2353500, episode=393 reward=0.7663878 (542.26 it/sec) -training >> step=2353600, episode=393 reward=0.7711166 (541.59 it/sec) -training >> step=2353700, episode=393 reward=0.765048 (552.76 it/sec) -training >> step=2353800, episode=393 reward=0.7794974 (537.31 it/sec) -training >> step=2353900, episode=393 reward=0.781987 (560.18 it/sec) -training >> step=2354000, episode=393 reward=0.7823569 (564.54 it/sec) -training >> step=2354100, episode=393 reward=0.7613739 (500.59 it/sec) -training >> step=2354200, episode=393 reward=0.7657173 (558.44 it/sec) -training >> step=2354300, episode=393 reward=0.7565012 (526.37 it/sec) -training >> step=2354400, episode=393 reward=0.7581171 (545.26 it/sec) -training >> step=2354500, episode=393 reward=0.772047 (558.22 it/sec) -training >> step=2354600, episode=393 reward=0.762223 (526.01 it/sec) -training >> step=2354700, episode=393 reward=0.7585021 (532.24 it/sec) -training >> step=2354800, episode=393 reward=0.7515336 (525.26 it/sec) -training >> step=2354900, episode=393 reward=0.7797542 (557.27 it/sec) -training >> step=2355000, episode=393 reward=0.7641944 (548.42 it/sec) -training >> step=2355100, episode=393 reward=0.773176 (550.15 it/sec) -training >> step=2355200, episode=393 reward=0.7525401 (530.77 it/sec) -training >> step=2355300, episode=393 reward=0.7735663 (515.90 it/sec) -training >> step=2355400, episode=393 reward=0.7512251 (560.05 it/sec) -training >> step=2355500, episode=393 reward=0.7770594 (517.41 it/sec) -training >> step=2355600, episode=393 reward=0.7670268 (572.30 it/sec) -training >> step=2355700, episode=393 reward=0.7652904 (539.38 it/sec) -training >> step=2355800, episode=393 reward=0.7390049 (548.01 it/sec) -training >> step=2355900, episode=393 reward=0.7526628 (553.19 it/sec) -training >> step=2356000, episode=393 reward=0.7639282 (531.65 it/sec) -training >> step=2356100, episode=393 reward=0.7877091 (536.60 it/sec) -training >> step=2356200, episode=393 reward=0.755442 (552.19 it/sec) -training >> step=2356300, episode=393 reward=0.7522532 (543.12 it/sec) -training >> step=2356400, episode=393 reward=0.7644976 (544.96 it/sec) -training >> step=2356500, episode=393 reward=0.7508149 (591.07 it/sec) -training >> step=2356600, episode=393 reward=0.7592794 (536.86 it/sec) -training >> step=2356700, episode=393 reward=0.7668724 (551.02 it/sec) -training >> step=2356800, episode=393 reward=0.7685403 (539.10 it/sec) -training >> step=2356900, episode=393 reward=0.755326 (547.79 it/sec) -training >> step=2357000, episode=393 reward=0.7611703 (555.59 it/sec) -training >> step=2357100, episode=393 reward=0.7581517 (539.89 it/sec) -training >> step=2357200, episode=393 reward=0.7673649 (573.90 it/sec) -training >> step=2357300, episode=394 reward=0.7812601 (110.17 it/sec) -training >> step=2357400, episode=394 reward=0.7595005 (375.93 it/sec) -training >> step=2357500, episode=394 reward=0.7644604 (511.90 it/sec) -training >> step=2357600, episode=394 reward=0.7609988 (523.70 it/sec) -training >> step=2357700, episode=394 reward=0.772565 (549.60 it/sec) -training >> step=2357800, episode=394 reward=0.7680224 (546.78 it/sec) -training >> step=2357900, episode=394 reward=0.7667285 (515.50 it/sec) -training >> step=2358000, episode=394 reward=0.7690116 (525.47 it/sec) -training >> step=2358100, episode=394 reward=0.7644414 (520.57 it/sec) -training >> step=2358200, episode=394 reward=0.7612833 (567.57 it/sec) -training >> step=2358300, episode=394 reward=0.7908105 (548.11 it/sec) -training >> step=2358400, episode=394 reward=0.7772459 (535.44 it/sec) -training >> step=2358500, episode=394 reward=0.765757 (530.94 it/sec) -training >> step=2358600, episode=394 reward=0.7631041 (528.04 it/sec) -training >> step=2358700, episode=394 reward=0.7823272 (524.83 it/sec) -training >> step=2358800, episode=394 reward=0.7774043 (547.30 it/sec) -training >> step=2358900, episode=394 reward=0.7556332 (561.89 it/sec) -training >> step=2359000, episode=394 reward=0.7460067 (544.93 it/sec) -training >> step=2359100, episode=394 reward=0.7638924 (564.52 it/sec) -training >> step=2359200, episode=394 reward=0.7577964 (525.58 it/sec) -training >> step=2359300, episode=394 reward=0.7613014 (543.11 it/sec) -training >> step=2359400, episode=394 reward=0.7870907 (558.51 it/sec) -training >> step=2359500, episode=394 reward=0.745718 (588.81 it/sec) -training >> step=2359600, episode=394 reward=0.7739764 (548.00 it/sec) -training >> step=2359700, episode=394 reward=0.7864651 (552.40 it/sec) -training >> step=2359800, episode=394 reward=0.7672794 (517.99 it/sec) -training >> step=2359900, episode=394 reward=0.77736 (552.93 it/sec) -training >> step=2360000, episode=394 reward=0.7701324 (534.93 it/sec) -training >> step=2360100, episode=394 reward=0.7719276 (557.73 it/sec) -training >> step=2360200, episode=394 reward=0.7479528 (562.04 it/sec) -training >> step=2360300, episode=394 reward=0.7611507 (542.96 it/sec) -training >> step=2360400, episode=394 reward=0.7776718 (487.18 it/sec) -training >> step=2360500, episode=394 reward=0.7566574 (573.19 it/sec) -training >> step=2360600, episode=394 reward=0.7718945 (534.10 it/sec) -training >> step=2360700, episode=394 reward=0.7624065 (568.52 it/sec) -training >> step=2360800, episode=394 reward=0.757852 (529.29 it/sec) -training >> step=2360900, episode=394 reward=0.7313647 (556.43 it/sec) -training >> step=2361000, episode=394 reward=0.7817661 (544.21 it/sec) -training >> step=2361100, episode=394 reward=0.7746004 (565.47 it/sec) -training >> step=2361200, episode=394 reward=0.7642857 (538.08 it/sec) -training >> step=2361300, episode=394 reward=0.7685234 (567.70 it/sec) -training >> step=2361400, episode=394 reward=0.7740842 (549.40 it/sec) -training >> step=2361500, episode=394 reward=0.7735033 (477.99 it/sec) -training >> step=2361600, episode=394 reward=0.7644702 (550.70 it/sec) -training >> step=2361700, episode=394 reward=0.7876753 (586.76 it/sec) -training >> step=2361800, episode=394 reward=0.7656031 (554.04 it/sec) -training >> step=2361900, episode=394 reward=0.7625266 (564.41 it/sec) -training >> step=2362000, episode=394 reward=0.7843901 (553.88 it/sec) -training >> step=2362100, episode=394 reward=0.7670527 (551.19 it/sec) -training >> step=2362200, episode=394 reward=0.7777324 (544.02 it/sec) -training >> step=2362300, episode=394 reward=0.7835121 (570.41 it/sec) -training >> step=2362400, episode=394 reward=0.7621899 (520.31 it/sec) -training >> step=2362500, episode=394 reward=0.746707 (560.55 it/sec) -training >> step=2362600, episode=394 reward=0.7527101 (548.72 it/sec) -training >> step=2362700, episode=394 reward=0.7623615 (571.91 it/sec) -training >> step=2362800, episode=394 reward=0.7701479 (558.97 it/sec) -training >> step=2362900, episode=394 reward=0.7450175 (571.08 it/sec) -training >> step=2363000, episode=394 reward=0.7719133 (544.18 it/sec) -training >> step=2363100, episode=394 reward=0.7647048 (585.33 it/sec) -training >> step=2363200, episode=394 reward=0.734267 (554.29 it/sec) -training >> step=2363300, episode=395 reward=0.7723683 (97.71 it/sec) -training >> step=2363400, episode=395 reward=0.7555385 (541.96 it/sec) -training >> step=2363500, episode=395 reward=0.7603928 (557.46 it/sec) -training >> step=2363600, episode=395 reward=0.7808993 (585.15 it/sec) -training >> step=2363700, episode=395 reward=0.7569524 (588.30 it/sec) -training >> step=2363800, episode=395 reward=0.7639752 (573.08 it/sec) -training >> step=2363900, episode=395 reward=0.7689609 (549.23 it/sec) -training >> step=2364000, episode=395 reward=0.774345 (617.04 it/sec) -training >> step=2364100, episode=395 reward=0.7778558 (574.58 it/sec) -training >> step=2364200, episode=395 reward=0.754308 (603.72 it/sec) -training >> step=2364300, episode=395 reward=0.7660772 (596.95 it/sec) -training >> step=2364400, episode=395 reward=0.7618443 (611.88 it/sec) -training >> step=2364500, episode=395 reward=0.7647653 (527.38 it/sec) -training >> step=2364600, episode=395 reward=0.7843211 (592.34 it/sec) -training >> step=2364700, episode=395 reward=0.7643425 (594.32 it/sec) -training >> step=2364800, episode=395 reward=0.7514248 (594.44 it/sec) -training >> step=2364900, episode=395 reward=0.76612 (599.79 it/sec) -training >> step=2365000, episode=395 reward=0.7742545 (576.33 it/sec) -training >> step=2365100, episode=395 reward=0.7498927 (545.37 it/sec) -training >> step=2365200, episode=395 reward=0.7779094 (594.93 it/sec) -training >> step=2365300, episode=395 reward=0.7316154 (558.12 it/sec) -training >> step=2365400, episode=395 reward=0.7532816 (571.89 it/sec) -training >> step=2365500, episode=395 reward=0.7885047 (593.27 it/sec) -training >> step=2365600, episode=395 reward=0.7893169 (576.95 it/sec) -training >> step=2365700, episode=395 reward=0.771358 (532.57 it/sec) -training >> step=2365800, episode=395 reward=0.7902232 (569.52 it/sec) -training >> step=2365900, episode=395 reward=0.7467218 (565.75 it/sec) -training >> step=2366000, episode=395 reward=0.775546 (579.09 it/sec) -training >> step=2366100, episode=395 reward=0.7842956 (568.32 it/sec) -training >> step=2366200, episode=395 reward=0.7887399 (535.64 it/sec) -training >> step=2366300, episode=395 reward=0.7937783 (538.16 it/sec) -training >> step=2366400, episode=395 reward=0.7744561 (562.67 it/sec) -training >> step=2366500, episode=395 reward=0.7322127 (548.33 it/sec) -training >> step=2366600, episode=395 reward=0.7562711 (594.85 it/sec) -training >> step=2366700, episode=395 reward=0.7494071 (570.03 it/sec) -training >> step=2366800, episode=395 reward=0.7505057 (568.54 it/sec) -training >> step=2366900, episode=395 reward=0.7526647 (529.69 it/sec) -training >> step=2367000, episode=395 reward=0.7781191 (606.97 it/sec) -training >> step=2367100, episode=395 reward=0.7641068 (570.87 it/sec) -training >> step=2367200, episode=395 reward=0.7617442 (559.88 it/sec) -training >> step=2367300, episode=395 reward=0.7562938 (564.02 it/sec) -training >> step=2367400, episode=395 reward=0.7642875 (577.90 it/sec) -training >> step=2367500, episode=395 reward=0.7611615 (534.26 it/sec) -training >> step=2367600, episode=395 reward=0.7694891 (559.63 it/sec) -training >> step=2367700, episode=395 reward=0.7648672 (582.95 it/sec) -training >> step=2367800, episode=395 reward=0.7724726 (531.57 it/sec) -training >> step=2367900, episode=395 reward=0.7365114 (534.80 it/sec) -training >> step=2368000, episode=395 reward=0.7713068 (517.08 it/sec) -training >> step=2368100, episode=395 reward=0.7772007 (577.42 it/sec) -training >> step=2368200, episode=395 reward=0.7678334 (521.85 it/sec) -training >> step=2368300, episode=395 reward=0.7641003 (565.22 it/sec) -training >> step=2368400, episode=395 reward=0.7501441 (579.30 it/sec) -training >> step=2368500, episode=395 reward=0.765598 (573.65 it/sec) -training >> step=2368600, episode=395 reward=0.7414669 (547.82 it/sec) -training >> step=2368700, episode=395 reward=0.7730108 (555.57 it/sec) -training >> step=2368800, episode=395 reward=0.7598318 (606.42 it/sec) -training >> step=2368900, episode=395 reward=0.7552354 (583.29 it/sec) -training >> step=2369000, episode=395 reward=0.7550358 (514.26 it/sec) -training >> step=2369100, episode=395 reward=0.730486 (549.92 it/sec) -training >> step=2369200, episode=395 reward=0.772382 (558.83 it/sec) -training >> step=2369300, episode=396 reward=0.7860773 (158.07 it/sec) -training >> step=2369400, episode=396 reward=0.7654853 (536.62 it/sec) -training >> step=2369500, episode=396 reward=0.76681 (490.22 it/sec) -training >> step=2369600, episode=396 reward=0.7624883 (537.92 it/sec) -training >> step=2369700, episode=396 reward=0.7735596 (566.78 it/sec) -training >> step=2369800, episode=396 reward=0.7613451 (548.95 it/sec) -training >> step=2369900, episode=396 reward=0.7595624 (528.02 it/sec) -training >> step=2370000, episode=396 reward=0.7416606 (553.91 it/sec) -training >> step=2370100, episode=396 reward=0.7511069 (531.60 it/sec) -training >> step=2370200, episode=396 reward=0.7699503 (554.49 it/sec) -training >> step=2370300, episode=396 reward=0.7691048 (564.63 it/sec) -training >> step=2370400, episode=396 reward=0.7525821 (568.36 it/sec) -training >> step=2370500, episode=396 reward=0.7450212 (548.16 it/sec) -training >> step=2370600, episode=396 reward=0.7671351 (566.11 it/sec) -training >> step=2370700, episode=396 reward=0.7764865 (529.32 it/sec) -training >> step=2370800, episode=396 reward=0.7710367 (557.80 it/sec) -training >> step=2370900, episode=396 reward=0.7759422 (550.37 it/sec) -training >> step=2371000, episode=396 reward=0.7342398 (528.17 it/sec) -training >> step=2371100, episode=396 reward=0.7788255 (545.36 it/sec) -training >> step=2371200, episode=396 reward=0.757028 (537.68 it/sec) -training >> step=2371300, episode=396 reward=0.7573768 (560.56 it/sec) -training >> step=2371400, episode=396 reward=0.7620648 (551.11 it/sec) -training >> step=2371500, episode=396 reward=0.7691748 (542.57 it/sec) -training >> step=2371600, episode=396 reward=0.7880579 (554.43 it/sec) -training >> step=2371700, episode=396 reward=0.7547768 (559.58 it/sec) -training >> step=2371800, episode=396 reward=0.7641844 (531.72 it/sec) -training >> step=2371900, episode=396 reward=0.7641084 (552.14 it/sec) -training >> step=2372000, episode=396 reward=0.7803854 (550.00 it/sec) -training >> step=2372100, episode=396 reward=0.7549828 (505.67 it/sec) -training >> step=2372200, episode=396 reward=0.7613949 (593.46 it/sec) -training >> step=2372300, episode=396 reward=0.7644442 (524.19 it/sec) -training >> step=2372400, episode=396 reward=0.7759518 (502.06 it/sec) -training >> step=2372500, episode=396 reward=0.763024 (585.27 it/sec) -training >> step=2372600, episode=396 reward=0.7610476 (515.69 it/sec) -training >> step=2372700, episode=396 reward=0.7583694 (496.83 it/sec) -training >> step=2372800, episode=396 reward=0.7773625 (578.15 it/sec) -training >> step=2372900, episode=396 reward=0.7631766 (482.32 it/sec) -training >> step=2373000, episode=396 reward=0.7633723 (503.21 it/sec) -training >> step=2373100, episode=396 reward=0.7783941 (562.94 it/sec) -training >> step=2373200, episode=396 reward=0.7554687 (559.78 it/sec) -training >> step=2373300, episode=396 reward=0.7563566 (522.42 it/sec) -training >> step=2373400, episode=396 reward=0.7842445 (523.83 it/sec) -training >> step=2373500, episode=396 reward=0.7499235 (454.42 it/sec) -training >> step=2373600, episode=396 reward=0.7515947 (592.87 it/sec) -training >> step=2373700, episode=396 reward=0.7938664 (560.03 it/sec) -training >> step=2373800, episode=396 reward=0.752103 (525.82 it/sec) -training >> step=2373900, episode=396 reward=0.7791696 (550.76 it/sec) -training >> step=2374000, episode=396 reward=0.7630391 (468.38 it/sec) -training >> step=2374100, episode=396 reward=0.7619046 (557.05 it/sec) -training >> step=2374200, episode=396 reward=0.7644351 (561.59 it/sec) -training >> step=2374300, episode=396 reward=0.7542382 (581.57 it/sec) -training >> step=2374400, episode=396 reward=0.7517692 (570.05 it/sec) -training >> step=2374500, episode=396 reward=0.7726155 (555.28 it/sec) -training >> step=2374600, episode=396 reward=0.7620641 (501.73 it/sec) -training >> step=2374700, episode=396 reward=0.7716521 (543.51 it/sec) -training >> step=2374800, episode=396 reward=0.7728189 (538.27 it/sec) -training >> step=2374900, episode=396 reward=0.7537276 (532.87 it/sec) -training >> step=2375000, episode=396 reward=0.7831445 (528.18 it/sec) -training >> step=2375100, episode=396 reward=0.7785056 (581.31 it/sec) -training >> step=2375200, episode=396 reward=0.7523378 (516.27 it/sec) -training >> step=2375300, episode=397 reward=0.7440904 (93.02 it/sec) -training >> step=2375400, episode=397 reward=0.7601017 (285.48 it/sec) -training >> step=2375500, episode=397 reward=0.7476013 (531.67 it/sec) -training >> step=2375600, episode=397 reward=0.7552651 (537.76 it/sec) -training >> step=2375700, episode=397 reward=0.7556728 (528.62 it/sec) -training >> step=2375800, episode=397 reward=0.7522249 (530.65 it/sec) -training >> step=2375900, episode=397 reward=0.774443 (525.84 it/sec) -training >> step=2376000, episode=397 reward=0.7697743 (588.41 it/sec) -training >> step=2376100, episode=397 reward=0.7756904 (551.82 it/sec) -training >> step=2376200, episode=397 reward=0.7530716 (562.06 it/sec) -training >> step=2376300, episode=397 reward=0.74929 (502.85 it/sec) -training >> step=2376400, episode=397 reward=0.7797337 (542.06 it/sec) -training >> step=2376500, episode=397 reward=0.7809932 (573.60 it/sec) -training >> step=2376600, episode=397 reward=0.7732649 (564.12 it/sec) -training >> step=2376700, episode=397 reward=0.7630704 (574.07 it/sec) -training >> step=2376800, episode=397 reward=0.7830094 (533.30 it/sec) -training >> step=2376900, episode=397 reward=0.7817363 (602.46 it/sec) -training >> step=2377000, episode=397 reward=0.7511418 (561.53 it/sec) -training >> step=2377100, episode=397 reward=0.7824265 (534.63 it/sec) -training >> step=2377200, episode=397 reward=0.7661304 (568.11 it/sec) -training >> step=2377300, episode=397 reward=0.7704733 (555.53 it/sec) -training >> step=2377400, episode=397 reward=0.7658733 (539.96 it/sec) -training >> step=2377500, episode=397 reward=0.7662285 (553.47 it/sec) -training >> step=2377600, episode=397 reward=0.7596254 (559.32 it/sec) -training >> step=2377700, episode=397 reward=0.7850109 (545.03 it/sec) -training >> step=2377800, episode=397 reward=0.7460814 (548.45 it/sec) -training >> step=2377900, episode=397 reward=0.7420444 (550.36 it/sec) -training >> step=2378000, episode=397 reward=0.7853115 (509.06 it/sec) -training >> step=2378100, episode=397 reward=0.7639876 (573.14 it/sec) -training >> step=2378200, episode=397 reward=0.769923 (556.66 it/sec) -training >> step=2378300, episode=397 reward=0.7704192 (537.90 it/sec) -training >> step=2378400, episode=397 reward=0.7599395 (538.97 it/sec) -training >> step=2378500, episode=397 reward=0.7431713 (527.43 it/sec) -training >> step=2378600, episode=397 reward=0.7603842 (502.42 it/sec) -training >> step=2378700, episode=397 reward=0.731072 (567.23 it/sec) -training >> step=2378800, episode=397 reward=0.7645928 (556.92 it/sec) -training >> step=2378900, episode=397 reward=0.7547513 (548.44 it/sec) -training >> step=2379000, episode=397 reward=0.7630767 (557.36 it/sec) -training >> step=2379100, episode=397 reward=0.7657947 (526.87 it/sec) -training >> step=2379200, episode=397 reward=0.756979 (537.09 it/sec) -training >> step=2379300, episode=397 reward=0.7518826 (546.05 it/sec) -training >> step=2379400, episode=397 reward=0.7663017 (592.31 it/sec) -training >> step=2379500, episode=397 reward=0.7647379 (543.38 it/sec) -training >> step=2379600, episode=397 reward=0.750325 (536.64 it/sec) -training >> step=2379700, episode=397 reward=0.7592941 (517.39 it/sec) -training >> step=2379800, episode=397 reward=0.7606028 (579.40 it/sec) -training >> step=2379900, episode=397 reward=0.7673028 (535.75 it/sec) -training >> step=2380000, episode=397 reward=0.7713113 (590.27 it/sec) -training >> step=2380100, episode=397 reward=0.7462528 (565.48 it/sec) -training >> step=2380200, episode=397 reward=0.7598198 (555.03 it/sec) -training >> step=2380300, episode=397 reward=0.7704264 (529.95 it/sec) -training >> step=2380400, episode=397 reward=0.7953474 (571.95 it/sec) -training >> step=2380500, episode=397 reward=0.7381608 (585.68 it/sec) -training >> step=2380600, episode=397 reward=0.7561972 (541.53 it/sec) -training >> step=2380700, episode=397 reward=0.7650419 (560.05 it/sec) -training >> step=2380800, episode=397 reward=0.7639828 (538.81 it/sec) -training >> step=2380900, episode=397 reward=0.7579987 (559.52 it/sec) -training >> step=2381000, episode=397 reward=0.7787644 (552.30 it/sec) -training >> step=2381100, episode=397 reward=0.7384449 (555.79 it/sec) -training >> step=2381200, episode=397 reward=0.7739078 (597.42 it/sec) -training >> step=2381300, episode=398 reward=0.75229 (65.82 it/sec) -training >> step=2381400, episode=398 reward=0.7640237 (551.67 it/sec) -training >> step=2381500, episode=398 reward=0.7525401 (531.85 it/sec) -training >> step=2381600, episode=398 reward=0.7691368 (549.51 it/sec) -training >> step=2381700, episode=398 reward=0.7636626 (533.28 it/sec) -training >> step=2381800, episode=398 reward=0.7724807 (528.85 it/sec) -training >> step=2381900, episode=398 reward=0.7728535 (574.52 it/sec) -training >> step=2382000, episode=398 reward=0.7594174 (583.90 it/sec) -training >> step=2382100, episode=398 reward=0.7720433 (554.18 it/sec) -training >> step=2382200, episode=398 reward=0.7525015 (561.61 it/sec) -training >> step=2382300, episode=398 reward=0.763791 (578.74 it/sec) -training >> step=2382400, episode=398 reward=0.7499853 (529.10 it/sec) -training >> step=2382500, episode=398 reward=0.7514539 (580.13 it/sec) -training >> step=2382600, episode=398 reward=0.7830667 (576.38 it/sec) -training >> step=2382700, episode=398 reward=0.7556584 (568.06 it/sec) -training >> step=2382800, episode=398 reward=0.7474678 (571.09 it/sec) -training >> step=2382900, episode=398 reward=0.7662244 (551.08 it/sec) -training >> step=2383000, episode=398 reward=0.7585047 (476.88 it/sec) -training >> step=2383100, episode=398 reward=0.7485673 (487.16 it/sec) -training >> step=2383200, episode=398 reward=0.7729927 (547.62 it/sec) -training >> step=2383300, episode=398 reward=0.7830802 (581.12 it/sec) -training >> step=2383400, episode=398 reward=0.7342651 (566.86 it/sec) -training >> step=2383500, episode=398 reward=0.7575814 (454.27 it/sec) -training >> step=2383600, episode=398 reward=0.7610828 (555.16 it/sec) -training >> step=2383700, episode=398 reward=0.741702 (572.71 it/sec) -training >> step=2383800, episode=398 reward=0.774148 (538.58 it/sec) -training >> step=2383900, episode=398 reward=0.7499203 (560.84 it/sec) -training >> step=2384000, episode=398 reward=0.7775881 (521.14 it/sec) -training >> step=2384100, episode=398 reward=0.7786701 (583.62 it/sec) -training >> step=2384200, episode=398 reward=0.7792024 (558.75 it/sec) -training >> step=2384300, episode=398 reward=0.7622207 (534.34 it/sec) -training >> step=2384400, episode=398 reward=0.751843 (561.31 it/sec) -training >> step=2384500, episode=398 reward=0.7755709 (589.81 it/sec) -training >> step=2384600, episode=398 reward=0.7800403 (538.65 it/sec) -training >> step=2384700, episode=398 reward=0.7804406 (546.40 it/sec) -training >> step=2384800, episode=398 reward=0.7688423 (552.70 it/sec) -training >> step=2384900, episode=398 reward=0.7363423 (554.07 it/sec) -training >> step=2385000, episode=398 reward=0.7678788 (576.74 it/sec) -training >> step=2385100, episode=398 reward=0.7745988 (529.19 it/sec) -training >> step=2385200, episode=398 reward=0.7329558 (506.47 it/sec) -training >> step=2385300, episode=398 reward=0.756227 (575.36 it/sec) -training >> step=2385400, episode=398 reward=0.7480485 (527.51 it/sec) -training >> step=2385500, episode=398 reward=0.7814418 (580.00 it/sec) -training >> step=2385600, episode=398 reward=0.7740305 (548.09 it/sec) -training >> step=2385700, episode=398 reward=0.7476379 (575.69 it/sec) -training >> step=2385800, episode=398 reward=0.7797012 (509.47 it/sec) -training >> step=2385900, episode=398 reward=0.7302435 (538.83 it/sec) -training >> step=2386000, episode=398 reward=0.7730829 (586.25 it/sec) -training >> step=2386100, episode=398 reward=0.7620581 (562.08 it/sec) -training >> step=2386200, episode=398 reward=0.7739102 (522.94 it/sec) -training >> step=2386300, episode=398 reward=0.7438315 (516.22 it/sec) -training >> step=2386400, episode=398 reward=0.7626572 (569.90 it/sec) -training >> step=2386500, episode=398 reward=0.7699107 (531.46 it/sec) -training >> step=2386600, episode=398 reward=0.7691951 (575.89 it/sec) -training >> step=2386700, episode=398 reward=0.7447451 (553.43 it/sec) -training >> step=2386800, episode=398 reward=0.7615118 (569.85 it/sec) -training >> step=2386900, episode=398 reward=0.7511867 (533.63 it/sec) -training >> step=2387000, episode=398 reward=0.7695113 (532.30 it/sec) -training >> step=2387100, episode=398 reward=0.7623764 (570.66 it/sec) -training >> step=2387200, episode=398 reward=0.7353674 (561.90 it/sec) -training >> step=2387300, episode=399 reward=0.7670841 (72.22 it/sec) -training >> step=2387400, episode=399 reward=0.7579395 (512.97 it/sec) -training >> step=2387500, episode=399 reward=0.759831 (526.11 it/sec) -training >> step=2387600, episode=399 reward=0.770335 (514.92 it/sec) -training >> step=2387700, episode=399 reward=0.7730417 (527.11 it/sec) -training >> step=2387800, episode=399 reward=0.7768326 (537.98 it/sec) -training >> step=2387900, episode=399 reward=0.7656231 (566.28 it/sec) -training >> step=2388000, episode=399 reward=0.7630305 (546.03 it/sec) -training >> step=2388100, episode=399 reward=0.7596079 (557.39 it/sec) -training >> step=2388200, episode=399 reward=0.7655111 (568.09 it/sec) -training >> step=2388300, episode=399 reward=0.7504353 (524.68 it/sec) -training >> step=2388400, episode=399 reward=0.754621 (541.36 it/sec) -training >> step=2388500, episode=399 reward=0.7673022 (532.54 it/sec) -training >> step=2388600, episode=399 reward=0.7778117 (572.85 it/sec) -training >> step=2388700, episode=399 reward=0.7824472 (556.90 it/sec) -training >> step=2388800, episode=399 reward=0.7649307 (527.36 it/sec) -training >> step=2388900, episode=399 reward=0.7756422 (557.27 it/sec) -training >> step=2389000, episode=399 reward=0.7611327 (559.13 it/sec) -training >> step=2389100, episode=399 reward=0.7659431 (558.21 it/sec) -training >> step=2389200, episode=399 reward=0.7616022 (531.02 it/sec) -training >> step=2389300, episode=399 reward=0.7730724 (582.18 it/sec) -training >> step=2389400, episode=399 reward=0.7577674 (563.23 it/sec) -training >> step=2389500, episode=399 reward=0.7492779 (522.92 it/sec) -training >> step=2389600, episode=399 reward=0.7601001 (543.47 it/sec) -training >> step=2389700, episode=399 reward=0.7568668 (574.67 it/sec) -training >> step=2389800, episode=399 reward=0.7637752 (564.15 it/sec) -training >> step=2389900, episode=399 reward=0.7966585 (551.38 it/sec) -training >> step=2390000, episode=399 reward=0.7747422 (494.20 it/sec) -training >> step=2390100, episode=399 reward=0.7793976 (543.60 it/sec) -training >> step=2390200, episode=399 reward=0.7710673 (552.80 it/sec) -training >> step=2390300, episode=399 reward=0.7733298 (528.79 it/sec) -training >> step=2390400, episode=399 reward=0.7580886 (567.78 it/sec) -training >> step=2390500, episode=399 reward=0.7562666 (550.24 it/sec) -training >> step=2390600, episode=399 reward=0.759605 (521.60 it/sec) -training >> step=2390700, episode=399 reward=0.7745289 (540.52 it/sec) -training >> step=2390800, episode=399 reward=0.7574806 (559.11 it/sec) -training >> step=2390900, episode=399 reward=0.7681756 (589.23 it/sec) -training >> step=2391000, episode=399 reward=0.7783132 (581.96 it/sec) -training >> step=2391100, episode=399 reward=0.7622151 (551.13 it/sec) -training >> step=2391200, episode=399 reward=0.7456235 (576.70 it/sec) -training >> step=2391300, episode=399 reward=0.7615837 (560.59 it/sec) -training >> step=2391400, episode=399 reward=0.7640457 (533.11 it/sec) -training >> step=2391500, episode=399 reward=0.7347319 (559.68 it/sec) -training >> step=2391600, episode=399 reward=0.7821363 (546.96 it/sec) -training >> step=2391700, episode=399 reward=0.7689084 (545.69 it/sec) -training >> step=2391800, episode=399 reward=0.7770655 (547.86 it/sec) -training >> step=2391900, episode=399 reward=0.7680129 (516.32 it/sec) -training >> step=2392000, episode=399 reward=0.7767376 (603.77 it/sec) -training >> step=2392100, episode=399 reward=0.7557898 (553.85 it/sec) -training >> step=2392200, episode=399 reward=0.7639099 (552.85 it/sec) -training >> step=2392300, episode=399 reward=0.7696656 (561.64 it/sec) -training >> step=2392400, episode=399 reward=0.7711228 (536.31 it/sec) -training >> step=2392500, episode=399 reward=0.7661269 (553.60 it/sec) -training >> step=2392600, episode=399 reward=0.762586 (546.96 it/sec) -training >> step=2392700, episode=399 reward=0.7489642 (553.46 it/sec) -training >> step=2392800, episode=399 reward=0.7605127 (556.64 it/sec) -training >> step=2392900, episode=399 reward=0.7414736 (552.95 it/sec) -training >> step=2393000, episode=399 reward=0.7519341 (550.68 it/sec) -training >> step=2393100, episode=399 reward=0.7464698 (557.10 it/sec) -training >> step=2393200, episode=399 reward=0.7676149 (573.10 it/sec) -training >> step=2393300, episode=400 reward=0.7616487 (153.35 it/sec) -training >> step=2393400, episode=400 reward=0.7507926 (557.28 it/sec) -training >> step=2393500, episode=400 reward=0.7759068 (557.41 it/sec) -training >> step=2393600, episode=400 reward=0.7539262 (496.77 it/sec) -training >> step=2393700, episode=400 reward=0.7504722 (528.88 it/sec) -training >> step=2393800, episode=400 reward=0.7673538 (527.18 it/sec) -training >> step=2393900, episode=400 reward=0.7651855 (549.54 it/sec) -training >> step=2394000, episode=400 reward=0.7727693 (548.61 it/sec) -training >> step=2394100, episode=400 reward=0.786061 (535.01 it/sec) -training >> step=2394200, episode=400 reward=0.7605614 (542.68 it/sec) -training >> step=2394300, episode=400 reward=0.7655502 (585.72 it/sec) -training >> step=2394400, episode=400 reward=0.76791 (538.74 it/sec) -training >> step=2394500, episode=400 reward=0.7694212 (567.74 it/sec) -training >> step=2394600, episode=400 reward=0.7733048 (552.55 it/sec) -training >> step=2394700, episode=400 reward=0.7715883 (539.64 it/sec) -training >> step=2394800, episode=400 reward=0.7640471 (541.61 it/sec) -training >> step=2394900, episode=400 reward=0.7628565 (538.62 it/sec) -training >> step=2395000, episode=400 reward=0.7554211 (543.98 it/sec) -training >> step=2395100, episode=400 reward=0.7824822 (566.25 it/sec) -training >> step=2395200, episode=400 reward=0.7325342 (556.72 it/sec) -training >> step=2395300, episode=400 reward=0.7541099 (530.60 it/sec) -training >> step=2395400, episode=400 reward=0.7617171 (559.50 it/sec) -training >> step=2395500, episode=400 reward=0.7756179 (548.54 it/sec) -training >> step=2395600, episode=400 reward=0.7746589 (544.57 it/sec) -training >> step=2395700, episode=400 reward=0.7422502 (558.65 it/sec) -training >> step=2395800, episode=400 reward=0.757613 (543.71 it/sec) -training >> step=2395900, episode=400 reward=0.7615418 (553.50 it/sec) -training >> step=2396000, episode=400 reward=0.7755585 (532.20 it/sec) -training >> step=2396100, episode=400 reward=0.7595026 (548.45 it/sec) -training >> step=2396200, episode=400 reward=0.7599908 (573.79 it/sec) -training >> step=2396300, episode=400 reward=0.7462754 (542.05 it/sec) -training >> step=2396400, episode=400 reward=0.7604935 (537.26 it/sec) -training >> step=2396500, episode=400 reward=0.7733757 (532.42 it/sec) -training >> step=2396600, episode=400 reward=0.7557483 (552.85 it/sec) -training >> step=2396700, episode=400 reward=0.7777444 (532.28 it/sec) -training >> step=2396800, episode=400 reward=0.7538173 (561.96 it/sec) -training >> step=2396900, episode=400 reward=0.7715641 (540.74 it/sec) -training >> step=2397000, episode=400 reward=0.7858561 (586.03 it/sec) -training >> step=2397100, episode=400 reward=0.7824312 (532.62 it/sec) -training >> step=2397200, episode=400 reward=0.7688304 (539.33 it/sec) -training >> step=2397300, episode=400 reward=0.7427769 (562.19 it/sec) -training >> step=2397400, episode=400 reward=0.7624499 (555.57 it/sec) -training >> step=2397500, episode=400 reward=0.7886589 (549.38 it/sec) -training >> step=2397600, episode=400 reward=0.7489303 (563.50 it/sec) -training >> step=2397700, episode=400 reward=0.773006 (575.11 it/sec) -training >> step=2397800, episode=400 reward=0.7658186 (520.76 it/sec) -training >> step=2397900, episode=400 reward=0.753334 (508.56 it/sec) -training >> step=2398000, episode=400 reward=0.7634515 (547.08 it/sec) -training >> step=2398100, episode=400 reward=0.7842419 (540.23 it/sec) -training >> step=2398200, episode=400 reward=0.7440478 (568.09 it/sec) -training >> step=2398300, episode=400 reward=0.7762924 (512.46 it/sec) -training >> step=2398400, episode=400 reward=0.753715 (526.55 it/sec) -training >> step=2398500, episode=400 reward=0.7533032 (571.78 it/sec) -training >> step=2398600, episode=400 reward=0.7622539 (523.26 it/sec) -training >> step=2398700, episode=400 reward=0.7613453 (503.17 it/sec) -training >> step=2398800, episode=400 reward=0.7537437 (486.33 it/sec) -training >> step=2398900, episode=400 reward=0.7467582 (507.84 it/sec) -training >> step=2399000, episode=400 reward=0.7398434 (533.77 it/sec) -training >> step=2399100, episode=400 reward=0.7710376 (498.22 it/sec) -training >> step=2399200, episode=400 reward=0.7476157 (560.30 it/sec) -training >> step=2399300, episode=401 reward=0.7650798 (150.12 it/sec) -training >> step=2399400, episode=401 reward=0.7538292 (511.17 it/sec) -training >> step=2399500, episode=401 reward=0.7580349 (514.55 it/sec) -training >> step=2399600, episode=401 reward=0.7436399 (503.76 it/sec) -training >> step=2399700, episode=401 reward=0.7516322 (535.73 it/sec) -training >> step=2399800, episode=401 reward=0.7617368 (571.51 it/sec) -training >> step=2399900, episode=401 reward=0.7437168 (521.73 it/sec) -training >> step=2400000, episode=401 reward=0.7547854 (525.89 it/sec) -training >> step=2400100, episode=401 reward=0.7687593 (583.05 it/sec) -training >> step=2400200, episode=401 reward=0.8031181 (603.25 it/sec) -training >> step=2400300, episode=401 reward=0.777963 (537.79 it/sec) -training >> step=2400400, episode=401 reward=0.7725883 (546.34 it/sec) -training >> step=2400500, episode=401 reward=0.7708585 (572.58 it/sec) -training >> step=2400600, episode=401 reward=0.7586399 (602.22 it/sec) -training >> step=2400700, episode=401 reward=0.7761426 (526.25 it/sec) -training >> step=2400800, episode=401 reward=0.7718145 (567.58 it/sec) -training >> step=2400900, episode=401 reward=0.7727545 (480.20 it/sec) -training >> step=2401000, episode=401 reward=0.7792758 (572.60 it/sec) -training >> step=2401100, episode=401 reward=0.7568269 (540.01 it/sec) -training >> step=2401200, episode=401 reward=0.7769849 (532.00 it/sec) -training >> step=2401300, episode=401 reward=0.7472245 (551.15 it/sec) -training >> step=2401400, episode=401 reward=0.7738197 (541.53 it/sec) -training >> step=2401500, episode=401 reward=0.7604195 (555.45 it/sec) -training >> step=2401600, episode=401 reward=0.7669693 (552.55 it/sec) -training >> step=2401700, episode=401 reward=0.7666539 (574.55 it/sec) -training >> step=2401800, episode=401 reward=0.7862 (538.24 it/sec) -training >> step=2401900, episode=401 reward=0.7498357 (536.21 it/sec) -training >> step=2402000, episode=401 reward=0.7628667 (547.81 it/sec) -training >> step=2402100, episode=401 reward=0.7628189 (548.86 it/sec) -training >> step=2402200, episode=401 reward=0.773876 (546.81 it/sec) -training >> step=2402300, episode=401 reward=0.7718855 (541.93 it/sec) -training >> step=2402400, episode=401 reward=0.7672966 (555.32 it/sec) -training >> step=2402500, episode=401 reward=0.7580056 (543.84 it/sec) -training >> step=2402600, episode=401 reward=0.7656556 (535.51 it/sec) -training >> step=2402700, episode=401 reward=0.7541952 (572.61 it/sec) -training >> step=2402800, episode=401 reward=0.7756392 (581.40 it/sec) -training >> step=2402900, episode=401 reward=0.7651057 (557.55 it/sec) -training >> step=2403000, episode=401 reward=0.7624666 (557.74 it/sec) -training >> step=2403100, episode=401 reward=0.7697634 (528.32 it/sec) -training >> step=2403200, episode=401 reward=0.7636493 (519.70 it/sec) -training >> step=2403300, episode=401 reward=0.7720041 (535.40 it/sec) -training >> step=2403400, episode=401 reward=0.782806 (576.14 it/sec) -training >> step=2403500, episode=401 reward=0.7566668 (551.09 it/sec) -training >> step=2403600, episode=401 reward=0.7683372 (578.26 it/sec) -training >> step=2403700, episode=401 reward=0.777075 (504.72 it/sec) -training >> step=2403800, episode=401 reward=0.7554082 (569.82 it/sec) -training >> step=2403900, episode=401 reward=0.7621385 (507.37 it/sec) -training >> step=2404000, episode=401 reward=0.763346 (510.27 it/sec) -training >> step=2404100, episode=401 reward=0.7688084 (526.69 it/sec) -training >> step=2404200, episode=401 reward=0.7781734 (557.81 it/sec) -training >> step=2404300, episode=401 reward=0.7605285 (526.67 it/sec) -training >> step=2404400, episode=401 reward=0.7801456 (533.81 it/sec) -training >> step=2404500, episode=401 reward=0.7624667 (569.69 it/sec) -training >> step=2404600, episode=401 reward=0.7590999 (543.93 it/sec) -training >> step=2404700, episode=401 reward=0.7493034 (578.74 it/sec) -training >> step=2404800, episode=401 reward=0.7524057 (527.24 it/sec) -training >> step=2404900, episode=401 reward=0.7601165 (557.75 it/sec) -training >> step=2405000, episode=401 reward=0.7731605 (542.08 it/sec) -training >> step=2405100, episode=401 reward=0.7605712 (562.51 it/sec) -training >> step=2405200, episode=401 reward=0.7586918 (542.76 it/sec) -training >> step=2405300, episode=402 reward=0.7903788 (124.22 it/sec) -training >> step=2405400, episode=402 reward=0.7760785 (557.92 it/sec) -training >> step=2405500, episode=402 reward=0.778442 (547.60 it/sec) -training >> step=2405600, episode=402 reward=0.757495 (496.86 it/sec) -training >> step=2405700, episode=402 reward=0.7512388 (527.89 it/sec) -training >> step=2405800, episode=402 reward=0.7583635 (604.74 it/sec) -training >> step=2405900, episode=402 reward=0.7716757 (561.65 it/sec) -training >> step=2406000, episode=402 reward=0.7675894 (572.76 it/sec) -training >> step=2406100, episode=402 reward=0.7779024 (579.46 it/sec) -training >> step=2406200, episode=402 reward=0.7721281 (586.86 it/sec) -training >> step=2406300, episode=402 reward=0.7884009 (587.25 it/sec) -training >> step=2406400, episode=402 reward=0.7835763 (553.03 it/sec) -training >> step=2406500, episode=402 reward=0.751578 (531.61 it/sec) -training >> step=2406600, episode=402 reward=0.7479048 (579.06 it/sec) -training >> step=2406700, episode=402 reward=0.7557011 (522.58 it/sec) -training >> step=2406800, episode=402 reward=0.7713965 (537.35 it/sec) -training >> step=2406900, episode=402 reward=0.7393216 (579.87 it/sec) -training >> step=2407000, episode=402 reward=0.758667 (563.35 it/sec) -training >> step=2407100, episode=402 reward=0.7655044 (539.67 it/sec) -training >> step=2407200, episode=402 reward=0.7851079 (589.66 it/sec) -training >> step=2407300, episode=402 reward=0.7605467 (525.85 it/sec) -training >> step=2407400, episode=402 reward=0.7571563 (566.67 it/sec) -training >> step=2407500, episode=402 reward=0.7612455 (565.55 it/sec) -training >> step=2407600, episode=402 reward=0.7448386 (540.16 it/sec) -training >> step=2407700, episode=402 reward=0.7795603 (572.74 it/sec) -training >> step=2407800, episode=402 reward=0.7863441 (545.70 it/sec) -training >> step=2407900, episode=402 reward=0.7596818 (541.35 it/sec) -training >> step=2408000, episode=402 reward=0.7826519 (567.41 it/sec) -training >> step=2408100, episode=402 reward=0.7853498 (564.52 it/sec) -training >> step=2408200, episode=402 reward=0.7747987 (526.62 it/sec) -training >> step=2408300, episode=402 reward=0.7795031 (532.96 it/sec) -training >> step=2408400, episode=402 reward=0.759864 (528.41 it/sec) -training >> step=2408500, episode=402 reward=0.7663954 (531.04 it/sec) -training >> step=2408600, episode=402 reward=0.7547263 (542.34 it/sec) -training >> step=2408700, episode=402 reward=0.7718165 (531.78 it/sec) -training >> step=2408800, episode=402 reward=0.7610465 (535.48 it/sec) -training >> step=2408900, episode=402 reward=0.773688 (540.29 it/sec) -training >> step=2409000, episode=402 reward=0.7624933 (504.83 it/sec) -training >> step=2409100, episode=402 reward=0.7517206 (511.38 it/sec) -training >> step=2409200, episode=402 reward=0.7641383 (492.73 it/sec) -training >> step=2409300, episode=402 reward=0.7779573 (513.68 it/sec) -training >> step=2409400, episode=402 reward=0.7606904 (558.22 it/sec) -training >> step=2409500, episode=402 reward=0.7756362 (512.91 it/sec) -training >> step=2409600, episode=402 reward=0.7882387 (518.18 it/sec) -training >> step=2409700, episode=402 reward=0.7571941 (532.22 it/sec) -training >> step=2409800, episode=402 reward=0.7536879 (538.62 it/sec) -training >> step=2409900, episode=402 reward=0.7465298 (576.87 it/sec) -training >> step=2410000, episode=402 reward=0.747409 (555.18 it/sec) -training >> step=2410100, episode=402 reward=0.7700528 (522.86 it/sec) -training >> step=2410200, episode=402 reward=0.7565582 (544.28 it/sec) -training >> step=2410300, episode=402 reward=0.7488356 (541.32 it/sec) -training >> step=2410400, episode=402 reward=0.7598341 (529.19 it/sec) -training >> step=2410500, episode=402 reward=0.7615311 (526.18 it/sec) -training >> step=2410600, episode=402 reward=0.7738742 (528.49 it/sec) -training >> step=2410700, episode=402 reward=0.7684999 (569.65 it/sec) -training >> step=2410800, episode=402 reward=0.7281243 (530.31 it/sec) -training >> step=2410900, episode=402 reward=0.7737432 (523.13 it/sec) -training >> step=2411000, episode=402 reward=0.7429109 (565.89 it/sec) -training >> step=2411100, episode=402 reward=0.7596405 (570.72 it/sec) -training >> step=2411200, episode=402 reward=0.7575516 (515.43 it/sec) -training >> step=2411300, episode=403 reward=0.7637057 (128.76 it/sec) -training >> step=2411400, episode=403 reward=0.7480106 (505.94 it/sec) -training >> step=2411500, episode=403 reward=0.7749164 (544.84 it/sec) -training >> step=2411600, episode=403 reward=0.7503752 (549.21 it/sec) -training >> step=2411700, episode=403 reward=0.7696396 (549.22 it/sec) -training >> step=2411800, episode=403 reward=0.7738292 (547.99 it/sec) -training >> step=2411900, episode=403 reward=0.773636 (543.61 it/sec) -training >> step=2412000, episode=403 reward=0.7408308 (582.94 it/sec) -training >> step=2412100, episode=403 reward=0.7594261 (563.40 it/sec) -training >> step=2412200, episode=403 reward=0.7625815 (564.82 it/sec) -training >> step=2412300, episode=403 reward=0.7691579 (567.67 it/sec) -training >> step=2412400, episode=403 reward=0.7637025 (578.68 it/sec) -training >> step=2412500, episode=403 reward=0.7649819 (534.06 it/sec) -training >> step=2412600, episode=403 reward=0.774151 (549.65 it/sec) -training >> step=2412700, episode=403 reward=0.770216 (580.61 it/sec) -training >> step=2412800, episode=403 reward=0.7831417 (593.81 it/sec) -training >> step=2412900, episode=403 reward=0.7604955 (535.94 it/sec) -training >> step=2413000, episode=403 reward=0.7713145 (570.47 it/sec) -training >> step=2413100, episode=403 reward=0.7786591 (557.10 it/sec) -training >> step=2413200, episode=403 reward=0.7709779 (529.03 it/sec) -training >> step=2413300, episode=403 reward=0.7904952 (543.90 it/sec) -training >> step=2413400, episode=403 reward=0.7639647 (550.33 it/sec) -training >> step=2413500, episode=403 reward=0.7600762 (549.72 it/sec) -training >> step=2413600, episode=403 reward=0.7479106 (531.15 it/sec) -training >> step=2413700, episode=403 reward=0.7482967 (503.71 it/sec) -training >> step=2413800, episode=403 reward=0.7305768 (541.91 it/sec) -training >> step=2413900, episode=403 reward=0.7791456 (581.73 it/sec) -training >> step=2414000, episode=403 reward=0.7575141 (545.17 it/sec) -training >> step=2414100, episode=403 reward=0.7934188 (549.45 it/sec) -training >> step=2414200, episode=403 reward=0.755339 (511.74 it/sec) -training >> step=2414300, episode=403 reward=0.767138 (464.84 it/sec) -training >> step=2414400, episode=403 reward=0.7810473 (540.96 it/sec) -training >> step=2414500, episode=403 reward=0.7421407 (539.87 it/sec) -training >> step=2414600, episode=403 reward=0.7746263 (580.59 it/sec) -training >> step=2414700, episode=403 reward=0.7758186 (529.53 it/sec) -training >> step=2414800, episode=403 reward=0.7528707 (518.23 it/sec) -training >> step=2414900, episode=403 reward=0.7461786 (542.11 it/sec) -training >> step=2415000, episode=403 reward=0.7751753 (539.66 it/sec) -training >> step=2415100, episode=403 reward=0.7620645 (531.50 it/sec) -training >> step=2415200, episode=403 reward=0.7768056 (516.03 it/sec) -training >> step=2415300, episode=403 reward=0.7746844 (536.33 it/sec) -training >> step=2415400, episode=403 reward=0.7801271 (557.83 it/sec) -training >> step=2415500, episode=403 reward=0.771204 (573.06 it/sec) -training >> step=2415600, episode=403 reward=0.7542413 (533.43 it/sec) -training >> step=2415700, episode=403 reward=0.7732983 (553.10 it/sec) -training >> step=2415800, episode=403 reward=0.7609447 (553.74 it/sec) -training >> step=2415900, episode=403 reward=0.7690746 (539.22 it/sec) -training >> step=2416000, episode=403 reward=0.7577468 (532.52 it/sec) -training >> step=2416100, episode=403 reward=0.7702153 (574.68 it/sec) -training >> step=2416200, episode=403 reward=0.7763963 (511.02 it/sec) -training >> step=2416300, episode=403 reward=0.7578958 (539.58 it/sec) -training >> step=2416400, episode=403 reward=0.7705742 (561.49 it/sec) -training >> step=2416500, episode=403 reward=0.7523352 (566.76 it/sec) -training >> step=2416600, episode=403 reward=0.7576221 (562.76 it/sec) -training >> step=2416700, episode=403 reward=0.7714029 (567.49 it/sec) -training >> step=2416800, episode=403 reward=0.7686861 (560.46 it/sec) -training >> step=2416900, episode=403 reward=0.7537112 (572.53 it/sec) -training >> step=2417000, episode=403 reward=0.7797257 (553.12 it/sec) -training >> step=2417100, episode=403 reward=0.7386108 (539.28 it/sec) -training >> step=2417200, episode=403 reward=0.7985946 (564.83 it/sec) -training >> step=2417300, episode=404 reward=0.7825833 (138.38 it/sec) -training >> step=2417400, episode=404 reward=0.7821451 (542.84 it/sec) -training >> step=2417500, episode=404 reward=0.7300996 (554.55 it/sec) -training >> step=2417600, episode=404 reward=0.7621472 (570.19 it/sec) -training >> step=2417700, episode=404 reward=0.7452877 (577.79 it/sec) -training >> step=2417800, episode=404 reward=0.7390735 (609.67 it/sec) -training >> step=2417900, episode=404 reward=0.7497692 (545.12 it/sec) -training >> step=2418000, episode=404 reward=0.7708753 (588.11 it/sec) -training >> step=2418100, episode=404 reward=0.7830608 (573.32 it/sec) -training >> step=2418200, episode=404 reward=0.7550323 (574.64 it/sec) -training >> step=2418300, episode=404 reward=0.7673153 (588.71 it/sec) -training >> step=2418400, episode=404 reward=0.7797801 (589.60 it/sec) -training >> step=2418500, episode=404 reward=0.7491797 (553.41 it/sec) -training >> step=2418600, episode=404 reward=0.7600873 (583.32 it/sec) -training >> step=2418700, episode=404 reward=0.7594754 (570.34 it/sec) -training >> step=2418800, episode=404 reward=0.7646903 (588.88 it/sec) -training >> step=2418900, episode=404 reward=0.7706562 (610.84 it/sec) -training >> step=2419000, episode=404 reward=0.748037 (587.02 it/sec) -training >> step=2419100, episode=404 reward=0.7686108 (559.68 it/sec) -training >> step=2419200, episode=404 reward=0.7667627 (605.21 it/sec) -training >> step=2419300, episode=404 reward=0.7645199 (579.08 it/sec) -training >> step=2419400, episode=404 reward=0.7703466 (543.20 it/sec) -training >> step=2419500, episode=404 reward=0.7468345 (501.78 it/sec) -training >> step=2419600, episode=404 reward=0.7747316 (526.38 it/sec) -training >> step=2419700, episode=404 reward=0.771284 (513.78 it/sec) -training >> step=2419800, episode=404 reward=0.7645402 (565.89 it/sec) -training >> step=2419900, episode=404 reward=0.7720404 (546.26 it/sec) -training >> step=2420000, episode=404 reward=0.7510811 (595.04 it/sec) -training >> step=2420100, episode=404 reward=0.7631944 (565.58 it/sec) -training >> step=2420200, episode=404 reward=0.7684885 (524.15 it/sec) -training >> step=2420300, episode=404 reward=0.7737199 (554.01 it/sec) -training >> step=2420400, episode=404 reward=0.7493574 (532.01 it/sec) -training >> step=2420500, episode=404 reward=0.7722664 (555.21 it/sec) -training >> step=2420600, episode=404 reward=0.7606025 (564.31 it/sec) -training >> step=2420700, episode=404 reward=0.7694757 (600.48 it/sec) -training >> step=2420800, episode=404 reward=0.75691 (545.15 it/sec) -training >> step=2420900, episode=404 reward=0.7618087 (534.36 it/sec) -training >> step=2421000, episode=404 reward=0.7733786 (586.81 it/sec) -training >> step=2421100, episode=404 reward=0.7549608 (556.42 it/sec) -training >> step=2421200, episode=404 reward=0.7748121 (576.17 it/sec) -training >> step=2421300, episode=404 reward=0.7675131 (512.40 it/sec) -training >> step=2421400, episode=404 reward=0.7700952 (547.39 it/sec) -training >> step=2421500, episode=404 reward=0.7788944 (523.72 it/sec) -training >> step=2421600, episode=404 reward=0.7721205 (548.69 it/sec) -training >> step=2421700, episode=404 reward=0.7602535 (569.14 it/sec) -training >> step=2421800, episode=404 reward=0.7689757 (587.28 it/sec) -training >> step=2421900, episode=404 reward=0.7823555 (518.88 it/sec) -training >> step=2422000, episode=404 reward=0.7570564 (555.10 it/sec) -training >> step=2422100, episode=404 reward=0.7561995 (552.99 it/sec) -training >> step=2422200, episode=404 reward=0.7744974 (530.22 it/sec) -training >> step=2422300, episode=404 reward=0.7722341 (581.83 it/sec) -training >> step=2422400, episode=404 reward=0.7689748 (552.71 it/sec) -training >> step=2422500, episode=404 reward=0.7457557 (544.61 it/sec) -training >> step=2422600, episode=404 reward=0.7517297 (564.12 it/sec) -training >> step=2422700, episode=404 reward=0.7471974 (548.32 it/sec) -training >> step=2422800, episode=404 reward=0.740486 (571.70 it/sec) -training >> step=2422900, episode=404 reward=0.754716 (609.79 it/sec) -training >> step=2423000, episode=404 reward=0.7620438 (500.64 it/sec) -training >> step=2423100, episode=404 reward=0.7546989 (491.31 it/sec) -training >> step=2423200, episode=404 reward=0.7514972 (527.85 it/sec) -training >> step=2423300, episode=405 reward=0.7512123 (120.51 it/sec) -training >> step=2423400, episode=405 reward=0.7532491 (534.17 it/sec) -training >> step=2423500, episode=405 reward=0.7187018 (561.56 it/sec) -training >> step=2423600, episode=405 reward=0.7663617 (577.78 it/sec) -training >> step=2423700, episode=405 reward=0.7723697 (575.91 it/sec) -training >> step=2423800, episode=405 reward=0.7615838 (581.32 it/sec) -training >> step=2423900, episode=405 reward=0.7494125 (576.18 it/sec) -training >> step=2424000, episode=405 reward=0.772269 (586.22 it/sec) -training >> step=2424100, episode=405 reward=0.7669448 (581.46 it/sec) -training >> step=2424200, episode=405 reward=0.760337 (614.40 it/sec) -training >> step=2424300, episode=405 reward=0.76207 (586.95 it/sec) -training >> step=2424400, episode=405 reward=0.7942784 (554.55 it/sec) -training >> step=2424500, episode=405 reward=0.7751029 (568.43 it/sec) -training >> step=2424600, episode=405 reward=0.7802177 (600.93 it/sec) -training >> step=2424700, episode=405 reward=0.7712668 (551.48 it/sec) -training >> step=2424800, episode=405 reward=0.7721143 (545.33 it/sec) -training >> step=2424900, episode=405 reward=0.748879 (588.93 it/sec) -training >> step=2425000, episode=405 reward=0.7854016 (572.65 it/sec) -training >> step=2425100, episode=405 reward=0.7687826 (504.89 it/sec) -training >> step=2425200, episode=405 reward=0.7577922 (539.18 it/sec) -training >> step=2425300, episode=405 reward=0.7620617 (575.18 it/sec) -training >> step=2425400, episode=405 reward=0.7701666 (594.86 it/sec) -training >> step=2425500, episode=405 reward=0.7602944 (519.15 it/sec) -training >> step=2425600, episode=405 reward=0.7844166 (557.95 it/sec) -training >> step=2425700, episode=405 reward=0.7690386 (559.86 it/sec) -training >> step=2425800, episode=405 reward=0.7513123 (561.94 it/sec) -training >> step=2425900, episode=405 reward=0.7783099 (570.50 it/sec) -training >> step=2426000, episode=405 reward=0.7894539 (554.15 it/sec) -training >> step=2426100, episode=405 reward=0.7503083 (531.35 it/sec) -training >> step=2426200, episode=405 reward=0.7753149 (508.59 it/sec) -training >> step=2426300, episode=405 reward=0.7572018 (499.73 it/sec) -training >> step=2426400, episode=405 reward=0.7901141 (572.20 it/sec) -training >> step=2426500, episode=405 reward=0.763836 (506.43 it/sec) -training >> step=2426600, episode=405 reward=0.7652898 (515.51 it/sec) -training >> step=2426700, episode=405 reward=0.7504753 (524.52 it/sec) -training >> step=2426800, episode=405 reward=0.7915487 (506.02 it/sec) -training >> step=2426900, episode=405 reward=0.753929 (549.91 it/sec) -training >> step=2427000, episode=405 reward=0.7871466 (559.10 it/sec) -training >> step=2427100, episode=405 reward=0.7865684 (538.21 it/sec) -training >> step=2427200, episode=405 reward=0.7592964 (554.39 it/sec) -training >> step=2427300, episode=405 reward=0.7774552 (489.12 it/sec) -training >> step=2427400, episode=405 reward=0.7912753 (512.52 it/sec) -training >> step=2427500, episode=405 reward=0.765637 (565.12 it/sec) -training >> step=2427600, episode=405 reward=0.7648652 (555.71 it/sec) -training >> step=2427700, episode=405 reward=0.7784986 (532.13 it/sec) -training >> step=2427800, episode=405 reward=0.7772999 (479.39 it/sec) -training >> step=2427900, episode=405 reward=0.7607026 (545.31 it/sec) -training >> step=2428000, episode=405 reward=0.7684565 (525.02 it/sec) -training >> step=2428100, episode=405 reward=0.7740114 (576.57 it/sec) -training >> step=2428200, episode=405 reward=0.754974 (522.44 it/sec) -training >> step=2428300, episode=405 reward=0.7757378 (539.51 it/sec) -training >> step=2428400, episode=405 reward=0.742648 (525.39 it/sec) -training >> step=2428500, episode=405 reward=0.7517488 (498.33 it/sec) -training >> step=2428600, episode=405 reward=0.7654066 (557.96 it/sec) -training >> step=2428700, episode=405 reward=0.7541823 (535.76 it/sec) -training >> step=2428800, episode=405 reward=0.7884273 (511.74 it/sec) -training >> step=2428900, episode=405 reward=0.7597495 (521.01 it/sec) -training >> step=2429000, episode=405 reward=0.7776189 (537.91 it/sec) -training >> step=2429100, episode=405 reward=0.7779107 (514.60 it/sec) -training >> step=2429200, episode=405 reward=0.7467621 (521.96 it/sec) -training >> step=2429300, episode=406 reward=0.7737873 (120.61 it/sec) -training >> step=2429400, episode=406 reward=0.7329835 (495.80 it/sec) -training >> step=2429500, episode=406 reward=0.7555354 (533.54 it/sec) -training >> step=2429600, episode=406 reward=0.757943 (493.09 it/sec) -training >> step=2429700, episode=406 reward=0.7336081 (478.61 it/sec) -training >> step=2429800, episode=406 reward=0.7796711 (488.31 it/sec) -training >> step=2429900, episode=406 reward=0.7478183 (547.87 it/sec) -training >> step=2430000, episode=406 reward=0.7649608 (521.31 it/sec) -training >> step=2430100, episode=406 reward=0.7650108 (524.82 it/sec) -training >> step=2430200, episode=406 reward=0.7949566 (529.07 it/sec) -training >> step=2430300, episode=406 reward=0.7468658 (541.26 it/sec) -training >> step=2430400, episode=406 reward=0.7902574 (514.30 it/sec) -training >> step=2430500, episode=406 reward=0.7614653 (542.20 it/sec) -training >> step=2430600, episode=406 reward=0.7780588 (511.23 it/sec) -training >> step=2430700, episode=406 reward=0.757389 (510.52 it/sec) -training >> step=2430800, episode=406 reward=0.7938105 (538.33 it/sec) -training >> step=2430900, episode=406 reward=0.7675511 (525.73 it/sec) -training >> step=2431000, episode=406 reward=0.7644923 (525.81 it/sec) -training >> step=2431100, episode=406 reward=0.7380414 (527.75 it/sec) -training >> step=2431200, episode=406 reward=0.7602543 (487.02 it/sec) -training >> step=2431300, episode=406 reward=0.7671623 (513.75 it/sec) -training >> step=2431400, episode=406 reward=0.7598662 (523.83 it/sec) -training >> step=2431500, episode=406 reward=0.7629484 (495.64 it/sec) -training >> step=2431600, episode=406 reward=0.7484229 (502.87 it/sec) -training >> step=2431700, episode=406 reward=0.7770538 (517.25 it/sec) -training >> step=2431800, episode=406 reward=0.7697718 (539.63 it/sec) -training >> step=2431900, episode=406 reward=0.7643313 (521.83 it/sec) -training >> step=2432000, episode=406 reward=0.7407563 (528.15 it/sec) -training >> step=2432100, episode=406 reward=0.7615896 (556.56 it/sec) -training >> step=2432200, episode=406 reward=0.737201 (556.43 it/sec) -training >> step=2432300, episode=406 reward=0.7684476 (553.85 it/sec) -training >> step=2432400, episode=406 reward=0.7749906 (556.83 it/sec) -training >> step=2432500, episode=406 reward=0.7692449 (550.38 it/sec) -training >> step=2432600, episode=406 reward=0.7781071 (510.06 it/sec) -training >> step=2432700, episode=406 reward=0.7586183 (558.24 it/sec) -training >> step=2432800, episode=406 reward=0.7816026 (545.23 it/sec) -training >> step=2432900, episode=406 reward=0.76565 (570.99 it/sec) -training >> step=2433000, episode=406 reward=0.7625 (596.21 it/sec) -training >> step=2433100, episode=406 reward=0.7752222 (555.52 it/sec) -training >> step=2433200, episode=406 reward=0.772086 (563.34 it/sec) -training >> step=2433300, episode=406 reward=0.7651936 (555.91 it/sec) -training >> step=2433400, episode=406 reward=0.7524327 (567.52 it/sec) -training >> step=2433500, episode=406 reward=0.7709517 (545.79 it/sec) -training >> step=2433600, episode=406 reward=0.7715062 (548.21 it/sec) -training >> step=2433700, episode=406 reward=0.771979 (566.23 it/sec) -training >> step=2433800, episode=406 reward=0.754608 (566.86 it/sec) -training >> step=2433900, episode=406 reward=0.769897 (545.62 it/sec) -training >> step=2434000, episode=406 reward=0.7653826 (522.95 it/sec) -training >> step=2434100, episode=406 reward=0.7377457 (566.96 it/sec) -training >> step=2434200, episode=406 reward=0.7738787 (565.54 it/sec) -training >> step=2434300, episode=406 reward=0.7614886 (538.64 it/sec) -training >> step=2434400, episode=406 reward=0.772482 (548.15 it/sec) -training >> step=2434500, episode=406 reward=0.7506548 (549.17 it/sec) -training >> step=2434600, episode=406 reward=0.7756684 (548.96 it/sec) -training >> step=2434700, episode=406 reward=0.765572 (533.65 it/sec) -training >> step=2434800, episode=406 reward=0.7699121 (557.16 it/sec) -training >> step=2434900, episode=406 reward=0.7819314 (546.37 it/sec) -training >> step=2435000, episode=406 reward=0.751282 (561.15 it/sec) -training >> step=2435100, episode=406 reward=0.7552522 (532.60 it/sec) -training >> step=2435200, episode=406 reward=0.7693608 (511.83 it/sec) -training >> step=2435300, episode=407 reward=0.7618679 (58.73 it/sec) -training >> step=2435400, episode=407 reward=0.7606934 (524.79 it/sec) -training >> step=2435500, episode=407 reward=0.7548051 (540.48 it/sec) -training >> step=2435600, episode=407 reward=0.759582 (565.49 it/sec) -training >> step=2435700, episode=407 reward=0.7496508 (529.30 it/sec) -training >> step=2435800, episode=407 reward=0.7575926 (536.58 it/sec) -training >> step=2435900, episode=407 reward=0.7419153 (544.08 it/sec) -training >> step=2436000, episode=407 reward=0.7634084 (550.73 it/sec) -training >> step=2436100, episode=407 reward=0.7688161 (554.96 it/sec) -training >> step=2436200, episode=407 reward=0.7516976 (547.66 it/sec) -training >> step=2436300, episode=407 reward=0.7835706 (556.98 it/sec) -training >> step=2436400, episode=407 reward=0.7671005 (528.80 it/sec) -training >> step=2436500, episode=407 reward=0.7540872 (577.57 it/sec) -training >> step=2436600, episode=407 reward=0.7490739 (559.59 it/sec) -training >> step=2436700, episode=407 reward=0.7757508 (554.38 it/sec) -training >> step=2436800, episode=407 reward=0.7623904 (556.87 it/sec) -training >> step=2436900, episode=407 reward=0.7756836 (525.61 it/sec) -training >> step=2437000, episode=407 reward=0.7653752 (563.95 it/sec) -training >> step=2437100, episode=407 reward=0.7650236 (563.55 it/sec) -training >> step=2437200, episode=407 reward=0.7644639 (558.60 it/sec) -training >> step=2437300, episode=407 reward=0.7647616 (534.11 it/sec) -training >> step=2437400, episode=407 reward=0.7840077 (552.38 it/sec) -training >> step=2437500, episode=407 reward=0.7945394 (568.39 it/sec) -training >> step=2437600, episode=407 reward=0.7641745 (560.75 it/sec) -training >> step=2437700, episode=407 reward=0.7644875 (549.04 it/sec) -training >> step=2437800, episode=407 reward=0.7943796 (543.55 it/sec) -training >> step=2437900, episode=407 reward=0.7751329 (539.88 it/sec) -training >> step=2438000, episode=407 reward=0.7689632 (548.61 it/sec) -training >> step=2438100, episode=407 reward=0.7791724 (559.91 it/sec) -training >> step=2438200, episode=407 reward=0.7659693 (558.05 it/sec) -training >> step=2438300, episode=407 reward=0.7742612 (593.09 it/sec) -training >> step=2438400, episode=407 reward=0.7621315 (574.71 it/sec) -training >> step=2438500, episode=407 reward=0.7633295 (554.01 it/sec) -training >> step=2438600, episode=407 reward=0.781213 (582.01 it/sec) -training >> step=2438700, episode=407 reward=0.7629418 (561.80 it/sec) -training >> step=2438800, episode=407 reward=0.7640717 (580.67 it/sec) -training >> step=2438900, episode=407 reward=0.7824066 (562.56 it/sec) -training >> step=2439000, episode=407 reward=0.7576951 (525.11 it/sec) -training >> step=2439100, episode=407 reward=0.7659443 (584.95 it/sec) -training >> step=2439200, episode=407 reward=0.7410284 (565.03 it/sec) -training >> step=2439300, episode=407 reward=0.765763 (563.75 it/sec) -training >> step=2439400, episode=407 reward=0.7532974 (578.67 it/sec) -training >> step=2439500, episode=407 reward=0.765203 (516.38 it/sec) -training >> step=2439600, episode=407 reward=0.7742038 (555.76 it/sec) -training >> step=2439700, episode=407 reward=0.7441214 (527.21 it/sec) -training >> step=2439800, episode=407 reward=0.7508586 (539.14 it/sec) -training >> step=2439900, episode=407 reward=0.7687603 (515.80 it/sec) -training >> step=2440000, episode=407 reward=0.7914092 (553.51 it/sec) -training >> step=2440100, episode=407 reward=0.7785282 (528.89 it/sec) -training >> step=2440200, episode=407 reward=0.7640305 (533.96 it/sec) -training >> step=2440300, episode=407 reward=0.7506884 (547.25 it/sec) -training >> step=2440400, episode=407 reward=0.7665848 (547.83 it/sec) -training >> step=2440500, episode=407 reward=0.7561605 (563.40 it/sec) -training >> step=2440600, episode=407 reward=0.8022795 (540.06 it/sec) -training >> step=2440700, episode=407 reward=0.7601054 (523.36 it/sec) -training >> step=2440800, episode=407 reward=0.7383699 (510.47 it/sec) -training >> step=2440900, episode=407 reward=0.7685296 (564.43 it/sec) -training >> step=2441000, episode=407 reward=0.7731236 (540.25 it/sec) -training >> step=2441100, episode=407 reward=0.7691792 (544.09 it/sec) -training >> step=2441200, episode=407 reward=0.7441829 (562.84 it/sec) -training >> step=2441300, episode=408 reward=0.7495455 (74.88 it/sec) -training >> step=2441400, episode=408 reward=0.7592366 (510.75 it/sec) -training >> step=2441500, episode=408 reward=0.7640043 (515.79 it/sec) -training >> step=2441600, episode=408 reward=0.7590491 (554.51 it/sec) -training >> step=2441700, episode=408 reward=0.7748628 (529.15 it/sec) -training >> step=2441800, episode=408 reward=0.7780634 (486.38 it/sec) -training >> step=2441900, episode=408 reward=0.775337 (543.41 it/sec) -training >> step=2442000, episode=408 reward=0.750293 (560.01 it/sec) -training >> step=2442100, episode=408 reward=0.7729928 (547.34 it/sec) -training >> step=2442200, episode=408 reward=0.7661801 (562.85 it/sec) -training >> step=2442300, episode=408 reward=0.7645919 (559.78 it/sec) -training >> step=2442400, episode=408 reward=0.7684699 (540.36 it/sec) -training >> step=2442500, episode=408 reward=0.7769394 (538.56 it/sec) -training >> step=2442600, episode=408 reward=0.7881284 (541.78 it/sec) -training >> step=2442700, episode=408 reward=0.758765 (547.90 it/sec) -training >> step=2442800, episode=408 reward=0.7592931 (546.91 it/sec) -training >> step=2442900, episode=408 reward=0.7650934 (514.30 it/sec) -training >> step=2443000, episode=408 reward=0.7459571 (573.80 it/sec) -training >> step=2443100, episode=408 reward=0.7683939 (544.14 it/sec) -training >> step=2443200, episode=408 reward=0.7718766 (530.61 it/sec) -training >> step=2443300, episode=408 reward=0.7823091 (545.73 it/sec) -training >> step=2443400, episode=408 reward=0.7829388 (539.29 it/sec) -training >> step=2443500, episode=408 reward=0.7825051 (525.58 it/sec) -training >> step=2443600, episode=408 reward=0.7351804 (528.02 it/sec) -training >> step=2443700, episode=408 reward=0.7523979 (545.52 it/sec) -training >> step=2443800, episode=408 reward=0.7788403 (542.01 it/sec) -training >> step=2443900, episode=408 reward=0.7590666 (544.48 it/sec) -training >> step=2444000, episode=408 reward=0.7627751 (531.10 it/sec) -training >> step=2444100, episode=408 reward=0.7844638 (529.62 it/sec) -training >> step=2444200, episode=408 reward=0.7697033 (537.64 it/sec) -training >> step=2444300, episode=408 reward=0.7543391 (545.35 it/sec) -training >> step=2444400, episode=408 reward=0.7565416 (534.68 it/sec) -training >> step=2444500, episode=408 reward=0.7314868 (558.97 it/sec) -training >> step=2444600, episode=408 reward=0.7430402 (574.35 it/sec) -training >> step=2444700, episode=408 reward=0.7580862 (525.30 it/sec) -training >> step=2444800, episode=408 reward=0.7648199 (570.84 it/sec) -training >> step=2444900, episode=408 reward=0.7509593 (566.43 it/sec) -training >> step=2445000, episode=408 reward=0.7487962 (525.55 it/sec) -training >> step=2445100, episode=408 reward=0.7652108 (554.85 it/sec) -training >> step=2445200, episode=408 reward=0.769949 (562.01 it/sec) -training >> step=2445300, episode=408 reward=0.7485188 (554.35 it/sec) -training >> step=2445400, episode=408 reward=0.7666631 (543.68 it/sec) -training >> step=2445500, episode=408 reward=0.7781302 (564.19 it/sec) -training >> step=2445600, episode=408 reward=0.7701901 (532.62 it/sec) -training >> step=2445700, episode=408 reward=0.7660412 (525.29 it/sec) -training >> step=2445800, episode=408 reward=0.7595692 (530.90 it/sec) -training >> step=2445900, episode=408 reward=0.7762849 (544.57 it/sec) -training >> step=2446000, episode=408 reward=0.7727446 (556.38 it/sec) -training >> step=2446100, episode=408 reward=0.78514 (555.26 it/sec) -training >> step=2446200, episode=408 reward=0.7642125 (559.00 it/sec) -training >> step=2446300, episode=408 reward=0.7517949 (561.44 it/sec) -training >> step=2446400, episode=408 reward=0.7827309 (570.62 it/sec) -training >> step=2446500, episode=408 reward=0.7521297 (551.76 it/sec) -training >> step=2446600, episode=408 reward=0.7526833 (528.64 it/sec) -training >> step=2446700, episode=408 reward=0.7674232 (530.87 it/sec) -training >> step=2446800, episode=408 reward=0.7641447 (561.21 it/sec) -training >> step=2446900, episode=408 reward=0.7632486 (518.81 it/sec) -training >> step=2447000, episode=408 reward=0.7679194 (544.34 it/sec) -training >> step=2447100, episode=408 reward=0.7632345 (518.00 it/sec) -training >> step=2447200, episode=408 reward=0.7603928 (505.54 it/sec) -training >> step=2447300, episode=409 reward=0.7593244 (213.51 it/sec) -training >> step=2447400, episode=409 reward=0.7424512 (539.55 it/sec) -training >> step=2447500, episode=409 reward=0.7614311 (555.68 it/sec) -training >> step=2447600, episode=409 reward=0.7778339 (549.50 it/sec) -training >> step=2447700, episode=409 reward=0.743732 (547.65 it/sec) -training >> step=2447800, episode=409 reward=0.7720821 (514.02 it/sec) -training >> step=2447900, episode=409 reward=0.7594458 (574.44 it/sec) -training >> step=2448000, episode=409 reward=0.7751341 (550.54 it/sec) -training >> step=2448100, episode=409 reward=0.7473784 (524.18 it/sec) -training >> step=2448200, episode=409 reward=0.7711466 (543.07 it/sec) -training >> step=2448300, episode=409 reward=0.7825038 (572.90 it/sec) -training >> step=2448400, episode=409 reward=0.7492381 (548.12 it/sec) -training >> step=2448500, episode=409 reward=0.7811128 (527.00 it/sec) -training >> step=2448600, episode=409 reward=0.7667938 (562.84 it/sec) -training >> step=2448700, episode=409 reward=0.7706897 (578.51 it/sec) -training >> step=2448800, episode=409 reward=0.74691 (563.59 it/sec) -training >> step=2448900, episode=409 reward=0.7784498 (536.22 it/sec) -training >> step=2449000, episode=409 reward=0.7377201 (553.82 it/sec) -training >> step=2449100, episode=409 reward=0.7645507 (570.39 it/sec) -training >> step=2449200, episode=409 reward=0.7760379 (556.89 it/sec) -training >> step=2449300, episode=409 reward=0.7659506 (548.48 it/sec) -training >> step=2449400, episode=409 reward=0.7609816 (554.38 it/sec) -training >> step=2449500, episode=409 reward=0.7708279 (560.40 it/sec) -training >> step=2449600, episode=409 reward=0.7850754 (528.07 it/sec) -training >> step=2449700, episode=409 reward=0.7432962 (542.58 it/sec) -training >> step=2449800, episode=409 reward=0.7632023 (553.65 it/sec) -training >> step=2449900, episode=409 reward=0.7529424 (568.32 it/sec) -training >> step=2450000, episode=409 reward=0.7869816 (527.13 it/sec) -training >> step=2450100, episode=409 reward=0.7991694 (538.97 it/sec) -training >> step=2450200, episode=409 reward=0.7694013 (540.92 it/sec) -training >> step=2450300, episode=409 reward=0.7647189 (504.77 it/sec) -training >> step=2450400, episode=409 reward=0.7542522 (534.33 it/sec) -training >> step=2450500, episode=409 reward=0.7815705 (507.91 it/sec) -training >> step=2450600, episode=409 reward=0.7799778 (557.54 it/sec) -training >> step=2450700, episode=409 reward=0.7853126 (579.91 it/sec) -training >> step=2450800, episode=409 reward=0.7729502 (505.72 it/sec) -training >> step=2450900, episode=409 reward=0.7606779 (546.59 it/sec) -training >> step=2451000, episode=409 reward=0.7582288 (554.94 it/sec) -training >> step=2451100, episode=409 reward=0.7723243 (575.10 it/sec) -training >> step=2451200, episode=409 reward=0.7584211 (566.47 it/sec) -training >> step=2451300, episode=409 reward=0.7588739 (510.30 it/sec) -training >> step=2451400, episode=409 reward=0.76248 (554.59 it/sec) -training >> step=2451500, episode=409 reward=0.7646703 (558.81 it/sec) -training >> step=2451600, episode=409 reward=0.7599392 (534.96 it/sec) -training >> step=2451700, episode=409 reward=0.7730494 (558.77 it/sec) -training >> step=2451800, episode=409 reward=0.7634557 (557.78 it/sec) -training >> step=2451900, episode=409 reward=0.7905132 (558.57 it/sec) -training >> step=2452000, episode=409 reward=0.7895302 (530.65 it/sec) -training >> step=2452100, episode=409 reward=0.7515463 (551.90 it/sec) -training >> step=2452200, episode=409 reward=0.7661428 (556.03 it/sec) -training >> step=2452300, episode=409 reward=0.7720166 (543.36 it/sec) -training >> step=2452400, episode=409 reward=0.7926148 (545.42 it/sec) -training >> step=2452500, episode=409 reward=0.7724925 (536.36 it/sec) -training >> step=2452600, episode=409 reward=0.7618393 (541.42 it/sec) -training >> step=2452700, episode=409 reward=0.7618498 (573.13 it/sec) -training >> step=2452800, episode=409 reward=0.7645497 (558.82 it/sec) -training >> step=2452900, episode=409 reward=0.7519911 (545.55 it/sec) -training >> step=2453000, episode=409 reward=0.7731335 (573.57 it/sec) -training >> step=2453100, episode=409 reward=0.7743232 (579.02 it/sec) -training >> step=2453200, episode=409 reward=0.7716239 (552.15 it/sec) -training >> step=2453300, episode=410 reward=0.7599788 (153.43 it/sec) -training >> step=2453400, episode=410 reward=0.754292 (545.81 it/sec) -training >> step=2453500, episode=410 reward=0.7538625 (538.32 it/sec) -training >> step=2453600, episode=410 reward=0.7431059 (535.47 it/sec) -training >> step=2453700, episode=410 reward=0.7801489 (520.61 it/sec) -training >> step=2453800, episode=410 reward=0.7852796 (557.06 it/sec) -training >> step=2453900, episode=410 reward=0.7769549 (529.06 it/sec) -training >> step=2454000, episode=410 reward=0.7684881 (517.51 it/sec) -training >> step=2454100, episode=410 reward=0.7822847 (508.29 it/sec) -training >> step=2454200, episode=410 reward=0.7799032 (555.21 it/sec) -training >> step=2454300, episode=410 reward=0.7634888 (523.41 it/sec) -training >> step=2454400, episode=410 reward=0.7648631 (498.97 it/sec) -training >> step=2454500, episode=410 reward=0.7727719 (501.27 it/sec) -training >> step=2454600, episode=410 reward=0.7690449 (552.04 it/sec) -training >> step=2454700, episode=410 reward=0.7598926 (533.72 it/sec) -training >> step=2454800, episode=410 reward=0.7744446 (534.40 it/sec) -training >> step=2454900, episode=410 reward=0.7710017 (548.29 it/sec) -training >> step=2455000, episode=410 reward=0.7746025 (504.16 it/sec) -training >> step=2455100, episode=410 reward=0.7493904 (534.81 it/sec) -training >> step=2455200, episode=410 reward=0.7835314 (491.87 it/sec) -training >> step=2455300, episode=410 reward=0.7529595 (532.89 it/sec) -training >> step=2455400, episode=410 reward=0.7833343 (523.89 it/sec) -training >> step=2455500, episode=410 reward=0.7618527 (517.29 it/sec) -training >> step=2455600, episode=410 reward=0.7468619 (506.52 it/sec) -training >> step=2455700, episode=410 reward=0.771708 (516.66 it/sec) -training >> step=2455800, episode=410 reward=0.7804854 (533.78 it/sec) -training >> step=2455900, episode=410 reward=0.7684429 (523.71 it/sec) -training >> step=2456000, episode=410 reward=0.7413197 (531.70 it/sec) -training >> step=2456100, episode=410 reward=0.7931087 (529.63 it/sec) -training >> step=2456200, episode=410 reward=0.7804922 (537.33 it/sec) -training >> step=2456300, episode=410 reward=0.768699 (555.01 it/sec) -training >> step=2456400, episode=410 reward=0.786213 (556.34 it/sec) -training >> step=2456500, episode=410 reward=0.7506125 (550.95 it/sec) -training >> step=2456600, episode=410 reward=0.7774031 (527.36 it/sec) -training >> step=2456700, episode=410 reward=0.7672101 (548.50 it/sec) -training >> step=2456800, episode=410 reward=0.7588863 (564.61 it/sec) -training >> step=2456900, episode=410 reward=0.7761579 (519.00 it/sec) -training >> step=2457000, episode=410 reward=0.7835976 (549.01 it/sec) -training >> step=2457100, episode=410 reward=0.7790839 (541.90 it/sec) -training >> step=2457200, episode=410 reward=0.7574003 (554.44 it/sec) -training >> step=2457300, episode=410 reward=0.7689717 (556.82 it/sec) -training >> step=2457400, episode=410 reward=0.7626284 (514.04 it/sec) -training >> step=2457500, episode=410 reward=0.7283501 (572.94 it/sec) -training >> step=2457600, episode=410 reward=0.7583051 (573.60 it/sec) -training >> step=2457700, episode=410 reward=0.743195 (523.53 it/sec) -training >> step=2457800, episode=410 reward=0.7540171 (555.51 it/sec) -training >> step=2457900, episode=410 reward=0.7647405 (563.01 it/sec) -training >> step=2458000, episode=410 reward=0.7624751 (575.71 it/sec) -training >> step=2458100, episode=410 reward=0.7693194 (536.94 it/sec) -training >> step=2458200, episode=410 reward=0.7562434 (575.89 it/sec) -training >> step=2458300, episode=410 reward=0.7479671 (506.44 it/sec) -training >> step=2458400, episode=410 reward=0.7827608 (546.23 it/sec) -training >> step=2458500, episode=410 reward=0.7398532 (566.62 it/sec) -training >> step=2458600, episode=410 reward=0.73518 (555.44 it/sec) -training >> step=2458700, episode=410 reward=0.7512643 (555.82 it/sec) -training >> step=2458800, episode=410 reward=0.7612295 (557.86 it/sec) -training >> step=2458900, episode=410 reward=0.7639112 (561.17 it/sec) -training >> step=2459000, episode=410 reward=0.7557606 (565.29 it/sec) -training >> step=2459100, episode=410 reward=0.759787 (574.75 it/sec) -training >> step=2459200, episode=410 reward=0.755302 (562.80 it/sec) -training >> step=2459300, episode=411 reward=0.7461158 (132.60 it/sec) -training >> step=2459400, episode=411 reward=0.7449946 (548.16 it/sec) -training >> step=2459500, episode=411 reward=0.7452954 (556.36 it/sec) -training >> step=2459600, episode=411 reward=0.7496256 (544.37 it/sec) -training >> step=2459700, episode=411 reward=0.7611942 (526.48 it/sec) -training >> step=2459800, episode=411 reward=0.7520467 (564.23 it/sec) -training >> step=2459900, episode=411 reward=0.7618081 (524.89 it/sec) -training >> step=2460000, episode=411 reward=0.7700772 (544.62 it/sec) -training >> step=2460100, episode=411 reward=0.7731093 (556.79 it/sec) -training >> step=2460200, episode=411 reward=0.7596651 (538.61 it/sec) -training >> step=2460300, episode=411 reward=0.7843334 (587.88 it/sec) -training >> step=2460400, episode=411 reward=0.7560209 (532.93 it/sec) -training >> step=2460500, episode=411 reward=0.7543883 (519.95 it/sec) -training >> step=2460600, episode=411 reward=0.7769133 (546.70 it/sec) -training >> step=2460700, episode=411 reward=0.7657387 (568.32 it/sec) -training >> step=2460800, episode=411 reward=0.756778 (534.86 it/sec) -training >> step=2460900, episode=411 reward=0.7701209 (560.49 it/sec) -training >> step=2461000, episode=411 reward=0.7863444 (551.30 it/sec) -training >> step=2461100, episode=411 reward=0.763611 (626.03 it/sec) -training >> step=2461200, episode=411 reward=0.7849129 (599.99 it/sec) -training >> step=2461300, episode=411 reward=0.760464 (592.58 it/sec) -training >> step=2461400, episode=411 reward=0.7864248 (577.54 it/sec) -training >> step=2461500, episode=411 reward=0.7430132 (618.58 it/sec) -training >> step=2461600, episode=411 reward=0.7615477 (558.35 it/sec) -training >> step=2461700, episode=411 reward=0.7707943 (590.49 it/sec) -training >> step=2461800, episode=411 reward=0.7750018 (604.60 it/sec) -training >> step=2461900, episode=411 reward=0.7536546 (601.22 it/sec) -training >> step=2462000, episode=411 reward=0.7640494 (584.79 it/sec) -training >> step=2462100, episode=411 reward=0.7838249 (578.09 it/sec) -training >> step=2462200, episode=411 reward=0.755775 (579.14 it/sec) -training >> step=2462300, episode=411 reward=0.7386742 (575.31 it/sec) -training >> step=2462400, episode=411 reward=0.7821831 (587.82 it/sec) -training >> step=2462500, episode=411 reward=0.7878023 (586.73 it/sec) -training >> step=2462600, episode=411 reward=0.7560206 (575.43 it/sec) -training >> step=2462700, episode=411 reward=0.7494634 (610.34 it/sec) -training >> step=2462800, episode=411 reward=0.7765849 (565.83 it/sec) -training >> step=2462900, episode=411 reward=0.775249 (539.30 it/sec) -training >> step=2463000, episode=411 reward=0.7869627 (598.04 it/sec) -training >> step=2463100, episode=411 reward=0.774514 (595.10 it/sec) -training >> step=2463200, episode=411 reward=0.7588042 (562.52 it/sec) -training >> step=2463300, episode=411 reward=0.7721562 (584.29 it/sec) -training >> step=2463400, episode=411 reward=0.7676353 (583.54 it/sec) -training >> step=2463500, episode=411 reward=0.7564155 (570.61 it/sec) -training >> step=2463600, episode=411 reward=0.7527467 (585.64 it/sec) -training >> step=2463700, episode=411 reward=0.7826805 (600.45 it/sec) -training >> step=2463800, episode=411 reward=0.776255 (600.46 it/sec) -training >> step=2463900, episode=411 reward=0.7489923 (602.77 it/sec) -training >> step=2464000, episode=411 reward=0.755835 (580.79 it/sec) -training >> step=2464100, episode=411 reward=0.7667495 (581.93 it/sec) -training >> step=2464200, episode=411 reward=0.7532546 (576.30 it/sec) -training >> step=2464300, episode=411 reward=0.7699617 (577.93 it/sec) -training >> step=2464400, episode=411 reward=0.7560385 (567.28 it/sec) -training >> step=2464500, episode=411 reward=0.7722235 (614.93 it/sec) -training >> step=2464600, episode=411 reward=0.7685267 (566.60 it/sec) -training >> step=2464700, episode=411 reward=0.7576554 (578.71 it/sec) -training >> step=2464800, episode=411 reward=0.7606348 (581.07 it/sec) -training >> step=2464900, episode=411 reward=0.7585821 (611.55 it/sec) -training >> step=2465000, episode=411 reward=0.7768199 (560.30 it/sec) -training >> step=2465100, episode=411 reward=0.7218595 (569.67 it/sec) -training >> step=2465200, episode=411 reward=0.7731591 (560.66 it/sec) -training >> step=2465300, episode=412 reward=0.7468261 (127.88 it/sec) -training >> step=2465400, episode=412 reward=0.7490832 (555.61 it/sec) -training >> step=2465500, episode=412 reward=0.7520103 (552.41 it/sec) -training >> step=2465600, episode=412 reward=0.7493079 (559.63 it/sec) -training >> step=2465700, episode=412 reward=0.7680809 (587.91 it/sec) -training >> step=2465800, episode=412 reward=0.7562911 (539.29 it/sec) -training >> step=2465900, episode=412 reward=0.7558201 (556.21 it/sec) -training >> step=2466000, episode=412 reward=0.7651559 (546.21 it/sec) -training >> step=2466100, episode=412 reward=0.7719494 (598.50 it/sec) -training >> step=2466200, episode=412 reward=0.7545413 (598.17 it/sec) -training >> step=2466300, episode=412 reward=0.769039 (593.05 it/sec) -training >> step=2466400, episode=412 reward=0.7562068 (562.01 it/sec) -training >> step=2466500, episode=412 reward=0.78067 (598.85 it/sec) -training >> step=2466600, episode=412 reward=0.7737243 (566.67 it/sec) -training >> step=2466700, episode=412 reward=0.7703319 (594.65 it/sec) -training >> step=2466800, episode=412 reward=0.7537833 (580.82 it/sec) -training >> step=2466900, episode=412 reward=0.7847557 (621.63 it/sec) -training >> step=2467000, episode=412 reward=0.7617081 (551.40 it/sec) -training >> step=2467100, episode=412 reward=0.7818273 (580.07 it/sec) -training >> step=2467200, episode=412 reward=0.7866542 (561.81 it/sec) -training >> step=2467300, episode=412 reward=0.784529 (599.03 it/sec) -training >> step=2467400, episode=412 reward=0.7584242 (563.55 it/sec) -training >> step=2467500, episode=412 reward=0.7805644 (578.53 it/sec) -training >> step=2467600, episode=412 reward=0.7593244 (574.04 it/sec) -training >> step=2467700, episode=412 reward=0.7661324 (583.17 it/sec) -training >> step=2467800, episode=412 reward=0.7558874 (586.82 it/sec) -training >> step=2467900, episode=412 reward=0.7502063 (573.29 it/sec) -training >> step=2468000, episode=412 reward=0.7706907 (585.43 it/sec) -training >> step=2468100, episode=412 reward=0.7537534 (597.63 it/sec) -training >> step=2468200, episode=412 reward=0.7749619 (533.28 it/sec) -training >> step=2468300, episode=412 reward=0.7734709 (552.94 it/sec) -training >> step=2468400, episode=412 reward=0.7661283 (574.72 it/sec) -training >> step=2468500, episode=412 reward=0.7882557 (567.52 it/sec) -training >> step=2468600, episode=412 reward=0.7671891 (581.10 it/sec) -training >> step=2468700, episode=412 reward=0.7822262 (598.00 it/sec) -training >> step=2468800, episode=412 reward=0.7578 (578.66 it/sec) -training >> step=2468900, episode=412 reward=0.7884533 (583.62 it/sec) -training >> step=2469000, episode=412 reward=0.7642805 (578.44 it/sec) -training >> step=2469100, episode=412 reward=0.7677495 (613.96 it/sec) -training >> step=2469200, episode=412 reward=0.7832921 (592.66 it/sec) -training >> step=2469300, episode=412 reward=0.7502974 (568.10 it/sec) -training >> step=2469400, episode=412 reward=0.7584225 (575.85 it/sec) -training >> step=2469500, episode=412 reward=0.7824915 (576.29 it/sec) -training >> step=2469600, episode=412 reward=0.758014 (523.93 it/sec) -training >> step=2469700, episode=412 reward=0.7709917 (563.10 it/sec) -training >> step=2469800, episode=412 reward=0.7522956 (558.94 it/sec) -training >> step=2469900, episode=412 reward=0.7800384 (582.75 it/sec) -training >> step=2470000, episode=412 reward=0.767528 (533.23 it/sec) -training >> step=2470100, episode=412 reward=0.7577508 (538.71 it/sec) -training >> step=2470200, episode=412 reward=0.7530828 (558.28 it/sec) -training >> step=2470300, episode=412 reward=0.7655395 (570.52 it/sec) -training >> step=2470400, episode=412 reward=0.7762367 (552.66 it/sec) -training >> step=2470500, episode=412 reward=0.7688349 (505.27 it/sec) -training >> step=2470600, episode=412 reward=0.7762595 (563.39 it/sec) -training >> step=2470700, episode=412 reward=0.7934819 (459.71 it/sec) -training >> step=2470800, episode=412 reward=0.7592312 (510.38 it/sec) -training >> step=2470900, episode=412 reward=0.7802477 (526.32 it/sec) -training >> step=2471000, episode=412 reward=0.7555949 (533.54 it/sec) -training >> step=2471100, episode=412 reward=0.7878795 (520.01 it/sec) -training >> step=2471200, episode=412 reward=0.7615808 (534.11 it/sec) -training >> step=2471300, episode=413 reward=0.7462173 (111.41 it/sec) -training >> step=2471400, episode=413 reward=0.7448636 (541.34 it/sec) -training >> step=2471500, episode=413 reward=0.7511565 (568.74 it/sec) -training >> step=2471600, episode=413 reward=0.7361341 (545.62 it/sec) -training >> step=2471700, episode=413 reward=0.7856058 (572.15 it/sec) -training >> step=2471800, episode=413 reward=0.7952542 (577.46 it/sec) -training >> step=2471900, episode=413 reward=0.7526793 (565.31 it/sec) -training >> step=2472000, episode=413 reward=0.7699022 (595.48 it/sec) -training >> step=2472100, episode=413 reward=0.7603299 (552.88 it/sec) -training >> step=2472200, episode=413 reward=0.7607653 (591.19 it/sec) -training >> step=2472300, episode=413 reward=0.7588098 (579.61 it/sec) -training >> step=2472400, episode=413 reward=0.7523267 (556.97 it/sec) -training >> step=2472500, episode=413 reward=0.7660273 (492.84 it/sec) -training >> step=2472600, episode=413 reward=0.7784649 (525.63 it/sec) -training >> step=2472700, episode=413 reward=0.7673024 (552.88 it/sec) -training >> step=2472800, episode=413 reward=0.762695 (559.68 it/sec) -training >> step=2472900, episode=413 reward=0.7518033 (531.00 it/sec) -training >> step=2473000, episode=413 reward=0.7843148 (539.86 it/sec) -training >> step=2473100, episode=413 reward=0.7823793 (568.91 it/sec) -training >> step=2473200, episode=413 reward=0.7651193 (526.00 it/sec) -training >> step=2473300, episode=413 reward=0.764783 (541.03 it/sec) -training >> step=2473400, episode=413 reward=0.7888389 (565.76 it/sec) -training >> step=2473500, episode=413 reward=0.7416385 (547.24 it/sec) -training >> step=2473600, episode=413 reward=0.7769269 (578.69 it/sec) -training >> step=2473700, episode=413 reward=0.7695465 (535.09 it/sec) -training >> step=2473800, episode=413 reward=0.7504941 (545.51 it/sec) -training >> step=2473900, episode=413 reward=0.7422006 (569.29 it/sec) -training >> step=2474000, episode=413 reward=0.7680196 (554.12 it/sec) -training >> step=2474100, episode=413 reward=0.7720596 (535.30 it/sec) -training >> step=2474200, episode=413 reward=0.7676663 (569.46 it/sec) -training >> step=2474300, episode=413 reward=0.7599864 (546.95 it/sec) -training >> step=2474400, episode=413 reward=0.7842585 (522.11 it/sec) -training >> step=2474500, episode=413 reward=0.7634892 (532.99 it/sec) -training >> step=2474600, episode=413 reward=0.7706155 (546.48 it/sec) -training >> step=2474700, episode=413 reward=0.7837202 (561.01 it/sec) -training >> step=2474800, episode=413 reward=0.7639984 (534.90 it/sec) -training >> step=2474900, episode=413 reward=0.7653819 (542.18 it/sec) -training >> step=2475000, episode=413 reward=0.7564983 (581.60 it/sec) -training >> step=2475100, episode=413 reward=0.7919635 (524.71 it/sec) -training >> step=2475200, episode=413 reward=0.7625504 (517.05 it/sec) -training >> step=2475300, episode=413 reward=0.7572709 (563.54 it/sec) -training >> step=2475400, episode=413 reward=0.7778359 (540.24 it/sec) -training >> step=2475500, episode=413 reward=0.7654867 (535.89 it/sec) -training >> step=2475600, episode=413 reward=0.7579359 (560.06 it/sec) -training >> step=2475700, episode=413 reward=0.7745022 (561.82 it/sec) -training >> step=2475800, episode=413 reward=0.7511595 (551.47 it/sec) -training >> step=2475900, episode=413 reward=0.7683397 (541.87 it/sec) -training >> step=2476000, episode=413 reward=0.7827618 (504.15 it/sec) -training >> step=2476100, episode=413 reward=0.7537724 (559.00 it/sec) -training >> step=2476200, episode=413 reward=0.7608792 (552.72 it/sec) -training >> step=2476300, episode=413 reward=0.7724277 (497.37 it/sec) -training >> step=2476400, episode=413 reward=0.7826926 (583.28 it/sec) -training >> step=2476500, episode=413 reward=0.7601165 (566.94 it/sec) -training >> step=2476600, episode=413 reward=0.7656206 (496.67 it/sec) -training >> step=2476700, episode=413 reward=0.7780622 (505.86 it/sec) -training >> step=2476800, episode=413 reward=0.7587255 (492.27 it/sec) -training >> step=2476900, episode=413 reward=0.7666573 (577.39 it/sec) -training >> step=2477000, episode=413 reward=0.7579323 (561.97 it/sec) -training >> step=2477100, episode=413 reward=0.7655502 (538.09 it/sec) -training >> step=2477200, episode=413 reward=0.7646256 (550.80 it/sec) -training >> step=2477300, episode=414 reward=0.7448218 (132.27 it/sec) -training >> step=2477400, episode=414 reward=0.7645898 (409.64 it/sec) -training >> step=2477500, episode=414 reward=0.7425337 (550.30 it/sec) -training >> step=2477600, episode=414 reward=0.7679442 (534.44 it/sec) -training >> step=2477700, episode=414 reward=0.7397116 (561.40 it/sec) -training >> step=2477800, episode=414 reward=0.7560085 (574.76 it/sec) -training >> step=2477900, episode=414 reward=0.7469102 (545.20 it/sec) -training >> step=2478000, episode=414 reward=0.7581131 (560.06 it/sec) -training >> step=2478100, episode=414 reward=0.7803579 (571.32 it/sec) -training >> step=2478200, episode=414 reward=0.7728413 (589.35 it/sec) -training >> step=2478300, episode=414 reward=0.7449663 (575.07 it/sec) -training >> step=2478400, episode=414 reward=0.7529618 (570.93 it/sec) -training >> step=2478500, episode=414 reward=0.7772297 (574.85 it/sec) -training >> step=2478600, episode=414 reward=0.7722789 (583.09 it/sec) -training >> step=2478700, episode=414 reward=0.7635536 (565.65 it/sec) -training >> step=2478800, episode=414 reward=0.7756571 (571.78 it/sec) -training >> step=2478900, episode=414 reward=0.7338701 (619.50 it/sec) -training >> step=2479000, episode=414 reward=0.7487607 (571.35 it/sec) -training >> step=2479100, episode=414 reward=0.7521251 (570.51 it/sec) -training >> step=2479200, episode=414 reward=0.7829892 (588.63 it/sec) -training >> step=2479300, episode=414 reward=0.7575496 (582.86 it/sec) -training >> step=2479400, episode=414 reward=0.7701693 (554.96 it/sec) -training >> step=2479500, episode=414 reward=0.7696909 (567.90 it/sec) -training >> step=2479600, episode=414 reward=0.7629374 (524.73 it/sec) -training >> step=2479700, episode=414 reward=0.7491568 (573.90 it/sec) -training >> step=2479800, episode=414 reward=0.7614457 (526.78 it/sec) -training >> step=2479900, episode=414 reward=0.7792262 (554.37 it/sec) -training >> step=2480000, episode=414 reward=0.7751394 (565.16 it/sec) -training >> step=2480100, episode=414 reward=0.7828354 (576.85 it/sec) -training >> step=2480200, episode=414 reward=0.7799742 (535.94 it/sec) -training >> step=2480300, episode=414 reward=0.7350045 (547.29 it/sec) -training >> step=2480400, episode=414 reward=0.7515389 (561.38 it/sec) -training >> step=2480500, episode=414 reward=0.7766553 (543.85 it/sec) -training >> step=2480600, episode=414 reward=0.7825549 (571.10 it/sec) -training >> step=2480700, episode=414 reward=0.7633021 (533.49 it/sec) -training >> step=2480800, episode=414 reward=0.7746758 (527.77 it/sec) -training >> step=2480900, episode=414 reward=0.7673877 (590.27 it/sec) -training >> step=2481000, episode=414 reward=0.7571577 (562.62 it/sec) -training >> step=2481100, episode=414 reward=0.7589896 (512.81 it/sec) -training >> step=2481200, episode=414 reward=0.7786662 (583.84 it/sec) -training >> step=2481300, episode=414 reward=0.7409888 (532.54 it/sec) -training >> step=2481400, episode=414 reward=0.7685943 (543.60 it/sec) -training >> step=2481500, episode=414 reward=0.7570917 (547.15 it/sec) -training >> step=2481600, episode=414 reward=0.7651513 (576.49 it/sec) -training >> step=2481700, episode=414 reward=0.7671586 (552.44 it/sec) -training >> step=2481800, episode=414 reward=0.7659983 (528.90 it/sec) -training >> step=2481900, episode=414 reward=0.760008 (480.14 it/sec) -training >> step=2482000, episode=414 reward=0.7689199 (539.59 it/sec) -training >> step=2482100, episode=414 reward=0.768249 (578.56 it/sec) -training >> step=2482200, episode=414 reward=0.7774154 (555.66 it/sec) -training >> step=2482300, episode=414 reward=0.7541846 (585.44 it/sec) -training >> step=2482400, episode=414 reward=0.753786 (526.44 it/sec) -training >> step=2482500, episode=414 reward=0.7587763 (540.22 it/sec) -training >> step=2482600, episode=414 reward=0.7639861 (536.98 it/sec) -training >> step=2482700, episode=414 reward=0.7661099 (580.13 it/sec) -training >> step=2482800, episode=414 reward=0.7671321 (570.88 it/sec) -training >> step=2482900, episode=414 reward=0.7463661 (539.66 it/sec) -training >> step=2483000, episode=414 reward=0.7574559 (542.08 it/sec) -training >> step=2483100, episode=414 reward=0.7303306 (505.30 it/sec) -training >> step=2483200, episode=414 reward=0.766504 (520.45 it/sec) -training >> step=2483300, episode=415 reward=0.774753 (137.93 it/sec) -training >> step=2483400, episode=415 reward=0.7767789 (488.77 it/sec) -training >> step=2483500, episode=415 reward=0.759868 (503.96 it/sec) -training >> step=2483600, episode=415 reward=0.7376639 (449.35 it/sec) -training >> step=2483700, episode=415 reward=0.7386456 (527.72 it/sec) -training >> step=2483800, episode=415 reward=0.7647135 (535.31 it/sec) -training >> step=2483900, episode=415 reward=0.7650279 (511.15 it/sec) -training >> step=2484000, episode=415 reward=0.7670397 (586.48 it/sec) -training >> step=2484100, episode=415 reward=0.7680572 (555.87 it/sec) -training >> step=2484200, episode=415 reward=0.7507209 (548.35 it/sec) -training >> step=2484300, episode=415 reward=0.7806427 (526.37 it/sec) -training >> step=2484400, episode=415 reward=0.7653809 (535.57 it/sec) -training >> step=2484500, episode=415 reward=0.7584482 (557.76 it/sec) -training >> step=2484600, episode=415 reward=0.7700147 (512.80 it/sec) -training >> step=2484700, episode=415 reward=0.7829006 (516.07 it/sec) -training >> step=2484800, episode=415 reward=0.750383 (525.22 it/sec) -training >> step=2484900, episode=415 reward=0.7700552 (522.09 it/sec) -training >> step=2485000, episode=415 reward=0.7724941 (515.39 it/sec) -training >> step=2485100, episode=415 reward=0.7676958 (551.18 it/sec) -training >> step=2485200, episode=415 reward=0.7549908 (537.66 it/sec) -training >> step=2485300, episode=415 reward=0.7421964 (537.95 it/sec) -training >> step=2485400, episode=415 reward=0.7798116 (514.84 it/sec) -training >> step=2485500, episode=415 reward=0.7677894 (485.15 it/sec) -training >> step=2485600, episode=415 reward=0.7621627 (565.72 it/sec) -training >> step=2485700, episode=415 reward=0.773453 (524.52 it/sec) -training >> step=2485800, episode=415 reward=0.7744337 (538.79 it/sec) -training >> step=2485900, episode=415 reward=0.760502 (515.95 it/sec) -training >> step=2486000, episode=415 reward=0.7743971 (531.08 it/sec) -training >> step=2486100, episode=415 reward=0.7682156 (534.37 it/sec) -training >> step=2486200, episode=415 reward=0.7673042 (522.30 it/sec) -training >> step=2486300, episode=415 reward=0.7301031 (539.85 it/sec) -training >> step=2486400, episode=415 reward=0.758094 (511.77 it/sec) -training >> step=2486500, episode=415 reward=0.7725497 (499.88 it/sec) -training >> step=2486600, episode=415 reward=0.7703394 (515.18 it/sec) -training >> step=2486700, episode=415 reward=0.762769 (571.83 it/sec) -training >> step=2486800, episode=415 reward=0.7607028 (532.62 it/sec) -training >> step=2486900, episode=415 reward=0.7745758 (496.71 it/sec) -training >> step=2487000, episode=415 reward=0.7835763 (479.10 it/sec) -training >> step=2487100, episode=415 reward=0.7561374 (484.17 it/sec) -training >> step=2487200, episode=415 reward=0.7651095 (545.94 it/sec) -training >> step=2487300, episode=415 reward=0.7781217 (536.66 it/sec) -training >> step=2487400, episode=415 reward=0.7611768 (545.34 it/sec) -training >> step=2487500, episode=415 reward=0.7742679 (523.32 it/sec) -training >> step=2487600, episode=415 reward=0.7645262 (487.04 it/sec) -training >> step=2487700, episode=415 reward=0.7663423 (547.05 it/sec) -training >> step=2487800, episode=415 reward=0.7671404 (556.26 it/sec) -training >> step=2487900, episode=415 reward=0.7800028 (545.38 it/sec) -training >> step=2488000, episode=415 reward=0.7551134 (519.15 it/sec) -training >> step=2488100, episode=415 reward=0.764964 (513.01 it/sec) -training >> step=2488200, episode=415 reward=0.7710652 (531.54 it/sec) -training >> step=2488300, episode=415 reward=0.7649674 (525.99 it/sec) -training >> step=2488400, episode=415 reward=0.7333333 (528.69 it/sec) -training >> step=2488500, episode=415 reward=0.7553608 (540.87 it/sec) -training >> step=2488600, episode=415 reward=0.7711847 (555.75 it/sec) -training >> step=2488700, episode=415 reward=0.7590715 (504.69 it/sec) -training >> step=2488800, episode=415 reward=0.7606844 (538.10 it/sec) -training >> step=2488900, episode=415 reward=0.7721177 (560.14 it/sec) -training >> step=2489000, episode=415 reward=0.7488906 (555.65 it/sec) -training >> step=2489100, episode=415 reward=0.7659551 (517.38 it/sec) -training >> step=2489200, episode=415 reward=0.7672195 (493.69 it/sec) -training >> step=2489300, episode=416 reward=0.7504678 (182.87 it/sec) -training >> step=2489400, episode=416 reward=0.7562912 (548.12 it/sec) -training >> step=2489500, episode=416 reward=0.7601187 (504.09 it/sec) -training >> step=2489600, episode=416 reward=0.7629802 (523.64 it/sec) -training >> step=2489700, episode=416 reward=0.7484549 (541.48 it/sec) -training >> step=2489800, episode=416 reward=0.7620351 (535.73 it/sec) -training >> step=2489900, episode=416 reward=0.7697181 (434.08 it/sec) -training >> step=2490000, episode=416 reward=0.7464075 (546.32 it/sec) -training >> step=2490100, episode=416 reward=0.7518058 (512.86 it/sec) -training >> step=2490200, episode=416 reward=0.76962 (561.20 it/sec) -training >> step=2490300, episode=416 reward=0.7552733 (578.35 it/sec) -training >> step=2490400, episode=416 reward=0.7750552 (554.13 it/sec) -training >> step=2490500, episode=416 reward=0.7770029 (538.66 it/sec) -training >> step=2490600, episode=416 reward=0.7649921 (535.44 it/sec) -training >> step=2490700, episode=416 reward=0.7577673 (518.04 it/sec) -training >> step=2490800, episode=416 reward=0.757866 (540.02 it/sec) -training >> step=2490900, episode=416 reward=0.7657064 (536.59 it/sec) -training >> step=2491000, episode=416 reward=0.7755282 (547.72 it/sec) -training >> step=2491100, episode=416 reward=0.7603881 (514.37 it/sec) -training >> step=2491200, episode=416 reward=0.7608433 (528.74 it/sec) -training >> step=2491300, episode=416 reward=0.7657427 (539.35 it/sec) -training >> step=2491400, episode=416 reward=0.7493657 (539.17 it/sec) -training >> step=2491500, episode=416 reward=0.7394334 (535.92 it/sec) -training >> step=2491600, episode=416 reward=0.7744504 (536.97 it/sec) -training >> step=2491700, episode=416 reward=0.755484 (488.07 it/sec) -training >> step=2491800, episode=416 reward=0.7535257 (552.47 it/sec) -training >> step=2491900, episode=416 reward=0.754672 (529.30 it/sec) -training >> step=2492000, episode=416 reward=0.7631949 (485.31 it/sec) -training >> step=2492100, episode=416 reward=0.7724123 (502.51 it/sec) -training >> step=2492200, episode=416 reward=0.7585612 (465.96 it/sec) -training >> step=2492300, episode=416 reward=0.7588074 (513.47 it/sec) -training >> step=2492400, episode=416 reward=0.7833424 (537.72 it/sec) -training >> step=2492500, episode=416 reward=0.7809821 (534.55 it/sec) -training >> step=2492600, episode=416 reward=0.7534398 (529.46 it/sec) -training >> step=2492700, episode=416 reward=0.7738932 (485.77 it/sec) -training >> step=2492800, episode=416 reward=0.7945995 (530.65 it/sec) -training >> step=2492900, episode=416 reward=0.7842095 (546.83 it/sec) -training >> step=2493000, episode=416 reward=0.7754103 (544.43 it/sec) -training >> step=2493100, episode=416 reward=0.7702019 (504.48 it/sec) -training >> step=2493200, episode=416 reward=0.7721086 (486.12 it/sec) -training >> step=2493300, episode=416 reward=0.760489 (523.57 it/sec) -training >> step=2493400, episode=416 reward=0.7614987 (542.39 it/sec) -training >> step=2493500, episode=416 reward=0.7673703 (545.08 it/sec) -training >> step=2493600, episode=416 reward=0.7658442 (564.00 it/sec) -training >> step=2493700, episode=416 reward=0.7645566 (552.74 it/sec) -training >> step=2493800, episode=416 reward=0.7711402 (537.60 it/sec) -training >> step=2493900, episode=416 reward=0.7506554 (553.76 it/sec) -training >> step=2494000, episode=416 reward=0.7555339 (537.41 it/sec) -training >> step=2494100, episode=416 reward=0.7736133 (567.30 it/sec) -training >> step=2494200, episode=416 reward=0.746423 (553.44 it/sec) -training >> step=2494300, episode=416 reward=0.7754167 (538.67 it/sec) -training >> step=2494400, episode=416 reward=0.7633917 (555.50 it/sec) -training >> step=2494500, episode=416 reward=0.7732907 (557.39 it/sec) -training >> step=2494600, episode=416 reward=0.7740039 (563.91 it/sec) -training >> step=2494700, episode=416 reward=0.7513359 (560.90 it/sec) -training >> step=2494800, episode=416 reward=0.7859128 (560.55 it/sec) -training >> step=2494900, episode=416 reward=0.7726929 (552.49 it/sec) -training >> step=2495000, episode=416 reward=0.7674171 (562.32 it/sec) -training >> step=2495100, episode=416 reward=0.7609256 (551.43 it/sec) -training >> step=2495200, episode=416 reward=0.7683472 (566.12 it/sec) -training >> step=2495300, episode=417 reward=0.7337644 (87.14 it/sec) -training >> step=2495400, episode=417 reward=0.7626283 (527.89 it/sec) -training >> step=2495500, episode=417 reward=0.7598342 (536.68 it/sec) -training >> step=2495600, episode=417 reward=0.7618176 (539.08 it/sec) -training >> step=2495700, episode=417 reward=0.7660278 (536.16 it/sec) -training >> step=2495800, episode=417 reward=0.7485824 (537.63 it/sec) -training >> step=2495900, episode=417 reward=0.7638741 (517.49 it/sec) -training >> step=2496000, episode=417 reward=0.772719 (554.21 it/sec) -training >> step=2496100, episode=417 reward=0.7721421 (413.74 it/sec) -training >> step=2496200, episode=417 reward=0.7550453 (547.45 it/sec) -training >> step=2496300, episode=417 reward=0.7914785 (552.00 it/sec) -training >> step=2496400, episode=417 reward=0.7716764 (519.87 it/sec) -training >> step=2496500, episode=417 reward=0.7719964 (531.67 it/sec) -training >> step=2496600, episode=417 reward=0.7649944 (548.77 it/sec) -training >> step=2496700, episode=417 reward=0.7536243 (547.44 it/sec) -training >> step=2496800, episode=417 reward=0.7767188 (580.46 it/sec) -training >> step=2496900, episode=417 reward=0.7858528 (493.29 it/sec) -training >> step=2497000, episode=417 reward=0.7575195 (549.11 it/sec) -training >> step=2497100, episode=417 reward=0.7709481 (555.44 it/sec) -training >> step=2497200, episode=417 reward=0.7819988 (534.05 it/sec) -training >> step=2497300, episode=417 reward=0.7759831 (549.05 it/sec) -training >> step=2497400, episode=417 reward=0.7757208 (555.76 it/sec) -training >> step=2497500, episode=417 reward=0.7675524 (534.28 it/sec) -training >> step=2497600, episode=417 reward=0.7649014 (559.37 it/sec) -training >> step=2497700, episode=417 reward=0.7753041 (543.81 it/sec) -training >> step=2497800, episode=417 reward=0.7506796 (543.43 it/sec) -training >> step=2497900, episode=417 reward=0.7741652 (562.51 it/sec) -training >> step=2498000, episode=417 reward=0.7929441 (521.67 it/sec) -training >> step=2498100, episode=417 reward=0.7728598 (530.20 it/sec) -training >> step=2498200, episode=417 reward=0.761263 (549.93 it/sec) -training >> step=2498300, episode=417 reward=0.7940634 (537.56 it/sec) -training >> step=2498400, episode=417 reward=0.7546458 (523.94 it/sec) -training >> step=2498500, episode=417 reward=0.7450721 (562.17 it/sec) -training >> step=2498600, episode=417 reward=0.7578348 (568.02 it/sec) -training >> step=2498700, episode=417 reward=0.7839432 (548.79 it/sec) -training >> step=2498800, episode=417 reward=0.7811529 (556.55 it/sec) -training >> step=2498900, episode=417 reward=0.7522289 (507.72 it/sec) -training >> step=2499000, episode=417 reward=0.7708007 (560.55 it/sec) -training >> step=2499100, episode=417 reward=0.7662694 (533.61 it/sec) -training >> step=2499200, episode=417 reward=0.762257 (568.93 it/sec) -training >> step=2499300, episode=417 reward=0.7592628 (553.21 it/sec) -training >> step=2499400, episode=417 reward=0.7542682 (529.09 it/sec) -training >> step=2499500, episode=417 reward=0.7711928 (566.54 it/sec) -training >> step=2499600, episode=417 reward=0.7691443 (537.44 it/sec) -training >> step=2499700, episode=417 reward=0.7604423 (564.98 it/sec) -training >> step=2499800, episode=417 reward=0.7872756 (554.12 it/sec) -training >> step=2499900, episode=417 reward=0.7542253 (554.05 it/sec) -training >> step=2500000, episode=417 reward=0.7606378 (513.81 it/sec) -training >> step=2500100, episode=417 reward=0.7671773 (560.51 it/sec) -training >> step=2500200, episode=417 reward=0.765912 (506.85 it/sec) -training >> step=2500300, episode=417 reward=0.7678703 (543.03 it/sec) -training >> step=2500400, episode=417 reward=0.7701132 (555.86 it/sec) -training >> step=2500500, episode=417 reward=0.7453021 (590.41 it/sec) -training >> step=2500600, episode=417 reward=0.7794021 (527.35 it/sec) -training >> step=2500700, episode=417 reward=0.7470157 (551.27 it/sec) -training >> step=2500800, episode=417 reward=0.75083 (546.56 it/sec) -training >> step=2500900, episode=417 reward=0.7536156 (520.13 it/sec) -training >> step=2501000, episode=417 reward=0.7719237 (541.62 it/sec) -training >> step=2501100, episode=417 reward=0.7570443 (499.10 it/sec) -training >> step=2501200, episode=417 reward=0.7612081 (583.21 it/sec) -training >> step=2501300, episode=418 reward=0.7666522 (150.27 it/sec) -training >> step=2501400, episode=418 reward=0.7538154 (512.53 it/sec) -training >> step=2501500, episode=418 reward=0.7323593 (540.32 it/sec) -training >> step=2501600, episode=418 reward=0.7699009 (521.59 it/sec) -training >> step=2501700, episode=418 reward=0.7575491 (501.94 it/sec) -training >> step=2501800, episode=418 reward=0.7869065 (528.15 it/sec) -training >> step=2501900, episode=418 reward=0.7962542 (541.99 it/sec) -training >> step=2502000, episode=418 reward=0.7733479 (503.94 it/sec) -training >> step=2502100, episode=418 reward=0.7765965 (506.60 it/sec) -training >> step=2502200, episode=418 reward=0.7550073 (408.65 it/sec) -training >> step=2502300, episode=418 reward=0.7464174 (556.05 it/sec) -training >> step=2502400, episode=418 reward=0.7943343 (543.21 it/sec) -training >> step=2502500, episode=418 reward=0.7760205 (532.68 it/sec) -training >> step=2502600, episode=418 reward=0.753651 (550.09 it/sec) -training >> step=2502700, episode=418 reward=0.7715695 (573.36 it/sec) -training >> step=2502800, episode=418 reward=0.7809787 (564.68 it/sec) -training >> step=2502900, episode=418 reward=0.767639 (585.05 it/sec) -training >> step=2503000, episode=418 reward=0.7711728 (532.83 it/sec) -training >> step=2503100, episode=418 reward=0.776042 (569.52 it/sec) -training >> step=2503200, episode=418 reward=0.7698151 (574.76 it/sec) -training >> step=2503300, episode=418 reward=0.7461953 (562.97 it/sec) -training >> step=2503400, episode=418 reward=0.7723426 (559.32 it/sec) -training >> step=2503500, episode=418 reward=0.763732 (595.76 it/sec) -training >> step=2503600, episode=418 reward=0.7659282 (525.46 it/sec) -training >> step=2503700, episode=418 reward=0.7731242 (552.75 it/sec) -training >> step=2503800, episode=418 reward=0.7571178 (556.45 it/sec) -training >> step=2503900, episode=418 reward=0.7659273 (564.53 it/sec) -training >> step=2504000, episode=418 reward=0.7753887 (561.94 it/sec) -training >> step=2504100, episode=418 reward=0.7626768 (567.59 it/sec) -training >> step=2504200, episode=418 reward=0.7734476 (532.00 it/sec) -training >> step=2504300, episode=418 reward=0.7741756 (562.16 it/sec) -training >> step=2504400, episode=418 reward=0.7874624 (552.41 it/sec) -training >> step=2504500, episode=418 reward=0.761084 (535.24 it/sec) -training >> step=2504600, episode=418 reward=0.7361553 (593.87 it/sec) -training >> step=2504700, episode=418 reward=0.7714685 (573.72 it/sec) -training >> step=2504800, episode=418 reward=0.7834623 (534.72 it/sec) -training >> step=2504900, episode=418 reward=0.7808083 (554.78 it/sec) -training >> step=2505000, episode=418 reward=0.7652817 (578.02 it/sec) -training >> step=2505100, episode=418 reward=0.7672205 (558.51 it/sec) -training >> step=2505200, episode=418 reward=0.7745715 (521.33 it/sec) -training >> step=2505300, episode=418 reward=0.7516601 (525.11 it/sec) -training >> step=2505400, episode=418 reward=0.7517461 (549.64 it/sec) -training >> step=2505500, episode=418 reward=0.7658368 (532.82 it/sec) -training >> step=2505600, episode=418 reward=0.7757999 (542.83 it/sec) -training >> step=2505700, episode=418 reward=0.7565399 (550.42 it/sec) -training >> step=2505800, episode=418 reward=0.7613626 (550.20 it/sec) -training >> step=2505900, episode=418 reward=0.7540594 (526.72 it/sec) -training >> step=2506000, episode=418 reward=0.7839237 (539.30 it/sec) -training >> step=2506100, episode=418 reward=0.7756801 (536.15 it/sec) -training >> step=2506200, episode=418 reward=0.7743249 (550.12 it/sec) -training >> step=2506300, episode=418 reward=0.7845435 (546.48 it/sec) -training >> step=2506400, episode=418 reward=0.7392156 (549.40 it/sec) -training >> step=2506500, episode=418 reward=0.7748029 (542.10 it/sec) -training >> step=2506600, episode=418 reward=0.7560047 (562.15 it/sec) -training >> step=2506700, episode=418 reward=0.7855597 (520.87 it/sec) -training >> step=2506800, episode=418 reward=0.77106 (515.57 it/sec) -training >> step=2506900, episode=418 reward=0.7700438 (545.99 it/sec) -training >> step=2507000, episode=418 reward=0.7454032 (498.89 it/sec) -training >> step=2507100, episode=418 reward=0.7403485 (531.86 it/sec) -training >> step=2507200, episode=418 reward=0.7483789 (521.76 it/sec) -training >> step=2507300, episode=419 reward=0.7525781 (181.91 it/sec) -training >> step=2507400, episode=419 reward=0.772326 (513.91 it/sec) -training >> step=2507500, episode=419 reward=0.7663689 (508.86 it/sec) -training >> step=2507600, episode=419 reward=0.7682465 (542.16 it/sec) -training >> step=2507700, episode=419 reward=0.7616005 (530.51 it/sec) -training >> step=2507800, episode=419 reward=0.76755 (547.60 it/sec) -training >> step=2507900, episode=419 reward=0.7474499 (500.97 it/sec) -training >> step=2508000, episode=419 reward=0.7831825 (502.54 it/sec) -training >> step=2508100, episode=419 reward=0.7605049 (539.32 it/sec) -training >> step=2508200, episode=419 reward=0.7680783 (553.15 it/sec) -training >> step=2508300, episode=419 reward=0.7615238 (541.69 it/sec) -training >> step=2508400, episode=419 reward=0.7494766 (390.96 it/sec) -training >> step=2508500, episode=419 reward=0.7693183 (557.98 it/sec) -training >> step=2508600, episode=419 reward=0.7890686 (533.90 it/sec) -training >> step=2508700, episode=419 reward=0.7593926 (534.95 it/sec) -training >> step=2508800, episode=419 reward=0.7636421 (531.06 it/sec) -training >> step=2508900, episode=419 reward=0.7784326 (510.25 it/sec) -training >> step=2509000, episode=419 reward=0.755803 (497.52 it/sec) -training >> step=2509100, episode=419 reward=0.7588192 (528.11 it/sec) -training >> step=2509200, episode=419 reward=0.7492206 (521.18 it/sec) -training >> step=2509300, episode=419 reward=0.7415656 (576.79 it/sec) -training >> step=2509400, episode=419 reward=0.7655706 (550.37 it/sec) -training >> step=2509500, episode=419 reward=0.7758276 (505.89 it/sec) -training >> step=2509600, episode=419 reward=0.7407679 (518.05 it/sec) -training >> step=2509700, episode=419 reward=0.7481944 (538.84 it/sec) -training >> step=2509800, episode=419 reward=0.7674674 (528.56 it/sec) -training >> step=2509900, episode=419 reward=0.7617303 (541.21 it/sec) -training >> step=2510000, episode=419 reward=0.7659941 (495.80 it/sec) -training >> step=2510100, episode=419 reward=0.7405059 (533.72 it/sec) -training >> step=2510200, episode=419 reward=0.7392982 (509.12 it/sec) -training >> step=2510300, episode=419 reward=0.7571372 (516.11 it/sec) -training >> step=2510400, episode=419 reward=0.7605877 (534.06 it/sec) -training >> step=2510500, episode=419 reward=0.7581363 (520.45 it/sec) -training >> step=2510600, episode=419 reward=0.7654742 (515.89 it/sec) -training >> step=2510700, episode=419 reward=0.7795057 (515.00 it/sec) -training >> step=2510800, episode=419 reward=0.7777152 (520.65 it/sec) -training >> step=2510900, episode=419 reward=0.7652565 (560.26 it/sec) -training >> step=2511000, episode=419 reward=0.7769803 (507.16 it/sec) -training >> step=2511100, episode=419 reward=0.7643498 (487.98 it/sec) -training >> step=2511200, episode=419 reward=0.7620932 (518.25 it/sec) -training >> step=2511300, episode=419 reward=0.7708293 (551.29 it/sec) -training >> step=2511400, episode=419 reward=0.7529681 (531.90 it/sec) -training >> step=2511500, episode=419 reward=0.7714511 (538.54 it/sec) -training >> step=2511600, episode=419 reward=0.7616841 (552.76 it/sec) -training >> step=2511700, episode=419 reward=0.7497855 (506.58 it/sec) -training >> step=2511800, episode=419 reward=0.7527053 (511.96 it/sec) -training >> step=2511900, episode=419 reward=0.7488289 (519.12 it/sec) -training >> step=2512000, episode=419 reward=0.762046 (533.76 it/sec) -training >> step=2512100, episode=419 reward=0.7593527 (486.27 it/sec) -training >> step=2512200, episode=419 reward=0.7683644 (476.74 it/sec) -training >> step=2512300, episode=419 reward=0.7710493 (496.65 it/sec) -training >> step=2512400, episode=419 reward=0.7877175 (500.75 it/sec) -training >> step=2512500, episode=419 reward=0.7606748 (489.40 it/sec) -training >> step=2512600, episode=419 reward=0.7953855 (479.85 it/sec) -training >> step=2512700, episode=419 reward=0.7850758 (491.43 it/sec) -training >> step=2512800, episode=419 reward=0.7557017 (520.32 it/sec) -training >> step=2512900, episode=419 reward=0.7852368 (507.47 it/sec) -training >> step=2513000, episode=419 reward=0.7740197 (522.69 it/sec) -training >> step=2513100, episode=419 reward=0.7563731 (520.68 it/sec) -training >> step=2513200, episode=419 reward=0.7606313 (550.40 it/sec) -training >> step=2513300, episode=420 reward=0.7888893 (169.62 it/sec) -training >> step=2513400, episode=420 reward=0.76186 (447.85 it/sec) -training >> step=2513500, episode=420 reward=0.77174 (463.74 it/sec) -training >> step=2513600, episode=420 reward=0.7664846 (466.60 it/sec) -training >> step=2513700, episode=420 reward=0.7607341 (484.67 it/sec) -training >> step=2513800, episode=420 reward=0.7592108 (490.99 it/sec) -training >> step=2513900, episode=420 reward=0.7731961 (515.58 it/sec) -training >> step=2514000, episode=420 reward=0.7698387 (546.02 it/sec) -training >> step=2514100, episode=420 reward=0.7638901 (505.46 it/sec) -training >> step=2514200, episode=420 reward=0.7625465 (542.34 it/sec) -training >> step=2514300, episode=420 reward=0.7804379 (498.75 it/sec) -training >> step=2514400, episode=420 reward=0.7500528 (492.61 it/sec) -training >> step=2514500, episode=420 reward=0.7507188 (506.74 it/sec) -training >> step=2514600, episode=420 reward=0.7675154 (415.06 it/sec) -training >> step=2514700, episode=420 reward=0.7687093 (567.28 it/sec) -training >> step=2514800, episode=420 reward=0.7804583 (536.71 it/sec) -training >> step=2514900, episode=420 reward=0.7527689 (505.34 it/sec) -training >> step=2515000, episode=420 reward=0.771255 (510.31 it/sec) -training >> step=2515100, episode=420 reward=0.7708959 (520.24 it/sec) -training >> step=2515200, episode=420 reward=0.7649917 (577.53 it/sec) -training >> step=2515300, episode=420 reward=0.7707312 (493.01 it/sec) -training >> step=2515400, episode=420 reward=0.7850452 (503.21 it/sec) -training >> step=2515500, episode=420 reward=0.7827093 (577.75 it/sec) -training >> step=2515600, episode=420 reward=0.7684838 (520.33 it/sec) -training >> step=2515700, episode=420 reward=0.7674276 (530.26 it/sec) -training >> step=2515800, episode=420 reward=0.7828682 (502.56 it/sec) -training >> step=2515900, episode=420 reward=0.7438515 (564.64 it/sec) -training >> step=2516000, episode=420 reward=0.7598284 (504.54 it/sec) -training >> step=2516100, episode=420 reward=0.7459171 (540.59 it/sec) -training >> step=2516200, episode=420 reward=0.7458158 (521.67 it/sec) -training >> step=2516300, episode=420 reward=0.7524408 (569.44 it/sec) -training >> step=2516400, episode=420 reward=0.75366 (478.80 it/sec) -training >> step=2516500, episode=420 reward=0.769408 (485.95 it/sec) -training >> step=2516600, episode=420 reward=0.7576724 (523.07 it/sec) -training >> step=2516700, episode=420 reward=0.7634206 (535.99 it/sec) -training >> step=2516800, episode=420 reward=0.7789418 (515.62 it/sec) -training >> step=2516900, episode=420 reward=0.7841481 (529.08 it/sec) -training >> step=2517000, episode=420 reward=0.7607793 (517.50 it/sec) -training >> step=2517100, episode=420 reward=0.7548262 (582.24 it/sec) -training >> step=2517200, episode=420 reward=0.7801816 (527.58 it/sec) -training >> step=2517300, episode=420 reward=0.7653751 (521.88 it/sec) -training >> step=2517400, episode=420 reward=0.7714185 (545.21 it/sec) -training >> step=2517500, episode=420 reward=0.7607613 (551.91 it/sec) -training >> step=2517600, episode=420 reward=0.77237 (490.42 it/sec) -training >> step=2517700, episode=420 reward=0.7680789 (552.12 it/sec) -training >> step=2517800, episode=420 reward=0.7886 (526.26 it/sec) -training >> step=2517900, episode=420 reward=0.7429513 (532.18 it/sec) -training >> step=2518000, episode=420 reward=0.7776589 (533.71 it/sec) -training >> step=2518100, episode=420 reward=0.768347 (478.70 it/sec) -training >> step=2518200, episode=420 reward=0.7573708 (563.60 it/sec) -training >> step=2518300, episode=420 reward=0.7545 (511.24 it/sec) -training >> step=2518400, episode=420 reward=0.7710225 (504.52 it/sec) -training >> step=2518500, episode=420 reward=0.769994 (563.36 it/sec) -training >> step=2518600, episode=420 reward=0.7754675 (535.98 it/sec) -training >> step=2518700, episode=420 reward=0.7801521 (537.59 it/sec) -training >> step=2518800, episode=420 reward=0.7567109 (509.54 it/sec) -training >> step=2518900, episode=420 reward=0.7712214 (528.51 it/sec) -training >> step=2519000, episode=420 reward=0.7706016 (551.78 it/sec) -training >> step=2519100, episode=420 reward=0.7669225 (519.49 it/sec) -training >> step=2519200, episode=420 reward=0.7555405 (492.95 it/sec) -training >> step=2519300, episode=421 reward=0.7568597 (73.26 it/sec) -training >> step=2519400, episode=421 reward=0.7889059 (510.37 it/sec) -training >> step=2519500, episode=421 reward=0.7503534 (495.47 it/sec) -training >> step=2519600, episode=421 reward=0.7671718 (502.79 it/sec) -training >> step=2519700, episode=421 reward=0.7754434 (535.10 it/sec) -training >> step=2519800, episode=421 reward=0.7705972 (506.10 it/sec) -training >> step=2519900, episode=421 reward=0.7638329 (538.81 it/sec) -training >> step=2520000, episode=421 reward=0.7756873 (606.28 it/sec) -training >> step=2520100, episode=421 reward=0.7832621 (520.17 it/sec) -training >> step=2520200, episode=421 reward=0.7664272 (520.90 it/sec) -training >> step=2520300, episode=421 reward=0.777813 (510.41 it/sec) -training >> step=2520400, episode=421 reward=0.7719696 (554.83 it/sec) -training >> step=2520500, episode=421 reward=0.7774533 (510.02 it/sec) -training >> step=2520600, episode=421 reward=0.7455505 (521.88 it/sec) -training >> step=2520700, episode=421 reward=0.7843077 (393.87 it/sec) -training >> step=2520800, episode=421 reward=0.7558114 (537.25 it/sec) -training >> step=2520900, episode=421 reward=0.7590791 (488.30 it/sec) -training >> step=2521000, episode=421 reward=0.7739725 (521.05 it/sec) -training >> step=2521100, episode=421 reward=0.7611225 (519.71 it/sec) -training >> step=2521200, episode=421 reward=0.7887537 (571.14 it/sec) -training >> step=2521300, episode=421 reward=0.7625215 (542.03 it/sec) -training >> step=2521400, episode=421 reward=0.7620918 (509.11 it/sec) -training >> step=2521500, episode=421 reward=0.7686236 (557.33 it/sec) -training >> step=2521600, episode=421 reward=0.781027 (581.40 it/sec) -training >> step=2521700, episode=421 reward=0.7601707 (526.54 it/sec) -training >> step=2521800, episode=421 reward=0.7614189 (554.47 it/sec) -training >> step=2521900, episode=421 reward=0.7755346 (535.92 it/sec) -training >> step=2522000, episode=421 reward=0.7510387 (561.79 it/sec) -training >> step=2522100, episode=421 reward=0.7741283 (531.65 it/sec) -training >> step=2522200, episode=421 reward=0.758286 (531.67 it/sec) -training >> step=2522300, episode=421 reward=0.7752222 (590.35 it/sec) -training >> step=2522400, episode=421 reward=0.7795801 (411.14 it/sec) -training >> step=2522500, episode=421 reward=0.7564892 (502.12 it/sec) -training >> step=2522600, episode=421 reward=0.7462237 (554.11 it/sec) -training >> step=2522700, episode=421 reward=0.7564783 (570.62 it/sec) -training >> step=2522800, episode=421 reward=0.7522897 (567.97 it/sec) -training >> step=2522900, episode=421 reward=0.7730682 (575.06 it/sec) -training >> step=2523000, episode=421 reward=0.7601676 (566.44 it/sec) -training >> step=2523100, episode=421 reward=0.7371829 (541.58 it/sec) -training >> step=2523200, episode=421 reward=0.7637924 (568.60 it/sec) -training >> step=2523300, episode=421 reward=0.7716349 (544.26 it/sec) -training >> step=2523400, episode=421 reward=0.7705538 (586.86 it/sec) -training >> step=2523500, episode=421 reward=0.7640927 (596.07 it/sec) -training >> step=2523600, episode=421 reward=0.7821355 (573.92 it/sec) -training >> step=2523700, episode=421 reward=0.7734937 (527.89 it/sec) -training >> step=2523800, episode=421 reward=0.7864105 (598.78 it/sec) -training >> step=2523900, episode=421 reward=0.7853979 (520.61 it/sec) -training >> step=2524000, episode=421 reward=0.7495355 (566.21 it/sec) -training >> step=2524100, episode=421 reward=0.7463475 (553.91 it/sec) -training >> step=2524200, episode=421 reward=0.7636194 (560.40 it/sec) -training >> step=2524300, episode=421 reward=0.7649151 (522.55 it/sec) -training >> step=2524400, episode=421 reward=0.7680762 (575.51 it/sec) -training >> step=2524500, episode=421 reward=0.7768576 (571.86 it/sec) -training >> step=2524600, episode=421 reward=0.7833233 (537.47 it/sec) -training >> step=2524700, episode=421 reward=0.770517 (551.83 it/sec) -training >> step=2524800, episode=421 reward=0.7709008 (563.90 it/sec) -training >> step=2524900, episode=421 reward=0.7434003 (514.26 it/sec) -training >> step=2525000, episode=421 reward=0.7471645 (548.66 it/sec) -training >> step=2525100, episode=421 reward=0.7570898 (535.57 it/sec) -training >> step=2525200, episode=421 reward=0.790244 (569.90 it/sec) -training >> step=2525300, episode=422 reward=0.7853227 (150.39 it/sec) -training >> step=2525400, episode=422 reward=0.7772267 (551.01 it/sec) -training >> step=2525500, episode=422 reward=0.7653304 (555.35 it/sec) -training >> step=2525600, episode=422 reward=0.7552803 (520.14 it/sec) -training >> step=2525700, episode=422 reward=0.7519248 (506.74 it/sec) -training >> step=2525800, episode=422 reward=0.7570155 (554.08 it/sec) -training >> step=2525900, episode=422 reward=0.7267439 (544.07 it/sec) -training >> step=2526000, episode=422 reward=0.7540392 (553.22 it/sec) -training >> step=2526100, episode=422 reward=0.7550327 (561.31 it/sec) -training >> step=2526200, episode=422 reward=0.7787333 (533.51 it/sec) -training >> step=2526300, episode=422 reward=0.759389 (517.46 it/sec) -training >> step=2526400, episode=422 reward=0.7646673 (583.12 it/sec) -training >> step=2526500, episode=422 reward=0.7754818 (558.13 it/sec) -training >> step=2526600, episode=422 reward=0.7679172 (542.89 it/sec) -training >> step=2526700, episode=422 reward=0.7760686 (554.09 it/sec) -training >> step=2526800, episode=422 reward=0.7687527 (550.43 it/sec) -training >> step=2526900, episode=422 reward=0.7730618 (502.32 it/sec) -training >> step=2527000, episode=422 reward=0.7657586 (387.62 it/sec) -training >> step=2527100, episode=422 reward=0.781356 (521.73 it/sec) -training >> step=2527200, episode=422 reward=0.7838852 (566.20 it/sec) -training >> step=2527300, episode=422 reward=0.7538702 (540.76 it/sec) -training >> step=2527400, episode=422 reward=0.7586185 (506.39 it/sec) -training >> step=2527500, episode=422 reward=0.7646862 (541.97 it/sec) -training >> step=2527600, episode=422 reward=0.753889 (569.65 it/sec) -training >> step=2527700, episode=422 reward=0.7802699 (524.41 it/sec) -training >> step=2527800, episode=422 reward=0.7853142 (548.42 it/sec) -training >> step=2527900, episode=422 reward=0.7790061 (542.37 it/sec) -training >> step=2528000, episode=422 reward=0.7552245 (562.05 it/sec) -training >> step=2528100, episode=422 reward=0.7643006 (548.09 it/sec) -training >> step=2528200, episode=422 reward=0.7836624 (538.47 it/sec) -training >> step=2528300, episode=422 reward=0.7694928 (548.20 it/sec) -training >> step=2528400, episode=422 reward=0.7551552 (551.19 it/sec) -training >> step=2528500, episode=422 reward=0.754945 (505.25 it/sec) -training >> step=2528600, episode=422 reward=0.7776302 (555.72 it/sec) -training >> step=2528700, episode=422 reward=0.7745121 (551.04 it/sec) -training >> step=2528800, episode=422 reward=0.7852814 (564.26 it/sec) -training >> step=2528900, episode=422 reward=0.7706939 (515.49 it/sec) -training >> step=2529000, episode=422 reward=0.7706934 (516.12 it/sec) -training >> step=2529100, episode=422 reward=0.7620021 (572.42 it/sec) -training >> step=2529200, episode=422 reward=0.7394103 (564.03 it/sec) -training >> step=2529300, episode=422 reward=0.7773106 (575.86 it/sec) -training >> step=2529400, episode=422 reward=0.778859 (556.98 it/sec) -training >> step=2529500, episode=422 reward=0.7517555 (542.56 it/sec) -training >> step=2529600, episode=422 reward=0.7620109 (540.64 it/sec) -training >> step=2529700, episode=422 reward=0.7608681 (558.26 it/sec) -training >> step=2529800, episode=422 reward=0.7590693 (551.01 it/sec) -training >> step=2529900, episode=422 reward=0.7579917 (577.27 it/sec) -training >> step=2530000, episode=422 reward=0.7453507 (556.95 it/sec) -training >> step=2530100, episode=422 reward=0.7556872 (546.15 it/sec) -training >> step=2530200, episode=422 reward=0.7531257 (558.92 it/sec) -training >> step=2530300, episode=422 reward=0.7654969 (564.77 it/sec) -training >> step=2530400, episode=422 reward=0.7842771 (566.86 it/sec) -training >> step=2530500, episode=422 reward=0.7521995 (559.26 it/sec) -training >> step=2530600, episode=422 reward=0.7555138 (562.01 it/sec) -training >> step=2530700, episode=422 reward=0.7696965 (538.94 it/sec) -training >> step=2530800, episode=422 reward=0.7717609 (537.39 it/sec) -training >> step=2530900, episode=422 reward=0.7580914 (557.07 it/sec) -training >> step=2531000, episode=422 reward=0.7643174 (585.46 it/sec) -training >> step=2531100, episode=422 reward=0.7729785 (556.32 it/sec) -training >> step=2531200, episode=422 reward=0.7825503 (543.95 it/sec) -training >> step=2531300, episode=423 reward=0.7608234 (182.90 it/sec) -training >> step=2531400, episode=423 reward=0.7543444 (553.07 it/sec) -training >> step=2531500, episode=423 reward=0.7666486 (508.06 it/sec) -training >> step=2531600, episode=423 reward=0.7568713 (530.50 it/sec) -training >> step=2531700, episode=423 reward=0.7728058 (544.85 it/sec) -training >> step=2531800, episode=423 reward=0.7490861 (585.60 it/sec) -training >> step=2531900, episode=423 reward=0.7634238 (519.33 it/sec) -training >> step=2532000, episode=423 reward=0.7816695 (564.27 it/sec) -training >> step=2532100, episode=423 reward=0.7625183 (557.84 it/sec) -training >> step=2532200, episode=423 reward=0.7714244 (551.73 it/sec) -training >> step=2532300, episode=423 reward=0.7617499 (519.46 it/sec) -training >> step=2532400, episode=423 reward=0.7832879 (534.83 it/sec) -training >> step=2532500, episode=423 reward=0.7692416 (590.16 it/sec) -training >> step=2532600, episode=423 reward=0.7698191 (548.79 it/sec) -training >> step=2532700, episode=423 reward=0.770986 (539.08 it/sec) -training >> step=2532800, episode=423 reward=0.7645835 (525.11 it/sec) -training >> step=2532900, episode=423 reward=0.7734705 (561.28 it/sec) -training >> step=2533000, episode=423 reward=0.7766195 (525.24 it/sec) -training >> step=2533100, episode=423 reward=0.7786426 (555.29 it/sec) -training >> step=2533200, episode=423 reward=0.7615031 (382.37 it/sec) -training >> step=2533300, episode=423 reward=0.7603257 (577.90 it/sec) -training >> step=2533400, episode=423 reward=0.7745991 (520.26 it/sec) -training >> step=2533500, episode=423 reward=0.7781489 (533.54 it/sec) -training >> step=2533600, episode=423 reward=0.7659112 (572.13 it/sec) -training >> step=2533700, episode=423 reward=0.7813517 (559.20 it/sec) -training >> step=2533800, episode=423 reward=0.768529 (548.54 it/sec) -training >> step=2533900, episode=423 reward=0.7648742 (531.95 it/sec) -training >> step=2534000, episode=423 reward=0.7767294 (550.59 it/sec) -training >> step=2534100, episode=423 reward=0.766593 (588.06 it/sec) -training >> step=2534200, episode=423 reward=0.7590221 (537.85 it/sec) -training >> step=2534300, episode=423 reward=0.7439587 (523.66 it/sec) -training >> step=2534400, episode=423 reward=0.7936417 (584.98 it/sec) -training >> step=2534500, episode=423 reward=0.747295 (508.97 it/sec) -training >> step=2534600, episode=423 reward=0.7436935 (514.88 it/sec) -training >> step=2534700, episode=423 reward=0.7631078 (571.45 it/sec) -training >> step=2534800, episode=423 reward=0.7670457 (540.17 it/sec) -training >> step=2534900, episode=423 reward=0.79529 (557.88 it/sec) -training >> step=2535000, episode=423 reward=0.7707738 (548.72 it/sec) -training >> step=2535100, episode=423 reward=0.7869903 (519.08 it/sec) -training >> step=2535200, episode=423 reward=0.7544442 (584.76 it/sec) -training >> step=2535300, episode=423 reward=0.7772497 (547.04 it/sec) -training >> step=2535400, episode=423 reward=0.7894276 (507.47 it/sec) -training >> step=2535500, episode=423 reward=0.7531251 (562.25 it/sec) -training >> step=2535600, episode=423 reward=0.7279741 (498.86 it/sec) -training >> step=2535700, episode=423 reward=0.7860811 (527.75 it/sec) -training >> step=2535800, episode=423 reward=0.7494078 (553.78 it/sec) -training >> step=2535900, episode=423 reward=0.7732849 (521.66 it/sec) -training >> step=2536000, episode=423 reward=0.7601479 (529.96 it/sec) -training >> step=2536100, episode=423 reward=0.7556375 (471.03 it/sec) -training >> step=2536200, episode=423 reward=0.7678578 (515.64 it/sec) -training >> step=2536300, episode=423 reward=0.7706006 (572.73 it/sec) -training >> step=2536400, episode=423 reward=0.7626104 (525.85 it/sec) -training >> step=2536500, episode=423 reward=0.7610404 (520.30 it/sec) -training >> step=2536600, episode=423 reward=0.7576374 (561.32 it/sec) -training >> step=2536700, episode=423 reward=0.7486027 (451.06 it/sec) -training >> step=2536800, episode=423 reward=0.775665 (527.74 it/sec) -training >> step=2536900, episode=423 reward=0.7623258 (546.93 it/sec) -training >> step=2537000, episode=423 reward=0.764814 (543.22 it/sec) -training >> step=2537100, episode=423 reward=0.7727269 (538.21 it/sec) -training >> step=2537200, episode=423 reward=0.7661023 (503.37 it/sec) -training >> step=2537300, episode=424 reward=0.7647885 (164.85 it/sec) -training >> step=2537400, episode=424 reward=0.7841784 (536.89 it/sec) -training >> step=2537500, episode=424 reward=0.766448 (509.73 it/sec) -training >> step=2537600, episode=424 reward=0.7573503 (505.15 it/sec) -training >> step=2537700, episode=424 reward=0.7472203 (549.58 it/sec) -training >> step=2537800, episode=424 reward=0.7611905 (516.80 it/sec) -training >> step=2537900, episode=424 reward=0.7631974 (519.50 it/sec) -training >> step=2538000, episode=424 reward=0.7659455 (540.41 it/sec) -training >> step=2538100, episode=424 reward=0.760491 (449.95 it/sec) -training >> step=2538200, episode=424 reward=0.7731096 (558.11 it/sec) -training >> step=2538300, episode=424 reward=0.7705463 (531.79 it/sec) -training >> step=2538400, episode=424 reward=0.7903003 (530.12 it/sec) -training >> step=2538500, episode=424 reward=0.7610412 (538.77 it/sec) -training >> step=2538600, episode=424 reward=0.7603511 (502.98 it/sec) -training >> step=2538700, episode=424 reward=0.7604967 (533.00 it/sec) -training >> step=2538800, episode=424 reward=0.7498906 (539.92 it/sec) -training >> step=2538900, episode=424 reward=0.7594403 (519.35 it/sec) -training >> step=2539000, episode=424 reward=0.778622 (518.23 it/sec) -training >> step=2539100, episode=424 reward=0.7695454 (537.95 it/sec) -training >> step=2539200, episode=424 reward=0.7949511 (520.22 it/sec) -training >> step=2539300, episode=424 reward=0.7837409 (405.08 it/sec) -training >> step=2539400, episode=424 reward=0.7583771 (545.25 it/sec) -training >> step=2539500, episode=424 reward=0.7563733 (545.65 it/sec) -training >> step=2539600, episode=424 reward=0.7664117 (566.67 it/sec) -training >> step=2539700, episode=424 reward=0.7795578 (497.15 it/sec) -training >> step=2539800, episode=424 reward=0.737048 (526.92 it/sec) -training >> step=2539900, episode=424 reward=0.7572025 (554.97 it/sec) -training >> step=2540000, episode=424 reward=0.7697703 (567.67 it/sec) -training >> step=2540100, episode=424 reward=0.7726235 (545.16 it/sec) -training >> step=2540200, episode=424 reward=0.7576141 (447.07 it/sec) -training >> step=2540300, episode=424 reward=0.7812032 (544.13 it/sec) -training >> step=2540400, episode=424 reward=0.761897 (517.33 it/sec) -training >> step=2540500, episode=424 reward=0.7742916 (543.93 it/sec) -training >> step=2540600, episode=424 reward=0.7885877 (541.16 it/sec) -training >> step=2540700, episode=424 reward=0.7777144 (547.20 it/sec) -training >> step=2540800, episode=424 reward=0.7412626 (466.06 it/sec) -training >> step=2540900, episode=424 reward=0.7716321 (561.31 it/sec) -training >> step=2541000, episode=424 reward=0.7556909 (551.96 it/sec) -training >> step=2541100, episode=424 reward=0.777376 (576.82 it/sec) -training >> step=2541200, episode=424 reward=0.7551228 (561.58 it/sec) -training >> step=2541300, episode=424 reward=0.7597035 (542.88 it/sec) -training >> step=2541400, episode=424 reward=0.7714618 (540.31 it/sec) -training >> step=2541500, episode=424 reward=0.7944409 (544.31 it/sec) -training >> step=2541600, episode=424 reward=0.7643321 (560.41 it/sec) -training >> step=2541700, episode=424 reward=0.777614 (530.89 it/sec) -training >> step=2541800, episode=424 reward=0.7691924 (549.02 it/sec) -training >> step=2541900, episode=424 reward=0.7359629 (516.20 it/sec) -training >> step=2542000, episode=424 reward=0.7762924 (540.58 it/sec) -training >> step=2542100, episode=424 reward=0.7496982 (550.83 it/sec) -training >> step=2542200, episode=424 reward=0.7786996 (584.74 it/sec) -training >> step=2542300, episode=424 reward=0.7532012 (555.04 it/sec) -training >> step=2542400, episode=424 reward=0.7713832 (566.45 it/sec) -training >> step=2542500, episode=424 reward=0.7452722 (530.54 it/sec) -training >> step=2542600, episode=424 reward=0.7694758 (537.24 it/sec) -training >> step=2542700, episode=424 reward=0.7740588 (552.40 it/sec) -training >> step=2542800, episode=424 reward=0.7614486 (542.40 it/sec) -training >> step=2542900, episode=424 reward=0.7616287 (532.96 it/sec) -training >> step=2543000, episode=424 reward=0.7617559 (557.15 it/sec) -training >> step=2543100, episode=424 reward=0.7710376 (551.38 it/sec) -training >> step=2543200, episode=424 reward=0.7662497 (500.41 it/sec) -training >> step=2543300, episode=425 reward=0.7631168 (101.73 it/sec) -training >> step=2543400, episode=425 reward=0.7463127 (533.88 it/sec) -training >> step=2543500, episode=425 reward=0.7706659 (545.62 it/sec) -training >> step=2543600, episode=425 reward=0.7683061 (524.33 it/sec) -training >> step=2543700, episode=425 reward=0.7552569 (509.86 it/sec) -training >> step=2543800, episode=425 reward=0.745835 (557.25 it/sec) -training >> step=2543900, episode=425 reward=0.7695671 (561.43 it/sec) -training >> step=2544000, episode=425 reward=0.7619528 (555.37 it/sec) -training >> step=2544100, episode=425 reward=0.7872553 (550.75 it/sec) -training >> step=2544200, episode=425 reward=0.7520074 (565.05 it/sec) -training >> step=2544300, episode=425 reward=0.7581576 (525.33 it/sec) -training >> step=2544400, episode=425 reward=0.776087 (530.34 it/sec) -training >> step=2544500, episode=425 reward=0.7661176 (544.18 it/sec) -training >> step=2544600, episode=425 reward=0.7824548 (570.51 it/sec) -training >> step=2544700, episode=425 reward=0.7726904 (573.21 it/sec) -training >> step=2544800, episode=425 reward=0.768856 (536.48 it/sec) -training >> step=2544900, episode=425 reward=0.7540098 (558.08 it/sec) -training >> step=2545000, episode=425 reward=0.7707158 (518.16 it/sec) -training >> step=2545100, episode=425 reward=0.7609695 (526.81 it/sec) -training >> step=2545200, episode=425 reward=0.7476941 (532.79 it/sec) -training >> step=2545300, episode=425 reward=0.7621814 (529.52 it/sec) -training >> step=2545400, episode=425 reward=0.7761686 (529.33 it/sec) -training >> step=2545500, episode=425 reward=0.7671942 (374.42 it/sec) -training >> step=2545600, episode=425 reward=0.7661408 (506.78 it/sec) -training >> step=2545700, episode=425 reward=0.7744117 (548.28 it/sec) -training >> step=2545800, episode=425 reward=0.7607486 (558.02 it/sec) -training >> step=2545900, episode=425 reward=0.7769915 (511.89 it/sec) -training >> step=2546000, episode=425 reward=0.7684054 (523.85 it/sec) -training >> step=2546100, episode=425 reward=0.7462009 (514.48 it/sec) -training >> step=2546200, episode=425 reward=0.7703819 (514.02 it/sec) -training >> step=2546300, episode=425 reward=0.7707298 (543.58 it/sec) -training >> step=2546400, episode=425 reward=0.760379 (509.46 it/sec) -training >> step=2546500, episode=425 reward=0.7457285 (545.29 it/sec) -training >> step=2546600, episode=425 reward=0.7554954 (516.71 it/sec) -training >> step=2546700, episode=425 reward=0.7765831 (509.70 it/sec) -training >> step=2546800, episode=425 reward=0.7678525 (550.75 it/sec) -training >> step=2546900, episode=425 reward=0.766133 (551.84 it/sec) -training >> step=2547000, episode=425 reward=0.7524536 (526.27 it/sec) -training >> step=2547100, episode=425 reward=0.7582142 (489.76 it/sec) -training >> step=2547200, episode=425 reward=0.7691816 (493.35 it/sec) -training >> step=2547300, episode=425 reward=0.7655501 (575.00 it/sec) -training >> step=2547400, episode=425 reward=0.7577458 (519.87 it/sec) -training >> step=2547500, episode=425 reward=0.7736676 (513.81 it/sec) -training >> step=2547600, episode=425 reward=0.7754636 (532.47 it/sec) -training >> step=2547700, episode=425 reward=0.7912579 (555.99 it/sec) -training >> step=2547800, episode=425 reward=0.7763141 (541.66 it/sec) -training >> step=2547900, episode=425 reward=0.7378594 (508.94 it/sec) -training >> step=2548000, episode=425 reward=0.7634934 (531.17 it/sec) -training >> step=2548100, episode=425 reward=0.7580623 (481.16 it/sec) -training >> step=2548200, episode=425 reward=0.7732603 (508.08 it/sec) -training >> step=2548300, episode=425 reward=0.7613834 (523.62 it/sec) -training >> step=2548400, episode=425 reward=0.7754341 (525.62 it/sec) -training >> step=2548500, episode=425 reward=0.7811704 (553.21 it/sec) -training >> step=2548600, episode=425 reward=0.7512977 (503.54 it/sec) -training >> step=2548700, episode=425 reward=0.7773999 (533.14 it/sec) -training >> step=2548800, episode=425 reward=0.7665468 (514.09 it/sec) -training >> step=2548900, episode=425 reward=0.7510769 (532.14 it/sec) -training >> step=2549000, episode=425 reward=0.7702264 (526.63 it/sec) -training >> step=2549100, episode=425 reward=0.772185 (537.54 it/sec) -training >> step=2549200, episode=425 reward=0.7807271 (459.19 it/sec) -training >> step=2549300, episode=426 reward=0.789255 (208.30 it/sec) -training >> step=2549400, episode=426 reward=0.7689254 (537.31 it/sec) -training >> step=2549500, episode=426 reward=0.7565123 (532.48 it/sec) -training >> step=2549600, episode=426 reward=0.769387 (562.86 it/sec) -training >> step=2549700, episode=426 reward=0.764028 (516.98 it/sec) -training >> step=2549800, episode=426 reward=0.7683145 (532.76 it/sec) -training >> step=2549900, episode=426 reward=0.772017 (542.90 it/sec) -training >> step=2550000, episode=426 reward=0.7725458 (574.71 it/sec) -training >> step=2550100, episode=426 reward=0.7682754 (521.47 it/sec) -training >> step=2550200, episode=426 reward=0.7462429 (543.78 it/sec) -training >> step=2550300, episode=426 reward=0.7748966 (573.77 it/sec) -training >> step=2550400, episode=426 reward=0.7774484 (559.04 it/sec) -training >> step=2550500, episode=426 reward=0.7836112 (564.67 it/sec) -training >> step=2550600, episode=426 reward=0.773899 (534.87 it/sec) -training >> step=2550700, episode=426 reward=0.7590531 (540.65 it/sec) -training >> step=2550800, episode=426 reward=0.7697256 (571.16 it/sec) -training >> step=2550900, episode=426 reward=0.7718186 (558.34 it/sec) -training >> step=2551000, episode=426 reward=0.754521 (552.14 it/sec) -training >> step=2551100, episode=426 reward=0.7659731 (541.99 it/sec) -training >> step=2551200, episode=426 reward=0.7639325 (538.69 it/sec) -training >> step=2551300, episode=426 reward=0.7699108 (562.75 it/sec) -training >> step=2551400, episode=426 reward=0.7634581 (528.08 it/sec) -training >> step=2551500, episode=426 reward=0.7738823 (563.55 it/sec) -training >> step=2551600, episode=426 reward=0.7547278 (532.58 it/sec) -training >> step=2551700, episode=426 reward=0.7854934 (398.09 it/sec) -training >> step=2551800, episode=426 reward=0.747767 (576.70 it/sec) -training >> step=2551900, episode=426 reward=0.7647184 (523.54 it/sec) -training >> step=2552000, episode=426 reward=0.7620513 (517.48 it/sec) -training >> step=2552100, episode=426 reward=0.7787321 (530.82 it/sec) -training >> step=2552200, episode=426 reward=0.7776007 (513.94 it/sec) -training >> step=2552300, episode=426 reward=0.7761079 (538.63 it/sec) -training >> step=2552400, episode=426 reward=0.7773954 (525.98 it/sec) -training >> step=2552500, episode=426 reward=0.7724339 (559.64 it/sec) -training >> step=2552600, episode=426 reward=0.767242 (536.99 it/sec) -training >> step=2552700, episode=426 reward=0.7822335 (505.05 it/sec) -training >> step=2552800, episode=426 reward=0.7523607 (519.77 it/sec) -training >> step=2552900, episode=426 reward=0.7816373 (480.53 it/sec) -training >> step=2553000, episode=426 reward=0.7593328 (511.43 it/sec) -training >> step=2553100, episode=426 reward=0.7813643 (540.71 it/sec) -training >> step=2553200, episode=426 reward=0.7497789 (516.51 it/sec) -training >> step=2553300, episode=426 reward=0.7757868 (491.79 it/sec) -training >> step=2553400, episode=426 reward=0.7840005 (487.52 it/sec) -training >> step=2553500, episode=426 reward=0.7575707 (539.95 it/sec) -training >> step=2553600, episode=426 reward=0.7633077 (542.50 it/sec) -training >> step=2553700, episode=426 reward=0.7881933 (553.91 it/sec) -training >> step=2553800, episode=426 reward=0.7765682 (533.64 it/sec) -training >> step=2553900, episode=426 reward=0.771543 (510.81 it/sec) -training >> step=2554000, episode=426 reward=0.7224652 (522.29 it/sec) -training >> step=2554100, episode=426 reward=0.7632492 (484.99 it/sec) -training >> step=2554200, episode=426 reward=0.7397866 (508.22 it/sec) -training >> step=2554300, episode=426 reward=0.7745225 (530.71 it/sec) -training >> step=2554400, episode=426 reward=0.7592961 (549.96 it/sec) -training >> step=2554500, episode=426 reward=0.7589505 (534.16 it/sec) -training >> step=2554600, episode=426 reward=0.7584019 (515.75 it/sec) -training >> step=2554700, episode=426 reward=0.7537113 (527.31 it/sec) -training >> step=2554800, episode=426 reward=0.76977 (546.23 it/sec) -training >> step=2554900, episode=426 reward=0.7693416 (554.13 it/sec) -training >> step=2555000, episode=426 reward=0.780136 (509.70 it/sec) -training >> step=2555100, episode=426 reward=0.7665767 (521.66 it/sec) -training >> step=2555200, episode=426 reward=0.7642652 (532.44 it/sec) -training >> step=2555300, episode=427 reward=0.758329 (179.83 it/sec) -training >> step=2555400, episode=427 reward=0.7519877 (532.41 it/sec) -training >> step=2555500, episode=427 reward=0.7684326 (542.94 it/sec) -training >> step=2555600, episode=427 reward=0.7626688 (531.91 it/sec) -training >> step=2555700, episode=427 reward=0.7423527 (542.47 it/sec) -training >> step=2555800, episode=427 reward=0.7665586 (560.93 it/sec) -training >> step=2555900, episode=427 reward=0.7872105 (522.36 it/sec) -training >> step=2556000, episode=427 reward=0.7603364 (522.00 it/sec) -training >> step=2556100, episode=427 reward=0.7805247 (552.75 it/sec) -training >> step=2556200, episode=427 reward=0.7475734 (544.82 it/sec) -training >> step=2556300, episode=427 reward=0.7573669 (563.29 it/sec) -training >> step=2556400, episode=427 reward=0.7659147 (548.99 it/sec) -training >> step=2556500, episode=427 reward=0.7777302 (554.37 it/sec) -training >> step=2556600, episode=427 reward=0.7662361 (526.16 it/sec) -training >> step=2556700, episode=427 reward=0.7712349 (511.95 it/sec) -training >> step=2556800, episode=427 reward=0.7568537 (539.42 it/sec) -training >> step=2556900, episode=427 reward=0.7831173 (501.78 it/sec) -training >> step=2557000, episode=427 reward=0.7822055 (496.41 it/sec) -training >> step=2557100, episode=427 reward=0.7619162 (504.30 it/sec) -training >> step=2557200, episode=427 reward=0.7526986 (520.25 it/sec) -training >> step=2557300, episode=427 reward=0.7542132 (562.93 it/sec) -training >> step=2557400, episode=427 reward=0.7637118 (519.35 it/sec) -training >> step=2557500, episode=427 reward=0.7734697 (483.36 it/sec) -training >> step=2557600, episode=427 reward=0.7665351 (528.99 it/sec) -training >> step=2557700, episode=427 reward=0.7697475 (504.70 it/sec) -training >> step=2557800, episode=427 reward=0.7896793 (378.05 it/sec) -training >> step=2557900, episode=427 reward=0.7839676 (553.03 it/sec) -training >> step=2558000, episode=427 reward=0.7706127 (518.97 it/sec) -training >> step=2558100, episode=427 reward=0.7683133 (550.92 it/sec) -training >> step=2558200, episode=427 reward=0.7615813 (504.70 it/sec) -training >> step=2558300, episode=427 reward=0.7626909 (467.45 it/sec) -training >> step=2558400, episode=427 reward=0.7471173 (536.21 it/sec) -training >> step=2558500, episode=427 reward=0.7774274 (526.93 it/sec) -training >> step=2558600, episode=427 reward=0.7703108 (552.43 it/sec) -training >> step=2558700, episode=427 reward=0.7631484 (502.35 it/sec) -training >> step=2558800, episode=427 reward=0.7917822 (517.59 it/sec) -training >> step=2558900, episode=427 reward=0.7496041 (547.73 it/sec) -training >> step=2559000, episode=427 reward=0.7922839 (516.76 it/sec) -training >> step=2559100, episode=427 reward=0.7801253 (523.78 it/sec) -training >> step=2559200, episode=427 reward=0.7633484 (553.60 it/sec) -training >> step=2559300, episode=427 reward=0.7602381 (503.43 it/sec) -training >> step=2559400, episode=427 reward=0.7495472 (528.02 it/sec) -training >> step=2559500, episode=427 reward=0.7754608 (532.07 it/sec) -training >> step=2559600, episode=427 reward=0.777244 (504.51 it/sec) -training >> step=2559700, episode=427 reward=0.771061 (571.80 it/sec) -training >> step=2559800, episode=427 reward=0.7543482 (514.32 it/sec) -training >> step=2559900, episode=427 reward=0.7820262 (513.55 it/sec) -training >> step=2560000, episode=427 reward=0.7684407 (568.79 it/sec) -training >> step=2560100, episode=427 reward=0.7703971 (522.12 it/sec) -training >> step=2560200, episode=427 reward=0.7738126 (523.18 it/sec) -training >> step=2560300, episode=427 reward=0.7809438 (528.29 it/sec) -training >> step=2560400, episode=427 reward=0.7658041 (500.32 it/sec) -training >> step=2560500, episode=427 reward=0.7461202 (535.47 it/sec) -training >> step=2560600, episode=427 reward=0.7617006 (518.66 it/sec) -training >> step=2560700, episode=427 reward=0.776865 (494.03 it/sec) -training >> step=2560800, episode=427 reward=0.760148 (566.07 it/sec) -training >> step=2560900, episode=427 reward=0.7756743 (533.10 it/sec) -training >> step=2561000, episode=427 reward=0.7613227 (510.74 it/sec) -training >> step=2561100, episode=427 reward=0.7639863 (591.13 it/sec) -training >> step=2561200, episode=427 reward=0.7746504 (567.55 it/sec) -training >> step=2561300, episode=428 reward=0.7686437 (181.84 it/sec) -training >> step=2561400, episode=428 reward=0.7655573 (583.52 it/sec) -training >> step=2561500, episode=428 reward=0.7449942 (561.10 it/sec) -training >> step=2561600, episode=428 reward=0.7538086 (570.51 it/sec) -training >> step=2561700, episode=428 reward=0.7478122 (555.49 it/sec) -training >> step=2561800, episode=428 reward=0.7497894 (558.30 it/sec) -training >> step=2561900, episode=428 reward=0.7712579 (574.05 it/sec) -training >> step=2562000, episode=428 reward=0.7650502 (603.70 it/sec) -training >> step=2562100, episode=428 reward=0.7896745 (594.70 it/sec) -training >> step=2562200, episode=428 reward=0.7504552 (553.80 it/sec) -training >> step=2562300, episode=428 reward=0.7742358 (619.38 it/sec) -training >> step=2562400, episode=428 reward=0.7584022 (572.54 it/sec) -training >> step=2562500, episode=428 reward=0.7597123 (584.29 it/sec) -training >> step=2562600, episode=428 reward=0.7780612 (490.59 it/sec) -training >> step=2562700, episode=428 reward=0.7925291 (537.02 it/sec) -training >> step=2562800, episode=428 reward=0.7708174 (520.74 it/sec) -training >> step=2562900, episode=428 reward=0.7878788 (544.98 it/sec) -training >> step=2563000, episode=428 reward=0.7894852 (525.93 it/sec) -training >> step=2563100, episode=428 reward=0.7878658 (605.61 it/sec) -training >> step=2563200, episode=428 reward=0.7494838 (566.19 it/sec) -training >> step=2563300, episode=428 reward=0.732716 (540.82 it/sec) -training >> step=2563400, episode=428 reward=0.7915687 (568.29 it/sec) -training >> step=2563500, episode=428 reward=0.788495 (490.69 it/sec) -training >> step=2563600, episode=428 reward=0.7683254 (492.41 it/sec) -training >> step=2563700, episode=428 reward=0.7590123 (550.63 it/sec) -training >> step=2563800, episode=428 reward=0.7636557 (596.01 it/sec) -training >> step=2563900, episode=428 reward=0.7768422 (567.90 it/sec) -training >> step=2564000, episode=428 reward=0.7767751 (390.89 it/sec) -training >> step=2564100, episode=428 reward=0.7718162 (571.49 it/sec) -training >> step=2564200, episode=428 reward=0.7738643 (590.61 it/sec) -training >> step=2564300, episode=428 reward=0.7533643 (555.27 it/sec) -training >> step=2564400, episode=428 reward=0.7647483 (574.03 it/sec) -training >> step=2564500, episode=428 reward=0.7822634 (583.79 it/sec) -training >> step=2564600, episode=428 reward=0.7646939 (536.03 it/sec) -training >> step=2564700, episode=428 reward=0.7666966 (583.51 it/sec) -training >> step=2564800, episode=428 reward=0.7414821 (584.85 it/sec) -training >> step=2564900, episode=428 reward=0.76938 (602.53 it/sec) -training >> step=2565000, episode=428 reward=0.7608835 (588.29 it/sec) -training >> step=2565100, episode=428 reward=0.7519444 (559.79 it/sec) -training >> step=2565200, episode=428 reward=0.7761745 (560.86 it/sec) -training >> step=2565300, episode=428 reward=0.7724451 (600.43 it/sec) -training >> step=2565400, episode=428 reward=0.7692324 (570.31 it/sec) -training >> step=2565500, episode=428 reward=0.7507264 (586.32 it/sec) -training >> step=2565600, episode=428 reward=0.7642919 (578.27 it/sec) -training >> step=2565700, episode=428 reward=0.7356551 (608.00 it/sec) -training >> step=2565800, episode=428 reward=0.7593253 (558.21 it/sec) -training >> step=2565900, episode=428 reward=0.7640573 (574.18 it/sec) -training >> step=2566000, episode=428 reward=0.7540063 (578.44 it/sec) -training >> step=2566100, episode=428 reward=0.7741711 (615.07 it/sec) -training >> step=2566200, episode=428 reward=0.7517358 (572.21 it/sec) -training >> step=2566300, episode=428 reward=0.7588274 (585.11 it/sec) -training >> step=2566400, episode=428 reward=0.747524 (561.69 it/sec) -training >> step=2566500, episode=428 reward=0.7714701 (582.44 it/sec) -training >> step=2566600, episode=428 reward=0.7606308 (580.07 it/sec) -training >> step=2566700, episode=428 reward=0.7718945 (595.98 it/sec) -training >> step=2566800, episode=428 reward=0.7743325 (593.15 it/sec) -training >> step=2566900, episode=428 reward=0.7837706 (591.39 it/sec) -training >> step=2567000, episode=428 reward=0.7468764 (533.14 it/sec) -training >> step=2567100, episode=428 reward=0.7861279 (558.10 it/sec) -training >> step=2567200, episode=428 reward=0.7627764 (547.36 it/sec) -training >> step=2567300, episode=429 reward=0.7738284 (128.65 it/sec) -training >> step=2567400, episode=429 reward=0.7643651 (547.68 it/sec) -training >> step=2567500, episode=429 reward=0.7965552 (541.86 it/sec) -training >> step=2567600, episode=429 reward=0.7663839 (568.03 it/sec) -training >> step=2567700, episode=429 reward=0.7502174 (579.18 it/sec) -training >> step=2567800, episode=429 reward=0.7689282 (565.76 it/sec) -training >> step=2567900, episode=429 reward=0.7653731 (570.79 it/sec) -training >> step=2568000, episode=429 reward=0.7710766 (571.92 it/sec) -training >> step=2568100, episode=429 reward=0.7565454 (595.34 it/sec) -training >> step=2568200, episode=429 reward=0.7595643 (550.60 it/sec) -training >> step=2568300, episode=429 reward=0.7465581 (585.29 it/sec) -training >> step=2568400, episode=429 reward=0.7572998 (574.99 it/sec) -training >> step=2568500, episode=429 reward=0.7676415 (547.33 it/sec) -training >> step=2568600, episode=429 reward=0.7625214 (577.99 it/sec) -training >> step=2568700, episode=429 reward=0.7585077 (565.57 it/sec) -training >> step=2568800, episode=429 reward=0.766459 (561.97 it/sec) -training >> step=2568900, episode=429 reward=0.7678723 (525.46 it/sec) -training >> step=2569000, episode=429 reward=0.7782733 (497.95 it/sec) -training >> step=2569100, episode=429 reward=0.7665218 (543.69 it/sec) -training >> step=2569200, episode=429 reward=0.757625 (530.76 it/sec) -training >> step=2569300, episode=429 reward=0.7679705 (516.24 it/sec) -training >> step=2569400, episode=429 reward=0.7800639 (522.62 it/sec) -training >> step=2569500, episode=429 reward=0.7721003 (510.86 it/sec) -training >> step=2569600, episode=429 reward=0.7722547 (541.95 it/sec) -training >> step=2569700, episode=429 reward=0.7780203 (525.70 it/sec) -training >> step=2569800, episode=429 reward=0.7782524 (510.61 it/sec) -training >> step=2569900, episode=429 reward=0.7558653 (514.74 it/sec) -training >> step=2570000, episode=429 reward=0.7633684 (540.58 it/sec) -training >> step=2570100, episode=429 reward=0.7661642 (505.82 it/sec) -training >> step=2570200, episode=429 reward=0.7823449 (431.82 it/sec) -training >> step=2570300, episode=429 reward=0.7920377 (516.48 it/sec) -training >> step=2570400, episode=429 reward=0.7458134 (520.42 it/sec) -training >> step=2570500, episode=429 reward=0.7607864 (511.27 it/sec) -training >> step=2570600, episode=429 reward=0.7740636 (545.94 it/sec) -training >> step=2570700, episode=429 reward=0.7565686 (559.50 it/sec) -training >> step=2570800, episode=429 reward=0.7631895 (547.34 it/sec) -training >> step=2570900, episode=429 reward=0.7703766 (518.54 it/sec) -training >> step=2571000, episode=429 reward=0.7775671 (512.16 it/sec) -training >> step=2571100, episode=429 reward=0.7712135 (498.41 it/sec) -training >> step=2571200, episode=429 reward=0.7612711 (531.76 it/sec) -training >> step=2571300, episode=429 reward=0.7713701 (537.36 it/sec) -training >> step=2571400, episode=429 reward=0.7865415 (529.66 it/sec) -training >> step=2571500, episode=429 reward=0.7557431 (516.16 it/sec) -training >> step=2571600, episode=429 reward=0.7690445 (515.64 it/sec) -training >> step=2571700, episode=429 reward=0.7446166 (515.50 it/sec) -training >> step=2571800, episode=429 reward=0.7732922 (561.65 it/sec) -training >> step=2571900, episode=429 reward=0.7431265 (554.68 it/sec) -training >> step=2572000, episode=429 reward=0.7689608 (519.68 it/sec) -training >> step=2572100, episode=429 reward=0.7725017 (532.94 it/sec) -training >> step=2572200, episode=429 reward=0.7699791 (509.90 it/sec) -training >> step=2572300, episode=429 reward=0.7795704 (531.97 it/sec) -training >> step=2572400, episode=429 reward=0.7870266 (560.00 it/sec) -training >> step=2572500, episode=429 reward=0.7801754 (548.93 it/sec) -training >> step=2572600, episode=429 reward=0.7622906 (529.33 it/sec) -training >> step=2572700, episode=429 reward=0.7742627 (478.44 it/sec) -training >> step=2572800, episode=429 reward=0.7647948 (518.50 it/sec) -training >> step=2572900, episode=429 reward=0.7721403 (562.74 it/sec) -training >> step=2573000, episode=429 reward=0.7538273 (551.27 it/sec) -training >> step=2573100, episode=429 reward=0.7448199 (537.04 it/sec) -training >> step=2573200, episode=429 reward=0.7289791 (511.00 it/sec) -training >> step=2573300, episode=430 reward=0.7563075 (151.79 it/sec) -training >> step=2573400, episode=430 reward=0.7711829 (527.92 it/sec) -training >> step=2573500, episode=430 reward=0.7389554 (528.50 it/sec) -training >> step=2573600, episode=430 reward=0.7726163 (505.79 it/sec) -training >> step=2573700, episode=430 reward=0.7664539 (534.46 it/sec) -training >> step=2573800, episode=430 reward=0.7764497 (549.66 it/sec) -training >> step=2573900, episode=430 reward=0.7743661 (518.01 it/sec) -training >> step=2574000, episode=430 reward=0.7674519 (524.35 it/sec) -training >> step=2574100, episode=430 reward=0.7854155 (509.37 it/sec) -training >> step=2574200, episode=430 reward=0.7736192 (550.36 it/sec) -training >> step=2574300, episode=430 reward=0.7556965 (541.60 it/sec) -training >> step=2574400, episode=430 reward=0.7716509 (525.51 it/sec) -training >> step=2574500, episode=430 reward=0.7822705 (552.24 it/sec) -training >> step=2574600, episode=430 reward=0.7608964 (566.41 it/sec) -training >> step=2574700, episode=430 reward=0.7608079 (507.13 it/sec) -training >> step=2574800, episode=430 reward=0.7679701 (535.93 it/sec) -training >> step=2574900, episode=430 reward=0.7662216 (557.38 it/sec) -training >> step=2575000, episode=430 reward=0.7896261 (518.79 it/sec) -training >> step=2575100, episode=430 reward=0.7699372 (518.47 it/sec) -training >> step=2575200, episode=430 reward=0.7739899 (518.34 it/sec) -training >> step=2575300, episode=430 reward=0.7836128 (519.47 it/sec) -training >> step=2575400, episode=430 reward=0.7736987 (534.54 it/sec) -training >> step=2575500, episode=430 reward=0.7702738 (541.26 it/sec) -training >> step=2575600, episode=430 reward=0.7438652 (536.73 it/sec) -training >> step=2575700, episode=430 reward=0.7943118 (512.74 it/sec) -training >> step=2575800, episode=430 reward=0.7639023 (493.86 it/sec) -training >> step=2575900, episode=430 reward=0.7559814 (557.25 it/sec) -training >> step=2576000, episode=430 reward=0.7699299 (545.94 it/sec) -training >> step=2576100, episode=430 reward=0.7569304 (509.99 it/sec) -training >> step=2576200, episode=430 reward=0.7713594 (508.18 it/sec) -training >> step=2576300, episode=430 reward=0.7659652 (421.28 it/sec) -training >> step=2576400, episode=430 reward=0.7680686 (544.01 it/sec) -training >> step=2576500, episode=430 reward=0.7679341 (546.24 it/sec) -training >> step=2576600, episode=430 reward=0.7725521 (526.12 it/sec) -training >> step=2576700, episode=430 reward=0.7608574 (598.18 it/sec) -training >> step=2576800, episode=430 reward=0.7591221 (519.11 it/sec) -training >> step=2576900, episode=430 reward=0.7494799 (537.19 it/sec) -training >> step=2577000, episode=430 reward=0.7718859 (545.10 it/sec) -training >> step=2577100, episode=430 reward=0.7532557 (542.24 it/sec) -training >> step=2577200, episode=430 reward=0.7668334 (547.13 it/sec) -training >> step=2577300, episode=430 reward=0.7468688 (505.88 it/sec) -training >> step=2577400, episode=430 reward=0.7714368 (461.60 it/sec) -training >> step=2577500, episode=430 reward=0.7390032 (544.68 it/sec) -training >> step=2577600, episode=430 reward=0.761671 (554.89 it/sec) -training >> step=2577700, episode=430 reward=0.7785834 (551.07 it/sec) -training >> step=2577800, episode=430 reward=0.7472693 (547.24 it/sec) -training >> step=2577900, episode=430 reward=0.7720857 (535.54 it/sec) -training >> step=2578000, episode=430 reward=0.7672734 (546.98 it/sec) -training >> step=2578100, episode=430 reward=0.762596 (561.65 it/sec) -training >> step=2578200, episode=430 reward=0.7863735 (583.91 it/sec) -training >> step=2578300, episode=430 reward=0.7595455 (527.30 it/sec) -training >> step=2578400, episode=430 reward=0.739458 (522.06 it/sec) -training >> step=2578500, episode=430 reward=0.7624337 (522.51 it/sec) -training >> step=2578600, episode=430 reward=0.7549666 (589.67 it/sec) -training >> step=2578700, episode=430 reward=0.7734664 (570.48 it/sec) -training >> step=2578800, episode=430 reward=0.7567307 (568.43 it/sec) -training >> step=2578900, episode=430 reward=0.7860331 (541.23 it/sec) -training >> step=2579000, episode=430 reward=0.778143 (558.35 it/sec) -training >> step=2579100, episode=430 reward=0.7845838 (514.83 it/sec) -training >> step=2579200, episode=430 reward=0.7815814 (539.76 it/sec) -training >> step=2579300, episode=431 reward=0.7789326 (163.98 it/sec) -training >> step=2579400, episode=431 reward=0.7586686 (552.82 it/sec) -training >> step=2579500, episode=431 reward=0.7503695 (541.88 it/sec) -training >> step=2579600, episode=431 reward=0.7803051 (572.49 it/sec) -training >> step=2579700, episode=431 reward=0.7665818 (581.18 it/sec) -training >> step=2579800, episode=431 reward=0.765674 (552.24 it/sec) -training >> step=2579900, episode=431 reward=0.7743732 (493.17 it/sec) -training >> step=2580000, episode=431 reward=0.7672788 (546.51 it/sec) -training >> step=2580100, episode=431 reward=0.7844424 (553.45 it/sec) -training >> step=2580200, episode=431 reward=0.7754855 (558.56 it/sec) -training >> step=2580300, episode=431 reward=0.7652957 (556.34 it/sec) -training >> step=2580400, episode=431 reward=0.7876681 (523.71 it/sec) -training >> step=2580500, episode=431 reward=0.760392 (518.80 it/sec) -training >> step=2580600, episode=431 reward=0.7615688 (580.35 it/sec) -training >> step=2580700, episode=431 reward=0.7711169 (545.13 it/sec) -training >> step=2580800, episode=431 reward=0.7510906 (579.80 it/sec) -training >> step=2580900, episode=431 reward=0.7660621 (554.10 it/sec) -training >> step=2581000, episode=431 reward=0.7655982 (520.63 it/sec) -training >> step=2581100, episode=431 reward=0.7730247 (532.92 it/sec) -training >> step=2581200, episode=431 reward=0.7549321 (564.09 it/sec) -training >> step=2581300, episode=431 reward=0.7775497 (599.20 it/sec) -training >> step=2581400, episode=431 reward=0.7570373 (567.70 it/sec) -training >> step=2581500, episode=431 reward=0.7775704 (520.56 it/sec) -training >> step=2581600, episode=431 reward=0.7588294 (524.15 it/sec) -training >> step=2581700, episode=431 reward=0.7636487 (572.12 it/sec) -training >> step=2581800, episode=431 reward=0.7629297 (532.74 it/sec) -training >> step=2581900, episode=431 reward=0.7750856 (535.19 it/sec) -training >> step=2582000, episode=431 reward=0.7721611 (527.38 it/sec) -training >> step=2582100, episode=431 reward=0.7561856 (516.76 it/sec) -training >> step=2582200, episode=431 reward=0.7627549 (490.93 it/sec) -training >> step=2582300, episode=431 reward=0.7522089 (540.25 it/sec) -training >> step=2582400, episode=431 reward=0.7442932 (410.31 it/sec) -training >> step=2582500, episode=431 reward=0.7583552 (570.45 it/sec) -training >> step=2582600, episode=431 reward=0.7747879 (526.21 it/sec) -training >> step=2582700, episode=431 reward=0.760341 (502.09 it/sec) -training >> step=2582800, episode=431 reward=0.7602462 (559.41 it/sec) -training >> step=2582900, episode=431 reward=0.7672308 (562.12 it/sec) -training >> step=2583000, episode=431 reward=0.7689672 (549.21 it/sec) -training >> step=2583100, episode=431 reward=0.7734697 (512.12 it/sec) -training >> step=2583200, episode=431 reward=0.7790986 (523.85 it/sec) -training >> step=2583300, episode=431 reward=0.7678053 (514.44 it/sec) -training >> step=2583400, episode=431 reward=0.7662575 (541.20 it/sec) -training >> step=2583500, episode=431 reward=0.7629828 (556.16 it/sec) -training >> step=2583600, episode=431 reward=0.742301 (536.77 it/sec) -training >> step=2583700, episode=431 reward=0.7657991 (501.16 it/sec) -training >> step=2583800, episode=431 reward=0.7569027 (521.60 it/sec) -training >> step=2583900, episode=431 reward=0.7479939 (522.20 it/sec) -training >> step=2584000, episode=431 reward=0.7757363 (573.61 it/sec) -training >> step=2584100, episode=431 reward=0.7710952 (549.04 it/sec) -training >> step=2584200, episode=431 reward=0.7535089 (509.45 it/sec) -training >> step=2584300, episode=431 reward=0.7620746 (517.63 it/sec) -training >> step=2584400, episode=431 reward=0.7763375 (506.39 it/sec) -training >> step=2584500, episode=431 reward=0.7652134 (475.41 it/sec) -training >> step=2584600, episode=431 reward=0.7608647 (531.70 it/sec) -training >> step=2584700, episode=431 reward=0.7713173 (533.98 it/sec) -training >> step=2584800, episode=431 reward=0.7731918 (539.44 it/sec) -training >> step=2584900, episode=431 reward=0.7685765 (511.23 it/sec) -training >> step=2585000, episode=431 reward=0.7803268 (517.46 it/sec) -training >> step=2585100, episode=431 reward=0.7858565 (549.89 it/sec) -training >> step=2585200, episode=431 reward=0.739084 (567.44 it/sec) -training >> step=2585300, episode=432 reward=0.7594909 (145.91 it/sec) -training >> step=2585400, episode=432 reward=0.7477534 (515.45 it/sec) -training >> step=2585500, episode=432 reward=0.7523884 (548.83 it/sec) -training >> step=2585600, episode=432 reward=0.7481385 (558.23 it/sec) -training >> step=2585700, episode=432 reward=0.7773772 (512.09 it/sec) -training >> step=2585800, episode=432 reward=0.755152 (531.35 it/sec) -training >> step=2585900, episode=432 reward=0.7570571 (548.24 it/sec) -training >> step=2586000, episode=432 reward=0.7683185 (546.40 it/sec) -training >> step=2586100, episode=432 reward=0.7549506 (548.03 it/sec) -training >> step=2586200, episode=432 reward=0.7799001 (550.54 it/sec) -training >> step=2586300, episode=432 reward=0.754472 (522.75 it/sec) -training >> step=2586400, episode=432 reward=0.7640015 (543.25 it/sec) -training >> step=2586500, episode=432 reward=0.7671509 (555.92 it/sec) -training >> step=2586600, episode=432 reward=0.768055 (505.30 it/sec) -training >> step=2586700, episode=432 reward=0.7835873 (533.55 it/sec) -training >> step=2586800, episode=432 reward=0.7737751 (543.73 it/sec) -training >> step=2586900, episode=432 reward=0.7588913 (539.57 it/sec) -training >> step=2587000, episode=432 reward=0.7742942 (549.97 it/sec) -training >> step=2587100, episode=432 reward=0.7631822 (536.62 it/sec) -training >> step=2587200, episode=432 reward=0.7521757 (513.51 it/sec) -training >> step=2587300, episode=432 reward=0.7595576 (544.48 it/sec) -training >> step=2587400, episode=432 reward=0.7751099 (496.87 it/sec) -training >> step=2587500, episode=432 reward=0.8007007 (549.17 it/sec) -training >> step=2587600, episode=432 reward=0.7714471 (569.61 it/sec) -training >> step=2587700, episode=432 reward=0.7731444 (463.12 it/sec) -training >> step=2587800, episode=432 reward=0.7658204 (524.08 it/sec) -training >> step=2587900, episode=432 reward=0.7863189 (514.51 it/sec) -training >> step=2588000, episode=432 reward=0.7595828 (538.81 it/sec) -training >> step=2588100, episode=432 reward=0.7641327 (544.40 it/sec) -training >> step=2588200, episode=432 reward=0.761543 (512.94 it/sec) -training >> step=2588300, episode=432 reward=0.7733225 (542.15 it/sec) -training >> step=2588400, episode=432 reward=0.7707908 (402.38 it/sec) -training >> step=2588500, episode=432 reward=0.7709343 (543.62 it/sec) -training >> step=2588600, episode=432 reward=0.7568244 (543.17 it/sec) -training >> step=2588700, episode=432 reward=0.7883021 (540.54 it/sec) -training >> step=2588800, episode=432 reward=0.7492286 (491.51 it/sec) -training >> step=2588900, episode=432 reward=0.7457256 (540.36 it/sec) -training >> step=2589000, episode=432 reward=0.7536899 (513.10 it/sec) -training >> step=2589100, episode=432 reward=0.76271 (528.46 it/sec) -training >> step=2589200, episode=432 reward=0.7779143 (563.48 it/sec) -training >> step=2589300, episode=432 reward=0.7689815 (544.92 it/sec) -training >> step=2589400, episode=432 reward=0.7808461 (487.63 it/sec) -training >> step=2589500, episode=432 reward=0.7737821 (478.17 it/sec) -training >> step=2589600, episode=432 reward=0.7768856 (508.57 it/sec) -training >> step=2589700, episode=432 reward=0.7613081 (520.17 it/sec) -training >> step=2589800, episode=432 reward=0.7495528 (516.74 it/sec) -training >> step=2589900, episode=432 reward=0.7725071 (496.62 it/sec) -training >> step=2590000, episode=432 reward=0.7702198 (541.18 it/sec) -training >> step=2590100, episode=432 reward=0.7600334 (549.90 it/sec) -training >> step=2590200, episode=432 reward=0.7639684 (503.94 it/sec) -training >> step=2590300, episode=432 reward=0.75157 (449.76 it/sec) -training >> step=2590400, episode=432 reward=0.7633384 (525.09 it/sec) -training >> step=2590500, episode=432 reward=0.7728003 (503.35 it/sec) -training >> step=2590600, episode=432 reward=0.7547418 (519.78 it/sec) -training >> step=2590700, episode=432 reward=0.7768198 (512.24 it/sec) -training >> step=2590800, episode=432 reward=0.7574515 (527.26 it/sec) -training >> step=2590900, episode=432 reward=0.7759511 (532.37 it/sec) -training >> step=2591000, episode=432 reward=0.7683225 (537.78 it/sec) -training >> step=2591100, episode=432 reward=0.7639505 (509.55 it/sec) -training >> step=2591200, episode=432 reward=0.775074 (555.87 it/sec) -training >> step=2591300, episode=433 reward=0.7619807 (186.78 it/sec) -training >> step=2591400, episode=433 reward=0.7450983 (525.31 it/sec) -training >> step=2591500, episode=433 reward=0.7570372 (547.94 it/sec) -training >> step=2591600, episode=433 reward=0.7510243 (539.72 it/sec) -training >> step=2591700, episode=433 reward=0.778907 (536.70 it/sec) -training >> step=2591800, episode=433 reward=0.7834523 (508.70 it/sec) -training >> step=2591900, episode=433 reward=0.7675306 (539.93 it/sec) -training >> step=2592000, episode=433 reward=0.760936 (553.41 it/sec) -training >> step=2592100, episode=433 reward=0.7479027 (514.85 it/sec) -training >> step=2592200, episode=433 reward=0.7521654 (493.31 it/sec) -training >> step=2592300, episode=433 reward=0.7640616 (541.37 it/sec) -training >> step=2592400, episode=433 reward=0.758686 (522.24 it/sec) -training >> step=2592500, episode=433 reward=0.78069 (522.04 it/sec) -training >> step=2592600, episode=433 reward=0.7898786 (521.67 it/sec) -training >> step=2592700, episode=433 reward=0.7854277 (501.62 it/sec) -training >> step=2592800, episode=433 reward=0.7777671 (516.42 it/sec) -training >> step=2592900, episode=433 reward=0.7478839 (507.63 it/sec) -training >> step=2593000, episode=433 reward=0.7514024 (534.55 it/sec) -training >> step=2593100, episode=433 reward=0.777364 (553.82 it/sec) -training >> step=2593200, episode=433 reward=0.7900843 (539.25 it/sec) -training >> step=2593300, episode=433 reward=0.7782931 (521.76 it/sec) -training >> step=2593400, episode=433 reward=0.7836695 (517.45 it/sec) -training >> step=2593500, episode=433 reward=0.7659097 (535.82 it/sec) -training >> step=2593600, episode=433 reward=0.7773961 (500.94 it/sec) -training >> step=2593700, episode=433 reward=0.7486551 (510.56 it/sec) -training >> step=2593800, episode=433 reward=0.7739214 (483.87 it/sec) -training >> step=2593900, episode=433 reward=0.7746655 (531.65 it/sec) -training >> step=2594000, episode=433 reward=0.761277 (493.93 it/sec) -training >> step=2594100, episode=433 reward=0.786245 (495.29 it/sec) -training >> step=2594200, episode=433 reward=0.7723467 (516.60 it/sec) -training >> step=2594300, episode=433 reward=0.7571513 (530.07 it/sec) -training >> step=2594400, episode=433 reward=0.7912315 (507.44 it/sec) -training >> step=2594500, episode=433 reward=0.7632687 (365.88 it/sec) -training >> step=2594600, episode=433 reward=0.7775553 (508.75 it/sec) -training >> step=2594700, episode=433 reward=0.789001 (559.23 it/sec) -training >> step=2594800, episode=433 reward=0.7871212 (512.63 it/sec) -training >> step=2594900, episode=433 reward=0.7866537 (495.88 it/sec) -training >> step=2595000, episode=433 reward=0.7742825 (509.42 it/sec) -training >> step=2595100, episode=433 reward=0.7783954 (540.33 it/sec) -training >> step=2595200, episode=433 reward=0.7733831 (518.67 it/sec) -training >> step=2595300, episode=433 reward=0.7646263 (514.18 it/sec) -training >> step=2595400, episode=433 reward=0.7594407 (524.70 it/sec) -training >> step=2595500, episode=433 reward=0.7633392 (530.80 it/sec) -training >> step=2595600, episode=433 reward=0.7921587 (520.48 it/sec) -training >> step=2595700, episode=433 reward=0.7637913 (484.10 it/sec) -training >> step=2595800, episode=433 reward=0.7787313 (506.55 it/sec) -training >> step=2595900, episode=433 reward=0.7818863 (545.08 it/sec) -training >> step=2596000, episode=433 reward=0.7590152 (551.67 it/sec) -training >> step=2596100, episode=433 reward=0.765017 (543.80 it/sec) -training >> step=2596200, episode=433 reward=0.7890003 (539.59 it/sec) -training >> step=2596300, episode=433 reward=0.7727156 (567.28 it/sec) -training >> step=2596400, episode=433 reward=0.7585065 (538.39 it/sec) -training >> step=2596500, episode=433 reward=0.7688628 (553.82 it/sec) -training >> step=2596600, episode=433 reward=0.754868 (567.98 it/sec) -training >> step=2596700, episode=433 reward=0.7780491 (530.70 it/sec) -training >> step=2596800, episode=433 reward=0.776526 (515.45 it/sec) -training >> step=2596900, episode=433 reward=0.7563279 (545.54 it/sec) -training >> step=2597000, episode=433 reward=0.7775499 (539.92 it/sec) -training >> step=2597100, episode=433 reward=0.7636805 (580.67 it/sec) -training >> step=2597200, episode=433 reward=0.7676666 (526.41 it/sec) -training >> step=2597300, episode=434 reward=0.7650661 (157.34 it/sec) -training >> step=2597400, episode=434 reward=0.7402767 (530.95 it/sec) -training >> step=2597500, episode=434 reward=0.7629781 (529.52 it/sec) -training >> step=2597600, episode=434 reward=0.7430049 (530.42 it/sec) -training >> step=2597700, episode=434 reward=0.7482926 (524.74 it/sec) -training >> step=2597800, episode=434 reward=0.7753259 (548.48 it/sec) -training >> step=2597900, episode=434 reward=0.763182 (546.84 it/sec) -training >> step=2598000, episode=434 reward=0.7586212 (564.40 it/sec) -training >> step=2598100, episode=434 reward=0.7669452 (516.56 it/sec) -training >> step=2598200, episode=434 reward=0.7734146 (534.57 it/sec) -training >> step=2598300, episode=434 reward=0.7737226 (581.00 it/sec) -training >> step=2598400, episode=434 reward=0.7545233 (549.26 it/sec) -training >> step=2598500, episode=434 reward=0.7621807 (530.35 it/sec) -training >> step=2598600, episode=434 reward=0.7653133 (565.16 it/sec) -training >> step=2598700, episode=434 reward=0.7771243 (541.33 it/sec) -training >> step=2598800, episode=434 reward=0.7841174 (529.50 it/sec) -training >> step=2598900, episode=434 reward=0.7764763 (536.73 it/sec) -training >> step=2599000, episode=434 reward=0.7686803 (526.19 it/sec) -training >> step=2599100, episode=434 reward=0.7665842 (544.85 it/sec) -training >> step=2599200, episode=434 reward=0.7790927 (523.80 it/sec) -training >> step=2599300, episode=434 reward=0.7716798 (544.72 it/sec) -training >> step=2599400, episode=434 reward=0.7811958 (586.54 it/sec) -training >> step=2599500, episode=434 reward=0.7776762 (550.39 it/sec) -training >> step=2599600, episode=434 reward=0.7536823 (506.61 it/sec) -training >> step=2599700, episode=434 reward=0.778169 (538.27 it/sec) -training >> step=2599800, episode=434 reward=0.7670097 (576.57 it/sec) -training >> step=2599900, episode=434 reward=0.7628191 (515.45 it/sec) -training >> step=2600000, episode=434 reward=0.7634592 (525.93 it/sec) -training >> step=2600100, episode=434 reward=0.7661444 (539.99 it/sec) -training >> step=2600200, episode=434 reward=0.7757932 (542.55 it/sec) -training >> step=2600300, episode=434 reward=0.7589304 (552.66 it/sec) -training >> step=2600400, episode=434 reward=0.7670469 (555.65 it/sec) -training >> step=2600500, episode=434 reward=0.7517807 (522.90 it/sec) -training >> step=2600600, episode=434 reward=0.7815326 (518.80 it/sec) -training >> step=2600700, episode=434 reward=0.764247 (406.05 it/sec) -training >> step=2600800, episode=434 reward=0.7641813 (561.40 it/sec) -training >> step=2600900, episode=434 reward=0.7643423 (561.34 it/sec) -training >> step=2601000, episode=434 reward=0.7815477 (482.75 it/sec) -training >> step=2601100, episode=434 reward=0.7757568 (545.56 it/sec) -training >> step=2601200, episode=434 reward=0.7618451 (563.10 it/sec) -training >> step=2601300, episode=434 reward=0.7581859 (576.59 it/sec) -training >> step=2601400, episode=434 reward=0.74994 (541.79 it/sec) -training >> step=2601500, episode=434 reward=0.770665 (501.05 it/sec) -training >> step=2601600, episode=434 reward=0.7706941 (559.73 it/sec) -training >> step=2601700, episode=434 reward=0.7568748 (552.89 it/sec) -training >> step=2601800, episode=434 reward=0.7604828 (547.51 it/sec) -training >> step=2601900, episode=434 reward=0.7767499 (537.61 it/sec) -training >> step=2602000, episode=434 reward=0.7556448 (546.56 it/sec) -training >> step=2602100, episode=434 reward=0.7719494 (521.81 it/sec) -training >> step=2602200, episode=434 reward=0.7617487 (541.68 it/sec) -training >> step=2602300, episode=434 reward=0.7731746 (551.23 it/sec) -training >> step=2602400, episode=434 reward=0.7445922 (560.14 it/sec) -training >> step=2602500, episode=434 reward=0.769751 (580.39 it/sec) -training >> step=2602600, episode=434 reward=0.7789774 (515.54 it/sec) -training >> step=2602700, episode=434 reward=0.7738031 (523.11 it/sec) -training >> step=2602800, episode=434 reward=0.791084 (577.34 it/sec) -training >> step=2602900, episode=434 reward=0.7326336 (550.90 it/sec) -training >> step=2603000, episode=434 reward=0.7666766 (535.30 it/sec) -training >> step=2603100, episode=434 reward=0.7759157 (535.17 it/sec) -training >> step=2603200, episode=434 reward=0.7624183 (516.71 it/sec) -training >> step=2603300, episode=435 reward=0.7852573 (204.42 it/sec) -training >> step=2603400, episode=435 reward=0.7490907 (522.48 it/sec) -training >> step=2603500, episode=435 reward=0.7490677 (525.34 it/sec) -training >> step=2603600, episode=435 reward=0.768546 (563.95 it/sec) -training >> step=2603700, episode=435 reward=0.7400086 (524.38 it/sec) -training >> step=2603800, episode=435 reward=0.769991 (503.63 it/sec) -training >> step=2603900, episode=435 reward=0.7701759 (534.21 it/sec) -training >> step=2604000, episode=435 reward=0.7705914 (541.75 it/sec) -training >> step=2604100, episode=435 reward=0.7621324 (537.38 it/sec) -training >> step=2604200, episode=435 reward=0.7687435 (536.68 it/sec) -training >> step=2604300, episode=435 reward=0.7627716 (542.59 it/sec) -training >> step=2604400, episode=435 reward=0.7608198 (553.78 it/sec) -training >> step=2604500, episode=435 reward=0.7824129 (523.52 it/sec) -training >> step=2604600, episode=435 reward=0.7456884 (547.11 it/sec) -training >> step=2604700, episode=435 reward=0.7695086 (526.07 it/sec) -training >> step=2604800, episode=435 reward=0.7856348 (550.01 it/sec) -training >> step=2604900, episode=435 reward=0.782258 (555.30 it/sec) -training >> step=2605000, episode=435 reward=0.7717068 (548.58 it/sec) -training >> step=2605100, episode=435 reward=0.749247 (544.09 it/sec) -training >> step=2605200, episode=435 reward=0.7785446 (539.72 it/sec) -training >> step=2605300, episode=435 reward=0.77672 (530.71 it/sec) -training >> step=2605400, episode=435 reward=0.7631195 (503.71 it/sec) -training >> step=2605500, episode=435 reward=0.7706334 (582.20 it/sec) -training >> step=2605600, episode=435 reward=0.7472234 (543.42 it/sec) -training >> step=2605700, episode=435 reward=0.7641408 (532.54 it/sec) -training >> step=2605800, episode=435 reward=0.7623519 (547.31 it/sec) -training >> step=2605900, episode=435 reward=0.7774904 (544.11 it/sec) -training >> step=2606000, episode=435 reward=0.7637203 (556.09 it/sec) -training >> step=2606100, episode=435 reward=0.7873781 (551.75 it/sec) -training >> step=2606200, episode=435 reward=0.7744805 (558.24 it/sec) -training >> step=2606300, episode=435 reward=0.7406932 (524.61 it/sec) -training >> step=2606400, episode=435 reward=0.7665774 (486.70 it/sec) -training >> step=2606500, episode=435 reward=0.7818605 (544.33 it/sec) -training >> step=2606600, episode=435 reward=0.7745744 (558.24 it/sec) -training >> step=2606700, episode=435 reward=0.766671 (552.83 it/sec) -training >> step=2606800, episode=435 reward=0.7631004 (547.54 it/sec) -training >> step=2606900, episode=435 reward=0.7658634 (517.29 it/sec) -training >> step=2607000, episode=435 reward=0.766357 (385.89 it/sec) -training >> step=2607100, episode=435 reward=0.7856546 (576.81 it/sec) -training >> step=2607200, episode=435 reward=0.7918222 (540.32 it/sec) -training >> step=2607300, episode=435 reward=0.762099 (574.42 it/sec) -training >> step=2607400, episode=435 reward=0.7810974 (513.73 it/sec) -training >> step=2607500, episode=435 reward=0.7625601 (497.02 it/sec) -training >> step=2607600, episode=435 reward=0.7890664 (536.62 it/sec) -training >> step=2607700, episode=435 reward=0.7393481 (567.42 it/sec) -training >> step=2607800, episode=435 reward=0.7687984 (585.90 it/sec) -training >> step=2607900, episode=435 reward=0.7697634 (531.52 it/sec) -training >> step=2608000, episode=435 reward=0.75335 (485.62 it/sec) -training >> step=2608100, episode=435 reward=0.7653391 (554.05 it/sec) -training >> step=2608200, episode=435 reward=0.7729411 (562.12 it/sec) -training >> step=2608300, episode=435 reward=0.7474064 (559.02 it/sec) -training >> step=2608400, episode=435 reward=0.7343316 (542.96 it/sec) -training >> step=2608500, episode=435 reward=0.7733801 (542.59 it/sec) -training >> step=2608600, episode=435 reward=0.7662908 (578.86 it/sec) -training >> step=2608700, episode=435 reward=0.7775463 (552.70 it/sec) -training >> step=2608800, episode=435 reward=0.7449161 (545.28 it/sec) -training >> step=2608900, episode=435 reward=0.7652975 (541.18 it/sec) -training >> step=2609000, episode=435 reward=0.7526538 (554.71 it/sec) -training >> step=2609100, episode=435 reward=0.7888044 (482.01 it/sec) -training >> step=2609200, episode=435 reward=0.7621773 (521.33 it/sec) -training >> step=2609300, episode=436 reward=0.7519707 (209.18 it/sec) -training >> step=2609400, episode=436 reward=0.7567652 (553.94 it/sec) -training >> step=2609500, episode=436 reward=0.7537943 (537.08 it/sec) -training >> step=2609600, episode=436 reward=0.7394972 (547.95 it/sec) -training >> step=2609700, episode=436 reward=0.7596494 (563.77 it/sec) -training >> step=2609800, episode=436 reward=0.7738763 (567.78 it/sec) -training >> step=2609900, episode=436 reward=0.7583338 (536.27 it/sec) -training >> step=2610000, episode=436 reward=0.7672641 (505.30 it/sec) -training >> step=2610100, episode=436 reward=0.7559507 (472.78 it/sec) -training >> step=2610200, episode=436 reward=0.7599003 (517.98 it/sec) -training >> step=2610300, episode=436 reward=0.7632636 (548.85 it/sec) -training >> step=2610400, episode=436 reward=0.7644283 (519.61 it/sec) -training >> step=2610500, episode=436 reward=0.7724043 (513.59 it/sec) -training >> step=2610600, episode=436 reward=0.7662823 (519.78 it/sec) -training >> step=2610700, episode=436 reward=0.7635233 (523.41 it/sec) -training >> step=2610800, episode=436 reward=0.7585468 (528.77 it/sec) -training >> step=2610900, episode=436 reward=0.7910212 (503.33 it/sec) -training >> step=2611000, episode=436 reward=0.7775541 (540.71 it/sec) -training >> step=2611100, episode=436 reward=0.7539576 (529.10 it/sec) -training >> step=2611200, episode=436 reward=0.7684553 (481.46 it/sec) -training >> step=2611300, episode=436 reward=0.7792805 (537.65 it/sec) -training >> step=2611400, episode=436 reward=0.7745977 (517.21 it/sec) -training >> step=2611500, episode=436 reward=0.7609927 (534.75 it/sec) -training >> step=2611600, episode=436 reward=0.77885 (517.93 it/sec) -training >> step=2611700, episode=436 reward=0.7751407 (487.55 it/sec) -training >> step=2611800, episode=436 reward=0.7724305 (530.60 it/sec) -training >> step=2611900, episode=436 reward=0.7938763 (538.52 it/sec) -training >> step=2612000, episode=436 reward=0.7670262 (508.23 it/sec) -training >> step=2612100, episode=436 reward=0.7848499 (537.74 it/sec) -training >> step=2612200, episode=436 reward=0.7764501 (505.14 it/sec) -training >> step=2612300, episode=436 reward=0.7636461 (525.39 it/sec) -training >> step=2612400, episode=436 reward=0.754672 (521.61 it/sec) -training >> step=2612500, episode=436 reward=0.7845506 (535.14 it/sec) -training >> step=2612600, episode=436 reward=0.7761359 (530.95 it/sec) -training >> step=2612700, episode=436 reward=0.7684644 (532.20 it/sec) -training >> step=2612800, episode=436 reward=0.7717257 (528.89 it/sec) -training >> step=2612900, episode=436 reward=0.7743396 (569.00 it/sec) -training >> step=2613000, episode=436 reward=0.7644839 (484.96 it/sec) -training >> step=2613100, episode=436 reward=0.7675161 (525.81 it/sec) -training >> step=2613200, episode=436 reward=0.7592789 (554.33 it/sec) -training >> step=2613300, episode=436 reward=0.7725068 (431.33 it/sec) -training >> step=2613400, episode=436 reward=0.7718701 (564.96 it/sec) -training >> step=2613500, episode=436 reward=0.7621598 (541.27 it/sec) -training >> step=2613600, episode=436 reward=0.7769699 (543.18 it/sec) -training >> step=2613700, episode=436 reward=0.7705374 (511.41 it/sec) -training >> step=2613800, episode=436 reward=0.7552257 (476.94 it/sec) -training >> step=2613900, episode=436 reward=0.7703231 (539.41 it/sec) -training >> step=2614000, episode=436 reward=0.764208 (518.47 it/sec) -training >> step=2614100, episode=436 reward=0.7905434 (528.61 it/sec) -training >> step=2614200, episode=436 reward=0.760952 (525.54 it/sec) -training >> step=2614300, episode=436 reward=0.770199 (535.78 it/sec) -training >> step=2614400, episode=436 reward=0.7490193 (528.94 it/sec) -training >> step=2614500, episode=436 reward=0.7390274 (532.45 it/sec) -training >> step=2614600, episode=436 reward=0.7452042 (556.78 it/sec) -training >> step=2614700, episode=436 reward=0.7625221 (501.66 it/sec) -training >> step=2614800, episode=436 reward=0.764244 (561.18 it/sec) -training >> step=2614900, episode=436 reward=0.7526236 (519.59 it/sec) -training >> step=2615000, episode=436 reward=0.756852 (543.51 it/sec) -training >> step=2615100, episode=436 reward=0.7786809 (525.55 it/sec) -training >> step=2615200, episode=436 reward=0.7414894 (536.72 it/sec) -training >> step=2615300, episode=437 reward=0.7756817 (151.39 it/sec) -training >> step=2615400, episode=437 reward=0.7640046 (536.84 it/sec) -training >> step=2615500, episode=437 reward=0.7506022 (561.06 it/sec) -training >> step=2615600, episode=437 reward=0.7517179 (531.53 it/sec) -training >> step=2615700, episode=437 reward=0.7547153 (528.43 it/sec) -training >> step=2615800, episode=437 reward=0.7763734 (544.38 it/sec) -training >> step=2615900, episode=437 reward=0.7653978 (543.02 it/sec) -training >> step=2616000, episode=437 reward=0.7449132 (518.16 it/sec) -training >> step=2616100, episode=437 reward=0.7573226 (551.48 it/sec) -training >> step=2616200, episode=437 reward=0.7667357 (514.66 it/sec) -training >> step=2616300, episode=437 reward=0.7607595 (506.51 it/sec) -training >> step=2616400, episode=437 reward=0.7617482 (560.94 it/sec) -training >> step=2616500, episode=437 reward=0.7642673 (510.29 it/sec) -training >> step=2616600, episode=437 reward=0.7767456 (508.42 it/sec) -training >> step=2616700, episode=437 reward=0.7696424 (570.13 it/sec) -training >> step=2616800, episode=437 reward=0.7553051 (557.52 it/sec) -training >> step=2616900, episode=437 reward=0.7555326 (567.91 it/sec) -training >> step=2617000, episode=437 reward=0.7581789 (553.53 it/sec) -training >> step=2617100, episode=437 reward=0.7756613 (519.98 it/sec) -training >> step=2617200, episode=437 reward=0.7696451 (577.01 it/sec) -training >> step=2617300, episode=437 reward=0.7725694 (560.93 it/sec) -training >> step=2617400, episode=437 reward=0.7678661 (510.81 it/sec) -training >> step=2617500, episode=437 reward=0.775671 (559.88 it/sec) -training >> step=2617600, episode=437 reward=0.7813926 (541.71 it/sec) -training >> step=2617700, episode=437 reward=0.7804858 (498.20 it/sec) -training >> step=2617800, episode=437 reward=0.7691674 (540.09 it/sec) -training >> step=2617900, episode=437 reward=0.7636766 (509.44 it/sec) -training >> step=2618000, episode=437 reward=0.7803798 (541.99 it/sec) -training >> step=2618100, episode=437 reward=0.7639278 (532.66 it/sec) -training >> step=2618200, episode=437 reward=0.7447957 (524.80 it/sec) -training >> step=2618300, episode=437 reward=0.7599109 (504.27 it/sec) -training >> step=2618400, episode=437 reward=0.7764542 (539.12 it/sec) -training >> step=2618500, episode=437 reward=0.7636188 (538.62 it/sec) -training >> step=2618600, episode=437 reward=0.7728202 (533.85 it/sec) -training >> step=2618700, episode=437 reward=0.7536111 (549.48 it/sec) -training >> step=2618800, episode=437 reward=0.7689764 (518.55 it/sec) -training >> step=2618900, episode=437 reward=0.7526969 (526.79 it/sec) -training >> step=2619000, episode=437 reward=0.7851006 (554.93 it/sec) -training >> step=2619100, episode=437 reward=0.7791109 (569.62 it/sec) -training >> step=2619200, episode=437 reward=0.7659234 (531.89 it/sec) -training >> step=2619300, episode=437 reward=0.7642503 (528.55 it/sec) -training >> step=2619400, episode=437 reward=0.7593288 (379.24 it/sec) -training >> step=2619500, episode=437 reward=0.7653353 (547.77 it/sec) -training >> step=2619600, episode=437 reward=0.7806919 (519.29 it/sec) -training >> step=2619700, episode=437 reward=0.7655275 (531.93 it/sec) -training >> step=2619800, episode=437 reward=0.7616375 (501.45 it/sec) -training >> step=2619900, episode=437 reward=0.7415357 (545.24 it/sec) -training >> step=2620000, episode=437 reward=0.7580279 (520.88 it/sec) -training >> step=2620100, episode=437 reward=0.7940398 (478.34 it/sec) -training >> step=2620200, episode=437 reward=0.7757357 (538.96 it/sec) -training >> step=2620300, episode=437 reward=0.7635448 (513.83 it/sec) -training >> step=2620400, episode=437 reward=0.7786651 (458.66 it/sec) -training >> step=2620500, episode=437 reward=0.7547131 (507.65 it/sec) -training >> step=2620600, episode=437 reward=0.7812608 (526.47 it/sec) -training >> step=2620700, episode=437 reward=0.7650309 (530.08 it/sec) -training >> step=2620800, episode=437 reward=0.7600499 (530.97 it/sec) -training >> step=2620900, episode=437 reward=0.7575119 (512.43 it/sec) -training >> step=2621000, episode=437 reward=0.751353 (563.89 it/sec) -training >> step=2621100, episode=437 reward=0.7665486 (546.19 it/sec) -training >> step=2621200, episode=437 reward=0.7480743 (525.74 it/sec) -training >> step=2621300, episode=438 reward=0.7586381 (180.08 it/sec) -training >> step=2621400, episode=438 reward=0.7628213 (493.50 it/sec) -training >> step=2621500, episode=438 reward=0.7649038 (521.61 it/sec) -training >> step=2621600, episode=438 reward=0.7673466 (521.10 it/sec) -training >> step=2621700, episode=438 reward=0.7629061 (550.04 it/sec) -training >> step=2621800, episode=438 reward=0.7631054 (562.95 it/sec) -training >> step=2621900, episode=438 reward=0.7643722 (501.32 it/sec) -training >> step=2622000, episode=438 reward=0.7658768 (528.91 it/sec) -training >> step=2622100, episode=438 reward=0.78058 (554.75 it/sec) -training >> step=2622200, episode=438 reward=0.7699929 (553.55 it/sec) -training >> step=2622300, episode=438 reward=0.7674251 (499.11 it/sec) -training >> step=2622400, episode=438 reward=0.7893302 (517.78 it/sec) -training >> step=2622500, episode=438 reward=0.7634633 (543.93 it/sec) -training >> step=2622600, episode=438 reward=0.7557594 (540.15 it/sec) -training >> step=2622700, episode=438 reward=0.748012 (530.81 it/sec) -training >> step=2622800, episode=438 reward=0.7608926 (544.02 it/sec) -training >> step=2622900, episode=438 reward=0.7767457 (546.33 it/sec) -training >> step=2623000, episode=438 reward=0.7671608 (507.66 it/sec) -training >> step=2623100, episode=438 reward=0.7703294 (519.59 it/sec) -training >> step=2623200, episode=438 reward=0.7735385 (530.79 it/sec) -training >> step=2623300, episode=438 reward=0.7665447 (532.13 it/sec) -training >> step=2623400, episode=438 reward=0.770853 (542.24 it/sec) -training >> step=2623500, episode=438 reward=0.767822 (533.02 it/sec) -training >> step=2623600, episode=438 reward=0.7746449 (525.50 it/sec) -training >> step=2623700, episode=438 reward=0.7684373 (547.66 it/sec) -training >> step=2623800, episode=438 reward=0.7665458 (528.24 it/sec) -training >> step=2623900, episode=438 reward=0.7693841 (528.57 it/sec) -training >> step=2624000, episode=438 reward=0.7436956 (534.21 it/sec) -training >> step=2624100, episode=438 reward=0.7688736 (540.90 it/sec) -training >> step=2624200, episode=438 reward=0.7697517 (515.70 it/sec) -training >> step=2624300, episode=438 reward=0.7527265 (534.38 it/sec) -training >> step=2624400, episode=438 reward=0.7509285 (513.15 it/sec) -training >> step=2624500, episode=438 reward=0.7607431 (543.14 it/sec) -training >> step=2624600, episode=438 reward=0.7512627 (513.33 it/sec) -training >> step=2624700, episode=438 reward=0.769416 (523.50 it/sec) -training >> step=2624800, episode=438 reward=0.7791021 (546.64 it/sec) -training >> step=2624900, episode=438 reward=0.7737302 (517.57 it/sec) -training >> step=2625000, episode=438 reward=0.7651756 (521.08 it/sec) -training >> step=2625100, episode=438 reward=0.7516848 (518.83 it/sec) -training >> step=2625200, episode=438 reward=0.756902 (545.56 it/sec) -training >> step=2625300, episode=438 reward=0.7774546 (500.96 it/sec) -training >> step=2625400, episode=438 reward=0.758032 (505.74 it/sec) -training >> step=2625500, episode=438 reward=0.7773104 (478.22 it/sec) -training >> step=2625600, episode=438 reward=0.7547171 (351.94 it/sec) -training >> step=2625700, episode=438 reward=0.7856576 (524.09 it/sec) -training >> step=2625800, episode=438 reward=0.7721834 (555.14 it/sec) -training >> step=2625900, episode=438 reward=0.7465318 (512.22 it/sec) -training >> step=2626000, episode=438 reward=0.7260233 (563.94 it/sec) -training >> step=2626100, episode=438 reward=0.7575957 (523.75 it/sec) -training >> step=2626200, episode=438 reward=0.7534705 (530.21 it/sec) -training >> step=2626300, episode=438 reward=0.7774307 (530.61 it/sec) -training >> step=2626400, episode=438 reward=0.7659842 (547.09 it/sec) -training >> step=2626500, episode=438 reward=0.7615526 (537.30 it/sec) -training >> step=2626600, episode=438 reward=0.7903299 (516.75 it/sec) -training >> step=2626700, episode=438 reward=0.7587225 (538.09 it/sec) -training >> step=2626800, episode=438 reward=0.7709284 (518.66 it/sec) -training >> step=2626900, episode=438 reward=0.7350489 (542.79 it/sec) -training >> step=2627000, episode=438 reward=0.757812 (518.50 it/sec) -training >> step=2627100, episode=438 reward=0.7680706 (528.46 it/sec) -training >> step=2627200, episode=438 reward=0.7798734 (526.19 it/sec) -training >> step=2627300, episode=439 reward=0.7549787 (180.46 it/sec) -training >> step=2627400, episode=439 reward=0.7666026 (524.96 it/sec) -training >> step=2627500, episode=439 reward=0.7790489 (522.58 it/sec) -training >> step=2627600, episode=439 reward=0.7795649 (548.27 it/sec) -training >> step=2627700, episode=439 reward=0.7727406 (483.41 it/sec) -training >> step=2627800, episode=439 reward=0.7610813 (542.61 it/sec) -training >> step=2627900, episode=439 reward=0.7462983 (527.46 it/sec) -training >> step=2628000, episode=439 reward=0.7784595 (544.07 it/sec) -training >> step=2628100, episode=439 reward=0.774817 (525.75 it/sec) -training >> step=2628200, episode=439 reward=0.7464974 (521.06 it/sec) -training >> step=2628300, episode=439 reward=0.8022646 (586.39 it/sec) -training >> step=2628400, episode=439 reward=0.7941918 (523.52 it/sec) -training >> step=2628500, episode=439 reward=0.7646749 (513.70 it/sec) -training >> step=2628600, episode=439 reward=0.7836356 (527.47 it/sec) -training >> step=2628700, episode=439 reward=0.7736289 (541.13 it/sec) -training >> step=2628800, episode=439 reward=0.7756796 (513.72 it/sec) -training >> step=2628900, episode=439 reward=0.7788451 (543.12 it/sec) -training >> step=2629000, episode=439 reward=0.7631779 (541.89 it/sec) -training >> step=2629100, episode=439 reward=0.7815617 (527.63 it/sec) -training >> step=2629200, episode=439 reward=0.7735031 (546.42 it/sec) -training >> step=2629300, episode=439 reward=0.7613395 (518.10 it/sec) -training >> step=2629400, episode=439 reward=0.770279 (527.57 it/sec) -training >> step=2629500, episode=439 reward=0.7607437 (499.73 it/sec) -training >> step=2629600, episode=439 reward=0.7741144 (470.64 it/sec) -training >> step=2629700, episode=439 reward=0.7520066 (524.47 it/sec) -training >> step=2629800, episode=439 reward=0.7679033 (573.81 it/sec) -training >> step=2629900, episode=439 reward=0.7480323 (551.64 it/sec) -training >> step=2630000, episode=439 reward=0.7618129 (529.37 it/sec) -training >> step=2630100, episode=439 reward=0.751758 (532.35 it/sec) -training >> step=2630200, episode=439 reward=0.7678975 (517.95 it/sec) -training >> step=2630300, episode=439 reward=0.757433 (515.74 it/sec) -training >> step=2630400, episode=439 reward=0.7413246 (498.66 it/sec) -training >> step=2630500, episode=439 reward=0.7589356 (507.36 it/sec) -training >> step=2630600, episode=439 reward=0.7722628 (487.45 it/sec) -training >> step=2630700, episode=439 reward=0.7862245 (495.32 it/sec) -training >> step=2630800, episode=439 reward=0.7587576 (491.21 it/sec) -training >> step=2630900, episode=439 reward=0.7619348 (530.31 it/sec) -training >> step=2631000, episode=439 reward=0.770362 (541.49 it/sec) -training >> step=2631100, episode=439 reward=0.7834042 (513.08 it/sec) -training >> step=2631200, episode=439 reward=0.7466173 (533.04 it/sec) -training >> step=2631300, episode=439 reward=0.7796876 (560.76 it/sec) -training >> step=2631400, episode=439 reward=0.7746767 (529.77 it/sec) -training >> step=2631500, episode=439 reward=0.7369888 (527.80 it/sec) -training >> step=2631600, episode=439 reward=0.7742053 (528.00 it/sec) -training >> step=2631700, episode=439 reward=0.7861166 (483.63 it/sec) -training >> step=2631800, episode=439 reward=0.7762297 (387.23 it/sec) -training >> step=2631900, episode=439 reward=0.7534474 (530.05 it/sec) -training >> step=2632000, episode=439 reward=0.7719729 (550.23 it/sec) -training >> step=2632100, episode=439 reward=0.7692872 (545.57 it/sec) -training >> step=2632200, episode=439 reward=0.7822274 (534.74 it/sec) -training >> step=2632300, episode=439 reward=0.74896 (507.19 it/sec) -training >> step=2632400, episode=439 reward=0.7571991 (527.21 it/sec) -training >> step=2632500, episode=439 reward=0.766679 (517.55 it/sec) -training >> step=2632600, episode=439 reward=0.7663851 (521.22 it/sec) -training >> step=2632700, episode=439 reward=0.7611151 (498.54 it/sec) -training >> step=2632800, episode=439 reward=0.7596663 (536.32 it/sec) -training >> step=2632900, episode=439 reward=0.7507973 (533.76 it/sec) -training >> step=2633000, episode=439 reward=0.7485009 (558.56 it/sec) -training >> step=2633100, episode=439 reward=0.7730308 (553.44 it/sec) -training >> step=2633200, episode=439 reward=0.7575328 (521.09 it/sec) -training >> step=2633300, episode=440 reward=0.7672757 (188.19 it/sec) -training >> step=2633400, episode=440 reward=0.7909065 (543.72 it/sec) -training >> step=2633500, episode=440 reward=0.7623727 (556.99 it/sec) -training >> step=2633600, episode=440 reward=0.7612877 (533.58 it/sec) -training >> step=2633700, episode=440 reward=0.7663648 (530.06 it/sec) -training >> step=2633800, episode=440 reward=0.7717947 (510.42 it/sec) -training >> step=2633900, episode=440 reward=0.7993551 (530.45 it/sec) -training >> step=2634000, episode=440 reward=0.7827037 (566.98 it/sec) -training >> step=2634100, episode=440 reward=0.7630558 (547.40 it/sec) -training >> step=2634200, episode=440 reward=0.7614742 (450.35 it/sec) -training >> step=2634300, episode=440 reward=0.7565406 (505.33 it/sec) -training >> step=2634400, episode=440 reward=0.7675164 (552.76 it/sec) -training >> step=2634500, episode=440 reward=0.7593086 (524.65 it/sec) -training >> step=2634600, episode=440 reward=0.7649854 (549.76 it/sec) -training >> step=2634700, episode=440 reward=0.759473 (501.68 it/sec) -training >> step=2634800, episode=440 reward=0.7800648 (546.54 it/sec) -training >> step=2634900, episode=440 reward=0.7837043 (539.16 it/sec) -training >> step=2635000, episode=440 reward=0.7552515 (522.47 it/sec) -training >> step=2635100, episode=440 reward=0.7721278 (572.26 it/sec) -training >> step=2635200, episode=440 reward=0.7715917 (496.03 it/sec) -training >> step=2635300, episode=440 reward=0.780679 (531.85 it/sec) -training >> step=2635400, episode=440 reward=0.7557559 (531.31 it/sec) -training >> step=2635500, episode=440 reward=0.7683504 (526.13 it/sec) -training >> step=2635600, episode=440 reward=0.783623 (529.95 it/sec) -training >> step=2635700, episode=440 reward=0.7693015 (514.18 it/sec) -training >> step=2635800, episode=440 reward=0.7835724 (541.13 it/sec) -training >> step=2635900, episode=440 reward=0.7472596 (561.19 it/sec) -training >> step=2636000, episode=440 reward=0.7716676 (525.70 it/sec) -training >> step=2636100, episode=440 reward=0.7719724 (545.16 it/sec) -training >> step=2636200, episode=440 reward=0.75813 (566.62 it/sec) -training >> step=2636300, episode=440 reward=0.7732259 (527.54 it/sec) -training >> step=2636400, episode=440 reward=0.7785122 (568.37 it/sec) -training >> step=2636500, episode=440 reward=0.7699557 (541.20 it/sec) -training >> step=2636600, episode=440 reward=0.7656402 (539.52 it/sec) -training >> step=2636700, episode=440 reward=0.7541515 (547.05 it/sec) -training >> step=2636800, episode=440 reward=0.7610599 (555.62 it/sec) -training >> step=2636900, episode=440 reward=0.7647892 (545.64 it/sec) -training >> step=2637000, episode=440 reward=0.7557661 (539.75 it/sec) -training >> step=2637100, episode=440 reward=0.7847943 (577.89 it/sec) -training >> step=2637200, episode=440 reward=0.7654598 (528.66 it/sec) -training >> step=2637300, episode=440 reward=0.7669287 (569.88 it/sec) -training >> step=2637400, episode=440 reward=0.7641426 (530.67 it/sec) -training >> step=2637500, episode=440 reward=0.7666736 (553.54 it/sec) -training >> step=2637600, episode=440 reward=0.7665544 (525.55 it/sec) -training >> step=2637700, episode=440 reward=0.7680488 (531.99 it/sec) -training >> step=2637800, episode=440 reward=0.7745948 (542.71 it/sec) -training >> step=2637900, episode=440 reward=0.7714169 (525.47 it/sec) -training >> step=2638000, episode=440 reward=0.7552184 (508.13 it/sec) -training >> step=2638100, episode=440 reward=0.7562174 (403.06 it/sec) -training >> step=2638200, episode=440 reward=0.7805235 (556.18 it/sec) -training >> step=2638300, episode=440 reward=0.7707161 (544.43 it/sec) -training >> step=2638400, episode=440 reward=0.7598888 (511.79 it/sec) -training >> step=2638500, episode=440 reward=0.7700768 (547.11 it/sec) -training >> step=2638600, episode=440 reward=0.7803523 (570.34 it/sec) -training >> step=2638700, episode=440 reward=0.7658561 (516.54 it/sec) -training >> step=2638800, episode=440 reward=0.7674133 (546.32 it/sec) -training >> step=2638900, episode=440 reward=0.7646652 (535.97 it/sec) -training >> step=2639000, episode=440 reward=0.7809222 (571.85 it/sec) -training >> step=2639100, episode=440 reward=0.7447703 (560.46 it/sec) -training >> step=2639200, episode=440 reward=0.7482016 (534.67 it/sec) -training >> step=2639300, episode=441 reward=0.768439 (72.93 it/sec) -training >> step=2639400, episode=441 reward=0.7854648 (503.17 it/sec) -training >> step=2639500, episode=441 reward=0.7438857 (522.13 it/sec) -training >> step=2639600, episode=441 reward=0.7544329 (530.10 it/sec) -training >> step=2639700, episode=441 reward=0.7346924 (541.73 it/sec) -training >> step=2639800, episode=441 reward=0.7833246 (529.96 it/sec) -training >> step=2639900, episode=441 reward=0.7726963 (558.71 it/sec) -training >> step=2640000, episode=441 reward=0.7575586 (554.12 it/sec) -training >> step=2640100, episode=441 reward=0.7676294 (564.71 it/sec) -training >> step=2640200, episode=441 reward=0.7497592 (550.47 it/sec) -training >> step=2640300, episode=441 reward=0.7582633 (543.32 it/sec) -training >> step=2640400, episode=441 reward=0.7574549 (526.35 it/sec) -training >> step=2640500, episode=441 reward=0.7606367 (507.26 it/sec) -training >> step=2640600, episode=441 reward=0.7684289 (532.84 it/sec) -training >> step=2640700, episode=441 reward=0.7784342 (576.89 it/sec) -training >> step=2640800, episode=441 reward=0.7710266 (556.15 it/sec) -training >> step=2640900, episode=441 reward=0.7694994 (518.48 it/sec) -training >> step=2641000, episode=441 reward=0.7869724 (547.10 it/sec) -training >> step=2641100, episode=441 reward=0.7768735 (592.36 it/sec) -training >> step=2641200, episode=441 reward=0.7881274 (515.13 it/sec) -training >> step=2641300, episode=441 reward=0.763527 (528.75 it/sec) -training >> step=2641400, episode=441 reward=0.7708114 (531.24 it/sec) -training >> step=2641500, episode=441 reward=0.7716243 (511.76 it/sec) -training >> step=2641600, episode=441 reward=0.7415863 (554.67 it/sec) -training >> step=2641700, episode=441 reward=0.7701194 (565.76 it/sec) -training >> step=2641800, episode=441 reward=0.7592677 (525.50 it/sec) -training >> step=2641900, episode=441 reward=0.7756398 (530.75 it/sec) -training >> step=2642000, episode=441 reward=0.7593752 (553.15 it/sec) -training >> step=2642100, episode=441 reward=0.7770055 (543.73 it/sec) -training >> step=2642200, episode=441 reward=0.7599886 (567.61 it/sec) -training >> step=2642300, episode=441 reward=0.7706814 (505.07 it/sec) -training >> step=2642400, episode=441 reward=0.7542647 (552.75 it/sec) -training >> step=2642500, episode=441 reward=0.7705691 (566.74 it/sec) -training >> step=2642600, episode=441 reward=0.7617418 (567.99 it/sec) -training >> step=2642700, episode=441 reward=0.7737374 (569.63 it/sec) -training >> step=2642800, episode=441 reward=0.7642976 (559.79 it/sec) -training >> step=2642900, episode=441 reward=0.7864588 (554.26 it/sec) -training >> step=2643000, episode=441 reward=0.7599269 (560.64 it/sec) -training >> step=2643100, episode=441 reward=0.7729356 (560.49 it/sec) -training >> step=2643200, episode=441 reward=0.7934467 (588.41 it/sec) -training >> step=2643300, episode=441 reward=0.7723137 (529.61 it/sec) -training >> step=2643400, episode=441 reward=0.7516267 (564.58 it/sec) -training >> step=2643500, episode=441 reward=0.7663625 (566.63 it/sec) -training >> step=2643600, episode=441 reward=0.7746604 (551.02 it/sec) -training >> step=2643700, episode=441 reward=0.7557986 (558.09 it/sec) -training >> step=2643800, episode=441 reward=0.7483815 (530.07 it/sec) -training >> step=2643900, episode=441 reward=0.7655514 (588.84 it/sec) -training >> step=2644000, episode=441 reward=0.7885916 (554.50 it/sec) -training >> step=2644100, episode=441 reward=0.7609596 (510.74 it/sec) -training >> step=2644200, episode=441 reward=0.7480755 (561.54 it/sec) -training >> step=2644300, episode=441 reward=0.7685763 (378.61 it/sec) -training >> step=2644400, episode=441 reward=0.7644199 (542.60 it/sec) -training >> step=2644500, episode=441 reward=0.7646913 (570.88 it/sec) -training >> step=2644600, episode=441 reward=0.754433 (538.23 it/sec) -training >> step=2644700, episode=441 reward=0.7542505 (509.34 it/sec) -training >> step=2644800, episode=441 reward=0.7644259 (562.49 it/sec) -training >> step=2644900, episode=441 reward=0.7729185 (551.96 it/sec) -training >> step=2645000, episode=441 reward=0.7430227 (576.18 it/sec) -training >> step=2645100, episode=441 reward=0.7578676 (562.76 it/sec) -training >> step=2645200, episode=441 reward=0.7568123 (587.31 it/sec) -training >> step=2645300, episode=442 reward=0.7760586 (115.94 it/sec) -training >> step=2645400, episode=442 reward=0.77169 (447.62 it/sec) -training >> step=2645500, episode=442 reward=0.7789262 (506.65 it/sec) -training >> step=2645600, episode=442 reward=0.7454316 (544.21 it/sec) -training >> step=2645700, episode=442 reward=0.7648837 (528.98 it/sec) -training >> step=2645800, episode=442 reward=0.7740827 (557.30 it/sec) -training >> step=2645900, episode=442 reward=0.774029 (537.94 it/sec) -training >> step=2646000, episode=442 reward=0.7380007 (559.36 it/sec) -training >> step=2646100, episode=442 reward=0.7704487 (567.14 it/sec) -training >> step=2646200, episode=442 reward=0.7411952 (542.79 it/sec) -training >> step=2646300, episode=442 reward=0.7685395 (533.00 it/sec) -training >> step=2646400, episode=442 reward=0.7715026 (572.74 it/sec) -training >> step=2646500, episode=442 reward=0.7894724 (517.27 it/sec) -training >> step=2646600, episode=442 reward=0.7642478 (533.86 it/sec) -training >> step=2646700, episode=442 reward=0.7726179 (554.35 it/sec) -training >> step=2646800, episode=442 reward=0.7700983 (569.96 it/sec) -training >> step=2646900, episode=442 reward=0.7881305 (554.53 it/sec) -training >> step=2647000, episode=442 reward=0.7792448 (563.34 it/sec) -training >> step=2647100, episode=442 reward=0.7697417 (556.85 it/sec) -training >> step=2647200, episode=442 reward=0.7802074 (553.13 it/sec) -training >> step=2647300, episode=442 reward=0.7540731 (575.82 it/sec) -training >> step=2647400, episode=442 reward=0.7583551 (553.30 it/sec) -training >> step=2647500, episode=442 reward=0.7943984 (599.35 it/sec) -training >> step=2647600, episode=442 reward=0.7649174 (531.46 it/sec) -training >> step=2647700, episode=442 reward=0.7660091 (488.30 it/sec) -training >> step=2647800, episode=442 reward=0.7587882 (559.00 it/sec) -training >> step=2647900, episode=442 reward=0.7592473 (544.86 it/sec) -training >> step=2648000, episode=442 reward=0.7390805 (543.84 it/sec) -training >> step=2648100, episode=442 reward=0.7778209 (546.06 it/sec) -training >> step=2648200, episode=442 reward=0.7579514 (566.43 it/sec) -training >> step=2648300, episode=442 reward=0.7680731 (532.47 it/sec) -training >> step=2648400, episode=442 reward=0.7772737 (555.33 it/sec) -training >> step=2648500, episode=442 reward=0.7941537 (545.69 it/sec) -training >> step=2648600, episode=442 reward=0.7692978 (547.27 it/sec) -training >> step=2648700, episode=442 reward=0.7768235 (488.08 it/sec) -training >> step=2648800, episode=442 reward=0.8048067 (546.28 it/sec) -training >> step=2648900, episode=442 reward=0.767345 (542.19 it/sec) -training >> step=2649000, episode=442 reward=0.7649072 (522.85 it/sec) -training >> step=2649100, episode=442 reward=0.7914447 (572.38 it/sec) -training >> step=2649200, episode=442 reward=0.768906 (558.33 it/sec) -training >> step=2649300, episode=442 reward=0.7742071 (176.24 it/sec) -training >> step=2649400, episode=442 reward=0.7398528 (134.49 it/sec) -training >> step=2649500, episode=442 reward=0.7837 (390.25 it/sec) -training >> step=2649600, episode=442 reward=0.7777456 (396.80 it/sec) -training >> step=2649700, episode=442 reward=0.7738572 (372.98 it/sec) -training >> step=2649800, episode=442 reward=0.7571154 (372.46 it/sec) -training >> step=2649900, episode=442 reward=0.7584149 (363.85 it/sec) -training >> step=2650000, episode=442 reward=0.7681463 (433.54 it/sec) -training >> step=2650100, episode=442 reward=0.7803729 (435.34 it/sec) -training >> step=2650200, episode=442 reward=0.7711053 (427.89 it/sec) -training >> step=2650300, episode=442 reward=0.7880074 (413.10 it/sec) -training >> step=2650400, episode=442 reward=0.7727927 (431.68 it/sec) -training >> step=2650500, episode=442 reward=0.7576597 (378.32 it/sec) -training >> step=2650600, episode=442 reward=0.7596982 (353.03 it/sec) -training >> step=2650700, episode=442 reward=0.7821475 (332.53 it/sec) -training >> step=2650800, episode=442 reward=0.7653607 (392.23 it/sec) -training >> step=2650900, episode=442 reward=0.78364 (321.30 it/sec) -training >> step=2651000, episode=442 reward=0.7692296 (274.09 it/sec) -training >> step=2651100, episode=442 reward=0.7793298 (362.17 it/sec) -training >> step=2651200, episode=442 reward=0.7764068 (418.37 it/sec) -training >> step=2651300, episode=443 reward=0.7710985 (72.74 it/sec) -training >> step=2651400, episode=443 reward=0.7768725 (307.69 it/sec) -training >> step=2651500, episode=443 reward=0.7584158 (391.99 it/sec) -training >> step=2651600, episode=443 reward=0.7603061 (444.30 it/sec) -training >> step=2651700, episode=443 reward=0.7761862 (399.66 it/sec) -training >> step=2651800, episode=443 reward=0.734626 (389.46 it/sec) -training >> step=2651900, episode=443 reward=0.766485 (374.27 it/sec) -training >> step=2652000, episode=443 reward=0.7641054 (455.30 it/sec) -training >> step=2652100, episode=443 reward=0.7874581 (426.75 it/sec) -training >> step=2652200, episode=443 reward=0.7648523 (426.38 it/sec) -training >> step=2652300, episode=443 reward=0.7634383 (449.66 it/sec) -training >> step=2652400, episode=443 reward=0.7577757 (498.99 it/sec) -training >> step=2652500, episode=443 reward=0.767966 (492.80 it/sec) -training >> step=2652600, episode=443 reward=0.7474844 (501.05 it/sec) -training >> step=2652700, episode=443 reward=0.7827567 (501.49 it/sec) -training >> step=2652800, episode=443 reward=0.7911294 (571.54 it/sec) -training >> step=2652900, episode=443 reward=0.776596 (516.55 it/sec) -training >> step=2653000, episode=443 reward=0.7836108 (519.71 it/sec) -training >> step=2653100, episode=443 reward=0.7620066 (561.90 it/sec) -training >> step=2653200, episode=443 reward=0.7824938 (547.66 it/sec) -training >> step=2653300, episode=443 reward=0.7820864 (564.99 it/sec) -training >> step=2653400, episode=443 reward=0.7688429 (548.90 it/sec) -training >> step=2653500, episode=443 reward=0.7597058 (559.22 it/sec) -training >> step=2653600, episode=443 reward=0.7381757 (520.98 it/sec) -training >> step=2653700, episode=443 reward=0.7531168 (524.48 it/sec) -training >> step=2653800, episode=443 reward=0.767571 (529.06 it/sec) -training >> step=2653900, episode=443 reward=0.7733825 (561.39 it/sec) -training >> step=2654000, episode=443 reward=0.7565743 (527.05 it/sec) -training >> step=2654100, episode=443 reward=0.7802523 (521.22 it/sec) -training >> step=2654200, episode=443 reward=0.7901216 (521.12 it/sec) -training >> step=2654300, episode=443 reward=0.7599546 (553.33 it/sec) -training >> step=2654400, episode=443 reward=0.7716474 (527.51 it/sec) -training >> step=2654500, episode=443 reward=0.7842955 (553.45 it/sec) -training >> step=2654600, episode=443 reward=0.7669988 (562.05 it/sec) -training >> step=2654700, episode=443 reward=0.7712772 (509.55 it/sec) -training >> step=2654800, episode=443 reward=0.7837313 (487.55 it/sec) -training >> step=2654900, episode=443 reward=0.7597612 (530.88 it/sec) -training >> step=2655000, episode=443 reward=0.7783565 (536.61 it/sec) -training >> step=2655100, episode=443 reward=0.7575309 (569.95 it/sec) -training >> step=2655200, episode=443 reward=0.7553509 (516.43 it/sec) -training >> step=2655300, episode=443 reward=0.7565099 (519.52 it/sec) -training >> step=2655400, episode=443 reward=0.7765968 (532.37 it/sec) -training >> step=2655500, episode=443 reward=0.7736001 (537.07 it/sec) -training >> step=2655600, episode=443 reward=0.7724318 (529.14 it/sec) -training >> step=2655700, episode=443 reward=0.7702533 (535.47 it/sec) -training >> step=2655800, episode=443 reward=0.7703441 (542.12 it/sec) -training >> step=2655900, episode=443 reward=0.7572017 (549.87 it/sec) -training >> step=2656000, episode=443 reward=0.7716537 (533.13 it/sec) -training >> step=2656100, episode=443 reward=0.7856333 (529.81 it/sec) -training >> step=2656200, episode=443 reward=0.7781176 (590.42 it/sec) -training >> step=2656300, episode=443 reward=0.7429537 (535.84 it/sec) -training >> step=2656400, episode=443 reward=0.7929128 (537.43 it/sec) -training >> step=2656500, episode=443 reward=0.7581705 (549.08 it/sec) -training >> step=2656600, episode=443 reward=0.7787007 (538.91 it/sec) -training >> step=2656700, episode=443 reward=0.770247 (526.45 it/sec) -training >> step=2656800, episode=443 reward=0.7738461 (387.34 it/sec) -training >> step=2656900, episode=443 reward=0.7569381 (527.66 it/sec) -training >> step=2657000, episode=443 reward=0.7608683 (540.11 it/sec) -training >> step=2657100, episode=443 reward=0.7575131 (534.75 it/sec) -training >> step=2657200, episode=443 reward=0.7725274 (524.99 it/sec) -training >> step=2657300, episode=444 reward=0.7558165 (73.35 it/sec) -training >> step=2657400, episode=444 reward=0.7591081 (500.23 it/sec) -training >> step=2657500, episode=444 reward=0.7788149 (500.59 it/sec) -training >> step=2657600, episode=444 reward=0.783406 (494.66 it/sec) -training >> step=2657700, episode=444 reward=0.7456878 (541.90 it/sec) -training >> step=2657800, episode=444 reward=0.7606137 (516.24 it/sec) -training >> step=2657900, episode=444 reward=0.7612813 (527.42 it/sec) -training >> step=2658000, episode=444 reward=0.7710986 (518.89 it/sec) -training >> step=2658100, episode=444 reward=0.7811936 (480.49 it/sec) -training >> step=2658200, episode=444 reward=0.7712961 (516.18 it/sec) -training >> step=2658300, episode=444 reward=0.7642851 (496.22 it/sec) -training >> step=2658400, episode=444 reward=0.7661797 (362.32 it/sec) -training >> step=2658500, episode=444 reward=0.7774867 (401.72 it/sec) -training >> step=2658600, episode=444 reward=0.7756656 (538.18 it/sec) -training >> step=2658700, episode=444 reward=0.7806044 (515.47 it/sec) -training >> step=2658800, episode=444 reward=0.7828848 (552.08 it/sec) -training >> step=2658900, episode=444 reward=0.7992664 (533.22 it/sec) -training >> step=2659000, episode=444 reward=0.7703848 (555.00 it/sec) -training >> step=2659100, episode=444 reward=0.794055 (584.47 it/sec) -training >> step=2659200, episode=444 reward=0.7940571 (578.38 it/sec) -training >> step=2659300, episode=444 reward=0.7610247 (551.71 it/sec) -training >> step=2659400, episode=444 reward=0.7532377 (591.00 it/sec) -training >> step=2659500, episode=444 reward=0.7666861 (585.54 it/sec) -training >> step=2659600, episode=444 reward=0.766938 (610.69 it/sec) -training >> step=2659700, episode=444 reward=0.7931415 (581.39 it/sec) -training >> step=2659800, episode=444 reward=0.8029409 (576.16 it/sec) -training >> step=2659900, episode=444 reward=0.7805049 (611.78 it/sec) -training >> step=2660000, episode=444 reward=0.780645 (609.39 it/sec) -training >> step=2660100, episode=444 reward=0.7709786 (589.11 it/sec) -training >> step=2660200, episode=444 reward=0.7769277 (595.66 it/sec) -training >> step=2660300, episode=444 reward=0.7628835 (578.11 it/sec) -training >> step=2660400, episode=444 reward=0.7588431 (585.31 it/sec) -training >> step=2660500, episode=444 reward=0.7536641 (578.23 it/sec) -training >> step=2660600, episode=444 reward=0.7630064 (615.19 it/sec) -training >> step=2660700, episode=444 reward=0.7845877 (583.75 it/sec) -training >> step=2660800, episode=444 reward=0.792494 (516.82 it/sec) -training >> step=2660900, episode=444 reward=0.7581864 (563.91 it/sec) -training >> step=2661000, episode=444 reward=0.7715421 (552.91 it/sec) -training >> step=2661100, episode=444 reward=0.7681828 (529.66 it/sec) -training >> step=2661200, episode=444 reward=0.7662944 (524.77 it/sec) -training >> step=2661300, episode=444 reward=0.7918821 (530.25 it/sec) -training >> step=2661400, episode=444 reward=0.7609726 (543.04 it/sec) -training >> step=2661500, episode=444 reward=0.7500341 (564.61 it/sec) -training >> step=2661600, episode=444 reward=0.7937287 (509.33 it/sec) -training >> step=2661700, episode=444 reward=0.7796056 (541.25 it/sec) -training >> step=2661800, episode=444 reward=0.747682 (529.02 it/sec) -training >> step=2661900, episode=444 reward=0.7927994 (529.04 it/sec) -training >> step=2662000, episode=444 reward=0.7856337 (551.35 it/sec) -training >> step=2662100, episode=444 reward=0.7554641 (498.89 it/sec) -training >> step=2662200, episode=444 reward=0.7562519 (486.84 it/sec) -training >> step=2662300, episode=444 reward=0.781882 (505.04 it/sec) -training >> step=2662400, episode=444 reward=0.7529619 (532.29 it/sec) -training >> step=2662500, episode=444 reward=0.7552864 (547.15 it/sec) -training >> step=2662600, episode=444 reward=0.7860591 (484.93 it/sec) -training >> step=2662700, episode=444 reward=0.777187 (509.26 it/sec) -training >> step=2662800, episode=444 reward=0.764318 (501.19 it/sec) -training >> step=2662900, episode=444 reward=0.7830399 (553.07 it/sec) -training >> step=2663000, episode=444 reward=0.7509856 (415.46 it/sec) -training >> step=2663100, episode=444 reward=0.7714108 (532.58 it/sec) -training >> step=2663200, episode=444 reward=0.7583148 (511.23 it/sec) -training >> step=2663300, episode=445 reward=0.7798965 (72.17 it/sec) -training >> step=2663400, episode=445 reward=0.77692 (493.00 it/sec) -training >> step=2663500, episode=445 reward=0.7698981 (489.67 it/sec) -training >> step=2663600, episode=445 reward=0.7526254 (521.99 it/sec) -training >> step=2663700, episode=445 reward=0.7893314 (470.50 it/sec) -training >> step=2663800, episode=445 reward=0.7865317 (518.70 it/sec) -training >> step=2663900, episode=445 reward=0.7865314 (504.60 it/sec) -training >> step=2664000, episode=445 reward=0.782114 (534.84 it/sec) -training >> step=2664100, episode=445 reward=0.7731399 (512.05 it/sec) -training >> step=2664200, episode=445 reward=0.7900733 (509.73 it/sec) -training >> step=2664300, episode=445 reward=0.7720696 (529.24 it/sec) -training >> step=2664400, episode=445 reward=0.7876195 (492.51 it/sec) -training >> step=2664500, episode=445 reward=0.7661818 (515.21 it/sec) -training >> step=2664600, episode=445 reward=0.7735463 (531.78 it/sec) -training >> step=2664700, episode=445 reward=0.7691331 (531.51 it/sec) -training >> step=2664800, episode=445 reward=0.7619927 (487.54 it/sec) -training >> step=2664900, episode=445 reward=0.7890767 (521.11 it/sec) -training >> step=2665000, episode=445 reward=0.7642632 (563.55 it/sec) -training >> step=2665100, episode=445 reward=0.769969 (561.50 it/sec) -training >> step=2665200, episode=445 reward=0.7646803 (541.34 it/sec) -training >> step=2665300, episode=445 reward=0.7490072 (553.60 it/sec) -training >> step=2665400, episode=445 reward=0.7628847 (543.28 it/sec) -training >> step=2665500, episode=445 reward=0.7559065 (569.25 it/sec) -training >> step=2665600, episode=445 reward=0.7649465 (539.14 it/sec) -training >> step=2665700, episode=445 reward=0.7605019 (535.10 it/sec) -training >> step=2665800, episode=445 reward=0.7811019 (571.07 it/sec) -training >> step=2665900, episode=445 reward=0.7543136 (544.92 it/sec) -training >> step=2666000, episode=445 reward=0.7777475 (537.02 it/sec) -training >> step=2666100, episode=445 reward=0.7669357 (555.06 it/sec) -training >> step=2666200, episode=445 reward=0.7776851 (549.47 it/sec) -training >> step=2666300, episode=445 reward=0.7786765 (532.11 it/sec) -training >> step=2666400, episode=445 reward=0.7504147 (569.99 it/sec) -training >> step=2666500, episode=445 reward=0.7683566 (547.69 it/sec) -training >> step=2666600, episode=445 reward=0.7775012 (575.17 it/sec) -training >> step=2666700, episode=445 reward=0.755568 (524.51 it/sec) -training >> step=2666800, episode=445 reward=0.7940446 (511.72 it/sec) -training >> step=2666900, episode=445 reward=0.768333 (530.43 it/sec) -training >> step=2667000, episode=445 reward=0.7485311 (539.51 it/sec) -training >> step=2667100, episode=445 reward=0.7659985 (556.85 it/sec) -training >> step=2667200, episode=445 reward=0.7576051 (533.58 it/sec) -training >> step=2667300, episode=445 reward=0.7728142 (559.21 it/sec) -training >> step=2667400, episode=445 reward=0.7550479 (557.38 it/sec) -training >> step=2667500, episode=445 reward=0.7781309 (565.35 it/sec) -training >> step=2667600, episode=445 reward=0.7592116 (521.66 it/sec) -training >> step=2667700, episode=445 reward=0.7639434 (550.67 it/sec) -training >> step=2667800, episode=445 reward=0.7351231 (565.90 it/sec) -training >> step=2667900, episode=445 reward=0.7755703 (549.87 it/sec) -training >> step=2668000, episode=445 reward=0.7674715 (539.70 it/sec) -training >> step=2668100, episode=445 reward=0.7553783 (516.23 it/sec) -training >> step=2668200, episode=445 reward=0.7571982 (533.93 it/sec) -training >> step=2668300, episode=445 reward=0.7548969 (540.49 it/sec) -training >> step=2668400, episode=445 reward=0.7861066 (575.55 it/sec) -training >> step=2668500, episode=445 reward=0.7607798 (569.35 it/sec) -training >> step=2668600, episode=445 reward=0.7602695 (538.07 it/sec) -training >> step=2668700, episode=445 reward=0.7624419 (546.52 it/sec) -training >> step=2668800, episode=445 reward=0.7704892 (534.02 it/sec) -training >> step=2668900, episode=445 reward=0.7699167 (529.39 it/sec) -training >> step=2669000, episode=445 reward=0.7814805 (561.73 it/sec) -training >> step=2669100, episode=445 reward=0.7707818 (531.85 it/sec) -training >> step=2669200, episode=445 reward=0.7560425 (395.13 it/sec) -training >> step=2669300, episode=446 reward=0.7540098 (70.63 it/sec) -training >> step=2669400, episode=446 reward=0.7723486 (516.95 it/sec) -training >> step=2669500, episode=446 reward=0.7585056 (528.03 it/sec) -training >> step=2669600, episode=446 reward=0.7569158 (466.29 it/sec) -training >> step=2669700, episode=446 reward=0.752434 (484.87 it/sec) -training >> step=2669800, episode=446 reward=0.7682539 (438.57 it/sec) -training >> step=2669900, episode=446 reward=0.7766511 (525.11 it/sec) -training >> step=2670000, episode=446 reward=0.7669072 (504.88 it/sec) -training >> step=2670100, episode=446 reward=0.7748136 (459.89 it/sec) -training >> step=2670200, episode=446 reward=0.7935248 (521.36 it/sec) -training >> step=2670300, episode=446 reward=0.7718241 (507.37 it/sec) -training >> step=2670400, episode=446 reward=0.7592818 (483.12 it/sec) -training >> step=2670500, episode=446 reward=0.751113 (496.18 it/sec) -training >> step=2670600, episode=446 reward=0.7803354 (529.86 it/sec) -training >> step=2670700, episode=446 reward=0.765835 (465.57 it/sec) -training >> step=2670800, episode=446 reward=0.7581561 (520.96 it/sec) -training >> step=2670900, episode=446 reward=0.7655013 (524.26 it/sec) -training >> step=2671000, episode=446 reward=0.7643546 (548.33 it/sec) -training >> step=2671100, episode=446 reward=0.7736773 (544.56 it/sec) -training >> step=2671200, episode=446 reward=0.7667897 (547.18 it/sec) -training >> step=2671300, episode=446 reward=0.7819183 (522.44 it/sec) -training >> step=2671400, episode=446 reward=0.7792073 (533.49 it/sec) -training >> step=2671500, episode=446 reward=0.7809262 (568.89 it/sec) -training >> step=2671600, episode=446 reward=0.7568488 (489.40 it/sec) -training >> step=2671700, episode=446 reward=0.7743076 (534.71 it/sec) -training >> step=2671800, episode=446 reward=0.7767726 (552.17 it/sec) -training >> step=2671900, episode=446 reward=0.7641016 (519.66 it/sec) -training >> step=2672000, episode=446 reward=0.7774319 (524.15 it/sec) -training >> step=2672100, episode=446 reward=0.7661902 (537.89 it/sec) -training >> step=2672200, episode=446 reward=0.767037 (551.41 it/sec) -training >> step=2672300, episode=446 reward=0.7616167 (536.18 it/sec) -training >> step=2672400, episode=446 reward=0.7500961 (493.89 it/sec) -training >> step=2672500, episode=446 reward=0.7856992 (536.28 it/sec) -training >> step=2672600, episode=446 reward=0.7904837 (528.85 it/sec) -training >> step=2672700, episode=446 reward=0.7732178 (543.89 it/sec) -training >> step=2672800, episode=446 reward=0.768198 (558.25 it/sec) -training >> step=2672900, episode=446 reward=0.7736826 (543.18 it/sec) -training >> step=2673000, episode=446 reward=0.7444013 (553.57 it/sec) -training >> step=2673100, episode=446 reward=0.7757113 (570.37 it/sec) -training >> step=2673200, episode=446 reward=0.7585344 (561.48 it/sec) -training >> step=2673300, episode=446 reward=0.7520669 (528.96 it/sec) -training >> step=2673400, episode=446 reward=0.7706263 (506.81 it/sec) -training >> step=2673500, episode=446 reward=0.7477168 (522.80 it/sec) -training >> step=2673600, episode=446 reward=0.7867292 (579.33 it/sec) -training >> step=2673700, episode=446 reward=0.7823642 (541.65 it/sec) -training >> step=2673800, episode=446 reward=0.7567677 (521.96 it/sec) -training >> step=2673900, episode=446 reward=0.7699708 (506.15 it/sec) -training >> step=2674000, episode=446 reward=0.76357 (528.41 it/sec) -training >> step=2674100, episode=446 reward=0.7504256 (519.69 it/sec) -training >> step=2674200, episode=446 reward=0.7478741 (503.05 it/sec) -training >> step=2674300, episode=446 reward=0.7665688 (404.52 it/sec) -training >> step=2674400, episode=446 reward=0.7657474 (399.52 it/sec) -training >> step=2674500, episode=446 reward=0.7569878 (397.08 it/sec) -training >> step=2674600, episode=446 reward=0.7784137 (404.13 it/sec) -training >> step=2674700, episode=446 reward=0.7644333 (436.11 it/sec) -training >> step=2674800, episode=446 reward=0.7520711 (425.58 it/sec) -training >> step=2674900, episode=446 reward=0.7564276 (473.21 it/sec) -training >> step=2675000, episode=446 reward=0.7424456 (486.61 it/sec) -training >> step=2675100, episode=446 reward=0.7646952 (507.67 it/sec) -training >> step=2675200, episode=446 reward=0.7674747 (471.33 it/sec) -training >> step=2675300, episode=447 reward=0.7682812 (95.41 it/sec) -training >> step=2675400, episode=447 reward=0.7762548 (453.23 it/sec) -training >> step=2675500, episode=447 reward=0.7538212 (500.08 it/sec) -training >> step=2675600, episode=447 reward=0.770697 (512.59 it/sec) -training >> step=2675700, episode=447 reward=0.7606237 (547.70 it/sec) -training >> step=2675800, episode=447 reward=0.7518755 (468.19 it/sec) -training >> step=2675900, episode=447 reward=0.7518595 (329.77 it/sec) -training >> step=2676000, episode=447 reward=0.7889255 (369.08 it/sec) -training >> step=2676100, episode=447 reward=0.7593905 (442.51 it/sec) -training >> step=2676200, episode=447 reward=0.7597902 (531.28 it/sec) -training >> step=2676300, episode=447 reward=0.7620308 (389.57 it/sec) -training >> step=2676400, episode=447 reward=0.7934303 (449.87 it/sec) -training >> step=2676500, episode=447 reward=0.7741508 (406.05 it/sec) -training >> step=2676600, episode=447 reward=0.7795284 (449.11 it/sec) -training >> step=2676700, episode=447 reward=0.7626227 (447.14 it/sec) -training >> step=2676800, episode=447 reward=0.7512262 (428.47 it/sec) -training >> step=2676900, episode=447 reward=0.7734934 (385.53 it/sec) -training >> step=2677000, episode=447 reward=0.7668642 (427.43 it/sec) -training >> step=2677100, episode=447 reward=0.7742261 (422.40 it/sec) -training >> step=2677200, episode=447 reward=0.76638 (369.89 it/sec) -training >> step=2677300, episode=447 reward=0.7716588 (465.14 it/sec) -training >> step=2677400, episode=447 reward=0.7766172 (443.27 it/sec) -training >> step=2677500, episode=447 reward=0.773159 (479.68 it/sec) -training >> step=2677600, episode=447 reward=0.767423 (379.71 it/sec) -training >> step=2677700, episode=447 reward=0.7628703 (440.70 it/sec) -training >> step=2677800, episode=447 reward=0.7767726 (474.81 it/sec) -training >> step=2677900, episode=447 reward=0.764892 (476.21 it/sec) -training >> step=2678000, episode=447 reward=0.7631317 (467.86 it/sec) -training >> step=2678100, episode=447 reward=0.7824162 (418.36 it/sec) -training >> step=2678200, episode=447 reward=0.7654769 (488.41 it/sec) -training >> step=2678300, episode=447 reward=0.7494172 (504.09 it/sec) -training >> step=2678400, episode=447 reward=0.766678 (547.25 it/sec) -training >> step=2678500, episode=447 reward=0.7818472 (532.72 it/sec) -training >> step=2678600, episode=447 reward=0.7606866 (418.80 it/sec) -training >> step=2678700, episode=447 reward=0.743773 (294.81 it/sec) -training >> step=2678800, episode=447 reward=0.7631719 (312.70 it/sec) -training >> step=2678900, episode=447 reward=0.775562 (313.06 it/sec) -training >> step=2679000, episode=447 reward=0.7562342 (435.38 it/sec) -training >> step=2679100, episode=447 reward=0.7453336 (442.82 it/sec) -training >> step=2679200, episode=447 reward=0.7737389 (410.47 it/sec) -training >> step=2679300, episode=447 reward=0.7761984 (443.82 it/sec) -training >> step=2679400, episode=447 reward=0.7598596 (393.03 it/sec) -training >> step=2679500, episode=447 reward=0.7840261 (454.92 it/sec) -training >> step=2679600, episode=447 reward=0.7773288 (441.32 it/sec) -training >> step=2679700, episode=447 reward=0.7660702 (505.32 it/sec) -training >> step=2679800, episode=447 reward=0.741635 (467.78 it/sec) -training >> step=2679900, episode=447 reward=0.7645169 (465.18 it/sec) -training >> step=2680000, episode=447 reward=0.771352 (489.69 it/sec) -training >> step=2680100, episode=447 reward=0.7734875 (500.62 it/sec) -training >> step=2680200, episode=447 reward=0.7621416 (496.97 it/sec) -training >> step=2680300, episode=447 reward=0.7751716 (527.44 it/sec) -training >> step=2680400, episode=447 reward=0.745134 (467.07 it/sec) -training >> step=2680500, episode=447 reward=0.7757809 (491.45 it/sec) -training >> step=2680600, episode=447 reward=0.7638635 (501.43 it/sec) -training >> step=2680700, episode=447 reward=0.7478378 (482.43 it/sec) -training >> step=2680800, episode=447 reward=0.7856705 (517.87 it/sec) -training >> step=2680900, episode=447 reward=0.7805859 (469.46 it/sec) -training >> step=2681000, episode=447 reward=0.7834029 (385.55 it/sec) -training >> step=2681100, episode=447 reward=0.7508217 (478.13 it/sec) -training >> step=2681200, episode=447 reward=0.7750212 (506.08 it/sec) -training >> step=2681300, episode=448 reward=0.7584186 (239.64 it/sec) -training >> step=2681400, episode=448 reward=0.7641614 (526.80 it/sec) -training >> step=2681500, episode=448 reward=0.7857275 (532.32 it/sec) -training >> step=2681600, episode=448 reward=0.7700029 (536.91 it/sec) -training >> step=2681700, episode=448 reward=0.7723471 (509.39 it/sec) -training >> step=2681800, episode=448 reward=0.7760386 (479.55 it/sec) -training >> step=2681900, episode=448 reward=0.7598737 (494.82 it/sec) -training >> step=2682000, episode=448 reward=0.7688814 (539.05 it/sec) -training >> step=2682100, episode=448 reward=0.7655662 (485.57 it/sec) -training >> step=2682200, episode=448 reward=0.7627056 (531.87 it/sec) -training >> step=2682300, episode=448 reward=0.7693588 (483.49 it/sec) -training >> step=2682400, episode=448 reward=0.7498954 (529.12 it/sec) -training >> step=2682500, episode=448 reward=0.7469631 (452.93 it/sec) -training >> step=2682600, episode=448 reward=0.765624 (490.40 it/sec) -training >> step=2682700, episode=448 reward=0.7720419 (479.80 it/sec) -training >> step=2682800, episode=448 reward=0.768025 (512.85 it/sec) -training >> step=2682900, episode=448 reward=0.7640201 (455.83 it/sec) -training >> step=2683000, episode=448 reward=0.7722546 (478.33 it/sec) -training >> step=2683100, episode=448 reward=0.7817022 (480.85 it/sec) -training >> step=2683200, episode=448 reward=0.7734203 (468.21 it/sec) -training >> step=2683300, episode=448 reward=0.775039 (475.15 it/sec) -training >> step=2683400, episode=448 reward=0.7803205 (470.15 it/sec) -training >> step=2683500, episode=448 reward=0.7695551 (504.84 it/sec) -training >> step=2683600, episode=448 reward=0.790013 (492.13 it/sec) -training >> step=2683700, episode=448 reward=0.7619075 (474.21 it/sec) -training >> step=2683800, episode=448 reward=0.763542 (425.13 it/sec) -training >> step=2683900, episode=448 reward=0.7774734 (387.15 it/sec) -training >> step=2684000, episode=448 reward=0.773083 (436.96 it/sec) -training >> step=2684100, episode=448 reward=0.7770305 (391.15 it/sec) -training >> step=2684200, episode=448 reward=0.7728138 (382.53 it/sec) -training >> step=2684300, episode=448 reward=0.7945701 (422.71 it/sec) -training >> step=2684400, episode=448 reward=0.7477341 (464.96 it/sec) -training >> step=2684500, episode=448 reward=0.7636506 (441.86 it/sec) -training >> step=2684600, episode=448 reward=0.7840802 (494.76 it/sec) -training >> step=2684700, episode=448 reward=0.7620554 (462.13 it/sec) -training >> step=2684800, episode=448 reward=0.7709545 (472.28 it/sec) -training >> step=2684900, episode=448 reward=0.7800856 (543.42 it/sec) -training >> step=2685000, episode=448 reward=0.7862175 (540.76 it/sec) -training >> step=2685100, episode=448 reward=0.7656664 (528.40 it/sec) -training >> step=2685200, episode=448 reward=0.762508 (529.04 it/sec) -training >> step=2685300, episode=448 reward=0.7668508 (554.20 it/sec) -training >> step=2685400, episode=448 reward=0.752719 (513.68 it/sec) -training >> step=2685500, episode=448 reward=0.7803409 (568.90 it/sec) -training >> step=2685600, episode=448 reward=0.7730786 (542.61 it/sec) -training >> step=2685700, episode=448 reward=0.7586115 (556.55 it/sec) -training >> step=2685800, episode=448 reward=0.7612733 (513.02 it/sec) -training >> step=2685900, episode=448 reward=0.7664091 (479.19 it/sec) -training >> step=2686000, episode=448 reward=0.7669008 (537.76 it/sec) -training >> step=2686100, episode=448 reward=0.7702027 (468.88 it/sec) -training >> step=2686200, episode=448 reward=0.7512662 (535.66 it/sec) -training >> step=2686300, episode=448 reward=0.7841625 (492.87 it/sec) -training >> step=2686400, episode=448 reward=0.7405867 (524.88 it/sec) -training >> step=2686500, episode=448 reward=0.7468107 (546.16 it/sec) -training >> step=2686600, episode=448 reward=0.7614149 (513.26 it/sec) -training >> step=2686700, episode=448 reward=0.7529137 (526.20 it/sec) -training >> step=2686800, episode=448 reward=0.7707897 (554.34 it/sec) -training >> step=2686900, episode=448 reward=0.7635782 (535.47 it/sec) -training >> step=2687000, episode=448 reward=0.7655613 (446.92 it/sec) -training >> step=2687100, episode=448 reward=0.7717504 (502.19 it/sec) -training >> step=2687200, episode=448 reward=0.7662998 (508.17 it/sec) -training >> step=2687300, episode=449 reward=0.7830331 (95.48 it/sec) -training >> step=2687400, episode=449 reward=0.759368 (515.45 it/sec) -training >> step=2687500, episode=449 reward=0.757382 (511.73 it/sec) -training >> step=2687600, episode=449 reward=0.7667367 (493.67 it/sec) -training >> step=2687700, episode=449 reward=0.7448955 (461.45 it/sec) -training >> step=2687800, episode=449 reward=0.7596564 (503.28 it/sec) -training >> step=2687900, episode=449 reward=0.7343625 (455.47 it/sec) -training >> step=2688000, episode=449 reward=0.774902 (467.29 it/sec) -training >> step=2688100, episode=449 reward=0.7579 (452.63 it/sec) -training >> step=2688200, episode=449 reward=0.7758876 (447.86 it/sec) -training >> step=2688300, episode=449 reward=0.757288 (482.04 it/sec) -training >> step=2688400, episode=449 reward=0.775214 (445.80 it/sec) -training >> step=2688500, episode=449 reward=0.7754893 (442.34 it/sec) -training >> step=2688600, episode=449 reward=0.7801855 (456.39 it/sec) -training >> step=2688700, episode=449 reward=0.7738972 (399.44 it/sec) -training >> step=2688800, episode=449 reward=0.7607223 (477.34 it/sec) -training >> step=2688900, episode=449 reward=0.7567971 (423.16 it/sec) -training >> step=2689000, episode=449 reward=0.7739502 (486.55 it/sec) -training >> step=2689100, episode=449 reward=0.7914709 (490.36 it/sec) -training >> step=2689200, episode=449 reward=0.7724503 (483.82 it/sec) -training >> step=2689300, episode=449 reward=0.7396558 (510.51 it/sec) -training >> step=2689400, episode=449 reward=0.7549554 (478.36 it/sec) -training >> step=2689500, episode=449 reward=0.763944 (475.77 it/sec) -training >> step=2689600, episode=449 reward=0.7818061 (495.52 it/sec) -training >> step=2689700, episode=449 reward=0.7734061 (455.57 it/sec) -training >> step=2689800, episode=449 reward=0.7817474 (486.26 it/sec) -training >> step=2689900, episode=449 reward=0.779186 (474.65 it/sec) -training >> step=2690000, episode=449 reward=0.7551536 (484.82 it/sec) -training >> step=2690100, episode=449 reward=0.771481 (500.31 it/sec) -training >> step=2690200, episode=449 reward=0.7790159 (521.56 it/sec) -training >> step=2690300, episode=449 reward=0.7954469 (487.07 it/sec) -training >> step=2690400, episode=449 reward=0.7587607 (448.26 it/sec) -training >> step=2690500, episode=449 reward=0.7582961 (549.75 it/sec) -training >> step=2690600, episode=449 reward=0.7783483 (463.98 it/sec) -training >> step=2690700, episode=449 reward=0.7622529 (546.68 it/sec) -training >> step=2690800, episode=449 reward=0.7692288 (529.45 it/sec) -training >> step=2690900, episode=449 reward=0.778577 (529.04 it/sec) -training >> step=2691000, episode=449 reward=0.7593823 (502.95 it/sec) -training >> step=2691100, episode=449 reward=0.7413658 (491.06 it/sec) -training >> step=2691200, episode=449 reward=0.7703497 (495.81 it/sec) -training >> step=2691300, episode=449 reward=0.767176 (545.36 it/sec) -training >> step=2691400, episode=449 reward=0.7782643 (464.25 it/sec) -training >> step=2691500, episode=449 reward=0.7616264 (433.54 it/sec) -training >> step=2691600, episode=449 reward=0.7902815 (496.05 it/sec) -training >> step=2691700, episode=449 reward=0.7585883 (512.12 it/sec) -training >> step=2691800, episode=449 reward=0.7891852 (409.40 it/sec) -training >> step=2691900, episode=449 reward=0.7682914 (446.59 it/sec) -training >> step=2692000, episode=449 reward=0.768391 (547.22 it/sec) -training >> step=2692100, episode=449 reward=0.7528921 (521.91 it/sec) -training >> step=2692200, episode=449 reward=0.7644742 (428.18 it/sec) -training >> step=2692300, episode=449 reward=0.768772 (501.45 it/sec) -training >> step=2692400, episode=449 reward=0.7914781 (493.37 it/sec) -training >> step=2692500, episode=449 reward=0.7743435 (483.24 it/sec) -training >> step=2692600, episode=449 reward=0.7571853 (465.78 it/sec) -training >> step=2692700, episode=449 reward=0.7588718 (529.26 it/sec) -training >> step=2692800, episode=449 reward=0.7490467 (471.06 it/sec) -training >> step=2692900, episode=449 reward=0.7542582 (452.59 it/sec) -training >> step=2693000, episode=449 reward=0.7841766 (402.06 it/sec) -training >> step=2693100, episode=449 reward=0.7781538 (460.56 it/sec) -training >> step=2693200, episode=449 reward=0.7578877 (445.68 it/sec) -training >> step=2693300, episode=450 reward=0.7735097 (89.93 it/sec) -training >> step=2693400, episode=450 reward=0.7562062 (427.00 it/sec) -training >> step=2693500, episode=450 reward=0.7744063 (425.51 it/sec) -training >> step=2693600, episode=450 reward=0.755399 (457.57 it/sec) -training >> step=2693700, episode=450 reward=0.7875578 (517.99 it/sec) -training >> step=2693800, episode=450 reward=0.7574142 (492.14 it/sec) -training >> step=2693900, episode=450 reward=0.7381746 (522.74 it/sec) -training >> step=2694000, episode=450 reward=0.7521744 (542.07 it/sec) -training >> step=2694100, episode=450 reward=0.7604692 (500.89 it/sec) -training >> step=2694200, episode=450 reward=0.7361265 (547.66 it/sec) -training >> step=2694300, episode=450 reward=0.7790318 (537.07 it/sec) -training >> step=2694400, episode=450 reward=0.7778175 (511.43 it/sec) -training >> step=2694500, episode=450 reward=0.7743731 (529.37 it/sec) -training >> step=2694600, episode=450 reward=0.7606409 (506.84 it/sec) -training >> step=2694700, episode=450 reward=0.7754626 (512.09 it/sec) -training >> step=2694800, episode=450 reward=0.7729155 (453.63 it/sec) -training >> step=2694900, episode=450 reward=0.7614318 (465.48 it/sec) -training >> step=2695000, episode=450 reward=0.7617464 (475.22 it/sec) -training >> step=2695100, episode=450 reward=0.7452552 (518.30 it/sec) -training >> step=2695200, episode=450 reward=0.7704711 (506.93 it/sec) -training >> step=2695300, episode=450 reward=0.7729036 (504.18 it/sec) -training >> step=2695400, episode=450 reward=0.7703599 (478.83 it/sec) -training >> step=2695500, episode=450 reward=0.7697949 (480.62 it/sec) -training >> step=2695600, episode=450 reward=0.7690749 (490.08 it/sec) -training >> step=2695700, episode=450 reward=0.7765963 (478.26 it/sec) -training >> step=2695800, episode=450 reward=0.7742923 (504.24 it/sec) -training >> step=2695900, episode=450 reward=0.7669539 (474.97 it/sec) -training >> step=2696000, episode=450 reward=0.7980041 (495.31 it/sec) -training >> step=2696100, episode=450 reward=0.7564006 (517.49 it/sec) -training >> step=2696200, episode=450 reward=0.7690414 (524.78 it/sec) -training >> step=2696300, episode=450 reward=0.7718692 (521.87 it/sec) -training >> step=2696400, episode=450 reward=0.7593002 (477.01 it/sec) -training >> step=2696500, episode=450 reward=0.7718173 (535.24 it/sec) -training >> step=2696600, episode=450 reward=0.7897295 (471.05 it/sec) -training >> step=2696700, episode=450 reward=0.7781757 (528.77 it/sec) -training >> step=2696800, episode=450 reward=0.7790275 (480.04 it/sec) -training >> step=2696900, episode=450 reward=0.8032428 (536.64 it/sec) -training >> step=2697000, episode=450 reward=0.780824 (486.58 it/sec) -training >> step=2697100, episode=450 reward=0.7641917 (511.33 it/sec) -training >> step=2697200, episode=450 reward=0.7508729 (504.48 it/sec) -training >> step=2697300, episode=450 reward=0.7573605 (538.05 it/sec) -training >> step=2697400, episode=450 reward=0.7882611 (456.51 it/sec) -training >> step=2697500, episode=450 reward=0.7685547 (490.96 it/sec) -training >> step=2697600, episode=450 reward=0.7731845 (531.45 it/sec) -training >> step=2697700, episode=450 reward=0.7560217 (503.98 it/sec) -training >> step=2697800, episode=450 reward=0.7847581 (467.18 it/sec) -training >> step=2697900, episode=450 reward=0.7727594 (484.20 it/sec) -training >> step=2698000, episode=450 reward=0.7631981 (517.89 it/sec) -training >> step=2698100, episode=450 reward=0.7570835 (450.67 it/sec) -training >> step=2698200, episode=450 reward=0.7696241 (416.41 it/sec) -training >> step=2698300, episode=450 reward=0.7731199 (425.89 it/sec) -training >> step=2698400, episode=450 reward=0.7655861 (475.70 it/sec) -training >> step=2698500, episode=450 reward=0.7619972 (494.51 it/sec) -training >> step=2698600, episode=450 reward=0.7553555 (529.94 it/sec) -training >> step=2698700, episode=450 reward=0.787641 (445.31 it/sec) -training >> step=2698800, episode=450 reward=0.7591963 (365.26 it/sec) -training >> step=2698900, episode=450 reward=0.7793183 (393.25 it/sec) -training >> step=2699000, episode=450 reward=0.7712888 (446.18 it/sec) -training >> step=2699100, episode=450 reward=0.765063 (437.61 it/sec) -training >> step=2699200, episode=450 reward=0.763853 (435.55 it/sec) -training >> step=2699300, episode=451 reward=0.7718785 (56.31 it/sec) -training >> step=2699400, episode=451 reward=0.7548755 (524.59 it/sec) -training >> step=2699500, episode=451 reward=0.7666618 (499.07 it/sec) -training >> step=2699600, episode=451 reward=0.756187 (526.21 it/sec) -training >> step=2699700, episode=451 reward=0.7578285 (517.70 it/sec) -training >> step=2699800, episode=451 reward=0.7421157 (510.81 it/sec) -training >> step=2699900, episode=451 reward=0.7474806 (539.65 it/sec) -training >> step=2700000, episode=451 reward=0.773203 (546.83 it/sec) -training >> step=2700100, episode=451 reward=0.7650865 (495.04 it/sec) -training >> step=2700200, episode=451 reward=0.7752714 (505.62 it/sec) -training >> step=2700300, episode=451 reward=0.765147 (514.34 it/sec) -training >> step=2700400, episode=451 reward=0.7677838 (514.65 it/sec) -training >> step=2700500, episode=451 reward=0.7700492 (527.60 it/sec) -training >> step=2700600, episode=451 reward=0.7874687 (529.02 it/sec) -training >> step=2700700, episode=451 reward=0.7692808 (475.53 it/sec) -training >> step=2700800, episode=451 reward=0.7692526 (464.80 it/sec) -training >> step=2700900, episode=451 reward=0.777971 (524.35 it/sec) -training >> step=2701000, episode=451 reward=0.7580565 (514.29 it/sec) -training >> step=2701100, episode=451 reward=0.7778943 (534.48 it/sec) -training >> step=2701200, episode=451 reward=0.7862632 (511.00 it/sec) -training >> step=2701300, episode=451 reward=0.7749385 (521.88 it/sec) -training >> step=2701400, episode=451 reward=0.7854349 (507.00 it/sec) -training >> step=2701500, episode=451 reward=0.7646567 (532.35 it/sec) -training >> step=2701600, episode=451 reward=0.7723249 (506.50 it/sec) -training >> step=2701700, episode=451 reward=0.7771225 (569.16 it/sec) -training >> step=2701800, episode=451 reward=0.768922 (516.88 it/sec) -training >> step=2701900, episode=451 reward=0.7738801 (517.62 it/sec) -training >> step=2702000, episode=451 reward=0.7824823 (531.24 it/sec) -training >> step=2702100, episode=451 reward=0.7745603 (530.98 it/sec) -training >> step=2702200, episode=451 reward=0.757401 (531.94 it/sec) -training >> step=2702300, episode=451 reward=0.7816577 (528.98 it/sec) -training >> step=2702400, episode=451 reward=0.7713912 (545.75 it/sec) -training >> step=2702500, episode=451 reward=0.7540867 (541.36 it/sec) -training >> step=2702600, episode=451 reward=0.7886185 (521.11 it/sec) -training >> step=2702700, episode=451 reward=0.7503259 (528.26 it/sec) -training >> step=2702800, episode=451 reward=0.7543832 (516.19 it/sec) -training >> step=2702900, episode=451 reward=0.759106 (504.09 it/sec) -training >> step=2703000, episode=451 reward=0.7747894 (463.12 it/sec) -training >> step=2703100, episode=451 reward=0.7723938 (554.26 it/sec) -training >> step=2703200, episode=451 reward=0.7657056 (513.46 it/sec) -training >> step=2703300, episode=451 reward=0.7896144 (497.00 it/sec) -training >> step=2703400, episode=451 reward=0.7828475 (449.70 it/sec) -training >> step=2703500, episode=451 reward=0.774587 (524.98 it/sec) -training >> step=2703600, episode=451 reward=0.7737015 (530.47 it/sec) -training >> step=2703700, episode=451 reward=0.758032 (514.17 it/sec) -training >> step=2703800, episode=451 reward=0.7437059 (505.78 it/sec) -training >> step=2703900, episode=451 reward=0.7691152 (487.49 it/sec) -training >> step=2704000, episode=451 reward=0.7541777 (491.46 it/sec) -training >> step=2704100, episode=451 reward=0.777204 (483.20 it/sec) -training >> step=2704200, episode=451 reward=0.7639732 (539.59 it/sec) -training >> step=2704300, episode=451 reward=0.7624241 (518.21 it/sec) -training >> step=2704400, episode=451 reward=0.7530559 (466.93 it/sec) -training >> step=2704500, episode=451 reward=0.7683156 (482.98 it/sec) -training >> step=2704600, episode=451 reward=0.7536276 (504.84 it/sec) -training >> step=2704700, episode=451 reward=0.7654364 (518.60 it/sec) -training >> step=2704800, episode=451 reward=0.7582645 (546.72 it/sec) -training >> step=2704900, episode=451 reward=0.7710384 (571.33 it/sec) -training >> step=2705000, episode=451 reward=0.7559151 (538.31 it/sec) -training >> step=2705100, episode=451 reward=0.7696224 (505.56 it/sec) -training >> step=2705200, episode=451 reward=0.7725799 (508.07 it/sec) -training >> step=2705300, episode=452 reward=0.7394998 (61.40 it/sec) -training >> step=2705400, episode=452 reward=0.7575608 (514.19 it/sec) -training >> step=2705500, episode=452 reward=0.7460959 (558.95 it/sec) -training >> step=2705600, episode=452 reward=0.7710242 (491.78 it/sec) -training >> step=2705700, episode=452 reward=0.7697221 (538.20 it/sec) -training >> step=2705800, episode=452 reward=0.7602115 (567.98 it/sec) -training >> step=2705900, episode=452 reward=0.7423359 (539.35 it/sec) -training >> step=2706000, episode=452 reward=0.7558083 (503.78 it/sec) -training >> step=2706100, episode=452 reward=0.7666115 (519.26 it/sec) -training >> step=2706200, episode=452 reward=0.7734798 (549.19 it/sec) -training >> step=2706300, episode=452 reward=0.774538 (481.45 it/sec) -training >> step=2706400, episode=452 reward=0.7807215 (542.63 it/sec) -training >> step=2706500, episode=452 reward=0.7711177 (551.48 it/sec) -training >> step=2706600, episode=452 reward=0.7578393 (485.43 it/sec) -training >> step=2706700, episode=452 reward=0.7681075 (509.80 it/sec) -training >> step=2706800, episode=452 reward=0.7567076 (500.47 it/sec) -training >> step=2706900, episode=452 reward=0.767141 (530.79 it/sec) -training >> step=2707000, episode=452 reward=0.7530485 (522.02 it/sec) -training >> step=2707100, episode=452 reward=0.7717842 (499.42 it/sec) -training >> step=2707200, episode=452 reward=0.7862561 (531.70 it/sec) -training >> step=2707300, episode=452 reward=0.7730354 (519.29 it/sec) -training >> step=2707400, episode=452 reward=0.7810192 (539.41 it/sec) -training >> step=2707500, episode=452 reward=0.8006108 (487.70 it/sec) -training >> step=2707600, episode=452 reward=0.7712864 (525.69 it/sec) -training >> step=2707700, episode=452 reward=0.7647659 (525.30 it/sec) -training >> step=2707800, episode=452 reward=0.7675061 (537.34 it/sec) -training >> step=2707900, episode=452 reward=0.7620722 (547.90 it/sec) -training >> step=2708000, episode=452 reward=0.7908049 (514.32 it/sec) -training >> step=2708100, episode=452 reward=0.7875574 (528.79 it/sec) -training >> step=2708200, episode=452 reward=0.7735279 (560.96 it/sec) -training >> step=2708300, episode=452 reward=0.7540414 (510.28 it/sec) -training >> step=2708400, episode=452 reward=0.7708326 (523.64 it/sec) -training >> step=2708500, episode=452 reward=0.7660827 (499.19 it/sec) -training >> step=2708600, episode=452 reward=0.7813082 (506.35 it/sec) -training >> step=2708700, episode=452 reward=0.7713513 (506.09 it/sec) -training >> step=2708800, episode=452 reward=0.7417061 (553.56 it/sec) -training >> step=2708900, episode=452 reward=0.7443214 (528.72 it/sec) -training >> step=2709000, episode=452 reward=0.7675223 (538.36 it/sec) -training >> step=2709100, episode=452 reward=0.7820856 (529.21 it/sec) -training >> step=2709200, episode=452 reward=0.7758955 (524.65 it/sec) -training >> step=2709300, episode=452 reward=0.7643613 (569.72 it/sec) -training >> step=2709400, episode=452 reward=0.7686535 (558.97 it/sec) -training >> step=2709500, episode=452 reward=0.7767152 (537.37 it/sec) -training >> step=2709600, episode=452 reward=0.7553344 (523.60 it/sec) -training >> step=2709700, episode=452 reward=0.7700055 (499.03 it/sec) -training >> step=2709800, episode=452 reward=0.756546 (519.14 it/sec) -training >> step=2709900, episode=452 reward=0.7603979 (495.24 it/sec) -training >> step=2710000, episode=452 reward=0.7414843 (530.04 it/sec) -training >> step=2710100, episode=452 reward=0.7780629 (555.54 it/sec) -training >> step=2710200, episode=452 reward=0.7570918 (511.40 it/sec) -training >> step=2710300, episode=452 reward=0.7683845 (548.27 it/sec) -training >> step=2710400, episode=452 reward=0.7841453 (548.33 it/sec) -training >> step=2710500, episode=452 reward=0.7659333 (505.26 it/sec) -training >> step=2710600, episode=452 reward=0.7424213 (533.23 it/sec) -training >> step=2710700, episode=452 reward=0.7667958 (534.20 it/sec) -training >> step=2710800, episode=452 reward=0.7591866 (549.02 it/sec) -training >> step=2710900, episode=452 reward=0.7586651 (552.01 it/sec) -training >> step=2711000, episode=452 reward=0.7583818 (521.25 it/sec) -training >> step=2711100, episode=452 reward=0.7611439 (540.01 it/sec) -training >> step=2711200, episode=452 reward=0.7680253 (519.25 it/sec) -training >> step=2711300, episode=453 reward=0.7601752 (88.79 it/sec) -training >> step=2711400, episode=453 reward=0.7406111 (549.91 it/sec) -training >> step=2711500, episode=453 reward=0.7764171 (521.92 it/sec) -training >> step=2711600, episode=453 reward=0.7327397 (525.09 it/sec) -training >> step=2711700, episode=453 reward=0.7762582 (549.50 it/sec) -training >> step=2711800, episode=453 reward=0.7470077 (514.40 it/sec) -training >> step=2711900, episode=453 reward=0.7413959 (512.21 it/sec) -training >> step=2712000, episode=453 reward=0.7860794 (517.67 it/sec) -training >> step=2712100, episode=453 reward=0.7833562 (569.35 it/sec) -training >> step=2712200, episode=453 reward=0.7588591 (554.67 it/sec) -training >> step=2712300, episode=453 reward=0.782519 (540.85 it/sec) -training >> step=2712400, episode=453 reward=0.7731317 (523.41 it/sec) -training >> step=2712500, episode=453 reward=0.770727 (467.35 it/sec) -training >> step=2712600, episode=453 reward=0.764298 (477.29 it/sec) -training >> step=2712700, episode=453 reward=0.77351 (494.54 it/sec) -training >> step=2712800, episode=453 reward=0.7883406 (537.68 it/sec) -training >> step=2712900, episode=453 reward=0.7420748 (500.47 it/sec) -training >> step=2713000, episode=453 reward=0.7760042 (506.91 it/sec) -training >> step=2713100, episode=453 reward=0.7677261 (563.56 it/sec) -training >> step=2713200, episode=453 reward=0.763247 (524.67 it/sec) -training >> step=2713300, episode=453 reward=0.7622944 (503.51 it/sec) -training >> step=2713400, episode=453 reward=0.7730941 (515.45 it/sec) -training >> step=2713500, episode=453 reward=0.7740347 (551.87 it/sec) -training >> step=2713600, episode=453 reward=0.8036175 (501.65 it/sec) -training >> step=2713700, episode=453 reward=0.7710263 (522.57 it/sec) -training >> step=2713800, episode=453 reward=0.7619785 (531.01 it/sec) -training >> step=2713900, episode=453 reward=0.7843555 (582.69 it/sec) -training >> step=2714000, episode=453 reward=0.7861928 (526.36 it/sec) -training >> step=2714100, episode=453 reward=0.785884 (497.97 it/sec) -training >> step=2714200, episode=453 reward=0.7789761 (526.43 it/sec) -training >> step=2714300, episode=453 reward=0.760115 (502.64 it/sec) -training >> step=2714400, episode=453 reward=0.7772261 (507.91 it/sec) -training >> step=2714500, episode=453 reward=0.7711853 (456.29 it/sec) -training >> step=2714600, episode=453 reward=0.7666644 (506.69 it/sec) -training >> step=2714700, episode=453 reward=0.7698491 (473.98 it/sec) -training >> step=2714800, episode=453 reward=0.7663338 (495.67 it/sec) -training >> step=2714900, episode=453 reward=0.7690197 (460.18 it/sec) -training >> step=2715000, episode=453 reward=0.7598122 (495.85 it/sec) -training >> step=2715100, episode=453 reward=0.7783203 (468.13 it/sec) -training >> step=2715200, episode=453 reward=0.7708319 (503.31 it/sec) -training >> step=2715300, episode=453 reward=0.769691 (484.15 it/sec) -training >> step=2715400, episode=453 reward=0.7713674 (475.70 it/sec) -training >> step=2715500, episode=453 reward=0.7595624 (479.56 it/sec) -training >> step=2715600, episode=453 reward=0.7619146 (466.77 it/sec) -training >> step=2715700, episode=453 reward=0.750871 (572.47 it/sec) -training >> step=2715800, episode=453 reward=0.7748634 (542.38 it/sec) -training >> step=2715900, episode=453 reward=0.732247 (407.44 it/sec) -training >> step=2716000, episode=453 reward=0.7637876 (535.82 it/sec) -training >> step=2716100, episode=453 reward=0.766289 (436.10 it/sec) -training >> step=2716200, episode=453 reward=0.7579689 (456.99 it/sec) -training >> step=2716300, episode=453 reward=0.7740225 (471.48 it/sec) -training >> step=2716400, episode=453 reward=0.7756931 (502.44 it/sec) -training >> step=2716500, episode=453 reward=0.7731912 (428.77 it/sec) -training >> step=2716600, episode=453 reward=0.7611139 (450.31 it/sec) -training >> step=2716700, episode=453 reward=0.7478458 (477.61 it/sec) -training >> step=2716800, episode=453 reward=0.7662786 (497.95 it/sec) -training >> step=2716900, episode=453 reward=0.7789621 (448.81 it/sec) -training >> step=2717000, episode=453 reward=0.7830227 (463.52 it/sec) -training >> step=2717100, episode=453 reward=0.7631045 (475.66 it/sec) -training >> step=2717200, episode=453 reward=0.7649258 (484.67 it/sec) -training >> step=2717300, episode=454 reward=0.7678159 (74.10 it/sec) -training >> step=2717400, episode=454 reward=0.7747054 (498.16 it/sec) -training >> step=2717500, episode=454 reward=0.7535848 (355.11 it/sec) -training >> step=2717600, episode=454 reward=0.7605243 (402.80 it/sec) -training >> step=2717700, episode=454 reward=0.7571645 (458.40 it/sec) -training >> step=2717800, episode=454 reward=0.7768224 (458.78 it/sec) -training >> step=2717900, episode=454 reward=0.7553172 (481.05 it/sec) -training >> step=2718000, episode=454 reward=0.7856401 (539.14 it/sec) -training >> step=2718100, episode=454 reward=0.7752813 (534.26 it/sec) -training >> step=2718200, episode=454 reward=0.768933 (506.87 it/sec) -training >> step=2718300, episode=454 reward=0.7713568 (529.29 it/sec) -training >> step=2718400, episode=454 reward=0.7747098 (527.15 it/sec) -training >> step=2718500, episode=454 reward=0.7612764 (532.33 it/sec) -training >> step=2718600, episode=454 reward=0.7686906 (530.27 it/sec) -training >> step=2718700, episode=454 reward=0.7568279 (518.91 it/sec) -training >> step=2718800, episode=454 reward=0.7736555 (508.81 it/sec) -training >> step=2718900, episode=454 reward=0.7626576 (483.42 it/sec) -training >> step=2719000, episode=454 reward=0.7763017 (501.35 it/sec) -training >> step=2719100, episode=454 reward=0.7508867 (478.99 it/sec) -training >> step=2719200, episode=454 reward=0.7723077 (414.75 it/sec) -training >> step=2719300, episode=454 reward=0.7673365 (373.50 it/sec) -training >> step=2719400, episode=454 reward=0.7695789 (411.27 it/sec) -training >> step=2719500, episode=454 reward=0.7524885 (402.30 it/sec) -training >> step=2719600, episode=454 reward=0.7516171 (433.08 it/sec) -training >> step=2719700, episode=454 reward=0.7539759 (452.85 it/sec) -training >> step=2719800, episode=454 reward=0.7724288 (513.43 it/sec) -training >> step=2719900, episode=454 reward=0.7503259 (481.40 it/sec) -training >> step=2720000, episode=454 reward=0.7828794 (524.20 it/sec) -training >> step=2720100, episode=454 reward=0.7702191 (529.08 it/sec) -training >> step=2720200, episode=454 reward=0.7717873 (521.28 it/sec) -training >> step=2720300, episode=454 reward=0.7739261 (485.71 it/sec) -training >> step=2720400, episode=454 reward=0.7550411 (491.68 it/sec) -training >> step=2720500, episode=454 reward=0.776482 (514.00 it/sec) -training >> step=2720600, episode=454 reward=0.7625945 (504.89 it/sec) -training >> step=2720700, episode=454 reward=0.7716439 (465.28 it/sec) -training >> step=2720800, episode=454 reward=0.7721468 (492.37 it/sec) -training >> step=2720900, episode=454 reward=0.7643988 (508.52 it/sec) -training >> step=2721000, episode=454 reward=0.8067888 (524.55 it/sec) -training >> step=2721100, episode=454 reward=0.7804783 (487.83 it/sec) -training >> step=2721200, episode=454 reward=0.7852461 (507.22 it/sec) -training >> step=2721300, episode=454 reward=0.7847504 (492.90 it/sec) -training >> step=2721400, episode=454 reward=0.7627611 (502.65 it/sec) -training >> step=2721500, episode=454 reward=0.774204 (471.32 it/sec) -training >> step=2721600, episode=454 reward=0.7584251 (493.39 it/sec) -training >> step=2721700, episode=454 reward=0.7905182 (482.23 it/sec) -training >> step=2721800, episode=454 reward=0.7634377 (466.94 it/sec) -training >> step=2721900, episode=454 reward=0.7509701 (523.89 it/sec) -training >> step=2722000, episode=454 reward=0.7538052 (482.97 it/sec) -training >> step=2722100, episode=454 reward=0.7693936 (516.46 it/sec) -training >> step=2722200, episode=454 reward=0.7562349 (458.10 it/sec) -training >> step=2722300, episode=454 reward=0.7683983 (513.51 it/sec) -training >> step=2722400, episode=454 reward=0.764007 (506.83 it/sec) -training >> step=2722500, episode=454 reward=0.7709295 (496.58 it/sec) -training >> step=2722600, episode=454 reward=0.7662298 (488.04 it/sec) -training >> step=2722700, episode=454 reward=0.7753425 (491.82 it/sec) -training >> step=2722800, episode=454 reward=0.751658 (497.78 it/sec) -training >> step=2722900, episode=454 reward=0.7807623 (499.83 it/sec) -training >> step=2723000, episode=454 reward=0.7707881 (538.76 it/sec) -training >> step=2723100, episode=454 reward=0.7708644 (492.64 it/sec) -training >> step=2723200, episode=454 reward=0.7713919 (509.59 it/sec) -training >> step=2723300, episode=455 reward=0.7659008 (65.72 it/sec) -training >> step=2723400, episode=455 reward=0.7658527 (493.91 it/sec) -training >> step=2723500, episode=455 reward=0.7632454 (503.75 it/sec) -training >> step=2723600, episode=455 reward=0.753006 (463.80 it/sec) -training >> step=2723700, episode=455 reward=0.7574087 (502.48 it/sec) -training >> step=2723800, episode=455 reward=0.739314 (478.92 it/sec) -training >> step=2723900, episode=455 reward=0.7546352 (506.81 it/sec) -training >> step=2724000, episode=455 reward=0.7314779 (514.61 it/sec) -training >> step=2724100, episode=455 reward=0.7463996 (522.89 it/sec) -training >> step=2724200, episode=455 reward=0.7590235 (532.72 it/sec) -training >> step=2724300, episode=455 reward=0.7617056 (467.50 it/sec) -training >> step=2724400, episode=455 reward=0.7712208 (477.69 it/sec) -training >> step=2724500, episode=455 reward=0.7819018 (488.04 it/sec) -training >> step=2724600, episode=455 reward=0.7484468 (519.89 it/sec) -training >> step=2724700, episode=455 reward=0.7744851 (539.49 it/sec) -training >> step=2724800, episode=455 reward=0.7631423 (467.11 it/sec) -training >> step=2724900, episode=455 reward=0.7707444 (473.75 it/sec) -training >> step=2725000, episode=455 reward=0.7747626 (483.21 it/sec) -training >> step=2725100, episode=455 reward=0.7668697 (459.55 it/sec) -training >> step=2725200, episode=455 reward=0.762289 (484.09 it/sec) -training >> step=2725300, episode=455 reward=0.7509668 (529.71 it/sec) -training >> step=2725400, episode=455 reward=0.7834468 (516.46 it/sec) -training >> step=2725500, episode=455 reward=0.7840374 (554.89 it/sec) -training >> step=2725600, episode=455 reward=0.7694526 (491.52 it/sec) -training >> step=2725700, episode=455 reward=0.7523738 (538.10 it/sec) -training >> step=2725800, episode=455 reward=0.7583283 (521.93 it/sec) -training >> step=2725900, episode=455 reward=0.7720081 (546.89 it/sec) -training >> step=2726000, episode=455 reward=0.7796203 (522.90 it/sec) -training >> step=2726100, episode=455 reward=0.7766655 (497.46 it/sec) -training >> step=2726200, episode=455 reward=0.7566481 (520.61 it/sec) -training >> step=2726300, episode=455 reward=0.7721392 (503.62 it/sec) -training >> step=2726400, episode=455 reward=0.7826632 (491.83 it/sec) -training >> step=2726500, episode=455 reward=0.7659507 (504.42 it/sec) -training >> step=2726600, episode=455 reward=0.7705834 (563.64 it/sec) -training >> step=2726700, episode=455 reward=0.7733501 (512.08 it/sec) -training >> step=2726800, episode=455 reward=0.7667014 (477.32 it/sec) -training >> step=2726900, episode=455 reward=0.7632085 (511.13 it/sec) -training >> step=2727000, episode=455 reward=0.749532 (505.01 it/sec) -training >> step=2727100, episode=455 reward=0.7703822 (515.42 it/sec) -training >> step=2727200, episode=455 reward=0.7670144 (460.38 it/sec) -training >> step=2727300, episode=455 reward=0.7655479 (511.15 it/sec) -training >> step=2727400, episode=455 reward=0.7660391 (478.71 it/sec) -training >> step=2727500, episode=455 reward=0.777536 (491.75 it/sec) -training >> step=2727600, episode=455 reward=0.784832 (497.03 it/sec) -training >> step=2727700, episode=455 reward=0.7524697 (560.84 it/sec) -training >> step=2727800, episode=455 reward=0.77308 (520.08 it/sec) -training >> step=2727900, episode=455 reward=0.7638509 (495.99 it/sec) -training >> step=2728000, episode=455 reward=0.764613 (494.24 it/sec) -training >> step=2728100, episode=455 reward=0.77357 (538.55 it/sec) -training >> step=2728200, episode=455 reward=0.7778479 (534.35 it/sec) -training >> step=2728300, episode=455 reward=0.7585098 (548.69 it/sec) -training >> step=2728400, episode=455 reward=0.7700656 (511.35 it/sec) -training >> step=2728500, episode=455 reward=0.7623782 (534.88 it/sec) -training >> step=2728600, episode=455 reward=0.7820828 (448.90 it/sec) -training >> step=2728700, episode=455 reward=0.7723619 (520.14 it/sec) -training >> step=2728800, episode=455 reward=0.7596398 (496.38 it/sec) -training >> step=2728900, episode=455 reward=0.7721274 (455.10 it/sec) -training >> step=2729000, episode=455 reward=0.7630911 (495.65 it/sec) -training >> step=2729100, episode=455 reward=0.7728184 (452.95 it/sec) -training >> step=2729200, episode=455 reward=0.764864 (477.98 it/sec) -training >> step=2729300, episode=456 reward=0.7625641 (72.03 it/sec) -training >> step=2729400, episode=456 reward=0.7538048 (527.42 it/sec) -training >> step=2729500, episode=456 reward=0.7848459 (464.79 it/sec) -training >> step=2729600, episode=456 reward=0.7805369 (537.62 it/sec) -training >> step=2729700, episode=456 reward=0.754882 (502.24 it/sec) -training >> step=2729800, episode=456 reward=0.7551951 (457.19 it/sec) -training >> step=2729900, episode=456 reward=0.7638692 (475.63 it/sec) -training >> step=2730000, episode=456 reward=0.7454993 (503.27 it/sec) -training >> step=2730100, episode=456 reward=0.7558059 (492.73 it/sec) -training >> step=2730200, episode=456 reward=0.751224 (539.03 it/sec) -training >> step=2730300, episode=456 reward=0.7773174 (541.19 it/sec) -training >> step=2730400, episode=456 reward=0.7901654 (515.32 it/sec) -training >> step=2730500, episode=456 reward=0.7689793 (525.15 it/sec) -training >> step=2730600, episode=456 reward=0.7846962 (564.08 it/sec) -training >> step=2730700, episode=456 reward=0.7889785 (526.77 it/sec) -training >> step=2730800, episode=456 reward=0.7921042 (539.42 it/sec) -training >> step=2730900, episode=456 reward=0.7816213 (552.71 it/sec) -training >> step=2731000, episode=456 reward=0.7499824 (465.97 it/sec) -training >> step=2731100, episode=456 reward=0.7536619 (500.18 it/sec) -training >> step=2731200, episode=456 reward=0.7761547 (488.80 it/sec) -training >> step=2731300, episode=456 reward=0.8079899 (434.76 it/sec) -training >> step=2731400, episode=456 reward=0.7646803 (513.77 it/sec) -training >> step=2731500, episode=456 reward=0.7483302 (499.55 it/sec) -training >> step=2731600, episode=456 reward=0.772943 (569.57 it/sec) -training >> step=2731700, episode=456 reward=0.7489691 (555.27 it/sec) -training >> step=2731800, episode=456 reward=0.7923551 (538.79 it/sec) -training >> step=2731900, episode=456 reward=0.7635679 (529.31 it/sec) -training >> step=2732000, episode=456 reward=0.7783374 (504.22 it/sec) -training >> step=2732100, episode=456 reward=0.7719433 (499.46 it/sec) -training >> step=2732200, episode=456 reward=0.7552606 (480.25 it/sec) -training >> step=2732300, episode=456 reward=0.7944766 (503.76 it/sec) -training >> step=2732400, episode=456 reward=0.7773339 (425.62 it/sec) -training >> step=2732500, episode=456 reward=0.7556966 (433.21 it/sec) -training >> step=2732600, episode=456 reward=0.764087 (488.30 it/sec) -training >> step=2732700, episode=456 reward=0.7708954 (503.99 it/sec) -training >> step=2732800, episode=456 reward=0.7681404 (470.51 it/sec) -training >> step=2732900, episode=456 reward=0.7468362 (482.18 it/sec) -training >> step=2733000, episode=456 reward=0.761288 (501.48 it/sec) -training >> step=2733100, episode=456 reward=0.7865586 (517.41 it/sec) -training >> step=2733200, episode=456 reward=0.7399867 (487.06 it/sec) -training >> step=2733300, episode=456 reward=0.7531855 (462.29 it/sec) -training >> step=2733400, episode=456 reward=0.7913405 (504.23 it/sec) -training >> step=2733500, episode=456 reward=0.7715801 (450.63 it/sec) -training >> step=2733600, episode=456 reward=0.7667588 (383.10 it/sec) -training >> step=2733700, episode=456 reward=0.7712069 (401.00 it/sec) -training >> step=2733800, episode=456 reward=0.758122 (432.49 it/sec) -training >> step=2733900, episode=456 reward=0.7754386 (375.39 it/sec) -training >> step=2734000, episode=456 reward=0.7486287 (392.75 it/sec) -training >> step=2734100, episode=456 reward=0.7860962 (457.43 it/sec) -training >> step=2734200, episode=456 reward=0.798449 (440.81 it/sec) -training >> step=2734300, episode=456 reward=0.7676467 (429.38 it/sec) -training >> step=2734400, episode=456 reward=0.7723225 (465.17 it/sec) -training >> step=2734500, episode=456 reward=0.7533129 (510.51 it/sec) -training >> step=2734600, episode=456 reward=0.7768981 (432.45 it/sec) -training >> step=2734700, episode=456 reward=0.7614572 (407.81 it/sec) -training >> step=2734800, episode=456 reward=0.761196 (402.80 it/sec) -training >> step=2734900, episode=456 reward=0.76475 (414.47 it/sec) -training >> step=2735000, episode=456 reward=0.7458311 (409.28 it/sec) -training >> step=2735100, episode=456 reward=0.7646784 (435.57 it/sec) -training >> step=2735200, episode=456 reward=0.7855745 (479.78 it/sec) -training >> step=2735300, episode=457 reward=0.7786145 (59.62 it/sec) -training >> step=2735400, episode=457 reward=0.7615974 (457.56 it/sec) -training >> step=2735500, episode=457 reward=0.7473032 (492.62 it/sec) -training >> step=2735600, episode=457 reward=0.755253 (524.95 it/sec) -training >> step=2735700, episode=457 reward=0.7556906 (521.57 it/sec) -training >> step=2735800, episode=457 reward=0.7743223 (481.55 it/sec) -training >> step=2735900, episode=457 reward=0.762486 (442.51 it/sec) -training >> step=2736000, episode=457 reward=0.7560844 (428.12 it/sec) -training >> step=2736100, episode=457 reward=0.7677204 (468.05 it/sec) -training >> step=2736200, episode=457 reward=0.7606491 (523.13 it/sec) -training >> step=2736300, episode=457 reward=0.7959436 (478.86 it/sec) -training >> step=2736400, episode=457 reward=0.7869423 (458.05 it/sec) -training >> step=2736500, episode=457 reward=0.7759197 (540.02 it/sec) -training >> step=2736600, episode=457 reward=0.7796987 (518.92 it/sec) -training >> step=2736700, episode=457 reward=0.7535414 (517.84 it/sec) -training >> step=2736800, episode=457 reward=0.7618077 (500.46 it/sec) -training >> step=2736900, episode=457 reward=0.7542751 (511.39 it/sec) -training >> step=2737000, episode=457 reward=0.7556349 (441.26 it/sec) -training >> step=2737100, episode=457 reward=0.7782868 (521.47 it/sec) -training >> step=2737200, episode=457 reward=0.7794147 (516.39 it/sec) -training >> step=2737300, episode=457 reward=0.781433 (466.05 it/sec) -training >> step=2737400, episode=457 reward=0.7804779 (444.43 it/sec) -training >> step=2737500, episode=457 reward=0.7712737 (414.41 it/sec) -training >> step=2737600, episode=457 reward=0.7780132 (499.37 it/sec) -training >> step=2737700, episode=457 reward=0.7616681 (466.46 it/sec) -training >> step=2737800, episode=457 reward=0.7811087 (470.79 it/sec) -training >> step=2737900, episode=457 reward=0.7723155 (420.79 it/sec) -training >> step=2738000, episode=457 reward=0.7681392 (543.65 it/sec) -training >> step=2738100, episode=457 reward=0.757461 (455.79 it/sec) -training >> step=2738200, episode=457 reward=0.7825894 (480.27 it/sec) -training >> step=2738300, episode=457 reward=0.7692302 (493.78 it/sec) -training >> step=2738400, episode=457 reward=0.7710317 (495.75 it/sec) -training >> step=2738500, episode=457 reward=0.7557834 (459.90 it/sec) -training >> step=2738600, episode=457 reward=0.7615432 (496.70 it/sec) -training >> step=2738700, episode=457 reward=0.7762716 (503.71 it/sec) -training >> step=2738800, episode=457 reward=0.7839203 (532.55 it/sec) -training >> step=2738900, episode=457 reward=0.7707606 (477.71 it/sec) -training >> step=2739000, episode=457 reward=0.8052574 (441.77 it/sec) -training >> step=2739100, episode=457 reward=0.7855819 (472.16 it/sec) -training >> step=2739200, episode=457 reward=0.7701814 (482.38 it/sec) -training >> step=2739300, episode=457 reward=0.7583268 (526.15 it/sec) -training >> step=2739400, episode=457 reward=0.7849985 (480.13 it/sec) -training >> step=2739500, episode=457 reward=0.7451491 (461.84 it/sec) -training >> step=2739600, episode=457 reward=0.770033 (477.25 it/sec) -training >> step=2739700, episode=457 reward=0.8009943 (435.17 it/sec) -training >> step=2739800, episode=457 reward=0.771294 (463.04 it/sec) -training >> step=2739900, episode=457 reward=0.7412276 (498.88 it/sec) -training >> step=2740000, episode=457 reward=0.761083 (468.72 it/sec) -training >> step=2740100, episode=457 reward=0.7603227 (499.56 it/sec) -training >> step=2740200, episode=457 reward=0.7549573 (518.47 it/sec) -training >> step=2740300, episode=457 reward=0.7744817 (537.09 it/sec) -training >> step=2740400, episode=457 reward=0.7534485 (489.92 it/sec) -training >> step=2740500, episode=457 reward=0.7768548 (473.56 it/sec) -training >> step=2740600, episode=457 reward=0.7737731 (495.63 it/sec) -training >> step=2740700, episode=457 reward=0.7850536 (521.27 it/sec) -training >> step=2740800, episode=457 reward=0.7620048 (537.81 it/sec) -training >> step=2740900, episode=457 reward=0.7312445 (543.06 it/sec) -training >> step=2741000, episode=457 reward=0.7519355 (530.83 it/sec) -training >> step=2741100, episode=457 reward=0.7712203 (463.64 it/sec) -training >> step=2741200, episode=457 reward=0.7780268 (477.81 it/sec) -training >> step=2741300, episode=458 reward=0.7688398 (65.38 it/sec) -training >> step=2741400, episode=458 reward=0.7777798 (501.46 it/sec) -training >> step=2741500, episode=458 reward=0.7592595 (510.14 it/sec) -training >> step=2741600, episode=458 reward=0.7800382 (437.63 it/sec) -training >> step=2741700, episode=458 reward=0.7523673 (444.52 it/sec) -training >> step=2741800, episode=458 reward=0.7669587 (408.37 it/sec) -training >> step=2741900, episode=458 reward=0.7708179 (448.41 it/sec) -training >> step=2742000, episode=458 reward=0.7697911 (525.97 it/sec) -training >> step=2742100, episode=458 reward=0.7640827 (537.22 it/sec) -training >> step=2742200, episode=458 reward=0.7651678 (504.42 it/sec) -training >> step=2742300, episode=458 reward=0.7757992 (539.44 it/sec) -training >> step=2742400, episode=458 reward=0.7785004 (533.44 it/sec) -training >> step=2742500, episode=458 reward=0.7898358 (540.72 it/sec) -training >> step=2742600, episode=458 reward=0.7623096 (499.13 it/sec) -training >> step=2742700, episode=458 reward=0.7510973 (556.05 it/sec) -training >> step=2742800, episode=458 reward=0.7801131 (573.49 it/sec) -training >> step=2742900, episode=458 reward=0.7638624 (538.09 it/sec) -training >> step=2743000, episode=458 reward=0.766335 (518.92 it/sec) -training >> step=2743100, episode=458 reward=0.7624548 (555.44 it/sec) -training >> step=2743200, episode=458 reward=0.7643359 (501.47 it/sec) -training >> step=2743300, episode=458 reward=0.7650865 (485.86 it/sec) -training >> step=2743400, episode=458 reward=0.7768316 (520.19 it/sec) -training >> step=2743500, episode=458 reward=0.7806865 (542.22 it/sec) -training >> step=2743600, episode=458 reward=0.7851076 (504.59 it/sec) -training >> step=2743700, episode=458 reward=0.7796496 (527.59 it/sec) -training >> step=2743800, episode=458 reward=0.7875604 (561.39 it/sec) -training >> step=2743900, episode=458 reward=0.7445771 (505.87 it/sec) -training >> step=2744000, episode=458 reward=0.7780823 (518.22 it/sec) -training >> step=2744100, episode=458 reward=0.7901942 (521.61 it/sec) -training >> step=2744200, episode=458 reward=0.7657351 (550.72 it/sec) -training >> step=2744300, episode=458 reward=0.7699857 (514.62 it/sec) -training >> step=2744400, episode=458 reward=0.7589564 (549.19 it/sec) -training >> step=2744500, episode=458 reward=0.7709613 (522.38 it/sec) -training >> step=2744600, episode=458 reward=0.7651121 (497.77 it/sec) -training >> step=2744700, episode=458 reward=0.7477066 (541.93 it/sec) -training >> step=2744800, episode=458 reward=0.7659284 (497.69 it/sec) -training >> step=2744900, episode=458 reward=0.7671052 (560.91 it/sec) -training >> step=2745000, episode=458 reward=0.7452182 (546.66 it/sec) -training >> step=2745100, episode=458 reward=0.779758 (517.23 it/sec) -training >> step=2745200, episode=458 reward=0.7718397 (511.12 it/sec) -training >> step=2745300, episode=458 reward=0.7641954 (545.39 it/sec) -training >> step=2745400, episode=458 reward=0.7788761 (524.61 it/sec) -training >> step=2745500, episode=458 reward=0.7676393 (529.69 it/sec) -training >> step=2745600, episode=458 reward=0.7813193 (501.22 it/sec) -training >> step=2745700, episode=458 reward=0.7973231 (518.77 it/sec) -training >> step=2745800, episode=458 reward=0.7801418 (527.01 it/sec) -training >> step=2745900, episode=458 reward=0.7615298 (528.04 it/sec) -training >> step=2746000, episode=458 reward=0.7611515 (452.18 it/sec) -training >> step=2746100, episode=458 reward=0.7619905 (535.17 it/sec) -training >> step=2746200, episode=458 reward=0.7606039 (547.27 it/sec) -training >> step=2746300, episode=458 reward=0.7763675 (526.40 it/sec) -training >> step=2746400, episode=458 reward=0.7642562 (566.07 it/sec) -training >> step=2746500, episode=458 reward=0.7558289 (536.04 it/sec) -training >> step=2746600, episode=458 reward=0.7606062 (511.73 it/sec) -training >> step=2746700, episode=458 reward=0.7704813 (538.25 it/sec) -training >> step=2746800, episode=458 reward=0.7632763 (538.12 it/sec) -training >> step=2746900, episode=458 reward=0.7465433 (512.75 it/sec) -training >> step=2747000, episode=458 reward=0.7690179 (556.57 it/sec) -training >> step=2747100, episode=458 reward=0.7646253 (576.50 it/sec) -training >> step=2747200, episode=458 reward=0.7556765 (501.31 it/sec) -training >> step=2747300, episode=459 reward=0.7979717 (62.76 it/sec) -training >> step=2747400, episode=459 reward=0.7549295 (477.44 it/sec) -training >> step=2747500, episode=459 reward=0.7487527 (470.23 it/sec) -training >> step=2747600, episode=459 reward=0.7602642 (438.36 it/sec) -training >> step=2747700, episode=459 reward=0.7579282 (477.73 it/sec) -training >> step=2747800, episode=459 reward=0.7822295 (463.75 it/sec) -training >> step=2747900, episode=459 reward=0.770844 (440.68 it/sec) -training >> step=2748000, episode=459 reward=0.7455 (516.26 it/sec) -training >> step=2748100, episode=459 reward=0.7703414 (514.93 it/sec) -training >> step=2748200, episode=459 reward=0.7612236 (548.55 it/sec) -training >> step=2748300, episode=459 reward=0.7544349 (556.41 it/sec) -training >> step=2748400, episode=459 reward=0.762425 (523.37 it/sec) -training >> step=2748500, episode=459 reward=0.7695941 (558.58 it/sec) -training >> step=2748600, episode=459 reward=0.7993965 (536.30 it/sec) -training >> step=2748700, episode=459 reward=0.7545375 (571.38 it/sec) -training >> step=2748800, episode=459 reward=0.7730329 (543.21 it/sec) -training >> step=2748900, episode=459 reward=0.7711272 (535.95 it/sec) -training >> step=2749000, episode=459 reward=0.7732177 (561.99 it/sec) -training >> step=2749100, episode=459 reward=0.7796925 (510.57 it/sec) -training >> step=2749200, episode=459 reward=0.7829693 (520.16 it/sec) -training >> step=2749300, episode=459 reward=0.7801604 (525.83 it/sec) -training >> step=2749400, episode=459 reward=0.7809504 (571.60 it/sec) -training >> step=2749500, episode=459 reward=0.7499488 (517.28 it/sec) -training >> step=2749600, episode=459 reward=0.7748885 (531.92 it/sec) -training >> step=2749700, episode=459 reward=0.7931917 (526.87 it/sec) -training >> step=2749800, episode=459 reward=0.7739414 (552.33 it/sec) -training >> step=2749900, episode=459 reward=0.7783161 (530.85 it/sec) -training >> step=2750000, episode=459 reward=0.7553269 (546.90 it/sec) -training >> step=2750100, episode=459 reward=0.7739083 (529.71 it/sec) -training >> step=2750200, episode=459 reward=0.7497753 (513.06 it/sec) -training >> step=2750300, episode=459 reward=0.7728788 (487.66 it/sec) -training >> step=2750400, episode=459 reward=0.7592599 (533.26 it/sec) -training >> step=2750500, episode=459 reward=0.7894362 (585.24 it/sec) -training >> step=2750600, episode=459 reward=0.7449958 (501.13 it/sec) -training >> step=2750700, episode=459 reward=0.7529255 (515.86 it/sec) -training >> step=2750800, episode=459 reward=0.7524986 (534.82 it/sec) -training >> step=2750900, episode=459 reward=0.7701371 (548.75 it/sec) -training >> step=2751000, episode=459 reward=0.7843874 (543.27 it/sec) -training >> step=2751100, episode=459 reward=0.7476897 (532.22 it/sec) -training >> step=2751200, episode=459 reward=0.7597668 (556.09 it/sec) -training >> step=2751300, episode=459 reward=0.7676327 (524.12 it/sec) -training >> step=2751400, episode=459 reward=0.7918935 (472.62 it/sec) -training >> step=2751500, episode=459 reward=0.767095 (474.43 it/sec) -training >> step=2751600, episode=459 reward=0.7741312 (563.48 it/sec) -training >> step=2751700, episode=459 reward=0.7564343 (528.69 it/sec) -training >> step=2751800, episode=459 reward=0.7540801 (530.80 it/sec) -training >> step=2751900, episode=459 reward=0.7649001 (513.41 it/sec) -training >> step=2752000, episode=459 reward=0.773214 (528.87 it/sec) -training >> step=2752100, episode=459 reward=0.738858 (550.41 it/sec) -training >> step=2752200, episode=459 reward=0.7676111 (500.54 it/sec) -training >> step=2752300, episode=459 reward=0.7623049 (551.93 it/sec) -training >> step=2752400, episode=459 reward=0.7614616 (521.58 it/sec) -training >> step=2752500, episode=459 reward=0.7637321 (447.10 it/sec) -training >> step=2752600, episode=459 reward=0.772513 (517.05 it/sec) -training >> step=2752700, episode=459 reward=0.7629645 (427.57 it/sec) -training >> step=2752800, episode=459 reward=0.7566516 (515.44 it/sec) -training >> step=2752900, episode=459 reward=0.770013 (510.70 it/sec) -training >> step=2753000, episode=459 reward=0.7672008 (474.67 it/sec) -training >> step=2753100, episode=459 reward=0.7619666 (502.13 it/sec) -training >> step=2753200, episode=459 reward=0.7492464 (500.68 it/sec) -training >> step=2753300, episode=460 reward=0.7619489 (113.96 it/sec) -training >> step=2753400, episode=460 reward=0.7728272 (570.57 it/sec) -training >> step=2753500, episode=460 reward=0.7692422 (522.85 it/sec) -training >> step=2753600, episode=460 reward=0.763432 (513.03 it/sec) -training >> step=2753700, episode=460 reward=0.7620014 (524.22 it/sec) -training >> step=2753800, episode=460 reward=0.7686074 (520.46 it/sec) -training >> step=2753900, episode=460 reward=0.754343 (524.38 it/sec) -training >> step=2754000, episode=460 reward=0.7598071 (562.86 it/sec) -training >> step=2754100, episode=460 reward=0.7787826 (563.48 it/sec) -training >> step=2754200, episode=460 reward=0.7685307 (507.98 it/sec) -training >> step=2754300, episode=460 reward=0.77102 (533.67 it/sec) -training >> step=2754400, episode=460 reward=0.7810495 (550.50 it/sec) -training >> step=2754500, episode=460 reward=0.7692216 (562.59 it/sec) -training >> step=2754600, episode=460 reward=0.7707955 (507.70 it/sec) -training >> step=2754700, episode=460 reward=0.7881143 (534.49 it/sec) -training >> step=2754800, episode=460 reward=0.7572786 (531.09 it/sec) -training >> step=2754900, episode=460 reward=0.7757334 (488.01 it/sec) -training >> step=2755000, episode=460 reward=0.7883097 (460.82 it/sec) -training >> step=2755100, episode=460 reward=0.7505921 (454.65 it/sec) -training >> step=2755200, episode=460 reward=0.7714282 (504.06 it/sec) -training >> step=2755300, episode=460 reward=0.7642164 (497.13 it/sec) -training >> step=2755400, episode=460 reward=0.7935081 (570.86 it/sec) -training >> step=2755500, episode=460 reward=0.7783434 (529.55 it/sec) -training >> step=2755600, episode=460 reward=0.7820691 (501.04 it/sec) -training >> step=2755700, episode=460 reward=0.7724012 (532.74 it/sec) -training >> step=2755800, episode=460 reward=0.7909217 (506.26 it/sec) -training >> step=2755900, episode=460 reward=0.7889256 (530.79 it/sec) -training >> step=2756000, episode=460 reward=0.7776252 (544.90 it/sec) -training >> step=2756100, episode=460 reward=0.7642648 (510.25 it/sec) -training >> step=2756200, episode=460 reward=0.7904354 (490.24 it/sec) -training >> step=2756300, episode=460 reward=0.7471964 (423.83 it/sec) -training >> step=2756400, episode=460 reward=0.7813317 (458.73 it/sec) -training >> step=2756500, episode=460 reward=0.7763452 (444.94 it/sec) -training >> step=2756600, episode=460 reward=0.7600406 (494.34 it/sec) -training >> step=2756700, episode=460 reward=0.770847 (517.96 it/sec) -training >> step=2756800, episode=460 reward=0.7576054 (507.74 it/sec) -training >> step=2756900, episode=460 reward=0.7510657 (425.40 it/sec) -training >> step=2757000, episode=460 reward=0.7612045 (525.90 it/sec) -training >> step=2757100, episode=460 reward=0.7514619 (503.28 it/sec) -training >> step=2757200, episode=460 reward=0.7527008 (503.96 it/sec) -training >> step=2757300, episode=460 reward=0.7724665 (455.85 it/sec) -training >> step=2757400, episode=460 reward=0.7676621 (499.11 it/sec) -training >> step=2757500, episode=460 reward=0.7683579 (465.75 it/sec) -training >> step=2757600, episode=460 reward=0.750448 (492.57 it/sec) -training >> step=2757700, episode=460 reward=0.7873807 (492.58 it/sec) -training >> step=2757800, episode=460 reward=0.7712443 (421.68 it/sec) -training >> step=2757900, episode=460 reward=0.766798 (463.91 it/sec) -training >> step=2758000, episode=460 reward=0.7518262 (492.01 it/sec) -training >> step=2758100, episode=460 reward=0.7800957 (442.32 it/sec) -training >> step=2758200, episode=460 reward=0.7512802 (447.35 it/sec) -training >> step=2758300, episode=460 reward=0.7557406 (458.62 it/sec) -training >> step=2758400, episode=460 reward=0.7418565 (482.72 it/sec) -training >> step=2758500, episode=460 reward=0.7845818 (436.95 it/sec) -training >> step=2758600, episode=460 reward=0.7555693 (435.44 it/sec) -training >> step=2758700, episode=460 reward=0.7502012 (444.10 it/sec) -training >> step=2758800, episode=460 reward=0.7659634 (464.49 it/sec) -training >> step=2758900, episode=460 reward=0.7559828 (507.77 it/sec) -training >> step=2759000, episode=460 reward=0.7651832 (511.29 it/sec) -training >> step=2759100, episode=460 reward=0.7705415 (522.17 it/sec) -training >> step=2759200, episode=460 reward=0.7425458 (505.31 it/sec) -training >> step=2759300, episode=461 reward=0.771974 (76.11 it/sec) -training >> step=2759400, episode=461 reward=0.765281 (521.48 it/sec) -training >> step=2759500, episode=461 reward=0.7490616 (485.23 it/sec) -training >> step=2759600, episode=461 reward=0.7569195 (502.87 it/sec) -training >> step=2759700, episode=461 reward=0.7802886 (479.91 it/sec) -training >> step=2759800, episode=461 reward=0.7712745 (510.32 it/sec) -training >> step=2759900, episode=461 reward=0.7556063 (478.46 it/sec) -training >> step=2760000, episode=461 reward=0.7642041 (546.97 it/sec) -training >> step=2760100, episode=461 reward=0.8034979 (523.67 it/sec) -training >> step=2760200, episode=461 reward=0.7870949 (504.56 it/sec) -training >> step=2760300, episode=461 reward=0.7338442 (531.59 it/sec) -training >> step=2760400, episode=461 reward=0.7690688 (519.35 it/sec) -training >> step=2760500, episode=461 reward=0.7749598 (516.03 it/sec) -training >> step=2760600, episode=461 reward=0.7537875 (503.99 it/sec) -training >> step=2760700, episode=461 reward=0.7521815 (512.57 it/sec) -training >> step=2760800, episode=461 reward=0.7727125 (497.02 it/sec) -training >> step=2760900, episode=461 reward=0.767617 (512.82 it/sec) -training >> step=2761000, episode=461 reward=0.7514252 (528.64 it/sec) -training >> step=2761100, episode=461 reward=0.7850111 (514.84 it/sec) -training >> step=2761200, episode=461 reward=0.7872669 (513.29 it/sec) -training >> step=2761300, episode=461 reward=0.7718018 (471.49 it/sec) -training >> step=2761400, episode=461 reward=0.7583395 (514.82 it/sec) -training >> step=2761500, episode=461 reward=0.7635394 (529.55 it/sec) -training >> step=2761600, episode=461 reward=0.7848793 (536.07 it/sec) -training >> step=2761700, episode=461 reward=0.7704639 (457.25 it/sec) -training >> step=2761800, episode=461 reward=0.7751263 (478.96 it/sec) -training >> step=2761900, episode=461 reward=0.7580699 (474.29 it/sec) -training >> step=2762000, episode=461 reward=0.7587005 (439.15 it/sec) -training >> step=2762100, episode=461 reward=0.739163 (509.19 it/sec) -training >> step=2762200, episode=461 reward=0.7568936 (521.02 it/sec) -training >> step=2762300, episode=461 reward=0.7489061 (501.85 it/sec) -training >> step=2762400, episode=461 reward=0.7760839 (560.49 it/sec) -training >> step=2762500, episode=461 reward=0.7855102 (549.89 it/sec) -training >> step=2762600, episode=461 reward=0.7621998 (527.00 it/sec) -training >> step=2762700, episode=461 reward=0.7551395 (530.37 it/sec) -training >> step=2762800, episode=461 reward=0.7734817 (362.59 it/sec) -training >> step=2762900, episode=461 reward=0.7846701 (384.81 it/sec) -training >> step=2763000, episode=461 reward=0.7806734 (417.46 it/sec) -training >> step=2763100, episode=461 reward=0.7845865 (500.35 it/sec) -training >> step=2763200, episode=461 reward=0.7727219 (554.60 it/sec) -training >> step=2763300, episode=461 reward=0.7506747 (497.51 it/sec) -training >> step=2763400, episode=461 reward=0.7658433 (500.39 it/sec) -training >> step=2763500, episode=461 reward=0.7715009 (442.01 it/sec) -training >> step=2763600, episode=461 reward=0.7739201 (543.43 it/sec) -training >> step=2763700, episode=461 reward=0.775155 (544.86 it/sec) -training >> step=2763800, episode=461 reward=0.7616163 (502.88 it/sec) -training >> step=2763900, episode=461 reward=0.7854918 (565.72 it/sec) -training >> step=2764000, episode=461 reward=0.7714643 (559.03 it/sec) -training >> step=2764100, episode=461 reward=0.7868426 (548.72 it/sec) -training >> step=2764200, episode=461 reward=0.7590809 (560.55 it/sec) -training >> step=2764300, episode=461 reward=0.7537439 (558.29 it/sec) -training >> step=2764400, episode=461 reward=0.7649716 (487.44 it/sec) -training >> step=2764500, episode=461 reward=0.7687029 (442.76 it/sec) -training >> step=2764600, episode=461 reward=0.7600792 (547.88 it/sec) -training >> step=2764700, episode=461 reward=0.7681509 (535.42 it/sec) -training >> step=2764800, episode=461 reward=0.7599185 (537.28 it/sec) -training >> step=2764900, episode=461 reward=0.8025452 (519.91 it/sec) -training >> step=2765000, episode=461 reward=0.7742724 (584.82 it/sec) -training >> step=2765100, episode=461 reward=0.7607785 (537.78 it/sec) -training >> step=2765200, episode=461 reward=0.7720534 (523.35 it/sec) -training >> step=2765300, episode=462 reward=0.783501 (110.35 it/sec) -training >> step=2765400, episode=462 reward=0.7538977 (571.67 it/sec) -training >> step=2765500, episode=462 reward=0.7647644 (539.80 it/sec) -training >> step=2765600, episode=462 reward=0.7680076 (481.34 it/sec) -training >> step=2765700, episode=462 reward=0.7737322 (544.46 it/sec) -training >> step=2765800, episode=462 reward=0.7912416 (502.61 it/sec) -training >> step=2765900, episode=462 reward=0.7812305 (540.89 it/sec) -training >> step=2766000, episode=462 reward=0.762702 (532.84 it/sec) -training >> step=2766100, episode=462 reward=0.7543485 (485.74 it/sec) -training >> step=2766200, episode=462 reward=0.7795653 (506.83 it/sec) -training >> step=2766300, episode=462 reward=0.76695 (486.55 it/sec) -training >> step=2766400, episode=462 reward=0.7678316 (515.89 it/sec) -training >> step=2766500, episode=462 reward=0.7460651 (538.70 it/sec) -training >> step=2766600, episode=462 reward=0.7885301 (497.00 it/sec) -training >> step=2766700, episode=462 reward=0.7719232 (478.59 it/sec) -training >> step=2766800, episode=462 reward=0.7620747 (523.36 it/sec) -training >> step=2766900, episode=462 reward=0.7653818 (508.20 it/sec) -training >> step=2767000, episode=462 reward=0.7801291 (524.82 it/sec) -training >> step=2767100, episode=462 reward=0.7688814 (524.82 it/sec) -training >> step=2767200, episode=462 reward=0.7781113 (500.63 it/sec) -training >> step=2767300, episode=462 reward=0.7750777 (510.89 it/sec) -training >> step=2767400, episode=462 reward=0.7518368 (507.94 it/sec) -training >> step=2767500, episode=462 reward=0.7594917 (518.10 it/sec) -training >> step=2767600, episode=462 reward=0.7813782 (536.38 it/sec) -training >> step=2767700, episode=462 reward=0.7709793 (513.76 it/sec) -training >> step=2767800, episode=462 reward=0.7510285 (509.27 it/sec) -training >> step=2767900, episode=462 reward=0.7658176 (544.88 it/sec) -training >> step=2768000, episode=462 reward=0.7520385 (512.16 it/sec) -training >> step=2768100, episode=462 reward=0.7620715 (522.23 it/sec) -training >> step=2768200, episode=462 reward=0.7759159 (520.04 it/sec) -training >> step=2768300, episode=462 reward=0.7589208 (538.97 it/sec) -training >> step=2768400, episode=462 reward=0.7717454 (507.16 it/sec) -training >> step=2768500, episode=462 reward=0.7389956 (522.81 it/sec) -training >> step=2768600, episode=462 reward=0.7492643 (542.66 it/sec) -training >> step=2768700, episode=462 reward=0.756482 (507.66 it/sec) -training >> step=2768800, episode=462 reward=0.7752296 (511.31 it/sec) -training >> step=2768900, episode=462 reward=0.7849889 (503.88 it/sec) -training >> step=2769000, episode=462 reward=0.7568101 (567.84 it/sec) -training >> step=2769100, episode=462 reward=0.7770039 (529.22 it/sec) -training >> step=2769200, episode=462 reward=0.7793342 (506.60 it/sec) -training >> step=2769300, episode=462 reward=0.7876456 (449.24 it/sec) -training >> step=2769400, episode=462 reward=0.7530353 (504.86 it/sec) -training >> step=2769500, episode=462 reward=0.7833729 (530.85 it/sec) -training >> step=2769600, episode=462 reward=0.7806152 (525.23 it/sec) -training >> step=2769700, episode=462 reward=0.7606668 (550.68 it/sec) -training >> step=2769800, episode=462 reward=0.7514721 (395.69 it/sec) -training >> step=2769900, episode=462 reward=0.7774175 (471.40 it/sec) -training >> step=2770000, episode=462 reward=0.7855574 (518.80 it/sec) -training >> step=2770100, episode=462 reward=0.7653621 (567.50 it/sec) -training >> step=2770200, episode=462 reward=0.7477759 (538.43 it/sec) -training >> step=2770300, episode=462 reward=0.7871079 (524.93 it/sec) -training >> step=2770400, episode=462 reward=0.7846466 (495.39 it/sec) -training >> step=2770500, episode=462 reward=0.7525777 (530.70 it/sec) -training >> step=2770600, episode=462 reward=0.7841707 (538.35 it/sec) -training >> step=2770700, episode=462 reward=0.7571911 (533.83 it/sec) -training >> step=2770800, episode=462 reward=0.7599161 (535.32 it/sec) -training >> step=2770900, episode=462 reward=0.7567071 (508.46 it/sec) -training >> step=2771000, episode=462 reward=0.7650442 (494.29 it/sec) -training >> step=2771100, episode=462 reward=0.7584921 (558.22 it/sec) -training >> step=2771200, episode=462 reward=0.7858961 (558.31 it/sec) -training >> step=2771300, episode=463 reward=0.7584936 (114.88 it/sec) -training >> step=2771400, episode=463 reward=0.7945274 (530.14 it/sec) -training >> step=2771500, episode=463 reward=0.7372082 (510.15 it/sec) -training >> step=2771600, episode=463 reward=0.76283 (522.36 it/sec) -training >> step=2771700, episode=463 reward=0.7883567 (526.36 it/sec) -training >> step=2771800, episode=463 reward=0.7833629 (522.20 it/sec) -training >> step=2771900, episode=463 reward=0.7529981 (551.93 it/sec) -training >> step=2772000, episode=463 reward=0.7817282 (523.14 it/sec) -training >> step=2772100, episode=463 reward=0.7731696 (509.35 it/sec) -training >> step=2772200, episode=463 reward=0.7659377 (530.53 it/sec) -training >> step=2772300, episode=463 reward=0.7937606 (545.52 it/sec) -training >> step=2772400, episode=463 reward=0.7615545 (534.47 it/sec) -training >> step=2772500, episode=463 reward=0.7888795 (500.70 it/sec) -training >> step=2772600, episode=463 reward=0.7490409 (556.05 it/sec) -training >> step=2772700, episode=463 reward=0.7534501 (487.60 it/sec) -training >> step=2772800, episode=463 reward=0.7743577 (495.26 it/sec) -training >> step=2772900, episode=463 reward=0.7879287 (562.38 it/sec) -training >> step=2773000, episode=463 reward=0.768503 (550.59 it/sec) -training >> step=2773100, episode=463 reward=0.764771 (526.39 it/sec) -training >> step=2773200, episode=463 reward=0.7848525 (537.58 it/sec) -training >> step=2773300, episode=463 reward=0.7829971 (552.74 it/sec) -training >> step=2773400, episode=463 reward=0.7684274 (520.93 it/sec) -training >> step=2773500, episode=463 reward=0.7830085 (550.87 it/sec) -training >> step=2773600, episode=463 reward=0.7834371 (533.04 it/sec) -training >> step=2773700, episode=463 reward=0.7391587 (478.56 it/sec) -training >> step=2773800, episode=463 reward=0.7702149 (481.15 it/sec) -training >> step=2773900, episode=463 reward=0.7787461 (477.35 it/sec) -training >> step=2774000, episode=463 reward=0.7777105 (502.51 it/sec) -training >> step=2774100, episode=463 reward=0.7532629 (516.57 it/sec) -training >> step=2774200, episode=463 reward=0.782665 (471.55 it/sec) -training >> step=2774300, episode=463 reward=0.7779062 (462.96 it/sec) -training >> step=2774400, episode=463 reward=0.7745856 (506.18 it/sec) -training >> step=2774500, episode=463 reward=0.7689273 (446.70 it/sec) -training >> step=2774600, episode=463 reward=0.7876086 (501.72 it/sec) -training >> step=2774700, episode=463 reward=0.7671797 (496.71 it/sec) -training >> step=2774800, episode=463 reward=0.762295 (514.49 it/sec) -training >> step=2774900, episode=463 reward=0.7590163 (508.70 it/sec) -training >> step=2775000, episode=463 reward=0.7774501 (445.80 it/sec) -training >> step=2775100, episode=463 reward=0.7827747 (437.58 it/sec) -training >> step=2775200, episode=463 reward=0.75103 (436.31 it/sec) -training >> step=2775300, episode=463 reward=0.7614595 (476.30 it/sec) -training >> step=2775400, episode=463 reward=0.7854516 (495.11 it/sec) -training >> step=2775500, episode=463 reward=0.7506808 (445.89 it/sec) -training >> step=2775600, episode=463 reward=0.7813981 (447.37 it/sec) -training >> step=2775700, episode=463 reward=0.7627095 (436.89 it/sec) -training >> step=2775800, episode=463 reward=0.7700691 (454.30 it/sec) -training >> step=2775900, episode=463 reward=0.7450811 (531.00 it/sec) -training >> step=2776000, episode=463 reward=0.7488842 (468.11 it/sec) -training >> step=2776100, episode=463 reward=0.7716107 (454.26 it/sec) -training >> step=2776200, episode=463 reward=0.7547466 (397.88 it/sec) -training >> step=2776300, episode=463 reward=0.767477 (463.27 it/sec) -training >> step=2776400, episode=463 reward=0.7509043 (509.94 it/sec) -training >> step=2776500, episode=463 reward=0.7380006 (448.41 it/sec) -training >> step=2776600, episode=463 reward=0.769419 (510.04 it/sec) -training >> step=2776700, episode=463 reward=0.7565117 (477.15 it/sec) -training >> step=2776800, episode=463 reward=0.7530662 (519.54 it/sec) -training >> step=2776900, episode=463 reward=0.7750961 (553.76 it/sec) -training >> step=2777000, episode=463 reward=0.7666748 (503.29 it/sec) -training >> step=2777100, episode=463 reward=0.7375014 (521.73 it/sec) -training >> step=2777200, episode=463 reward=0.7710954 (481.51 it/sec) -training >> step=2777300, episode=464 reward=0.7611355 (113.58 it/sec) -training >> step=2777400, episode=464 reward=0.7441845 (493.61 it/sec) -training >> step=2777500, episode=464 reward=0.7838128 (368.37 it/sec) -training >> step=2777600, episode=464 reward=0.7689862 (499.20 it/sec) -training >> step=2777700, episode=464 reward=0.785193 (506.45 it/sec) -training >> step=2777800, episode=464 reward=0.7564229 (487.47 it/sec) -training >> step=2777900, episode=464 reward=0.7592391 (512.08 it/sec) -training >> step=2778000, episode=464 reward=0.7734267 (517.03 it/sec) -training >> step=2778100, episode=464 reward=0.7666585 (512.11 it/sec) -training >> step=2778200, episode=464 reward=0.7751944 (507.83 it/sec) -training >> step=2778300, episode=464 reward=0.7749797 (527.79 it/sec) -training >> step=2778400, episode=464 reward=0.7740992 (486.36 it/sec) -training >> step=2778500, episode=464 reward=0.7729622 (484.11 it/sec) -training >> step=2778600, episode=464 reward=0.7725027 (515.62 it/sec) -training >> step=2778700, episode=464 reward=0.7604756 (503.00 it/sec) -training >> step=2778800, episode=464 reward=0.7935226 (476.52 it/sec) -training >> step=2778900, episode=464 reward=0.7627032 (529.76 it/sec) -training >> step=2779000, episode=464 reward=0.7471703 (522.49 it/sec) -training >> step=2779100, episode=464 reward=0.749504 (483.00 it/sec) -training >> step=2779200, episode=464 reward=0.781092 (532.47 it/sec) -training >> step=2779300, episode=464 reward=0.7787077 (522.99 it/sec) -training >> step=2779400, episode=464 reward=0.7635683 (533.86 it/sec) -training >> step=2779500, episode=464 reward=0.7738343 (489.89 it/sec) -training >> step=2779600, episode=464 reward=0.7654155 (500.42 it/sec) -training >> step=2779700, episode=464 reward=0.7727923 (517.23 it/sec) -training >> step=2779800, episode=464 reward=0.7703212 (539.64 it/sec) -training >> step=2779900, episode=464 reward=0.7721788 (510.39 it/sec) -training >> step=2780000, episode=464 reward=0.7508331 (474.84 it/sec) -training >> step=2780100, episode=464 reward=0.7565168 (536.17 it/sec) -training >> step=2780200, episode=464 reward=0.7686103 (513.79 it/sec) -training >> step=2780300, episode=464 reward=0.7693917 (554.70 it/sec) -training >> step=2780400, episode=464 reward=0.777104 (522.54 it/sec) -training >> step=2780500, episode=464 reward=0.7847083 (545.05 it/sec) -training >> step=2780600, episode=464 reward=0.7727067 (519.74 it/sec) -training >> step=2780700, episode=464 reward=0.7552691 (456.98 it/sec) -training >> step=2780800, episode=464 reward=0.7783046 (534.08 it/sec) -training >> step=2780900, episode=464 reward=0.7509692 (524.49 it/sec) -training >> step=2781000, episode=464 reward=0.7696332 (515.42 it/sec) -training >> step=2781100, episode=464 reward=0.7750084 (535.68 it/sec) -training >> step=2781200, episode=464 reward=0.7540259 (525.27 it/sec) -training >> step=2781300, episode=464 reward=0.7702655 (485.78 it/sec) -training >> step=2781400, episode=464 reward=0.7716593 (471.47 it/sec) -training >> step=2781500, episode=464 reward=0.775281 (486.79 it/sec) -training >> step=2781600, episode=464 reward=0.7612855 (485.87 it/sec) -training >> step=2781700, episode=464 reward=0.7682258 (433.29 it/sec) -training >> step=2781800, episode=464 reward=0.7473443 (499.78 it/sec) -training >> step=2781900, episode=464 reward=0.7893912 (507.02 it/sec) -training >> step=2782000, episode=464 reward=0.741297 (531.67 it/sec) -training >> step=2782100, episode=464 reward=0.7673334 (501.36 it/sec) -training >> step=2782200, episode=464 reward=0.7349647 (469.56 it/sec) -training >> step=2782300, episode=464 reward=0.7691737 (514.83 it/sec) -training >> step=2782400, episode=464 reward=0.7598413 (406.21 it/sec) -training >> step=2782500, episode=464 reward=0.7730756 (436.63 it/sec) -training >> step=2782600, episode=464 reward=0.7829179 (488.82 it/sec) -training >> step=2782700, episode=464 reward=0.7603669 (492.94 it/sec) -training >> step=2782800, episode=464 reward=0.7819611 (512.22 it/sec) -training >> step=2782900, episode=464 reward=0.7170947 (492.38 it/sec) -training >> step=2783000, episode=464 reward=0.7576346 (529.84 it/sec) -training >> step=2783100, episode=464 reward=0.7512599 (480.45 it/sec) -training >> step=2783200, episode=464 reward=0.7542313 (501.20 it/sec) -training >> step=2783300, episode=465 reward=0.7647296 (58.32 it/sec) -training >> step=2783400, episode=465 reward=0.7661691 (512.19 it/sec) -training >> step=2783500, episode=465 reward=0.7701548 (538.08 it/sec) -training >> step=2783600, episode=465 reward=0.7627542 (556.15 it/sec) -training >> step=2783700, episode=465 reward=0.7860335 (517.24 it/sec) -training >> step=2783800, episode=465 reward=0.7752876 (451.23 it/sec) -training >> step=2783900, episode=465 reward=0.7740921 (489.65 it/sec) -training >> step=2784000, episode=465 reward=0.7671006 (513.48 it/sec) -training >> step=2784100, episode=465 reward=0.7584741 (515.52 it/sec) -training >> step=2784200, episode=465 reward=0.7846741 (493.10 it/sec) -training >> step=2784300, episode=465 reward=0.7819777 (527.97 it/sec) -training >> step=2784400, episode=465 reward=0.7754493 (534.63 it/sec) -training >> step=2784500, episode=465 reward=0.7629778 (528.79 it/sec) -training >> step=2784600, episode=465 reward=0.7673672 (512.24 it/sec) -training >> step=2784700, episode=465 reward=0.7735102 (506.52 it/sec) -training >> step=2784800, episode=465 reward=0.7731863 (528.20 it/sec) -training >> step=2784900, episode=465 reward=0.7608846 (386.16 it/sec) -training >> step=2785000, episode=465 reward=0.7642993 (513.59 it/sec) -training >> step=2785100, episode=465 reward=0.7688884 (528.75 it/sec) -training >> step=2785200, episode=465 reward=0.7788234 (482.43 it/sec) -training >> step=2785300, episode=465 reward=0.7469805 (497.44 it/sec) -training >> step=2785400, episode=465 reward=0.7634249 (524.96 it/sec) -training >> step=2785500, episode=465 reward=0.7907274 (502.64 it/sec) -training >> step=2785600, episode=465 reward=0.7638619 (470.59 it/sec) -training >> step=2785700, episode=465 reward=0.7821983 (508.68 it/sec) -training >> step=2785800, episode=465 reward=0.7692362 (505.04 it/sec) -training >> step=2785900, episode=465 reward=0.7522488 (485.36 it/sec) -training >> step=2786000, episode=465 reward=0.763145 (486.85 it/sec) -training >> step=2786100, episode=465 reward=0.7673022 (456.27 it/sec) -training >> step=2786200, episode=465 reward=0.771965 (496.31 it/sec) -training >> step=2786300, episode=465 reward=0.7880252 (500.75 it/sec) -training >> step=2786400, episode=465 reward=0.7399578 (458.75 it/sec) -training >> step=2786500, episode=465 reward=0.7719191 (442.46 it/sec) -training >> step=2786600, episode=465 reward=0.7703011 (404.68 it/sec) -training >> step=2786700, episode=465 reward=0.7726694 (448.58 it/sec) -training >> step=2786800, episode=465 reward=0.7543821 (469.65 it/sec) -training >> step=2786900, episode=465 reward=0.7770686 (410.80 it/sec) -training >> step=2787000, episode=465 reward=0.7808671 (521.82 it/sec) -training >> step=2787100, episode=465 reward=0.7704961 (548.63 it/sec) -training >> step=2787200, episode=465 reward=0.7592192 (540.79 it/sec) -training >> step=2787300, episode=465 reward=0.7608394 (513.51 it/sec) -training >> step=2787400, episode=465 reward=0.7602677 (513.39 it/sec) -training >> step=2787500, episode=465 reward=0.7704622 (525.39 it/sec) -training >> step=2787600, episode=465 reward=0.7866591 (546.30 it/sec) -training >> step=2787700, episode=465 reward=0.7652052 (539.57 it/sec) -training >> step=2787800, episode=465 reward=0.7656021 (550.10 it/sec) -training >> step=2787900, episode=465 reward=0.7545068 (496.88 it/sec) -training >> step=2788000, episode=465 reward=0.7590268 (498.66 it/sec) -training >> step=2788100, episode=465 reward=0.7796004 (539.53 it/sec) -training >> step=2788200, episode=465 reward=0.7654883 (571.02 it/sec) -training >> step=2788300, episode=465 reward=0.7490026 (544.41 it/sec) -training >> step=2788400, episode=465 reward=0.7792284 (534.93 it/sec) -training >> step=2788500, episode=465 reward=0.7747599 (513.11 it/sec) -training >> step=2788600, episode=465 reward=0.7564198 (541.58 it/sec) -training >> step=2788700, episode=465 reward=0.7714633 (552.50 it/sec) -training >> step=2788800, episode=465 reward=0.7619222 (533.04 it/sec) -training >> step=2788900, episode=465 reward=0.761775 (548.86 it/sec) -training >> step=2789000, episode=465 reward=0.762802 (483.52 it/sec) -training >> step=2789100, episode=465 reward=0.761116 (486.76 it/sec) -training >> step=2789200, episode=465 reward=0.7403617 (553.64 it/sec) -training >> step=2789300, episode=466 reward=0.7931508 (121.12 it/sec) -training >> step=2789400, episode=466 reward=0.7546348 (532.49 it/sec) -training >> step=2789500, episode=466 reward=0.7590134 (504.69 it/sec) -training >> step=2789600, episode=466 reward=0.7471108 (551.42 it/sec) -training >> step=2789700, episode=466 reward=0.7757823 (510.52 it/sec) -training >> step=2789800, episode=466 reward=0.7690349 (502.59 it/sec) -training >> step=2789900, episode=466 reward=0.757498 (524.63 it/sec) -training >> step=2790000, episode=466 reward=0.7627896 (538.18 it/sec) -training >> step=2790100, episode=466 reward=0.7567738 (496.93 it/sec) -training >> step=2790200, episode=466 reward=0.7744685 (506.45 it/sec) -training >> step=2790300, episode=466 reward=0.7790638 (434.77 it/sec) -training >> step=2790400, episode=466 reward=0.7733809 (447.59 it/sec) -training >> step=2790500, episode=466 reward=0.7737314 (453.17 it/sec) -training >> step=2790600, episode=466 reward=0.7743543 (480.51 it/sec) -training >> step=2790700, episode=466 reward=0.7683748 (451.55 it/sec) -training >> step=2790800, episode=466 reward=0.7704892 (469.06 it/sec) -training >> step=2790900, episode=466 reward=0.7715746 (518.78 it/sec) -training >> step=2791000, episode=466 reward=0.7517755 (500.52 it/sec) -training >> step=2791100, episode=466 reward=0.7709655 (520.57 it/sec) -training >> step=2791200, episode=466 reward=0.7750027 (519.65 it/sec) -training >> step=2791300, episode=466 reward=0.7545725 (521.90 it/sec) -training >> step=2791400, episode=466 reward=0.7736712 (517.78 it/sec) -training >> step=2791500, episode=466 reward=0.7720574 (538.85 it/sec) -training >> step=2791600, episode=466 reward=0.7496952 (524.53 it/sec) -training >> step=2791700, episode=466 reward=0.7523159 (532.65 it/sec) -training >> step=2791800, episode=466 reward=0.7606829 (518.00 it/sec) -training >> step=2791900, episode=466 reward=0.7803004 (506.31 it/sec) -training >> step=2792000, episode=466 reward=0.7732467 (541.63 it/sec) -training >> step=2792100, episode=466 reward=0.7761579 (538.38 it/sec) -training >> step=2792200, episode=466 reward=0.76162 (519.86 it/sec) -training >> step=2792300, episode=466 reward=0.7852902 (533.61 it/sec) -training >> step=2792400, episode=466 reward=0.7752794 (498.53 it/sec) -training >> step=2792500, episode=466 reward=0.7872614 (534.54 it/sec) -training >> step=2792600, episode=466 reward=0.761078 (534.24 it/sec) -training >> step=2792700, episode=466 reward=0.741842 (536.77 it/sec) -training >> step=2792800, episode=466 reward=0.7542081 (527.93 it/sec) -training >> step=2792900, episode=466 reward=0.7589377 (470.70 it/sec) -training >> step=2793000, episode=466 reward=0.7568734 (490.62 it/sec) -training >> step=2793100, episode=466 reward=0.7623751 (530.57 it/sec) -training >> step=2793200, episode=466 reward=0.7703561 (530.76 it/sec) -training >> step=2793300, episode=466 reward=0.777674 (531.49 it/sec) -training >> step=2793400, episode=466 reward=0.8072301 (531.03 it/sec) -training >> step=2793500, episode=466 reward=0.7762916 (481.09 it/sec) -training >> step=2793600, episode=466 reward=0.7646125 (526.10 it/sec) -training >> step=2793700, episode=466 reward=0.7589791 (531.52 it/sec) -training >> step=2793800, episode=466 reward=0.7581393 (514.55 it/sec) -training >> step=2793900, episode=466 reward=0.7816852 (556.59 it/sec) -training >> step=2794000, episode=466 reward=0.7960349 (485.58 it/sec) -training >> step=2794100, episode=466 reward=0.7754325 (530.78 it/sec) -training >> step=2794200, episode=466 reward=0.756214 (545.88 it/sec) -training >> step=2794300, episode=466 reward=0.7661605 (536.96 it/sec) -training >> step=2794400, episode=466 reward=0.7673004 (521.92 it/sec) -training >> step=2794500, episode=466 reward=0.7879366 (553.00 it/sec) -training >> step=2794600, episode=466 reward=0.7747525 (528.95 it/sec) -training >> step=2794700, episode=466 reward=0.7676661 (495.74 it/sec) -training >> step=2794800, episode=466 reward=0.7590063 (563.18 it/sec) -training >> step=2794900, episode=466 reward=0.770363 (541.29 it/sec) -training >> step=2795000, episode=466 reward=0.7742478 (510.51 it/sec) -training >> step=2795100, episode=466 reward=0.7418494 (486.10 it/sec) -training >> step=2795200, episode=466 reward=0.7714931 (553.90 it/sec) -training >> step=2795300, episode=467 reward=0.770496 (119.63 it/sec) -training >> step=2795400, episode=467 reward=0.7578332 (382.95 it/sec) -training >> step=2795500, episode=467 reward=0.7699004 (524.32 it/sec) -training >> step=2795600, episode=467 reward=0.7511529 (559.04 it/sec) -training >> step=2795700, episode=467 reward=0.7743131 (530.64 it/sec) -training >> step=2795800, episode=467 reward=0.7783839 (525.13 it/sec) -training >> step=2795900, episode=467 reward=0.7669736 (549.52 it/sec) -training >> step=2796000, episode=467 reward=0.7565954 (520.87 it/sec) -training >> step=2796100, episode=467 reward=0.779733 (524.64 it/sec) -training >> step=2796200, episode=467 reward=0.7725595 (563.29 it/sec) -training >> step=2796300, episode=467 reward=0.7622275 (515.40 it/sec) -training >> step=2796400, episode=467 reward=0.7803834 (497.36 it/sec) -training >> step=2796500, episode=467 reward=0.7687369 (567.50 it/sec) -training >> step=2796600, episode=467 reward=0.779003 (513.50 it/sec) -training >> step=2796700, episode=467 reward=0.7840742 (562.75 it/sec) -training >> step=2796800, episode=467 reward=0.7627537 (545.15 it/sec) -training >> step=2796900, episode=467 reward=0.7746058 (541.44 it/sec) -training >> step=2797000, episode=467 reward=0.7680824 (501.34 it/sec) -training >> step=2797100, episode=467 reward=0.7530699 (511.96 it/sec) -training >> step=2797200, episode=467 reward=0.7486619 (580.68 it/sec) -training >> step=2797300, episode=467 reward=0.7445542 (546.72 it/sec) -training >> step=2797400, episode=467 reward=0.7664053 (504.65 it/sec) -training >> step=2797500, episode=467 reward=0.7809274 (506.51 it/sec) -training >> step=2797600, episode=467 reward=0.7619535 (550.10 it/sec) -training >> step=2797700, episode=467 reward=0.7567679 (517.80 it/sec) -training >> step=2797800, episode=467 reward=0.7672376 (503.06 it/sec) -training >> step=2797900, episode=467 reward=0.7666695 (529.01 it/sec) -training >> step=2798000, episode=467 reward=0.7699178 (522.89 it/sec) -training >> step=2798100, episode=467 reward=0.7927872 (543.63 it/sec) -training >> step=2798200, episode=467 reward=0.7621972 (532.87 it/sec) -training >> step=2798300, episode=467 reward=0.7808483 (512.47 it/sec) -training >> step=2798400, episode=467 reward=0.7798562 (524.87 it/sec) -training >> step=2798500, episode=467 reward=0.7561091 (527.05 it/sec) -training >> step=2798600, episode=467 reward=0.7584888 (534.82 it/sec) -training >> step=2798700, episode=467 reward=0.7678532 (573.47 it/sec) -training >> step=2798800, episode=467 reward=0.7659711 (521.52 it/sec) -training >> step=2798900, episode=467 reward=0.7786549 (508.31 it/sec) -training >> step=2799000, episode=467 reward=0.7765819 (544.95 it/sec) -training >> step=2799100, episode=467 reward=0.7716801 (542.51 it/sec) -training >> step=2799200, episode=467 reward=0.7732156 (517.85 it/sec) -training >> step=2799300, episode=467 reward=0.7699065 (507.78 it/sec) -training >> step=2799400, episode=467 reward=0.7681426 (556.54 it/sec) -training >> step=2799500, episode=467 reward=0.7819438 (510.56 it/sec) -training >> step=2799600, episode=467 reward=0.7709903 (495.47 it/sec) -training >> step=2799700, episode=467 reward=0.7789984 (508.89 it/sec) -training >> step=2799800, episode=467 reward=0.7507439 (548.29 it/sec) -training >> step=2799900, episode=467 reward=0.7738861 (537.37 it/sec) -training >> step=2800000, episode=467 reward=0.7634775 (518.72 it/sec) -training >> step=2800100, episode=467 reward=0.7712696 (498.13 it/sec) -training >> step=2800200, episode=467 reward=0.7818241 (521.52 it/sec) -training >> step=2800300, episode=467 reward=0.7595745 (540.68 it/sec) -training >> step=2800400, episode=467 reward=0.7656004 (534.39 it/sec) -training >> step=2800500, episode=467 reward=0.7739252 (562.63 it/sec) -training >> step=2800600, episode=467 reward=0.7767109 (513.72 it/sec) -training >> step=2800700, episode=467 reward=0.7756648 (512.45 it/sec) -training >> step=2800800, episode=467 reward=0.7792549 (524.63 it/sec) -training >> step=2800900, episode=467 reward=0.7685894 (550.35 it/sec) -training >> step=2801000, episode=467 reward=0.7755707 (529.61 it/sec) -training >> step=2801100, episode=467 reward=0.7483931 (538.39 it/sec) -training >> step=2801200, episode=467 reward=0.7521965 (542.84 it/sec) -training >> step=2801300, episode=468 reward=0.7495728 (93.20 it/sec) -training >> step=2801400, episode=468 reward=0.7906563 (520.20 it/sec) -training >> step=2801500, episode=468 reward=0.7626296 (514.33 it/sec) -training >> step=2801600, episode=468 reward=0.7769799 (498.42 it/sec) -training >> step=2801700, episode=468 reward=0.7845989 (542.25 it/sec) -training >> step=2801800, episode=468 reward=0.7822858 (513.13 it/sec) -training >> step=2801900, episode=468 reward=0.7713042 (552.40 it/sec) -training >> step=2802000, episode=468 reward=0.7629405 (500.25 it/sec) -training >> step=2802100, episode=468 reward=0.7684967 (527.85 it/sec) -training >> step=2802200, episode=468 reward=0.7647876 (534.88 it/sec) -training >> step=2802300, episode=468 reward=0.770704 (534.43 it/sec) -training >> step=2802400, episode=468 reward=0.7737765 (510.04 it/sec) -training >> step=2802500, episode=468 reward=0.7889434 (541.43 it/sec) -training >> step=2802600, episode=468 reward=0.7798529 (525.96 it/sec) -training >> step=2802700, episode=468 reward=0.765169 (538.96 it/sec) -training >> step=2802800, episode=468 reward=0.7723349 (543.12 it/sec) -training >> step=2802900, episode=468 reward=0.7646501 (545.00 it/sec) -training >> step=2803000, episode=468 reward=0.7825123 (529.29 it/sec) -training >> step=2803100, episode=468 reward=0.7709385 (515.71 it/sec) -training >> step=2803200, episode=468 reward=0.7732739 (540.64 it/sec) -training >> step=2803300, episode=468 reward=0.7621096 (556.00 it/sec) -training >> step=2803400, episode=468 reward=0.7664598 (530.33 it/sec) -training >> step=2803500, episode=468 reward=0.7829759 (487.75 it/sec) -training >> step=2803600, episode=468 reward=0.7733456 (546.12 it/sec) -training >> step=2803700, episode=468 reward=0.7613631 (527.06 it/sec) -training >> step=2803800, episode=468 reward=0.7733813 (553.20 it/sec) -training >> step=2803900, episode=468 reward=0.7607365 (537.96 it/sec) -training >> step=2804000, episode=468 reward=0.7863264 (518.98 it/sec) -training >> step=2804100, episode=468 reward=0.7756439 (548.59 it/sec) -training >> step=2804200, episode=468 reward=0.7642609 (522.38 it/sec) -training >> step=2804300, episode=468 reward=0.7836319 (557.98 it/sec) -training >> step=2804400, episode=468 reward=0.7668881 (524.39 it/sec) -training >> step=2804500, episode=468 reward=0.7852537 (514.38 it/sec) -training >> step=2804600, episode=468 reward=0.7833459 (506.24 it/sec) -training >> step=2804700, episode=468 reward=0.7567261 (510.06 it/sec) -training >> step=2804800, episode=468 reward=0.7677044 (499.72 it/sec) -training >> step=2804900, episode=468 reward=0.7835147 (537.18 it/sec) -training >> step=2805000, episode=468 reward=0.7590196 (490.21 it/sec) -training >> step=2805100, episode=468 reward=0.7693629 (497.93 it/sec) -training >> step=2805200, episode=468 reward=0.7838109 (465.87 it/sec) -training >> step=2805300, episode=468 reward=0.7773678 (408.82 it/sec) -training >> step=2805400, episode=468 reward=0.751275 (527.84 it/sec) -training >> step=2805500, episode=468 reward=0.7532426 (467.69 it/sec) -training >> step=2805600, episode=468 reward=0.7643913 (428.13 it/sec) -training >> step=2805700, episode=468 reward=0.7609122 (522.30 it/sec) -training >> step=2805800, episode=468 reward=0.7747253 (569.18 it/sec) -training >> step=2805900, episode=468 reward=0.7332166 (506.10 it/sec) -training >> step=2806000, episode=468 reward=0.7517352 (523.56 it/sec) -training >> step=2806100, episode=468 reward=0.7517534 (502.54 it/sec) -training >> step=2806200, episode=468 reward=0.7527958 (559.01 it/sec) -training >> step=2806300, episode=468 reward=0.7719591 (561.84 it/sec) -training >> step=2806400, episode=468 reward=0.7780976 (520.60 it/sec) -training >> step=2806500, episode=468 reward=0.7488627 (573.64 it/sec) -training >> step=2806600, episode=468 reward=0.7806635 (521.05 it/sec) -training >> step=2806700, episode=468 reward=0.7745199 (518.56 it/sec) -training >> step=2806800, episode=468 reward=0.7567158 (538.39 it/sec) -training >> step=2806900, episode=468 reward=0.7677603 (560.92 it/sec) -training >> step=2807000, episode=468 reward=0.7595032 (520.82 it/sec) -training >> step=2807100, episode=468 reward=0.7561743 (535.25 it/sec) -training >> step=2807200, episode=468 reward=0.7795104 (516.36 it/sec) -training >> step=2807300, episode=469 reward=0.7879867 (113.04 it/sec) -training >> step=2807400, episode=469 reward=0.7747916 (485.04 it/sec) -training >> step=2807500, episode=469 reward=0.7393794 (508.13 it/sec) -training >> step=2807600, episode=469 reward=0.7617392 (551.66 it/sec) -training >> step=2807700, episode=469 reward=0.7694236 (535.22 it/sec) -training >> step=2807800, episode=469 reward=0.7643082 (542.84 it/sec) -training >> step=2807900, episode=469 reward=0.7740245 (506.13 it/sec) -training >> step=2808000, episode=469 reward=0.7460146 (534.14 it/sec) -training >> step=2808100, episode=469 reward=0.7649254 (544.56 it/sec) -training >> step=2808200, episode=469 reward=0.7684358 (510.34 it/sec) -training >> step=2808300, episode=469 reward=0.7427115 (526.63 it/sec) -training >> step=2808400, episode=469 reward=0.7790471 (516.56 it/sec) -training >> step=2808500, episode=469 reward=0.7500693 (519.77 it/sec) -training >> step=2808600, episode=469 reward=0.7632737 (533.64 it/sec) -training >> step=2808700, episode=469 reward=0.7720595 (589.12 it/sec) -training >> step=2808800, episode=469 reward=0.7734046 (550.86 it/sec) -training >> step=2808900, episode=469 reward=0.8037235 (534.01 it/sec) -training >> step=2809000, episode=469 reward=0.7607793 (545.64 it/sec) -training >> step=2809100, episode=469 reward=0.7820599 (537.97 it/sec) -training >> step=2809200, episode=469 reward=0.7618752 (529.00 it/sec) -training >> step=2809300, episode=469 reward=0.7733417 (494.73 it/sec) -training >> step=2809400, episode=469 reward=0.7710559 (549.73 it/sec) -training >> step=2809500, episode=469 reward=0.7737544 (529.12 it/sec) -training >> step=2809600, episode=469 reward=0.7639467 (536.67 it/sec) -training >> step=2809700, episode=469 reward=0.7625428 (520.71 it/sec) -training >> step=2809800, episode=469 reward=0.7495465 (559.09 it/sec) -training >> step=2809900, episode=469 reward=0.7878697 (501.66 it/sec) -training >> step=2810000, episode=469 reward=0.767126 (537.46 it/sec) -training >> step=2810100, episode=469 reward=0.7658195 (507.34 it/sec) -training >> step=2810200, episode=469 reward=0.77105 (521.58 it/sec) -training >> step=2810300, episode=469 reward=0.7910771 (503.74 it/sec) -training >> step=2810400, episode=469 reward=0.772803 (546.02 it/sec) -training >> step=2810500, episode=469 reward=0.7586663 (557.24 it/sec) -training >> step=2810600, episode=469 reward=0.7831291 (533.11 it/sec) -training >> step=2810700, episode=469 reward=0.7908937 (531.45 it/sec) -training >> step=2810800, episode=469 reward=0.7615633 (538.88 it/sec) -training >> step=2810900, episode=469 reward=0.7755626 (534.92 it/sec) -training >> step=2811000, episode=469 reward=0.7670587 (538.04 it/sec) -training >> step=2811100, episode=469 reward=0.768333 (547.54 it/sec) -training >> step=2811200, episode=469 reward=0.7540814 (532.74 it/sec) -training >> step=2811300, episode=469 reward=0.7708281 (515.25 it/sec) -training >> step=2811400, episode=469 reward=0.7626284 (539.79 it/sec) -training >> step=2811500, episode=469 reward=0.7936029 (521.78 it/sec) -training >> step=2811600, episode=469 reward=0.762369 (542.36 it/sec) -training >> step=2811700, episode=469 reward=0.7816101 (512.73 it/sec) -training >> step=2811800, episode=469 reward=0.7760812 (514.79 it/sec) -training >> step=2811900, episode=469 reward=0.748009 (546.75 it/sec) -training >> step=2812000, episode=469 reward=0.7605941 (496.96 it/sec) -training >> step=2812100, episode=469 reward=0.7544116 (547.90 it/sec) -training >> step=2812200, episode=469 reward=0.759856 (543.78 it/sec) -training >> step=2812300, episode=469 reward=0.770372 (517.99 it/sec) -training >> step=2812400, episode=469 reward=0.7620718 (480.12 it/sec) -training >> step=2812500, episode=469 reward=0.7647434 (527.06 it/sec) -training >> step=2812600, episode=469 reward=0.7706479 (533.60 it/sec) -training >> step=2812700, episode=469 reward=0.7565569 (543.43 it/sec) -training >> step=2812800, episode=469 reward=0.7785661 (503.15 it/sec) -training >> step=2812900, episode=469 reward=0.7520954 (487.75 it/sec) -training >> step=2813000, episode=469 reward=0.755279 (553.61 it/sec) -training >> step=2813100, episode=469 reward=0.7722446 (503.63 it/sec) -training >> step=2813200, episode=469 reward=0.756723 (523.04 it/sec) -training >> step=2813300, episode=470 reward=0.7759417 (92.79 it/sec) -training >> step=2813400, episode=470 reward=0.7601117 (469.28 it/sec) -training >> step=2813500, episode=470 reward=0.7635511 (523.29 it/sec) -training >> step=2813600, episode=470 reward=0.7658749 (538.36 it/sec) -training >> step=2813700, episode=470 reward=0.7621209 (533.62 it/sec) -training >> step=2813800, episode=470 reward=0.7687608 (514.53 it/sec) -training >> step=2813900, episode=470 reward=0.74932 (526.91 it/sec) -training >> step=2814000, episode=470 reward=0.7589319 (521.94 it/sec) -training >> step=2814100, episode=470 reward=0.765713 (540.89 it/sec) -training >> step=2814200, episode=470 reward=0.7754597 (532.38 it/sec) -training >> step=2814300, episode=470 reward=0.8029659 (512.04 it/sec) -training >> step=2814400, episode=470 reward=0.7780216 (507.08 it/sec) -training >> step=2814500, episode=470 reward=0.7695698 (527.92 it/sec) -training >> step=2814600, episode=470 reward=0.768815 (535.92 it/sec) -training >> step=2814700, episode=470 reward=0.7656593 (518.58 it/sec) -training >> step=2814800, episode=470 reward=0.7689835 (548.89 it/sec) -training >> step=2814900, episode=470 reward=0.7681593 (504.34 it/sec) -training >> step=2815000, episode=470 reward=0.7750629 (523.35 it/sec) -training >> step=2815100, episode=470 reward=0.7694091 (514.02 it/sec) -training >> step=2815200, episode=470 reward=0.7811145 (552.35 it/sec) -training >> step=2815300, episode=470 reward=0.7625144 (523.04 it/sec) -training >> step=2815400, episode=470 reward=0.7835335 (521.28 it/sec) -training >> step=2815500, episode=470 reward=0.7597001 (516.24 it/sec) -training >> step=2815600, episode=470 reward=0.7511984 (518.65 it/sec) -training >> step=2815700, episode=470 reward=0.7679326 (549.69 it/sec) -training >> step=2815800, episode=470 reward=0.777518 (547.73 it/sec) -training >> step=2815900, episode=470 reward=0.7585658 (567.74 it/sec) -training >> step=2816000, episode=470 reward=0.7700248 (505.18 it/sec) -training >> step=2816100, episode=470 reward=0.7664112 (514.78 it/sec) -training >> step=2816200, episode=470 reward=0.7448011 (534.82 it/sec) -training >> step=2816300, episode=470 reward=0.7834271 (527.44 it/sec) -training >> step=2816400, episode=470 reward=0.781618 (518.28 it/sec) -training >> step=2816500, episode=470 reward=0.7657168 (514.59 it/sec) -training >> step=2816600, episode=470 reward=0.7899773 (517.38 it/sec) -training >> step=2816700, episode=470 reward=0.7726212 (488.81 it/sec) -training >> step=2816800, episode=470 reward=0.7940982 (515.80 it/sec) -training >> step=2816900, episode=470 reward=0.7643418 (516.62 it/sec) -training >> step=2817000, episode=470 reward=0.7738683 (569.96 it/sec) -training >> step=2817100, episode=470 reward=0.793941 (536.07 it/sec) -training >> step=2817200, episode=470 reward=0.7561517 (527.85 it/sec) -training >> step=2817300, episode=470 reward=0.758222 (533.11 it/sec) -training >> step=2817400, episode=470 reward=0.7497471 (543.59 it/sec) -training >> step=2817500, episode=470 reward=0.7604628 (501.63 it/sec) -training >> step=2817600, episode=470 reward=0.7552866 (532.41 it/sec) -training >> step=2817700, episode=470 reward=0.7749217 (556.55 it/sec) -training >> step=2817800, episode=470 reward=0.7449855 (525.68 it/sec) -training >> step=2817900, episode=470 reward=0.7413741 (523.82 it/sec) -training >> step=2818000, episode=470 reward=0.7759181 (530.18 it/sec) -training >> step=2818100, episode=470 reward=0.7533012 (545.93 it/sec) -training >> step=2818200, episode=470 reward=0.7674119 (515.79 it/sec) -training >> step=2818300, episode=470 reward=0.7569878 (535.61 it/sec) -training >> step=2818400, episode=470 reward=0.7839701 (503.36 it/sec) -training >> step=2818500, episode=470 reward=0.7834938 (517.58 it/sec) -training >> step=2818600, episode=470 reward=0.775729 (524.59 it/sec) -training >> step=2818700, episode=470 reward=0.7680982 (530.64 it/sec) -training >> step=2818800, episode=470 reward=0.7526827 (545.88 it/sec) -training >> step=2818900, episode=470 reward=0.7638673 (502.14 it/sec) -training >> step=2819000, episode=470 reward=0.7547374 (509.06 it/sec) -training >> step=2819100, episode=470 reward=0.7857451 (535.33 it/sec) -training >> step=2819200, episode=470 reward=0.761394 (487.63 it/sec) -training >> step=2819300, episode=471 reward=0.7567513 (167.83 it/sec) -training >> step=2819400, episode=471 reward=0.7683565 (503.03 it/sec) -training >> step=2819500, episode=471 reward=0.7444453 (488.81 it/sec) -training >> step=2819600, episode=471 reward=0.7620187 (493.73 it/sec) -training >> step=2819700, episode=471 reward=0.7641644 (441.94 it/sec) -training >> step=2819800, episode=471 reward=0.7557741 (491.25 it/sec) -training >> step=2819900, episode=471 reward=0.7685407 (441.68 it/sec) -training >> step=2820000, episode=471 reward=0.7821134 (456.90 it/sec) -training >> step=2820100, episode=471 reward=0.7714401 (467.01 it/sec) -training >> step=2820200, episode=471 reward=0.7737548 (497.40 it/sec) -training >> step=2820300, episode=471 reward=0.7765692 (545.06 it/sec) -training >> step=2820400, episode=471 reward=0.7660151 (514.26 it/sec) -training >> step=2820500, episode=471 reward=0.7649465 (562.12 it/sec) -training >> step=2820600, episode=471 reward=0.7668061 (522.31 it/sec) -training >> step=2820700, episode=471 reward=0.7434983 (532.80 it/sec) -training >> step=2820800, episode=471 reward=0.7562401 (525.52 it/sec) -training >> step=2820900, episode=471 reward=0.7727327 (557.14 it/sec) -training >> step=2821000, episode=471 reward=0.7708789 (520.81 it/sec) -training >> step=2821100, episode=471 reward=0.7760017 (555.21 it/sec) -training >> step=2821200, episode=471 reward=0.7467868 (493.08 it/sec) -training >> step=2821300, episode=471 reward=0.7752532 (559.19 it/sec) -training >> step=2821400, episode=471 reward=0.7479857 (565.91 it/sec) -training >> step=2821500, episode=471 reward=0.7676845 (498.71 it/sec) -training >> step=2821600, episode=471 reward=0.7886115 (551.98 it/sec) -training >> step=2821700, episode=471 reward=0.7732413 (521.14 it/sec) -training >> step=2821800, episode=471 reward=0.7645072 (531.46 it/sec) -training >> step=2821900, episode=471 reward=0.7489128 (538.81 it/sec) -training >> step=2822000, episode=471 reward=0.7796084 (587.22 it/sec) -training >> step=2822100, episode=471 reward=0.7732477 (502.92 it/sec) -training >> step=2822200, episode=471 reward=0.776057 (538.39 it/sec) -training >> step=2822300, episode=471 reward=0.7565563 (546.14 it/sec) -training >> step=2822400, episode=471 reward=0.7489939 (578.83 it/sec) -training >> step=2822500, episode=471 reward=0.7787848 (542.54 it/sec) -training >> step=2822600, episode=471 reward=0.7643581 (486.07 it/sec) -training >> step=2822700, episode=471 reward=0.7667626 (523.82 it/sec) -training >> step=2822800, episode=471 reward=0.7557256 (491.09 it/sec) -training >> step=2822900, episode=471 reward=0.749795 (479.11 it/sec) -training >> step=2823000, episode=471 reward=0.7689205 (520.13 it/sec) -training >> step=2823100, episode=471 reward=0.7724779 (541.04 it/sec) -training >> step=2823200, episode=471 reward=0.7890688 (521.77 it/sec) -training >> step=2823300, episode=471 reward=0.7604977 (502.51 it/sec) -training >> step=2823400, episode=471 reward=0.7726824 (520.07 it/sec) -training >> step=2823500, episode=471 reward=0.780183 (549.71 it/sec) -training >> step=2823600, episode=471 reward=0.7675399 (554.22 it/sec) -training >> step=2823700, episode=471 reward=0.7571216 (550.92 it/sec) -training >> step=2823800, episode=471 reward=0.7407299 (558.73 it/sec) -training >> step=2823900, episode=471 reward=0.7671171 (484.92 it/sec) -training >> step=2824000, episode=471 reward=0.762181 (518.65 it/sec) -training >> step=2824100, episode=471 reward=0.7390072 (537.37 it/sec) -training >> step=2824200, episode=471 reward=0.7553927 (527.74 it/sec) -training >> step=2824300, episode=471 reward=0.7546063 (527.10 it/sec) -training >> step=2824400, episode=471 reward=0.7716525 (562.32 it/sec) -training >> step=2824500, episode=471 reward=0.7505329 (555.06 it/sec) -training >> step=2824600, episode=471 reward=0.7643666 (517.23 it/sec) -training >> step=2824700, episode=471 reward=0.7520579 (538.24 it/sec) -training >> step=2824800, episode=471 reward=0.7586951 (557.68 it/sec) -training >> step=2824900, episode=471 reward=0.7633578 (568.12 it/sec) -training >> step=2825000, episode=471 reward=0.7541972 (528.35 it/sec) -training >> step=2825100, episode=471 reward=0.7702563 (492.94 it/sec) -training >> step=2825200, episode=471 reward=0.7627943 (535.19 it/sec) -training >> step=2825300, episode=472 reward=0.7545949 (65.39 it/sec) -training >> step=2825400, episode=472 reward=0.7757912 (519.18 it/sec) -training >> step=2825500, episode=472 reward=0.762868 (537.56 it/sec) -training >> step=2825600, episode=472 reward=0.7604634 (535.61 it/sec) -training >> step=2825700, episode=472 reward=0.7719075 (511.95 it/sec) -training >> step=2825800, episode=472 reward=0.774075 (532.13 it/sec) -training >> step=2825900, episode=472 reward=0.7858055 (537.16 it/sec) -training >> step=2826000, episode=472 reward=0.745872 (520.39 it/sec) -training >> step=2826100, episode=472 reward=0.7538715 (547.73 it/sec) -training >> step=2826200, episode=472 reward=0.7551382 (554.83 it/sec) -training >> step=2826300, episode=472 reward=0.7822446 (549.11 it/sec) -training >> step=2826400, episode=472 reward=0.7457231 (550.20 it/sec) -training >> step=2826500, episode=472 reward=0.7735579 (521.92 it/sec) -training >> step=2826600, episode=472 reward=0.7717561 (537.36 it/sec) -training >> step=2826700, episode=472 reward=0.7383827 (537.97 it/sec) -training >> step=2826800, episode=472 reward=0.7641335 (556.28 it/sec) -training >> step=2826900, episode=472 reward=0.7709742 (540.49 it/sec) -training >> step=2827000, episode=472 reward=0.7820711 (535.20 it/sec) -training >> step=2827100, episode=472 reward=0.7612783 (509.01 it/sec) -training >> step=2827200, episode=472 reward=0.756295 (538.56 it/sec) -training >> step=2827300, episode=472 reward=0.7763249 (514.52 it/sec) -training >> step=2827400, episode=472 reward=0.7509241 (529.47 it/sec) -training >> step=2827500, episode=472 reward=0.7796726 (595.99 it/sec) -training >> step=2827600, episode=472 reward=0.7840563 (525.42 it/sec) -training >> step=2827700, episode=472 reward=0.7648749 (488.21 it/sec) -training >> step=2827800, episode=472 reward=0.7576982 (533.04 it/sec) -training >> step=2827900, episode=472 reward=0.7594606 (546.20 it/sec) -training >> step=2828000, episode=472 reward=0.7828688 (530.33 it/sec) -training >> step=2828100, episode=472 reward=0.7590272 (536.39 it/sec) -training >> step=2828200, episode=472 reward=0.7665852 (531.36 it/sec) -training >> step=2828300, episode=472 reward=0.7729385 (537.45 it/sec) -training >> step=2828400, episode=472 reward=0.7620547 (525.01 it/sec) -training >> step=2828500, episode=472 reward=0.7632773 (540.37 it/sec) -training >> step=2828600, episode=472 reward=0.7807578 (565.42 it/sec) -training >> step=2828700, episode=472 reward=0.7679645 (535.92 it/sec) -training >> step=2828800, episode=472 reward=0.7481524 (503.68 it/sec) -training >> step=2828900, episode=472 reward=0.735283 (576.34 it/sec) -training >> step=2829000, episode=472 reward=0.7717165 (540.83 it/sec) -training >> step=2829100, episode=472 reward=0.7665941 (537.09 it/sec) -training >> step=2829200, episode=472 reward=0.7652641 (538.86 it/sec) -training >> step=2829300, episode=472 reward=0.7566888 (543.27 it/sec) -training >> step=2829400, episode=472 reward=0.7697617 (482.15 it/sec) -training >> step=2829500, episode=472 reward=0.7955347 (523.23 it/sec) -training >> step=2829600, episode=472 reward=0.7718982 (520.88 it/sec) -training >> step=2829700, episode=472 reward=0.7791227 (575.39 it/sec) -training >> step=2829800, episode=472 reward=0.7536478 (514.44 it/sec) -training >> step=2829900, episode=472 reward=0.7859678 (513.43 it/sec) -training >> step=2830000, episode=472 reward=0.7717195 (573.31 it/sec) -training >> step=2830100, episode=472 reward=0.7708277 (526.14 it/sec) -training >> step=2830200, episode=472 reward=0.7694743 (497.93 it/sec) -training >> step=2830300, episode=472 reward=0.7646924 (534.97 it/sec) -training >> step=2830400, episode=472 reward=0.7629886 (558.43 it/sec) -training >> step=2830500, episode=472 reward=0.7583798 (509.10 it/sec) -training >> step=2830600, episode=472 reward=0.7391046 (541.29 it/sec) -training >> step=2830700, episode=472 reward=0.7454143 (516.81 it/sec) -training >> step=2830800, episode=472 reward=0.7680443 (541.05 it/sec) -training >> step=2830900, episode=472 reward=0.7816039 (514.05 it/sec) -training >> step=2831000, episode=472 reward=0.7660727 (540.18 it/sec) -training >> step=2831100, episode=472 reward=0.7645354 (545.29 it/sec) -training >> step=2831200, episode=472 reward=0.7652819 (491.10 it/sec) -training >> step=2831300, episode=473 reward=0.7649477 (62.46 it/sec) -training >> step=2831400, episode=473 reward=0.7685924 (482.04 it/sec) -training >> step=2831500, episode=473 reward=0.7556806 (549.67 it/sec) -training >> step=2831600, episode=473 reward=0.7640727 (498.01 it/sec) -training >> step=2831700, episode=473 reward=0.7584402 (554.75 it/sec) -training >> step=2831800, episode=473 reward=0.7592927 (490.98 it/sec) -training >> step=2831900, episode=473 reward=0.7722203 (555.73 it/sec) -training >> step=2832000, episode=473 reward=0.7687524 (536.56 it/sec) -training >> step=2832100, episode=473 reward=0.7571876 (516.90 it/sec) -training >> step=2832200, episode=473 reward=0.7795374 (537.43 it/sec) -training >> step=2832300, episode=473 reward=0.7721139 (532.14 it/sec) -training >> step=2832400, episode=473 reward=0.764811 (541.75 it/sec) -training >> step=2832500, episode=473 reward=0.7675417 (519.12 it/sec) -training >> step=2832600, episode=473 reward=0.7685004 (541.69 it/sec) -training >> step=2832700, episode=473 reward=0.7856216 (565.96 it/sec) -training >> step=2832800, episode=473 reward=0.7639558 (505.45 it/sec) -training >> step=2832900, episode=473 reward=0.7708443 (534.89 it/sec) -training >> step=2833000, episode=473 reward=0.7692999 (540.80 it/sec) -training >> step=2833100, episode=473 reward=0.7766468 (565.08 it/sec) -training >> step=2833200, episode=473 reward=0.76215 (535.89 it/sec) -training >> step=2833300, episode=473 reward=0.753372 (492.87 it/sec) -training >> step=2833400, episode=473 reward=0.7488991 (541.14 it/sec) -training >> step=2833500, episode=473 reward=0.7166248 (526.65 it/sec) -training >> step=2833600, episode=473 reward=0.7539827 (549.77 it/sec) -training >> step=2833700, episode=473 reward=0.7433764 (506.18 it/sec) -training >> step=2833800, episode=473 reward=0.7848513 (538.68 it/sec) -training >> step=2833900, episode=473 reward=0.7462847 (508.40 it/sec) -training >> step=2834000, episode=473 reward=0.7927891 (468.78 it/sec) -training >> step=2834100, episode=473 reward=0.770022 (528.32 it/sec) -training >> step=2834200, episode=473 reward=0.7824992 (465.47 it/sec) -training >> step=2834300, episode=473 reward=0.7718081 (420.40 it/sec) -training >> step=2834400, episode=473 reward=0.7722982 (480.16 it/sec) -training >> step=2834500, episode=473 reward=0.7700475 (485.50 it/sec) -training >> step=2834600, episode=473 reward=0.7566004 (470.12 it/sec) -training >> step=2834700, episode=473 reward=0.7705835 (523.60 it/sec) -training >> step=2834800, episode=473 reward=0.7900019 (513.61 it/sec) -training >> step=2834900, episode=473 reward=0.7620738 (541.21 it/sec) -training >> step=2835000, episode=473 reward=0.780148 (512.65 it/sec) -training >> step=2835100, episode=473 reward=0.7746149 (548.95 it/sec) -training >> step=2835200, episode=473 reward=0.7538123 (533.02 it/sec) -training >> step=2835300, episode=473 reward=0.782225 (552.81 it/sec) -training >> step=2835400, episode=473 reward=0.7620966 (552.46 it/sec) -training >> step=2835500, episode=473 reward=0.7678205 (521.25 it/sec) -training >> step=2835600, episode=473 reward=0.783909 (528.78 it/sec) -training >> step=2835700, episode=473 reward=0.7749867 (530.48 it/sec) -training >> step=2835800, episode=473 reward=0.7607559 (518.24 it/sec) -training >> step=2835900, episode=473 reward=0.782425 (581.06 it/sec) -training >> step=2836000, episode=473 reward=0.773919 (536.51 it/sec) -training >> step=2836100, episode=473 reward=0.7670558 (523.99 it/sec) -training >> step=2836200, episode=473 reward=0.7728193 (546.99 it/sec) -training >> step=2836300, episode=473 reward=0.7585853 (533.70 it/sec) -training >> step=2836400, episode=473 reward=0.7836517 (521.50 it/sec) -training >> step=2836500, episode=473 reward=0.7687867 (566.42 it/sec) -training >> step=2836600, episode=473 reward=0.7645435 (532.85 it/sec) -training >> step=2836700, episode=473 reward=0.7526386 (518.36 it/sec) -training >> step=2836800, episode=473 reward=0.7570668 (554.71 it/sec) -training >> step=2836900, episode=473 reward=0.7825727 (543.29 it/sec) -training >> step=2837000, episode=473 reward=0.7585145 (549.99 it/sec) -training >> step=2837100, episode=473 reward=0.7465198 (503.64 it/sec) -training >> step=2837200, episode=473 reward=0.7497486 (520.10 it/sec) -training >> step=2837300, episode=474 reward=0.7637328 (91.89 it/sec) -training >> step=2837400, episode=474 reward=0.7712161 (549.57 it/sec) -training >> step=2837500, episode=474 reward=0.7570675 (540.67 it/sec) -training >> step=2837600, episode=474 reward=0.7572591 (514.16 it/sec) -training >> step=2837700, episode=474 reward=0.7789949 (530.45 it/sec) -training >> step=2837800, episode=474 reward=0.7771752 (525.41 it/sec) -training >> step=2837900, episode=474 reward=0.7457529 (512.24 it/sec) -training >> step=2838000, episode=474 reward=0.7612177 (546.45 it/sec) -training >> step=2838100, episode=474 reward=0.7740068 (556.79 it/sec) -training >> step=2838200, episode=474 reward=0.769756 (512.30 it/sec) -training >> step=2838300, episode=474 reward=0.7529477 (534.75 it/sec) -training >> step=2838400, episode=474 reward=0.7428238 (575.39 it/sec) -training >> step=2838500, episode=474 reward=0.7557516 (508.66 it/sec) -training >> step=2838600, episode=474 reward=0.7727649 (537.59 it/sec) -training >> step=2838700, episode=474 reward=0.7801509 (566.41 it/sec) -training >> step=2838800, episode=474 reward=0.7669976 (542.48 it/sec) -training >> step=2838900, episode=474 reward=0.7660935 (524.31 it/sec) -training >> step=2839000, episode=474 reward=0.7506003 (542.64 it/sec) -training >> step=2839100, episode=474 reward=0.7692759 (552.84 it/sec) -training >> step=2839200, episode=474 reward=0.765385 (551.08 it/sec) -training >> step=2839300, episode=474 reward=0.7866861 (543.57 it/sec) -training >> step=2839400, episode=474 reward=0.7600736 (554.69 it/sec) -training >> step=2839500, episode=474 reward=0.7466332 (534.72 it/sec) -training >> step=2839600, episode=474 reward=0.7647116 (544.51 it/sec) -training >> step=2839700, episode=474 reward=0.7588416 (512.51 it/sec) -training >> step=2839800, episode=474 reward=0.7635684 (559.79 it/sec) -training >> step=2839900, episode=474 reward=0.7716635 (553.98 it/sec) -training >> step=2840000, episode=474 reward=0.7684035 (542.53 it/sec) -training >> step=2840100, episode=474 reward=0.7797399 (530.77 it/sec) -training >> step=2840200, episode=474 reward=0.7784899 (543.66 it/sec) -training >> step=2840300, episode=474 reward=0.7353776 (524.99 it/sec) -training >> step=2840400, episode=474 reward=0.7726054 (556.96 it/sec) -training >> step=2840500, episode=474 reward=0.7686049 (547.08 it/sec) -training >> step=2840600, episode=474 reward=0.7703089 (497.01 it/sec) -training >> step=2840700, episode=474 reward=0.7749434 (542.96 it/sec) -training >> step=2840800, episode=474 reward=0.7674696 (544.04 it/sec) -training >> step=2840900, episode=474 reward=0.770013 (587.16 it/sec) -training >> step=2841000, episode=474 reward=0.7504286 (528.70 it/sec) -training >> step=2841100, episode=474 reward=0.7697025 (519.76 it/sec) -training >> step=2841200, episode=474 reward=0.7700636 (561.86 it/sec) -training >> step=2841300, episode=474 reward=0.7688842 (549.72 it/sec) -training >> step=2841400, episode=474 reward=0.7535205 (548.35 it/sec) -training >> step=2841500, episode=474 reward=0.7728121 (556.04 it/sec) -training >> step=2841600, episode=474 reward=0.7665576 (559.59 it/sec) -training >> step=2841700, episode=474 reward=0.7825549 (511.89 it/sec) -training >> step=2841800, episode=474 reward=0.7563845 (555.03 it/sec) -training >> step=2841900, episode=474 reward=0.7698828 (515.43 it/sec) -training >> step=2842000, episode=474 reward=0.7950063 (538.01 it/sec) -training >> step=2842100, episode=474 reward=0.7624184 (530.04 it/sec) -training >> step=2842200, episode=474 reward=0.7658421 (510.70 it/sec) -training >> step=2842300, episode=474 reward=0.7508297 (532.25 it/sec) -training >> step=2842400, episode=474 reward=0.7584276 (558.92 it/sec) -training >> step=2842500, episode=474 reward=0.7741438 (502.46 it/sec) -training >> step=2842600, episode=474 reward=0.7288129 (542.48 it/sec) -training >> step=2842700, episode=474 reward=0.7667823 (552.90 it/sec) -training >> step=2842800, episode=474 reward=0.7613484 (510.05 it/sec) -training >> step=2842900, episode=474 reward=0.7712297 (504.98 it/sec) -training >> step=2843000, episode=474 reward=0.748212 (548.06 it/sec) -training >> step=2843100, episode=474 reward=0.7617475 (565.28 it/sec) -training >> step=2843200, episode=474 reward=0.7718084 (551.75 it/sec) -training >> step=2843300, episode=475 reward=0.7395624 (131.05 it/sec) -training >> step=2843400, episode=475 reward=0.7903829 (554.55 it/sec) -training >> step=2843500, episode=475 reward=0.7717453 (508.54 it/sec) -training >> step=2843600, episode=475 reward=0.7513031 (509.56 it/sec) -training >> step=2843700, episode=475 reward=0.7420442 (526.74 it/sec) -training >> step=2843800, episode=475 reward=0.7593464 (563.07 it/sec) -training >> step=2843900, episode=475 reward=0.788805 (545.12 it/sec) -training >> step=2844000, episode=475 reward=0.776306 (533.78 it/sec) -training >> step=2844100, episode=475 reward=0.7683293 (520.34 it/sec) -training >> step=2844200, episode=475 reward=0.7744633 (540.33 it/sec) -training >> step=2844300, episode=475 reward=0.7704753 (526.83 it/sec) -training >> step=2844400, episode=475 reward=0.7805109 (526.53 it/sec) -training >> step=2844500, episode=475 reward=0.7777007 (588.68 it/sec) -training >> step=2844600, episode=475 reward=0.7779471 (531.20 it/sec) -training >> step=2844700, episode=475 reward=0.7744121 (531.77 it/sec) -training >> step=2844800, episode=475 reward=0.763085 (549.35 it/sec) -training >> step=2844900, episode=475 reward=0.759304 (560.36 it/sec) -training >> step=2845000, episode=475 reward=0.7734614 (553.01 it/sec) -training >> step=2845100, episode=475 reward=0.7800756 (514.97 it/sec) -training >> step=2845200, episode=475 reward=0.748556 (522.83 it/sec) -training >> step=2845300, episode=475 reward=0.7790866 (512.07 it/sec) -training >> step=2845400, episode=475 reward=0.7589497 (546.43 it/sec) -training >> step=2845500, episode=475 reward=0.775194 (553.40 it/sec) -training >> step=2845600, episode=475 reward=0.7653421 (563.05 it/sec) -training >> step=2845700, episode=475 reward=0.764725 (535.43 it/sec) -training >> step=2845800, episode=475 reward=0.7737949 (483.33 it/sec) -training >> step=2845900, episode=475 reward=0.7691646 (522.72 it/sec) -training >> step=2846000, episode=475 reward=0.7766455 (542.42 it/sec) -training >> step=2846100, episode=475 reward=0.7708234 (535.40 it/sec) -training >> step=2846200, episode=475 reward=0.7770628 (525.29 it/sec) -training >> step=2846300, episode=475 reward=0.7586541 (513.36 it/sec) -training >> step=2846400, episode=475 reward=0.7515192 (510.71 it/sec) -training >> step=2846500, episode=475 reward=0.7473809 (507.80 it/sec) -training >> step=2846600, episode=475 reward=0.7403708 (578.30 it/sec) -training >> step=2846700, episode=475 reward=0.7507741 (549.12 it/sec) -training >> step=2846800, episode=475 reward=0.7832577 (527.66 it/sec) -training >> step=2846900, episode=475 reward=0.7844719 (506.23 it/sec) -training >> step=2847000, episode=475 reward=0.7712756 (544.22 it/sec) -training >> step=2847100, episode=475 reward=0.7726276 (527.81 it/sec) -training >> step=2847200, episode=475 reward=0.7678903 (516.61 it/sec) -training >> step=2847300, episode=475 reward=0.8014853 (538.12 it/sec) -training >> step=2847400, episode=475 reward=0.7582152 (507.61 it/sec) -training >> step=2847500, episode=475 reward=0.7920026 (528.21 it/sec) -training >> step=2847600, episode=475 reward=0.7756102 (547.59 it/sec) -training >> step=2847700, episode=475 reward=0.769478 (573.01 it/sec) -training >> step=2847800, episode=475 reward=0.7643024 (511.83 it/sec) -training >> step=2847900, episode=475 reward=0.7575721 (546.46 it/sec) -training >> step=2848000, episode=475 reward=0.7866617 (524.05 it/sec) -training >> step=2848100, episode=475 reward=0.7609594 (476.20 it/sec) -training >> step=2848200, episode=475 reward=0.7731671 (543.43 it/sec) -training >> step=2848300, episode=475 reward=0.7731438 (478.75 it/sec) -training >> step=2848400, episode=475 reward=0.7622405 (500.56 it/sec) -training >> step=2848500, episode=475 reward=0.761108 (528.29 it/sec) -training >> step=2848600, episode=475 reward=0.7529415 (561.99 it/sec) -training >> step=2848700, episode=475 reward=0.7703539 (553.06 it/sec) -training >> step=2848800, episode=475 reward=0.7800083 (519.53 it/sec) -training >> step=2848900, episode=475 reward=0.7307635 (487.10 it/sec) -training >> step=2849000, episode=475 reward=0.7553755 (461.55 it/sec) -training >> step=2849100, episode=475 reward=0.7703847 (493.63 it/sec) -training >> step=2849200, episode=475 reward=0.758761 (545.35 it/sec) -training >> step=2849300, episode=476 reward=0.7594977 (104.95 it/sec) -training >> step=2849400, episode=476 reward=0.7718631 (413.11 it/sec) -training >> step=2849500, episode=476 reward=0.7740484 (514.70 it/sec) -training >> step=2849600, episode=476 reward=0.7622094 (514.75 it/sec) -training >> step=2849700, episode=476 reward=0.772163 (521.15 it/sec) -training >> step=2849800, episode=476 reward=0.7504377 (532.30 it/sec) -training >> step=2849900, episode=476 reward=0.7582206 (525.56 it/sec) -training >> step=2850000, episode=476 reward=0.7494434 (558.28 it/sec) -training >> step=2850100, episode=476 reward=0.7688351 (535.64 it/sec) -training >> step=2850200, episode=476 reward=0.7468771 (496.53 it/sec) -training >> step=2850300, episode=476 reward=0.7742392 (530.63 it/sec) -training >> step=2850400, episode=476 reward=0.7345904 (547.62 it/sec) -training >> step=2850500, episode=476 reward=0.7600851 (542.37 it/sec) -training >> step=2850600, episode=476 reward=0.7583306 (523.76 it/sec) -training >> step=2850700, episode=476 reward=0.7742078 (492.55 it/sec) -training >> step=2850800, episode=476 reward=0.7930344 (526.62 it/sec) -training >> step=2850900, episode=476 reward=0.7378747 (539.19 it/sec) -training >> step=2851000, episode=476 reward=0.7698299 (558.11 it/sec) -training >> step=2851100, episode=476 reward=0.7816106 (543.41 it/sec) -training >> step=2851200, episode=476 reward=0.779676 (538.15 it/sec) -training >> step=2851300, episode=476 reward=0.7758141 (489.03 it/sec) -training >> step=2851400, episode=476 reward=0.7728447 (551.09 it/sec) -training >> step=2851500, episode=476 reward=0.7550063 (545.77 it/sec) -training >> step=2851600, episode=476 reward=0.763606 (539.63 it/sec) -training >> step=2851700, episode=476 reward=0.7560796 (554.17 it/sec) -training >> step=2851800, episode=476 reward=0.7785482 (543.99 it/sec) -training >> step=2851900, episode=476 reward=0.783366 (479.75 it/sec) -training >> step=2852000, episode=476 reward=0.7890247 (524.94 it/sec) -training >> step=2852100, episode=476 reward=0.7726246 (554.12 it/sec) -training >> step=2852200, episode=476 reward=0.7707508 (533.57 it/sec) -training >> step=2852300, episode=476 reward=0.7587726 (516.23 it/sec) -training >> step=2852400, episode=476 reward=0.7880704 (508.42 it/sec) -training >> step=2852500, episode=476 reward=0.7525279 (514.97 it/sec) -training >> step=2852600, episode=476 reward=0.7597844 (531.06 it/sec) -training >> step=2852700, episode=476 reward=0.7481045 (530.59 it/sec) -training >> step=2852800, episode=476 reward=0.7734177 (523.45 it/sec) -training >> step=2852900, episode=476 reward=0.7745098 (535.79 it/sec) -training >> step=2853000, episode=476 reward=0.7653574 (533.64 it/sec) -training >> step=2853100, episode=476 reward=0.7954713 (519.81 it/sec) -training >> step=2853200, episode=476 reward=0.7798606 (547.45 it/sec) -training >> step=2853300, episode=476 reward=0.7758436 (524.40 it/sec) -training >> step=2853400, episode=476 reward=0.7653603 (506.89 it/sec) -training >> step=2853500, episode=476 reward=0.7677787 (520.42 it/sec) -training >> step=2853600, episode=476 reward=0.7775238 (552.14 it/sec) -training >> step=2853700, episode=476 reward=0.7601536 (536.09 it/sec) -training >> step=2853800, episode=476 reward=0.7795776 (477.27 it/sec) -training >> step=2853900, episode=476 reward=0.7647507 (524.78 it/sec) -training >> step=2854000, episode=476 reward=0.7596803 (546.07 it/sec) -training >> step=2854100, episode=476 reward=0.7676544 (535.14 it/sec) -training >> step=2854200, episode=476 reward=0.7783688 (536.09 it/sec) -training >> step=2854300, episode=476 reward=0.7663519 (545.96 it/sec) -training >> step=2854400, episode=476 reward=0.7689643 (542.30 it/sec) -training >> step=2854500, episode=476 reward=0.7475386 (519.44 it/sec) -training >> step=2854600, episode=476 reward=0.7609965 (495.07 it/sec) -training >> step=2854700, episode=476 reward=0.7620557 (579.30 it/sec) -training >> step=2854800, episode=476 reward=0.7711104 (558.23 it/sec) -training >> step=2854900, episode=476 reward=0.7210366 (493.68 it/sec) -training >> step=2855000, episode=476 reward=0.7557315 (575.10 it/sec) -training >> step=2855100, episode=476 reward=0.7598927 (520.32 it/sec) -training >> step=2855200, episode=476 reward=0.7745916 (537.89 it/sec) -training >> step=2855300, episode=477 reward=0.7592878 (156.43 it/sec) -training >> step=2855400, episode=477 reward=0.7515463 (419.76 it/sec) -training >> step=2855500, episode=477 reward=0.7568276 (509.50 it/sec) -training >> step=2855600, episode=477 reward=0.7398113 (530.35 it/sec) -training >> step=2855700, episode=477 reward=0.7491566 (504.03 it/sec) -training >> step=2855800, episode=477 reward=0.7714918 (565.59 it/sec) -training >> step=2855900, episode=477 reward=0.7523432 (506.40 it/sec) -training >> step=2856000, episode=477 reward=0.7799856 (532.32 it/sec) -training >> step=2856100, episode=477 reward=0.7777864 (561.82 it/sec) -training >> step=2856200, episode=477 reward=0.7611288 (531.24 it/sec) -training >> step=2856300, episode=477 reward=0.7739899 (541.69 it/sec) -training >> step=2856400, episode=477 reward=0.7539554 (551.56 it/sec) -training >> step=2856500, episode=477 reward=0.762868 (508.78 it/sec) -training >> step=2856600, episode=477 reward=0.7682775 (551.04 it/sec) -training >> step=2856700, episode=477 reward=0.7677053 (531.54 it/sec) -training >> step=2856800, episode=477 reward=0.7894087 (532.67 it/sec) -training >> step=2856900, episode=477 reward=0.7621068 (503.62 it/sec) -training >> step=2857000, episode=477 reward=0.7839767 (481.53 it/sec) -training >> step=2857100, episode=477 reward=0.7591316 (529.10 it/sec) -training >> step=2857200, episode=477 reward=0.755486 (535.44 it/sec) -training >> step=2857300, episode=477 reward=0.7718092 (546.08 it/sec) -training >> step=2857400, episode=477 reward=0.7639406 (548.81 it/sec) -training >> step=2857500, episode=477 reward=0.776274 (504.59 it/sec) -training >> step=2857600, episode=477 reward=0.7875423 (528.88 it/sec) -training >> step=2857700, episode=477 reward=0.7445133 (520.06 it/sec) -training >> step=2857800, episode=477 reward=0.7525444 (536.34 it/sec) -training >> step=2857900, episode=477 reward=0.7641048 (546.07 it/sec) -training >> step=2858000, episode=477 reward=0.7802064 (492.62 it/sec) -training >> step=2858100, episode=477 reward=0.791456 (494.52 it/sec) -training >> step=2858200, episode=477 reward=0.7688686 (532.30 it/sec) -training >> step=2858300, episode=477 reward=0.7987505 (561.67 it/sec) -training >> step=2858400, episode=477 reward=0.7678453 (544.13 it/sec) -training >> step=2858500, episode=477 reward=0.7681153 (480.54 it/sec) -training >> step=2858600, episode=477 reward=0.7796994 (459.92 it/sec) -training >> step=2858700, episode=477 reward=0.7800467 (483.75 it/sec) -training >> step=2858800, episode=477 reward=0.7678216 (545.85 it/sec) -training >> step=2858900, episode=477 reward=0.7734061 (522.95 it/sec) -training >> step=2859000, episode=477 reward=0.7585831 (506.02 it/sec) -training >> step=2859100, episode=477 reward=0.7658407 (516.83 it/sec) -training >> step=2859200, episode=477 reward=0.7801762 (485.50 it/sec) -training >> step=2859300, episode=477 reward=0.7607746 (536.33 it/sec) -training >> step=2859400, episode=477 reward=0.7746354 (561.46 it/sec) -training >> step=2859500, episode=477 reward=0.762492 (477.48 it/sec) -training >> step=2859600, episode=477 reward=0.7847144 (496.60 it/sec) -training >> step=2859700, episode=477 reward=0.7628717 (474.28 it/sec) -training >> step=2859800, episode=477 reward=0.7480469 (549.27 it/sec) -training >> step=2859900, episode=477 reward=0.7767762 (533.53 it/sec) -training >> step=2860000, episode=477 reward=0.7721132 (506.95 it/sec) -training >> step=2860100, episode=477 reward=0.7475449 (590.02 it/sec) -training >> step=2860200, episode=477 reward=0.7861245 (551.40 it/sec) -training >> step=2860300, episode=477 reward=0.7555907 (460.71 it/sec) -training >> step=2860400, episode=477 reward=0.7645421 (564.74 it/sec) -training >> step=2860500, episode=477 reward=0.7457407 (517.24 it/sec) -training >> step=2860600, episode=477 reward=0.760897 (508.97 it/sec) -training >> step=2860700, episode=477 reward=0.7591177 (553.50 it/sec) -training >> step=2860800, episode=477 reward=0.7737173 (536.97 it/sec) -training >> step=2860900, episode=477 reward=0.7645156 (464.24 it/sec) -training >> step=2861000, episode=477 reward=0.7802131 (491.89 it/sec) -training >> step=2861100, episode=477 reward=0.7620485 (550.97 it/sec) -training >> step=2861200, episode=477 reward=0.7590315 (550.95 it/sec) -training >> step=2861300, episode=478 reward=0.7707473 (162.92 it/sec) -training >> step=2861400, episode=478 reward=0.7689919 (546.79 it/sec) -training >> step=2861500, episode=478 reward=0.7473953 (563.12 it/sec) -training >> step=2861600, episode=478 reward=0.7664601 (377.02 it/sec) -training >> step=2861700, episode=478 reward=0.7471253 (509.16 it/sec) -training >> step=2861800, episode=478 reward=0.7560894 (527.26 it/sec) -training >> step=2861900, episode=478 reward=0.7596672 (527.69 it/sec) -training >> step=2862000, episode=478 reward=0.7745298 (491.82 it/sec) -training >> step=2862100, episode=478 reward=0.7674866 (507.00 it/sec) -training >> step=2862200, episode=478 reward=0.7392826 (519.84 it/sec) -training >> step=2862300, episode=478 reward=0.7757491 (553.18 it/sec) -training >> step=2862400, episode=478 reward=0.7775439 (524.82 it/sec) -training >> step=2862500, episode=478 reward=0.7687765 (524.80 it/sec) -training >> step=2862600, episode=478 reward=0.7546692 (553.23 it/sec) -training >> step=2862700, episode=478 reward=0.7746443 (473.03 it/sec) -training >> step=2862800, episode=478 reward=0.7750993 (524.36 it/sec) -training >> step=2862900, episode=478 reward=0.7608915 (510.90 it/sec) -training >> step=2863000, episode=478 reward=0.7645618 (566.40 it/sec) -training >> step=2863100, episode=478 reward=0.7674118 (558.85 it/sec) -training >> step=2863200, episode=478 reward=0.7702284 (516.05 it/sec) -training >> step=2863300, episode=478 reward=0.7703053 (533.39 it/sec) -training >> step=2863400, episode=478 reward=0.775232 (527.75 it/sec) -training >> step=2863500, episode=478 reward=0.7608301 (521.45 it/sec) -training >> step=2863600, episode=478 reward=0.7469149 (534.89 it/sec) -training >> step=2863700, episode=478 reward=0.7877745 (524.94 it/sec) -training >> step=2863800, episode=478 reward=0.7542714 (462.30 it/sec) -training >> step=2863900, episode=478 reward=0.7931122 (494.95 it/sec) -training >> step=2864000, episode=478 reward=0.7659409 (503.11 it/sec) -training >> step=2864100, episode=478 reward=0.7789462 (516.36 it/sec) -training >> step=2864200, episode=478 reward=0.7597039 (424.71 it/sec) -training >> step=2864300, episode=478 reward=0.7699625 (419.98 it/sec) -training >> step=2864400, episode=478 reward=0.7450619 (401.59 it/sec) -training >> step=2864500, episode=478 reward=0.789901 (453.37 it/sec) -training >> step=2864600, episode=478 reward=0.7596562 (518.98 it/sec) -training >> step=2864700, episode=478 reward=0.7524568 (535.33 it/sec) -training >> step=2864800, episode=478 reward=0.765385 (556.51 it/sec) -training >> step=2864900, episode=478 reward=0.7531959 (539.28 it/sec) -training >> step=2865000, episode=478 reward=0.7808109 (478.07 it/sec) -training >> step=2865100, episode=478 reward=0.7680551 (536.56 it/sec) -training >> step=2865200, episode=478 reward=0.7524817 (514.22 it/sec) -training >> step=2865300, episode=478 reward=0.7709411 (491.98 it/sec) -training >> step=2865400, episode=478 reward=0.7709615 (543.75 it/sec) -training >> step=2865500, episode=478 reward=0.7533506 (566.35 it/sec) -training >> step=2865600, episode=478 reward=0.7750374 (509.67 it/sec) -training >> step=2865700, episode=478 reward=0.7583697 (487.73 it/sec) -training >> step=2865800, episode=478 reward=0.7695857 (575.97 it/sec) -training >> step=2865900, episode=478 reward=0.7681898 (525.30 it/sec) -training >> step=2866000, episode=478 reward=0.756 (541.70 it/sec) -training >> step=2866100, episode=478 reward=0.7793788 (530.08 it/sec) -training >> step=2866200, episode=478 reward=0.7766895 (533.44 it/sec) -training >> step=2866300, episode=478 reward=0.7586628 (497.69 it/sec) -training >> step=2866400, episode=478 reward=0.7647337 (503.68 it/sec) -training >> step=2866500, episode=478 reward=0.7633283 (544.78 it/sec) -training >> step=2866600, episode=478 reward=0.767493 (514.61 it/sec) -training >> step=2866700, episode=478 reward=0.7755308 (516.39 it/sec) -training >> step=2866800, episode=478 reward=0.7737179 (486.44 it/sec) -training >> step=2866900, episode=478 reward=0.749709 (548.22 it/sec) -training >> step=2867000, episode=478 reward=0.7457048 (531.22 it/sec) -training >> step=2867100, episode=478 reward=0.7660991 (487.44 it/sec) -training >> step=2867200, episode=478 reward=0.73577 (551.37 it/sec) -training >> step=2867300, episode=479 reward=0.761223 (169.10 it/sec) -training >> step=2867400, episode=479 reward=0.7626074 (526.43 it/sec) -training >> step=2867500, episode=479 reward=0.7673287 (507.27 it/sec) -training >> step=2867600, episode=479 reward=0.758543 (567.32 it/sec) -training >> step=2867700, episode=479 reward=0.7574419 (540.76 it/sec) -training >> step=2867800, episode=479 reward=0.7577725 (526.49 it/sec) -training >> step=2867900, episode=479 reward=0.7882134 (386.99 it/sec) -training >> step=2868000, episode=479 reward=0.7551805 (579.88 it/sec) -training >> step=2868100, episode=479 reward=0.780395 (518.85 it/sec) -training >> step=2868200, episode=479 reward=0.7796926 (540.03 it/sec) -training >> step=2868300, episode=479 reward=0.7761744 (504.14 it/sec) -training >> step=2868400, episode=479 reward=0.7677326 (558.19 it/sec) -training >> step=2868500, episode=479 reward=0.7574437 (538.94 it/sec) -training >> step=2868600, episode=479 reward=0.7837252 (488.75 it/sec) -training >> step=2868700, episode=479 reward=0.7604446 (538.19 it/sec) -training >> step=2868800, episode=479 reward=0.7721496 (524.04 it/sec) -training >> step=2868900, episode=479 reward=0.7688507 (544.43 it/sec) -training >> step=2869000, episode=479 reward=0.7621292 (526.24 it/sec) -training >> step=2869100, episode=479 reward=0.7645515 (548.65 it/sec) -training >> step=2869200, episode=479 reward=0.7651707 (534.19 it/sec) -training >> step=2869300, episode=479 reward=0.7758104 (483.23 it/sec) -training >> step=2869400, episode=479 reward=0.7684157 (550.56 it/sec) -training >> step=2869500, episode=479 reward=0.7641477 (543.21 it/sec) -training >> step=2869600, episode=479 reward=0.7884424 (537.62 it/sec) -training >> step=2869700, episode=479 reward=0.7757989 (506.08 it/sec) -training >> step=2869800, episode=479 reward=0.7766092 (516.85 it/sec) -training >> step=2869900, episode=479 reward=0.7570629 (495.15 it/sec) -training >> step=2870000, episode=479 reward=0.7810705 (527.07 it/sec) -training >> step=2870100, episode=479 reward=0.7709072 (500.42 it/sec) -training >> step=2870200, episode=479 reward=0.8000728 (563.37 it/sec) -training >> step=2870300, episode=479 reward=0.759588 (527.42 it/sec) -training >> step=2870400, episode=479 reward=0.7637885 (521.12 it/sec) -training >> step=2870500, episode=479 reward=0.7583606 (542.16 it/sec) -training >> step=2870600, episode=479 reward=0.7776587 (517.06 it/sec) -training >> step=2870700, episode=479 reward=0.774177 (553.93 it/sec) -training >> step=2870800, episode=479 reward=0.7764329 (539.01 it/sec) -training >> step=2870900, episode=479 reward=0.7660968 (565.59 it/sec) -training >> step=2871000, episode=479 reward=0.7627528 (508.04 it/sec) -training >> step=2871100, episode=479 reward=0.7618904 (501.13 it/sec) -training >> step=2871200, episode=479 reward=0.7686208 (532.50 it/sec) -training >> step=2871300, episode=479 reward=0.7628796 (536.22 it/sec) -training >> step=2871400, episode=479 reward=0.7777099 (518.40 it/sec) -training >> step=2871500, episode=479 reward=0.784572 (542.61 it/sec) -training >> step=2871600, episode=479 reward=0.7624145 (493.01 it/sec) -training >> step=2871700, episode=479 reward=0.7807513 (512.78 it/sec) -training >> step=2871800, episode=479 reward=0.7632328 (527.94 it/sec) -training >> step=2871900, episode=479 reward=0.7783235 (550.89 it/sec) -training >> step=2872000, episode=479 reward=0.7663668 (566.33 it/sec) -training >> step=2872100, episode=479 reward=0.7718551 (514.11 it/sec) -training >> step=2872200, episode=479 reward=0.7473844 (481.18 it/sec) -training >> step=2872300, episode=479 reward=0.7711538 (552.97 it/sec) -training >> step=2872400, episode=479 reward=0.7646529 (526.95 it/sec) -training >> step=2872500, episode=479 reward=0.7528611 (506.52 it/sec) -training >> step=2872600, episode=479 reward=0.7723057 (512.44 it/sec) -training >> step=2872700, episode=479 reward=0.7588377 (556.69 it/sec) -training >> step=2872800, episode=479 reward=0.7765675 (494.74 it/sec) -training >> step=2872900, episode=479 reward=0.7511695 (499.30 it/sec) -training >> step=2873000, episode=479 reward=0.7879176 (515.98 it/sec) -training >> step=2873100, episode=479 reward=0.7688411 (566.33 it/sec) -training >> step=2873200, episode=479 reward=0.7651488 (514.33 it/sec) -training >> step=2873300, episode=480 reward=0.764055 (166.10 it/sec) -training >> step=2873400, episode=480 reward=0.7545205 (555.94 it/sec) -training >> step=2873500, episode=480 reward=0.7463601 (488.68 it/sec) -training >> step=2873600, episode=480 reward=0.7561749 (537.13 it/sec) -training >> step=2873700, episode=480 reward=0.7786347 (540.37 it/sec) -training >> step=2873800, episode=480 reward=0.7776105 (574.38 it/sec) -training >> step=2873900, episode=480 reward=0.7806844 (515.20 it/sec) -training >> step=2874000, episode=480 reward=0.7739009 (534.36 it/sec) -training >> step=2874100, episode=480 reward=0.7791405 (429.73 it/sec) -training >> step=2874200, episode=480 reward=0.7903042 (545.34 it/sec) -training >> step=2874300, episode=480 reward=0.7701994 (506.56 it/sec) -training >> step=2874400, episode=480 reward=0.7750171 (543.25 it/sec) -training >> step=2874500, episode=480 reward=0.7697102 (549.01 it/sec) -training >> step=2874600, episode=480 reward=0.7778124 (472.52 it/sec) -training >> step=2874700, episode=480 reward=0.7759332 (500.73 it/sec) -training >> step=2874800, episode=480 reward=0.7711801 (506.62 it/sec) -training >> step=2874900, episode=480 reward=0.781141 (515.87 it/sec) -training >> step=2875000, episode=480 reward=0.7572146 (539.25 it/sec) -training >> step=2875100, episode=480 reward=0.7785687 (495.82 it/sec) -training >> step=2875200, episode=480 reward=0.763992 (549.40 it/sec) -training >> step=2875300, episode=480 reward=0.7713124 (501.05 it/sec) -training >> step=2875400, episode=480 reward=0.7763051 (523.71 it/sec) -training >> step=2875500, episode=480 reward=0.7835178 (548.85 it/sec) -training >> step=2875600, episode=480 reward=0.7665286 (499.36 it/sec) -training >> step=2875700, episode=480 reward=0.7944597 (446.66 it/sec) -training >> step=2875800, episode=480 reward=0.7580527 (488.27 it/sec) -training >> step=2875900, episode=480 reward=0.7724475 (510.30 it/sec) -training >> step=2876000, episode=480 reward=0.7789293 (501.70 it/sec) -training >> step=2876100, episode=480 reward=0.7738084 (549.89 it/sec) -training >> step=2876200, episode=480 reward=0.7668589 (488.73 it/sec) -training >> step=2876300, episode=480 reward=0.7812579 (546.81 it/sec) -training >> step=2876400, episode=480 reward=0.7704571 (527.50 it/sec) -training >> step=2876500, episode=480 reward=0.7675071 (493.71 it/sec) -training >> step=2876600, episode=480 reward=0.787746 (526.93 it/sec) -training >> step=2876700, episode=480 reward=0.7758225 (503.79 it/sec) -training >> step=2876800, episode=480 reward=0.769111 (524.60 it/sec) -training >> step=2876900, episode=480 reward=0.7679657 (531.51 it/sec) -training >> step=2877000, episode=480 reward=0.7575307 (512.61 it/sec) -training >> step=2877100, episode=480 reward=0.7640098 (474.30 it/sec) -training >> step=2877200, episode=480 reward=0.7745195 (499.36 it/sec) -training >> step=2877300, episode=480 reward=0.7563052 (530.82 it/sec) -training >> step=2877400, episode=480 reward=0.7534458 (559.70 it/sec) -training >> step=2877500, episode=480 reward=0.7803205 (488.39 it/sec) -training >> step=2877600, episode=480 reward=0.7704377 (552.05 it/sec) -training >> step=2877700, episode=480 reward=0.7560413 (522.22 it/sec) -training >> step=2877800, episode=480 reward=0.7588635 (474.08 it/sec) -training >> step=2877900, episode=480 reward=0.7804368 (543.63 it/sec) -training >> step=2878000, episode=480 reward=0.7518076 (510.50 it/sec) -training >> step=2878100, episode=480 reward=0.7591195 (552.66 it/sec) -training >> step=2878200, episode=480 reward=0.7682106 (553.98 it/sec) -training >> step=2878300, episode=480 reward=0.7729458 (494.79 it/sec) -training >> step=2878400, episode=480 reward=0.7659926 (531.21 it/sec) -training >> step=2878500, episode=480 reward=0.7570133 (533.04 it/sec) -training >> step=2878600, episode=480 reward=0.7518767 (524.87 it/sec) -training >> step=2878700, episode=480 reward=0.7523729 (539.93 it/sec) -training >> step=2878800, episode=480 reward=0.7554241 (491.55 it/sec) -training >> step=2878900, episode=480 reward=0.7672451 (464.80 it/sec) -training >> step=2879000, episode=480 reward=0.7570323 (468.30 it/sec) -training >> step=2879100, episode=480 reward=0.7553235 (485.67 it/sec) -training >> step=2879200, episode=480 reward=0.7804376 (467.19 it/sec) -training >> step=2879300, episode=481 reward=0.7649775 (98.57 it/sec) -training >> step=2879400, episode=481 reward=0.7674925 (473.37 it/sec) -training >> step=2879500, episode=481 reward=0.765123 (485.54 it/sec) -training >> step=2879600, episode=481 reward=0.7309914 (485.90 it/sec) -training >> step=2879700, episode=481 reward=0.7463439 (536.49 it/sec) -training >> step=2879800, episode=481 reward=0.7563572 (544.33 it/sec) -training >> step=2879900, episode=481 reward=0.7900853 (526.59 it/sec) -training >> step=2880000, episode=481 reward=0.770844 (502.41 it/sec) -training >> step=2880100, episode=481 reward=0.7641402 (535.29 it/sec) -training >> step=2880200, episode=481 reward=0.7821777 (376.93 it/sec) -training >> step=2880300, episode=481 reward=0.7771651 (535.22 it/sec) -training >> step=2880400, episode=481 reward=0.7712455 (493.70 it/sec) -training >> step=2880500, episode=481 reward=0.7554607 (498.39 it/sec) -training >> step=2880600, episode=481 reward=0.777815 (517.28 it/sec) -training >> step=2880700, episode=481 reward=0.7756461 (499.40 it/sec) -training >> step=2880800, episode=481 reward=0.7728277 (540.17 it/sec) -training >> step=2880900, episode=481 reward=0.7612048 (515.54 it/sec) -training >> step=2881000, episode=481 reward=0.7686576 (497.49 it/sec) -training >> step=2881100, episode=481 reward=0.7486633 (509.20 it/sec) -training >> step=2881200, episode=481 reward=0.7629244 (537.46 it/sec) -training >> step=2881300, episode=481 reward=0.7573711 (512.79 it/sec) -training >> step=2881400, episode=481 reward=0.7618636 (555.38 it/sec) -training >> step=2881500, episode=481 reward=0.767135 (502.88 it/sec) -training >> step=2881600, episode=481 reward=0.760762 (513.59 it/sec) -training >> step=2881700, episode=481 reward=0.7834048 (524.31 it/sec) -training >> step=2881800, episode=481 reward=0.7788205 (432.22 it/sec) -training >> step=2881900, episode=481 reward=0.7768941 (537.57 it/sec) -training >> step=2882000, episode=481 reward=0.7628043 (519.89 it/sec) -training >> step=2882100, episode=481 reward=0.8111683 (544.86 it/sec) -training >> step=2882200, episode=481 reward=0.7690569 (524.64 it/sec) -training >> step=2882300, episode=481 reward=0.8062518 (541.77 it/sec) -training >> step=2882400, episode=481 reward=0.7937513 (538.09 it/sec) -training >> step=2882500, episode=481 reward=0.7672892 (518.94 it/sec) -training >> step=2882600, episode=481 reward=0.7495865 (518.29 it/sec) -training >> step=2882700, episode=481 reward=0.7806569 (526.14 it/sec) -training >> step=2882800, episode=481 reward=0.7867324 (530.23 it/sec) -training >> step=2882900, episode=481 reward=0.7544179 (531.62 it/sec) -training >> step=2883000, episode=481 reward=0.7731118 (591.13 it/sec) -training >> step=2883100, episode=481 reward=0.743907 (490.32 it/sec) -training >> step=2883200, episode=481 reward=0.7895841 (508.58 it/sec) -training >> step=2883300, episode=481 reward=0.7721782 (513.22 it/sec) -training >> step=2883400, episode=481 reward=0.7913312 (537.07 it/sec) -training >> step=2883500, episode=481 reward=0.757188 (544.11 it/sec) -training >> step=2883600, episode=481 reward=0.7749856 (504.07 it/sec) -training >> step=2883700, episode=481 reward=0.7584315 (517.10 it/sec) -training >> step=2883800, episode=481 reward=0.7816112 (509.90 it/sec) -training >> step=2883900, episode=481 reward=0.7658596 (539.52 it/sec) -training >> step=2884000, episode=481 reward=0.7763862 (509.92 it/sec) -training >> step=2884100, episode=481 reward=0.786205 (576.57 it/sec) -training >> step=2884200, episode=481 reward=0.7944915 (497.86 it/sec) -training >> step=2884300, episode=481 reward=0.7573564 (499.21 it/sec) -training >> step=2884400, episode=481 reward=0.7781562 (524.55 it/sec) -training >> step=2884500, episode=481 reward=0.7671968 (523.84 it/sec) -training >> step=2884600, episode=481 reward=0.7561086 (506.43 it/sec) -training >> step=2884700, episode=481 reward=0.7794409 (464.09 it/sec) -training >> step=2884800, episode=481 reward=0.7301033 (521.04 it/sec) -training >> step=2884900, episode=481 reward=0.7592152 (492.75 it/sec) -training >> step=2885000, episode=481 reward=0.7672701 (488.54 it/sec) -training >> step=2885100, episode=481 reward=0.7470144 (521.98 it/sec) -training >> step=2885200, episode=481 reward=0.7589313 (523.62 it/sec) -training >> step=2885300, episode=482 reward=0.7833458 (141.30 it/sec) -training >> step=2885400, episode=482 reward=0.7672065 (517.07 it/sec) -training >> step=2885500, episode=482 reward=0.7737943 (511.96 it/sec) -training >> step=2885600, episode=482 reward=0.7550538 (484.94 it/sec) -training >> step=2885700, episode=482 reward=0.7525565 (458.94 it/sec) -training >> step=2885800, episode=482 reward=0.7480732 (482.58 it/sec) -training >> step=2885900, episode=482 reward=0.7479059 (584.41 it/sec) -training >> step=2886000, episode=482 reward=0.7622637 (484.38 it/sec) -training >> step=2886100, episode=482 reward=0.7787522 (525.61 it/sec) -training >> step=2886200, episode=482 reward=0.7789747 (548.49 it/sec) -training >> step=2886300, episode=482 reward=0.7659881 (397.84 it/sec) -training >> step=2886400, episode=482 reward=0.7597378 (555.28 it/sec) -training >> step=2886500, episode=482 reward=0.7727412 (526.22 it/sec) -training >> step=2886600, episode=482 reward=0.7641402 (501.39 it/sec) -training >> step=2886700, episode=482 reward=0.7733452 (482.31 it/sec) -training >> step=2886800, episode=482 reward=0.7840582 (513.65 it/sec) -training >> step=2886900, episode=482 reward=0.7688977 (540.62 it/sec) -training >> step=2887000, episode=482 reward=0.7638323 (533.74 it/sec) -training >> step=2887100, episode=482 reward=0.7561988 (498.28 it/sec) -training >> step=2887200, episode=482 reward=0.7921965 (487.79 it/sec) -training >> step=2887300, episode=482 reward=0.772466 (546.03 it/sec) -training >> step=2887400, episode=482 reward=0.7629936 (562.43 it/sec) -training >> step=2887500, episode=482 reward=0.7909302 (521.19 it/sec) -training >> step=2887600, episode=482 reward=0.747857 (506.73 it/sec) -training >> step=2887700, episode=482 reward=0.7907267 (575.77 it/sec) -training >> step=2887800, episode=482 reward=0.7958108 (504.48 it/sec) -training >> step=2887900, episode=482 reward=0.7728115 (531.25 it/sec) -training >> step=2888000, episode=482 reward=0.77817 (549.90 it/sec) -training >> step=2888100, episode=482 reward=0.7707194 (531.56 it/sec) -training >> step=2888200, episode=482 reward=0.7606483 (504.30 it/sec) -training >> step=2888300, episode=482 reward=0.7635328 (511.54 it/sec) -training >> step=2888400, episode=482 reward=0.7667157 (524.57 it/sec) -training >> step=2888500, episode=482 reward=0.7754567 (551.54 it/sec) -training >> step=2888600, episode=482 reward=0.765328 (552.46 it/sec) -training >> step=2888700, episode=482 reward=0.7594398 (518.00 it/sec) -training >> step=2888800, episode=482 reward=0.7469373 (550.23 it/sec) -training >> step=2888900, episode=482 reward=0.7670162 (524.66 it/sec) -training >> step=2889000, episode=482 reward=0.7807971 (534.99 it/sec) -training >> step=2889100, episode=482 reward=0.7727034 (540.43 it/sec) -training >> step=2889200, episode=482 reward=0.7658224 (525.28 it/sec) -training >> step=2889300, episode=482 reward=0.7668637 (517.37 it/sec) -training >> step=2889400, episode=482 reward=0.7842741 (512.52 it/sec) -training >> step=2889500, episode=482 reward=0.7586272 (542.17 it/sec) -training >> step=2889600, episode=482 reward=0.7703307 (543.73 it/sec) -training >> step=2889700, episode=482 reward=0.7689822 (541.79 it/sec) -training >> step=2889800, episode=482 reward=0.7550384 (493.38 it/sec) -training >> step=2889900, episode=482 reward=0.7396056 (570.77 it/sec) -training >> step=2890000, episode=482 reward=0.7719149 (532.55 it/sec) -training >> step=2890100, episode=482 reward=0.7812661 (515.26 it/sec) -training >> step=2890200, episode=482 reward=0.7602518 (535.92 it/sec) -training >> step=2890300, episode=482 reward=0.7586328 (510.50 it/sec) -training >> step=2890400, episode=482 reward=0.7694671 (500.44 it/sec) -training >> step=2890500, episode=482 reward=0.7650197 (487.70 it/sec) -training >> step=2890600, episode=482 reward=0.7558399 (542.23 it/sec) -training >> step=2890700, episode=482 reward=0.7582089 (482.49 it/sec) -training >> step=2890800, episode=482 reward=0.761711 (510.10 it/sec) -training >> step=2890900, episode=482 reward=0.7763296 (484.45 it/sec) -training >> step=2891000, episode=482 reward=0.7445176 (507.43 it/sec) -training >> step=2891100, episode=482 reward=0.7746931 (504.97 it/sec) -training >> step=2891200, episode=482 reward=0.767232 (529.49 it/sec) -training >> step=2891300, episode=483 reward=0.7548658 (132.40 it/sec) -training >> step=2891400, episode=483 reward=0.7460129 (497.38 it/sec) -training >> step=2891500, episode=483 reward=0.7665933 (502.46 it/sec) -training >> step=2891600, episode=483 reward=0.7685074 (487.70 it/sec) -training >> step=2891700, episode=483 reward=0.767875 (505.71 it/sec) -training >> step=2891800, episode=483 reward=0.7597291 (495.02 it/sec) -training >> step=2891900, episode=483 reward=0.7795007 (481.17 it/sec) -training >> step=2892000, episode=483 reward=0.7590328 (492.98 it/sec) -training >> step=2892100, episode=483 reward=0.7576372 (506.41 it/sec) -training >> step=2892200, episode=483 reward=0.7813842 (499.29 it/sec) -training >> step=2892300, episode=483 reward=0.7890627 (552.27 it/sec) -training >> step=2892400, episode=483 reward=0.7801762 (386.33 it/sec) -training >> step=2892500, episode=483 reward=0.7761605 (546.01 it/sec) -training >> step=2892600, episode=483 reward=0.7644737 (512.43 it/sec) -training >> step=2892700, episode=483 reward=0.7870338 (539.32 it/sec) -training >> step=2892800, episode=483 reward=0.752165 (529.89 it/sec) -training >> step=2892900, episode=483 reward=0.7752794 (527.75 it/sec) -training >> step=2893000, episode=483 reward=0.7765564 (527.87 it/sec) -training >> step=2893100, episode=483 reward=0.773879 (554.61 it/sec) -training >> step=2893200, episode=483 reward=0.7753261 (462.30 it/sec) -training >> step=2893300, episode=483 reward=0.7746975 (499.16 it/sec) -training >> step=2893400, episode=483 reward=0.7866754 (492.14 it/sec) -training >> step=2893500, episode=483 reward=0.7773292 (512.58 it/sec) -training >> step=2893600, episode=483 reward=0.7355849 (463.39 it/sec) -training >> step=2893700, episode=483 reward=0.7847307 (425.82 it/sec) -training >> step=2893800, episode=483 reward=0.7569423 (455.22 it/sec) -training >> step=2893900, episode=483 reward=0.773425 (516.10 it/sec) -training >> step=2894000, episode=483 reward=0.7720997 (502.15 it/sec) -training >> step=2894100, episode=483 reward=0.7711039 (490.08 it/sec) -training >> step=2894200, episode=483 reward=0.7914378 (523.45 it/sec) -training >> step=2894300, episode=483 reward=0.760013 (541.69 it/sec) -training >> step=2894400, episode=483 reward=0.7716085 (507.25 it/sec) -training >> step=2894500, episode=483 reward=0.7688867 (549.52 it/sec) -training >> step=2894600, episode=483 reward=0.7929664 (506.80 it/sec) -training >> step=2894700, episode=483 reward=0.7769464 (513.47 it/sec) -training >> step=2894800, episode=483 reward=0.7527801 (537.08 it/sec) -training >> step=2894900, episode=483 reward=0.7685863 (541.04 it/sec) -training >> step=2895000, episode=483 reward=0.7246459 (521.30 it/sec) -training >> step=2895100, episode=483 reward=0.7667144 (514.22 it/sec) -training >> step=2895200, episode=483 reward=0.7680272 (524.11 it/sec) -training >> step=2895300, episode=483 reward=0.7907853 (554.73 it/sec) -training >> step=2895400, episode=483 reward=0.7679064 (568.54 it/sec) -training >> step=2895500, episode=483 reward=0.7574501 (535.08 it/sec) -training >> step=2895600, episode=483 reward=0.7810375 (560.02 it/sec) -training >> step=2895700, episode=483 reward=0.7688792 (543.49 it/sec) -training >> step=2895800, episode=483 reward=0.7830816 (537.27 it/sec) -training >> step=2895900, episode=483 reward=0.782655 (551.37 it/sec) -training >> step=2896000, episode=483 reward=0.7761606 (581.14 it/sec) -training >> step=2896100, episode=483 reward=0.7869601 (509.62 it/sec) -training >> step=2896200, episode=483 reward=0.7775995 (553.43 it/sec) -training >> step=2896300, episode=483 reward=0.7934357 (537.30 it/sec) -training >> step=2896400, episode=483 reward=0.7768384 (569.96 it/sec) -training >> step=2896500, episode=483 reward=0.7778377 (565.65 it/sec) -training >> step=2896600, episode=483 reward=0.7596698 (542.52 it/sec) -training >> step=2896700, episode=483 reward=0.7592199 (547.94 it/sec) -training >> step=2896800, episode=483 reward=0.7710232 (541.70 it/sec) -training >> step=2896900, episode=483 reward=0.7540153 (513.90 it/sec) -training >> step=2897000, episode=483 reward=0.7459385 (516.83 it/sec) -training >> step=2897100, episode=483 reward=0.7561479 (583.93 it/sec) -training >> step=2897200, episode=483 reward=0.7635258 (522.27 it/sec) -training >> step=2897300, episode=484 reward=0.7691243 (151.87 it/sec) -training >> step=2897400, episode=484 reward=0.7703125 (536.53 it/sec) -training >> step=2897500, episode=484 reward=0.7320555 (551.93 it/sec) -training >> step=2897600, episode=484 reward=0.768146 (537.17 it/sec) -training >> step=2897700, episode=484 reward=0.7603863 (534.22 it/sec) -training >> step=2897800, episode=484 reward=0.7576071 (553.27 it/sec) -training >> step=2897900, episode=484 reward=0.7931055 (525.46 it/sec) -training >> step=2898000, episode=484 reward=0.7868499 (530.82 it/sec) -training >> step=2898100, episode=484 reward=0.7615696 (533.64 it/sec) -training >> step=2898200, episode=484 reward=0.7518722 (564.10 it/sec) -training >> step=2898300, episode=484 reward=0.7742454 (523.86 it/sec) -training >> step=2898400, episode=484 reward=0.7787904 (509.49 it/sec) -training >> step=2898500, episode=484 reward=0.7700585 (562.56 it/sec) -training >> step=2898600, episode=484 reward=0.7584669 (398.06 it/sec) -training >> step=2898700, episode=484 reward=0.7928896 (509.13 it/sec) -training >> step=2898800, episode=484 reward=0.7518317 (557.45 it/sec) -training >> step=2898900, episode=484 reward=0.7761324 (530.14 it/sec) -training >> step=2899000, episode=484 reward=0.7643764 (520.13 it/sec) -training >> step=2899100, episode=484 reward=0.7660146 (526.94 it/sec) -training >> step=2899200, episode=484 reward=0.7685223 (517.06 it/sec) -training >> step=2899300, episode=484 reward=0.755547 (538.88 it/sec) -training >> step=2899400, episode=484 reward=0.7725208 (541.17 it/sec) -training >> step=2899500, episode=484 reward=0.7846913 (548.12 it/sec) -training >> step=2899600, episode=484 reward=0.7665814 (558.27 it/sec) -training >> step=2899700, episode=484 reward=0.7651155 (512.54 it/sec) -training >> step=2899800, episode=484 reward=0.7553282 (547.96 it/sec) -training >> step=2899900, episode=484 reward=0.7764219 (543.44 it/sec) -training >> step=2900000, episode=484 reward=0.7625233 (548.55 it/sec) -training >> step=2900100, episode=484 reward=0.7670429 (505.06 it/sec) -training >> step=2900200, episode=484 reward=0.7660307 (542.90 it/sec) -training >> step=2900300, episode=484 reward=0.7397342 (505.10 it/sec) -training >> step=2900400, episode=484 reward=0.7729496 (517.11 it/sec) -training >> step=2900500, episode=484 reward=0.7690369 (502.53 it/sec) -training >> step=2900600, episode=484 reward=0.7809661 (522.59 it/sec) -training >> step=2900700, episode=484 reward=0.7729136 (553.16 it/sec) -training >> step=2900800, episode=484 reward=0.7878152 (511.90 it/sec) -training >> step=2900900, episode=484 reward=0.7729424 (524.93 it/sec) -training >> step=2901000, episode=484 reward=0.7662805 (522.35 it/sec) -training >> step=2901100, episode=484 reward=0.7664945 (543.65 it/sec) -training >> step=2901200, episode=484 reward=0.7679885 (516.21 it/sec) -training >> step=2901300, episode=484 reward=0.7755653 (546.96 it/sec) -training >> step=2901400, episode=484 reward=0.75993 (533.67 it/sec) -training >> step=2901500, episode=484 reward=0.7573195 (565.01 it/sec) -training >> step=2901600, episode=484 reward=0.7884096 (518.87 it/sec) -training >> step=2901700, episode=484 reward=0.7667172 (534.66 it/sec) -training >> step=2901800, episode=484 reward=0.7834694 (553.45 it/sec) -training >> step=2901900, episode=484 reward=0.7846954 (523.26 it/sec) -training >> step=2902000, episode=484 reward=0.7529823 (530.04 it/sec) -training >> step=2902100, episode=484 reward=0.7758788 (528.07 it/sec) -training >> step=2902200, episode=484 reward=0.7652779 (494.63 it/sec) -training >> step=2902300, episode=484 reward=0.7662424 (530.61 it/sec) -training >> step=2902400, episode=484 reward=0.7618962 (529.29 it/sec) -training >> step=2902500, episode=484 reward=0.7606456 (501.53 it/sec) -training >> step=2902600, episode=484 reward=0.7698133 (559.25 it/sec) -training >> step=2902700, episode=484 reward=0.773578 (537.41 it/sec) -training >> step=2902800, episode=484 reward=0.7624763 (541.88 it/sec) -training >> step=2902900, episode=484 reward=0.7828816 (527.50 it/sec) -training >> step=2903000, episode=484 reward=0.7558175 (511.26 it/sec) -training >> step=2903100, episode=484 reward=0.7566439 (513.18 it/sec) -training >> step=2903200, episode=484 reward=0.7788973 (472.54 it/sec) -training >> step=2903300, episode=485 reward=0.7465564 (147.89 it/sec) -training >> step=2903400, episode=485 reward=0.7696622 (538.96 it/sec) -training >> step=2903500, episode=485 reward=0.7624017 (489.39 it/sec) -training >> step=2903600, episode=485 reward=0.783193 (574.24 it/sec) -training >> step=2903700, episode=485 reward=0.7833756 (546.56 it/sec) -training >> step=2903800, episode=485 reward=0.7806476 (532.08 it/sec) -training >> step=2903900, episode=485 reward=0.774338 (533.40 it/sec) -training >> step=2904000, episode=485 reward=0.7983153 (555.56 it/sec) -training >> step=2904100, episode=485 reward=0.7882909 (537.17 it/sec) -training >> step=2904200, episode=485 reward=0.7659341 (551.64 it/sec) -training >> step=2904300, episode=485 reward=0.763225 (518.75 it/sec) -training >> step=2904400, episode=485 reward=0.767406 (558.44 it/sec) -training >> step=2904500, episode=485 reward=0.7757492 (501.68 it/sec) -training >> step=2904600, episode=485 reward=0.7845835 (512.10 it/sec) -training >> step=2904700, episode=485 reward=0.7549251 (568.03 it/sec) -training >> step=2904800, episode=485 reward=0.7684651 (395.42 it/sec) -training >> step=2904900, episode=485 reward=0.761815 (539.61 it/sec) -training >> step=2905000, episode=485 reward=0.7785385 (516.51 it/sec) -training >> step=2905100, episode=485 reward=0.7221795 (548.39 it/sec) -training >> step=2905200, episode=485 reward=0.7782274 (509.46 it/sec) -training >> step=2905300, episode=485 reward=0.765981 (525.23 it/sec) -training >> step=2905400, episode=485 reward=0.7856659 (547.50 it/sec) -training >> step=2905500, episode=485 reward=0.7819394 (558.70 it/sec) -training >> step=2905600, episode=485 reward=0.7611894 (512.54 it/sec) -training >> step=2905700, episode=485 reward=0.7767794 (544.23 it/sec) -training >> step=2905800, episode=485 reward=0.7907696 (566.41 it/sec) -training >> step=2905900, episode=485 reward=0.7702478 (529.17 it/sec) -training >> step=2906000, episode=485 reward=0.7735884 (526.69 it/sec) -training >> step=2906100, episode=485 reward=0.764854 (518.08 it/sec) -training >> step=2906200, episode=485 reward=0.7846745 (552.14 it/sec) -training >> step=2906300, episode=485 reward=0.790708 (527.54 it/sec) -training >> step=2906400, episode=485 reward=0.7649013 (515.63 it/sec) -training >> step=2906500, episode=485 reward=0.7757139 (569.43 it/sec) -training >> step=2906600, episode=485 reward=0.7547059 (548.03 it/sec) -training >> step=2906700, episode=485 reward=0.7633154 (536.94 it/sec) -training >> step=2906800, episode=485 reward=0.7714986 (546.48 it/sec) -training >> step=2906900, episode=485 reward=0.7775687 (566.08 it/sec) -training >> step=2907000, episode=485 reward=0.760949 (513.85 it/sec) -training >> step=2907100, episode=485 reward=0.7812864 (539.70 it/sec) -training >> step=2907200, episode=485 reward=0.786319 (543.95 it/sec) -training >> step=2907300, episode=485 reward=0.7727736 (547.86 it/sec) -training >> step=2907400, episode=485 reward=0.776117 (537.11 it/sec) -training >> step=2907500, episode=485 reward=0.7621679 (507.88 it/sec) -training >> step=2907600, episode=485 reward=0.76493 (585.35 it/sec) -training >> step=2907700, episode=485 reward=0.7857639 (547.28 it/sec) -training >> step=2907800, episode=485 reward=0.7657539 (549.24 it/sec) -training >> step=2907900, episode=485 reward=0.7646981 (532.33 it/sec) -training >> step=2908000, episode=485 reward=0.7656395 (548.59 it/sec) -training >> step=2908100, episode=485 reward=0.7710164 (542.97 it/sec) -training >> step=2908200, episode=485 reward=0.7769828 (522.23 it/sec) -training >> step=2908300, episode=485 reward=0.7706876 (570.26 it/sec) -training >> step=2908400, episode=485 reward=0.7691785 (517.94 it/sec) -training >> step=2908500, episode=485 reward=0.7747844 (530.70 it/sec) -training >> step=2908600, episode=485 reward=0.7514334 (511.61 it/sec) -training >> step=2908700, episode=485 reward=0.7596712 (547.47 it/sec) -training >> step=2908800, episode=485 reward=0.7801594 (519.82 it/sec) -training >> step=2908900, episode=485 reward=0.7442956 (454.85 it/sec) -training >> step=2909000, episode=485 reward=0.7559959 (437.62 it/sec) -training >> step=2909100, episode=485 reward=0.7538652 (470.93 it/sec) -training >> step=2909200, episode=485 reward=0.7482578 (471.77 it/sec) -training >> step=2909300, episode=486 reward=0.7737056 (124.23 it/sec) -training >> step=2909400, episode=486 reward=0.7677736 (552.07 it/sec) -training >> step=2909500, episode=486 reward=0.7726802 (536.13 it/sec) -training >> step=2909600, episode=486 reward=0.757055 (512.03 it/sec) -training >> step=2909700, episode=486 reward=0.7640265 (508.27 it/sec) -training >> step=2909800, episode=486 reward=0.7798862 (534.47 it/sec) -training >> step=2909900, episode=486 reward=0.7799563 (553.72 it/sec) -training >> step=2910000, episode=486 reward=0.7607481 (502.76 it/sec) -training >> step=2910100, episode=486 reward=0.792076 (556.80 it/sec) -training >> step=2910200, episode=486 reward=0.7879727 (553.45 it/sec) -training >> step=2910300, episode=486 reward=0.7553298 (544.41 it/sec) -training >> step=2910400, episode=486 reward=0.7891928 (530.84 it/sec) -training >> step=2910500, episode=486 reward=0.7812443 (516.64 it/sec) -training >> step=2910600, episode=486 reward=0.7687603 (526.83 it/sec) -training >> step=2910700, episode=486 reward=0.7708735 (533.49 it/sec) -training >> step=2910800, episode=486 reward=0.7777834 (511.82 it/sec) -training >> step=2910900, episode=486 reward=0.7742361 (393.45 it/sec) -training >> step=2911000, episode=486 reward=0.7888674 (552.45 it/sec) -training >> step=2911100, episode=486 reward=0.7823504 (547.11 it/sec) -training >> step=2911200, episode=486 reward=0.772202 (541.71 it/sec) -training >> step=2911300, episode=486 reward=0.7630383 (506.75 it/sec) -training >> step=2911400, episode=486 reward=0.769622 (543.23 it/sec) -training >> step=2911500, episode=486 reward=0.7617835 (526.72 it/sec) -training >> step=2911600, episode=486 reward=0.7627201 (548.23 it/sec) -training >> step=2911700, episode=486 reward=0.7734193 (542.77 it/sec) -training >> step=2911800, episode=486 reward=0.7716365 (504.21 it/sec) -training >> step=2911900, episode=486 reward=0.7889193 (527.26 it/sec) -training >> step=2912000, episode=486 reward=0.7524951 (538.85 it/sec) -training >> step=2912100, episode=486 reward=0.7735392 (547.91 it/sec) -training >> step=2912200, episode=486 reward=0.7709407 (543.17 it/sec) -training >> step=2912300, episode=486 reward=0.791602 (507.93 it/sec) -training >> step=2912400, episode=486 reward=0.7693135 (528.90 it/sec) -training >> step=2912500, episode=486 reward=0.770108 (531.18 it/sec) -training >> step=2912600, episode=486 reward=0.7845256 (545.70 it/sec) -training >> step=2912700, episode=486 reward=0.7565657 (539.92 it/sec) -training >> step=2912800, episode=486 reward=0.783717 (557.90 it/sec) -training >> step=2912900, episode=486 reward=0.7850307 (531.99 it/sec) -training >> step=2913000, episode=486 reward=0.772007 (518.21 it/sec) -training >> step=2913100, episode=486 reward=0.7561148 (556.92 it/sec) -training >> step=2913200, episode=486 reward=0.7699262 (549.37 it/sec) -training >> step=2913300, episode=486 reward=0.765173 (521.56 it/sec) -training >> step=2913400, episode=486 reward=0.7751116 (515.31 it/sec) -training >> step=2913500, episode=486 reward=0.7654607 (521.81 it/sec) -training >> step=2913600, episode=486 reward=0.7721879 (541.11 it/sec) -training >> step=2913700, episode=486 reward=0.7838274 (534.82 it/sec) -training >> step=2913800, episode=486 reward=0.7743006 (536.21 it/sec) -training >> step=2913900, episode=486 reward=0.7578139 (560.77 it/sec) -training >> step=2914000, episode=486 reward=0.777463 (512.05 it/sec) -training >> step=2914100, episode=486 reward=0.7701957 (535.08 it/sec) -training >> step=2914200, episode=486 reward=0.7723289 (556.54 it/sec) -training >> step=2914300, episode=486 reward=0.7410312 (532.45 it/sec) -training >> step=2914400, episode=486 reward=0.7595341 (560.81 it/sec) -training >> step=2914500, episode=486 reward=0.7547395 (528.07 it/sec) -training >> step=2914600, episode=486 reward=0.748393 (539.03 it/sec) -training >> step=2914700, episode=486 reward=0.7630516 (544.85 it/sec) -training >> step=2914800, episode=486 reward=0.7649176 (526.32 it/sec) -training >> step=2914900, episode=486 reward=0.7704767 (548.29 it/sec) -training >> step=2915000, episode=486 reward=0.7776225 (560.46 it/sec) -training >> step=2915100, episode=486 reward=0.780058 (512.31 it/sec) -training >> step=2915200, episode=486 reward=0.7647594 (498.48 it/sec) -training >> step=2915300, episode=487 reward=0.781166 (156.67 it/sec) -training >> step=2915400, episode=487 reward=0.7661902 (504.62 it/sec) -training >> step=2915500, episode=487 reward=0.7540365 (539.52 it/sec) -training >> step=2915600, episode=487 reward=0.760268 (540.69 it/sec) -training >> step=2915700, episode=487 reward=0.7779096 (554.59 it/sec) -training >> step=2915800, episode=487 reward=0.773376 (531.70 it/sec) -training >> step=2915900, episode=487 reward=0.7708018 (522.81 it/sec) -training >> step=2916000, episode=487 reward=0.7720683 (524.95 it/sec) -training >> step=2916100, episode=487 reward=0.7635984 (539.66 it/sec) -training >> step=2916200, episode=487 reward=0.7775682 (522.10 it/sec) -training >> step=2916300, episode=487 reward=0.7661193 (526.79 it/sec) -training >> step=2916400, episode=487 reward=0.7911029 (572.00 it/sec) -training >> step=2916500, episode=487 reward=0.7701506 (507.72 it/sec) -training >> step=2916600, episode=487 reward=0.780519 (533.18 it/sec) -training >> step=2916700, episode=487 reward=0.7588804 (558.41 it/sec) -training >> step=2916800, episode=487 reward=0.76091 (553.35 it/sec) -training >> step=2916900, episode=487 reward=0.770536 (555.35 it/sec) -training >> step=2917000, episode=487 reward=0.7770256 (530.90 it/sec) -training >> step=2917100, episode=487 reward=0.7790923 (391.09 it/sec) -training >> step=2917200, episode=487 reward=0.7671478 (537.96 it/sec) -training >> step=2917300, episode=487 reward=0.7744359 (535.54 it/sec) -training >> step=2917400, episode=487 reward=0.7753592 (565.26 it/sec) -training >> step=2917500, episode=487 reward=0.7837625 (601.07 it/sec) -training >> step=2917600, episode=487 reward=0.7691026 (543.07 it/sec) -training >> step=2917700, episode=487 reward=0.7840143 (561.31 it/sec) -training >> step=2917800, episode=487 reward=0.7673526 (565.19 it/sec) -training >> step=2917900, episode=487 reward=0.778114 (564.56 it/sec) -training >> step=2918000, episode=487 reward=0.7650861 (553.00 it/sec) -training >> step=2918100, episode=487 reward=0.7466456 (518.15 it/sec) -training >> step=2918200, episode=487 reward=0.7494044 (573.90 it/sec) -training >> step=2918300, episode=487 reward=0.7695676 (492.95 it/sec) -training >> step=2918400, episode=487 reward=0.7665444 (555.44 it/sec) -training >> step=2918500, episode=487 reward=0.7416306 (557.03 it/sec) -training >> step=2918600, episode=487 reward=0.7866994 (548.88 it/sec) -training >> step=2918700, episode=487 reward=0.7626049 (546.87 it/sec) -training >> step=2918800, episode=487 reward=0.7709078 (542.61 it/sec) -training >> step=2918900, episode=487 reward=0.777479 (571.76 it/sec) -training >> step=2919000, episode=487 reward=0.7663171 (528.25 it/sec) -training >> step=2919100, episode=487 reward=0.7704793 (567.39 it/sec) -training >> step=2919200, episode=487 reward=0.758375 (527.58 it/sec) -training >> step=2919300, episode=487 reward=0.789005 (560.00 it/sec) -training >> step=2919400, episode=487 reward=0.7676991 (567.88 it/sec) -training >> step=2919500, episode=487 reward=0.7715259 (544.32 it/sec) -training >> step=2919600, episode=487 reward=0.7813831 (567.53 it/sec) -training >> step=2919700, episode=487 reward=0.7872302 (577.62 it/sec) -training >> step=2919800, episode=487 reward=0.7593785 (527.83 it/sec) -training >> step=2919900, episode=487 reward=0.7810224 (478.65 it/sec) -training >> step=2920000, episode=487 reward=0.7727491 (566.54 it/sec) -training >> step=2920100, episode=487 reward=0.763909 (493.80 it/sec) -training >> step=2920200, episode=487 reward=0.7509947 (536.32 it/sec) -training >> step=2920300, episode=487 reward=0.8040453 (516.86 it/sec) -training >> step=2920400, episode=487 reward=0.7687386 (513.46 it/sec) -training >> step=2920500, episode=487 reward=0.7754853 (549.13 it/sec) -training >> step=2920600, episode=487 reward=0.742999 (523.95 it/sec) -training >> step=2920700, episode=487 reward=0.7601761 (522.80 it/sec) -training >> step=2920800, episode=487 reward=0.7756176 (532.15 it/sec) -training >> step=2920900, episode=487 reward=0.7661087 (507.71 it/sec) -training >> step=2921000, episode=487 reward=0.7705394 (497.19 it/sec) -training >> step=2921100, episode=487 reward=0.7519957 (568.66 it/sec) -training >> step=2921200, episode=487 reward=0.7606371 (507.37 it/sec) -training >> step=2921300, episode=488 reward=0.7801952 (135.60 it/sec) -training >> step=2921400, episode=488 reward=0.776997 (535.96 it/sec) -training >> step=2921500, episode=488 reward=0.7523116 (554.14 it/sec) -training >> step=2921600, episode=488 reward=0.7809471 (546.68 it/sec) -training >> step=2921700, episode=488 reward=0.7884333 (530.00 it/sec) -training >> step=2921800, episode=488 reward=0.7634534 (537.22 it/sec) -training >> step=2921900, episode=488 reward=0.7984959 (519.24 it/sec) -training >> step=2922000, episode=488 reward=0.7699372 (526.25 it/sec) -training >> step=2922100, episode=488 reward=0.7654709 (515.44 it/sec) -training >> step=2922200, episode=488 reward=0.7801442 (534.53 it/sec) -training >> step=2922300, episode=488 reward=0.7576882 (514.64 it/sec) -training >> step=2922400, episode=488 reward=0.7709284 (556.50 it/sec) -training >> step=2922500, episode=488 reward=0.7553237 (548.10 it/sec) -training >> step=2922600, episode=488 reward=0.7694575 (516.11 it/sec) -training >> step=2922700, episode=488 reward=0.7644652 (536.37 it/sec) -training >> step=2922800, episode=488 reward=0.7610143 (506.39 it/sec) -training >> step=2922900, episode=488 reward=0.7742518 (544.17 it/sec) -training >> step=2923000, episode=488 reward=0.7824796 (535.04 it/sec) -training >> step=2923100, episode=488 reward=0.7679802 (520.33 it/sec) -training >> step=2923200, episode=488 reward=0.7649743 (572.83 it/sec) -training >> step=2923300, episode=488 reward=0.778796 (376.91 it/sec) -training >> step=2923400, episode=488 reward=0.7912977 (548.42 it/sec) -training >> step=2923500, episode=488 reward=0.7687562 (536.12 it/sec) -training >> step=2923600, episode=488 reward=0.7754619 (542.79 it/sec) -training >> step=2923700, episode=488 reward=0.7646765 (493.24 it/sec) -training >> step=2923800, episode=488 reward=0.7653242 (508.33 it/sec) -training >> step=2923900, episode=488 reward=0.7659426 (476.33 it/sec) -training >> step=2924000, episode=488 reward=0.7961442 (536.13 it/sec) -training >> step=2924100, episode=488 reward=0.7478822 (456.77 it/sec) -training >> step=2924200, episode=488 reward=0.7469949 (422.38 it/sec) -training >> step=2924300, episode=488 reward=0.7384757 (470.31 it/sec) -training >> step=2924400, episode=488 reward=0.7674701 (475.80 it/sec) -training >> step=2924500, episode=488 reward=0.7382901 (486.80 it/sec) -training >> step=2924600, episode=488 reward=0.746935 (501.58 it/sec) -training >> step=2924700, episode=488 reward=0.7794516 (544.48 it/sec) -training >> step=2924800, episode=488 reward=0.7642863 (507.37 it/sec) -training >> step=2924900, episode=488 reward=0.7658758 (474.95 it/sec) -training >> step=2925000, episode=488 reward=0.7654781 (539.56 it/sec) -training >> step=2925100, episode=488 reward=0.7813588 (536.78 it/sec) -training >> step=2925200, episode=488 reward=0.7804877 (550.80 it/sec) -training >> step=2925300, episode=488 reward=0.7676362 (510.08 it/sec) -training >> step=2925400, episode=488 reward=0.758716 (524.31 it/sec) -training >> step=2925500, episode=488 reward=0.7533922 (488.75 it/sec) -training >> step=2925600, episode=488 reward=0.7713265 (510.51 it/sec) -training >> step=2925700, episode=488 reward=0.7758723 (521.35 it/sec) -training >> step=2925800, episode=488 reward=0.7918469 (539.01 it/sec) -training >> step=2925900, episode=488 reward=0.7440525 (516.03 it/sec) -training >> step=2926000, episode=488 reward=0.7649022 (484.26 it/sec) -training >> step=2926100, episode=488 reward=0.7807495 (546.59 it/sec) -training >> step=2926200, episode=488 reward=0.7840304 (525.48 it/sec) -training >> step=2926300, episode=488 reward=0.7729199 (542.06 it/sec) -training >> step=2926400, episode=488 reward=0.762364 (519.92 it/sec) -training >> step=2926500, episode=488 reward=0.7668782 (525.33 it/sec) -training >> step=2926600, episode=488 reward=0.7652426 (481.98 it/sec) -training >> step=2926700, episode=488 reward=0.766673 (518.76 it/sec) -training >> step=2926800, episode=488 reward=0.7474228 (545.21 it/sec) -training >> step=2926900, episode=488 reward=0.7628043 (517.17 it/sec) -training >> step=2927000, episode=488 reward=0.7615755 (509.04 it/sec) -training >> step=2927100, episode=488 reward=0.7731731 (503.62 it/sec) -training >> step=2927200, episode=488 reward=0.7691121 (542.77 it/sec) -training >> step=2927300, episode=489 reward=0.7734481 (139.22 it/sec) -training >> step=2927400, episode=489 reward=0.7703412 (518.60 it/sec) -training >> step=2927500, episode=489 reward=0.7563577 (521.35 it/sec) -training >> step=2927600, episode=489 reward=0.7627817 (522.33 it/sec) -training >> step=2927700, episode=489 reward=0.7542187 (550.85 it/sec) -training >> step=2927800, episode=489 reward=0.7614766 (524.54 it/sec) -training >> step=2927900, episode=489 reward=0.7420816 (521.30 it/sec) -training >> step=2928000, episode=489 reward=0.7804519 (525.58 it/sec) -training >> step=2928100, episode=489 reward=0.7837498 (544.12 it/sec) -training >> step=2928200, episode=489 reward=0.7554692 (561.43 it/sec) -training >> step=2928300, episode=489 reward=0.7749825 (528.20 it/sec) -training >> step=2928400, episode=489 reward=0.7761844 (528.20 it/sec) -training >> step=2928500, episode=489 reward=0.7643482 (520.58 it/sec) -training >> step=2928600, episode=489 reward=0.7679724 (567.67 it/sec) -training >> step=2928700, episode=489 reward=0.765691 (543.29 it/sec) -training >> step=2928800, episode=489 reward=0.7804056 (511.06 it/sec) -training >> step=2928900, episode=489 reward=0.7606096 (568.52 it/sec) -training >> step=2929000, episode=489 reward=0.7719488 (494.90 it/sec) -training >> step=2929100, episode=489 reward=0.7511258 (519.97 it/sec) -training >> step=2929200, episode=489 reward=0.7635623 (528.84 it/sec) -training >> step=2929300, episode=489 reward=0.771211 (532.38 it/sec) -training >> step=2929400, episode=489 reward=0.7841867 (381.38 it/sec) -training >> step=2929500, episode=489 reward=0.7639667 (514.44 it/sec) -training >> step=2929600, episode=489 reward=0.7809491 (541.84 it/sec) -training >> step=2929700, episode=489 reward=0.750918 (589.15 it/sec) -training >> step=2929800, episode=489 reward=0.7706336 (524.88 it/sec) -training >> step=2929900, episode=489 reward=0.7741392 (534.42 it/sec) -training >> step=2930000, episode=489 reward=0.7737015 (510.20 it/sec) -training >> step=2930100, episode=489 reward=0.7855701 (533.88 it/sec) -training >> step=2930200, episode=489 reward=0.7566502 (520.08 it/sec) -training >> step=2930300, episode=489 reward=0.7698486 (520.41 it/sec) -training >> step=2930400, episode=489 reward=0.7713361 (548.38 it/sec) -training >> step=2930500, episode=489 reward=0.7687727 (550.93 it/sec) -training >> step=2930600, episode=489 reward=0.7828613 (513.44 it/sec) -training >> step=2930700, episode=489 reward=0.7882538 (506.13 it/sec) -training >> step=2930800, episode=489 reward=0.7818331 (527.88 it/sec) -training >> step=2930900, episode=489 reward=0.7473923 (489.27 it/sec) -training >> step=2931000, episode=489 reward=0.7650521 (510.64 it/sec) -training >> step=2931100, episode=489 reward=0.7615438 (527.99 it/sec) -training >> step=2931200, episode=489 reward=0.7662936 (542.89 it/sec) -training >> step=2931300, episode=489 reward=0.7591733 (516.46 it/sec) -training >> step=2931400, episode=489 reward=0.7641373 (498.12 it/sec) -training >> step=2931500, episode=489 reward=0.783667 (521.86 it/sec) -training >> step=2931600, episode=489 reward=0.7753782 (527.72 it/sec) -training >> step=2931700, episode=489 reward=0.7646823 (557.40 it/sec) -training >> step=2931800, episode=489 reward=0.7719205 (513.87 it/sec) -training >> step=2931900, episode=489 reward=0.7606464 (544.55 it/sec) -training >> step=2932000, episode=489 reward=0.7719917 (528.82 it/sec) -training >> step=2932100, episode=489 reward=0.7753609 (501.41 it/sec) -training >> step=2932200, episode=489 reward=0.7578732 (538.85 it/sec) -training >> step=2932300, episode=489 reward=0.7675808 (493.41 it/sec) -training >> step=2932400, episode=489 reward=0.7751254 (497.13 it/sec) -training >> step=2932500, episode=489 reward=0.750333 (544.25 it/sec) -training >> step=2932600, episode=489 reward=0.7617747 (529.43 it/sec) -training >> step=2932700, episode=489 reward=0.7467241 (485.72 it/sec) -training >> step=2932800, episode=489 reward=0.7747193 (537.42 it/sec) -training >> step=2932900, episode=489 reward=0.7630289 (525.26 it/sec) -training >> step=2933000, episode=489 reward=0.7852186 (563.96 it/sec) -training >> step=2933100, episode=489 reward=0.7596226 (525.74 it/sec) -training >> step=2933200, episode=489 reward=0.7543094 (514.17 it/sec) -training >> step=2933300, episode=490 reward=0.7725065 (71.13 it/sec) -training >> step=2933400, episode=490 reward=0.7642646 (538.46 it/sec) -training >> step=2933500, episode=490 reward=0.7772751 (546.41 it/sec) -training >> step=2933600, episode=490 reward=0.7775402 (561.96 it/sec) -training >> step=2933700, episode=490 reward=0.7541718 (533.54 it/sec) -training >> step=2933800, episode=490 reward=0.7629542 (539.03 it/sec) -training >> step=2933900, episode=490 reward=0.7631418 (548.13 it/sec) -training >> step=2934000, episode=490 reward=0.7819052 (553.29 it/sec) -training >> step=2934100, episode=490 reward=0.7658594 (535.76 it/sec) -training >> step=2934200, episode=490 reward=0.7757859 (538.84 it/sec) -training >> step=2934300, episode=490 reward=0.770537 (518.67 it/sec) -training >> step=2934400, episode=490 reward=0.7566218 (522.82 it/sec) -training >> step=2934500, episode=490 reward=0.7672884 (564.11 it/sec) -training >> step=2934600, episode=490 reward=0.7785069 (555.43 it/sec) -training >> step=2934700, episode=490 reward=0.7740626 (542.89 it/sec) -training >> step=2934800, episode=490 reward=0.7753983 (575.07 it/sec) -training >> step=2934900, episode=490 reward=0.7739618 (542.28 it/sec) -training >> step=2935000, episode=490 reward=0.765321 (496.94 it/sec) -training >> step=2935100, episode=490 reward=0.7571228 (573.76 it/sec) -training >> step=2935200, episode=490 reward=0.7618881 (508.56 it/sec) -training >> step=2935300, episode=490 reward=0.7391394 (536.09 it/sec) -training >> step=2935400, episode=490 reward=0.7732983 (529.80 it/sec) -training >> step=2935500, episode=490 reward=0.7493491 (391.94 it/sec) -training >> step=2935600, episode=490 reward=0.7800276 (539.52 it/sec) -training >> step=2935700, episode=490 reward=0.7469285 (532.40 it/sec) -training >> step=2935800, episode=490 reward=0.7628163 (526.12 it/sec) -training >> step=2935900, episode=490 reward=0.7660915 (552.72 it/sec) -training >> step=2936000, episode=490 reward=0.7436989 (526.53 it/sec) -training >> step=2936100, episode=490 reward=0.7596909 (482.13 it/sec) -training >> step=2936200, episode=490 reward=0.7705552 (561.89 it/sec) -training >> step=2936300, episode=490 reward=0.7707238 (536.09 it/sec) -training >> step=2936400, episode=490 reward=0.7661805 (537.00 it/sec) -training >> step=2936500, episode=490 reward=0.7695455 (535.19 it/sec) -training >> step=2936600, episode=490 reward=0.7635719 (511.24 it/sec) -training >> step=2936700, episode=490 reward=0.768414 (506.72 it/sec) -training >> step=2936800, episode=490 reward=0.769329 (538.71 it/sec) -training >> step=2936900, episode=490 reward=0.7590564 (501.33 it/sec) -training >> step=2937000, episode=490 reward=0.7687174 (494.54 it/sec) -training >> step=2937100, episode=490 reward=0.7791355 (533.27 it/sec) -training >> step=2937200, episode=490 reward=0.7625289 (490.78 it/sec) -training >> step=2937300, episode=490 reward=0.7717623 (554.39 it/sec) -training >> step=2937400, episode=490 reward=0.7659606 (513.38 it/sec) -training >> step=2937500, episode=490 reward=0.7693676 (568.77 it/sec) -training >> step=2937600, episode=490 reward=0.7755081 (550.38 it/sec) -training >> step=2937700, episode=490 reward=0.7583341 (545.16 it/sec) -training >> step=2937800, episode=490 reward=0.7860316 (545.12 it/sec) -training >> step=2937900, episode=490 reward=0.7693081 (511.52 it/sec) -training >> step=2938000, episode=490 reward=0.7801407 (500.82 it/sec) -training >> step=2938100, episode=490 reward=0.7792287 (536.20 it/sec) -training >> step=2938200, episode=490 reward=0.7686763 (513.58 it/sec) -training >> step=2938300, episode=490 reward=0.7605202 (559.19 it/sec) -training >> step=2938400, episode=490 reward=0.7716013 (531.02 it/sec) -training >> step=2938500, episode=490 reward=0.7817206 (508.60 it/sec) -training >> step=2938600, episode=490 reward=0.7800412 (519.04 it/sec) -training >> step=2938700, episode=490 reward=0.7521242 (481.89 it/sec) -training >> step=2938800, episode=490 reward=0.7829115 (477.43 it/sec) -training >> step=2938900, episode=490 reward=0.7613662 (434.48 it/sec) -training >> step=2939000, episode=490 reward=0.7759387 (501.88 it/sec) -training >> step=2939100, episode=490 reward=0.7700962 (432.48 it/sec) -training >> step=2939200, episode=490 reward=0.7533195 (501.93 it/sec) -training >> step=2939300, episode=491 reward=0.7669009 (57.50 it/sec) -training >> step=2939400, episode=491 reward=0.7792989 (545.54 it/sec) -training >> step=2939500, episode=491 reward=0.7420992 (521.08 it/sec) -training >> step=2939600, episode=491 reward=0.7713935 (559.16 it/sec) -training >> step=2939700, episode=491 reward=0.7656405 (504.46 it/sec) -training >> step=2939800, episode=491 reward=0.7613967 (567.53 it/sec) -training >> step=2939900, episode=491 reward=0.7566399 (492.92 it/sec) -training >> step=2940000, episode=491 reward=0.7499051 (534.34 it/sec) -training >> step=2940100, episode=491 reward=0.746053 (492.55 it/sec) -training >> step=2940200, episode=491 reward=0.7800227 (582.65 it/sec) -training >> step=2940300, episode=491 reward=0.7528116 (495.35 it/sec) -training >> step=2940400, episode=491 reward=0.7640451 (505.33 it/sec) -training >> step=2940500, episode=491 reward=0.7645122 (589.37 it/sec) -training >> step=2940600, episode=491 reward=0.7487223 (543.02 it/sec) -training >> step=2940700, episode=491 reward=0.7666525 (520.02 it/sec) -training >> step=2940800, episode=491 reward=0.7583129 (535.10 it/sec) -training >> step=2940900, episode=491 reward=0.7597982 (541.55 it/sec) -training >> step=2941000, episode=491 reward=0.7618593 (532.25 it/sec) -training >> step=2941100, episode=491 reward=0.7763008 (554.91 it/sec) -training >> step=2941200, episode=491 reward=0.7665617 (562.45 it/sec) -training >> step=2941300, episode=491 reward=0.7928662 (512.49 it/sec) -training >> step=2941400, episode=491 reward=0.7612021 (528.00 it/sec) -training >> step=2941500, episode=491 reward=0.7884027 (549.78 it/sec) -training >> step=2941600, episode=491 reward=0.7740986 (508.09 it/sec) -training >> step=2941700, episode=491 reward=0.7660379 (555.13 it/sec) -training >> step=2941800, episode=491 reward=0.7850753 (532.05 it/sec) -training >> step=2941900, episode=491 reward=0.7755806 (419.59 it/sec) -training >> step=2942000, episode=491 reward=0.7807129 (555.44 it/sec) -training >> step=2942100, episode=491 reward=0.7613547 (486.08 it/sec) -training >> step=2942200, episode=491 reward=0.7765671 (583.84 it/sec) -training >> step=2942300, episode=491 reward=0.7924517 (545.60 it/sec) -training >> step=2942400, episode=491 reward=0.7614524 (536.65 it/sec) -training >> step=2942500, episode=491 reward=0.7952317 (528.89 it/sec) -training >> step=2942600, episode=491 reward=0.7644635 (516.98 it/sec) -training >> step=2942700, episode=491 reward=0.770965 (578.19 it/sec) -training >> step=2942800, episode=491 reward=0.7646246 (532.77 it/sec) -training >> step=2942900, episode=491 reward=0.7620006 (540.21 it/sec) -training >> step=2943000, episode=491 reward=0.7594001 (551.08 it/sec) -training >> step=2943100, episode=491 reward=0.7770805 (554.60 it/sec) -training >> step=2943200, episode=491 reward=0.774634 (528.17 it/sec) -training >> step=2943300, episode=491 reward=0.7658086 (548.03 it/sec) -training >> step=2943400, episode=491 reward=0.7583316 (521.09 it/sec) -training >> step=2943500, episode=491 reward=0.7809685 (501.44 it/sec) -training >> step=2943600, episode=491 reward=0.752687 (538.18 it/sec) -training >> step=2943700, episode=491 reward=0.7691153 (557.65 it/sec) -training >> step=2943800, episode=491 reward=0.7518865 (539.12 it/sec) -training >> step=2943900, episode=491 reward=0.7696183 (504.44 it/sec) -training >> step=2944000, episode=491 reward=0.7631404 (556.76 it/sec) -training >> step=2944100, episode=491 reward=0.7649828 (556.16 it/sec) -training >> step=2944200, episode=491 reward=0.7766449 (520.07 it/sec) -training >> step=2944300, episode=491 reward=0.7616946 (446.99 it/sec) -training >> step=2944400, episode=491 reward=0.7746043 (503.10 it/sec) -training >> step=2944500, episode=491 reward=0.7809697 (529.41 it/sec) -training >> step=2944600, episode=491 reward=0.7512507 (572.50 it/sec) -training >> step=2944700, episode=491 reward=0.7666935 (520.86 it/sec) -training >> step=2944800, episode=491 reward=0.7629235 (533.77 it/sec) -training >> step=2944900, episode=491 reward=0.7370149 (534.06 it/sec) -training >> step=2945000, episode=491 reward=0.7643152 (507.42 it/sec) -training >> step=2945100, episode=491 reward=0.7632673 (551.22 it/sec) -training >> step=2945200, episode=491 reward=0.7575771 (523.69 it/sec) -training >> step=2945300, episode=492 reward=0.7726015 (91.94 it/sec) -training >> step=2945400, episode=492 reward=0.7607662 (497.13 it/sec) -training >> step=2945500, episode=492 reward=0.76289 (521.39 it/sec) -training >> step=2945600, episode=492 reward=0.7544256 (526.10 it/sec) -training >> step=2945700, episode=492 reward=0.7533675 (537.75 it/sec) -training >> step=2945800, episode=492 reward=0.7875642 (519.66 it/sec) -training >> step=2945900, episode=492 reward=0.7812991 (524.48 it/sec) -training >> step=2946000, episode=492 reward=0.7723762 (517.53 it/sec) -training >> step=2946100, episode=492 reward=0.7610435 (576.05 it/sec) -training >> step=2946200, episode=492 reward=0.7646127 (496.80 it/sec) -training >> step=2946300, episode=492 reward=0.7649302 (514.61 it/sec) -training >> step=2946400, episode=492 reward=0.7553718 (540.78 it/sec) -training >> step=2946500, episode=492 reward=0.7644731 (561.73 it/sec) -training >> step=2946600, episode=492 reward=0.7614267 (557.26 it/sec) -training >> step=2946700, episode=492 reward=0.765916 (542.74 it/sec) -training >> step=2946800, episode=492 reward=0.7611851 (530.06 it/sec) -training >> step=2946900, episode=492 reward=0.7713133 (525.42 it/sec) -training >> step=2947000, episode=492 reward=0.7613285 (537.03 it/sec) -training >> step=2947100, episode=492 reward=0.7762234 (512.94 it/sec) -training >> step=2947200, episode=492 reward=0.7667182 (571.89 it/sec) -training >> step=2947300, episode=492 reward=0.7929219 (512.32 it/sec) -training >> step=2947400, episode=492 reward=0.7471212 (565.15 it/sec) -training >> step=2947500, episode=492 reward=0.7667232 (564.55 it/sec) -training >> step=2947600, episode=492 reward=0.78103 (532.26 it/sec) -training >> step=2947700, episode=492 reward=0.7819765 (542.69 it/sec) -training >> step=2947800, episode=492 reward=0.7548683 (531.15 it/sec) -training >> step=2947900, episode=492 reward=0.7793422 (402.15 it/sec) -training >> step=2948000, episode=492 reward=0.7533886 (537.28 it/sec) -training >> step=2948100, episode=492 reward=0.7584704 (504.94 it/sec) -training >> step=2948200, episode=492 reward=0.7725131 (556.82 it/sec) -training >> step=2948300, episode=492 reward=0.7754319 (549.37 it/sec) -training >> step=2948400, episode=492 reward=0.7710807 (507.69 it/sec) -training >> step=2948500, episode=492 reward=0.75422 (546.49 it/sec) -training >> step=2948600, episode=492 reward=0.7798008 (514.90 it/sec) -training >> step=2948700, episode=492 reward=0.7626933 (520.54 it/sec) -training >> step=2948800, episode=492 reward=0.7774258 (518.26 it/sec) -training >> step=2948900, episode=492 reward=0.7642896 (536.51 it/sec) -training >> step=2949000, episode=492 reward=0.7668884 (511.56 it/sec) -training >> step=2949100, episode=492 reward=0.76371 (494.38 it/sec) -training >> step=2949200, episode=492 reward=0.7884505 (524.87 it/sec) -training >> step=2949300, episode=492 reward=0.7603154 (527.38 it/sec) -training >> step=2949400, episode=492 reward=0.7725281 (564.91 it/sec) -training >> step=2949500, episode=492 reward=0.7758683 (492.86 it/sec) -training >> step=2949600, episode=492 reward=0.763711 (531.91 it/sec) -training >> step=2949700, episode=492 reward=0.7723285 (552.34 it/sec) -training >> step=2949800, episode=492 reward=0.7675047 (536.64 it/sec) -training >> step=2949900, episode=492 reward=0.7799557 (541.55 it/sec) -training >> step=2950000, episode=492 reward=0.7771847 (542.84 it/sec) -training >> step=2950100, episode=492 reward=0.7802708 (553.31 it/sec) -training >> step=2950200, episode=492 reward=0.7704471 (478.30 it/sec) -training >> step=2950300, episode=492 reward=0.7592955 (512.82 it/sec) -training >> step=2950400, episode=492 reward=0.7866507 (532.14 it/sec) -training >> step=2950500, episode=492 reward=0.7700529 (576.17 it/sec) -training >> step=2950600, episode=492 reward=0.7892019 (560.69 it/sec) -training >> step=2950700, episode=492 reward=0.7636934 (531.43 it/sec) -training >> step=2950800, episode=492 reward=0.747518 (550.46 it/sec) -training >> step=2950900, episode=492 reward=0.7519723 (508.65 it/sec) -training >> step=2951000, episode=492 reward=0.76616 (535.75 it/sec) -training >> step=2951100, episode=492 reward=0.7726258 (549.76 it/sec) -training >> step=2951200, episode=492 reward=0.7559567 (568.31 it/sec) -training >> step=2951300, episode=493 reward=0.7468072 (54.85 it/sec) -training >> step=2951400, episode=493 reward=0.7658996 (518.50 it/sec) -training >> step=2951500, episode=493 reward=0.7575834 (559.32 it/sec) -training >> step=2951600, episode=493 reward=0.7559059 (516.01 it/sec) -training >> step=2951700, episode=493 reward=0.7735134 (505.00 it/sec) -training >> step=2951800, episode=493 reward=0.7803636 (572.47 it/sec) -training >> step=2951900, episode=493 reward=0.7619501 (514.51 it/sec) -training >> step=2952000, episode=493 reward=0.7582769 (539.54 it/sec) -training >> step=2952100, episode=493 reward=0.7572126 (496.78 it/sec) -training >> step=2952200, episode=493 reward=0.7713723 (516.31 it/sec) -training >> step=2952300, episode=493 reward=0.7888529 (509.45 it/sec) -training >> step=2952400, episode=493 reward=0.7583467 (531.13 it/sec) -training >> step=2952500, episode=493 reward=0.7711392 (486.37 it/sec) -training >> step=2952600, episode=493 reward=0.7658979 (428.99 it/sec) -training >> step=2952700, episode=493 reward=0.7698762 (508.15 it/sec) -training >> step=2952800, episode=493 reward=0.7690604 (531.96 it/sec) -training >> step=2952900, episode=493 reward=0.7810766 (513.35 it/sec) -training >> step=2953000, episode=493 reward=0.7780411 (535.69 it/sec) -training >> step=2953100, episode=493 reward=0.7557372 (550.83 it/sec) -training >> step=2953200, episode=493 reward=0.7617563 (517.30 it/sec) -training >> step=2953300, episode=493 reward=0.7632399 (536.37 it/sec) -training >> step=2953400, episode=493 reward=0.7802172 (542.68 it/sec) -training >> step=2953500, episode=493 reward=0.7749383 (571.61 it/sec) -training >> step=2953600, episode=493 reward=0.768954 (507.90 it/sec) -training >> step=2953700, episode=493 reward=0.7704577 (522.21 it/sec) -training >> step=2953800, episode=493 reward=0.7592191 (540.98 it/sec) -training >> step=2953900, episode=493 reward=0.7688253 (579.01 it/sec) -training >> step=2954000, episode=493 reward=0.7772732 (510.17 it/sec) -training >> step=2954100, episode=493 reward=0.7699019 (478.40 it/sec) -training >> step=2954200, episode=493 reward=0.7746801 (501.21 it/sec) -training >> step=2954300, episode=493 reward=0.7690905 (350.66 it/sec) -training >> step=2954400, episode=493 reward=0.7908405 (518.54 it/sec) -training >> step=2954500, episode=493 reward=0.7942746 (539.41 it/sec) -training >> step=2954600, episode=493 reward=0.7941021 (503.57 it/sec) -training >> step=2954700, episode=493 reward=0.7549238 (486.83 it/sec) -training >> step=2954800, episode=493 reward=0.7647204 (525.95 it/sec) -training >> step=2954900, episode=493 reward=0.7725229 (540.14 it/sec) -training >> step=2955000, episode=493 reward=0.7564645 (552.31 it/sec) -training >> step=2955100, episode=493 reward=0.7448992 (551.93 it/sec) -training >> step=2955200, episode=493 reward=0.7714034 (490.61 it/sec) -training >> step=2955300, episode=493 reward=0.7631629 (515.91 it/sec) -training >> step=2955400, episode=493 reward=0.7918242 (517.48 it/sec) -training >> step=2955500, episode=493 reward=0.7774393 (564.41 it/sec) -training >> step=2955600, episode=493 reward=0.7839117 (535.55 it/sec) -training >> step=2955700, episode=493 reward=0.7671416 (562.28 it/sec) -training >> step=2955800, episode=493 reward=0.7875978 (520.06 it/sec) -training >> step=2955900, episode=493 reward=0.7386164 (501.96 it/sec) -training >> step=2956000, episode=493 reward=0.769436 (493.84 it/sec) -training >> step=2956100, episode=493 reward=0.7656318 (516.32 it/sec) -training >> step=2956200, episode=493 reward=0.7699297 (524.85 it/sec) -training >> step=2956300, episode=493 reward=0.7818941 (531.00 it/sec) -training >> step=2956400, episode=493 reward=0.778104 (567.39 it/sec) -training >> step=2956500, episode=493 reward=0.7712758 (540.19 it/sec) -training >> step=2956600, episode=493 reward=0.7735936 (518.20 it/sec) -training >> step=2956700, episode=493 reward=0.7781166 (535.68 it/sec) -training >> step=2956800, episode=493 reward=0.7699302 (545.38 it/sec) -training >> step=2956900, episode=493 reward=0.7679158 (551.02 it/sec) -training >> step=2957000, episode=493 reward=0.7538417 (524.52 it/sec) -training >> step=2957100, episode=493 reward=0.7682936 (542.42 it/sec) -training >> step=2957200, episode=493 reward=0.7722097 (535.83 it/sec) -training >> step=2957300, episode=494 reward=0.7670267 (96.70 it/sec) -training >> step=2957400, episode=494 reward=0.763807 (519.36 it/sec) -training >> step=2957500, episode=494 reward=0.765959 (525.90 it/sec) -training >> step=2957600, episode=494 reward=0.7601359 (497.43 it/sec) -training >> step=2957700, episode=494 reward=0.7715123 (537.69 it/sec) -training >> step=2957800, episode=494 reward=0.756466 (541.10 it/sec) -training >> step=2957900, episode=494 reward=0.7584164 (548.47 it/sec) -training >> step=2958000, episode=494 reward=0.7807394 (464.72 it/sec) -training >> step=2958100, episode=494 reward=0.7645105 (461.94 it/sec) -training >> step=2958200, episode=494 reward=0.7870322 (532.90 it/sec) -training >> step=2958300, episode=494 reward=0.7452376 (502.70 it/sec) -training >> step=2958400, episode=494 reward=0.7668287 (521.52 it/sec) -training >> step=2958500, episode=494 reward=0.7557316 (455.11 it/sec) -training >> step=2958600, episode=494 reward=0.7804847 (518.57 it/sec) -training >> step=2958700, episode=494 reward=0.7773373 (449.76 it/sec) -training >> step=2958800, episode=494 reward=0.7580581 (493.02 it/sec) -training >> step=2958900, episode=494 reward=0.7534829 (523.34 it/sec) -training >> step=2959000, episode=494 reward=0.7841055 (472.25 it/sec) -training >> step=2959100, episode=494 reward=0.7686601 (485.25 it/sec) -training >> step=2959200, episode=494 reward=0.7617851 (500.77 it/sec) -training >> step=2959300, episode=494 reward=0.7983979 (488.00 it/sec) -training >> step=2959400, episode=494 reward=0.7735986 (490.53 it/sec) -training >> step=2959500, episode=494 reward=0.7652763 (451.49 it/sec) -training >> step=2959600, episode=494 reward=0.77201 (490.32 it/sec) -training >> step=2959700, episode=494 reward=0.7457538 (485.61 it/sec) -training >> step=2959800, episode=494 reward=0.7803239 (497.08 it/sec) -training >> step=2959900, episode=494 reward=0.755677 (504.97 it/sec) -training >> step=2960000, episode=494 reward=0.7732805 (498.52 it/sec) -training >> step=2960100, episode=494 reward=0.7738346 (513.22 it/sec) -training >> step=2960200, episode=494 reward=0.7664087 (490.32 it/sec) -training >> step=2960300, episode=494 reward=0.7797529 (544.11 it/sec) -training >> step=2960400, episode=494 reward=0.7754083 (432.92 it/sec) -training >> step=2960500, episode=494 reward=0.7792763 (513.73 it/sec) -training >> step=2960600, episode=494 reward=0.7592816 (504.56 it/sec) -training >> step=2960700, episode=494 reward=0.7721512 (515.44 it/sec) -training >> step=2960800, episode=494 reward=0.7548406 (540.80 it/sec) -training >> step=2960900, episode=494 reward=0.7403655 (482.40 it/sec) -training >> step=2961000, episode=494 reward=0.7802016 (536.32 it/sec) -training >> step=2961100, episode=494 reward=0.7518941 (536.85 it/sec) -training >> step=2961200, episode=494 reward=0.7489635 (505.81 it/sec) -training >> step=2961300, episode=494 reward=0.7847178 (522.46 it/sec) -training >> step=2961400, episode=494 reward=0.7511898 (546.68 it/sec) -training >> step=2961500, episode=494 reward=0.7572118 (497.53 it/sec) -training >> step=2961600, episode=494 reward=0.784166 (515.71 it/sec) -training >> step=2961700, episode=494 reward=0.7463892 (536.95 it/sec) -training >> step=2961800, episode=494 reward=0.7755042 (531.49 it/sec) -training >> step=2961900, episode=494 reward=0.7603512 (518.82 it/sec) -training >> step=2962000, episode=494 reward=0.7664188 (532.71 it/sec) -training >> step=2962100, episode=494 reward=0.7436935 (537.40 it/sec) -training >> step=2962200, episode=494 reward=0.7849253 (521.61 it/sec) -training >> step=2962300, episode=494 reward=0.7748678 (519.83 it/sec) -training >> step=2962400, episode=494 reward=0.7548647 (531.90 it/sec) -training >> step=2962500, episode=494 reward=0.7772605 (553.71 it/sec) -training >> step=2962600, episode=494 reward=0.7615063 (525.84 it/sec) -training >> step=2962700, episode=494 reward=0.7473406 (525.58 it/sec) -training >> step=2962800, episode=494 reward=0.7529482 (547.34 it/sec) -training >> step=2962900, episode=494 reward=0.7502723 (539.09 it/sec) -training >> step=2963000, episode=494 reward=0.7355467 (541.38 it/sec) -training >> step=2963100, episode=494 reward=0.7485379 (531.86 it/sec) -training >> step=2963200, episode=494 reward=0.7489834 (548.60 it/sec) -training >> step=2963300, episode=495 reward=0.7695967 (124.78 it/sec) -training >> step=2963400, episode=495 reward=0.7653442 (534.61 it/sec) -training >> step=2963500, episode=495 reward=0.7760778 (532.68 it/sec) -training >> step=2963600, episode=495 reward=0.7700487 (517.92 it/sec) -training >> step=2963700, episode=495 reward=0.7617159 (516.61 it/sec) -training >> step=2963800, episode=495 reward=0.7672009 (516.10 it/sec) -training >> step=2963900, episode=495 reward=0.7517203 (495.12 it/sec) -training >> step=2964000, episode=495 reward=0.7835377 (482.90 it/sec) -training >> step=2964100, episode=495 reward=0.7649297 (512.75 it/sec) -training >> step=2964200, episode=495 reward=0.7662936 (507.08 it/sec) -training >> step=2964300, episode=495 reward=0.7763582 (529.56 it/sec) -training >> step=2964400, episode=495 reward=0.7510679 (478.28 it/sec) -training >> step=2964500, episode=495 reward=0.7692815 (504.59 it/sec) -training >> step=2964600, episode=495 reward=0.7736595 (474.97 it/sec) -training >> step=2964700, episode=495 reward=0.7715791 (486.95 it/sec) -training >> step=2964800, episode=495 reward=0.7400557 (429.74 it/sec) -training >> step=2964900, episode=495 reward=0.7756297 (531.24 it/sec) -training >> step=2965000, episode=495 reward=0.7808868 (499.08 it/sec) -training >> step=2965100, episode=495 reward=0.7545491 (481.11 it/sec) -training >> step=2965200, episode=495 reward=0.7872432 (497.16 it/sec) -training >> step=2965300, episode=495 reward=0.7545305 (510.78 it/sec) -training >> step=2965400, episode=495 reward=0.7529185 (464.14 it/sec) -training >> step=2965500, episode=495 reward=0.7828737 (470.56 it/sec) -training >> step=2965600, episode=495 reward=0.7568059 (485.73 it/sec) -training >> step=2965700, episode=495 reward=0.7587289 (443.55 it/sec) -training >> step=2965800, episode=495 reward=0.7747994 (467.92 it/sec) -training >> step=2965900, episode=495 reward=0.788258 (469.70 it/sec) -training >> step=2966000, episode=495 reward=0.7687569 (510.33 it/sec) -training >> step=2966100, episode=495 reward=0.783887 (509.19 it/sec) -training >> step=2966200, episode=495 reward=0.7988254 (467.31 it/sec) -training >> step=2966300, episode=495 reward=0.7656989 (520.77 it/sec) -training >> step=2966400, episode=495 reward=0.7749071 (528.66 it/sec) -training >> step=2966500, episode=495 reward=0.7839515 (385.66 it/sec) -training >> step=2966600, episode=495 reward=0.7822195 (487.03 it/sec) -training >> step=2966700, episode=495 reward=0.7729986 (519.28 it/sec) -training >> step=2966800, episode=495 reward=0.756075 (428.72 it/sec) -training >> step=2966900, episode=495 reward=0.767189 (451.34 it/sec) -training >> step=2967000, episode=495 reward=0.7623098 (420.00 it/sec) -training >> step=2967100, episode=495 reward=0.7749672 (512.11 it/sec) -training >> step=2967200, episode=495 reward=0.7820603 (466.22 it/sec) -training >> step=2967300, episode=495 reward=0.7819805 (505.68 it/sec) -training >> step=2967400, episode=495 reward=0.7887332 (574.53 it/sec) -training >> step=2967500, episode=495 reward=0.7644424 (547.40 it/sec) -training >> step=2967600, episode=495 reward=0.7710643 (534.49 it/sec) -training >> step=2967700, episode=495 reward=0.7714704 (538.46 it/sec) -training >> step=2967800, episode=495 reward=0.7453515 (552.83 it/sec) -training >> step=2967900, episode=495 reward=0.7621011 (507.82 it/sec) -training >> step=2968000, episode=495 reward=0.7822724 (499.58 it/sec) -training >> step=2968100, episode=495 reward=0.7529489 (466.02 it/sec) -training >> step=2968200, episode=495 reward=0.7693239 (525.79 it/sec) -training >> step=2968300, episode=495 reward=0.7707093 (492.49 it/sec) -training >> step=2968400, episode=495 reward=0.7756397 (461.59 it/sec) -training >> step=2968500, episode=495 reward=0.7920079 (535.80 it/sec) -training >> step=2968600, episode=495 reward=0.7736955 (500.28 it/sec) -training >> step=2968700, episode=495 reward=0.769082 (484.57 it/sec) -training >> step=2968800, episode=495 reward=0.7751663 (490.24 it/sec) -training >> step=2968900, episode=495 reward=0.776333 (424.43 it/sec) -training >> step=2969000, episode=495 reward=0.7466233 (429.37 it/sec) -training >> step=2969100, episode=495 reward=0.7415076 (399.06 it/sec) -training >> step=2969200, episode=495 reward=0.7638108 (449.81 it/sec) -training >> step=2969300, episode=496 reward=0.7821861 (68.20 it/sec) -training >> step=2969400, episode=496 reward=0.7498362 (475.69 it/sec) -training >> step=2969500, episode=496 reward=0.7718292 (490.11 it/sec) -training >> step=2969600, episode=496 reward=0.7748345 (514.45 it/sec) -training >> step=2969700, episode=496 reward=0.7862654 (484.24 it/sec) -training >> step=2969800, episode=496 reward=0.7600403 (455.62 it/sec) -training >> step=2969900, episode=496 reward=0.778729 (492.29 it/sec) -training >> step=2970000, episode=496 reward=0.7652628 (451.23 it/sec) -training >> step=2970100, episode=496 reward=0.7551542 (483.84 it/sec) -training >> step=2970200, episode=496 reward=0.7538631 (440.67 it/sec) -training >> step=2970300, episode=496 reward=0.7470533 (490.91 it/sec) -training >> step=2970400, episode=496 reward=0.7661663 (425.66 it/sec) -training >> step=2970500, episode=496 reward=0.752526 (471.33 it/sec) -training >> step=2970600, episode=496 reward=0.7530015 (481.52 it/sec) -training >> step=2970700, episode=496 reward=0.7896163 (486.70 it/sec) -training >> step=2970800, episode=496 reward=0.7782609 (480.12 it/sec) -training >> step=2970900, episode=496 reward=0.758777 (535.28 it/sec) -training >> step=2971000, episode=496 reward=0.7448321 (512.29 it/sec) -training >> step=2971100, episode=496 reward=0.7677962 (500.11 it/sec) -training >> step=2971200, episode=496 reward=0.7555324 (543.20 it/sec) -training >> step=2971300, episode=496 reward=0.7616552 (518.57 it/sec) -training >> step=2971400, episode=496 reward=0.7797747 (536.12 it/sec) -training >> step=2971500, episode=496 reward=0.7582545 (502.90 it/sec) -training >> step=2971600, episode=496 reward=0.7609241 (559.24 it/sec) -training >> step=2971700, episode=496 reward=0.7639369 (548.26 it/sec) -training >> step=2971800, episode=496 reward=0.761205 (534.65 it/sec) -training >> step=2971900, episode=496 reward=0.7598418 (554.34 it/sec) -training >> step=2972000, episode=496 reward=0.7737654 (501.59 it/sec) -training >> step=2972100, episode=496 reward=0.7691096 (540.44 it/sec) -training >> step=2972200, episode=496 reward=0.7635997 (521.88 it/sec) -training >> step=2972300, episode=496 reward=0.7906715 (508.86 it/sec) -training >> step=2972400, episode=496 reward=0.7775061 (463.68 it/sec) -training >> step=2972500, episode=496 reward=0.765605 (479.89 it/sec) -training >> step=2972600, episode=496 reward=0.765084 (426.28 it/sec) -training >> step=2972700, episode=496 reward=0.7385409 (513.26 it/sec) -training >> step=2972800, episode=496 reward=0.7813067 (483.40 it/sec) -training >> step=2972900, episode=496 reward=0.7542629 (494.52 it/sec) -training >> step=2973000, episode=496 reward=0.7839888 (527.56 it/sec) -training >> step=2973100, episode=496 reward=0.7680767 (519.01 it/sec) -training >> step=2973200, episode=496 reward=0.7646608 (505.27 it/sec) -training >> step=2973300, episode=496 reward=0.7481591 (527.89 it/sec) -training >> step=2973400, episode=496 reward=0.7725856 (519.60 it/sec) -training >> step=2973500, episode=496 reward=0.7798983 (545.07 it/sec) -training >> step=2973600, episode=496 reward=0.7747707 (470.36 it/sec) -training >> step=2973700, episode=496 reward=0.7546455 (510.64 it/sec) -training >> step=2973800, episode=496 reward=0.7636901 (479.68 it/sec) -training >> step=2973900, episode=496 reward=0.7659973 (493.99 it/sec) -training >> step=2974000, episode=496 reward=0.7756671 (525.90 it/sec) -training >> step=2974100, episode=496 reward=0.7830579 (536.21 it/sec) -training >> step=2974200, episode=496 reward=0.7819865 (493.55 it/sec) -training >> step=2974300, episode=496 reward=0.7823915 (531.84 it/sec) -training >> step=2974400, episode=496 reward=0.7804204 (549.91 it/sec) -training >> step=2974500, episode=496 reward=0.776769 (459.22 it/sec) -training >> step=2974600, episode=496 reward=0.7625787 (486.91 it/sec) -training >> step=2974700, episode=496 reward=0.777257 (450.52 it/sec) -training >> step=2974800, episode=496 reward=0.7542061 (429.46 it/sec) -training >> step=2974900, episode=496 reward=0.7667302 (501.36 it/sec) -training >> step=2975000, episode=496 reward=0.7656509 (486.83 it/sec) -training >> step=2975100, episode=496 reward=0.7767001 (481.05 it/sec) -training >> step=2975200, episode=496 reward=0.7569469 (512.39 it/sec) -training >> step=2975300, episode=497 reward=0.751097 (86.57 it/sec) -training >> step=2975400, episode=497 reward=0.7665669 (525.80 it/sec) -training >> step=2975500, episode=497 reward=0.7660357 (578.97 it/sec) -training >> step=2975600, episode=497 reward=0.7649266 (503.90 it/sec) -training >> step=2975700, episode=497 reward=0.7795388 (510.42 it/sec) -training >> step=2975800, episode=497 reward=0.7711611 (539.11 it/sec) -training >> step=2975900, episode=497 reward=0.7769678 (572.95 it/sec) -training >> step=2976000, episode=497 reward=0.7713222 (518.53 it/sec) -training >> step=2976100, episode=497 reward=0.7843653 (542.88 it/sec) -training >> step=2976200, episode=497 reward=0.7546068 (537.58 it/sec) -training >> step=2976300, episode=497 reward=0.7802671 (546.46 it/sec) -training >> step=2976400, episode=497 reward=0.7709505 (519.12 it/sec) -training >> step=2976500, episode=497 reward=0.7778032 (544.12 it/sec) -training >> step=2976600, episode=497 reward=0.7601973 (518.60 it/sec) -training >> step=2976700, episode=497 reward=0.7659563 (533.04 it/sec) -training >> step=2976800, episode=497 reward=0.7865368 (512.37 it/sec) -training >> step=2976900, episode=497 reward=0.736393 (557.34 it/sec) -training >> step=2977000, episode=497 reward=0.7715761 (517.73 it/sec) -training >> step=2977100, episode=497 reward=0.7630563 (526.91 it/sec) -training >> step=2977200, episode=497 reward=0.7514486 (558.15 it/sec) -training >> step=2977300, episode=497 reward=0.7558285 (497.13 it/sec) -training >> step=2977400, episode=497 reward=0.7768889 (549.67 it/sec) -training >> step=2977500, episode=497 reward=0.7623161 (539.30 it/sec) -training >> step=2977600, episode=497 reward=0.7876242 (558.89 it/sec) -training >> step=2977700, episode=497 reward=0.7736716 (483.99 it/sec) -training >> step=2977800, episode=497 reward=0.7671947 (542.12 it/sec) -training >> step=2977900, episode=497 reward=0.7755786 (530.70 it/sec) -training >> step=2978000, episode=497 reward=0.7772369 (551.43 it/sec) -training >> step=2978100, episode=497 reward=0.775228 (530.49 it/sec) -training >> step=2978200, episode=497 reward=0.7890346 (519.98 it/sec) -training >> step=2978300, episode=497 reward=0.7785359 (506.44 it/sec) -training >> step=2978400, episode=497 reward=0.7406639 (384.81 it/sec) -training >> step=2978500, episode=497 reward=0.7644547 (545.01 it/sec) -training >> step=2978600, episode=497 reward=0.7779025 (486.51 it/sec) -training >> step=2978700, episode=497 reward=0.7789674 (565.07 it/sec) -training >> step=2978800, episode=497 reward=0.7663386 (526.86 it/sec) -training >> step=2978900, episode=497 reward=0.7659706 (542.36 it/sec) -training >> step=2979000, episode=497 reward=0.7584763 (513.24 it/sec) -training >> step=2979100, episode=497 reward=0.768621 (509.70 it/sec) -training >> step=2979200, episode=497 reward=0.7570607 (514.94 it/sec) -training >> step=2979300, episode=497 reward=0.7776284 (506.17 it/sec) -training >> step=2979400, episode=497 reward=0.7749726 (482.13 it/sec) -training >> step=2979500, episode=497 reward=0.736132 (490.26 it/sec) -training >> step=2979600, episode=497 reward=0.7760013 (475.44 it/sec) -training >> step=2979700, episode=497 reward=0.7583668 (506.11 it/sec) -training >> step=2979800, episode=497 reward=0.7804632 (505.66 it/sec) -training >> step=2979900, episode=497 reward=0.7556643 (490.64 it/sec) -training >> step=2980000, episode=497 reward=0.766705 (461.41 it/sec) -training >> step=2980100, episode=497 reward=0.7638897 (527.35 it/sec) -training >> step=2980200, episode=497 reward=0.7667008 (511.31 it/sec) -training >> step=2980300, episode=497 reward=0.7574408 (489.77 it/sec) -training >> step=2980400, episode=497 reward=0.7833937 (542.82 it/sec) -training >> step=2980500, episode=497 reward=0.7740026 (507.38 it/sec) -training >> step=2980600, episode=497 reward=0.763885 (460.13 it/sec) -training >> step=2980700, episode=497 reward=0.7737923 (476.23 it/sec) -training >> step=2980800, episode=497 reward=0.7513644 (515.40 it/sec) -training >> step=2980900, episode=497 reward=0.7727268 (490.78 it/sec) -training >> step=2981000, episode=497 reward=0.7825018 (417.89 it/sec) -training >> step=2981100, episode=497 reward=0.768394 (461.53 it/sec) -training >> step=2981200, episode=497 reward=0.78775 (511.35 it/sec) -training >> step=2981300, episode=498 reward=0.7694854 (94.85 it/sec) -training >> step=2981400, episode=498 reward=0.7471192 (520.43 it/sec) -training >> step=2981500, episode=498 reward=0.7661378 (543.22 it/sec) -training >> step=2981600, episode=498 reward=0.7523833 (534.86 it/sec) -training >> step=2981700, episode=498 reward=0.759333 (523.20 it/sec) -training >> step=2981800, episode=498 reward=0.7642882 (534.42 it/sec) -training >> step=2981900, episode=498 reward=0.7705948 (548.35 it/sec) -training >> step=2982000, episode=498 reward=0.7501363 (539.07 it/sec) -training >> step=2982100, episode=498 reward=0.7913253 (500.77 it/sec) -training >> step=2982200, episode=498 reward=0.785257 (522.95 it/sec) -training >> step=2982300, episode=498 reward=0.7647041 (553.32 it/sec) -training >> step=2982400, episode=498 reward=0.7704837 (539.13 it/sec) -training >> step=2982500, episode=498 reward=0.7657943 (484.21 it/sec) -training >> step=2982600, episode=498 reward=0.767935 (515.22 it/sec) -training >> step=2982700, episode=498 reward=0.7721389 (507.76 it/sec) -training >> step=2982800, episode=498 reward=0.7653797 (526.36 it/sec) -training >> step=2982900, episode=498 reward=0.774582 (581.21 it/sec) -training >> step=2983000, episode=498 reward=0.7618389 (495.28 it/sec) -training >> step=2983100, episode=498 reward=0.7866083 (513.78 it/sec) -training >> step=2983200, episode=498 reward=0.7678061 (471.50 it/sec) -training >> step=2983300, episode=498 reward=0.798904 (545.44 it/sec) -training >> step=2983400, episode=498 reward=0.7583401 (505.58 it/sec) -training >> step=2983500, episode=498 reward=0.7525355 (499.39 it/sec) -training >> step=2983600, episode=498 reward=0.7706344 (538.64 it/sec) -training >> step=2983700, episode=498 reward=0.7609134 (509.86 it/sec) -training >> step=2983800, episode=498 reward=0.756841 (520.55 it/sec) -training >> step=2983900, episode=498 reward=0.7744223 (499.50 it/sec) -training >> step=2984000, episode=498 reward=0.7661974 (531.00 it/sec) -training >> step=2984100, episode=498 reward=0.7822897 (455.11 it/sec) -training >> step=2984200, episode=498 reward=0.7887892 (471.48 it/sec) -training >> step=2984300, episode=498 reward=0.7975794 (457.67 it/sec) -training >> step=2984400, episode=498 reward=0.7709181 (533.21 it/sec) -training >> step=2984500, episode=498 reward=0.7628281 (378.22 it/sec) -training >> step=2984600, episode=498 reward=0.7758391 (515.17 it/sec) -training >> step=2984700, episode=498 reward=0.7757192 (507.02 it/sec) -training >> step=2984800, episode=498 reward=0.7636954 (514.10 it/sec) -training >> step=2984900, episode=498 reward=0.7845202 (518.03 it/sec) -training >> step=2985000, episode=498 reward=0.7784632 (533.49 it/sec) -training >> step=2985100, episode=498 reward=0.7721723 (547.88 it/sec) -training >> step=2985200, episode=498 reward=0.7850142 (515.48 it/sec) -training >> step=2985300, episode=498 reward=0.7690566 (439.14 it/sec) -training >> step=2985400, episode=498 reward=0.7671564 (524.33 it/sec) -training >> step=2985500, episode=498 reward=0.7523191 (499.34 it/sec) -training >> step=2985600, episode=498 reward=0.7773628 (505.63 it/sec) -training >> step=2985700, episode=498 reward=0.776421 (527.67 it/sec) -training >> step=2985800, episode=498 reward=0.7583733 (453.80 it/sec) -training >> step=2985900, episode=498 reward=0.7772742 (524.67 it/sec) -training >> step=2986000, episode=498 reward=0.7693825 (482.63 it/sec) -training >> step=2986100, episode=498 reward=0.763106 (499.31 it/sec) -training >> step=2986200, episode=498 reward=0.7705004 (516.25 it/sec) -training >> step=2986300, episode=498 reward=0.7577655 (439.37 it/sec) -training >> step=2986400, episode=498 reward=0.7727364 (463.91 it/sec) -training >> step=2986500, episode=498 reward=0.7619978 (458.25 it/sec) -training >> step=2986600, episode=498 reward=0.755823 (435.23 it/sec) -training >> step=2986700, episode=498 reward=0.7707834 (434.06 it/sec) -training >> step=2986800, episode=498 reward=0.7789963 (428.94 it/sec) -training >> step=2986900, episode=498 reward=0.7586561 (427.77 it/sec) -training >> step=2987000, episode=498 reward=0.7766383 (415.45 it/sec) -training >> step=2987100, episode=498 reward=0.7707996 (428.07 it/sec) -training >> step=2987200, episode=498 reward=0.7689668 (382.52 it/sec) -training >> step=2987300, episode=499 reward=0.7666072 (136.16 it/sec) -training >> step=2987400, episode=499 reward=0.7952147 (511.79 it/sec) -training >> step=2987500, episode=499 reward=0.7736129 (527.51 it/sec) -training >> step=2987600, episode=499 reward=0.7732989 (570.07 it/sec) -training >> step=2987700, episode=499 reward=0.7704639 (498.09 it/sec) -training >> step=2987800, episode=499 reward=0.7555779 (484.22 it/sec) -training >> step=2987900, episode=499 reward=0.7382128 (499.89 it/sec) -training >> step=2988000, episode=499 reward=0.7896992 (460.60 it/sec) -training >> step=2988100, episode=499 reward=0.7574139 (497.73 it/sec) -training >> step=2988200, episode=499 reward=0.7639903 (511.66 it/sec) -training >> step=2988300, episode=499 reward=0.7801149 (553.45 it/sec) -training >> step=2988400, episode=499 reward=0.7608923 (531.43 it/sec) -training >> step=2988500, episode=499 reward=0.7584504 (503.05 it/sec) -training >> step=2988600, episode=499 reward=0.7693035 (558.42 it/sec) -training >> step=2988700, episode=499 reward=0.7849525 (437.86 it/sec) -training >> step=2988800, episode=499 reward=0.7601089 (533.31 it/sec) -training >> step=2988900, episode=499 reward=0.7593682 (513.01 it/sec) -training >> step=2989000, episode=499 reward=0.751182 (529.36 it/sec) -training >> step=2989100, episode=499 reward=0.7621313 (520.31 it/sec) -training >> step=2989200, episode=499 reward=0.7764075 (516.42 it/sec) -training >> step=2989300, episode=499 reward=0.7842951 (539.79 it/sec) -training >> step=2989400, episode=499 reward=0.7582486 (533.89 it/sec) -training >> step=2989500, episode=499 reward=0.7659054 (475.40 it/sec) -training >> step=2989600, episode=499 reward=0.7729881 (547.05 it/sec) -training >> step=2989700, episode=499 reward=0.7802514 (553.17 it/sec) -training >> step=2989800, episode=499 reward=0.77486 (466.54 it/sec) -training >> step=2989900, episode=499 reward=0.7464941 (549.69 it/sec) -training >> step=2990000, episode=499 reward=0.7931335 (485.14 it/sec) -training >> step=2990100, episode=499 reward=0.7616943 (519.65 it/sec) -training >> step=2990200, episode=499 reward=0.7774209 (558.65 it/sec) -training >> step=2990300, episode=499 reward=0.7761833 (500.51 it/sec) -training >> step=2990400, episode=499 reward=0.7580734 (552.53 it/sec) -training >> step=2990500, episode=499 reward=0.7510473 (530.03 it/sec) -training >> step=2990600, episode=499 reward=0.771683 (461.75 it/sec) -training >> step=2990700, episode=499 reward=0.7668802 (513.35 it/sec) -training >> step=2990800, episode=499 reward=0.7524862 (440.62 it/sec) -training >> step=2990900, episode=499 reward=0.7756653 (544.40 it/sec) -training >> step=2991000, episode=499 reward=0.7544253 (510.70 it/sec) -training >> step=2991100, episode=499 reward=0.7653323 (455.23 it/sec) -training >> step=2991200, episode=499 reward=0.7656095 (551.32 it/sec) -training >> step=2991300, episode=499 reward=0.7478891 (513.60 it/sec) -training >> step=2991400, episode=499 reward=0.7825029 (530.15 it/sec) -training >> step=2991500, episode=499 reward=0.7911025 (579.77 it/sec) -training >> step=2991600, episode=499 reward=0.7767388 (484.06 it/sec) -training >> step=2991700, episode=499 reward=0.7389421 (535.90 it/sec) -training >> step=2991800, episode=499 reward=0.7588646 (545.69 it/sec) -training >> step=2991900, episode=499 reward=0.7896739 (513.51 it/sec) -training >> step=2992000, episode=499 reward=0.7542084 (532.43 it/sec) -training >> step=2992100, episode=499 reward=0.7482284 (521.05 it/sec) -training >> step=2992200, episode=499 reward=0.7754232 (493.12 it/sec) -training >> step=2992300, episode=499 reward=0.7674788 (540.73 it/sec) -training >> step=2992400, episode=499 reward=0.7667023 (538.60 it/sec) -training >> step=2992500, episode=499 reward=0.7946468 (540.60 it/sec) -training >> step=2992600, episode=499 reward=0.7827235 (584.85 it/sec) -training >> step=2992700, episode=499 reward=0.7818651 (497.77 it/sec) -training >> step=2992800, episode=499 reward=0.7683516 (559.33 it/sec) -training >> step=2992900, episode=499 reward=0.7620922 (566.58 it/sec) -training >> step=2993000, episode=499 reward=0.7400296 (540.72 it/sec) -training >> step=2993100, episode=499 reward=0.7727887 (578.15 it/sec) -training >> step=2993200, episode=499 reward=0.761493 (549.36 it/sec) -training >> step=2993300, episode=500 reward=0.7790931 (124.00 it/sec) -training >> step=2993400, episode=500 reward=0.7848995 (495.40 it/sec) -training >> step=2993500, episode=500 reward=0.7487952 (478.22 it/sec) -training >> step=2993600, episode=500 reward=0.7846973 (511.87 it/sec) -training >> step=2993700, episode=500 reward=0.7782546 (506.82 it/sec) -training >> step=2993800, episode=500 reward=0.7466753 (564.01 it/sec) -training >> step=2993900, episode=500 reward=0.7498484 (525.21 it/sec) -training >> step=2994000, episode=500 reward=0.7662079 (518.05 it/sec) -training >> step=2994100, episode=500 reward=0.7694772 (550.94 it/sec) -training >> step=2994200, episode=500 reward=0.8028265 (548.83 it/sec) -training >> step=2994300, episode=500 reward=0.7737454 (459.16 it/sec) -training >> step=2994400, episode=500 reward=0.7657943 (552.50 it/sec) -training >> step=2994500, episode=500 reward=0.7714344 (530.04 it/sec) -training >> step=2994600, episode=500 reward=0.7724597 (498.46 it/sec) -training >> step=2994700, episode=500 reward=0.7811065 (513.02 it/sec) -training >> step=2994800, episode=500 reward=0.7768685 (550.33 it/sec) -training >> step=2994900, episode=500 reward=0.7453934 (534.42 it/sec) -training >> step=2995000, episode=500 reward=0.7833182 (517.68 it/sec) -training >> step=2995100, episode=500 reward=0.7752547 (500.12 it/sec) -training >> step=2995200, episode=500 reward=0.7464973 (512.77 it/sec) -training >> step=2995300, episode=500 reward=0.7656447 (522.68 it/sec) -training >> step=2995400, episode=500 reward=0.7604656 (556.35 it/sec) -training >> step=2995500, episode=500 reward=0.7793747 (523.13 it/sec) -training >> step=2995600, episode=500 reward=0.7774532 (467.61 it/sec) -training >> step=2995700, episode=500 reward=0.7809812 (464.92 it/sec) -training >> step=2995800, episode=500 reward=0.7759189 (533.95 it/sec) -training >> step=2995900, episode=500 reward=0.7646563 (541.74 it/sec) -training >> step=2996000, episode=500 reward=0.7921726 (550.40 it/sec) -training >> step=2996100, episode=500 reward=0.7592942 (491.99 it/sec) -training >> step=2996200, episode=500 reward=0.7749504 (470.99 it/sec) -training >> step=2996300, episode=500 reward=0.7565529 (442.37 it/sec) -training >> step=2996400, episode=500 reward=0.7829992 (548.87 it/sec) -training >> step=2996500, episode=500 reward=0.7594884 (521.51 it/sec) -training >> step=2996600, episode=500 reward=0.758794 (532.10 it/sec) -training >> step=2996700, episode=500 reward=0.7696383 (485.86 it/sec) -training >> step=2996800, episode=500 reward=0.78471 (501.09 it/sec) -training >> step=2996900, episode=500 reward=0.7709116 (387.52 it/sec) -training >> step=2997000, episode=500 reward=0.7678939 (525.95 it/sec) -training >> step=2997100, episode=500 reward=0.7558104 (498.73 it/sec) -training >> step=2997200, episode=500 reward=0.785522 (515.21 it/sec) -training >> step=2997300, episode=500 reward=0.7727947 (529.53 it/sec) -training >> step=2997400, episode=500 reward=0.7638945 (529.17 it/sec) -training >> step=2997500, episode=500 reward=0.7753577 (533.97 it/sec) -training >> step=2997600, episode=500 reward=0.7871596 (521.23 it/sec) -training >> step=2997700, episode=500 reward=0.7695259 (522.83 it/sec) -training >> step=2997800, episode=500 reward=0.7754691 (523.69 it/sec) -training >> step=2997900, episode=500 reward=0.7735909 (519.28 it/sec) -training >> step=2998000, episode=500 reward=0.7776859 (477.63 it/sec) -training >> step=2998100, episode=500 reward=0.742884 (480.94 it/sec) -training >> step=2998200, episode=500 reward=0.7650772 (472.78 it/sec) -training >> step=2998300, episode=500 reward=0.7524582 (465.44 it/sec) -training >> step=2998400, episode=500 reward=0.7738588 (452.53 it/sec) -training >> step=2998500, episode=500 reward=0.7636072 (468.51 it/sec) -training >> step=2998600, episode=500 reward=0.7545108 (504.21 it/sec) -training >> step=2998700, episode=500 reward=0.764035 (427.72 it/sec) -training >> step=2998800, episode=500 reward=0.7626747 (428.32 it/sec) -training >> step=2998900, episode=500 reward=0.7567468 (517.38 it/sec) -training >> step=2999000, episode=500 reward=0.7555857 (499.80 it/sec) -training >> step=2999100, episode=500 reward=0.758665 (466.16 it/sec) -training >> step=2999200, episode=500 reward=0.7502583 (462.72 it/sec) -training >> step=2999300, episode=501 reward=0.747955 (92.35 it/sec) -training >> step=2999400, episode=501 reward=0.7553977 (458.66 it/sec) -training >> step=2999500, episode=501 reward=0.7576022 (455.67 it/sec) -training >> step=2999600, episode=501 reward=0.7523596 (420.21 it/sec) -training >> step=2999700, episode=501 reward=0.7879736 (457.44 it/sec) -training >> step=2999800, episode=501 reward=0.7616023 (434.98 it/sec) -training >> step=2999900, episode=501 reward=0.7627714 (496.47 it/sec) -training >> step=3000000, episode=501 reward=0.780333 (535.15 it/sec) -training >> step=3000100, episode=501 reward=0.7797725 (488.66 it/sec) -training >> step=3000200, episode=501 reward=0.7499123 (505.84 it/sec) -training >> step=3000300, episode=501 reward=0.7882765 (524.33 it/sec) -training >> step=3000400, episode=501 reward=0.7679937 (485.96 it/sec) -training >> step=3000500, episode=501 reward=0.7697129 (465.09 it/sec) -training >> step=3000600, episode=501 reward=0.7626866 (515.75 it/sec) -training >> step=3000700, episode=501 reward=0.7974065 (492.76 it/sec) -training >> step=3000800, episode=501 reward=0.8003796 (452.29 it/sec) -training >> step=3000900, episode=501 reward=0.789112 (468.43 it/sec) -training >> step=3001000, episode=501 reward=0.7640264 (518.46 it/sec) -training >> step=3001100, episode=501 reward=0.7616644 (516.93 it/sec) -training >> step=3001200, episode=501 reward=0.767482 (498.69 it/sec) -training >> step=3001300, episode=501 reward=0.7743561 (503.28 it/sec) -training >> step=3001400, episode=501 reward=0.7620083 (507.42 it/sec) -training >> step=3001500, episode=501 reward=0.7624195 (508.60 it/sec) -training >> step=3001600, episode=501 reward=0.7766584 (531.92 it/sec) -training >> step=3001700, episode=501 reward=0.761683 (509.99 it/sec) -training >> step=3001800, episode=501 reward=0.7640666 (550.04 it/sec) -training >> step=3001900, episode=501 reward=0.7715393 (533.60 it/sec) -training >> step=3002000, episode=501 reward=0.7832713 (555.53 it/sec) -training >> step=3002100, episode=501 reward=0.7820054 (534.90 it/sec) -training >> step=3002200, episode=501 reward=0.7669588 (528.23 it/sec) -training >> step=3002300, episode=501 reward=0.7771965 (513.10 it/sec) -training >> step=3002400, episode=501 reward=0.7731187 (513.12 it/sec) -training >> step=3002500, episode=501 reward=0.7792903 (524.00 it/sec) -training >> step=3002600, episode=501 reward=0.7734478 (501.77 it/sec) -training >> step=3002700, episode=501 reward=0.767612 (480.78 it/sec) -training >> step=3002800, episode=501 reward=0.7748388 (480.66 it/sec) -training >> step=3002900, episode=501 reward=0.7707131 (549.85 it/sec) -training >> step=3003000, episode=501 reward=0.7631634 (542.24 it/sec) -training >> step=3003100, episode=501 reward=0.7662129 (524.63 it/sec) -training >> step=3003200, episode=501 reward=0.7710982 (384.00 it/sec) -training >> step=3003300, episode=501 reward=0.7341612 (533.75 it/sec) -training >> step=3003400, episode=501 reward=0.7739111 (518.14 it/sec) -training >> step=3003500, episode=501 reward=0.7592044 (566.40 it/sec) -training >> step=3003600, episode=501 reward=0.7636413 (554.45 it/sec) -training >> step=3003700, episode=501 reward=0.7783247 (516.02 it/sec) -training >> step=3003800, episode=501 reward=0.7697932 (507.48 it/sec) -training >> step=3003900, episode=501 reward=0.7655848 (481.85 it/sec) -training >> step=3004000, episode=501 reward=0.7658525 (509.54 it/sec) -training >> step=3004100, episode=501 reward=0.7770617 (491.78 it/sec) -training >> step=3004200, episode=501 reward=0.7816983 (514.39 it/sec) -training >> step=3004300, episode=501 reward=0.7527581 (493.30 it/sec) -training >> step=3004400, episode=501 reward=0.7592236 (521.61 it/sec) -training >> step=3004500, episode=501 reward=0.7616864 (510.00 it/sec) -training >> step=3004600, episode=501 reward=0.7585921 (545.87 it/sec) -training >> step=3004700, episode=501 reward=0.7568852 (529.71 it/sec) -training >> step=3004800, episode=501 reward=0.7628649 (499.79 it/sec) -training >> step=3004900, episode=501 reward=0.7690381 (535.03 it/sec) -training >> step=3005000, episode=501 reward=0.7462036 (463.08 it/sec) -training >> step=3005100, episode=501 reward=0.7396553 (535.81 it/sec) -training >> step=3005200, episode=501 reward=0.7721847 (520.01 it/sec) -training >> step=3005300, episode=502 reward=0.7489882 (95.97 it/sec) -training >> step=3005400, episode=502 reward=0.7806924 (455.54 it/sec) -training >> step=3005500, episode=502 reward=0.7653749 (514.39 it/sec) -training >> step=3005600, episode=502 reward=0.7565638 (545.58 it/sec) -training >> step=3005700, episode=502 reward=0.7670719 (525.51 it/sec) -training >> step=3005800, episode=502 reward=0.7641737 (552.79 it/sec) -training >> step=3005900, episode=502 reward=0.7673765 (534.15 it/sec) -training >> step=3006000, episode=502 reward=0.7710402 (520.21 it/sec) -training >> step=3006100, episode=502 reward=0.7701727 (520.15 it/sec) -training >> step=3006200, episode=502 reward=0.7831048 (541.56 it/sec) -training >> step=3006300, episode=502 reward=0.764703 (559.15 it/sec) -training >> step=3006400, episode=502 reward=0.7588143 (529.99 it/sec) -training >> step=3006500, episode=502 reward=0.7639914 (505.61 it/sec) -training >> step=3006600, episode=502 reward=0.7646112 (556.22 it/sec) -training >> step=3006700, episode=502 reward=0.7692899 (529.90 it/sec) -training >> step=3006800, episode=502 reward=0.754575 (532.19 it/sec) -training >> step=3006900, episode=502 reward=0.7778358 (551.51 it/sec) -training >> step=3007000, episode=502 reward=0.7765443 (561.43 it/sec) -training >> step=3007100, episode=502 reward=0.7688152 (539.40 it/sec) -training >> step=3007200, episode=502 reward=0.767056 (516.44 it/sec) -training >> step=3007300, episode=502 reward=0.75893 (575.18 it/sec) -training >> step=3007400, episode=502 reward=0.7667356 (550.11 it/sec) -training >> step=3007500, episode=502 reward=0.7849584 (548.32 it/sec) -training >> step=3007600, episode=502 reward=0.7606413 (517.32 it/sec) -training >> step=3007700, episode=502 reward=0.7614725 (527.72 it/sec) -training >> step=3007800, episode=502 reward=0.7776073 (512.47 it/sec) -training >> step=3007900, episode=502 reward=0.773279 (517.77 it/sec) -training >> step=3008000, episode=502 reward=0.7839401 (501.88 it/sec) -training >> step=3008100, episode=502 reward=0.7672141 (550.16 it/sec) -training >> step=3008200, episode=502 reward=0.7645917 (528.69 it/sec) -training >> step=3008300, episode=502 reward=0.7529922 (518.74 it/sec) -training >> step=3008400, episode=502 reward=0.7861716 (556.58 it/sec) -training >> step=3008500, episode=502 reward=0.7730564 (529.30 it/sec) -training >> step=3008600, episode=502 reward=0.7841039 (537.26 it/sec) -training >> step=3008700, episode=502 reward=0.7554768 (516.81 it/sec) -training >> step=3008800, episode=502 reward=0.7710862 (532.34 it/sec) -training >> step=3008900, episode=502 reward=0.7667328 (546.02 it/sec) -training >> step=3009000, episode=502 reward=0.7739923 (515.99 it/sec) -training >> step=3009100, episode=502 reward=0.7726771 (511.35 it/sec) -training >> step=3009200, episode=502 reward=0.7463084 (394.54 it/sec) -training >> step=3009300, episode=502 reward=0.7715175 (513.92 it/sec) -training >> step=3009400, episode=502 reward=0.7510594 (503.55 it/sec) -training >> step=3009500, episode=502 reward=0.7646748 (557.06 it/sec) -training >> step=3009600, episode=502 reward=0.7635175 (512.16 it/sec) -training >> step=3009700, episode=502 reward=0.7627176 (526.97 it/sec) -training >> step=3009800, episode=502 reward=0.7483465 (476.33 it/sec) -training >> step=3009900, episode=502 reward=0.7590008 (470.09 it/sec) -training >> step=3010000, episode=502 reward=0.7589524 (411.75 it/sec) -training >> step=3010100, episode=502 reward=0.7709926 (430.02 it/sec) -training >> step=3010200, episode=502 reward=0.7851093 (436.62 it/sec) -training >> step=3010300, episode=502 reward=0.7706866 (391.28 it/sec) -training >> step=3010400, episode=502 reward=0.7582606 (514.24 it/sec) -training >> step=3010500, episode=502 reward=0.7630752 (539.18 it/sec) -training >> step=3010600, episode=502 reward=0.7607496 (569.98 it/sec) -training >> step=3010700, episode=502 reward=0.7425481 (573.28 it/sec) -training >> step=3010800, episode=502 reward=0.7266498 (529.34 it/sec) -training >> step=3010900, episode=502 reward=0.7679545 (562.02 it/sec) -training >> step=3011000, episode=502 reward=0.7793362 (559.88 it/sec) -training >> step=3011100, episode=502 reward=0.7567742 (484.94 it/sec) -training >> step=3011200, episode=502 reward=0.7757254 (522.43 it/sec) -training >> step=3011300, episode=503 reward=0.7626649 (121.09 it/sec) -training >> step=3011400, episode=503 reward=0.7808807 (447.11 it/sec) -training >> step=3011500, episode=503 reward=0.7749415 (491.25 it/sec) -training >> step=3011600, episode=503 reward=0.7473569 (463.31 it/sec) -training >> step=3011700, episode=503 reward=0.7683821 (542.28 it/sec) -training >> step=3011800, episode=503 reward=0.7612348 (470.70 it/sec) -training >> step=3011900, episode=503 reward=0.7709483 (467.37 it/sec) -training >> step=3012000, episode=503 reward=0.7727001 (534.75 it/sec) -training >> step=3012100, episode=503 reward=0.7714093 (527.54 it/sec) -training >> step=3012200, episode=503 reward=0.7885701 (553.02 it/sec) -training >> step=3012300, episode=503 reward=0.7767164 (526.55 it/sec) -training >> step=3012400, episode=503 reward=0.7467764 (551.12 it/sec) -training >> step=3012500, episode=503 reward=0.7761575 (530.48 it/sec) -training >> step=3012600, episode=503 reward=0.7841375 (482.28 it/sec) -training >> step=3012700, episode=503 reward=0.7464876 (478.70 it/sec) -training >> step=3012800, episode=503 reward=0.7771668 (533.08 it/sec) -training >> step=3012900, episode=503 reward=0.7775543 (545.94 it/sec) -training >> step=3013000, episode=503 reward=0.7713804 (541.61 it/sec) -training >> step=3013100, episode=503 reward=0.775673 (518.21 it/sec) -training >> step=3013200, episode=503 reward=0.7920789 (521.49 it/sec) -training >> step=3013300, episode=503 reward=0.7740617 (523.09 it/sec) -training >> step=3013400, episode=503 reward=0.7743666 (537.28 it/sec) -training >> step=3013500, episode=503 reward=0.7813696 (538.36 it/sec) -training >> step=3013600, episode=503 reward=0.7746826 (482.84 it/sec) -training >> step=3013700, episode=503 reward=0.7448186 (516.97 it/sec) -training >> step=3013800, episode=503 reward=0.7602018 (513.32 it/sec) -training >> step=3013900, episode=503 reward=0.7524016 (549.67 it/sec) -training >> step=3014000, episode=503 reward=0.7888678 (536.13 it/sec) -training >> step=3014100, episode=503 reward=0.7705328 (524.87 it/sec) -training >> step=3014200, episode=503 reward=0.7862532 (553.04 it/sec) -training >> step=3014300, episode=503 reward=0.801681 (507.63 it/sec) -training >> step=3014400, episode=503 reward=0.7613891 (559.33 it/sec) -training >> step=3014500, episode=503 reward=0.7595605 (533.85 it/sec) -training >> step=3014600, episode=503 reward=0.7681894 (547.75 it/sec) -training >> step=3014700, episode=503 reward=0.7766833 (507.48 it/sec) -training >> step=3014800, episode=503 reward=0.77367 (496.40 it/sec) -training >> step=3014900, episode=503 reward=0.8095542 (493.07 it/sec) -training >> step=3015000, episode=503 reward=0.7753084 (570.61 it/sec) -training >> step=3015100, episode=503 reward=0.785139 (553.31 it/sec) -training >> step=3015200, episode=503 reward=0.785997 (538.20 it/sec) -training >> step=3015300, episode=503 reward=0.7611546 (406.75 it/sec) -training >> step=3015400, episode=503 reward=0.7703411 (530.62 it/sec) -training >> step=3015500, episode=503 reward=0.7722827 (524.87 it/sec) -training >> step=3015600, episode=503 reward=0.7475364 (498.46 it/sec) -training >> step=3015700, episode=503 reward=0.7555858 (522.27 it/sec) -training >> step=3015800, episode=503 reward=0.7556226 (547.85 it/sec) -training >> step=3015900, episode=503 reward=0.7574973 (490.74 it/sec) -training >> step=3016000, episode=503 reward=0.7530319 (554.87 it/sec) -training >> step=3016100, episode=503 reward=0.7810043 (539.42 it/sec) -training >> step=3016200, episode=503 reward=0.7408062 (519.33 it/sec) -training >> step=3016300, episode=503 reward=0.7321169 (535.81 it/sec) -training >> step=3016400, episode=503 reward=0.788197 (542.19 it/sec) -training >> step=3016500, episode=503 reward=0.7592014 (532.15 it/sec) -training >> step=3016600, episode=503 reward=0.76957 (554.32 it/sec) -training >> step=3016700, episode=503 reward=0.7564761 (530.92 it/sec) -training >> step=3016800, episode=503 reward=0.7602183 (546.10 it/sec) -training >> step=3016900, episode=503 reward=0.7526203 (546.76 it/sec) -training >> step=3017000, episode=503 reward=0.7476916 (558.29 it/sec) -training >> step=3017100, episode=503 reward=0.7711806 (528.14 it/sec) -training >> step=3017200, episode=503 reward=0.7294859 (537.64 it/sec) -training >> step=3017300, episode=504 reward=0.7516266 (49.49 it/sec) -training >> step=3017400, episode=504 reward=0.7731421 (483.84 it/sec) -training >> step=3017500, episode=504 reward=0.7761657 (540.46 it/sec) -training >> step=3017600, episode=504 reward=0.7925832 (524.57 it/sec) -training >> step=3017700, episode=504 reward=0.7434713 (520.06 it/sec) -training >> step=3017800, episode=504 reward=0.7584746 (538.64 it/sec) -training >> step=3017900, episode=504 reward=0.776477 (529.11 it/sec) -training >> step=3018000, episode=504 reward=0.7789891 (547.06 it/sec) -training >> step=3018100, episode=504 reward=0.7693998 (492.99 it/sec) -training >> step=3018200, episode=504 reward=0.76904 (481.78 it/sec) -training >> step=3018300, episode=504 reward=0.7725984 (517.49 it/sec) -training >> step=3018400, episode=504 reward=0.7726926 (474.15 it/sec) -training >> step=3018500, episode=504 reward=0.7788487 (531.70 it/sec) -training >> step=3018600, episode=504 reward=0.7473526 (510.94 it/sec) -training >> step=3018700, episode=504 reward=0.7726197 (450.19 it/sec) -training >> step=3018800, episode=504 reward=0.7488921 (499.04 it/sec) -training >> step=3018900, episode=504 reward=0.7491578 (497.83 it/sec) -training >> step=3019000, episode=504 reward=0.7608716 (470.05 it/sec) -training >> step=3019100, episode=504 reward=0.7728996 (449.76 it/sec) -training >> step=3019200, episode=504 reward=0.7726604 (481.39 it/sec) -training >> step=3019300, episode=504 reward=0.7652237 (517.12 it/sec) -training >> step=3019400, episode=504 reward=0.7662752 (487.21 it/sec) -training >> step=3019500, episode=504 reward=0.7775099 (520.85 it/sec) -training >> step=3019600, episode=504 reward=0.7910685 (464.20 it/sec) -training >> step=3019700, episode=504 reward=0.7650148 (438.18 it/sec) -training >> step=3019800, episode=504 reward=0.7735054 (441.19 it/sec) -training >> step=3019900, episode=504 reward=0.7602164 (486.06 it/sec) -training >> step=3020000, episode=504 reward=0.7829094 (554.81 it/sec) -training >> step=3020100, episode=504 reward=0.7602547 (437.55 it/sec) -training >> step=3020200, episode=504 reward=0.7604875 (515.97 it/sec) -training >> step=3020300, episode=504 reward=0.7693941 (463.29 it/sec) -training >> step=3020400, episode=504 reward=0.7770963 (539.41 it/sec) -training >> step=3020500, episode=504 reward=0.7586077 (476.89 it/sec) -training >> step=3020600, episode=504 reward=0.7544174 (507.83 it/sec) -training >> step=3020700, episode=504 reward=0.7531449 (524.64 it/sec) -training >> step=3020800, episode=504 reward=0.7713735 (453.99 it/sec) -training >> step=3020900, episode=504 reward=0.7801738 (529.87 it/sec) -training >> step=3021000, episode=504 reward=0.7513104 (522.48 it/sec) -training >> step=3021100, episode=504 reward=0.7759353 (459.02 it/sec) -training >> step=3021200, episode=504 reward=0.7784665 (486.12 it/sec) -training >> step=3021300, episode=504 reward=0.7677355 (534.94 it/sec) -training >> step=3021400, episode=504 reward=0.7587186 (500.99 it/sec) -training >> step=3021500, episode=504 reward=0.7650335 (389.78 it/sec) -training >> step=3021600, episode=504 reward=0.7677844 (524.76 it/sec) -training >> step=3021700, episode=504 reward=0.7662003 (539.78 it/sec) -training >> step=3021800, episode=504 reward=0.7606848 (567.77 it/sec) -training >> step=3021900, episode=504 reward=0.7384774 (512.46 it/sec) -training >> step=3022000, episode=504 reward=0.7640979 (482.62 it/sec) -training >> step=3022100, episode=504 reward=0.7734147 (500.18 it/sec) -training >> step=3022200, episode=504 reward=0.7593224 (525.45 it/sec) -training >> step=3022300, episode=504 reward=0.780778 (526.56 it/sec) -training >> step=3022400, episode=504 reward=0.7581093 (551.09 it/sec) -training >> step=3022500, episode=504 reward=0.7809209 (500.87 it/sec) -training >> step=3022600, episode=504 reward=0.7682314 (549.54 it/sec) -training >> step=3022700, episode=504 reward=0.7593824 (512.69 it/sec) -training >> step=3022800, episode=504 reward=0.7572545 (529.07 it/sec) -training >> step=3022900, episode=504 reward=0.7532764 (563.60 it/sec) -training >> step=3023000, episode=504 reward=0.7594489 (514.83 it/sec) -training >> step=3023100, episode=504 reward=0.765323 (535.47 it/sec) -training >> step=3023200, episode=504 reward=0.7710138 (526.36 it/sec) -training >> step=3023300, episode=505 reward=0.7655821 (113.16 it/sec) -training >> step=3023400, episode=505 reward=0.7543437 (529.95 it/sec) -training >> step=3023500, episode=505 reward=0.7598264 (480.23 it/sec) -training >> step=3023600, episode=505 reward=0.777336 (467.52 it/sec) -training >> step=3023700, episode=505 reward=0.7449337 (440.48 it/sec) -training >> step=3023800, episode=505 reward=0.764517 (430.00 it/sec) -training >> step=3023900, episode=505 reward=0.7754075 (378.97 it/sec) -training >> step=3024000, episode=505 reward=0.7848394 (523.78 it/sec) -training >> step=3024100, episode=505 reward=0.7508172 (481.83 it/sec) -training >> step=3024200, episode=505 reward=0.7881815 (516.05 it/sec) -training >> step=3024300, episode=505 reward=0.7796999 (519.26 it/sec) -training >> step=3024400, episode=505 reward=0.7668027 (518.08 it/sec) -training >> step=3024500, episode=505 reward=0.7797552 (471.15 it/sec) -training >> step=3024600, episode=505 reward=0.7694323 (540.20 it/sec) -training >> step=3024700, episode=505 reward=0.7612406 (556.09 it/sec) -training >> step=3024800, episode=505 reward=0.7682034 (536.00 it/sec) -training >> step=3024900, episode=505 reward=0.7703725 (517.22 it/sec) -training >> step=3025000, episode=505 reward=0.7640619 (538.42 it/sec) -training >> step=3025100, episode=505 reward=0.7658286 (529.69 it/sec) -training >> step=3025200, episode=505 reward=0.7745818 (523.57 it/sec) -training >> step=3025300, episode=505 reward=0.7664324 (532.01 it/sec) -training >> step=3025400, episode=505 reward=0.7930403 (528.68 it/sec) -training >> step=3025500, episode=505 reward=0.7680621 (477.40 it/sec) -training >> step=3025600, episode=505 reward=0.7608278 (533.95 it/sec) -training >> step=3025700, episode=505 reward=0.7609401 (548.05 it/sec) -training >> step=3025800, episode=505 reward=0.7556748 (541.09 it/sec) -training >> step=3025900, episode=505 reward=0.7624813 (514.70 it/sec) -training >> step=3026000, episode=505 reward=0.7571425 (515.08 it/sec) -training >> step=3026100, episode=505 reward=0.7719936 (529.94 it/sec) -training >> step=3026200, episode=505 reward=0.7725652 (539.19 it/sec) -training >> step=3026300, episode=505 reward=0.7734783 (530.15 it/sec) -training >> step=3026400, episode=505 reward=0.7760788 (537.91 it/sec) -training >> step=3026500, episode=505 reward=0.7718253 (535.40 it/sec) -training >> step=3026600, episode=505 reward=0.7471057 (486.09 it/sec) -training >> step=3026700, episode=505 reward=0.7793593 (490.01 it/sec) -training >> step=3026800, episode=505 reward=0.7759222 (525.56 it/sec) -training >> step=3026900, episode=505 reward=0.7537845 (516.31 it/sec) -training >> step=3027000, episode=505 reward=0.7582315 (509.54 it/sec) -training >> step=3027100, episode=505 reward=0.7580183 (511.13 it/sec) -training >> step=3027200, episode=505 reward=0.7635081 (564.51 it/sec) -training >> step=3027300, episode=505 reward=0.7748633 (539.40 it/sec) -training >> step=3027400, episode=505 reward=0.7733485 (523.64 it/sec) -training >> step=3027500, episode=505 reward=0.7857417 (523.44 it/sec) -training >> step=3027600, episode=505 reward=0.7596289 (424.81 it/sec) -training >> step=3027700, episode=505 reward=0.7604702 (513.53 it/sec) -training >> step=3027800, episode=505 reward=0.781706 (496.39 it/sec) -training >> step=3027900, episode=505 reward=0.7747616 (533.74 it/sec) -training >> step=3028000, episode=505 reward=0.7814851 (544.71 it/sec) -training >> step=3028100, episode=505 reward=0.7643453 (506.81 it/sec) -training >> step=3028200, episode=505 reward=0.7601985 (495.67 it/sec) -training >> step=3028300, episode=505 reward=0.7590055 (537.55 it/sec) -training >> step=3028400, episode=505 reward=0.7507883 (516.45 it/sec) -training >> step=3028500, episode=505 reward=0.7721988 (480.34 it/sec) -training >> step=3028600, episode=505 reward=0.7755732 (533.00 it/sec) -training >> step=3028700, episode=505 reward=0.770528 (487.80 it/sec) -training >> step=3028800, episode=505 reward=0.7380068 (540.63 it/sec) -training >> step=3028900, episode=505 reward=0.7461337 (517.57 it/sec) -training >> step=3029000, episode=505 reward=0.7727151 (546.76 it/sec) -training >> step=3029100, episode=505 reward=0.750101 (507.08 it/sec) -training >> step=3029200, episode=505 reward=0.7691584 (511.24 it/sec) -training >> step=3029300, episode=506 reward=0.7780123 (69.17 it/sec) -training >> step=3029400, episode=506 reward=0.7806903 (492.07 it/sec) -training >> step=3029500, episode=506 reward=0.7516564 (505.89 it/sec) -training >> step=3029600, episode=506 reward=0.7601461 (533.91 it/sec) -training >> step=3029700, episode=506 reward=0.7603635 (528.75 it/sec) -training >> step=3029800, episode=506 reward=0.777113 (537.29 it/sec) -training >> step=3029900, episode=506 reward=0.7798977 (548.72 it/sec) -training >> step=3030000, episode=506 reward=0.7815823 (556.66 it/sec) -training >> step=3030100, episode=506 reward=0.768568 (542.62 it/sec) -training >> step=3030200, episode=506 reward=0.7556595 (573.98 it/sec) -training >> step=3030300, episode=506 reward=0.7543514 (536.99 it/sec) -training >> step=3030400, episode=506 reward=0.7501709 (530.21 it/sec) -training >> step=3030500, episode=506 reward=0.7730473 (523.86 it/sec) -training >> step=3030600, episode=506 reward=0.7590297 (583.39 it/sec) -training >> step=3030700, episode=506 reward=0.7709209 (510.69 it/sec) -training >> step=3030800, episode=506 reward=0.7668827 (519.30 it/sec) -training >> step=3030900, episode=506 reward=0.7817052 (521.23 it/sec) -training >> step=3031000, episode=506 reward=0.7587112 (499.26 it/sec) -training >> step=3031100, episode=506 reward=0.7504401 (536.81 it/sec) -training >> step=3031200, episode=506 reward=0.7665147 (543.86 it/sec) -training >> step=3031300, episode=506 reward=0.7700778 (565.51 it/sec) -training >> step=3031400, episode=506 reward=0.7741786 (497.78 it/sec) -training >> step=3031500, episode=506 reward=0.781981 (505.12 it/sec) -training >> step=3031600, episode=506 reward=0.7911597 (560.20 it/sec) -training >> step=3031700, episode=506 reward=0.7609491 (560.72 it/sec) -training >> step=3031800, episode=506 reward=0.7787231 (537.30 it/sec) -training >> step=3031900, episode=506 reward=0.7754208 (509.40 it/sec) -training >> step=3032000, episode=506 reward=0.7746361 (524.82 it/sec) -training >> step=3032100, episode=506 reward=0.7649838 (518.81 it/sec) -training >> step=3032200, episode=506 reward=0.7737991 (541.76 it/sec) -training >> step=3032300, episode=506 reward=0.7548513 (530.75 it/sec) -training >> step=3032400, episode=506 reward=0.7815484 (542.87 it/sec) -training >> step=3032500, episode=506 reward=0.7688655 (547.77 it/sec) -training >> step=3032600, episode=506 reward=0.7807294 (517.14 it/sec) -training >> step=3032700, episode=506 reward=0.7703334 (545.78 it/sec) -training >> step=3032800, episode=506 reward=0.7769074 (545.20 it/sec) -training >> step=3032900, episode=506 reward=0.7781669 (519.92 it/sec) -training >> step=3033000, episode=506 reward=0.7727333 (538.22 it/sec) -training >> step=3033100, episode=506 reward=0.765888 (533.41 it/sec) -training >> step=3033200, episode=506 reward=0.7718582 (520.92 it/sec) -training >> step=3033300, episode=506 reward=0.7812034 (538.44 it/sec) -training >> step=3033400, episode=506 reward=0.7603842 (555.51 it/sec) -training >> step=3033500, episode=506 reward=0.7753314 (564.45 it/sec) -training >> step=3033600, episode=506 reward=0.7774141 (540.87 it/sec) -training >> step=3033700, episode=506 reward=0.7928728 (504.66 it/sec) -training >> step=3033800, episode=506 reward=0.788078 (550.95 it/sec) -training >> step=3033900, episode=506 reward=0.7672766 (559.70 it/sec) -training >> step=3034000, episode=506 reward=0.8005015 (429.17 it/sec) -training >> step=3034100, episode=506 reward=0.7505462 (560.00 it/sec) -training >> step=3034200, episode=506 reward=0.761765 (536.43 it/sec) -training >> step=3034300, episode=506 reward=0.7649863 (544.47 it/sec) -training >> step=3034400, episode=506 reward=0.7506682 (548.49 it/sec) -training >> step=3034500, episode=506 reward=0.7691638 (551.84 it/sec) -training >> step=3034600, episode=506 reward=0.7616652 (507.55 it/sec) -training >> step=3034700, episode=506 reward=0.7669988 (506.93 it/sec) -training >> step=3034800, episode=506 reward=0.7532599 (553.63 it/sec) -training >> step=3034900, episode=506 reward=0.7607871 (557.55 it/sec) -training >> step=3035000, episode=506 reward=0.7408936 (533.80 it/sec) -training >> step=3035100, episode=506 reward=0.7676811 (523.34 it/sec) -training >> step=3035200, episode=506 reward=0.7822283 (529.95 it/sec) -training >> step=3035300, episode=507 reward=0.7672529 (68.11 it/sec) -training >> step=3035400, episode=507 reward=0.7665976 (534.58 it/sec) -training >> step=3035500, episode=507 reward=0.7745606 (557.45 it/sec) -training >> step=3035600, episode=507 reward=0.7479768 (521.72 it/sec) -training >> step=3035700, episode=507 reward=0.7697075 (541.43 it/sec) -training >> step=3035800, episode=507 reward=0.7469312 (532.48 it/sec) -training >> step=3035900, episode=507 reward=0.7894394 (519.68 it/sec) -training >> step=3036000, episode=507 reward=0.7790421 (532.67 it/sec) -training >> step=3036100, episode=507 reward=0.7583981 (520.97 it/sec) -training >> step=3036200, episode=507 reward=0.7507915 (524.62 it/sec) -training >> step=3036300, episode=507 reward=0.7779527 (554.19 it/sec) -training >> step=3036400, episode=507 reward=0.7818074 (548.25 it/sec) -training >> step=3036500, episode=507 reward=0.749554 (526.39 it/sec) -training >> step=3036600, episode=507 reward=0.7407783 (520.71 it/sec) -training >> step=3036700, episode=507 reward=0.7622017 (559.63 it/sec) -training >> step=3036800, episode=507 reward=0.760721 (543.29 it/sec) -training >> step=3036900, episode=507 reward=0.7717759 (525.53 it/sec) -training >> step=3037000, episode=507 reward=0.7598217 (546.24 it/sec) -training >> step=3037100, episode=507 reward=0.7539256 (526.69 it/sec) -training >> step=3037200, episode=507 reward=0.785301 (520.95 it/sec) -training >> step=3037300, episode=507 reward=0.7724329 (562.59 it/sec) -training >> step=3037400, episode=507 reward=0.7513881 (506.67 it/sec) -training >> step=3037500, episode=507 reward=0.7627144 (527.62 it/sec) -training >> step=3037600, episode=507 reward=0.7733454 (528.68 it/sec) -training >> step=3037700, episode=507 reward=0.775034 (497.73 it/sec) -training >> step=3037800, episode=507 reward=0.7690824 (554.66 it/sec) -training >> step=3037900, episode=507 reward=0.7847393 (504.31 it/sec) -training >> step=3038000, episode=507 reward=0.7691886 (508.49 it/sec) -training >> step=3038100, episode=507 reward=0.7698506 (518.33 it/sec) -training >> step=3038200, episode=507 reward=0.7601687 (413.44 it/sec) -training >> step=3038300, episode=507 reward=0.7649623 (413.52 it/sec) -training >> step=3038400, episode=507 reward=0.7698602 (460.91 it/sec) -training >> step=3038500, episode=507 reward=0.7892828 (507.57 it/sec) -training >> step=3038600, episode=507 reward=0.7615097 (521.73 it/sec) -training >> step=3038700, episode=507 reward=0.7689967 (492.10 it/sec) -training >> step=3038800, episode=507 reward=0.7581378 (572.12 it/sec) -training >> step=3038900, episode=507 reward=0.7813297 (507.67 it/sec) -training >> step=3039000, episode=507 reward=0.766249 (558.79 it/sec) -training >> step=3039100, episode=507 reward=0.7718009 (546.95 it/sec) -training >> step=3039200, episode=507 reward=0.776809 (576.62 it/sec) -training >> step=3039300, episode=507 reward=0.7695947 (512.23 it/sec) -training >> step=3039400, episode=507 reward=0.775273 (527.16 it/sec) -training >> step=3039500, episode=507 reward=0.7577948 (525.29 it/sec) -training >> step=3039600, episode=507 reward=0.7572856 (550.76 it/sec) -training >> step=3039700, episode=507 reward=0.7782351 (527.83 it/sec) -training >> step=3039800, episode=507 reward=0.7833835 (523.91 it/sec) -training >> step=3039900, episode=507 reward=0.7586144 (558.24 it/sec) -training >> step=3040000, episode=507 reward=0.7435915 (532.12 it/sec) -training >> step=3040100, episode=507 reward=0.7665367 (515.78 it/sec) -training >> step=3040200, episode=507 reward=0.7745123 (529.99 it/sec) -training >> step=3040300, episode=507 reward=0.7648706 (527.70 it/sec) -training >> step=3040400, episode=507 reward=0.758841 (395.56 it/sec) -training >> step=3040500, episode=507 reward=0.7460439 (529.12 it/sec) -training >> step=3040600, episode=507 reward=0.7438775 (544.63 it/sec) -training >> step=3040700, episode=507 reward=0.7592915 (539.73 it/sec) -training >> step=3040800, episode=507 reward=0.7417679 (536.71 it/sec) -training >> step=3040900, episode=507 reward=0.7400033 (534.76 it/sec) -training >> step=3041000, episode=507 reward=0.7766691 (527.54 it/sec) -training >> step=3041100, episode=507 reward=0.7486451 (481.07 it/sec) -training >> step=3041200, episode=507 reward=0.7626828 (545.24 it/sec) -training >> step=3041300, episode=508 reward=0.784188 (56.10 it/sec) -training >> step=3041400, episode=508 reward=0.7848732 (520.62 it/sec) -training >> step=3041500, episode=508 reward=0.7658317 (506.87 it/sec) -training >> step=3041600, episode=508 reward=0.791621 (550.15 it/sec) -training >> step=3041700, episode=508 reward=0.7702248 (496.86 it/sec) -training >> step=3041800, episode=508 reward=0.7727148 (505.50 it/sec) -training >> step=3041900, episode=508 reward=0.7788873 (543.63 it/sec) -training >> step=3042000, episode=508 reward=0.7836331 (553.76 it/sec) -training >> step=3042100, episode=508 reward=0.7769111 (507.27 it/sec) -training >> step=3042200, episode=508 reward=0.7585488 (516.20 it/sec) -training >> step=3042300, episode=508 reward=0.7635049 (532.08 it/sec) -training >> step=3042400, episode=508 reward=0.7614994 (513.88 it/sec) -training >> step=3042500, episode=508 reward=0.7704047 (542.93 it/sec) -training >> step=3042600, episode=508 reward=0.7420214 (566.74 it/sec) -training >> step=3042700, episode=508 reward=0.7733063 (528.75 it/sec) -training >> step=3042800, episode=508 reward=0.7716418 (506.39 it/sec) -training >> step=3042900, episode=508 reward=0.7793902 (533.46 it/sec) -training >> step=3043000, episode=508 reward=0.7804539 (549.13 it/sec) -training >> step=3043100, episode=508 reward=0.7630075 (535.72 it/sec) -training >> step=3043200, episode=508 reward=0.7545781 (508.17 it/sec) -training >> step=3043300, episode=508 reward=0.7599605 (549.02 it/sec) -training >> step=3043400, episode=508 reward=0.7796934 (496.67 it/sec) -training >> step=3043500, episode=508 reward=0.7815779 (520.37 it/sec) -training >> step=3043600, episode=508 reward=0.7616526 (551.05 it/sec) -training >> step=3043700, episode=508 reward=0.7384937 (578.56 it/sec) -training >> step=3043800, episode=508 reward=0.7874577 (527.49 it/sec) -training >> step=3043900, episode=508 reward=0.7619475 (515.97 it/sec) -training >> step=3044000, episode=508 reward=0.7991815 (516.19 it/sec) -training >> step=3044100, episode=508 reward=0.7811969 (494.81 it/sec) -training >> step=3044200, episode=508 reward=0.7691048 (541.64 it/sec) -training >> step=3044300, episode=508 reward=0.7710716 (549.17 it/sec) -training >> step=3044400, episode=508 reward=0.7563477 (545.05 it/sec) -training >> step=3044500, episode=508 reward=0.7716963 (533.82 it/sec) -training >> step=3044600, episode=508 reward=0.7682589 (521.99 it/sec) -training >> step=3044700, episode=508 reward=0.7779986 (536.66 it/sec) -training >> step=3044800, episode=508 reward=0.7533396 (529.61 it/sec) -training >> step=3044900, episode=508 reward=0.7736237 (549.17 it/sec) -training >> step=3045000, episode=508 reward=0.7754553 (551.29 it/sec) -training >> step=3045100, episode=508 reward=0.7762542 (547.22 it/sec) -training >> step=3045200, episode=508 reward=0.7919345 (509.83 it/sec) -training >> step=3045300, episode=508 reward=0.7626339 (509.93 it/sec) -training >> step=3045400, episode=508 reward=0.7684035 (524.81 it/sec) -training >> step=3045500, episode=508 reward=0.735061 (538.85 it/sec) -training >> step=3045600, episode=508 reward=0.7556425 (549.92 it/sec) -training >> step=3045700, episode=508 reward=0.7747176 (520.92 it/sec) -training >> step=3045800, episode=508 reward=0.7741285 (547.73 it/sec) -training >> step=3045900, episode=508 reward=0.7516945 (504.94 it/sec) -training >> step=3046000, episode=508 reward=0.7498314 (524.78 it/sec) -training >> step=3046100, episode=508 reward=0.7669381 (558.86 it/sec) -training >> step=3046200, episode=508 reward=0.7672415 (565.45 it/sec) -training >> step=3046300, episode=508 reward=0.7625018 (548.81 it/sec) -training >> step=3046400, episode=508 reward=0.7704906 (540.38 it/sec) -training >> step=3046500, episode=508 reward=0.7498056 (433.31 it/sec) -training >> step=3046600, episode=508 reward=0.7748587 (511.77 it/sec) -training >> step=3046700, episode=508 reward=0.7449976 (560.66 it/sec) -training >> step=3046800, episode=508 reward=0.7717257 (532.75 it/sec) -training >> step=3046900, episode=508 reward=0.7796612 (546.47 it/sec) -training >> step=3047000, episode=508 reward=0.7685582 (510.51 it/sec) -training >> step=3047100, episode=508 reward=0.7439781 (540.66 it/sec) -training >> step=3047200, episode=508 reward=0.7392775 (566.02 it/sec) -training >> step=3047300, episode=509 reward=0.7661211 (86.49 it/sec) -training >> step=3047400, episode=509 reward=0.7731146 (507.02 it/sec) -training >> step=3047500, episode=509 reward=0.7702099 (523.62 it/sec) -training >> step=3047600, episode=509 reward=0.7694244 (550.77 it/sec) -training >> step=3047700, episode=509 reward=0.7458107 (520.74 it/sec) -training >> step=3047800, episode=509 reward=0.7659256 (539.98 it/sec) -training >> step=3047900, episode=509 reward=0.7958547 (557.67 it/sec) -training >> step=3048000, episode=509 reward=0.7632641 (551.31 it/sec) -training >> step=3048100, episode=509 reward=0.7590783 (525.92 it/sec) -training >> step=3048200, episode=509 reward=0.7654598 (533.57 it/sec) -training >> step=3048300, episode=509 reward=0.7807837 (547.90 it/sec) -training >> step=3048400, episode=509 reward=0.745293 (535.00 it/sec) -training >> step=3048500, episode=509 reward=0.774446 (535.92 it/sec) -training >> step=3048600, episode=509 reward=0.7531264 (559.52 it/sec) -training >> step=3048700, episode=509 reward=0.7619399 (531.56 it/sec) -training >> step=3048800, episode=509 reward=0.7698841 (533.64 it/sec) -training >> step=3048900, episode=509 reward=0.7695481 (554.66 it/sec) -training >> step=3049000, episode=509 reward=0.7487842 (547.30 it/sec) -training >> step=3049100, episode=509 reward=0.7765797 (554.95 it/sec) -training >> step=3049200, episode=509 reward=0.7768069 (563.95 it/sec) -training >> step=3049300, episode=509 reward=0.7630822 (548.32 it/sec) -training >> step=3049400, episode=509 reward=0.7813419 (497.32 it/sec) -training >> step=3049500, episode=509 reward=0.7692022 (474.63 it/sec) -training >> step=3049600, episode=509 reward=0.767917 (576.92 it/sec) -training >> step=3049700, episode=509 reward=0.7823379 (556.35 it/sec) -training >> step=3049800, episode=509 reward=0.764906 (509.42 it/sec) -training >> step=3049900, episode=509 reward=0.7891353 (522.15 it/sec) -training >> step=3050000, episode=509 reward=0.7905546 (552.31 it/sec) -training >> step=3050100, episode=509 reward=0.7732912 (530.79 it/sec) -training >> step=3050200, episode=509 reward=0.7759101 (520.01 it/sec) -training >> step=3050300, episode=509 reward=0.7782471 (520.38 it/sec) -training >> step=3050400, episode=509 reward=0.7642943 (516.60 it/sec) -training >> step=3050500, episode=509 reward=0.7651451 (481.72 it/sec) -training >> step=3050600, episode=509 reward=0.7585062 (457.59 it/sec) -training >> step=3050700, episode=509 reward=0.7625591 (519.29 it/sec) -training >> step=3050800, episode=509 reward=0.7620654 (503.64 it/sec) -training >> step=3050900, episode=509 reward=0.7653121 (464.75 it/sec) -training >> step=3051000, episode=509 reward=0.765352 (522.69 it/sec) -training >> step=3051100, episode=509 reward=0.771162 (496.74 it/sec) -training >> step=3051200, episode=509 reward=0.7926067 (536.48 it/sec) -training >> step=3051300, episode=509 reward=0.768667 (554.28 it/sec) -training >> step=3051400, episode=509 reward=0.761685 (557.83 it/sec) -training >> step=3051500, episode=509 reward=0.7546991 (563.26 it/sec) -training >> step=3051600, episode=509 reward=0.7768422 (537.16 it/sec) -training >> step=3051700, episode=509 reward=0.7576314 (534.79 it/sec) -training >> step=3051800, episode=509 reward=0.7789023 (541.65 it/sec) -training >> step=3051900, episode=509 reward=0.7585791 (519.01 it/sec) -training >> step=3052000, episode=509 reward=0.7689447 (546.33 it/sec) -training >> step=3052100, episode=509 reward=0.7655038 (520.61 it/sec) -training >> step=3052200, episode=509 reward=0.7478182 (565.19 it/sec) -training >> step=3052300, episode=509 reward=0.7597616 (506.35 it/sec) -training >> step=3052400, episode=509 reward=0.7513108 (483.36 it/sec) -training >> step=3052500, episode=509 reward=0.7673733 (457.56 it/sec) -training >> step=3052600, episode=509 reward=0.7465051 (328.80 it/sec) -training >> step=3052700, episode=509 reward=0.7661862 (427.48 it/sec) -training >> step=3052800, episode=509 reward=0.7734804 (494.04 it/sec) -training >> step=3052900, episode=509 reward=0.7881882 (515.91 it/sec) -training >> step=3053000, episode=509 reward=0.7779949 (490.71 it/sec) -training >> step=3053100, episode=509 reward=0.7524734 (516.90 it/sec) -training >> step=3053200, episode=509 reward=0.7747453 (502.09 it/sec) -training >> step=3053300, episode=510 reward=0.7532866 (86.85 it/sec) -training >> step=3053400, episode=510 reward=0.7736108 (491.95 it/sec) -training >> step=3053500, episode=510 reward=0.7633195 (449.97 it/sec) -training >> step=3053600, episode=510 reward=0.7784502 (534.02 it/sec) -training >> step=3053700, episode=510 reward=0.7669893 (524.49 it/sec) -training >> step=3053800, episode=510 reward=0.7787218 (518.22 it/sec) -training >> step=3053900, episode=510 reward=0.7661132 (543.19 it/sec) -training >> step=3054000, episode=510 reward=0.7661661 (539.72 it/sec) -training >> step=3054100, episode=510 reward=0.7672662 (528.44 it/sec) -training >> step=3054200, episode=510 reward=0.7720971 (534.19 it/sec) -training >> step=3054300, episode=510 reward=0.7739078 (537.63 it/sec) -training >> step=3054400, episode=510 reward=0.7651927 (530.96 it/sec) -training >> step=3054500, episode=510 reward=0.7712215 (512.58 it/sec) -training >> step=3054600, episode=510 reward=0.7691396 (571.77 it/sec) -training >> step=3054700, episode=510 reward=0.7594869 (545.08 it/sec) -training >> step=3054800, episode=510 reward=0.7699397 (523.04 it/sec) -training >> step=3054900, episode=510 reward=0.763186 (538.98 it/sec) -training >> step=3055000, episode=510 reward=0.790713 (561.44 it/sec) -training >> step=3055100, episode=510 reward=0.7647727 (543.59 it/sec) -training >> step=3055200, episode=510 reward=0.7753255 (522.54 it/sec) -training >> step=3055300, episode=510 reward=0.7727112 (530.36 it/sec) -training >> step=3055400, episode=510 reward=0.7673572 (515.53 it/sec) -training >> step=3055500, episode=510 reward=0.774067 (548.37 it/sec) -training >> step=3055600, episode=510 reward=0.7757004 (536.80 it/sec) -training >> step=3055700, episode=510 reward=0.7746139 (557.03 it/sec) -training >> step=3055800, episode=510 reward=0.7670439 (500.16 it/sec) -training >> step=3055900, episode=510 reward=0.7763824 (507.81 it/sec) -training >> step=3056000, episode=510 reward=0.7483126 (550.45 it/sec) -training >> step=3056100, episode=510 reward=0.7650987 (533.29 it/sec) -training >> step=3056200, episode=510 reward=0.7636616 (525.33 it/sec) -training >> step=3056300, episode=510 reward=0.7738456 (537.65 it/sec) -training >> step=3056400, episode=510 reward=0.7483063 (498.94 it/sec) -training >> step=3056500, episode=510 reward=0.7747216 (499.61 it/sec) -training >> step=3056600, episode=510 reward=0.7573609 (548.55 it/sec) -training >> step=3056700, episode=510 reward=0.7693141 (479.56 it/sec) -training >> step=3056800, episode=510 reward=0.7944583 (546.11 it/sec) -training >> step=3056900, episode=510 reward=0.7770749 (526.61 it/sec) -training >> step=3057000, episode=510 reward=0.7722999 (470.23 it/sec) -training >> step=3057100, episode=510 reward=0.7740342 (532.04 it/sec) -training >> step=3057200, episode=510 reward=0.7791772 (479.75 it/sec) -training >> step=3057300, episode=510 reward=0.7824938 (488.71 it/sec) -training >> step=3057400, episode=510 reward=0.7749916 (513.25 it/sec) -training >> step=3057500, episode=510 reward=0.7922331 (532.75 it/sec) -training >> step=3057600, episode=510 reward=0.776664 (498.73 it/sec) -training >> step=3057700, episode=510 reward=0.7788729 (464.35 it/sec) -training >> step=3057800, episode=510 reward=0.7577754 (508.89 it/sec) -training >> step=3057900, episode=510 reward=0.7678172 (551.93 it/sec) -training >> step=3058000, episode=510 reward=0.7635772 (489.73 it/sec) -training >> step=3058100, episode=510 reward=0.7710698 (459.33 it/sec) -training >> step=3058200, episode=510 reward=0.783951 (556.72 it/sec) -training >> step=3058300, episode=510 reward=0.7614332 (525.57 it/sec) -training >> step=3058400, episode=510 reward=0.7773991 (522.10 it/sec) -training >> step=3058500, episode=510 reward=0.7681798 (552.79 it/sec) -training >> step=3058600, episode=510 reward=0.7391064 (564.77 it/sec) -training >> step=3058700, episode=510 reward=0.7746449 (393.87 it/sec) -training >> step=3058800, episode=510 reward=0.7603108 (530.90 it/sec) -training >> step=3058900, episode=510 reward=0.7344822 (567.08 it/sec) -training >> step=3059000, episode=510 reward=0.7662109 (513.60 it/sec) -training >> step=3059100, episode=510 reward=0.7723447 (546.62 it/sec) -training >> step=3059200, episode=510 reward=0.7767818 (563.36 it/sec) -training >> step=3059300, episode=511 reward=0.7532569 (125.10 it/sec) -training >> step=3059400, episode=511 reward=0.7792811 (531.92 it/sec) -training >> step=3059500, episode=511 reward=0.7584572 (536.80 it/sec) -training >> step=3059600, episode=511 reward=0.7640078 (548.39 it/sec) -training >> step=3059700, episode=511 reward=0.7736839 (551.69 it/sec) -training >> step=3059800, episode=511 reward=0.765157 (542.57 it/sec) -training >> step=3059900, episode=511 reward=0.7782897 (560.30 it/sec) -training >> step=3060000, episode=511 reward=0.7518057 (565.62 it/sec) -training >> step=3060100, episode=511 reward=0.7628095 (518.35 it/sec) -training >> step=3060200, episode=511 reward=0.7849348 (467.46 it/sec) -training >> step=3060300, episode=511 reward=0.7668071 (528.72 it/sec) -training >> step=3060400, episode=511 reward=0.7760574 (526.46 it/sec) -training >> step=3060500, episode=511 reward=0.7716994 (546.35 it/sec) -training >> step=3060600, episode=511 reward=0.7698361 (542.22 it/sec) -training >> step=3060700, episode=511 reward=0.7835598 (556.08 it/sec) -training >> step=3060800, episode=511 reward=0.7708451 (521.27 it/sec) -training >> step=3060900, episode=511 reward=0.7817847 (499.28 it/sec) -training >> step=3061000, episode=511 reward=0.7711164 (496.80 it/sec) -training >> step=3061100, episode=511 reward=0.7785735 (527.52 it/sec) -training >> step=3061200, episode=511 reward=0.7576796 (531.11 it/sec) -training >> step=3061300, episode=511 reward=0.777446 (516.49 it/sec) -training >> step=3061400, episode=511 reward=0.7554379 (545.36 it/sec) -training >> step=3061500, episode=511 reward=0.7686527 (539.16 it/sec) -training >> step=3061600, episode=511 reward=0.7817385 (524.43 it/sec) -training >> step=3061700, episode=511 reward=0.7650942 (495.98 it/sec) -training >> step=3061800, episode=511 reward=0.7489739 (558.30 it/sec) -training >> step=3061900, episode=511 reward=0.7681243 (489.74 it/sec) -training >> step=3062000, episode=511 reward=0.7732708 (552.92 it/sec) -training >> step=3062100, episode=511 reward=0.7556396 (532.67 it/sec) -training >> step=3062200, episode=511 reward=0.7777096 (568.77 it/sec) -training >> step=3062300, episode=511 reward=0.7774286 (536.78 it/sec) -training >> step=3062400, episode=511 reward=0.76423 (517.15 it/sec) -training >> step=3062500, episode=511 reward=0.7835641 (527.45 it/sec) -training >> step=3062600, episode=511 reward=0.7576656 (565.57 it/sec) -training >> step=3062700, episode=511 reward=0.7497734 (504.85 it/sec) -training >> step=3062800, episode=511 reward=0.7766384 (531.83 it/sec) -training >> step=3062900, episode=511 reward=0.7642904 (567.50 it/sec) -training >> step=3063000, episode=511 reward=0.758471 (495.98 it/sec) -training >> step=3063100, episode=511 reward=0.7665316 (533.91 it/sec) -training >> step=3063200, episode=511 reward=0.7633631 (542.88 it/sec) -training >> step=3063300, episode=511 reward=0.7542964 (539.99 it/sec) -training >> step=3063400, episode=511 reward=0.7743838 (561.81 it/sec) -training >> step=3063500, episode=511 reward=0.7803873 (554.03 it/sec) -training >> step=3063600, episode=511 reward=0.7812855 (514.65 it/sec) -training >> step=3063700, episode=511 reward=0.7661061 (533.59 it/sec) -training >> step=3063800, episode=511 reward=0.7926821 (534.64 it/sec) -training >> step=3063900, episode=511 reward=0.7711951 (537.88 it/sec) -training >> step=3064000, episode=511 reward=0.7852368 (563.96 it/sec) -training >> step=3064100, episode=511 reward=0.7661974 (517.40 it/sec) -training >> step=3064200, episode=511 reward=0.7851179 (522.41 it/sec) -training >> step=3064300, episode=511 reward=0.7687648 (539.22 it/sec) -training >> step=3064400, episode=511 reward=0.7708563 (541.60 it/sec) -training >> step=3064500, episode=511 reward=0.7723976 (517.72 it/sec) -training >> step=3064600, episode=511 reward=0.7675994 (528.02 it/sec) -training >> step=3064700, episode=511 reward=0.7851321 (509.88 it/sec) -training >> step=3064800, episode=511 reward=0.7798883 (502.48 it/sec) -training >> step=3064900, episode=511 reward=0.7476743 (436.93 it/sec) -training >> step=3065000, episode=511 reward=0.7658828 (529.87 it/sec) -training >> step=3065100, episode=511 reward=0.741735 (548.92 it/sec) -training >> step=3065200, episode=511 reward=0.7650434 (522.24 it/sec) -training >> step=3065300, episode=512 reward=0.7705906 (69.36 it/sec) -training >> step=3065400, episode=512 reward=0.7684231 (504.41 it/sec) -training >> step=3065500, episode=512 reward=0.7654846 (503.80 it/sec) -training >> step=3065600, episode=512 reward=0.7776026 (541.92 it/sec) -training >> step=3065700, episode=512 reward=0.7546925 (526.78 it/sec) -training >> step=3065800, episode=512 reward=0.775919 (530.41 it/sec) -training >> step=3065900, episode=512 reward=0.7787191 (509.83 it/sec) -training >> step=3066000, episode=512 reward=0.7600227 (520.28 it/sec) -training >> step=3066100, episode=512 reward=0.753395 (510.23 it/sec) -training >> step=3066200, episode=512 reward=0.7594941 (500.05 it/sec) -training >> step=3066300, episode=512 reward=0.7665676 (556.48 it/sec) -training >> step=3066400, episode=512 reward=0.7739067 (507.38 it/sec) -training >> step=3066500, episode=512 reward=0.7645607 (481.89 it/sec) -training >> step=3066600, episode=512 reward=0.7830453 (519.37 it/sec) -training >> step=3066700, episode=512 reward=0.788654 (452.30 it/sec) -training >> step=3066800, episode=512 reward=0.7797977 (458.77 it/sec) -training >> step=3066900, episode=512 reward=0.7831395 (441.64 it/sec) -training >> step=3067000, episode=512 reward=0.7672029 (444.29 it/sec) -training >> step=3067100, episode=512 reward=0.7726 (480.21 it/sec) -training >> step=3067200, episode=512 reward=0.7624497 (507.44 it/sec) -training >> step=3067300, episode=512 reward=0.7793947 (555.30 it/sec) -training >> step=3067400, episode=512 reward=0.7735267 (502.26 it/sec) -training >> step=3067500, episode=512 reward=0.7696372 (518.69 it/sec) -training >> step=3067600, episode=512 reward=0.7590635 (525.32 it/sec) -training >> step=3067700, episode=512 reward=0.7639298 (540.21 it/sec) -training >> step=3067800, episode=512 reward=0.7599425 (504.11 it/sec) -training >> step=3067900, episode=512 reward=0.7444217 (470.04 it/sec) -training >> step=3068000, episode=512 reward=0.761019 (583.14 it/sec) -training >> step=3068100, episode=512 reward=0.7763218 (515.18 it/sec) -training >> step=3068200, episode=512 reward=0.7798423 (540.91 it/sec) -training >> step=3068300, episode=512 reward=0.7761109 (579.39 it/sec) -training >> step=3068400, episode=512 reward=0.7539323 (591.69 it/sec) -training >> step=3068500, episode=512 reward=0.7776587 (524.58 it/sec) -training >> step=3068600, episode=512 reward=0.7694556 (521.68 it/sec) -training >> step=3068700, episode=512 reward=0.7799633 (516.00 it/sec) -training >> step=3068800, episode=512 reward=0.7405837 (555.51 it/sec) -training >> step=3068900, episode=512 reward=0.7618545 (579.93 it/sec) -training >> step=3069000, episode=512 reward=0.7701738 (564.51 it/sec) -training >> step=3069100, episode=512 reward=0.7712459 (596.26 it/sec) -training >> step=3069200, episode=512 reward=0.7730092 (521.46 it/sec) -training >> step=3069300, episode=512 reward=0.7812306 (523.08 it/sec) -training >> step=3069400, episode=512 reward=0.7665415 (547.62 it/sec) -training >> step=3069500, episode=512 reward=0.7651728 (542.64 it/sec) -training >> step=3069600, episode=512 reward=0.7473119 (522.62 it/sec) -training >> step=3069700, episode=512 reward=0.7720859 (516.78 it/sec) -training >> step=3069800, episode=512 reward=0.7512114 (548.95 it/sec) -training >> step=3069900, episode=512 reward=0.7817498 (570.82 it/sec) -training >> step=3070000, episode=512 reward=0.7600271 (571.36 it/sec) -training >> step=3070100, episode=512 reward=0.7857503 (539.03 it/sec) -training >> step=3070200, episode=512 reward=0.7343628 (549.31 it/sec) -training >> step=3070300, episode=512 reward=0.7551579 (514.26 it/sec) -training >> step=3070400, episode=512 reward=0.7840925 (462.72 it/sec) -training >> step=3070500, episode=512 reward=0.76451 (487.94 it/sec) -training >> step=3070600, episode=512 reward=0.7477908 (529.32 it/sec) -training >> step=3070700, episode=512 reward=0.7490309 (518.14 it/sec) -training >> step=3070800, episode=512 reward=0.7591445 (501.18 it/sec) -training >> step=3070900, episode=512 reward=0.7587845 (553.63 it/sec) -training >> step=3071000, episode=512 reward=0.7752158 (428.67 it/sec) -training >> step=3071100, episode=512 reward=0.7858 (540.30 it/sec) -training >> step=3071200, episode=512 reward=0.7817513 (506.13 it/sec) -training >> step=3071300, episode=513 reward=0.759106 (92.06 it/sec) -training >> step=3071400, episode=513 reward=0.7552558 (452.41 it/sec) -training >> step=3071500, episode=513 reward=0.7516012 (474.83 it/sec) -training >> step=3071600, episode=513 reward=0.7616551 (526.09 it/sec) -training >> step=3071700, episode=513 reward=0.7895197 (552.17 it/sec) -training >> step=3071800, episode=513 reward=0.7686028 (512.06 it/sec) -training >> step=3071900, episode=513 reward=0.7606496 (537.91 it/sec) -training >> step=3072000, episode=513 reward=0.7830839 (478.02 it/sec) -training >> step=3072100, episode=513 reward=0.77719 (526.00 it/sec) -training >> step=3072200, episode=513 reward=0.7653305 (544.55 it/sec) -training >> step=3072300, episode=513 reward=0.7507578 (561.72 it/sec) -training >> step=3072400, episode=513 reward=0.7728483 (510.08 it/sec) -training >> step=3072500, episode=513 reward=0.7718399 (492.07 it/sec) -training >> step=3072600, episode=513 reward=0.770761 (520.93 it/sec) -training >> step=3072700, episode=513 reward=0.7561312 (525.85 it/sec) -training >> step=3072800, episode=513 reward=0.7698475 (544.51 it/sec) -training >> step=3072900, episode=513 reward=0.7891433 (538.50 it/sec) -training >> step=3073000, episode=513 reward=0.7599454 (538.32 it/sec) -training >> step=3073100, episode=513 reward=0.7704396 (569.51 it/sec) -training >> step=3073200, episode=513 reward=0.7813869 (535.74 it/sec) -training >> step=3073300, episode=513 reward=0.772174 (561.03 it/sec) -training >> step=3073400, episode=513 reward=0.7708861 (527.91 it/sec) -training >> step=3073500, episode=513 reward=0.763793 (526.09 it/sec) -training >> step=3073600, episode=513 reward=0.7420112 (533.18 it/sec) -training >> step=3073700, episode=513 reward=0.7911162 (540.42 it/sec) -training >> step=3073800, episode=513 reward=0.7722256 (543.57 it/sec) -training >> step=3073900, episode=513 reward=0.7906118 (513.18 it/sec) -training >> step=3074000, episode=513 reward=0.762674 (524.53 it/sec) -training >> step=3074100, episode=513 reward=0.7535502 (520.61 it/sec) -training >> step=3074200, episode=513 reward=0.7703937 (539.28 it/sec) -training >> step=3074300, episode=513 reward=0.7574761 (526.16 it/sec) -training >> step=3074400, episode=513 reward=0.7878977 (573.02 it/sec) -training >> step=3074500, episode=513 reward=0.7540073 (564.64 it/sec) -training >> step=3074600, episode=513 reward=0.7740811 (527.79 it/sec) -training >> step=3074700, episode=513 reward=0.7878637 (478.93 it/sec) -training >> step=3074800, episode=513 reward=0.7879138 (491.90 it/sec) -training >> step=3074900, episode=513 reward=0.7513993 (503.96 it/sec) -training >> step=3075000, episode=513 reward=0.7753674 (531.06 it/sec) -training >> step=3075100, episode=513 reward=0.769931 (570.20 it/sec) -training >> step=3075200, episode=513 reward=0.7763665 (496.16 it/sec) -training >> step=3075300, episode=513 reward=0.7879706 (533.26 it/sec) -training >> step=3075400, episode=513 reward=0.7794974 (537.25 it/sec) -training >> step=3075500, episode=513 reward=0.7681293 (563.79 it/sec) -training >> step=3075600, episode=513 reward=0.7973588 (510.38 it/sec) -training >> step=3075700, episode=513 reward=0.7798889 (535.42 it/sec) -training >> step=3075800, episode=513 reward=0.7486603 (549.92 it/sec) -training >> step=3075900, episode=513 reward=0.7899912 (544.16 it/sec) -training >> step=3076000, episode=513 reward=0.7542239 (499.41 it/sec) -training >> step=3076100, episode=513 reward=0.7626066 (563.68 it/sec) -training >> step=3076200, episode=513 reward=0.7639043 (550.85 it/sec) -training >> step=3076300, episode=513 reward=0.7475141 (466.97 it/sec) -training >> step=3076400, episode=513 reward=0.7877535 (541.23 it/sec) -training >> step=3076500, episode=513 reward=0.7758644 (546.25 it/sec) -training >> step=3076600, episode=513 reward=0.768965 (539.06 it/sec) -training >> step=3076700, episode=513 reward=0.7550818 (550.71 it/sec) -training >> step=3076800, episode=513 reward=0.7515106 (539.66 it/sec) -training >> step=3076900, episode=513 reward=0.7530003 (560.58 it/sec) -training >> step=3077000, episode=513 reward=0.7726907 (504.92 it/sec) -training >> step=3077100, episode=513 reward=0.7741671 (551.85 it/sec) -training >> step=3077200, episode=513 reward=0.7525137 (557.23 it/sec) -training >> step=3077300, episode=514 reward=0.7696738 (69.67 it/sec) -training >> step=3077400, episode=514 reward=0.7760215 (516.42 it/sec) -training >> step=3077500, episode=514 reward=0.7579013 (549.86 it/sec) -training >> step=3077600, episode=514 reward=0.7721025 (507.47 it/sec) -training >> step=3077700, episode=514 reward=0.7946693 (562.86 it/sec) -training >> step=3077800, episode=514 reward=0.7725832 (532.61 it/sec) -training >> step=3077900, episode=514 reward=0.7587786 (534.65 it/sec) -training >> step=3078000, episode=514 reward=0.7720674 (515.93 it/sec) -training >> step=3078100, episode=514 reward=0.7662787 (537.11 it/sec) -training >> step=3078200, episode=514 reward=0.7765694 (559.68 it/sec) -training >> step=3078300, episode=514 reward=0.7878287 (510.30 it/sec) -training >> step=3078400, episode=514 reward=0.7961999 (538.45 it/sec) -training >> step=3078500, episode=514 reward=0.7726085 (531.59 it/sec) -training >> step=3078600, episode=514 reward=0.7661774 (537.57 it/sec) -training >> step=3078700, episode=514 reward=0.7633004 (565.30 it/sec) -training >> step=3078800, episode=514 reward=0.7732599 (556.43 it/sec) -training >> step=3078900, episode=514 reward=0.764414 (501.13 it/sec) -training >> step=3079000, episode=514 reward=0.7631058 (525.20 it/sec) -training >> step=3079100, episode=514 reward=0.7492983 (528.94 it/sec) -training >> step=3079200, episode=514 reward=0.7706887 (557.67 it/sec) -training >> step=3079300, episode=514 reward=0.7791064 (530.70 it/sec) -training >> step=3079400, episode=514 reward=0.7715086 (497.17 it/sec) -training >> step=3079500, episode=514 reward=0.7910903 (536.37 it/sec) -training >> step=3079600, episode=514 reward=0.7813029 (489.29 it/sec) -training >> step=3079700, episode=514 reward=0.7672882 (538.72 it/sec) -training >> step=3079800, episode=514 reward=0.7728103 (548.24 it/sec) -training >> step=3079900, episode=514 reward=0.7630168 (555.04 it/sec) -training >> step=3080000, episode=514 reward=0.7871098 (521.70 it/sec) -training >> step=3080100, episode=514 reward=0.7722758 (550.90 it/sec) -training >> step=3080200, episode=514 reward=0.7642648 (529.69 it/sec) -training >> step=3080300, episode=514 reward=0.7847306 (526.02 it/sec) -training >> step=3080400, episode=514 reward=0.7770494 (525.16 it/sec) -training >> step=3080500, episode=514 reward=0.7755899 (533.48 it/sec) -training >> step=3080600, episode=514 reward=0.7777097 (536.79 it/sec) -training >> step=3080700, episode=514 reward=0.7874478 (532.76 it/sec) -training >> step=3080800, episode=514 reward=0.7746977 (537.12 it/sec) -training >> step=3080900, episode=514 reward=0.7626915 (557.92 it/sec) -training >> step=3081000, episode=514 reward=0.792665 (577.53 it/sec) -training >> step=3081100, episode=514 reward=0.7729689 (485.49 it/sec) -training >> step=3081200, episode=514 reward=0.7823961 (498.03 it/sec) -training >> step=3081300, episode=514 reward=0.7603536 (448.43 it/sec) -training >> step=3081400, episode=514 reward=0.7565974 (481.93 it/sec) -training >> step=3081500, episode=514 reward=0.7731335 (408.93 it/sec) -training >> step=3081600, episode=514 reward=0.7748021 (455.45 it/sec) -training >> step=3081700, episode=514 reward=0.775907 (502.61 it/sec) -training >> step=3081800, episode=514 reward=0.7580749 (535.99 it/sec) -training >> step=3081900, episode=514 reward=0.7970556 (552.86 it/sec) -training >> step=3082000, episode=514 reward=0.7736609 (564.34 it/sec) -training >> step=3082100, episode=514 reward=0.7625446 (543.37 it/sec) -training >> step=3082200, episode=514 reward=0.7492893 (549.47 it/sec) -training >> step=3082300, episode=514 reward=0.7792408 (501.84 it/sec) -training >> step=3082400, episode=514 reward=0.7495008 (582.50 it/sec) -training >> step=3082500, episode=514 reward=0.7394541 (540.95 it/sec) -training >> step=3082600, episode=514 reward=0.7958136 (523.82 it/sec) -training >> step=3082700, episode=514 reward=0.7687138 (540.36 it/sec) -training >> step=3082800, episode=514 reward=0.7578582 (558.91 it/sec) -training >> step=3082900, episode=514 reward=0.7658928 (523.17 it/sec) -training >> step=3083000, episode=514 reward=0.7624067 (547.60 it/sec) -training >> step=3083100, episode=514 reward=0.7573262 (532.97 it/sec) -training >> step=3083200, episode=514 reward=0.7505195 (511.27 it/sec) -training >> step=3083300, episode=515 reward=0.7668357 (147.56 it/sec) -training >> step=3083400, episode=515 reward=0.7670951 (540.38 it/sec) -training >> step=3083500, episode=515 reward=0.7335818 (539.11 it/sec) -training >> step=3083600, episode=515 reward=0.7473392 (542.42 it/sec) -training >> step=3083700, episode=515 reward=0.7415956 (510.96 it/sec) -training >> step=3083800, episode=515 reward=0.7744687 (545.50 it/sec) -training >> step=3083900, episode=515 reward=0.7991183 (570.28 it/sec) -training >> step=3084000, episode=515 reward=0.7756861 (524.62 it/sec) -training >> step=3084100, episode=515 reward=0.7665285 (518.88 it/sec) -training >> step=3084200, episode=515 reward=0.7398203 (558.03 it/sec) -training >> step=3084300, episode=515 reward=0.727806 (575.46 it/sec) -training >> step=3084400, episode=515 reward=0.7479385 (523.95 it/sec) -training >> step=3084500, episode=515 reward=0.7702426 (547.91 it/sec) -training >> step=3084600, episode=515 reward=0.77015 (520.49 it/sec) -training >> step=3084700, episode=515 reward=0.7539062 (519.71 it/sec) -training >> step=3084800, episode=515 reward=0.7836313 (524.78 it/sec) -training >> step=3084900, episode=515 reward=0.7863156 (553.34 it/sec) -training >> step=3085000, episode=515 reward=0.7880279 (554.22 it/sec) -training >> step=3085100, episode=515 reward=0.777169 (523.71 it/sec) -training >> step=3085200, episode=515 reward=0.7535186 (527.35 it/sec) -training >> step=3085300, episode=515 reward=0.7390965 (560.15 it/sec) -training >> step=3085400, episode=515 reward=0.7741714 (557.21 it/sec) -training >> step=3085500, episode=515 reward=0.7319506 (550.18 it/sec) -training >> step=3085600, episode=515 reward=0.7743222 (529.44 it/sec) -training >> step=3085700, episode=515 reward=0.7482488 (524.27 it/sec) -training >> step=3085800, episode=515 reward=0.7581952 (518.72 it/sec) -training >> step=3085900, episode=515 reward=0.7436966 (525.90 it/sec) -training >> step=3086000, episode=515 reward=0.7596993 (532.16 it/sec) -training >> step=3086100, episode=515 reward=0.7794307 (569.73 it/sec) -training >> step=3086200, episode=515 reward=0.7796518 (556.37 it/sec) -training >> step=3086300, episode=515 reward=0.768685 (502.28 it/sec) -training >> step=3086400, episode=515 reward=0.7753221 (539.74 it/sec) -training >> step=3086500, episode=515 reward=0.7553092 (522.53 it/sec) -training >> step=3086600, episode=515 reward=0.7750239 (527.58 it/sec) -training >> step=3086700, episode=515 reward=0.7875789 (551.40 it/sec) -training >> step=3086800, episode=515 reward=0.7696364 (560.79 it/sec) -training >> step=3086900, episode=515 reward=0.7774756 (522.44 it/sec) -training >> step=3087000, episode=515 reward=0.7621208 (540.97 it/sec) -training >> step=3087100, episode=515 reward=0.7746339 (537.69 it/sec) -training >> step=3087200, episode=515 reward=0.7815456 (580.12 it/sec) -training >> step=3087300, episode=515 reward=0.7840518 (547.03 it/sec) -training >> step=3087400, episode=515 reward=0.7659193 (530.70 it/sec) -training >> step=3087500, episode=515 reward=0.7459321 (540.92 it/sec) -training >> step=3087600, episode=515 reward=0.7700938 (532.65 it/sec) -training >> step=3087700, episode=515 reward=0.7765471 (565.65 it/sec) -training >> step=3087800, episode=515 reward=0.752003 (545.61 it/sec) -training >> step=3087900, episode=515 reward=0.7643098 (581.37 it/sec) -training >> step=3088000, episode=515 reward=0.7911618 (515.33 it/sec) -training >> step=3088100, episode=515 reward=0.7694929 (501.97 it/sec) -training >> step=3088200, episode=515 reward=0.7822419 (571.31 it/sec) -training >> step=3088300, episode=515 reward=0.7883198 (559.76 it/sec) -training >> step=3088400, episode=515 reward=0.7889855 (531.68 it/sec) -training >> step=3088500, episode=515 reward=0.7740096 (530.58 it/sec) -training >> step=3088600, episode=515 reward=0.7651457 (542.19 it/sec) -training >> step=3088700, episode=515 reward=0.7630649 (512.20 it/sec) -training >> step=3088800, episode=515 reward=0.7586262 (537.84 it/sec) -training >> step=3088900, episode=515 reward=0.7765865 (560.94 it/sec) -training >> step=3089000, episode=515 reward=0.7804578 (587.02 it/sec) -training >> step=3089100, episode=515 reward=0.7631145 (535.45 it/sec) -training >> step=3089200, episode=515 reward=0.7797822 (539.03 it/sec) -training >> step=3089300, episode=516 reward=0.7839949 (123.67 it/sec) -training >> step=3089400, episode=516 reward=0.7742849 (528.39 it/sec) -training >> step=3089500, episode=516 reward=0.790186 (550.81 it/sec) -training >> step=3089600, episode=516 reward=0.7530822 (523.04 it/sec) -training >> step=3089700, episode=516 reward=0.7631798 (553.68 it/sec) -training >> step=3089800, episode=516 reward=0.7474988 (488.02 it/sec) -training >> step=3089900, episode=516 reward=0.7638042 (525.39 it/sec) -training >> step=3090000, episode=516 reward=0.7643507 (538.49 it/sec) -training >> step=3090100, episode=516 reward=0.7797367 (535.71 it/sec) -training >> step=3090200, episode=516 reward=0.7881031 (530.95 it/sec) -training >> step=3090300, episode=516 reward=0.7688612 (544.40 it/sec) -training >> step=3090400, episode=516 reward=0.7615294 (537.07 it/sec) -training >> step=3090500, episode=516 reward=0.77025 (489.48 it/sec) -training >> step=3090600, episode=516 reward=0.7709703 (525.95 it/sec) -training >> step=3090700, episode=516 reward=0.7996064 (539.87 it/sec) -training >> step=3090800, episode=516 reward=0.7722441 (573.86 it/sec) -training >> step=3090900, episode=516 reward=0.7697709 (507.97 it/sec) -training >> step=3091000, episode=516 reward=0.7569492 (545.79 it/sec) -training >> step=3091100, episode=516 reward=0.7653347 (539.36 it/sec) -training >> step=3091200, episode=516 reward=0.7562959 (545.25 it/sec) -training >> step=3091300, episode=516 reward=0.766854 (558.41 it/sec) -training >> step=3091400, episode=516 reward=0.7773431 (538.10 it/sec) -training >> step=3091500, episode=516 reward=0.7717662 (535.21 it/sec) -training >> step=3091600, episode=516 reward=0.7786511 (524.10 it/sec) -training >> step=3091700, episode=516 reward=0.7665672 (512.30 it/sec) -training >> step=3091800, episode=516 reward=0.784888 (489.81 it/sec) -training >> step=3091900, episode=516 reward=0.7602128 (527.45 it/sec) -training >> step=3092000, episode=516 reward=0.7852593 (439.25 it/sec) -training >> step=3092100, episode=516 reward=0.7685804 (476.65 it/sec) -training >> step=3092200, episode=516 reward=0.7747961 (543.73 it/sec) -training >> step=3092300, episode=516 reward=0.753644 (518.98 it/sec) -training >> step=3092400, episode=516 reward=0.7825843 (550.06 it/sec) -training >> step=3092500, episode=516 reward=0.7826966 (493.06 it/sec) -training >> step=3092600, episode=516 reward=0.7649564 (573.50 it/sec) -training >> step=3092700, episode=516 reward=0.7740753 (552.82 it/sec) -training >> step=3092800, episode=516 reward=0.7811217 (524.99 it/sec) -training >> step=3092900, episode=516 reward=0.7634895 (555.13 it/sec) -training >> step=3093000, episode=516 reward=0.7850986 (526.15 it/sec) -training >> step=3093100, episode=516 reward=0.7822794 (455.79 it/sec) -training >> step=3093200, episode=516 reward=0.7893335 (568.33 it/sec) -training >> step=3093300, episode=516 reward=0.7570155 (515.65 it/sec) -training >> step=3093400, episode=516 reward=0.7588557 (514.61 it/sec) -training >> step=3093500, episode=516 reward=0.776434 (520.86 it/sec) -training >> step=3093600, episode=516 reward=0.7683908 (500.14 it/sec) -training >> step=3093700, episode=516 reward=0.7820228 (550.24 it/sec) -training >> step=3093800, episode=516 reward=0.7378403 (514.59 it/sec) -training >> step=3093900, episode=516 reward=0.7649394 (550.64 it/sec) -training >> step=3094000, episode=516 reward=0.769136 (550.64 it/sec) -training >> step=3094100, episode=516 reward=0.7647099 (537.40 it/sec) -training >> step=3094200, episode=516 reward=0.7507284 (526.24 it/sec) -training >> step=3094300, episode=516 reward=0.7743511 (528.76 it/sec) -training >> step=3094400, episode=516 reward=0.7785758 (540.07 it/sec) -training >> step=3094500, episode=516 reward=0.7582916 (500.22 it/sec) -training >> step=3094600, episode=516 reward=0.7876372 (523.04 it/sec) -training >> step=3094700, episode=516 reward=0.778161 (535.74 it/sec) -training >> step=3094800, episode=516 reward=0.7506041 (537.64 it/sec) -training >> step=3094900, episode=516 reward=0.7642813 (543.10 it/sec) -training >> step=3095000, episode=516 reward=0.7686843 (510.14 it/sec) -training >> step=3095100, episode=516 reward=0.7666869 (564.74 it/sec) -training >> step=3095200, episode=516 reward=0.766867 (521.24 it/sec) -training >> step=3095300, episode=517 reward=0.7545646 (62.72 it/sec) -training >> step=3095400, episode=517 reward=0.75717 (530.81 it/sec) -training >> step=3095500, episode=517 reward=0.7620353 (530.51 it/sec) -training >> step=3095600, episode=517 reward=0.7374403 (526.97 it/sec) -training >> step=3095700, episode=517 reward=0.7719631 (502.59 it/sec) -training >> step=3095800, episode=517 reward=0.7727944 (481.36 it/sec) -training >> step=3095900, episode=517 reward=0.7719132 (461.82 it/sec) -training >> step=3096000, episode=517 reward=0.763831 (502.97 it/sec) -training >> step=3096100, episode=517 reward=0.7587028 (435.28 it/sec) -training >> step=3096200, episode=517 reward=0.7570736 (479.82 it/sec) -training >> step=3096300, episode=517 reward=0.7668943 (519.45 it/sec) -training >> step=3096400, episode=517 reward=0.7646368 (526.26 it/sec) -training >> step=3096500, episode=517 reward=0.7798166 (513.23 it/sec) -training >> step=3096600, episode=517 reward=0.7595599 (567.94 it/sec) -training >> step=3096700, episode=517 reward=0.7636381 (558.28 it/sec) -training >> step=3096800, episode=517 reward=0.780156 (522.50 it/sec) -training >> step=3096900, episode=517 reward=0.7803102 (531.48 it/sec) -training >> step=3097000, episode=517 reward=0.7573563 (584.82 it/sec) -training >> step=3097100, episode=517 reward=0.7612635 (535.31 it/sec) -training >> step=3097200, episode=517 reward=0.7734869 (532.88 it/sec) -training >> step=3097300, episode=517 reward=0.7850703 (543.63 it/sec) -training >> step=3097400, episode=517 reward=0.7670444 (501.94 it/sec) -training >> step=3097500, episode=517 reward=0.7599048 (538.59 it/sec) -training >> step=3097600, episode=517 reward=0.7498004 (505.56 it/sec) -training >> step=3097700, episode=517 reward=0.7616997 (560.76 it/sec) -training >> step=3097800, episode=517 reward=0.7802651 (530.60 it/sec) -training >> step=3097900, episode=517 reward=0.7433013 (531.02 it/sec) -training >> step=3098000, episode=517 reward=0.785265 (548.40 it/sec) -training >> step=3098100, episode=517 reward=0.7721364 (575.96 it/sec) -training >> step=3098200, episode=517 reward=0.7922284 (526.08 it/sec) -training >> step=3098300, episode=517 reward=0.7801077 (557.55 it/sec) -training >> step=3098400, episode=517 reward=0.7751895 (509.13 it/sec) -training >> step=3098500, episode=517 reward=0.7797244 (567.72 it/sec) -training >> step=3098600, episode=517 reward=0.7607281 (533.22 it/sec) -training >> step=3098700, episode=517 reward=0.7636142 (530.68 it/sec) -training >> step=3098800, episode=517 reward=0.7670395 (534.79 it/sec) -training >> step=3098900, episode=517 reward=0.7584142 (508.34 it/sec) -training >> step=3099000, episode=517 reward=0.7676399 (533.89 it/sec) -training >> step=3099100, episode=517 reward=0.7597429 (533.89 it/sec) -training >> step=3099200, episode=517 reward=0.7659562 (551.92 it/sec) -training >> step=3099300, episode=517 reward=0.7629007 (549.32 it/sec) -training >> step=3099400, episode=517 reward=0.7739157 (516.98 it/sec) -training >> step=3099500, episode=517 reward=0.7588919 (504.75 it/sec) -training >> step=3099600, episode=517 reward=0.7637332 (524.35 it/sec) -training >> step=3099700, episode=517 reward=0.775869 (536.36 it/sec) -training >> step=3099800, episode=517 reward=0.7792674 (545.65 it/sec) -training >> step=3099900, episode=517 reward=0.7595223 (564.37 it/sec) -training >> step=3100000, episode=517 reward=0.7719228 (516.27 it/sec) -training >> step=3100100, episode=517 reward=0.7619017 (536.72 it/sec) -training >> step=3100200, episode=517 reward=0.770595 (553.73 it/sec) -training >> step=3100300, episode=517 reward=0.782601 (566.79 it/sec) -training >> step=3100400, episode=517 reward=0.7520622 (554.34 it/sec) -training >> step=3100500, episode=517 reward=0.7687777 (485.53 it/sec) -training >> step=3100600, episode=517 reward=0.747963 (493.00 it/sec) -training >> step=3100700, episode=517 reward=0.7694647 (531.17 it/sec) -training >> step=3100800, episode=517 reward=0.7707791 (551.65 it/sec) -training >> step=3100900, episode=517 reward=0.7595737 (474.11 it/sec) -training >> step=3101000, episode=517 reward=0.7761227 (526.26 it/sec) -training >> step=3101100, episode=517 reward=0.7714597 (481.05 it/sec) -training >> step=3101200, episode=517 reward=0.7824315 (538.06 it/sec) -training >> step=3101300, episode=518 reward=0.7954844 (93.90 it/sec) -training >> step=3101400, episode=518 reward=0.7671756 (523.11 it/sec) -training >> step=3101500, episode=518 reward=0.7646323 (540.93 it/sec) -training >> step=3101600, episode=518 reward=0.7581797 (531.67 it/sec) -training >> step=3101700, episode=518 reward=0.7771291 (515.89 it/sec) -training >> step=3101800, episode=518 reward=0.7579145 (445.62 it/sec) -training >> step=3101900, episode=518 reward=0.7562109 (512.46 it/sec) -training >> step=3102000, episode=518 reward=0.7564593 (466.82 it/sec) -training >> step=3102100, episode=518 reward=0.7676924 (412.48 it/sec) -training >> step=3102200, episode=518 reward=0.7820165 (431.37 it/sec) -training >> step=3102300, episode=518 reward=0.7749114 (509.66 it/sec) -training >> step=3102400, episode=518 reward=0.7722061 (518.01 it/sec) -training >> step=3102500, episode=518 reward=0.7870839 (520.33 it/sec) -training >> step=3102600, episode=518 reward=0.7930233 (574.74 it/sec) -training >> step=3102700, episode=518 reward=0.7710508 (523.39 it/sec) -training >> step=3102800, episode=518 reward=0.7792583 (512.76 it/sec) -training >> step=3102900, episode=518 reward=0.7590129 (548.58 it/sec) -training >> step=3103000, episode=518 reward=0.7773237 (548.40 it/sec) -training >> step=3103100, episode=518 reward=0.7676962 (533.89 it/sec) -training >> step=3103200, episode=518 reward=0.7571017 (548.78 it/sec) -training >> step=3103300, episode=518 reward=0.7927452 (521.12 it/sec) -training >> step=3103400, episode=518 reward=0.7720411 (527.62 it/sec) -training >> step=3103500, episode=518 reward=0.7529207 (494.93 it/sec) -training >> step=3103600, episode=518 reward=0.7527032 (534.76 it/sec) -training >> step=3103700, episode=518 reward=0.778069 (482.13 it/sec) -training >> step=3103800, episode=518 reward=0.7669841 (491.53 it/sec) -training >> step=3103900, episode=518 reward=0.7653431 (542.67 it/sec) -training >> step=3104000, episode=518 reward=0.7817333 (577.70 it/sec) -training >> step=3104100, episode=518 reward=0.7695282 (517.08 it/sec) -training >> step=3104200, episode=518 reward=0.7528481 (509.90 it/sec) -training >> step=3104300, episode=518 reward=0.8007519 (546.83 it/sec) -training >> step=3104400, episode=518 reward=0.7925739 (510.31 it/sec) -training >> step=3104500, episode=518 reward=0.7830391 (536.88 it/sec) -training >> step=3104600, episode=518 reward=0.7839919 (546.80 it/sec) -training >> step=3104700, episode=518 reward=0.7711909 (543.51 it/sec) -training >> step=3104800, episode=518 reward=0.778203 (526.34 it/sec) -training >> step=3104900, episode=518 reward=0.75061 (520.74 it/sec) -training >> step=3105000, episode=518 reward=0.7594816 (539.90 it/sec) -training >> step=3105100, episode=518 reward=0.7674433 (569.58 it/sec) -training >> step=3105200, episode=518 reward=0.77654 (549.25 it/sec) -training >> step=3105300, episode=518 reward=0.772551 (521.66 it/sec) -training >> step=3105400, episode=518 reward=0.7875535 (574.20 it/sec) -training >> step=3105500, episode=518 reward=0.7773311 (533.65 it/sec) -training >> step=3105600, episode=518 reward=0.7814731 (540.17 it/sec) -training >> step=3105700, episode=518 reward=0.7681192 (505.76 it/sec) -training >> step=3105800, episode=518 reward=0.7727445 (542.31 it/sec) -training >> step=3105900, episode=518 reward=0.758087 (517.31 it/sec) -training >> step=3106000, episode=518 reward=0.7925631 (497.99 it/sec) -training >> step=3106100, episode=518 reward=0.7659847 (505.22 it/sec) -training >> step=3106200, episode=518 reward=0.7569503 (573.22 it/sec) -training >> step=3106300, episode=518 reward=0.7805809 (531.71 it/sec) -training >> step=3106400, episode=518 reward=0.7567992 (551.69 it/sec) -training >> step=3106500, episode=518 reward=0.7756718 (539.58 it/sec) -training >> step=3106600, episode=518 reward=0.7814215 (499.29 it/sec) -training >> step=3106700, episode=518 reward=0.7541963 (558.66 it/sec) -training >> step=3106800, episode=518 reward=0.7914767 (536.18 it/sec) -training >> step=3106900, episode=518 reward=0.7821765 (573.05 it/sec) -training >> step=3107000, episode=518 reward=0.7810678 (548.11 it/sec) -training >> step=3107100, episode=518 reward=0.7814578 (516.30 it/sec) -training >> step=3107200, episode=518 reward=0.7743577 (539.55 it/sec) -training >> step=3107300, episode=519 reward=0.7783457 (135.45 it/sec) -training >> step=3107400, episode=519 reward=0.7519467 (497.89 it/sec) -training >> step=3107500, episode=519 reward=0.7424847 (542.89 it/sec) -training >> step=3107600, episode=519 reward=0.782154 (547.76 it/sec) -training >> step=3107700, episode=519 reward=0.7896667 (533.60 it/sec) -training >> step=3107800, episode=519 reward=0.7748423 (553.62 it/sec) -training >> step=3107900, episode=519 reward=0.7645232 (537.29 it/sec) -training >> step=3108000, episode=519 reward=0.7721237 (555.99 it/sec) -training >> step=3108100, episode=519 reward=0.7673675 (547.63 it/sec) -training >> step=3108200, episode=519 reward=0.7859268 (543.54 it/sec) -training >> step=3108300, episode=519 reward=0.7607464 (542.63 it/sec) -training >> step=3108400, episode=519 reward=0.759792 (544.59 it/sec) -training >> step=3108500, episode=519 reward=0.7652021 (513.60 it/sec) -training >> step=3108600, episode=519 reward=0.7724383 (518.66 it/sec) -training >> step=3108700, episode=519 reward=0.7726455 (573.10 it/sec) -training >> step=3108800, episode=519 reward=0.7688982 (527.51 it/sec) -training >> step=3108900, episode=519 reward=0.7524031 (549.09 it/sec) -training >> step=3109000, episode=519 reward=0.7585237 (555.45 it/sec) -training >> step=3109100, episode=519 reward=0.7903275 (518.44 it/sec) -training >> step=3109200, episode=519 reward=0.7786295 (533.68 it/sec) -training >> step=3109300, episode=519 reward=0.7768237 (519.54 it/sec) -training >> step=3109400, episode=519 reward=0.7761574 (529.89 it/sec) -training >> step=3109500, episode=519 reward=0.7727011 (530.81 it/sec) -training >> step=3109600, episode=519 reward=0.7741959 (509.97 it/sec) -training >> step=3109700, episode=519 reward=0.7583059 (531.33 it/sec) -training >> step=3109800, episode=519 reward=0.7601728 (538.78 it/sec) -training >> step=3109900, episode=519 reward=0.7792912 (535.08 it/sec) -training >> step=3110000, episode=519 reward=0.7898895 (537.62 it/sec) -training >> step=3110100, episode=519 reward=0.7927078 (580.14 it/sec) -training >> step=3110200, episode=519 reward=0.7737029 (503.34 it/sec) -training >> step=3110300, episode=519 reward=0.7854056 (535.35 it/sec) -training >> step=3110400, episode=519 reward=0.7942051 (559.27 it/sec) -training >> step=3110500, episode=519 reward=0.7697667 (554.20 it/sec) -training >> step=3110600, episode=519 reward=0.7558873 (525.53 it/sec) -training >> step=3110700, episode=519 reward=0.7777959 (531.86 it/sec) -training >> step=3110800, episode=519 reward=0.7658674 (502.04 it/sec) -training >> step=3110900, episode=519 reward=0.7543823 (481.49 it/sec) -training >> step=3111000, episode=519 reward=0.7449546 (423.58 it/sec) -training >> step=3111100, episode=519 reward=0.75889 (435.67 it/sec) -training >> step=3111200, episode=519 reward=0.7767984 (435.24 it/sec) -training >> step=3111300, episode=519 reward=0.7500496 (522.97 it/sec) -training >> step=3111400, episode=519 reward=0.7775246 (515.44 it/sec) -training >> step=3111500, episode=519 reward=0.7599683 (575.61 it/sec) -training >> step=3111600, episode=519 reward=0.7744412 (536.17 it/sec) -training >> step=3111700, episode=519 reward=0.7761576 (515.23 it/sec) -training >> step=3111800, episode=519 reward=0.7681451 (549.64 it/sec) -training >> step=3111900, episode=519 reward=0.7676306 (553.34 it/sec) -training >> step=3112000, episode=519 reward=0.7706209 (534.43 it/sec) -training >> step=3112100, episode=519 reward=0.7838441 (544.04 it/sec) -training >> step=3112200, episode=519 reward=0.765405 (554.41 it/sec) -training >> step=3112300, episode=519 reward=0.7598996 (545.73 it/sec) -training >> step=3112400, episode=519 reward=0.772509 (551.25 it/sec) -training >> step=3112500, episode=519 reward=0.7439979 (560.03 it/sec) -training >> step=3112600, episode=519 reward=0.7704346 (568.42 it/sec) -training >> step=3112700, episode=519 reward=0.7686059 (516.50 it/sec) -training >> step=3112800, episode=519 reward=0.7570546 (541.09 it/sec) -training >> step=3112900, episode=519 reward=0.7650658 (532.17 it/sec) -training >> step=3113000, episode=519 reward=0.7535185 (550.35 it/sec) -training >> step=3113100, episode=519 reward=0.7592444 (536.97 it/sec) -training >> step=3113200, episode=519 reward=0.7591224 (524.22 it/sec) -training >> step=3113300, episode=520 reward=0.7720532 (119.14 it/sec) -training >> step=3113400, episode=520 reward=0.7718322 (531.53 it/sec) -training >> step=3113500, episode=520 reward=0.7545878 (539.94 it/sec) -training >> step=3113600, episode=520 reward=0.7647055 (514.60 it/sec) -training >> step=3113700, episode=520 reward=0.7657436 (519.29 it/sec) -training >> step=3113800, episode=520 reward=0.7655184 (514.05 it/sec) -training >> step=3113900, episode=520 reward=0.7630476 (506.45 it/sec) -training >> step=3114000, episode=520 reward=0.775276 (547.35 it/sec) -training >> step=3114100, episode=520 reward=0.781221 (493.14 it/sec) -training >> step=3114200, episode=520 reward=0.7792251 (533.96 it/sec) -training >> step=3114300, episode=520 reward=0.7666913 (535.47 it/sec) -training >> step=3114400, episode=520 reward=0.7710965 (540.03 it/sec) -training >> step=3114500, episode=520 reward=0.7714053 (516.25 it/sec) -training >> step=3114600, episode=520 reward=0.7785981 (526.34 it/sec) -training >> step=3114700, episode=520 reward=0.766425 (522.87 it/sec) -training >> step=3114800, episode=520 reward=0.7594607 (583.48 it/sec) -training >> step=3114900, episode=520 reward=0.7662856 (526.97 it/sec) -training >> step=3115000, episode=520 reward=0.7797725 (534.77 it/sec) -training >> step=3115100, episode=520 reward=0.7684723 (556.64 it/sec) -training >> step=3115200, episode=520 reward=0.7677399 (545.24 it/sec) -training >> step=3115300, episode=520 reward=0.7794023 (531.11 it/sec) -training >> step=3115400, episode=520 reward=0.7741728 (527.47 it/sec) -training >> step=3115500, episode=520 reward=0.7625508 (538.88 it/sec) -training >> step=3115600, episode=520 reward=0.7659035 (548.67 it/sec) -training >> step=3115700, episode=520 reward=0.7745444 (524.25 it/sec) -training >> step=3115800, episode=520 reward=0.7783881 (534.91 it/sec) -training >> step=3115900, episode=520 reward=0.7586786 (543.27 it/sec) -training >> step=3116000, episode=520 reward=0.7821563 (531.05 it/sec) -training >> step=3116100, episode=520 reward=0.7841954 (514.31 it/sec) -training >> step=3116200, episode=520 reward=0.7642809 (557.59 it/sec) -training >> step=3116300, episode=520 reward=0.7740557 (485.26 it/sec) -training >> step=3116400, episode=520 reward=0.7637183 (509.80 it/sec) -training >> step=3116500, episode=520 reward=0.7699506 (557.09 it/sec) -training >> step=3116600, episode=520 reward=0.7748984 (570.04 it/sec) -training >> step=3116700, episode=520 reward=0.8082458 (542.03 it/sec) -training >> step=3116800, episode=520 reward=0.7805815 (532.15 it/sec) -training >> step=3116900, episode=520 reward=0.7670791 (524.91 it/sec) -training >> step=3117000, episode=520 reward=0.7893764 (534.69 it/sec) -training >> step=3117100, episode=520 reward=0.7727095 (562.13 it/sec) -training >> step=3117200, episode=520 reward=0.7553806 (524.78 it/sec) -training >> step=3117300, episode=520 reward=0.7756127 (532.56 it/sec) -training >> step=3117400, episode=520 reward=0.7530427 (508.73 it/sec) -training >> step=3117500, episode=520 reward=0.7728989 (533.74 it/sec) -training >> step=3117600, episode=520 reward=0.7789054 (512.63 it/sec) -training >> step=3117700, episode=520 reward=0.7364525 (555.65 it/sec) -training >> step=3117800, episode=520 reward=0.7795365 (549.78 it/sec) -training >> step=3117900, episode=520 reward=0.7682661 (554.87 it/sec) -training >> step=3118000, episode=520 reward=0.7572393 (513.96 it/sec) -training >> step=3118100, episode=520 reward=0.7735915 (546.11 it/sec) -training >> step=3118200, episode=520 reward=0.755391 (552.93 it/sec) -training >> step=3118300, episode=520 reward=0.753802 (555.83 it/sec) -training >> step=3118400, episode=520 reward=0.77479 (532.34 it/sec) -training >> step=3118500, episode=520 reward=0.7771286 (494.65 it/sec) -training >> step=3118600, episode=520 reward=0.7442883 (543.63 it/sec) -training >> step=3118700, episode=520 reward=0.7603641 (512.10 it/sec) -training >> step=3118800, episode=520 reward=0.7743069 (536.12 it/sec) -training >> step=3118900, episode=520 reward=0.7752478 (545.27 it/sec) -training >> step=3119000, episode=520 reward=0.7648939 (553.15 it/sec) -training >> step=3119100, episode=520 reward=0.7730447 (565.77 it/sec) -training >> step=3119200, episode=520 reward=0.7817264 (531.69 it/sec) -training >> step=3119300, episode=521 reward=0.7749537 (138.23 it/sec) -training >> step=3119400, episode=521 reward=0.7683325 (514.91 it/sec) -training >> step=3119500, episode=521 reward=0.7722533 (516.38 it/sec) -training >> step=3119600, episode=521 reward=0.7486796 (482.46 it/sec) -training >> step=3119700, episode=521 reward=0.7565196 (497.47 it/sec) -training >> step=3119800, episode=521 reward=0.7799308 (570.56 it/sec) -training >> step=3119900, episode=521 reward=0.7808403 (527.61 it/sec) -training >> step=3120000, episode=521 reward=0.785842 (555.10 it/sec) -training >> step=3120100, episode=521 reward=0.774193 (561.44 it/sec) -training >> step=3120200, episode=521 reward=0.7760262 (535.31 it/sec) -training >> step=3120300, episode=521 reward=0.7612644 (528.28 it/sec) -training >> step=3120400, episode=521 reward=0.780124 (507.46 it/sec) -training >> step=3120500, episode=521 reward=0.7742933 (520.02 it/sec) -training >> step=3120600, episode=521 reward=0.7801864 (538.50 it/sec) -training >> step=3120700, episode=521 reward=0.7562054 (527.61 it/sec) -training >> step=3120800, episode=521 reward=0.7648694 (542.32 it/sec) -training >> step=3120900, episode=521 reward=0.7601379 (480.17 it/sec) -training >> step=3121000, episode=521 reward=0.7685497 (492.62 it/sec) -training >> step=3121100, episode=521 reward=0.7811817 (480.10 it/sec) -training >> step=3121200, episode=521 reward=0.7686075 (526.00 it/sec) -training >> step=3121300, episode=521 reward=0.7701922 (514.17 it/sec) -training >> step=3121400, episode=521 reward=0.7713404 (526.93 it/sec) -training >> step=3121500, episode=521 reward=0.7564128 (506.32 it/sec) -training >> step=3121600, episode=521 reward=0.7790845 (574.68 it/sec) -training >> step=3121700, episode=521 reward=0.7776517 (520.82 it/sec) -training >> step=3121800, episode=521 reward=0.7739869 (533.10 it/sec) -training >> step=3121900, episode=521 reward=0.7708017 (476.92 it/sec) -training >> step=3122000, episode=521 reward=0.7661797 (536.11 it/sec) -training >> step=3122100, episode=521 reward=0.7651621 (508.18 it/sec) -training >> step=3122200, episode=521 reward=0.7321823 (483.70 it/sec) -training >> step=3122300, episode=521 reward=0.7420906 (407.91 it/sec) -training >> step=3122400, episode=521 reward=0.7635532 (497.83 it/sec) -training >> step=3122500, episode=521 reward=0.7697192 (431.85 it/sec) -training >> step=3122600, episode=521 reward=0.7687353 (433.88 it/sec) -training >> step=3122700, episode=521 reward=0.7621932 (502.01 it/sec) -training >> step=3122800, episode=521 reward=0.7581386 (408.93 it/sec) -training >> step=3122900, episode=521 reward=0.7708014 (475.17 it/sec) -training >> step=3123000, episode=521 reward=0.7582515 (505.64 it/sec) -training >> step=3123100, episode=521 reward=0.7710352 (516.10 it/sec) -training >> step=3123200, episode=521 reward=0.7686176 (496.09 it/sec) -training >> step=3123300, episode=521 reward=0.7666262 (482.35 it/sec) -training >> step=3123400, episode=521 reward=0.7520541 (473.23 it/sec) -training >> step=3123500, episode=521 reward=0.7795656 (469.85 it/sec) -training >> step=3123600, episode=521 reward=0.7666277 (484.23 it/sec) -training >> step=3123700, episode=521 reward=0.7351825 (471.72 it/sec) -training >> step=3123800, episode=521 reward=0.7476591 (532.52 it/sec) -training >> step=3123900, episode=521 reward=0.7629943 (463.94 it/sec) -training >> step=3124000, episode=521 reward=0.7644089 (416.78 it/sec) -training >> step=3124100, episode=521 reward=0.7592392 (430.29 it/sec) -training >> step=3124200, episode=521 reward=0.7763872 (486.04 it/sec) -training >> step=3124300, episode=521 reward=0.7647925 (460.21 it/sec) -training >> step=3124400, episode=521 reward=0.7490385 (437.40 it/sec) -training >> step=3124500, episode=521 reward=0.769774 (463.24 it/sec) -training >> step=3124600, episode=521 reward=0.7815674 (476.34 it/sec) -training >> step=3124700, episode=521 reward=0.7698094 (490.00 it/sec) -training >> step=3124800, episode=521 reward=0.7621945 (457.66 it/sec) -training >> step=3124900, episode=521 reward=0.7734742 (521.80 it/sec) -training >> step=3125000, episode=521 reward=0.7659912 (454.86 it/sec) -training >> step=3125100, episode=521 reward=0.7611113 (484.46 it/sec) -training >> step=3125200, episode=521 reward=0.770618 (490.82 it/sec) -training >> step=3125300, episode=522 reward=0.7478841 (91.64 it/sec) -training >> step=3125400, episode=522 reward=0.7570871 (471.44 it/sec) -training >> step=3125500, episode=522 reward=0.76241 (406.92 it/sec) -training >> step=3125600, episode=522 reward=0.750379 (513.66 it/sec) -training >> step=3125700, episode=522 reward=0.756175 (451.15 it/sec) -training >> step=3125800, episode=522 reward=0.7573019 (491.46 it/sec) -training >> step=3125900, episode=522 reward=0.7802363 (510.44 it/sec) -training >> step=3126000, episode=522 reward=0.7561155 (483.45 it/sec) -training >> step=3126100, episode=522 reward=0.7513476 (482.12 it/sec) -training >> step=3126200, episode=522 reward=0.7636227 (483.38 it/sec) -training >> step=3126300, episode=522 reward=0.7749865 (499.74 it/sec) -training >> step=3126400, episode=522 reward=0.771282 (499.22 it/sec) -training >> step=3126500, episode=522 reward=0.7616122 (480.08 it/sec) -training >> step=3126600, episode=522 reward=0.7747855 (525.91 it/sec) -training >> step=3126700, episode=522 reward=0.7619693 (488.60 it/sec) -training >> step=3126800, episode=522 reward=0.7741043 (548.61 it/sec) -training >> step=3126900, episode=522 reward=0.7778286 (493.85 it/sec) -training >> step=3127000, episode=522 reward=0.7887551 (507.42 it/sec) -training >> step=3127100, episode=522 reward=0.7615183 (496.05 it/sec) -training >> step=3127200, episode=522 reward=0.7694677 (522.10 it/sec) -training >> step=3127300, episode=522 reward=0.763594 (518.36 it/sec) -training >> step=3127400, episode=522 reward=0.7576438 (520.26 it/sec) -training >> step=3127500, episode=522 reward=0.7768661 (485.93 it/sec) -training >> step=3127600, episode=522 reward=0.7722785 (533.49 it/sec) -training >> step=3127700, episode=522 reward=0.767198 (534.09 it/sec) -training >> step=3127800, episode=522 reward=0.7557368 (506.59 it/sec) -training >> step=3127900, episode=522 reward=0.7551153 (478.95 it/sec) -training >> step=3128000, episode=522 reward=0.7764263 (524.48 it/sec) -training >> step=3128100, episode=522 reward=0.8011691 (448.70 it/sec) -training >> step=3128200, episode=522 reward=0.786273 (448.84 it/sec) -training >> step=3128300, episode=522 reward=0.7589623 (491.71 it/sec) -training >> step=3128400, episode=522 reward=0.7815849 (494.45 it/sec) -training >> step=3128500, episode=522 reward=0.7776546 (467.75 it/sec) -training >> step=3128600, episode=522 reward=0.7704198 (448.41 it/sec) -training >> step=3128700, episode=522 reward=0.7450131 (509.73 it/sec) -training >> step=3128800, episode=522 reward=0.758388 (463.74 it/sec) -training >> step=3128900, episode=522 reward=0.7442451 (441.29 it/sec) -training >> step=3129000, episode=522 reward=0.760474 (443.96 it/sec) -training >> step=3129100, episode=522 reward=0.7707421 (498.62 it/sec) -training >> step=3129200, episode=522 reward=0.7636696 (449.75 it/sec) -training >> step=3129300, episode=522 reward=0.7696853 (474.48 it/sec) -training >> step=3129400, episode=522 reward=0.7853751 (459.01 it/sec) -training >> step=3129500, episode=522 reward=0.7744083 (466.92 it/sec) -training >> step=3129600, episode=522 reward=0.7549295 (471.39 it/sec) -training >> step=3129700, episode=522 reward=0.7674979 (496.26 it/sec) -training >> step=3129800, episode=522 reward=0.7751064 (470.59 it/sec) -training >> step=3129900, episode=522 reward=0.7588085 (456.94 it/sec) -training >> step=3130000, episode=522 reward=0.7779671 (463.54 it/sec) -training >> step=3130100, episode=522 reward=0.7769524 (446.57 it/sec) -training >> step=3130200, episode=522 reward=0.7556572 (463.70 it/sec) -training >> step=3130300, episode=522 reward=0.7877371 (460.68 it/sec) -training >> step=3130400, episode=522 reward=0.7477685 (455.32 it/sec) -training >> step=3130500, episode=522 reward=0.7611485 (484.16 it/sec) -training >> step=3130600, episode=522 reward=0.7502256 (492.02 it/sec) -training >> step=3130700, episode=522 reward=0.7683788 (455.07 it/sec) -training >> step=3130800, episode=522 reward=0.7681852 (449.95 it/sec) -training >> step=3130900, episode=522 reward=0.7657333 (529.38 it/sec) -training >> step=3131000, episode=522 reward=0.7601806 (454.19 it/sec) -training >> step=3131100, episode=522 reward=0.7697881 (482.29 it/sec) -training >> step=3131200, episode=522 reward=0.7673066 (476.12 it/sec) -training >> step=3131300, episode=523 reward=0.7834246 (94.00 it/sec) -training >> step=3131400, episode=523 reward=0.7636582 (493.97 it/sec) -training >> step=3131500, episode=523 reward=0.7594495 (438.98 it/sec) -training >> step=3131600, episode=523 reward=0.7704182 (489.81 it/sec) -training >> step=3131700, episode=523 reward=0.7742174 (504.58 it/sec) -training >> step=3131800, episode=523 reward=0.7622588 (457.31 it/sec) -training >> step=3131900, episode=523 reward=0.7842664 (534.45 it/sec) -training >> step=3132000, episode=523 reward=0.7835085 (480.42 it/sec) -training >> step=3132100, episode=523 reward=0.7550589 (498.50 it/sec) -training >> step=3132200, episode=523 reward=0.7470858 (535.13 it/sec) -training >> step=3132300, episode=523 reward=0.7578872 (498.77 it/sec) -training >> step=3132400, episode=523 reward=0.7915618 (497.28 it/sec) -training >> step=3132500, episode=523 reward=0.7654302 (475.49 it/sec) -training >> step=3132600, episode=523 reward=0.7751815 (497.16 it/sec) -training >> step=3132700, episode=523 reward=0.7810367 (515.09 it/sec) -training >> step=3132800, episode=523 reward=0.7531904 (486.75 it/sec) -training >> step=3132900, episode=523 reward=0.7498825 (489.20 it/sec) -training >> step=3133000, episode=523 reward=0.7824568 (488.60 it/sec) -training >> step=3133100, episode=523 reward=0.7862231 (485.30 it/sec) -training >> step=3133200, episode=523 reward=0.76197 (501.59 it/sec) -training >> step=3133300, episode=523 reward=0.7773477 (477.03 it/sec) -training >> step=3133400, episode=523 reward=0.7825544 (481.01 it/sec) -training >> step=3133500, episode=523 reward=0.7576896 (526.39 it/sec) -training >> step=3133600, episode=523 reward=0.7503516 (490.64 it/sec) -training >> step=3133700, episode=523 reward=0.7511079 (545.39 it/sec) -training >> step=3133800, episode=523 reward=0.7788424 (512.80 it/sec) -training >> step=3133900, episode=523 reward=0.7621493 (513.16 it/sec) -training >> step=3134000, episode=523 reward=0.7756248 (556.00 it/sec) -training >> step=3134100, episode=523 reward=0.7847915 (457.43 it/sec) -training >> step=3134200, episode=523 reward=0.7722165 (482.47 it/sec) -training >> step=3134300, episode=523 reward=0.7633371 (508.99 it/sec) -training >> step=3134400, episode=523 reward=0.7980257 (538.81 it/sec) -training >> step=3134500, episode=523 reward=0.7746732 (498.84 it/sec) -training >> step=3134600, episode=523 reward=0.7558953 (519.66 it/sec) -training >> step=3134700, episode=523 reward=0.7752361 (529.44 it/sec) -training >> step=3134800, episode=523 reward=0.7664808 (537.19 it/sec) -training >> step=3134900, episode=523 reward=0.7533487 (488.92 it/sec) -training >> step=3135000, episode=523 reward=0.7687749 (547.95 it/sec) -training >> step=3135100, episode=523 reward=0.7690179 (496.43 it/sec) -training >> step=3135200, episode=523 reward=0.747215 (535.57 it/sec) -training >> step=3135300, episode=523 reward=0.776567 (501.87 it/sec) -training >> step=3135400, episode=523 reward=0.7606466 (457.46 it/sec) -training >> step=3135500, episode=523 reward=0.7582306 (511.31 it/sec) -training >> step=3135600, episode=523 reward=0.7874463 (462.92 it/sec) -training >> step=3135700, episode=523 reward=0.7760676 (438.62 it/sec) -training >> step=3135800, episode=523 reward=0.7600057 (539.26 it/sec) -training >> step=3135900, episode=523 reward=0.7491945 (496.05 it/sec) -training >> step=3136000, episode=523 reward=0.7844791 (512.46 it/sec) -training >> step=3136100, episode=523 reward=0.7573651 (469.48 it/sec) -training >> step=3136200, episode=523 reward=0.7724045 (492.20 it/sec) -training >> step=3136300, episode=523 reward=0.7424291 (462.89 it/sec) -training >> step=3136400, episode=523 reward=0.7834799 (522.31 it/sec) -training >> step=3136500, episode=523 reward=0.78462 (498.52 it/sec) -training >> step=3136600, episode=523 reward=0.751448 (480.70 it/sec) -training >> step=3136700, episode=523 reward=0.7657893 (472.50 it/sec) -training >> step=3136800, episode=523 reward=0.7613512 (483.00 it/sec) -training >> step=3136900, episode=523 reward=0.7599244 (506.51 it/sec) -training >> step=3137000, episode=523 reward=0.7685646 (508.07 it/sec) -training >> step=3137100, episode=523 reward=0.7613533 (496.54 it/sec) -training >> step=3137200, episode=523 reward=0.7628503 (462.36 it/sec) -training >> step=3137300, episode=524 reward=0.7593104 (78.89 it/sec) -training >> step=3137400, episode=524 reward=0.7786412 (476.56 it/sec) -training >> step=3137500, episode=524 reward=0.7457428 (563.56 it/sec) -training >> step=3137600, episode=524 reward=0.7600343 (470.71 it/sec) -training >> step=3137700, episode=524 reward=0.76419 (425.62 it/sec) -training >> step=3137800, episode=524 reward=0.7805145 (481.75 it/sec) -training >> step=3137900, episode=524 reward=0.74801 (482.46 it/sec) -training >> step=3138000, episode=524 reward=0.7675402 (470.17 it/sec) -training >> step=3138100, episode=524 reward=0.7664765 (466.14 it/sec) -training >> step=3138200, episode=524 reward=0.7541165 (480.63 it/sec) -training >> step=3138300, episode=524 reward=0.7758211 (497.13 it/sec) -training >> step=3138400, episode=524 reward=0.7731372 (514.64 it/sec) -training >> step=3138500, episode=524 reward=0.7833399 (532.89 it/sec) -training >> step=3138600, episode=524 reward=0.7768629 (519.03 it/sec) -training >> step=3138700, episode=524 reward=0.7615926 (490.54 it/sec) -training >> step=3138800, episode=524 reward=0.7683991 (486.92 it/sec) -training >> step=3138900, episode=524 reward=0.7848709 (484.60 it/sec) -training >> step=3139000, episode=524 reward=0.7633063 (428.44 it/sec) -training >> step=3139100, episode=524 reward=0.7743684 (486.14 it/sec) -training >> step=3139200, episode=524 reward=0.7694462 (552.63 it/sec) -training >> step=3139300, episode=524 reward=0.7481821 (535.86 it/sec) -training >> step=3139400, episode=524 reward=0.7579519 (520.83 it/sec) -training >> step=3139500, episode=524 reward=0.759046 (501.20 it/sec) -training >> step=3139600, episode=524 reward=0.7922289 (488.67 it/sec) -training >> step=3139700, episode=524 reward=0.7934233 (503.33 it/sec) -training >> step=3139800, episode=524 reward=0.7750832 (512.71 it/sec) -training >> step=3139900, episode=524 reward=0.7524572 (518.91 it/sec) -training >> step=3140000, episode=524 reward=0.7644837 (530.05 it/sec) -training >> step=3140100, episode=524 reward=0.7843855 (498.09 it/sec) -training >> step=3140200, episode=524 reward=0.7483687 (487.10 it/sec) -training >> step=3140300, episode=524 reward=0.7850404 (534.89 it/sec) -training >> step=3140400, episode=524 reward=0.7920059 (513.85 it/sec) -training >> step=3140500, episode=524 reward=0.7569667 (510.61 it/sec) -training >> step=3140600, episode=524 reward=0.7528577 (476.27 it/sec) -training >> step=3140700, episode=524 reward=0.7838477 (507.04 it/sec) -training >> step=3140800, episode=524 reward=0.7771401 (523.36 it/sec) -training >> step=3140900, episode=524 reward=0.7915117 (545.52 it/sec) -training >> step=3141000, episode=524 reward=0.770862 (537.37 it/sec) -training >> step=3141100, episode=524 reward=0.7784995 (535.07 it/sec) -training >> step=3141200, episode=524 reward=0.7738248 (495.45 it/sec) -training >> step=3141300, episode=524 reward=0.7880381 (549.21 it/sec) -training >> step=3141400, episode=524 reward=0.7790791 (530.61 it/sec) -training >> step=3141500, episode=524 reward=0.7487903 (523.40 it/sec) -training >> step=3141600, episode=524 reward=0.7616675 (482.91 it/sec) -training >> step=3141700, episode=524 reward=0.7638232 (544.97 it/sec) -training >> step=3141800, episode=524 reward=0.7771112 (482.82 it/sec) -training >> step=3141900, episode=524 reward=0.7590176 (501.08 it/sec) -training >> step=3142000, episode=524 reward=0.7733867 (503.93 it/sec) -training >> step=3142100, episode=524 reward=0.7743569 (530.79 it/sec) -training >> step=3142200, episode=524 reward=0.7692491 (457.88 it/sec) -training >> step=3142300, episode=524 reward=0.7626215 (495.41 it/sec) -training >> step=3142400, episode=524 reward=0.7618535 (502.30 it/sec) -training >> step=3142500, episode=524 reward=0.7614 (493.13 it/sec) -training >> step=3142600, episode=524 reward=0.7694779 (529.01 it/sec) -training >> step=3142700, episode=524 reward=0.7651998 (523.53 it/sec) -training >> step=3142800, episode=524 reward=0.7628856 (506.55 it/sec) -training >> step=3142900, episode=524 reward=0.7315426 (497.92 it/sec) -training >> step=3143000, episode=524 reward=0.7653701 (494.07 it/sec) -training >> step=3143100, episode=524 reward=0.7600281 (559.16 it/sec) -training >> step=3143200, episode=524 reward=0.7607392 (528.50 it/sec) -training >> step=3143300, episode=525 reward=0.773953 (87.23 it/sec) -training >> step=3143400, episode=525 reward=0.7751147 (489.43 it/sec) -training >> step=3143500, episode=525 reward=0.7745725 (548.64 it/sec) -training >> step=3143600, episode=525 reward=0.7681253 (526.59 it/sec) -training >> step=3143700, episode=525 reward=0.7637313 (526.42 it/sec) -training >> step=3143800, episode=525 reward=0.7725934 (544.09 it/sec) -training >> step=3143900, episode=525 reward=0.7533827 (509.38 it/sec) -training >> step=3144000, episode=525 reward=0.7582857 (501.01 it/sec) -training >> step=3144100, episode=525 reward=0.782957 (555.40 it/sec) -training >> step=3144200, episode=525 reward=0.7694134 (512.40 it/sec) -training >> step=3144300, episode=525 reward=0.8038288 (528.93 it/sec) -training >> step=3144400, episode=525 reward=0.7800467 (512.49 it/sec) -training >> step=3144500, episode=525 reward=0.7821497 (538.10 it/sec) -training >> step=3144600, episode=525 reward=0.7708864 (530.21 it/sec) -training >> step=3144700, episode=525 reward=0.7849733 (514.11 it/sec) -training >> step=3144800, episode=525 reward=0.7679062 (570.94 it/sec) -training >> step=3144900, episode=525 reward=0.7782958 (510.24 it/sec) -training >> step=3145000, episode=525 reward=0.7813364 (457.90 it/sec) -training >> step=3145100, episode=525 reward=0.7842983 (483.64 it/sec) -training >> step=3145200, episode=525 reward=0.7623613 (532.00 it/sec) -training >> step=3145300, episode=525 reward=0.7694395 (502.42 it/sec) -training >> step=3145400, episode=525 reward=0.7716753 (522.81 it/sec) -training >> step=3145500, episode=525 reward=0.7750256 (510.72 it/sec) -training >> step=3145600, episode=525 reward=0.7900169 (538.96 it/sec) -training >> step=3145700, episode=525 reward=0.7892403 (495.99 it/sec) -training >> step=3145800, episode=525 reward=0.7841665 (505.83 it/sec) -training >> step=3145900, episode=525 reward=0.7657313 (510.34 it/sec) -training >> step=3146000, episode=525 reward=0.7636974 (525.89 it/sec) -training >> step=3146100, episode=525 reward=0.7883217 (493.71 it/sec) -training >> step=3146200, episode=525 reward=0.7771509 (478.54 it/sec) -training >> step=3146300, episode=525 reward=0.7448598 (535.31 it/sec) -training >> step=3146400, episode=525 reward=0.8017728 (540.39 it/sec) -training >> step=3146500, episode=525 reward=0.77425 (499.52 it/sec) -training >> step=3146600, episode=525 reward=0.771374 (493.10 it/sec) -training >> step=3146700, episode=525 reward=0.76458 (486.89 it/sec) -training >> step=3146800, episode=525 reward=0.7699987 (493.28 it/sec) -training >> step=3146900, episode=525 reward=0.7814773 (513.37 it/sec) -training >> step=3147000, episode=525 reward=0.7588407 (530.44 it/sec) -training >> step=3147100, episode=525 reward=0.7663991 (467.40 it/sec) -training >> step=3147200, episode=525 reward=0.7693902 (523.14 it/sec) -training >> step=3147300, episode=525 reward=0.7610201 (523.23 it/sec) -training >> step=3147400, episode=525 reward=0.7676727 (538.21 it/sec) -training >> step=3147500, episode=525 reward=0.7801198 (420.47 it/sec) -training >> step=3147600, episode=525 reward=0.7716644 (398.59 it/sec) -training >> step=3147700, episode=525 reward=0.7735263 (435.35 it/sec) -training >> step=3147800, episode=525 reward=0.7637881 (412.20 it/sec) -training >> step=3147900, episode=525 reward=0.7747602 (406.27 it/sec) -training >> step=3148000, episode=525 reward=0.7760372 (408.88 it/sec) -training >> step=3148100, episode=525 reward=0.7429938 (450.23 it/sec) -training >> step=3148200, episode=525 reward=0.7528399 (530.76 it/sec) -training >> step=3148300, episode=525 reward=0.7862181 (442.75 it/sec) -training >> step=3148400, episode=525 reward=0.7674643 (484.69 it/sec) -training >> step=3148500, episode=525 reward=0.7510472 (466.86 it/sec) -training >> step=3148600, episode=525 reward=0.7587259 (442.55 it/sec) -training >> step=3148700, episode=525 reward=0.7658654 (464.28 it/sec) -training >> step=3148800, episode=525 reward=0.7717074 (459.01 it/sec) -training >> step=3148900, episode=525 reward=0.7763298 (471.48 it/sec) -training >> step=3149000, episode=525 reward=0.766992 (477.83 it/sec) -training >> step=3149100, episode=525 reward=0.7803448 (525.07 it/sec) -training >> step=3149200, episode=525 reward=0.7729121 (533.20 it/sec) -training >> step=3149300, episode=526 reward=0.7851709 (60.75 it/sec) -training >> step=3149400, episode=526 reward=0.7760548 (503.54 it/sec) -training >> step=3149500, episode=526 reward=0.7566652 (480.71 it/sec) -training >> step=3149600, episode=526 reward=0.7680264 (535.03 it/sec) -training >> step=3149700, episode=526 reward=0.7639323 (543.97 it/sec) -training >> step=3149800, episode=526 reward=0.7591105 (537.25 it/sec) -training >> step=3149900, episode=526 reward=0.7886634 (425.49 it/sec) -training >> step=3150000, episode=526 reward=0.7750531 (476.04 it/sec) -training >> step=3150100, episode=526 reward=0.7825496 (497.03 it/sec) -training >> step=3150200, episode=526 reward=0.7730429 (442.11 it/sec) -training >> step=3150300, episode=526 reward=0.7675784 (435.41 it/sec) -training >> step=3150400, episode=526 reward=0.773322 (546.65 it/sec) -training >> step=3150500, episode=526 reward=0.7812085 (526.83 it/sec) -training >> step=3150600, episode=526 reward=0.769866 (506.46 it/sec) -training >> step=3150700, episode=526 reward=0.7590894 (567.55 it/sec) -training >> step=3150800, episode=526 reward=0.7766789 (553.74 it/sec) -training >> step=3150900, episode=526 reward=0.7726452 (508.94 it/sec) -training >> step=3151000, episode=526 reward=0.7632403 (522.59 it/sec) -training >> step=3151100, episode=526 reward=0.7692563 (508.57 it/sec) -training >> step=3151200, episode=526 reward=0.7707388 (498.17 it/sec) -training >> step=3151300, episode=526 reward=0.7665539 (490.88 it/sec) -training >> step=3151400, episode=526 reward=0.772561 (506.03 it/sec) -training >> step=3151500, episode=526 reward=0.7639908 (530.89 it/sec) -training >> step=3151600, episode=526 reward=0.7637493 (535.31 it/sec) -training >> step=3151700, episode=526 reward=0.7821071 (479.56 it/sec) -training >> step=3151800, episode=526 reward=0.7645091 (513.05 it/sec) -training >> step=3151900, episode=526 reward=0.7637968 (474.19 it/sec) -training >> step=3152000, episode=526 reward=0.7659243 (492.48 it/sec) -training >> step=3152100, episode=526 reward=0.7568277 (492.41 it/sec) -training >> step=3152200, episode=526 reward=0.7522442 (527.75 it/sec) -training >> step=3152300, episode=526 reward=0.767646 (504.84 it/sec) -training >> step=3152400, episode=526 reward=0.7866825 (480.95 it/sec) -training >> step=3152500, episode=526 reward=0.7616323 (485.26 it/sec) -training >> step=3152600, episode=526 reward=0.7696669 (481.75 it/sec) -training >> step=3152700, episode=526 reward=0.7634175 (555.00 it/sec) -training >> step=3152800, episode=526 reward=0.7698238 (541.60 it/sec) -training >> step=3152900, episode=526 reward=0.7771502 (541.87 it/sec) -training >> step=3153000, episode=526 reward=0.7622943 (487.25 it/sec) -training >> step=3153100, episode=526 reward=0.7521768 (525.49 it/sec) -training >> step=3153200, episode=526 reward=0.7700523 (501.30 it/sec) -training >> step=3153300, episode=526 reward=0.7734507 (517.64 it/sec) -training >> step=3153400, episode=526 reward=0.7567663 (523.40 it/sec) -training >> step=3153500, episode=526 reward=0.7652299 (501.35 it/sec) -training >> step=3153600, episode=526 reward=0.7577596 (505.40 it/sec) -training >> step=3153700, episode=526 reward=0.7778363 (493.59 it/sec) -training >> step=3153800, episode=526 reward=0.7556536 (496.93 it/sec) -training >> step=3153900, episode=526 reward=0.7558388 (532.57 it/sec) -training >> step=3154000, episode=526 reward=0.7636457 (505.30 it/sec) -training >> step=3154100, episode=526 reward=0.7597485 (478.17 it/sec) -training >> step=3154200, episode=526 reward=0.7954424 (517.46 it/sec) -training >> step=3154300, episode=526 reward=0.7556042 (516.02 it/sec) -training >> step=3154400, episode=526 reward=0.7693202 (547.91 it/sec) -training >> step=3154500, episode=526 reward=0.7808831 (495.20 it/sec) -training >> step=3154600, episode=526 reward=0.762062 (474.45 it/sec) -training >> step=3154700, episode=526 reward=0.7443964 (505.50 it/sec) -training >> step=3154800, episode=526 reward=0.7489929 (505.32 it/sec) -training >> step=3154900, episode=526 reward=0.7643886 (521.23 it/sec) -training >> step=3155000, episode=526 reward=0.775546 (523.61 it/sec) -training >> step=3155100, episode=526 reward=0.7688834 (500.82 it/sec) -training >> step=3155200, episode=526 reward=0.730217 (475.00 it/sec) -training >> step=3155300, episode=527 reward=0.7700005 (120.66 it/sec) -training >> step=3155400, episode=527 reward=0.7715713 (530.19 it/sec) -training >> step=3155500, episode=527 reward=0.7579249 (504.41 it/sec) -training >> step=3155600, episode=527 reward=0.7670406 (515.05 it/sec) -training >> step=3155700, episode=527 reward=0.7528987 (513.43 it/sec) -training >> step=3155800, episode=527 reward=0.7673084 (496.35 it/sec) -training >> step=3155900, episode=527 reward=0.7550716 (507.74 it/sec) -training >> step=3156000, episode=527 reward=0.763118 (499.95 it/sec) -training >> step=3156100, episode=527 reward=0.7802593 (500.98 it/sec) -training >> step=3156200, episode=527 reward=0.7640758 (512.87 it/sec) -training >> step=3156300, episode=527 reward=0.7777499 (542.18 it/sec) -training >> step=3156400, episode=527 reward=0.7775461 (474.76 it/sec) -training >> step=3156500, episode=527 reward=0.7581092 (508.68 it/sec) -training >> step=3156600, episode=527 reward=0.77363 (485.63 it/sec) -training >> step=3156700, episode=527 reward=0.7454947 (524.96 it/sec) -training >> step=3156800, episode=527 reward=0.7836269 (494.83 it/sec) -training >> step=3156900, episode=527 reward=0.7516528 (511.63 it/sec) -training >> step=3157000, episode=527 reward=0.7823696 (522.10 it/sec) -training >> step=3157100, episode=527 reward=0.7757027 (511.60 it/sec) -training >> step=3157200, episode=527 reward=0.7664826 (531.61 it/sec) -training >> step=3157300, episode=527 reward=0.7734647 (570.59 it/sec) -training >> step=3157400, episode=527 reward=0.7710831 (534.67 it/sec) -training >> step=3157500, episode=527 reward=0.797753 (562.84 it/sec) -training >> step=3157600, episode=527 reward=0.7873856 (559.14 it/sec) -training >> step=3157700, episode=527 reward=0.7746786 (521.24 it/sec) -training >> step=3157800, episode=527 reward=0.7723801 (504.27 it/sec) -training >> step=3157900, episode=527 reward=0.754512 (525.36 it/sec) -training >> step=3158000, episode=527 reward=0.777508 (552.95 it/sec) -training >> step=3158100, episode=527 reward=0.7721537 (547.29 it/sec) -training >> step=3158200, episode=527 reward=0.7508884 (541.98 it/sec) -training >> step=3158300, episode=527 reward=0.7831112 (530.50 it/sec) -training >> step=3158400, episode=527 reward=0.7778473 (518.66 it/sec) -training >> step=3158500, episode=527 reward=0.7615665 (491.37 it/sec) -training >> step=3158600, episode=527 reward=0.7706812 (537.04 it/sec) -training >> step=3158700, episode=527 reward=0.7684592 (574.13 it/sec) -training >> step=3158800, episode=527 reward=0.7675606 (523.47 it/sec) -training >> step=3158900, episode=527 reward=0.7511865 (522.43 it/sec) -training >> step=3159000, episode=527 reward=0.7817762 (541.73 it/sec) -training >> step=3159100, episode=527 reward=0.761009 (487.54 it/sec) -training >> step=3159200, episode=527 reward=0.7760829 (511.45 it/sec) -training >> step=3159300, episode=527 reward=0.7803431 (520.64 it/sec) -training >> step=3159400, episode=527 reward=0.7653873 (553.53 it/sec) -training >> step=3159500, episode=527 reward=0.7842935 (522.61 it/sec) -training >> step=3159600, episode=527 reward=0.7847233 (515.52 it/sec) -training >> step=3159700, episode=527 reward=0.7528253 (535.49 it/sec) -training >> step=3159800, episode=527 reward=0.7531479 (533.87 it/sec) -training >> step=3159900, episode=527 reward=0.7557979 (538.93 it/sec) -training >> step=3160000, episode=527 reward=0.7736999 (504.97 it/sec) -training >> step=3160100, episode=527 reward=0.7687664 (521.67 it/sec) -training >> step=3160200, episode=527 reward=0.7630762 (524.44 it/sec) -training >> step=3160300, episode=527 reward=0.7663361 (536.31 it/sec) -training >> step=3160400, episode=527 reward=0.7644966 (530.17 it/sec) -training >> step=3160500, episode=527 reward=0.768214 (557.35 it/sec) -training >> step=3160600, episode=527 reward=0.7482849 (512.71 it/sec) -training >> step=3160700, episode=527 reward=0.7584344 (504.25 it/sec) -training >> step=3160800, episode=527 reward=0.7420378 (535.55 it/sec) -training >> step=3160900, episode=527 reward=0.7432362 (524.92 it/sec) -training >> step=3161000, episode=527 reward=0.7530537 (558.39 it/sec) -training >> step=3161100, episode=527 reward=0.7710512 (551.29 it/sec) -training >> step=3161200, episode=527 reward=0.757587 (512.43 it/sec) -training >> step=3161300, episode=528 reward=0.7748752 (104.53 it/sec) -training >> step=3161400, episode=528 reward=0.7734777 (492.67 it/sec) -training >> step=3161500, episode=528 reward=0.7546015 (535.25 it/sec) -training >> step=3161600, episode=528 reward=0.7636278 (540.35 it/sec) -training >> step=3161700, episode=528 reward=0.7926388 (530.94 it/sec) -training >> step=3161800, episode=528 reward=0.7666731 (532.54 it/sec) -training >> step=3161900, episode=528 reward=0.768496 (474.96 it/sec) -training >> step=3162000, episode=528 reward=0.7595375 (545.98 it/sec) -training >> step=3162100, episode=528 reward=0.7833292 (527.07 it/sec) -training >> step=3162200, episode=528 reward=0.7661209 (561.90 it/sec) -training >> step=3162300, episode=528 reward=0.7779039 (549.49 it/sec) -training >> step=3162400, episode=528 reward=0.7641555 (537.35 it/sec) -training >> step=3162500, episode=528 reward=0.7551002 (497.59 it/sec) -training >> step=3162600, episode=528 reward=0.7661248 (514.08 it/sec) -training >> step=3162700, episode=528 reward=0.7841607 (525.51 it/sec) -training >> step=3162800, episode=528 reward=0.7815014 (526.37 it/sec) -training >> step=3162900, episode=528 reward=0.7612307 (554.57 it/sec) -training >> step=3163000, episode=528 reward=0.7866573 (493.86 it/sec) -training >> step=3163100, episode=528 reward=0.7827477 (532.10 it/sec) -training >> step=3163200, episode=528 reward=0.7796023 (526.29 it/sec) -training >> step=3163300, episode=528 reward=0.7579752 (543.06 it/sec) -training >> step=3163400, episode=528 reward=0.7632937 (533.57 it/sec) -training >> step=3163500, episode=528 reward=0.7620424 (535.99 it/sec) -training >> step=3163600, episode=528 reward=0.7741585 (426.76 it/sec) -training >> step=3163700, episode=528 reward=0.7881843 (529.55 it/sec) -training >> step=3163800, episode=528 reward=0.7931557 (501.22 it/sec) -training >> step=3163900, episode=528 reward=0.7494589 (508.88 it/sec) -training >> step=3164000, episode=528 reward=0.7656859 (553.10 it/sec) -training >> step=3164100, episode=528 reward=0.7749397 (505.22 it/sec) -training >> step=3164200, episode=528 reward=0.788056 (540.49 it/sec) -training >> step=3164300, episode=528 reward=0.7580495 (562.03 it/sec) -training >> step=3164400, episode=528 reward=0.7586559 (515.08 it/sec) -training >> step=3164500, episode=528 reward=0.7813191 (537.76 it/sec) -training >> step=3164600, episode=528 reward=0.772029 (533.30 it/sec) -training >> step=3164700, episode=528 reward=0.7853329 (552.76 it/sec) -training >> step=3164800, episode=528 reward=0.7648895 (540.11 it/sec) -training >> step=3164900, episode=528 reward=0.7683285 (522.14 it/sec) -training >> step=3165000, episode=528 reward=0.7571764 (546.86 it/sec) -training >> step=3165100, episode=528 reward=0.76505 (524.83 it/sec) -training >> step=3165200, episode=528 reward=0.7783738 (514.88 it/sec) -training >> step=3165300, episode=528 reward=0.7608135 (517.91 it/sec) -training >> step=3165400, episode=528 reward=0.7657711 (567.03 it/sec) -training >> step=3165500, episode=528 reward=0.7696991 (505.80 it/sec) -training >> step=3165600, episode=528 reward=0.7735603 (525.20 it/sec) -training >> step=3165700, episode=528 reward=0.7529717 (507.94 it/sec) -training >> step=3165800, episode=528 reward=0.7434711 (538.96 it/sec) -training >> step=3165900, episode=528 reward=0.7641618 (533.12 it/sec) -training >> step=3166000, episode=528 reward=0.7556666 (505.04 it/sec) -training >> step=3166100, episode=528 reward=0.754096 (592.48 it/sec) -training >> step=3166200, episode=528 reward=0.7683871 (533.48 it/sec) -training >> step=3166300, episode=528 reward=0.7698649 (514.90 it/sec) -training >> step=3166400, episode=528 reward=0.759253 (558.05 it/sec) -training >> step=3166500, episode=528 reward=0.7700419 (547.94 it/sec) -training >> step=3166600, episode=528 reward=0.7375014 (213.73 it/sec) -training >> step=3166700, episode=528 reward=0.761435 (442.63 it/sec) -training >> step=3166800, episode=528 reward=0.7764966 (522.07 it/sec) -training >> step=3166900, episode=528 reward=0.7526715 (476.50 it/sec) -training >> step=3167000, episode=528 reward=0.7662414 (474.35 it/sec) -training >> step=3167100, episode=528 reward=0.7782266 (493.75 it/sec) -training >> step=3167200, episode=528 reward=0.7559281 (505.13 it/sec) -training >> step=3167300, episode=529 reward=0.7658152 (86.89 it/sec) -training >> step=3167400, episode=529 reward=0.755241 (527.62 it/sec) -training >> step=3167500, episode=529 reward=0.7494417 (515.99 it/sec) -training >> step=3167600, episode=529 reward=0.7637128 (524.42 it/sec) -training >> step=3167700, episode=529 reward=0.8075072 (505.07 it/sec) -training >> step=3167800, episode=529 reward=0.7531167 (541.56 it/sec) -training >> step=3167900, episode=529 reward=0.7744368 (533.34 it/sec) -training >> step=3168000, episode=529 reward=0.7762473 (524.27 it/sec) -training >> step=3168100, episode=529 reward=0.7566869 (515.06 it/sec) -training >> step=3168200, episode=529 reward=0.7669851 (534.02 it/sec) -training >> step=3168300, episode=529 reward=0.7622555 (480.49 it/sec) -training >> step=3168400, episode=529 reward=0.7746946 (526.75 it/sec) -training >> step=3168500, episode=529 reward=0.7707298 (523.45 it/sec) -training >> step=3168600, episode=529 reward=0.7538163 (517.29 it/sec) -training >> step=3168700, episode=529 reward=0.7674292 (510.75 it/sec) -training >> step=3168800, episode=529 reward=0.7606061 (520.08 it/sec) -training >> step=3168900, episode=529 reward=0.7479779 (535.92 it/sec) -training >> step=3169000, episode=529 reward=0.7844421 (501.76 it/sec) -training >> step=3169100, episode=529 reward=0.7843361 (521.17 it/sec) -training >> step=3169200, episode=529 reward=0.7901178 (523.66 it/sec) -training >> step=3169300, episode=529 reward=0.7806627 (510.88 it/sec) -training >> step=3169400, episode=529 reward=0.7506211 (533.08 it/sec) -training >> step=3169500, episode=529 reward=0.7662151 (549.91 it/sec) -training >> step=3169600, episode=529 reward=0.7624034 (525.47 it/sec) -training >> step=3169700, episode=529 reward=0.7786288 (497.69 it/sec) -training >> step=3169800, episode=529 reward=0.7514824 (511.75 it/sec) -training >> step=3169900, episode=529 reward=0.7810114 (512.88 it/sec) -training >> step=3170000, episode=529 reward=0.75059 (496.93 it/sec) -training >> step=3170100, episode=529 reward=0.7609854 (506.01 it/sec) -training >> step=3170200, episode=529 reward=0.7586619 (515.86 it/sec) -training >> step=3170300, episode=529 reward=0.7544692 (495.95 it/sec) -training >> step=3170400, episode=529 reward=0.7739002 (524.35 it/sec) -training >> step=3170500, episode=529 reward=0.7652097 (526.35 it/sec) -training >> step=3170600, episode=529 reward=0.7831091 (562.68 it/sec) -training >> step=3170700, episode=529 reward=0.7793359 (502.30 it/sec) -training >> step=3170800, episode=529 reward=0.7772079 (510.02 it/sec) -training >> step=3170900, episode=529 reward=0.734759 (520.76 it/sec) -training >> step=3171000, episode=529 reward=0.7641618 (535.48 it/sec) -training >> step=3171100, episode=529 reward=0.7683735 (494.09 it/sec) -training >> step=3171200, episode=529 reward=0.7498813 (458.77 it/sec) -training >> step=3171300, episode=529 reward=0.7667727 (537.68 it/sec) -training >> step=3171400, episode=529 reward=0.774727 (505.96 it/sec) -training >> step=3171500, episode=529 reward=0.7439701 (495.04 it/sec) -training >> step=3171600, episode=529 reward=0.7639412 (535.32 it/sec) -training >> step=3171700, episode=529 reward=0.7698764 (532.19 it/sec) -training >> step=3171800, episode=529 reward=0.7570534 (525.33 it/sec) -training >> step=3171900, episode=529 reward=0.7723824 (550.79 it/sec) -training >> step=3172000, episode=529 reward=0.7802595 (518.63 it/sec) -training >> step=3172100, episode=529 reward=0.7710314 (514.52 it/sec) -training >> step=3172200, episode=529 reward=0.7703039 (538.86 it/sec) -training >> step=3172300, episode=529 reward=0.7465175 (502.11 it/sec) -training >> step=3172400, episode=529 reward=0.7627124 (572.27 it/sec) -training >> step=3172500, episode=529 reward=0.7601127 (512.26 it/sec) -training >> step=3172600, episode=529 reward=0.730678 (512.87 it/sec) -training >> step=3172700, episode=529 reward=0.7589862 (574.60 it/sec) -training >> step=3172800, episode=529 reward=0.7446015 (529.72 it/sec) -training >> step=3172900, episode=529 reward=0.7148902 (535.94 it/sec) -training >> step=3173000, episode=529 reward=0.7257357 (524.38 it/sec) -training >> step=3173100, episode=529 reward=0.7175184 (529.91 it/sec) -training >> step=3173200, episode=529 reward=0.7302668 (540.82 it/sec) -training >> step=3173300, episode=530 reward=0.7626984 (107.83 it/sec) -training >> step=3173400, episode=530 reward=0.7748264 (505.71 it/sec) -training >> step=3173500, episode=530 reward=0.7576897 (536.42 it/sec) -training >> step=3173600, episode=530 reward=0.7979585 (526.85 it/sec) -training >> step=3173700, episode=530 reward=0.7562342 (526.37 it/sec) -training >> step=3173800, episode=530 reward=0.7678241 (540.92 it/sec) -training >> step=3173900, episode=530 reward=0.7655894 (510.71 it/sec) -training >> step=3174000, episode=530 reward=0.7407486 (541.90 it/sec) -training >> step=3174100, episode=530 reward=0.7520853 (507.44 it/sec) -training >> step=3174200, episode=530 reward=0.7555985 (541.14 it/sec) -training >> step=3174300, episode=530 reward=0.7748492 (520.90 it/sec) -training >> step=3174400, episode=530 reward=0.780566 (519.56 it/sec) -training >> step=3174500, episode=530 reward=0.7772526 (419.99 it/sec) -training >> step=3174600, episode=530 reward=0.7608052 (453.74 it/sec) -training >> step=3174700, episode=530 reward=0.7724077 (385.24 it/sec) -training >> step=3174800, episode=530 reward=0.7802398 (458.01 it/sec) -training >> step=3174900, episode=530 reward=0.7531136 (518.89 it/sec) -training >> step=3175000, episode=530 reward=0.7560747 (480.45 it/sec) -training >> step=3175100, episode=530 reward=0.7824283 (484.11 it/sec) -training >> step=3175200, episode=530 reward=0.7801782 (543.47 it/sec) -training >> step=3175300, episode=530 reward=0.7671575 (522.60 it/sec) -training >> step=3175400, episode=530 reward=0.7742109 (536.16 it/sec) -training >> step=3175500, episode=530 reward=0.7743104 (522.15 it/sec) -training >> step=3175600, episode=530 reward=0.7580184 (550.08 it/sec) -training >> step=3175700, episode=530 reward=0.7614386 (500.04 it/sec) -training >> step=3175800, episode=530 reward=0.783912 (531.94 it/sec) -training >> step=3175900, episode=530 reward=0.7471007 (519.71 it/sec) -training >> step=3176000, episode=530 reward=0.7791659 (529.90 it/sec) -training >> step=3176100, episode=530 reward=0.7670559 (498.25 it/sec) -training >> step=3176200, episode=530 reward=0.7534573 (540.15 it/sec) -training >> step=3176300, episode=530 reward=0.7674634 (538.61 it/sec) -training >> step=3176400, episode=530 reward=0.7772025 (488.47 it/sec) -training >> step=3176500, episode=530 reward=0.751467 (525.86 it/sec) -training >> step=3176600, episode=530 reward=0.7699588 (525.59 it/sec) -training >> step=3176700, episode=530 reward=0.7585731 (536.23 it/sec) -training >> step=3176800, episode=530 reward=0.753811 (522.57 it/sec) -training >> step=3176900, episode=530 reward=0.7672202 (505.32 it/sec) -training >> step=3177000, episode=530 reward=0.7769191 (526.01 it/sec) -training >> step=3177100, episode=530 reward=0.764744 (536.83 it/sec) -training >> step=3177200, episode=530 reward=0.7584015 (527.70 it/sec) -training >> step=3177300, episode=530 reward=0.7539838 (525.77 it/sec) -training >> step=3177400, episode=530 reward=0.7490078 (527.67 it/sec) -training >> step=3177500, episode=530 reward=0.7751742 (517.56 it/sec) -training >> step=3177600, episode=530 reward=0.7569066 (527.83 it/sec) -training >> step=3177700, episode=530 reward=0.7260252 (536.56 it/sec) -training >> step=3177800, episode=530 reward=0.7667298 (541.19 it/sec) -training >> step=3177900, episode=530 reward=0.7755833 (535.73 it/sec) -training >> step=3178000, episode=530 reward=0.7679263 (518.57 it/sec) -training >> step=3178100, episode=530 reward=0.7748213 (534.87 it/sec) -training >> step=3178200, episode=530 reward=0.7648945 (539.60 it/sec) -training >> step=3178300, episode=530 reward=0.758506 (517.44 it/sec) -training >> step=3178400, episode=530 reward=0.7495238 (511.77 it/sec) -training >> step=3178500, episode=530 reward=0.7504193 (530.80 it/sec) -training >> step=3178600, episode=530 reward=0.7358726 (514.04 it/sec) -training >> step=3178700, episode=530 reward=0.7454815 (509.99 it/sec) -training >> step=3178800, episode=530 reward=0.7588827 (508.38 it/sec) -training >> step=3178900, episode=530 reward=0.7657517 (543.43 it/sec) -training >> step=3179000, episode=530 reward=0.7397189 (509.53 it/sec) -training >> step=3179100, episode=530 reward=0.7539063 (530.96 it/sec) -training >> step=3179200, episode=530 reward=0.7525983 (545.56 it/sec) -training >> step=3179300, episode=531 reward=0.7574461 (90.15 it/sec) -training >> step=3179400, episode=531 reward=0.7685163 (486.03 it/sec) -training >> step=3179500, episode=531 reward=0.7643811 (531.45 it/sec) -training >> step=3179600, episode=531 reward=0.7648619 (534.16 it/sec) -training >> step=3179700, episode=531 reward=0.7937151 (531.42 it/sec) -training >> step=3179800, episode=531 reward=0.7926925 (529.64 it/sec) -training >> step=3179900, episode=531 reward=0.7842215 (532.60 it/sec) -training >> step=3180000, episode=531 reward=0.7762952 (500.86 it/sec) -training >> step=3180100, episode=531 reward=0.7716621 (530.49 it/sec) -training >> step=3180200, episode=531 reward=0.7583066 (532.75 it/sec) -training >> step=3180300, episode=531 reward=0.7757485 (562.58 it/sec) -training >> step=3180400, episode=531 reward=0.7637563 (518.45 it/sec) -training >> step=3180500, episode=531 reward=0.7725635 (511.81 it/sec) -training >> step=3180600, episode=531 reward=0.7682556 (522.33 it/sec) -training >> step=3180700, episode=531 reward=0.7608273 (471.68 it/sec) -training >> step=3180800, episode=531 reward=0.7665244 (510.39 it/sec) -training >> step=3180900, episode=531 reward=0.7744218 (539.60 it/sec) -training >> step=3181000, episode=531 reward=0.7988739 (512.51 it/sec) -training >> step=3181100, episode=531 reward=0.7826849 (500.80 it/sec) -training >> step=3181200, episode=531 reward=0.7934215 (525.12 it/sec) -training >> step=3181300, episode=531 reward=0.7589142 (527.22 it/sec) -training >> step=3181400, episode=531 reward=0.765179 (529.05 it/sec) -training >> step=3181500, episode=531 reward=0.7836887 (530.99 it/sec) -training >> step=3181600, episode=531 reward=0.7639658 (531.61 it/sec) -training >> step=3181700, episode=531 reward=0.7678357 (479.79 it/sec) -training >> step=3181800, episode=531 reward=0.7649321 (541.94 it/sec) -training >> step=3181900, episode=531 reward=0.769358 (525.13 it/sec) -training >> step=3182000, episode=531 reward=0.7746038 (538.07 it/sec) -training >> step=3182100, episode=531 reward=0.7815587 (498.13 it/sec) -training >> step=3182200, episode=531 reward=0.7670542 (489.26 it/sec) -training >> step=3182300, episode=531 reward=0.766785 (554.91 it/sec) -training >> step=3182400, episode=531 reward=0.8015011 (499.40 it/sec) -training >> step=3182500, episode=531 reward=0.7713431 (544.93 it/sec) -training >> step=3182600, episode=531 reward=0.7787288 (510.57 it/sec) -training >> step=3182700, episode=531 reward=0.7456124 (566.49 it/sec) -training >> step=3182800, episode=531 reward=0.775409 (516.88 it/sec) -training >> step=3182900, episode=531 reward=0.7501523 (532.68 it/sec) -training >> step=3183000, episode=531 reward=0.7505973 (537.58 it/sec) -training >> step=3183100, episode=531 reward=0.779835 (538.66 it/sec) -training >> step=3183200, episode=531 reward=0.7619984 (484.72 it/sec) -training >> step=3183300, episode=531 reward=0.773455 (501.60 it/sec) -training >> step=3183400, episode=531 reward=0.7765993 (548.72 it/sec) -training >> step=3183500, episode=531 reward=0.7626254 (522.39 it/sec) -training >> step=3183600, episode=531 reward=0.7736221 (542.96 it/sec) -training >> step=3183700, episode=531 reward=0.7766992 (502.04 it/sec) -training >> step=3183800, episode=531 reward=0.7493408 (522.31 it/sec) -training >> step=3183900, episode=531 reward=0.7754842 (526.95 it/sec) -training >> step=3184000, episode=531 reward=0.7675683 (503.60 it/sec) -training >> step=3184100, episode=531 reward=0.7452878 (517.12 it/sec) -training >> step=3184200, episode=531 reward=0.7476732 (554.76 it/sec) -training >> step=3184300, episode=531 reward=0.7447451 (502.74 it/sec) -training >> step=3184400, episode=531 reward=0.7480208 (512.18 it/sec) -training >> step=3184500, episode=531 reward=0.7603199 (527.45 it/sec) -training >> step=3184600, episode=531 reward=0.7604313 (512.24 it/sec) -training >> step=3184700, episode=531 reward=0.7485467 (515.60 it/sec) -training >> step=3184800, episode=531 reward=0.7164846 (518.32 it/sec) -training >> step=3184900, episode=531 reward=0.7478122 (533.31 it/sec) -training >> step=3185000, episode=531 reward=0.753696 (525.77 it/sec) -training >> step=3185100, episode=531 reward=0.7439424 (520.16 it/sec) -training >> step=3185200, episode=531 reward=0.7279445 (522.38 it/sec) -training >> step=3185300, episode=532 reward=0.7372866 (98.70 it/sec) -training >> step=3185400, episode=532 reward=0.7727488 (499.98 it/sec) -training >> step=3185500, episode=532 reward=0.7595291 (479.06 it/sec) -training >> step=3185600, episode=532 reward=0.7627009 (526.80 it/sec) -training >> step=3185700, episode=532 reward=0.7670175 (505.44 it/sec) -training >> step=3185800, episode=532 reward=0.7808406 (522.69 it/sec) -training >> step=3185900, episode=532 reward=0.7560612 (515.93 it/sec) -training >> step=3186000, episode=532 reward=0.7757092 (508.06 it/sec) -training >> step=3186100, episode=532 reward=0.7603053 (504.73 it/sec) -training >> step=3186200, episode=532 reward=0.7692009 (534.59 it/sec) -training >> step=3186300, episode=532 reward=0.7609826 (561.24 it/sec) -training >> step=3186400, episode=532 reward=0.7518955 (513.26 it/sec) -training >> step=3186500, episode=532 reward=0.758687 (523.05 it/sec) -training >> step=3186600, episode=532 reward=0.7566977 (529.29 it/sec) -training >> step=3186700, episode=532 reward=0.7622925 (516.53 it/sec) -training >> step=3186800, episode=532 reward=0.7859364 (551.09 it/sec) -training >> step=3186900, episode=532 reward=0.7870346 (535.49 it/sec) -training >> step=3187000, episode=532 reward=0.7759327 (503.92 it/sec) -training >> step=3187100, episode=532 reward=0.7864162 (512.56 it/sec) -training >> step=3187200, episode=532 reward=0.7645013 (487.74 it/sec) -training >> step=3187300, episode=532 reward=0.7775422 (515.85 it/sec) -training >> step=3187400, episode=532 reward=0.7795675 (521.98 it/sec) -training >> step=3187500, episode=532 reward=0.7678621 (537.94 it/sec) -training >> step=3187600, episode=532 reward=0.7814195 (507.67 it/sec) -training >> step=3187700, episode=532 reward=0.7675649 (523.05 it/sec) -training >> step=3187800, episode=532 reward=0.7963558 (528.89 it/sec) -training >> step=3187900, episode=532 reward=0.7729046 (541.45 it/sec) -training >> step=3188000, episode=532 reward=0.7849674 (541.87 it/sec) -training >> step=3188100, episode=532 reward=0.7539572 (535.39 it/sec) -training >> step=3188200, episode=532 reward=0.7735896 (501.52 it/sec) -training >> step=3188300, episode=532 reward=0.7782772 (529.47 it/sec) -training >> step=3188400, episode=532 reward=0.7550811 (557.31 it/sec) -training >> step=3188500, episode=532 reward=0.7708249 (530.65 it/sec) -training >> step=3188600, episode=532 reward=0.7944943 (490.12 it/sec) -training >> step=3188700, episode=532 reward=0.7788301 (538.38 it/sec) -training >> step=3188800, episode=532 reward=0.769423 (490.43 it/sec) -training >> step=3188900, episode=532 reward=0.7468296 (510.46 it/sec) -training >> step=3189000, episode=532 reward=0.770354 (509.93 it/sec) -training >> step=3189100, episode=532 reward=0.7800906 (520.99 it/sec) -training >> step=3189200, episode=532 reward=0.7598541 (500.16 it/sec) -training >> step=3189300, episode=532 reward=0.7765307 (507.81 it/sec) -training >> step=3189400, episode=532 reward=0.7548632 (530.57 it/sec) -training >> step=3189500, episode=532 reward=0.7754701 (521.50 it/sec) -training >> step=3189600, episode=532 reward=0.7653874 (528.55 it/sec) -training >> step=3189700, episode=532 reward=0.7505546 (493.38 it/sec) -training >> step=3189800, episode=532 reward=0.7439531 (523.91 it/sec) -training >> step=3189900, episode=532 reward=0.7721153 (538.09 it/sec) -training >> step=3190000, episode=532 reward=0.7508962 (514.58 it/sec) -training >> step=3190100, episode=532 reward=0.7522684 (532.12 it/sec) -training >> step=3190200, episode=532 reward=0.7655146 (520.14 it/sec) -training >> step=3190300, episode=532 reward=0.7502577 (519.99 it/sec) -training >> step=3190400, episode=532 reward=0.734179 (524.50 it/sec) -training >> step=3190500, episode=532 reward=0.743017 (538.63 it/sec) -training >> step=3190600, episode=532 reward=0.7446139 (516.04 it/sec) -training >> step=3190700, episode=532 reward=0.7291586 (492.17 it/sec) -training >> step=3190800, episode=532 reward=0.7395911 (468.45 it/sec) -training >> step=3190900, episode=532 reward=0.7348537 (550.94 it/sec) -training >> step=3191000, episode=532 reward=0.7227373 (524.54 it/sec) -training >> step=3191100, episode=532 reward=0.7527323 (535.23 it/sec) -training >> step=3191200, episode=532 reward=0.7462609 (531.52 it/sec) -training >> step=3191300, episode=533 reward=0.7703152 (112.04 it/sec) -training >> step=3191400, episode=533 reward=0.7657939 (509.25 it/sec) -training >> step=3191500, episode=533 reward=0.7635843 (478.94 it/sec) -training >> step=3191600, episode=533 reward=0.7870644 (536.28 it/sec) -training >> step=3191700, episode=533 reward=0.7391914 (542.90 it/sec) -training >> step=3191800, episode=533 reward=0.778511 (520.01 it/sec) -training >> step=3191900, episode=533 reward=0.741043 (558.44 it/sec) -training >> step=3192000, episode=533 reward=0.7561412 (523.02 it/sec) -training >> step=3192100, episode=533 reward=0.7780453 (515.01 it/sec) -training >> step=3192200, episode=533 reward=0.7879363 (531.32 it/sec) -training >> step=3192300, episode=533 reward=0.782282 (522.66 it/sec) -training >> step=3192400, episode=533 reward=0.7762603 (543.30 it/sec) -training >> step=3192500, episode=533 reward=0.7859505 (509.04 it/sec) -training >> step=3192600, episode=533 reward=0.7683066 (495.90 it/sec) -training >> step=3192700, episode=533 reward=0.7711444 (575.97 it/sec) -training >> step=3192800, episode=533 reward=0.7717593 (523.47 it/sec) -training >> step=3192900, episode=533 reward=0.7759028 (523.17 it/sec) -training >> step=3193000, episode=533 reward=0.7773946 (511.52 it/sec) -training >> step=3193100, episode=533 reward=0.7865255 (509.09 it/sec) -training >> step=3193200, episode=533 reward=0.7657745 (539.54 it/sec) -training >> step=3193300, episode=533 reward=0.7644649 (551.26 it/sec) -training >> step=3193400, episode=533 reward=0.7739725 (542.92 it/sec) -training >> step=3193500, episode=533 reward=0.7705669 (520.17 it/sec) -training >> step=3193600, episode=533 reward=0.7630765 (469.67 it/sec) -training >> step=3193700, episode=533 reward=0.7952944 (513.52 it/sec) -training >> step=3193800, episode=533 reward=0.7913713 (565.84 it/sec) -training >> step=3193900, episode=533 reward=0.7832199 (546.25 it/sec) -training >> step=3194000, episode=533 reward=0.7792864 (533.17 it/sec) -training >> step=3194100, episode=533 reward=0.7590173 (523.33 it/sec) -training >> step=3194200, episode=533 reward=0.7614556 (485.69 it/sec) -training >> step=3194300, episode=533 reward=0.7610486 (513.85 it/sec) -training >> step=3194400, episode=533 reward=0.7672917 (529.49 it/sec) -training >> step=3194500, episode=533 reward=0.7575523 (551.13 it/sec) -training >> step=3194600, episode=533 reward=0.7573541 (512.50 it/sec) -training >> step=3194700, episode=533 reward=0.7864091 (490.61 it/sec) -training >> step=3194800, episode=533 reward=0.7696597 (548.42 it/sec) -training >> step=3194900, episode=533 reward=0.7581834 (525.76 it/sec) -training >> step=3195000, episode=533 reward=0.7625926 (520.62 it/sec) -training >> step=3195100, episode=533 reward=0.7877483 (525.14 it/sec) -training >> step=3195200, episode=533 reward=0.7609242 (537.41 it/sec) -training >> step=3195300, episode=533 reward=0.784424 (489.56 it/sec) -training >> step=3195400, episode=533 reward=0.7832652 (540.14 it/sec) -training >> step=3195500, episode=533 reward=0.7764555 (524.02 it/sec) -training >> step=3195600, episode=533 reward=0.7611933 (522.49 it/sec) -training >> step=3195700, episode=533 reward=0.7644764 (501.54 it/sec) -training >> step=3195800, episode=533 reward=0.7838309 (491.31 it/sec) -training >> step=3195900, episode=533 reward=0.7578464 (558.20 it/sec) -training >> step=3196000, episode=533 reward=0.7688835 (514.46 it/sec) -training >> step=3196100, episode=533 reward=0.7581035 (520.75 it/sec) -training >> step=3196200, episode=533 reward=0.7506142 (522.34 it/sec) -training >> step=3196300, episode=533 reward=0.7688478 (482.63 it/sec) -training >> step=3196400, episode=533 reward=0.7515321 (538.75 it/sec) -training >> step=3196500, episode=533 reward=0.761731 (544.07 it/sec) -training >> step=3196600, episode=533 reward=0.7650302 (544.39 it/sec) -training >> step=3196700, episode=533 reward=0.7587813 (530.49 it/sec) -training >> step=3196800, episode=533 reward=0.765278 (479.54 it/sec) -training >> step=3196900, episode=533 reward=0.73989 (483.86 it/sec) -training >> step=3197000, episode=533 reward=0.7425594 (570.50 it/sec) -training >> step=3197100, episode=533 reward=0.7518115 (507.10 it/sec) -training >> step=3197200, episode=533 reward=0.7353286 (521.09 it/sec) -training >> step=3197300, episode=534 reward=0.7637812 (97.11 it/sec) -training >> step=3197400, episode=534 reward=0.7431756 (492.70 it/sec) -training >> step=3197500, episode=534 reward=0.7751252 (474.39 it/sec) -training >> step=3197600, episode=534 reward=0.7612302 (579.71 it/sec) -training >> step=3197700, episode=534 reward=0.7722728 (514.06 it/sec) -training >> step=3197800, episode=534 reward=0.742765 (528.63 it/sec) -training >> step=3197900, episode=534 reward=0.7666987 (548.05 it/sec) -training >> step=3198000, episode=534 reward=0.7901208 (493.58 it/sec) -training >> step=3198100, episode=534 reward=0.7932552 (518.37 it/sec) -training >> step=3198200, episode=534 reward=0.77279 (531.14 it/sec) -training >> step=3198300, episode=534 reward=0.7766581 (510.27 it/sec) -training >> step=3198400, episode=534 reward=0.7446514 (533.78 it/sec) -training >> step=3198500, episode=534 reward=0.7718909 (515.34 it/sec) -training >> step=3198600, episode=534 reward=0.765181 (518.83 it/sec) -training >> step=3198700, episode=534 reward=0.7539292 (530.27 it/sec) -training >> step=3198800, episode=534 reward=0.7655335 (515.44 it/sec) -training >> step=3198900, episode=534 reward=0.7673639 (503.36 it/sec) -training >> step=3199000, episode=534 reward=0.7770589 (534.85 it/sec) -training >> step=3199100, episode=534 reward=0.7716538 (529.26 it/sec) -training >> step=3199200, episode=534 reward=0.7684349 (523.79 it/sec) -training >> step=3199300, episode=534 reward=0.7533712 (539.10 it/sec) -training >> step=3199400, episode=534 reward=0.7686852 (537.85 it/sec) -training >> step=3199500, episode=534 reward=0.7652498 (518.06 it/sec) -training >> step=3199600, episode=534 reward=0.7747204 (523.80 it/sec) -training >> step=3199700, episode=534 reward=0.770907 (553.09 it/sec) -training >> step=3199800, episode=534 reward=0.7628999 (528.40 it/sec) -training >> step=3199900, episode=534 reward=0.7678165 (528.01 it/sec) -training >> step=3200000, episode=534 reward=0.748966 (510.65 it/sec) -training >> step=3200100, episode=534 reward=0.7573222 (551.45 it/sec) -training >> step=3200200, episode=534 reward=0.7740095 (498.57 it/sec) -training >> step=3200300, episode=534 reward=0.7768553 (521.44 it/sec) -training >> step=3200400, episode=534 reward=0.7772416 (470.96 it/sec) -training >> step=3200500, episode=534 reward=0.7544632 (487.67 it/sec) -training >> step=3200600, episode=534 reward=0.7710313 (525.21 it/sec) -training >> step=3200700, episode=534 reward=0.8015993 (483.46 it/sec) -training >> step=3200800, episode=534 reward=0.7396321 (576.19 it/sec) -training >> step=3200900, episode=534 reward=0.780324 (522.16 it/sec) -training >> step=3201000, episode=534 reward=0.7612953 (489.84 it/sec) -training >> step=3201100, episode=534 reward=0.7665294 (527.54 it/sec) -training >> step=3201200, episode=534 reward=0.7609168 (503.17 it/sec) -training >> step=3201300, episode=534 reward=0.7759364 (523.80 it/sec) -training >> step=3201400, episode=534 reward=0.7703154 (528.33 it/sec) -training >> step=3201500, episode=534 reward=0.7843974 (542.65 it/sec) -training >> step=3201600, episode=534 reward=0.7851195 (499.76 it/sec) -training >> step=3201700, episode=534 reward=0.7557632 (520.64 it/sec) -training >> step=3201800, episode=534 reward=0.7710898 (493.40 it/sec) -training >> step=3201900, episode=534 reward=0.7691417 (566.97 it/sec) -training >> step=3202000, episode=534 reward=0.7484805 (505.28 it/sec) -training >> step=3202100, episode=534 reward=0.7605281 (487.03 it/sec) -training >> step=3202200, episode=534 reward=0.7512794 (568.22 it/sec) -training >> step=3202300, episode=534 reward=0.7416284 (521.42 it/sec) -training >> step=3202400, episode=534 reward=0.7492778 (542.35 it/sec) -training >> step=3202500, episode=534 reward=0.7442473 (520.97 it/sec) -training >> step=3202600, episode=534 reward=0.7510691 (558.54 it/sec) -training >> step=3202700, episode=534 reward=0.7351648 (488.47 it/sec) -training >> step=3202800, episode=534 reward=0.7488508 (511.32 it/sec) -training >> step=3202900, episode=534 reward=0.7245141 (518.40 it/sec) -training >> step=3203000, episode=534 reward=0.740112 (580.88 it/sec) -training >> step=3203100, episode=534 reward=0.7462471 (519.12 it/sec) -training >> step=3203200, episode=534 reward=0.7527818 (528.27 it/sec) -training >> step=3203300, episode=535 reward=0.7645969 (92.24 it/sec) -training >> step=3203400, episode=535 reward=0.7655203 (478.14 it/sec) -training >> step=3203500, episode=535 reward=0.765083 (526.20 it/sec) -training >> step=3203600, episode=535 reward=0.7595328 (551.98 it/sec) -training >> step=3203700, episode=535 reward=0.773386 (531.45 it/sec) -training >> step=3203800, episode=535 reward=0.7665672 (499.10 it/sec) -training >> step=3203900, episode=535 reward=0.770291 (523.79 it/sec) -training >> step=3204000, episode=535 reward=0.7682236 (537.65 it/sec) -training >> step=3204100, episode=535 reward=0.7658545 (535.16 it/sec) -training >> step=3204200, episode=535 reward=0.7526367 (516.70 it/sec) -training >> step=3204300, episode=535 reward=0.7822193 (519.79 it/sec) -training >> step=3204400, episode=535 reward=0.7891801 (539.51 it/sec) -training >> step=3204500, episode=535 reward=0.7721273 (502.35 it/sec) -training >> step=3204600, episode=535 reward=0.749283 (555.06 it/sec) -training >> step=3204700, episode=535 reward=0.7772295 (529.69 it/sec) -training >> step=3204800, episode=535 reward=0.765811 (516.54 it/sec) -training >> step=3204900, episode=535 reward=0.7737035 (524.96 it/sec) -training >> step=3205000, episode=535 reward=0.7615141 (525.27 it/sec) -training >> step=3205100, episode=535 reward=0.7772554 (521.98 it/sec) -training >> step=3205200, episode=535 reward=0.7642684 (488.37 it/sec) -training >> step=3205300, episode=535 reward=0.7733562 (519.73 it/sec) -training >> step=3205400, episode=535 reward=0.7817771 (545.06 it/sec) -training >> step=3205500, episode=535 reward=0.7540941 (520.61 it/sec) -training >> step=3205600, episode=535 reward=0.7994602 (476.99 it/sec) -training >> step=3205700, episode=535 reward=0.7560795 (543.35 it/sec) -training >> step=3205800, episode=535 reward=0.7807626 (519.64 it/sec) -training >> step=3205900, episode=535 reward=0.7552475 (540.81 it/sec) -training >> step=3206000, episode=535 reward=0.7720668 (517.45 it/sec) -training >> step=3206100, episode=535 reward=0.7817903 (563.74 it/sec) -training >> step=3206200, episode=535 reward=0.765352 (491.49 it/sec) -training >> step=3206300, episode=535 reward=0.7768346 (522.08 it/sec) -training >> step=3206400, episode=535 reward=0.7401295 (509.93 it/sec) -training >> step=3206500, episode=535 reward=0.7667407 (536.22 it/sec) -training >> step=3206600, episode=535 reward=0.7771301 (484.69 it/sec) -training >> step=3206700, episode=535 reward=0.7849706 (498.57 it/sec) -training >> step=3206800, episode=535 reward=0.7687972 (536.74 it/sec) -training >> step=3206900, episode=535 reward=0.7831678 (523.16 it/sec) -training >> step=3207000, episode=535 reward=0.7553378 (512.40 it/sec) -training >> step=3207100, episode=535 reward=0.7722741 (496.78 it/sec) -training >> step=3207200, episode=535 reward=0.7661062 (523.98 it/sec) -training >> step=3207300, episode=535 reward=0.7728508 (521.03 it/sec) -training >> step=3207400, episode=535 reward=0.7544253 (527.40 it/sec) -training >> step=3207500, episode=535 reward=0.7532489 (494.10 it/sec) -training >> step=3207600, episode=535 reward=0.7559381 (549.97 it/sec) -training >> step=3207700, episode=535 reward=0.7598542 (482.78 it/sec) -training >> step=3207800, episode=535 reward=0.7576591 (506.21 it/sec) -training >> step=3207900, episode=535 reward=0.7764773 (549.19 it/sec) -training >> step=3208000, episode=535 reward=0.7708867 (534.04 it/sec) -training >> step=3208100, episode=535 reward=0.733826 (506.02 it/sec) -training >> step=3208200, episode=535 reward=0.7517858 (528.28 it/sec) -training >> step=3208300, episode=535 reward=0.7517569 (503.07 it/sec) -training >> step=3208400, episode=535 reward=0.7586365 (508.74 it/sec) -training >> step=3208500, episode=535 reward=0.7499483 (528.38 it/sec) -training >> step=3208600, episode=535 reward=0.7734964 (523.69 it/sec) -training >> step=3208700, episode=535 reward=0.7502627 (537.29 it/sec) -training >> step=3208800, episode=535 reward=0.7648311 (501.91 it/sec) -training >> step=3208900, episode=535 reward=0.7497492 (497.08 it/sec) -training >> step=3209000, episode=535 reward=0.7229301 (522.40 it/sec) -training >> step=3209100, episode=535 reward=0.7554157 (533.09 it/sec) -training >> step=3209200, episode=535 reward=0.749066 (512.95 it/sec) -training >> step=3209300, episode=536 reward=0.7504649 (98.98 it/sec) -training >> step=3209400, episode=536 reward=0.773002 (538.55 it/sec) -training >> step=3209500, episode=536 reward=0.7609797 (491.93 it/sec) -training >> step=3209600, episode=536 reward=0.7822927 (502.17 it/sec) -training >> step=3209700, episode=536 reward=0.7775587 (509.97 it/sec) -training >> step=3209800, episode=536 reward=0.7922561 (503.55 it/sec) -training >> step=3209900, episode=536 reward=0.8009544 (498.38 it/sec) -training >> step=3210000, episode=536 reward=0.7924944 (488.30 it/sec) -training >> step=3210100, episode=536 reward=0.7730079 (539.74 it/sec) -training >> step=3210200, episode=536 reward=0.7740335 (517.29 it/sec) -training >> step=3210300, episode=536 reward=0.7659019 (535.80 it/sec) -training >> step=3210400, episode=536 reward=0.7345996 (529.06 it/sec) -training >> step=3210500, episode=536 reward=0.7820906 (520.90 it/sec) -training >> step=3210600, episode=536 reward=0.7775677 (529.45 it/sec) -training >> step=3210700, episode=536 reward=0.7814731 (558.71 it/sec) -training >> step=3210800, episode=536 reward=0.7485048 (516.91 it/sec) -training >> step=3210900, episode=536 reward=0.7672349 (529.32 it/sec) -training >> step=3211000, episode=536 reward=0.7728448 (530.27 it/sec) -training >> step=3211100, episode=536 reward=0.7732814 (514.74 it/sec) -training >> step=3211200, episode=536 reward=0.7718299 (524.47 it/sec) -training >> step=3211300, episode=536 reward=0.7589526 (551.21 it/sec) -training >> step=3211400, episode=536 reward=0.7716828 (507.51 it/sec) -training >> step=3211500, episode=536 reward=0.7796527 (562.93 it/sec) -training >> step=3211600, episode=536 reward=0.7658175 (514.34 it/sec) -training >> step=3211700, episode=536 reward=0.7737825 (516.16 it/sec) -training >> step=3211800, episode=536 reward=0.7634061 (561.09 it/sec) -training >> step=3211900, episode=536 reward=0.7694222 (497.61 it/sec) -training >> step=3212000, episode=536 reward=0.7799366 (491.86 it/sec) -training >> step=3212100, episode=536 reward=0.7673982 (528.95 it/sec) -training >> step=3212200, episode=536 reward=0.7763286 (535.08 it/sec) -training >> step=3212300, episode=536 reward=0.8060917 (514.73 it/sec) -training >> step=3212400, episode=536 reward=0.7917133 (515.06 it/sec) -training >> step=3212500, episode=536 reward=0.7755174 (533.79 it/sec) -training >> step=3212600, episode=536 reward=0.7597767 (538.80 it/sec) -training >> step=3212700, episode=536 reward=0.7646891 (503.73 it/sec) -training >> step=3212800, episode=536 reward=0.7765881 (535.41 it/sec) -training >> step=3212900, episode=536 reward=0.7659817 (530.10 it/sec) -training >> step=3213000, episode=536 reward=0.7761987 (515.55 it/sec) -training >> step=3213100, episode=536 reward=0.785121 (517.10 it/sec) -training >> step=3213200, episode=536 reward=0.7532977 (484.92 it/sec) -training >> step=3213300, episode=536 reward=0.7624658 (526.83 it/sec) -training >> step=3213400, episode=536 reward=0.7519985 (529.48 it/sec) -training >> step=3213500, episode=536 reward=0.7803378 (520.03 it/sec) -training >> step=3213600, episode=536 reward=0.7647257 (539.11 it/sec) -training >> step=3213700, episode=536 reward=0.7588115 (517.74 it/sec) -training >> step=3213800, episode=536 reward=0.7613913 (483.92 it/sec) -training >> step=3213900, episode=536 reward=0.7535556 (541.70 it/sec) -training >> step=3214000, episode=536 reward=0.7495755 (549.39 it/sec) -training >> step=3214100, episode=536 reward=0.7672917 (486.33 it/sec) -training >> step=3214200, episode=536 reward=0.7177526 (530.83 it/sec) -training >> step=3214300, episode=536 reward=0.7678781 (531.01 it/sec) -training >> step=3214400, episode=536 reward=0.7416297 (495.23 it/sec) -training >> step=3214500, episode=536 reward=0.7626885 (515.41 it/sec) -training >> step=3214600, episode=536 reward=0.7560033 (487.34 it/sec) -training >> step=3214700, episode=536 reward=0.7624923 (571.33 it/sec) -training >> step=3214800, episode=536 reward=0.7635168 (502.08 it/sec) -training >> step=3214900, episode=536 reward=0.7436274 (529.91 it/sec) -training >> step=3215000, episode=536 reward=0.7763525 (502.95 it/sec) -training >> step=3215100, episode=536 reward=0.7423854 (531.57 it/sec) -training >> step=3215200, episode=536 reward=0.7581379 (494.71 it/sec) -training >> step=3215300, episode=537 reward=0.7600152 (96.26 it/sec) -training >> step=3215400, episode=537 reward=0.7480414 (491.36 it/sec) -training >> step=3215500, episode=537 reward=0.7688105 (490.91 it/sec) -training >> step=3215600, episode=537 reward=0.7534353 (515.29 it/sec) -training >> step=3215700, episode=537 reward=0.7485682 (563.93 it/sec) -training >> step=3215800, episode=537 reward=0.7581508 (511.67 it/sec) -training >> step=3215900, episode=537 reward=0.7809336 (544.74 it/sec) -training >> step=3216000, episode=537 reward=0.7399847 (516.02 it/sec) -training >> step=3216100, episode=537 reward=0.7719231 (494.91 it/sec) -training >> step=3216200, episode=537 reward=0.7795137 (514.55 it/sec) -training >> step=3216300, episode=537 reward=0.7613211 (532.18 it/sec) -training >> step=3216400, episode=537 reward=0.7822055 (568.64 it/sec) -training >> step=3216500, episode=537 reward=0.8063624 (482.96 it/sec) -training >> step=3216600, episode=537 reward=0.7652956 (522.30 it/sec) -training >> step=3216700, episode=537 reward=0.7858909 (521.83 it/sec) -training >> step=3216800, episode=537 reward=0.7777258 (517.74 it/sec) -training >> step=3216900, episode=537 reward=0.7685727 (533.45 it/sec) -training >> step=3217000, episode=537 reward=0.764867 (510.57 it/sec) -training >> step=3217100, episode=537 reward=0.7666501 (506.76 it/sec) -training >> step=3217200, episode=537 reward=0.7753057 (506.64 it/sec) -training >> step=3217300, episode=537 reward=0.7959898 (493.47 it/sec) -training >> step=3217400, episode=537 reward=0.7769663 (533.26 it/sec) -training >> step=3217500, episode=537 reward=0.777817 (516.29 it/sec) -training >> step=3217600, episode=537 reward=0.778465 (504.06 it/sec) -training >> step=3217700, episode=537 reward=0.7555898 (505.62 it/sec) -training >> step=3217800, episode=537 reward=0.7682419 (538.46 it/sec) -training >> step=3217900, episode=537 reward=0.7645112 (517.34 it/sec) -training >> step=3218000, episode=537 reward=0.7733366 (545.24 it/sec) -training >> step=3218100, episode=537 reward=0.7652578 (476.50 it/sec) -training >> step=3218200, episode=537 reward=0.7560793 (546.69 it/sec) -training >> step=3218300, episode=537 reward=0.7826384 (523.66 it/sec) -training >> step=3218400, episode=537 reward=0.7665509 (494.31 it/sec) -training >> step=3218500, episode=537 reward=0.7656754 (556.42 it/sec) -training >> step=3218600, episode=537 reward=0.7740592 (511.55 it/sec) -training >> step=3218700, episode=537 reward=0.7706327 (524.89 it/sec) -training >> step=3218800, episode=537 reward=0.7696986 (520.85 it/sec) -training >> step=3218900, episode=537 reward=0.7633585 (557.08 it/sec) -training >> step=3219000, episode=537 reward=0.763343 (544.29 it/sec) -training >> step=3219100, episode=537 reward=0.773934 (526.20 it/sec) -training >> step=3219200, episode=537 reward=0.7744265 (489.43 it/sec) -training >> step=3219300, episode=537 reward=0.7688172 (500.21 it/sec) -training >> step=3219400, episode=537 reward=0.7821162 (523.21 it/sec) -training >> step=3219500, episode=537 reward=0.7640871 (535.64 it/sec) -training >> step=3219600, episode=537 reward=0.7489585 (513.43 it/sec) -training >> step=3219700, episode=537 reward=0.7499069 (506.36 it/sec) -training >> step=3219800, episode=537 reward=0.7528444 (511.53 it/sec) -training >> step=3219900, episode=537 reward=0.7478145 (490.51 it/sec) -training >> step=3220000, episode=537 reward=0.7420729 (509.49 it/sec) -training >> step=3220100, episode=537 reward=0.7750812 (514.51 it/sec) -training >> step=3220200, episode=537 reward=0.7744785 (499.91 it/sec) -training >> step=3220300, episode=537 reward=0.7478597 (502.90 it/sec) -training >> step=3220400, episode=537 reward=0.7729003 (545.68 it/sec) -training >> step=3220500, episode=537 reward=0.778963 (508.79 it/sec) -training >> step=3220600, episode=537 reward=0.7642544 (541.06 it/sec) -training >> step=3220700, episode=537 reward=0.7695307 (544.60 it/sec) -training >> step=3220800, episode=537 reward=0.7650134 (491.23 it/sec) -training >> step=3220900, episode=537 reward=0.7560071 (495.63 it/sec) -training >> step=3221000, episode=537 reward=0.7610827 (542.68 it/sec) -training >> step=3221100, episode=537 reward=0.7825069 (547.42 it/sec) -training >> step=3221200, episode=537 reward=0.7767023 (521.51 it/sec) -training >> step=3221300, episode=538 reward=0.7854396 (109.23 it/sec) -training >> step=3221400, episode=538 reward=0.78396 (519.32 it/sec) -training >> step=3221500, episode=538 reward=0.7601948 (471.14 it/sec) -training >> step=3221600, episode=538 reward=0.7989364 (523.70 it/sec) -training >> step=3221700, episode=538 reward=0.7613147 (492.02 it/sec) -training >> step=3221800, episode=538 reward=0.7804982 (528.17 it/sec) -training >> step=3221900, episode=538 reward=0.7748683 (510.53 it/sec) -training >> step=3222000, episode=538 reward=0.7787575 (482.56 it/sec) -training >> step=3222100, episode=538 reward=0.7906659 (537.18 it/sec) -training >> step=3222200, episode=538 reward=0.786091 (510.87 it/sec) -training >> step=3222300, episode=538 reward=0.7709685 (511.45 it/sec) -training >> step=3222400, episode=538 reward=0.7816818 (534.76 it/sec) -training >> step=3222500, episode=538 reward=0.7650086 (498.26 it/sec) -training >> step=3222600, episode=538 reward=0.7593254 (495.30 it/sec) -training >> step=3222700, episode=538 reward=0.7656785 (528.69 it/sec) -training >> step=3222800, episode=538 reward=0.7859026 (540.90 it/sec) -training >> step=3222900, episode=538 reward=0.7687581 (568.67 it/sec) -training >> step=3223000, episode=538 reward=0.7559525 (511.24 it/sec) -training >> step=3223100, episode=538 reward=0.763171 (489.80 it/sec) -training >> step=3223200, episode=538 reward=0.7864777 (537.33 it/sec) -training >> step=3223300, episode=538 reward=0.7727887 (529.67 it/sec) -training >> step=3223400, episode=538 reward=0.7580448 (542.96 it/sec) -training >> step=3223500, episode=538 reward=0.7472436 (529.97 it/sec) -training >> step=3223600, episode=538 reward=0.7792974 (493.64 it/sec) -training >> step=3223700, episode=538 reward=0.7801952 (506.31 it/sec) -training >> step=3223800, episode=538 reward=0.7694194 (510.09 it/sec) -training >> step=3223900, episode=538 reward=0.7675816 (531.81 it/sec) -training >> step=3224000, episode=538 reward=0.7817427 (514.04 it/sec) -training >> step=3224100, episode=538 reward=0.7665965 (501.48 it/sec) -training >> step=3224200, episode=538 reward=0.7669269 (521.88 it/sec) -training >> step=3224300, episode=538 reward=0.7635171 (519.83 it/sec) -training >> step=3224400, episode=538 reward=0.7737488 (501.26 it/sec) -training >> step=3224500, episode=538 reward=0.7712387 (520.12 it/sec) -training >> step=3224600, episode=538 reward=0.784729 (543.82 it/sec) -training >> step=3224700, episode=538 reward=0.7809089 (533.58 it/sec) -training >> step=3224800, episode=538 reward=0.7634609 (518.98 it/sec) -training >> step=3224900, episode=538 reward=0.7595174 (504.05 it/sec) -training >> step=3225000, episode=538 reward=0.7736453 (543.95 it/sec) -training >> step=3225100, episode=538 reward=0.7496187 (526.36 it/sec) -training >> step=3225200, episode=538 reward=0.7759318 (477.23 it/sec) -training >> step=3225300, episode=538 reward=0.7724862 (521.56 it/sec) -training >> step=3225400, episode=538 reward=0.7556936 (564.15 it/sec) -training >> step=3225500, episode=538 reward=0.7478923 (526.28 it/sec) -training >> step=3225600, episode=538 reward=0.7653578 (540.01 it/sec) -training >> step=3225700, episode=538 reward=0.7621761 (499.57 it/sec) -training >> step=3225800, episode=538 reward=0.7524858 (521.98 it/sec) -training >> step=3225900, episode=538 reward=0.7486848 (528.93 it/sec) -training >> step=3226000, episode=538 reward=0.7608905 (545.36 it/sec) -training >> step=3226100, episode=538 reward=0.7555289 (552.48 it/sec) -training >> step=3226200, episode=538 reward=0.7625153 (521.96 it/sec) -training >> step=3226300, episode=538 reward=0.7528899 (482.01 it/sec) -training >> step=3226400, episode=538 reward=0.7624696 (558.13 it/sec) -training >> step=3226500, episode=538 reward=0.7484474 (518.55 it/sec) -training >> step=3226600, episode=538 reward=0.7694264 (529.25 it/sec) -training >> step=3226700, episode=538 reward=0.7480655 (535.00 it/sec) -training >> step=3226800, episode=538 reward=0.764056 (521.24 it/sec) -training >> step=3226900, episode=538 reward=0.7627946 (507.35 it/sec) -training >> step=3227000, episode=538 reward=0.7450274 (540.87 it/sec) -training >> step=3227100, episode=538 reward=0.7291471 (519.50 it/sec) -training >> step=3227200, episode=538 reward=0.730993 (552.77 it/sec) -training >> step=3227300, episode=539 reward=0.7688161 (98.86 it/sec) -training >> step=3227400, episode=539 reward=0.8076341 (520.28 it/sec) -training >> step=3227500, episode=539 reward=0.7630678 (500.88 it/sec) -training >> step=3227600, episode=539 reward=0.7814106 (532.34 it/sec) -training >> step=3227700, episode=539 reward=0.7810899 (535.18 it/sec) -training >> step=3227800, episode=539 reward=0.772953 (520.22 it/sec) -training >> step=3227900, episode=539 reward=0.7590023 (523.93 it/sec) -training >> step=3228000, episode=539 reward=0.7585473 (523.16 it/sec) -training >> step=3228100, episode=539 reward=0.7869011 (514.63 it/sec) -training >> step=3228200, episode=539 reward=0.7761553 (553.95 it/sec) -training >> step=3228300, episode=539 reward=0.7682309 (543.52 it/sec) -training >> step=3228400, episode=539 reward=0.7729244 (529.94 it/sec) -training >> step=3228500, episode=539 reward=0.767003 (512.70 it/sec) -training >> step=3228600, episode=539 reward=0.7825679 (523.73 it/sec) -training >> step=3228700, episode=539 reward=0.7524904 (512.45 it/sec) -training >> step=3228800, episode=539 reward=0.7922966 (509.74 it/sec) -training >> step=3228900, episode=539 reward=0.7720477 (511.42 it/sec) -training >> step=3229000, episode=539 reward=0.7735235 (542.84 it/sec) -training >> step=3229100, episode=539 reward=0.7494971 (515.20 it/sec) -training >> step=3229200, episode=539 reward=0.7627186 (535.73 it/sec) -training >> step=3229300, episode=539 reward=0.7754957 (538.34 it/sec) -training >> step=3229400, episode=539 reward=0.7598813 (524.27 it/sec) -training >> step=3229500, episode=539 reward=0.7785645 (529.73 it/sec) -training >> step=3229600, episode=539 reward=0.7608394 (478.60 it/sec) -training >> step=3229700, episode=539 reward=0.7762703 (526.20 it/sec) -training >> step=3229800, episode=539 reward=0.759714 (503.93 it/sec) -training >> step=3229900, episode=539 reward=0.7700345 (526.36 it/sec) -training >> step=3230000, episode=539 reward=0.7539576 (527.91 it/sec) -training >> step=3230100, episode=539 reward=0.7799998 (539.87 it/sec) -training >> step=3230200, episode=539 reward=0.7865053 (498.23 it/sec) -training >> step=3230300, episode=539 reward=0.802649 (517.33 it/sec) -training >> step=3230400, episode=539 reward=0.7908828 (509.59 it/sec) -training >> step=3230500, episode=539 reward=0.7636065 (500.65 it/sec) -training >> step=3230600, episode=539 reward=0.7765927 (513.77 it/sec) -training >> step=3230700, episode=539 reward=0.7756968 (485.50 it/sec) -training >> step=3230800, episode=539 reward=0.7601134 (565.88 it/sec) -training >> step=3230900, episode=539 reward=0.7705804 (547.49 it/sec) -training >> step=3231000, episode=539 reward=0.7792159 (510.00 it/sec) -training >> step=3231100, episode=539 reward=0.771017 (559.01 it/sec) -training >> step=3231200, episode=539 reward=0.760491 (537.24 it/sec) -training >> step=3231300, episode=539 reward=0.7614726 (525.99 it/sec) -training >> step=3231400, episode=539 reward=0.7551403 (544.50 it/sec) -training >> step=3231500, episode=539 reward=0.7810423 (524.33 it/sec) -training >> step=3231600, episode=539 reward=0.7608578 (506.20 it/sec) -training >> step=3231700, episode=539 reward=0.7934939 (514.18 it/sec) -training >> step=3231800, episode=539 reward=0.7817559 (478.00 it/sec) -training >> step=3231900, episode=539 reward=0.7494493 (536.07 it/sec) -training >> step=3232000, episode=539 reward=0.7843695 (520.31 it/sec) -training >> step=3232100, episode=539 reward=0.7794853 (527.76 it/sec) -training >> step=3232200, episode=539 reward=0.7602686 (555.54 it/sec) -training >> step=3232300, episode=539 reward=0.7474526 (489.19 it/sec) -training >> step=3232400, episode=539 reward=0.760335 (521.30 it/sec) -training >> step=3232500, episode=539 reward=0.7643859 (521.69 it/sec) -training >> step=3232600, episode=539 reward=0.7684999 (575.65 it/sec) -training >> step=3232700, episode=539 reward=0.7573206 (520.12 it/sec) -training >> step=3232800, episode=539 reward=0.7528438 (482.66 it/sec) -training >> step=3232900, episode=539 reward=0.7625522 (528.15 it/sec) -training >> step=3233000, episode=539 reward=0.7594913 (542.59 it/sec) -training >> step=3233100, episode=539 reward=0.7435368 (523.59 it/sec) -training >> step=3233200, episode=539 reward=0.7606711 (514.35 it/sec) -training >> step=3233300, episode=540 reward=0.7687508 (97.94 it/sec) -training >> step=3233400, episode=540 reward=0.7731962 (495.98 it/sec) -training >> step=3233500, episode=540 reward=0.7790611 (497.47 it/sec) -training >> step=3233600, episode=540 reward=0.7729738 (513.62 it/sec) -training >> step=3233700, episode=540 reward=0.7912629 (514.90 it/sec) -training >> step=3233800, episode=540 reward=0.7732958 (520.00 it/sec) -training >> step=3233900, episode=540 reward=0.7731587 (545.55 it/sec) -training >> step=3234000, episode=540 reward=0.7791536 (504.87 it/sec) -training >> step=3234100, episode=540 reward=0.779983 (486.50 it/sec) -training >> step=3234200, episode=540 reward=0.7695811 (567.63 it/sec) -training >> step=3234300, episode=540 reward=0.7755494 (532.54 it/sec) -training >> step=3234400, episode=540 reward=0.7725178 (501.77 it/sec) -training >> step=3234500, episode=540 reward=0.7447903 (468.82 it/sec) -training >> step=3234600, episode=540 reward=0.7661506 (522.33 it/sec) -training >> step=3234700, episode=540 reward=0.7565806 (514.47 it/sec) -training >> step=3234800, episode=540 reward=0.7577754 (509.87 it/sec) -training >> step=3234900, episode=540 reward=0.7590636 (541.19 it/sec) -training >> step=3235000, episode=540 reward=0.7616067 (585.94 it/sec) -training >> step=3235100, episode=540 reward=0.77697 (504.55 it/sec) -training >> step=3235200, episode=540 reward=0.776858 (520.26 it/sec) -training >> step=3235300, episode=540 reward=0.7914877 (539.26 it/sec) -training >> step=3235400, episode=540 reward=0.7855889 (539.18 it/sec) -training >> step=3235500, episode=540 reward=0.7817783 (524.58 it/sec) -training >> step=3235600, episode=540 reward=0.7735138 (484.38 it/sec) -training >> step=3235700, episode=540 reward=0.7717284 (536.99 it/sec) -training >> step=3235800, episode=540 reward=0.7549297 (514.54 it/sec) -training >> step=3235900, episode=540 reward=0.7621332 (535.12 it/sec) -training >> step=3236000, episode=540 reward=0.7467498 (523.19 it/sec) -training >> step=3236100, episode=540 reward=0.768494 (553.76 it/sec) -training >> step=3236200, episode=540 reward=0.7732202 (467.88 it/sec) -training >> step=3236300, episode=540 reward=0.7632726 (515.61 it/sec) -training >> step=3236400, episode=540 reward=0.7703652 (526.62 it/sec) -training >> step=3236500, episode=540 reward=0.7562457 (519.62 it/sec) -training >> step=3236600, episode=540 reward=0.7508443 (533.96 it/sec) -training >> step=3236700, episode=540 reward=0.778611 (483.39 it/sec) -training >> step=3236800, episode=540 reward=0.7556162 (527.64 it/sec) -training >> step=3236900, episode=540 reward=0.776064 (507.41 it/sec) -training >> step=3237000, episode=540 reward=0.7533071 (496.07 it/sec) -training >> step=3237100, episode=540 reward=0.7751052 (542.75 it/sec) -training >> step=3237200, episode=540 reward=0.7548836 (488.59 it/sec) -training >> step=3237300, episode=540 reward=0.7634296 (522.91 it/sec) -training >> step=3237400, episode=540 reward=0.7654244 (544.50 it/sec) -training >> step=3237500, episode=540 reward=0.7706864 (552.45 it/sec) -training >> step=3237600, episode=540 reward=0.7774011 (505.06 it/sec) -training >> step=3237700, episode=540 reward=0.7605948 (535.63 it/sec) -training >> step=3237800, episode=540 reward=0.7647982 (489.74 it/sec) -training >> step=3237900, episode=540 reward=0.7874498 (568.59 it/sec) -training >> step=3238000, episode=540 reward=0.784133 (515.34 it/sec) -training >> step=3238100, episode=540 reward=0.7888442 (508.50 it/sec) -training >> step=3238200, episode=540 reward=0.7513103 (529.27 it/sec) -training >> step=3238300, episode=540 reward=0.7421895 (504.03 it/sec) -training >> step=3238400, episode=540 reward=0.7629712 (565.32 it/sec) -training >> step=3238500, episode=540 reward=0.7716972 (531.24 it/sec) -training >> step=3238600, episode=540 reward=0.77075 (538.86 it/sec) -training >> step=3238700, episode=540 reward=0.7701591 (527.77 it/sec) -training >> step=3238800, episode=540 reward=0.759315 (486.58 it/sec) -training >> step=3238900, episode=540 reward=0.7672959 (481.16 it/sec) -training >> step=3239000, episode=540 reward=0.7520114 (545.08 it/sec) -training >> step=3239100, episode=540 reward=0.771049 (528.11 it/sec) -training >> step=3239200, episode=540 reward=0.7654758 (526.72 it/sec) -training >> step=3239300, episode=541 reward=0.7404328 (111.57 it/sec) -training >> step=3239400, episode=541 reward=0.7594263 (516.68 it/sec) -training >> step=3239500, episode=541 reward=0.7504016 (478.85 it/sec) -training >> step=3239600, episode=541 reward=0.7821498 (526.98 it/sec) -training >> step=3239700, episode=541 reward=0.7735984 (541.91 it/sec) -training >> step=3239800, episode=541 reward=0.7684391 (526.89 it/sec) -training >> step=3239900, episode=541 reward=0.7735267 (493.97 it/sec) -training >> step=3240000, episode=541 reward=0.783942 (554.65 it/sec) -training >> step=3240100, episode=541 reward=0.771008 (495.41 it/sec) -training >> step=3240200, episode=541 reward=0.7687147 (536.30 it/sec) -training >> step=3240300, episode=541 reward=0.7826403 (541.50 it/sec) -training >> step=3240400, episode=541 reward=0.7687246 (558.34 it/sec) -training >> step=3240500, episode=541 reward=0.7943631 (510.34 it/sec) -training >> step=3240600, episode=541 reward=0.7814866 (490.86 it/sec) -training >> step=3240700, episode=541 reward=0.7904074 (532.02 it/sec) -training >> step=3240800, episode=541 reward=0.7549817 (540.18 it/sec) -training >> step=3240900, episode=541 reward=0.7727235 (535.52 it/sec) -training >> step=3241000, episode=541 reward=0.7659642 (517.70 it/sec) -training >> step=3241100, episode=541 reward=0.7910545 (523.21 it/sec) -training >> step=3241200, episode=541 reward=0.7469572 (487.41 it/sec) -training >> step=3241300, episode=541 reward=0.7421447 (528.23 it/sec) -training >> step=3241400, episode=541 reward=0.7698025 (517.91 it/sec) -training >> step=3241500, episode=541 reward=0.7630566 (538.66 it/sec) -training >> step=3241600, episode=541 reward=0.7712201 (522.34 it/sec) -training >> step=3241700, episode=541 reward=0.7691939 (490.58 it/sec) -training >> step=3241800, episode=541 reward=0.7740362 (538.34 it/sec) -training >> step=3241900, episode=541 reward=0.7755659 (507.78 it/sec) -training >> step=3242000, episode=541 reward=0.7760689 (536.49 it/sec) -training >> step=3242100, episode=541 reward=0.7844608 (531.65 it/sec) -training >> step=3242200, episode=541 reward=0.7765848 (508.66 it/sec) -training >> step=3242300, episode=541 reward=0.7738335 (500.67 it/sec) -training >> step=3242400, episode=541 reward=0.7745172 (515.36 it/sec) -training >> step=3242500, episode=541 reward=0.7631661 (538.26 it/sec) -training >> step=3242600, episode=541 reward=0.7649298 (566.49 it/sec) -training >> step=3242700, episode=541 reward=0.7613222 (547.68 it/sec) -training >> step=3242800, episode=541 reward=0.7802981 (465.21 it/sec) -training >> step=3242900, episode=541 reward=0.7726839 (491.97 it/sec) -training >> step=3243000, episode=541 reward=0.7625459 (503.10 it/sec) -training >> step=3243100, episode=541 reward=0.7765653 (516.56 it/sec) -training >> step=3243200, episode=541 reward=0.7524206 (513.58 it/sec) -training >> step=3243300, episode=541 reward=0.7839552 (504.64 it/sec) -training >> step=3243400, episode=541 reward=0.7769094 (494.48 it/sec) -training >> step=3243500, episode=541 reward=0.7709696 (503.96 it/sec) -training >> step=3243600, episode=541 reward=0.7891074 (551.96 it/sec) -training >> step=3243700, episode=541 reward=0.7508245 (536.51 it/sec) -training >> step=3243800, episode=541 reward=0.7819657 (491.00 it/sec) -training >> step=3243900, episode=541 reward=0.7703673 (468.18 it/sec) -training >> step=3244000, episode=541 reward=0.767623 (470.69 it/sec) -training >> step=3244100, episode=541 reward=0.7787432 (513.41 it/sec) -training >> step=3244200, episode=541 reward=0.7492836 (535.75 it/sec) -training >> step=3244300, episode=541 reward=0.7577394 (486.31 it/sec) -training >> step=3244400, episode=541 reward=0.7677911 (557.19 it/sec) -training >> step=3244500, episode=541 reward=0.7422734 (492.96 it/sec) -training >> step=3244600, episode=541 reward=0.7799656 (528.65 it/sec) -training >> step=3244700, episode=541 reward=0.7677687 (549.21 it/sec) -training >> step=3244800, episode=541 reward=0.7521842 (510.14 it/sec) -training >> step=3244900, episode=541 reward=0.7402368 (502.30 it/sec) -training >> step=3245000, episode=541 reward=0.7427589 (530.73 it/sec) -training >> step=3245100, episode=541 reward=0.7625304 (548.45 it/sec) -training >> step=3245200, episode=541 reward=0.7592368 (534.26 it/sec) -training >> step=3245300, episode=542 reward=0.7683594 (99.46 it/sec) -training >> step=3245400, episode=542 reward=0.769267 (517.24 it/sec) -training >> step=3245500, episode=542 reward=0.7625201 (486.50 it/sec) -training >> step=3245600, episode=542 reward=0.7678414 (530.37 it/sec) -training >> step=3245700, episode=542 reward=0.7627857 (536.95 it/sec) -training >> step=3245800, episode=542 reward=0.7607036 (534.26 it/sec) -training >> step=3245900, episode=542 reward=0.752422 (523.34 it/sec) -training >> step=3246000, episode=542 reward=0.7760249 (554.70 it/sec) -training >> step=3246100, episode=542 reward=0.7668085 (496.82 it/sec) -training >> step=3246200, episode=542 reward=0.7640701 (507.29 it/sec) -training >> step=3246300, episode=542 reward=0.7751833 (515.53 it/sec) -training >> step=3246400, episode=542 reward=0.7616323 (546.43 it/sec) -training >> step=3246500, episode=542 reward=0.7695724 (531.03 it/sec) -training >> step=3246600, episode=542 reward=0.7774635 (490.60 it/sec) -training >> step=3246700, episode=542 reward=0.7762305 (526.89 it/sec) -training >> step=3246800, episode=542 reward=0.7878491 (507.28 it/sec) -training >> step=3246900, episode=542 reward=0.746852 (517.94 it/sec) -training >> step=3247000, episode=542 reward=0.7960109 (520.59 it/sec) -training >> step=3247100, episode=542 reward=0.7773908 (538.39 it/sec) -training >> step=3247200, episode=542 reward=0.7932233 (521.93 it/sec) -training >> step=3247300, episode=542 reward=0.7914943 (513.40 it/sec) -training >> step=3247400, episode=542 reward=0.762678 (549.89 it/sec) -training >> step=3247500, episode=542 reward=0.7546388 (528.54 it/sec) -training >> step=3247600, episode=542 reward=0.7720498 (531.07 it/sec) -training >> step=3247700, episode=542 reward=0.7709588 (491.48 it/sec) -training >> step=3247800, episode=542 reward=0.7542906 (520.78 it/sec) -training >> step=3247900, episode=542 reward=0.7663526 (544.50 it/sec) -training >> step=3248000, episode=542 reward=0.7786281 (520.05 it/sec) -training >> step=3248100, episode=542 reward=0.7741734 (513.46 it/sec) -training >> step=3248200, episode=542 reward=0.7770114 (526.66 it/sec) -training >> step=3248300, episode=542 reward=0.7720413 (497.76 it/sec) -training >> step=3248400, episode=542 reward=0.7690505 (538.65 it/sec) -training >> step=3248500, episode=542 reward=0.7866762 (531.29 it/sec) -training >> step=3248600, episode=542 reward=0.7703201 (532.34 it/sec) -training >> step=3248700, episode=542 reward=0.7687212 (528.78 it/sec) -training >> step=3248800, episode=542 reward=0.7833376 (480.30 it/sec) -training >> step=3248900, episode=542 reward=0.7643459 (533.57 it/sec) -training >> step=3249000, episode=542 reward=0.7731348 (518.57 it/sec) -training >> step=3249100, episode=542 reward=0.7735244 (517.16 it/sec) -training >> step=3249200, episode=542 reward=0.7636287 (542.57 it/sec) -training >> step=3249300, episode=542 reward=0.7871194 (465.89 it/sec) -training >> step=3249400, episode=542 reward=0.769105 (508.22 it/sec) -training >> step=3249500, episode=542 reward=0.7550364 (537.95 it/sec) -training >> step=3249600, episode=542 reward=0.7623721 (532.74 it/sec) -training >> step=3249700, episode=542 reward=0.7799287 (510.64 it/sec) -training >> step=3249800, episode=542 reward=0.7721664 (489.51 it/sec) -training >> step=3249900, episode=542 reward=0.7756553 (545.65 it/sec) -training >> step=3250000, episode=542 reward=0.7675804 (544.23 it/sec) -training >> step=3250100, episode=542 reward=0.7384979 (522.73 it/sec) -training >> step=3250200, episode=542 reward=0.753571 (505.87 it/sec) -training >> step=3250300, episode=542 reward=0.7680618 (547.50 it/sec) -training >> step=3250400, episode=542 reward=0.770718 (494.35 it/sec) -training >> step=3250500, episode=542 reward=0.7897697 (518.54 it/sec) -training >> step=3250600, episode=542 reward=0.7597058 (509.66 it/sec) -training >> step=3250700, episode=542 reward=0.7730893 (564.12 it/sec) -training >> step=3250800, episode=542 reward=0.7643015 (533.58 it/sec) -training >> step=3250900, episode=542 reward=0.7531416 (494.08 it/sec) -training >> step=3251000, episode=542 reward=0.7606398 (520.06 it/sec) -training >> step=3251100, episode=542 reward=0.755416 (548.44 it/sec) -training >> step=3251200, episode=542 reward=0.7475398 (533.39 it/sec) -training >> step=3251300, episode=543 reward=0.7571976 (76.57 it/sec) -training >> step=3251400, episode=543 reward=0.7688066 (502.26 it/sec) -training >> step=3251500, episode=543 reward=0.7665306 (491.37 it/sec) -training >> step=3251600, episode=543 reward=0.7650149 (533.45 it/sec) -training >> step=3251700, episode=543 reward=0.7678365 (526.69 it/sec) -training >> step=3251800, episode=543 reward=0.7661467 (525.76 it/sec) -training >> step=3251900, episode=543 reward=0.7856297 (482.46 it/sec) -training >> step=3252000, episode=543 reward=0.7851679 (555.06 it/sec) -training >> step=3252100, episode=543 reward=0.7983935 (501.14 it/sec) -training >> step=3252200, episode=543 reward=0.7719482 (535.23 it/sec) -training >> step=3252300, episode=543 reward=0.781295 (549.20 it/sec) -training >> step=3252400, episode=543 reward=0.7562113 (520.29 it/sec) -training >> step=3252500, episode=543 reward=0.7931554 (480.41 it/sec) -training >> step=3252600, episode=543 reward=0.7759942 (562.74 it/sec) -training >> step=3252700, episode=543 reward=0.7699705 (542.10 it/sec) -training >> step=3252800, episode=543 reward=0.8016356 (507.15 it/sec) -training >> step=3252900, episode=543 reward=0.7536623 (561.27 it/sec) -training >> step=3253000, episode=543 reward=0.7764562 (503.13 it/sec) -training >> step=3253100, episode=543 reward=0.7842752 (543.48 it/sec) -training >> step=3253200, episode=543 reward=0.7665769 (530.66 it/sec) -training >> step=3253300, episode=543 reward=0.7578241 (512.88 it/sec) -training >> step=3253400, episode=543 reward=0.7480993 (490.58 it/sec) -training >> step=3253500, episode=543 reward=0.7623554 (526.17 it/sec) -training >> step=3253600, episode=543 reward=0.7867192 (510.98 it/sec) -training >> step=3253700, episode=543 reward=0.7744916 (553.54 it/sec) -training >> step=3253800, episode=543 reward=0.7554435 (521.34 it/sec) -training >> step=3253900, episode=543 reward=0.751985 (494.71 it/sec) -training >> step=3254000, episode=543 reward=0.7593776 (502.02 it/sec) -training >> step=3254100, episode=543 reward=0.7441167 (498.73 it/sec) -training >> step=3254200, episode=543 reward=0.7794599 (524.05 it/sec) -training >> step=3254300, episode=543 reward=0.7808856 (483.71 it/sec) -training >> step=3254400, episode=543 reward=0.762413 (456.35 it/sec) -training >> step=3254500, episode=543 reward=0.7505471 (400.90 it/sec) -training >> step=3254600, episode=543 reward=0.77655 (396.16 it/sec) -training >> step=3254700, episode=543 reward=0.7561643 (429.03 it/sec) -training >> step=3254800, episode=543 reward=0.7811368 (429.18 it/sec) -training >> step=3254900, episode=543 reward=0.7586705 (453.78 it/sec) -training >> step=3255000, episode=543 reward=0.7827096 (438.19 it/sec) -training >> step=3255100, episode=543 reward=0.7574471 (493.46 it/sec) -training >> step=3255200, episode=543 reward=0.7648298 (505.52 it/sec) -training >> step=3255300, episode=543 reward=0.7905419 (464.85 it/sec) -training >> step=3255400, episode=543 reward=0.754052 (519.68 it/sec) -training >> step=3255500, episode=543 reward=0.7780109 (479.42 it/sec) -training >> step=3255600, episode=543 reward=0.7852975 (480.01 it/sec) -training >> step=3255700, episode=543 reward=0.7606166 (486.63 it/sec) -training >> step=3255800, episode=543 reward=0.775658 (492.92 it/sec) -training >> step=3255900, episode=543 reward=0.7902805 (550.68 it/sec) -training >> step=3256000, episode=543 reward=0.7717093 (496.45 it/sec) -training >> step=3256100, episode=543 reward=0.7838104 (476.08 it/sec) -training >> step=3256200, episode=543 reward=0.7659795 (590.81 it/sec) -training >> step=3256300, episode=543 reward=0.7570789 (476.11 it/sec) -training >> step=3256400, episode=543 reward=0.7310518 (511.59 it/sec) -training >> step=3256500, episode=543 reward=0.7928039 (488.00 it/sec) -training >> step=3256600, episode=543 reward=0.7635469 (510.91 it/sec) -training >> step=3256700, episode=543 reward=0.7569585 (510.21 it/sec) -training >> step=3256800, episode=543 reward=0.7763292 (515.59 it/sec) -training >> step=3256900, episode=543 reward=0.7485074 (513.17 it/sec) -training >> step=3257000, episode=543 reward=0.749374 (491.32 it/sec) -training >> step=3257100, episode=543 reward=0.7427139 (482.06 it/sec) -training >> step=3257200, episode=543 reward=0.7694954 (501.05 it/sec) -training >> step=3257300, episode=544 reward=0.779779 (91.16 it/sec) -training >> step=3257400, episode=544 reward=0.773991 (459.74 it/sec) -training >> step=3257500, episode=544 reward=0.763166 (476.78 it/sec) -training >> step=3257600, episode=544 reward=0.7727622 (491.85 it/sec) -training >> step=3257700, episode=544 reward=0.7762849 (484.34 it/sec) -training >> step=3257800, episode=544 reward=0.757663 (459.41 it/sec) -training >> step=3257900, episode=544 reward=0.7922139 (518.79 it/sec) -training >> step=3258000, episode=544 reward=0.762602 (518.92 it/sec) -training >> step=3258100, episode=544 reward=0.7789966 (413.09 it/sec) -training >> step=3258200, episode=544 reward=0.7731712 (486.05 it/sec) -training >> step=3258300, episode=544 reward=0.7733012 (425.31 it/sec) -training >> step=3258400, episode=544 reward=0.7664485 (473.31 it/sec) -training >> step=3258500, episode=544 reward=0.762021 (488.31 it/sec) -training >> step=3258600, episode=544 reward=0.784696 (506.01 it/sec) -training >> step=3258700, episode=544 reward=0.7591358 (531.83 it/sec) -training >> step=3258800, episode=544 reward=0.7759722 (559.82 it/sec) -training >> step=3258900, episode=544 reward=0.7787841 (527.63 it/sec) -training >> step=3259000, episode=544 reward=0.7694085 (585.06 it/sec) -training >> step=3259100, episode=544 reward=0.7518277 (514.93 it/sec) -training >> step=3259200, episode=544 reward=0.7545084 (501.96 it/sec) -training >> step=3259300, episode=544 reward=0.7880761 (567.40 it/sec) -training >> step=3259400, episode=544 reward=0.7741343 (561.97 it/sec) -training >> step=3259500, episode=544 reward=0.7715539 (549.14 it/sec) -training >> step=3259600, episode=544 reward=0.7625914 (546.66 it/sec) -training >> step=3259700, episode=544 reward=0.780347 (512.41 it/sec) -training >> step=3259800, episode=544 reward=0.7875184 (511.04 it/sec) -training >> step=3259900, episode=544 reward=0.772285 (546.77 it/sec) -training >> step=3260000, episode=544 reward=0.7691627 (515.51 it/sec) -training >> step=3260100, episode=544 reward=0.7712163 (580.31 it/sec) -training >> step=3260200, episode=544 reward=0.7839803 (513.76 it/sec) -training >> step=3260300, episode=544 reward=0.752845 (513.17 it/sec) -training >> step=3260400, episode=544 reward=0.7911679 (553.64 it/sec) -training >> step=3260500, episode=544 reward=0.7828649 (493.19 it/sec) -training >> step=3260600, episode=544 reward=0.7800961 (533.48 it/sec) -training >> step=3260700, episode=544 reward=0.7566103 (496.95 it/sec) -training >> step=3260800, episode=544 reward=0.7958171 (556.84 it/sec) -training >> step=3260900, episode=544 reward=0.7613185 (528.16 it/sec) -training >> step=3261000, episode=544 reward=0.7787583 (524.99 it/sec) -training >> step=3261100, episode=544 reward=0.760829 (560.00 it/sec) -training >> step=3261200, episode=544 reward=0.7733441 (537.73 it/sec) -training >> step=3261300, episode=544 reward=0.7731106 (503.99 it/sec) -training >> step=3261400, episode=544 reward=0.7899499 (539.27 it/sec) -training >> step=3261500, episode=544 reward=0.7412546 (555.18 it/sec) -training >> step=3261600, episode=544 reward=0.7431487 (525.93 it/sec) -training >> step=3261700, episode=544 reward=0.7746241 (548.36 it/sec) -training >> step=3261800, episode=544 reward=0.7523742 (519.01 it/sec) -training >> step=3261900, episode=544 reward=0.7894447 (466.02 it/sec) -training >> step=3262000, episode=544 reward=0.7881048 (514.50 it/sec) -training >> step=3262100, episode=544 reward=0.7707894 (541.12 it/sec) -training >> step=3262200, episode=544 reward=0.7525662 (542.76 it/sec) -training >> step=3262300, episode=544 reward=0.7568687 (504.76 it/sec) -training >> step=3262400, episode=544 reward=0.7596108 (507.01 it/sec) -training >> step=3262500, episode=544 reward=0.7642792 (537.78 it/sec) -training >> step=3262600, episode=544 reward=0.7857321 (545.68 it/sec) -training >> step=3262700, episode=544 reward=0.7498838 (511.36 it/sec) -training >> step=3262800, episode=544 reward=0.7425276 (518.39 it/sec) -training >> step=3262900, episode=544 reward=0.7513637 (537.36 it/sec) -training >> step=3263000, episode=544 reward=0.7510476 (478.12 it/sec) -training >> step=3263100, episode=544 reward=0.735118 (503.24 it/sec) -training >> step=3263200, episode=544 reward=0.7670122 (543.14 it/sec) -training >> step=3263300, episode=545 reward=0.7625567 (72.92 it/sec) -training >> step=3263400, episode=545 reward=0.7764115 (468.85 it/sec) -training >> step=3263500, episode=545 reward=0.7475421 (541.68 it/sec) -training >> step=3263600, episode=545 reward=0.752538 (501.54 it/sec) -training >> step=3263700, episode=545 reward=0.737097 (469.32 it/sec) -training >> step=3263800, episode=545 reward=0.7587134 (532.92 it/sec) -training >> step=3263900, episode=545 reward=0.785183 (517.66 it/sec) -training >> step=3264000, episode=545 reward=0.7758686 (502.08 it/sec) -training >> step=3264100, episode=545 reward=0.7696515 (555.35 it/sec) -training >> step=3264200, episode=545 reward=0.7817715 (520.84 it/sec) -training >> step=3264300, episode=545 reward=0.7812856 (484.48 it/sec) -training >> step=3264400, episode=545 reward=0.7675363 (454.42 it/sec) -training >> step=3264500, episode=545 reward=0.7692003 (533.80 it/sec) -training >> step=3264600, episode=545 reward=0.7791207 (522.37 it/sec) -training >> step=3264700, episode=545 reward=0.784088 (468.39 it/sec) -training >> step=3264800, episode=545 reward=0.7790955 (515.33 it/sec) -training >> step=3264900, episode=545 reward=0.7751997 (530.38 it/sec) -training >> step=3265000, episode=545 reward=0.7707928 (453.01 it/sec) -training >> step=3265100, episode=545 reward=0.7652954 (517.68 it/sec) -training >> step=3265200, episode=545 reward=0.7810282 (502.63 it/sec) -training >> step=3265300, episode=545 reward=0.7729573 (483.52 it/sec) -training >> step=3265400, episode=545 reward=0.7846197 (510.13 it/sec) -training >> step=3265500, episode=545 reward=0.7670618 (510.78 it/sec) -training >> step=3265600, episode=545 reward=0.7572747 (517.31 it/sec) -training >> step=3265700, episode=545 reward=0.766761 (499.69 it/sec) -training >> step=3265800, episode=545 reward=0.7606655 (532.08 it/sec) -training >> step=3265900, episode=545 reward=0.7591872 (507.46 it/sec) -training >> step=3266000, episode=545 reward=0.760035 (514.55 it/sec) -training >> step=3266100, episode=545 reward=0.7586662 (535.91 it/sec) -training >> step=3266200, episode=545 reward=0.7687001 (475.87 it/sec) -training >> step=3266300, episode=545 reward=0.7601073 (526.43 it/sec) -training >> step=3266400, episode=545 reward=0.7556968 (526.52 it/sec) -training >> step=3266500, episode=545 reward=0.7645925 (504.33 it/sec) -training >> step=3266600, episode=545 reward=0.7745699 (503.32 it/sec) -training >> step=3266700, episode=545 reward=0.7636659 (552.77 it/sec) -training >> step=3266800, episode=545 reward=0.7590865 (490.22 it/sec) -training >> step=3266900, episode=545 reward=0.7561544 (524.11 it/sec) -training >> step=3267000, episode=545 reward=0.774556 (471.39 it/sec) -training >> step=3267100, episode=545 reward=0.7819104 (483.05 it/sec) -training >> step=3267200, episode=545 reward=0.7571912 (533.81 it/sec) -training >> step=3267300, episode=545 reward=0.7776743 (505.86 it/sec) -training >> step=3267400, episode=545 reward=0.7864485 (524.67 it/sec) -training >> step=3267500, episode=545 reward=0.7681009 (500.49 it/sec) -training >> step=3267600, episode=545 reward=0.7676595 (515.71 it/sec) -training >> step=3267700, episode=545 reward=0.7621375 (536.19 it/sec) -training >> step=3267800, episode=545 reward=0.7673087 (503.57 it/sec) -training >> step=3267900, episode=545 reward=0.7917984 (516.38 it/sec) -training >> step=3268000, episode=545 reward=0.7625895 (535.29 it/sec) -training >> step=3268100, episode=545 reward=0.7606088 (512.58 it/sec) -training >> step=3268200, episode=545 reward=0.7607601 (486.92 it/sec) -training >> step=3268300, episode=545 reward=0.7535186 (473.65 it/sec) -training >> step=3268400, episode=545 reward=0.7873589 (503.83 it/sec) -training >> step=3268500, episode=545 reward=0.7518903 (521.43 it/sec) -training >> step=3268600, episode=545 reward=0.7698477 (480.81 it/sec) -training >> step=3268700, episode=545 reward=0.7559067 (457.10 it/sec) -training >> step=3268800, episode=545 reward=0.765988 (441.97 it/sec) -training >> step=3268900, episode=545 reward=0.7578558 (422.64 it/sec) -training >> step=3269000, episode=545 reward=0.7451752 (455.84 it/sec) -training >> step=3269100, episode=545 reward=0.750874 (458.03 it/sec) -training >> step=3269200, episode=545 reward=0.7490805 (555.36 it/sec) -training >> step=3269300, episode=546 reward=0.7640792 (137.31 it/sec) -training >> step=3269400, episode=546 reward=0.7599206 (502.42 it/sec) -training >> step=3269500, episode=546 reward=0.768568 (485.84 it/sec) -training >> step=3269600, episode=546 reward=0.7735505 (525.29 it/sec) -training >> step=3269700, episode=546 reward=0.7600237 (484.98 it/sec) -training >> step=3269800, episode=546 reward=0.7776307 (506.95 it/sec) -training >> step=3269900, episode=546 reward=0.7665243 (474.90 it/sec) -training >> step=3270000, episode=546 reward=0.7848186 (502.92 it/sec) -training >> step=3270100, episode=546 reward=0.7699866 (517.85 it/sec) -training >> step=3270200, episode=546 reward=0.7570398 (503.41 it/sec) -training >> step=3270300, episode=546 reward=0.7854782 (541.90 it/sec) -training >> step=3270400, episode=546 reward=0.7722965 (473.80 it/sec) -training >> step=3270500, episode=546 reward=0.7761283 (470.06 it/sec) -training >> step=3270600, episode=546 reward=0.7824051 (486.53 it/sec) -training >> step=3270700, episode=546 reward=0.7862903 (509.22 it/sec) -training >> step=3270800, episode=546 reward=0.7723124 (493.31 it/sec) -training >> step=3270900, episode=546 reward=0.7681292 (466.07 it/sec) -training >> step=3271000, episode=546 reward=0.7812363 (513.63 it/sec) -training >> step=3271100, episode=546 reward=0.7435629 (516.50 it/sec) -training >> step=3271200, episode=546 reward=0.7655091 (522.72 it/sec) -training >> step=3271300, episode=546 reward=0.7602068 (546.34 it/sec) -training >> step=3271400, episode=546 reward=0.7729761 (505.54 it/sec) -training >> step=3271500, episode=546 reward=0.7599609 (490.97 it/sec) -training >> step=3271600, episode=546 reward=0.7645183 (524.12 it/sec) -training >> step=3271700, episode=546 reward=0.771951 (551.03 it/sec) -training >> step=3271800, episode=546 reward=0.7559213 (495.51 it/sec) -training >> step=3271900, episode=546 reward=0.7473219 (480.65 it/sec) -training >> step=3272000, episode=546 reward=0.7754024 (523.30 it/sec) -training >> step=3272100, episode=546 reward=0.7537732 (564.18 it/sec) -training >> step=3272200, episode=546 reward=0.767656 (525.30 it/sec) -training >> step=3272300, episode=546 reward=0.7659287 (484.95 it/sec) -training >> step=3272400, episode=546 reward=0.7440286 (520.38 it/sec) -training >> step=3272500, episode=546 reward=0.7760017 (509.58 it/sec) -training >> step=3272600, episode=546 reward=0.7536647 (470.22 it/sec) -training >> step=3272700, episode=546 reward=0.7661883 (534.37 it/sec) -training >> step=3272800, episode=546 reward=0.7931978 (548.07 it/sec) -training >> step=3272900, episode=546 reward=0.760802 (481.45 it/sec) -training >> step=3273000, episode=546 reward=0.7678091 (434.87 it/sec) -training >> step=3273100, episode=546 reward=0.7726209 (547.89 it/sec) -training >> step=3273200, episode=546 reward=0.7773604 (548.43 it/sec) -training >> step=3273300, episode=546 reward=0.7590049 (545.60 it/sec) -training >> step=3273400, episode=546 reward=0.7446088 (509.77 it/sec) -training >> step=3273500, episode=546 reward=0.7412321 (545.35 it/sec) -training >> step=3273600, episode=546 reward=0.7760313 (511.72 it/sec) -training >> step=3273700, episode=546 reward=0.7491525 (533.87 it/sec) -training >> step=3273800, episode=546 reward=0.7897124 (544.51 it/sec) -training >> step=3273900, episode=546 reward=0.7665161 (513.91 it/sec) -training >> step=3274000, episode=546 reward=0.7777228 (532.25 it/sec) -training >> step=3274100, episode=546 reward=0.7234825 (495.67 it/sec) -training >> step=3274200, episode=546 reward=0.7532135 (539.55 it/sec) -training >> step=3274300, episode=546 reward=0.7668627 (520.29 it/sec) -training >> step=3274400, episode=546 reward=0.7529748 (557.49 it/sec) -training >> step=3274500, episode=546 reward=0.7759146 (527.66 it/sec) -training >> step=3274600, episode=546 reward=0.756652 (524.56 it/sec) -training >> step=3274700, episode=546 reward=0.7635293 (482.50 it/sec) -training >> step=3274800, episode=546 reward=0.7573587 (468.27 it/sec) -training >> step=3274900, episode=546 reward=0.7685227 (528.61 it/sec) -training >> step=3275000, episode=546 reward=0.7393971 (558.11 it/sec) -training >> step=3275100, episode=546 reward=0.7671396 (486.91 it/sec) -training >> step=3275200, episode=546 reward=0.742406 (516.33 it/sec) -training >> step=3275300, episode=547 reward=0.7436454 (122.59 it/sec) -training >> step=3275400, episode=547 reward=0.7485559 (469.29 it/sec) -training >> step=3275500, episode=547 reward=0.7740816 (530.79 it/sec) -training >> step=3275600, episode=547 reward=0.7727082 (541.68 it/sec) -training >> step=3275700, episode=547 reward=0.7603479 (476.40 it/sec) -training >> step=3275800, episode=547 reward=0.7679512 (510.04 it/sec) -training >> step=3275900, episode=547 reward=0.7640903 (510.94 it/sec) -training >> step=3276000, episode=547 reward=0.7632817 (548.48 it/sec) -training >> step=3276100, episode=547 reward=0.7685991 (543.95 it/sec) -training >> step=3276200, episode=547 reward=0.747319 (535.44 it/sec) -training >> step=3276300, episode=547 reward=0.7744643 (480.27 it/sec) -training >> step=3276400, episode=547 reward=0.7861218 (496.81 it/sec) -training >> step=3276500, episode=547 reward=0.7533309 (505.00 it/sec) -training >> step=3276600, episode=547 reward=0.7622755 (519.81 it/sec) -training >> step=3276700, episode=547 reward=0.7651533 (546.13 it/sec) -training >> step=3276800, episode=547 reward=0.7540178 (548.66 it/sec) -training >> step=3276900, episode=547 reward=0.7612823 (494.87 it/sec) -training >> step=3277000, episode=547 reward=0.7740191 (532.29 it/sec) -training >> step=3277100, episode=547 reward=0.7772264 (496.77 it/sec) -training >> step=3277200, episode=547 reward=0.7751166 (535.92 it/sec) -training >> step=3277300, episode=547 reward=0.757581 (499.57 it/sec) -training >> step=3277400, episode=547 reward=0.7919238 (502.45 it/sec) -training >> step=3277500, episode=547 reward=0.7630764 (549.00 it/sec) -training >> step=3277600, episode=547 reward=0.7889195 (475.88 it/sec) -training >> step=3277700, episode=547 reward=0.770708 (527.56 it/sec) -training >> step=3277800, episode=547 reward=0.7995253 (479.52 it/sec) -training >> step=3277900, episode=547 reward=0.7774888 (551.42 it/sec) -training >> step=3278000, episode=547 reward=0.754912 (508.90 it/sec) -training >> step=3278100, episode=547 reward=0.7788821 (530.25 it/sec) -training >> step=3278200, episode=547 reward=0.7761075 (564.46 it/sec) -training >> step=3278300, episode=547 reward=0.7675232 (505.65 it/sec) -training >> step=3278400, episode=547 reward=0.7579807 (481.22 it/sec) -training >> step=3278500, episode=547 reward=0.7839893 (504.24 it/sec) -training >> step=3278600, episode=547 reward=0.7702492 (555.59 it/sec) -training >> step=3278700, episode=547 reward=0.7443686 (538.91 it/sec) -training >> step=3278800, episode=547 reward=0.7741679 (522.73 it/sec) -training >> step=3278900, episode=547 reward=0.7626896 (529.98 it/sec) -training >> step=3279000, episode=547 reward=0.7494368 (516.54 it/sec) -training >> step=3279100, episode=547 reward=0.7809117 (500.45 it/sec) -training >> step=3279200, episode=547 reward=0.7623098 (530.17 it/sec) -training >> step=3279300, episode=547 reward=0.7610873 (577.38 it/sec) -training >> step=3279400, episode=547 reward=0.7883388 (511.18 it/sec) -training >> step=3279500, episode=547 reward=0.7763739 (441.87 it/sec) -training >> step=3279600, episode=547 reward=0.7620687 (502.06 it/sec) -training >> step=3279700, episode=547 reward=0.7664837 (498.99 it/sec) -training >> step=3279800, episode=547 reward=0.7638026 (486.19 it/sec) -training >> step=3279900, episode=547 reward=0.7623614 (506.11 it/sec) -training >> step=3280000, episode=547 reward=0.7668693 (504.50 it/sec) -training >> step=3280100, episode=547 reward=0.752167 (509.41 it/sec) -training >> step=3280200, episode=547 reward=0.7671819 (539.28 it/sec) -training >> step=3280300, episode=547 reward=0.7625028 (507.41 it/sec) -training >> step=3280400, episode=547 reward=0.7637881 (554.67 it/sec) -training >> step=3280500, episode=547 reward=0.7726391 (508.01 it/sec) -training >> step=3280600, episode=547 reward=0.7458096 (493.38 it/sec) -training >> step=3280700, episode=547 reward=0.7709213 (537.35 it/sec) -training >> step=3280800, episode=547 reward=0.7524678 (534.85 it/sec) -training >> step=3280900, episode=547 reward=0.7609138 (544.74 it/sec) -training >> step=3281000, episode=547 reward=0.771534 (526.66 it/sec) -training >> step=3281100, episode=547 reward=0.773576 (517.95 it/sec) -training >> step=3281200, episode=547 reward=0.7589652 (521.20 it/sec) -training >> step=3281300, episode=548 reward=0.7567439 (139.74 it/sec) -training >> step=3281400, episode=548 reward=0.7600036 (417.38 it/sec) -training >> step=3281500, episode=548 reward=0.7615126 (530.55 it/sec) -training >> step=3281600, episode=548 reward=0.7732599 (513.51 it/sec) -training >> step=3281700, episode=548 reward=0.7768963 (485.86 it/sec) -training >> step=3281800, episode=548 reward=0.789764 (518.00 it/sec) -training >> step=3281900, episode=548 reward=0.7608411 (494.75 it/sec) -training >> step=3282000, episode=548 reward=0.7587736 (514.40 it/sec) -training >> step=3282100, episode=548 reward=0.782482 (531.11 it/sec) -training >> step=3282200, episode=548 reward=0.7624164 (487.61 it/sec) -training >> step=3282300, episode=548 reward=0.7809795 (508.61 it/sec) -training >> step=3282400, episode=548 reward=0.7652724 (528.20 it/sec) -training >> step=3282500, episode=548 reward=0.7674313 (472.85 it/sec) -training >> step=3282600, episode=548 reward=0.7873824 (552.74 it/sec) -training >> step=3282700, episode=548 reward=0.7771428 (501.10 it/sec) -training >> step=3282800, episode=548 reward=0.7612467 (459.78 it/sec) -training >> step=3282900, episode=548 reward=0.7676286 (527.24 it/sec) -training >> step=3283000, episode=548 reward=0.7647785 (469.09 it/sec) -training >> step=3283100, episode=548 reward=0.7797682 (446.98 it/sec) -training >> step=3283200, episode=548 reward=0.7662488 (467.94 it/sec) -training >> step=3283300, episode=548 reward=0.7684032 (471.95 it/sec) -training >> step=3283400, episode=548 reward=0.7641767 (365.62 it/sec) -training >> step=3283500, episode=548 reward=0.7759182 (497.42 it/sec) -training >> step=3283600, episode=548 reward=0.7766198 (446.78 it/sec) -training >> step=3283700, episode=548 reward=0.7527369 (579.21 it/sec) -training >> step=3283800, episode=548 reward=0.7738926 (521.28 it/sec) -training >> step=3283900, episode=548 reward=0.7730159 (459.30 it/sec) -training >> step=3284000, episode=548 reward=0.757024 (518.10 it/sec) -training >> step=3284100, episode=548 reward=0.7698668 (529.58 it/sec) -training >> step=3284200, episode=548 reward=0.773308 (533.66 it/sec) -training >> step=3284300, episode=548 reward=0.787602 (437.29 it/sec) -training >> step=3284400, episode=548 reward=0.7676128 (489.74 it/sec) -training >> step=3284500, episode=548 reward=0.7710561 (493.28 it/sec) -training >> step=3284600, episode=548 reward=0.7618381 (492.08 it/sec) -training >> step=3284700, episode=548 reward=0.7833074 (456.56 it/sec) -training >> step=3284800, episode=548 reward=0.7513233 (512.79 it/sec) -training >> step=3284900, episode=548 reward=0.7697186 (462.02 it/sec) -training >> step=3285000, episode=548 reward=0.7759191 (464.67 it/sec) -training >> step=3285100, episode=548 reward=0.7884404 (443.40 it/sec) -training >> step=3285200, episode=548 reward=0.7555506 (494.59 it/sec) -training >> step=3285300, episode=548 reward=0.7652524 (447.83 it/sec) -training >> step=3285400, episode=548 reward=0.7854326 (474.38 it/sec) -training >> step=3285500, episode=548 reward=0.7839775 (519.07 it/sec) -training >> step=3285600, episode=548 reward=0.7722533 (503.40 it/sec) -training >> step=3285700, episode=548 reward=0.800108 (469.60 it/sec) -training >> step=3285800, episode=548 reward=0.7583612 (471.16 it/sec) -training >> step=3285900, episode=548 reward=0.764169 (449.16 it/sec) -training >> step=3286000, episode=548 reward=0.7793736 (509.91 it/sec) -training >> step=3286100, episode=548 reward=0.7457668 (509.57 it/sec) -training >> step=3286200, episode=548 reward=0.7814468 (552.10 it/sec) -training >> step=3286300, episode=548 reward=0.7772945 (480.74 it/sec) -training >> step=3286400, episode=548 reward=0.7595542 (480.06 it/sec) -training >> step=3286500, episode=548 reward=0.7681194 (502.40 it/sec) -training >> step=3286600, episode=548 reward=0.7515507 (504.16 it/sec) -training >> step=3286700, episode=548 reward=0.7621464 (470.71 it/sec) -training >> step=3286800, episode=548 reward=0.7498983 (452.41 it/sec) -training >> step=3286900, episode=548 reward=0.7478432 (485.46 it/sec) -training >> step=3287000, episode=548 reward=0.7354468 (481.73 it/sec) -training >> step=3287100, episode=548 reward=0.7565246 (447.95 it/sec) -training >> step=3287200, episode=548 reward=0.7812854 (472.51 it/sec) -training >> step=3287300, episode=549 reward=0.7580193 (97.15 it/sec) -training >> step=3287400, episode=549 reward=0.753644 (316.99 it/sec) -training >> step=3287500, episode=549 reward=0.7468227 (457.59 it/sec) -training >> step=3287600, episode=549 reward=0.7830914 (486.34 it/sec) -training >> step=3287700, episode=549 reward=0.7614182 (474.36 it/sec) -training >> step=3287800, episode=549 reward=0.7797177 (458.54 it/sec) -training >> step=3287900, episode=549 reward=0.7701936 (456.04 it/sec) -training >> step=3288000, episode=549 reward=0.7586409 (476.25 it/sec) -training >> step=3288100, episode=549 reward=0.777258 (475.40 it/sec) -training >> step=3288200, episode=549 reward=0.7843856 (509.45 it/sec) -training >> step=3288300, episode=549 reward=0.7728699 (475.83 it/sec) -training >> step=3288400, episode=549 reward=0.753297 (470.85 it/sec) -training >> step=3288500, episode=549 reward=0.772053 (454.90 it/sec) -training >> step=3288600, episode=549 reward=0.7866091 (485.33 it/sec) -training >> step=3288700, episode=549 reward=0.7741188 (462.80 it/sec) -training >> step=3288800, episode=549 reward=0.7677856 (496.34 it/sec) -training >> step=3288900, episode=549 reward=0.7749779 (424.35 it/sec) -training >> step=3289000, episode=549 reward=0.7802566 (471.33 it/sec) -training >> step=3289100, episode=549 reward=0.7737048 (439.22 it/sec) -training >> step=3289200, episode=549 reward=0.7737055 (489.51 it/sec) -training >> step=3289300, episode=549 reward=0.7766247 (487.65 it/sec) -training >> step=3289400, episode=549 reward=0.7781675 (450.19 it/sec) -training >> step=3289500, episode=549 reward=0.7726974 (481.95 it/sec) -training >> step=3289600, episode=549 reward=0.775149 (471.57 it/sec) -training >> step=3289700, episode=549 reward=0.7609126 (496.47 it/sec) -training >> step=3289800, episode=549 reward=0.758827 (454.59 it/sec) -training >> step=3289900, episode=549 reward=0.768446 (467.73 it/sec) -training >> step=3290000, episode=549 reward=0.7639038 (511.84 it/sec) -training >> step=3290100, episode=549 reward=0.7856063 (513.59 it/sec) -training >> step=3290200, episode=549 reward=0.773654 (503.28 it/sec) -training >> step=3290300, episode=549 reward=0.7633143 (535.32 it/sec) -training >> step=3290400, episode=549 reward=0.7928583 (446.62 it/sec) -training >> step=3290500, episode=549 reward=0.7778476 (498.65 it/sec) -training >> step=3290600, episode=549 reward=0.7794269 (470.48 it/sec) -training >> step=3290700, episode=549 reward=0.7685128 (516.00 it/sec) -training >> step=3290800, episode=549 reward=0.7556514 (497.20 it/sec) -training >> step=3290900, episode=549 reward=0.7665021 (499.89 it/sec) -training >> step=3291000, episode=549 reward=0.788261 (451.11 it/sec) -training >> step=3291100, episode=549 reward=0.7754815 (481.90 it/sec) -training >> step=3291200, episode=549 reward=0.7740054 (512.83 it/sec) -training >> step=3291300, episode=549 reward=0.7795149 (545.48 it/sec) -training >> step=3291400, episode=549 reward=0.7878624 (508.46 it/sec) -training >> step=3291500, episode=549 reward=0.7717713 (553.81 it/sec) -training >> step=3291600, episode=549 reward=0.7682445 (547.86 it/sec) -training >> step=3291700, episode=549 reward=0.7479945 (517.82 it/sec) -training >> step=3291800, episode=549 reward=0.7763416 (502.02 it/sec) -training >> step=3291900, episode=549 reward=0.7968504 (415.39 it/sec) -training >> step=3292000, episode=549 reward=0.7725825 (458.02 it/sec) -training >> step=3292100, episode=549 reward=0.7634858 (497.19 it/sec) -training >> step=3292200, episode=549 reward=0.7713583 (549.49 it/sec) -training >> step=3292300, episode=549 reward=0.7596076 (490.59 it/sec) -training >> step=3292400, episode=549 reward=0.775629 (451.50 it/sec) -training >> step=3292500, episode=549 reward=0.7683931 (493.83 it/sec) -training >> step=3292600, episode=549 reward=0.7360849 (478.54 it/sec) -training >> step=3292700, episode=549 reward=0.7827695 (524.41 it/sec) -training >> step=3292800, episode=549 reward=0.7716201 (483.24 it/sec) -training >> step=3292900, episode=549 reward=0.7681764 (444.93 it/sec) -training >> step=3293000, episode=549 reward=0.7652761 (544.72 it/sec) -training >> step=3293100, episode=549 reward=0.7554445 (522.73 it/sec) -training >> step=3293200, episode=549 reward=0.7430749 (516.44 it/sec) -training >> step=3293300, episode=550 reward=0.7691126 (100.55 it/sec) -training >> step=3293400, episode=550 reward=0.7577384 (460.12 it/sec) -training >> step=3293500, episode=550 reward=0.7602521 (512.83 it/sec) -training >> step=3293600, episode=550 reward=0.7848749 (503.20 it/sec) -training >> step=3293700, episode=550 reward=0.7612867 (373.46 it/sec) -training >> step=3293800, episode=550 reward=0.762296 (498.17 it/sec) -training >> step=3293900, episode=550 reward=0.7812064 (526.42 it/sec) -training >> step=3294000, episode=550 reward=0.764344 (452.62 it/sec) -training >> step=3294100, episode=550 reward=0.7638513 (425.02 it/sec) -training >> step=3294200, episode=550 reward=0.7480748 (532.46 it/sec) -training >> step=3294300, episode=550 reward=0.7875612 (420.09 it/sec) -training >> step=3294400, episode=550 reward=0.7924708 (539.16 it/sec) -training >> step=3294500, episode=550 reward=0.7713631 (507.42 it/sec) -training >> step=3294600, episode=550 reward=0.7698254 (531.96 it/sec) -training >> step=3294700, episode=550 reward=0.7606945 (514.27 it/sec) -training >> step=3294800, episode=550 reward=0.7534752 (509.10 it/sec) -training >> step=3294900, episode=550 reward=0.7619227 (512.89 it/sec) -training >> step=3295000, episode=550 reward=0.7811726 (502.27 it/sec) -training >> step=3295100, episode=550 reward=0.7725991 (521.25 it/sec) -training >> step=3295200, episode=550 reward=0.7771483 (515.26 it/sec) -training >> step=3295300, episode=550 reward=0.7771233 (528.55 it/sec) -training >> step=3295400, episode=550 reward=0.7878023 (549.55 it/sec) -training >> step=3295500, episode=550 reward=0.7857452 (570.20 it/sec) -training >> step=3295600, episode=550 reward=0.780921 (437.66 it/sec) -training >> step=3295700, episode=550 reward=0.7927015 (510.62 it/sec) -training >> step=3295800, episode=550 reward=0.7722223 (426.58 it/sec) -training >> step=3295900, episode=550 reward=0.8023357 (510.76 it/sec) -training >> step=3296000, episode=550 reward=0.7608915 (545.32 it/sec) -training >> step=3296100, episode=550 reward=0.7636144 (525.32 it/sec) -training >> step=3296200, episode=550 reward=0.7810156 (569.81 it/sec) -training >> step=3296300, episode=550 reward=0.7733985 (530.02 it/sec) -training >> step=3296400, episode=550 reward=0.7837251 (520.13 it/sec) -training >> step=3296500, episode=550 reward=0.7876263 (498.81 it/sec) -training >> step=3296600, episode=550 reward=0.780766 (481.00 it/sec) -training >> step=3296700, episode=550 reward=0.7863964 (492.00 it/sec) -training >> step=3296800, episode=550 reward=0.7754677 (504.23 it/sec) -training >> step=3296900, episode=550 reward=0.7362004 (441.01 it/sec) -training >> step=3297000, episode=550 reward=0.7635017 (464.26 it/sec) -training >> step=3297100, episode=550 reward=0.7571704 (416.78 it/sec) -training >> step=3297200, episode=550 reward=0.760726 (495.59 it/sec) -training >> step=3297300, episode=550 reward=0.7826727 (427.04 it/sec) -training >> step=3297400, episode=550 reward=0.779763 (494.37 it/sec) -training >> step=3297500, episode=550 reward=0.7685109 (517.17 it/sec) -training >> step=3297600, episode=550 reward=0.7718915 (506.83 it/sec) -training >> step=3297700, episode=550 reward=0.7760227 (536.63 it/sec) -training >> step=3297800, episode=550 reward=0.797392 (501.23 it/sec) -training >> step=3297900, episode=550 reward=0.7594146 (532.34 it/sec) -training >> step=3298000, episode=550 reward=0.7742965 (507.87 it/sec) -training >> step=3298100, episode=550 reward=0.769342 (481.12 it/sec) -training >> step=3298200, episode=550 reward=0.7601249 (508.56 it/sec) -training >> step=3298300, episode=550 reward=0.7655314 (535.67 it/sec) -training >> step=3298400, episode=550 reward=0.7717538 (526.72 it/sec) -training >> step=3298500, episode=550 reward=0.7731677 (514.60 it/sec) -training >> step=3298600, episode=550 reward=0.7562807 (550.77 it/sec) -training >> step=3298700, episode=550 reward=0.7506498 (503.51 it/sec) -training >> step=3298800, episode=550 reward=0.773405 (490.74 it/sec) -training >> step=3298900, episode=550 reward=0.7666543 (497.92 it/sec) -training >> step=3299000, episode=550 reward=0.7652919 (533.20 it/sec) -training >> step=3299100, episode=550 reward=0.7583934 (517.57 it/sec) -training >> step=3299200, episode=550 reward=0.7690273 (490.23 it/sec) -training >> step=3299300, episode=551 reward=0.7884631 (128.23 it/sec) -training >> step=3299400, episode=551 reward=0.7960334 (496.49 it/sec) -training >> step=3299500, episode=551 reward=0.7695127 (488.39 it/sec) -training >> step=3299600, episode=551 reward=0.7792879 (519.95 it/sec) -training >> step=3299700, episode=551 reward=0.7752757 (501.56 it/sec) -training >> step=3299800, episode=551 reward=0.764545 (379.71 it/sec) -training >> step=3299900, episode=551 reward=0.7598203 (463.37 it/sec) -training >> step=3300000, episode=551 reward=0.7802318 (502.90 it/sec) -training >> step=3300100, episode=551 reward=0.7462345 (549.74 it/sec) -training >> step=3300200, episode=551 reward=0.7620984 (545.49 it/sec) -training >> step=3300300, episode=551 reward=0.7675132 (555.07 it/sec) -training >> step=3300400, episode=551 reward=0.7644552 (500.81 it/sec) -training >> step=3300500, episode=551 reward=0.7784023 (500.00 it/sec) -training >> step=3300600, episode=551 reward=0.7700789 (492.17 it/sec) -training >> step=3300700, episode=551 reward=0.7670584 (524.67 it/sec) -training >> step=3300800, episode=551 reward=0.7761809 (547.39 it/sec) -training >> step=3300900, episode=551 reward=0.7563472 (535.79 it/sec) -training >> step=3301000, episode=551 reward=0.7840542 (526.58 it/sec) -training >> step=3301100, episode=551 reward=0.7728931 (538.22 it/sec) -training >> step=3301200, episode=551 reward=0.7849813 (500.03 it/sec) -training >> step=3301300, episode=551 reward=0.7746131 (477.14 it/sec) -training >> step=3301400, episode=551 reward=0.7668161 (512.35 it/sec) -training >> step=3301500, episode=551 reward=0.7790367 (514.14 it/sec) -training >> step=3301600, episode=551 reward=0.7702545 (516.61 it/sec) -training >> step=3301700, episode=551 reward=0.7626879 (491.79 it/sec) -training >> step=3301800, episode=551 reward=0.766713 (529.02 it/sec) -training >> step=3301900, episode=551 reward=0.7619197 (497.51 it/sec) -training >> step=3302000, episode=551 reward=0.7755076 (472.26 it/sec) -training >> step=3302100, episode=551 reward=0.7729393 (490.81 it/sec) -training >> step=3302200, episode=551 reward=0.7844039 (531.99 it/sec) -training >> step=3302300, episode=551 reward=0.7804457 (507.92 it/sec) -training >> step=3302400, episode=551 reward=0.7653958 (487.32 it/sec) -training >> step=3302500, episode=551 reward=0.7772094 (532.04 it/sec) -training >> step=3302600, episode=551 reward=0.78507 (505.38 it/sec) -training >> step=3302700, episode=551 reward=0.7928976 (531.99 it/sec) -training >> step=3302800, episode=551 reward=0.7762286 (501.44 it/sec) -training >> step=3302900, episode=551 reward=0.7649013 (529.95 it/sec) -training >> step=3303000, episode=551 reward=0.7748159 (524.55 it/sec) -training >> step=3303100, episode=551 reward=0.7717504 (482.00 it/sec) -training >> step=3303200, episode=551 reward=0.7762272 (517.11 it/sec) -training >> step=3303300, episode=551 reward=0.760076 (560.23 it/sec) -training >> step=3303400, episode=551 reward=0.7500324 (515.26 it/sec) -training >> step=3303500, episode=551 reward=0.7611043 (527.29 it/sec) -training >> step=3303600, episode=551 reward=0.7604159 (498.52 it/sec) -training >> step=3303700, episode=551 reward=0.7744602 (511.57 it/sec) -training >> step=3303800, episode=551 reward=0.7565817 (504.90 it/sec) -training >> step=3303900, episode=551 reward=0.7478779 (519.66 it/sec) -training >> step=3304000, episode=551 reward=0.7502158 (516.10 it/sec) -training >> step=3304100, episode=551 reward=0.7468179 (481.10 it/sec) -training >> step=3304200, episode=551 reward=0.7902271 (486.93 it/sec) -training >> step=3304300, episode=551 reward=0.7295809 (547.90 it/sec) -training >> step=3304400, episode=551 reward=0.7699085 (480.32 it/sec) -training >> step=3304500, episode=551 reward=0.7609468 (529.00 it/sec) -training >> step=3304600, episode=551 reward=0.7548111 (524.46 it/sec) -training >> step=3304700, episode=551 reward=0.7656026 (509.00 it/sec) -training >> step=3304800, episode=551 reward=0.7729005 (508.89 it/sec) -training >> step=3304900, episode=551 reward=0.7657771 (513.83 it/sec) -training >> step=3305000, episode=551 reward=0.7822524 (525.14 it/sec) -training >> step=3305100, episode=551 reward=0.7743153 (549.56 it/sec) -training >> step=3305200, episode=551 reward=0.7539191 (492.44 it/sec) -training >> step=3305300, episode=552 reward=0.7624543 (148.32 it/sec) -training >> step=3305400, episode=552 reward=0.7866685 (533.43 it/sec) -training >> step=3305500, episode=552 reward=0.7436059 (501.96 it/sec) -training >> step=3305600, episode=552 reward=0.781991 (488.62 it/sec) -training >> step=3305700, episode=552 reward=0.7875297 (530.32 it/sec) -training >> step=3305800, episode=552 reward=0.7508461 (534.79 it/sec) -training >> step=3305900, episode=552 reward=0.7837808 (532.73 it/sec) -training >> step=3306000, episode=552 reward=0.7874728 (491.88 it/sec) -training >> step=3306100, episode=552 reward=0.7798054 (362.41 it/sec) -training >> step=3306200, episode=552 reward=0.7553151 (501.98 it/sec) -training >> step=3306300, episode=552 reward=0.7648423 (511.07 it/sec) -training >> step=3306400, episode=552 reward=0.7880857 (534.67 it/sec) -training >> step=3306500, episode=552 reward=0.752301 (526.27 it/sec) -training >> step=3306600, episode=552 reward=0.7737682 (513.45 it/sec) -training >> step=3306700, episode=552 reward=0.7798244 (522.36 it/sec) -training >> step=3306800, episode=552 reward=0.7689498 (548.31 it/sec) -training >> step=3306900, episode=552 reward=0.7732919 (538.95 it/sec) -training >> step=3307000, episode=552 reward=0.7616587 (487.49 it/sec) -training >> step=3307100, episode=552 reward=0.7490603 (525.16 it/sec) -training >> step=3307200, episode=552 reward=0.7737243 (552.62 it/sec) -training >> step=3307300, episode=552 reward=0.75713 (520.09 it/sec) -training >> step=3307400, episode=552 reward=0.7892532 (525.86 it/sec) -training >> step=3307500, episode=552 reward=0.7784009 (491.93 it/sec) -training >> step=3307600, episode=552 reward=0.7905843 (467.43 it/sec) -training >> step=3307700, episode=552 reward=0.7506812 (498.57 it/sec) -training >> step=3307800, episode=552 reward=0.7673083 (498.50 it/sec) -training >> step=3307900, episode=552 reward=0.7613604 (556.85 it/sec) -training >> step=3308000, episode=552 reward=0.7667676 (538.94 it/sec) -training >> step=3308100, episode=552 reward=0.7639945 (499.03 it/sec) -training >> step=3308200, episode=552 reward=0.7768788 (509.82 it/sec) -training >> step=3308300, episode=552 reward=0.7662848 (538.89 it/sec) -training >> step=3308400, episode=552 reward=0.7736614 (490.71 it/sec) -training >> step=3308500, episode=552 reward=0.7828965 (494.11 it/sec) -training >> step=3308600, episode=552 reward=0.7838011 (487.29 it/sec) -training >> step=3308700, episode=552 reward=0.7856326 (554.34 it/sec) -training >> step=3308800, episode=552 reward=0.787915 (525.01 it/sec) -training >> step=3308900, episode=552 reward=0.7600166 (492.15 it/sec) -training >> step=3309000, episode=552 reward=0.7634135 (518.69 it/sec) -training >> step=3309100, episode=552 reward=0.7867032 (514.28 it/sec) -training >> step=3309200, episode=552 reward=0.7630969 (519.05 it/sec) -training >> step=3309300, episode=552 reward=0.7693417 (528.55 it/sec) -training >> step=3309400, episode=552 reward=0.7878982 (536.63 it/sec) -training >> step=3309500, episode=552 reward=0.7779097 (511.94 it/sec) -training >> step=3309600, episode=552 reward=0.78176 (504.49 it/sec) -training >> step=3309700, episode=552 reward=0.7754093 (502.33 it/sec) -training >> step=3309800, episode=552 reward=0.7774947 (539.99 it/sec) -training >> step=3309900, episode=552 reward=0.7480937 (530.54 it/sec) -training >> step=3310000, episode=552 reward=0.7767606 (523.41 it/sec) -training >> step=3310100, episode=552 reward=0.7846872 (526.99 it/sec) -training >> step=3310200, episode=552 reward=0.7689092 (498.16 it/sec) -training >> step=3310300, episode=552 reward=0.7578103 (554.53 it/sec) -training >> step=3310400, episode=552 reward=0.7838801 (506.36 it/sec) -training >> step=3310500, episode=552 reward=0.7583268 (585.17 it/sec) -training >> step=3310600, episode=552 reward=0.7662617 (500.31 it/sec) -training >> step=3310700, episode=552 reward=0.7669276 (530.12 it/sec) -training >> step=3310800, episode=552 reward=0.7588843 (542.89 it/sec) -training >> step=3310900, episode=552 reward=0.7701308 (497.15 it/sec) -training >> step=3311000, episode=552 reward=0.7601742 (493.77 it/sec) -training >> step=3311100, episode=552 reward=0.7642661 (532.07 it/sec) -training >> step=3311200, episode=552 reward=0.7494084 (525.20 it/sec) -training >> step=3311300, episode=553 reward=0.7562519 (110.87 it/sec) -training >> step=3311400, episode=553 reward=0.7902597 (414.76 it/sec) -training >> step=3311500, episode=553 reward=0.7774237 (411.07 it/sec) -training >> step=3311600, episode=553 reward=0.7711572 (454.22 it/sec) -training >> step=3311700, episode=553 reward=0.7569022 (494.16 it/sec) -training >> step=3311800, episode=553 reward=0.7850329 (486.60 it/sec) -training >> step=3311900, episode=553 reward=0.756637 (492.86 it/sec) -training >> step=3312000, episode=553 reward=0.7856246 (523.83 it/sec) -training >> step=3312100, episode=553 reward=0.77901 (518.11 it/sec) -training >> step=3312200, episode=553 reward=0.7684547 (405.72 it/sec) -training >> step=3312300, episode=553 reward=0.7827959 (508.34 it/sec) -training >> step=3312400, episode=553 reward=0.7933402 (518.69 it/sec) -training >> step=3312500, episode=553 reward=0.7752619 (519.19 it/sec) -training >> step=3312600, episode=553 reward=0.7526037 (537.21 it/sec) -training >> step=3312700, episode=553 reward=0.7615936 (561.44 it/sec) -training >> step=3312800, episode=553 reward=0.7769574 (519.96 it/sec) -training >> step=3312900, episode=553 reward=0.7740951 (517.25 it/sec) -training >> step=3313000, episode=553 reward=0.7714481 (511.32 it/sec) -training >> step=3313100, episode=553 reward=0.7821056 (517.02 it/sec) -training >> step=3313200, episode=553 reward=0.7629671 (470.06 it/sec) -training >> step=3313300, episode=553 reward=0.7852582 (524.20 it/sec) -training >> step=3313400, episode=553 reward=0.747573 (526.61 it/sec) -training >> step=3313500, episode=553 reward=0.7782128 (493.64 it/sec) -training >> step=3313600, episode=553 reward=0.7768191 (497.81 it/sec) -training >> step=3313700, episode=553 reward=0.7864636 (514.04 it/sec) -training >> step=3313800, episode=553 reward=0.7920859 (461.97 it/sec) -training >> step=3313900, episode=553 reward=0.7712435 (506.67 it/sec) -training >> step=3314000, episode=553 reward=0.7657262 (492.31 it/sec) -training >> step=3314100, episode=553 reward=0.7702751 (532.40 it/sec) -training >> step=3314200, episode=553 reward=0.7800794 (541.64 it/sec) -training >> step=3314300, episode=553 reward=0.7690698 (445.31 it/sec) -training >> step=3314400, episode=553 reward=0.7702308 (552.85 it/sec) -training >> step=3314500, episode=553 reward=0.7705473 (500.67 it/sec) -training >> step=3314600, episode=553 reward=0.7563177 (507.48 it/sec) -training >> step=3314700, episode=553 reward=0.7706236 (534.42 it/sec) -training >> step=3314800, episode=553 reward=0.7748102 (537.68 it/sec) -training >> step=3314900, episode=553 reward=0.7465382 (536.38 it/sec) -training >> step=3315000, episode=553 reward=0.7843893 (491.55 it/sec) -training >> step=3315100, episode=553 reward=0.7760221 (526.40 it/sec) -training >> step=3315200, episode=553 reward=0.7622368 (539.84 it/sec) -training >> step=3315300, episode=553 reward=0.7863526 (520.40 it/sec) -training >> step=3315400, episode=553 reward=0.7707917 (491.72 it/sec) -training >> step=3315500, episode=553 reward=0.7802939 (534.35 it/sec) -training >> step=3315600, episode=553 reward=0.7676326 (482.92 it/sec) -training >> step=3315700, episode=553 reward=0.7730868 (541.51 it/sec) -training >> step=3315800, episode=553 reward=0.7565044 (522.44 it/sec) -training >> step=3315900, episode=553 reward=0.7824022 (552.00 it/sec) -training >> step=3316000, episode=553 reward=0.7501299 (501.54 it/sec) -training >> step=3316100, episode=553 reward=0.7519931 (470.28 it/sec) -training >> step=3316200, episode=553 reward=0.7821728 (522.13 it/sec) -training >> step=3316300, episode=553 reward=0.777557 (503.83 it/sec) -training >> step=3316400, episode=553 reward=0.7941155 (556.78 it/sec) -training >> step=3316500, episode=553 reward=0.7620003 (491.42 it/sec) -training >> step=3316600, episode=553 reward=0.7696902 (540.40 it/sec) -training >> step=3316700, episode=553 reward=0.769125 (503.50 it/sec) -training >> step=3316800, episode=553 reward=0.7911992 (498.18 it/sec) -training >> step=3316900, episode=553 reward=0.7688624 (555.52 it/sec) -training >> step=3317000, episode=553 reward=0.7303507 (522.64 it/sec) -training >> step=3317100, episode=553 reward=0.7743438 (486.81 it/sec) -training >> step=3317200, episode=553 reward=0.7888736 (470.02 it/sec) -training >> step=3317300, episode=554 reward=0.7797241 (134.10 it/sec) -training >> step=3317400, episode=554 reward=0.7571468 (490.34 it/sec) -training >> step=3317500, episode=554 reward=0.7530121 (532.65 it/sec) -training >> step=3317600, episode=554 reward=0.7815031 (505.44 it/sec) -training >> step=3317700, episode=554 reward=0.7857096 (530.70 it/sec) -training >> step=3317800, episode=554 reward=0.7568376 (498.28 it/sec) -training >> step=3317900, episode=554 reward=0.7787219 (497.14 it/sec) -training >> step=3318000, episode=554 reward=0.7695405 (539.30 it/sec) -training >> step=3318100, episode=554 reward=0.7693177 (492.68 it/sec) -training >> step=3318200, episode=554 reward=0.7577288 (537.37 it/sec) -training >> step=3318300, episode=554 reward=0.7714285 (374.34 it/sec) -training >> step=3318400, episode=554 reward=0.7845614 (510.29 it/sec) -training >> step=3318500, episode=554 reward=0.7676455 (521.47 it/sec) -training >> step=3318600, episode=554 reward=0.7455429 (503.22 it/sec) -training >> step=3318700, episode=554 reward=0.7708746 (578.46 it/sec) -training >> step=3318800, episode=554 reward=0.7695185 (511.33 it/sec) -training >> step=3318900, episode=554 reward=0.77191 (481.37 it/sec) -training >> step=3319000, episode=554 reward=0.7675659 (517.80 it/sec) -training >> step=3319100, episode=554 reward=0.7723086 (524.55 it/sec) -training >> step=3319200, episode=554 reward=0.7642736 (512.32 it/sec) -training >> step=3319300, episode=554 reward=0.7620419 (530.28 it/sec) -training >> step=3319400, episode=554 reward=0.7820126 (527.07 it/sec) -training >> step=3319500, episode=554 reward=0.7613174 (508.85 it/sec) -training >> step=3319600, episode=554 reward=0.75171 (516.67 it/sec) -training >> step=3319700, episode=554 reward=0.7804089 (538.80 it/sec) -training >> step=3319800, episode=554 reward=0.774513 (542.53 it/sec) -training >> step=3319900, episode=554 reward=0.7883683 (503.20 it/sec) -training >> step=3320000, episode=554 reward=0.7834144 (528.76 it/sec) -training >> step=3320100, episode=554 reward=0.7840216 (523.66 it/sec) -training >> step=3320200, episode=554 reward=0.7546047 (527.26 it/sec) -training >> step=3320300, episode=554 reward=0.7697226 (541.04 it/sec) -training >> step=3320400, episode=554 reward=0.7635412 (497.88 it/sec) -training >> step=3320500, episode=554 reward=0.7642014 (512.60 it/sec) -training >> step=3320600, episode=554 reward=0.7776798 (494.43 it/sec) -training >> step=3320700, episode=554 reward=0.7868618 (507.26 it/sec) -training >> step=3320800, episode=554 reward=0.7697742 (491.09 it/sec) -training >> step=3320900, episode=554 reward=0.755084 (468.61 it/sec) -training >> step=3321000, episode=554 reward=0.7638836 (478.88 it/sec) -training >> step=3321100, episode=554 reward=0.7768968 (469.64 it/sec) -training >> step=3321200, episode=554 reward=0.7785292 (532.74 it/sec) -training >> step=3321300, episode=554 reward=0.7865248 (544.04 it/sec) -training >> step=3321400, episode=554 reward=0.7812383 (462.71 it/sec) -training >> step=3321500, episode=554 reward=0.7698619 (424.66 it/sec) -training >> step=3321600, episode=554 reward=0.7930167 (502.06 it/sec) -training >> step=3321700, episode=554 reward=0.7745847 (542.31 it/sec) -training >> step=3321800, episode=554 reward=0.7735963 (497.69 it/sec) -training >> step=3321900, episode=554 reward=0.7597401 (493.91 it/sec) -training >> step=3322000, episode=554 reward=0.7844773 (462.01 it/sec) -training >> step=3322100, episode=554 reward=0.7774802 (425.53 it/sec) -training >> step=3322200, episode=554 reward=0.7674911 (430.73 it/sec) -training >> step=3322300, episode=554 reward=0.776416 (482.15 it/sec) -training >> step=3322400, episode=554 reward=0.7540642 (482.98 it/sec) -training >> step=3322500, episode=554 reward=0.7729443 (477.54 it/sec) -training >> step=3322600, episode=554 reward=0.7506758 (441.72 it/sec) -training >> step=3322700, episode=554 reward=0.7620773 (506.20 it/sec) -training >> step=3322800, episode=554 reward=0.7809237 (476.79 it/sec) -training >> step=3322900, episode=554 reward=0.7460729 (437.49 it/sec) -training >> step=3323000, episode=554 reward=0.7653844 (454.92 it/sec) -training >> step=3323100, episode=554 reward=0.7566227 (462.53 it/sec) -training >> step=3323200, episode=554 reward=0.7603694 (463.05 it/sec) -training >> step=3323300, episode=555 reward=0.7656463 (108.45 it/sec) -training >> step=3323400, episode=555 reward=0.7756546 (376.31 it/sec) -training >> step=3323500, episode=555 reward=0.768849 (445.95 it/sec) -training >> step=3323600, episode=555 reward=0.7768928 (509.37 it/sec) -training >> step=3323700, episode=555 reward=0.7828914 (461.57 it/sec) -training >> step=3323800, episode=555 reward=0.7842863 (588.60 it/sec) -training >> step=3323900, episode=555 reward=0.7858943 (477.39 it/sec) -training >> step=3324000, episode=555 reward=0.7568659 (512.09 it/sec) -training >> step=3324100, episode=555 reward=0.7735451 (446.81 it/sec) -training >> step=3324200, episode=555 reward=0.7819204 (432.48 it/sec) -training >> step=3324300, episode=555 reward=0.7408738 (496.53 it/sec) -training >> step=3324400, episode=555 reward=0.7626805 (356.12 it/sec) -training >> step=3324500, episode=555 reward=0.7869241 (513.84 it/sec) -training >> step=3324600, episode=555 reward=0.7790286 (448.40 it/sec) -training >> step=3324700, episode=555 reward=0.7809818 (523.38 it/sec) -training >> step=3324800, episode=555 reward=0.7906904 (454.76 it/sec) -training >> step=3324900, episode=555 reward=0.7794396 (508.63 it/sec) -training >> step=3325000, episode=555 reward=0.7570464 (521.92 it/sec) -training >> step=3325100, episode=555 reward=0.7776521 (527.03 it/sec) -training >> step=3325200, episode=555 reward=0.7980369 (517.69 it/sec) -training >> step=3325300, episode=555 reward=0.7840946 (469.56 it/sec) -training >> step=3325400, episode=555 reward=0.7694626 (462.28 it/sec) -training >> step=3325500, episode=555 reward=0.7620571 (485.29 it/sec) -training >> step=3325600, episode=555 reward=0.7488971 (463.86 it/sec) -training >> step=3325700, episode=555 reward=0.7922719 (431.29 it/sec) -training >> step=3325800, episode=555 reward=0.7473357 (403.87 it/sec) -training >> step=3325900, episode=555 reward=0.7493883 (422.18 it/sec) -training >> step=3326000, episode=555 reward=0.7293447 (477.93 it/sec) -training >> step=3326100, episode=555 reward=0.7693102 (494.83 it/sec) -training >> step=3326200, episode=555 reward=0.7761222 (503.82 it/sec) -training >> step=3326300, episode=555 reward=0.771464 (473.24 it/sec) -training >> step=3326400, episode=555 reward=0.7797329 (473.42 it/sec) -training >> step=3326500, episode=555 reward=0.7501404 (467.12 it/sec) -training >> step=3326600, episode=555 reward=0.7733366 (533.01 it/sec) -training >> step=3326700, episode=555 reward=0.7644207 (550.77 it/sec) -training >> step=3326800, episode=555 reward=0.7547183 (526.31 it/sec) -training >> step=3326900, episode=555 reward=0.7848062 (517.30 it/sec) -training >> step=3327000, episode=555 reward=0.7619097 (503.14 it/sec) -training >> step=3327100, episode=555 reward=0.7785293 (495.41 it/sec) -training >> step=3327200, episode=555 reward=0.7689625 (535.74 it/sec) -training >> step=3327300, episode=555 reward=0.7453429 (497.68 it/sec) -training >> step=3327400, episode=555 reward=0.783056 (433.33 it/sec) -training >> step=3327500, episode=555 reward=0.7801826 (440.84 it/sec) -training >> step=3327600, episode=555 reward=0.7643497 (432.62 it/sec) -training >> step=3327700, episode=555 reward=0.7689073 (504.49 it/sec) -training >> step=3327800, episode=555 reward=0.7754923 (467.39 it/sec) -training >> step=3327900, episode=555 reward=0.7429219 (446.62 it/sec) -training >> step=3328000, episode=555 reward=0.7729062 (473.88 it/sec) -training >> step=3328100, episode=555 reward=0.7514012 (512.15 it/sec) -training >> step=3328200, episode=555 reward=0.7690359 (515.26 it/sec) -training >> step=3328300, episode=555 reward=0.7895489 (528.14 it/sec) -training >> step=3328400, episode=555 reward=0.7755746 (469.78 it/sec) -training >> step=3328500, episode=555 reward=0.7627572 (512.17 it/sec) -training >> step=3328600, episode=555 reward=0.7630586 (455.46 it/sec) -training >> step=3328700, episode=555 reward=0.7787206 (564.06 it/sec) -training >> step=3328800, episode=555 reward=0.7562005 (510.35 it/sec) -training >> step=3328900, episode=555 reward=0.7634775 (530.53 it/sec) -training >> step=3329000, episode=555 reward=0.7700249 (540.54 it/sec) -training >> step=3329100, episode=555 reward=0.7403509 (461.53 it/sec) -training >> step=3329200, episode=555 reward=0.7578572 (471.91 it/sec) -training >> step=3329300, episode=556 reward=0.7724867 (111.54 it/sec) -training >> step=3329400, episode=556 reward=0.7797822 (544.54 it/sec) -training >> step=3329500, episode=556 reward=0.7658281 (486.05 it/sec) -training >> step=3329600, episode=556 reward=0.764336 (511.48 it/sec) -training >> step=3329700, episode=556 reward=0.766738 (531.60 it/sec) -training >> step=3329800, episode=556 reward=0.7684186 (484.25 it/sec) -training >> step=3329900, episode=556 reward=0.7711847 (508.98 it/sec) -training >> step=3330000, episode=556 reward=0.7531586 (513.81 it/sec) -training >> step=3330100, episode=556 reward=0.7850816 (488.44 it/sec) -training >> step=3330200, episode=556 reward=0.750117 (550.14 it/sec) -training >> step=3330300, episode=556 reward=0.7677678 (483.98 it/sec) -training >> step=3330400, episode=556 reward=0.7481689 (504.56 it/sec) -training >> step=3330500, episode=556 reward=0.7800418 (375.50 it/sec) -training >> step=3330600, episode=556 reward=0.7534281 (490.84 it/sec) -training >> step=3330700, episode=556 reward=0.7500135 (520.12 it/sec) -training >> step=3330800, episode=556 reward=0.786599 (514.93 it/sec) -training >> step=3330900, episode=556 reward=0.7735906 (571.66 it/sec) -training >> step=3331000, episode=556 reward=0.7746851 (533.97 it/sec) -training >> step=3331100, episode=556 reward=0.7749819 (487.23 it/sec) -training >> step=3331200, episode=556 reward=0.782913 (452.50 it/sec) -training >> step=3331300, episode=556 reward=0.7744648 (470.06 it/sec) -training >> step=3331400, episode=556 reward=0.7669395 (468.89 it/sec) -training >> step=3331500, episode=556 reward=0.7562249 (446.22 it/sec) -training >> step=3331600, episode=556 reward=0.7654511 (457.79 it/sec) -training >> step=3331700, episode=556 reward=0.8015799 (440.61 it/sec) -training >> step=3331800, episode=556 reward=0.7502136 (422.31 it/sec) -training >> step=3331900, episode=556 reward=0.7769703 (472.03 it/sec) -training >> step=3332000, episode=556 reward=0.7895095 (475.79 it/sec) -training >> step=3332100, episode=556 reward=0.7765095 (529.96 it/sec) -training >> step=3332200, episode=556 reward=0.7680367 (481.10 it/sec) -training >> step=3332300, episode=556 reward=0.7921658 (492.89 it/sec) -training >> step=3332400, episode=556 reward=0.778047 (511.66 it/sec) -training >> step=3332500, episode=556 reward=0.7851676 (474.99 it/sec) -training >> step=3332600, episode=556 reward=0.7426305 (485.39 it/sec) -training >> step=3332700, episode=556 reward=0.7528381 (510.58 it/sec) -training >> step=3332800, episode=556 reward=0.7728976 (515.52 it/sec) -training >> step=3332900, episode=556 reward=0.7721683 (471.07 it/sec) -training >> step=3333000, episode=556 reward=0.7718083 (496.71 it/sec) -training >> step=3333100, episode=556 reward=0.7696652 (497.30 it/sec) -training >> step=3333200, episode=556 reward=0.7692007 (468.01 it/sec) -training >> step=3333300, episode=556 reward=0.794584 (494.08 it/sec) -training >> step=3333400, episode=556 reward=0.7574737 (526.44 it/sec) -training >> step=3333500, episode=556 reward=0.7831226 (502.62 it/sec) -training >> step=3333600, episode=556 reward=0.7652951 (512.85 it/sec) -training >> step=3333700, episode=556 reward=0.7844901 (522.97 it/sec) -training >> step=3333800, episode=556 reward=0.7955527 (527.17 it/sec) -training >> step=3333900, episode=556 reward=0.7761058 (523.51 it/sec) -training >> step=3334000, episode=556 reward=0.7675153 (508.22 it/sec) -training >> step=3334100, episode=556 reward=0.7424553 (535.16 it/sec) -training >> step=3334200, episode=556 reward=0.794285 (485.49 it/sec) -training >> step=3334300, episode=556 reward=0.7829403 (487.87 it/sec) -training >> step=3334400, episode=556 reward=0.7747751 (479.64 it/sec) -training >> step=3334500, episode=556 reward=0.7808577 (468.71 it/sec) -training >> step=3334600, episode=556 reward=0.7663492 (450.89 it/sec) -training >> step=3334700, episode=556 reward=0.7726928 (433.75 it/sec) -training >> step=3334800, episode=556 reward=0.7670953 (480.81 it/sec) -training >> step=3334900, episode=556 reward=0.7634676 (458.38 it/sec) -training >> step=3335000, episode=556 reward=0.7808626 (532.34 it/sec) -training >> step=3335100, episode=556 reward=0.7568437 (522.17 it/sec) -training >> step=3335200, episode=556 reward=0.7730498 (537.92 it/sec) -training >> step=3335300, episode=557 reward=0.7278199 (96.84 it/sec) -training >> step=3335400, episode=557 reward=0.7688017 (471.12 it/sec) -training >> step=3335500, episode=557 reward=0.7739025 (505.67 it/sec) -training >> step=3335600, episode=557 reward=0.7822922 (446.82 it/sec) -training >> step=3335700, episode=557 reward=0.7778153 (416.69 it/sec) -training >> step=3335800, episode=557 reward=0.7668251 (462.89 it/sec) -training >> step=3335900, episode=557 reward=0.7581753 (502.47 it/sec) -training >> step=3336000, episode=557 reward=0.7690514 (465.09 it/sec) -training >> step=3336100, episode=557 reward=0.7712057 (458.30 it/sec) -training >> step=3336200, episode=557 reward=0.7756654 (468.14 it/sec) -training >> step=3336300, episode=557 reward=0.7616381 (501.75 it/sec) -training >> step=3336400, episode=557 reward=0.7590628 (512.69 it/sec) -training >> step=3336500, episode=557 reward=0.7709669 (370.68 it/sec) -training >> step=3336600, episode=557 reward=0.7623245 (428.22 it/sec) -training >> step=3336700, episode=557 reward=0.7763761 (481.44 it/sec) -training >> step=3336800, episode=557 reward=0.7760071 (452.18 it/sec) -training >> step=3336900, episode=557 reward=0.7650374 (490.60 it/sec) -training >> step=3337000, episode=557 reward=0.7871339 (473.54 it/sec) -training >> step=3337100, episode=557 reward=0.7971063 (458.34 it/sec) -training >> step=3337200, episode=557 reward=0.7760643 (479.75 it/sec) -training >> step=3337300, episode=557 reward=0.7565159 (480.86 it/sec) -training >> step=3337400, episode=557 reward=0.800909 (485.88 it/sec) -training >> step=3337500, episode=557 reward=0.7814589 (525.86 it/sec) -training >> step=3337600, episode=557 reward=0.7613344 (469.16 it/sec) -training >> step=3337700, episode=557 reward=0.7639424 (486.28 it/sec) -training >> step=3337800, episode=557 reward=0.7712353 (441.04 it/sec) -training >> step=3337900, episode=557 reward=0.780934 (465.69 it/sec) -training >> step=3338000, episode=557 reward=0.7784002 (507.81 it/sec) -training >> step=3338100, episode=557 reward=0.7656154 (473.96 it/sec) -training >> step=3338200, episode=557 reward=0.800441 (463.61 it/sec) -training >> step=3338300, episode=557 reward=0.800814 (480.26 it/sec) -training >> step=3338400, episode=557 reward=0.7616274 (394.65 it/sec) -training >> step=3338500, episode=557 reward=0.7864779 (454.64 it/sec) -training >> step=3338600, episode=557 reward=0.7891809 (413.19 it/sec) -training >> step=3338700, episode=557 reward=0.7756007 (470.95 it/sec) -training >> step=3338800, episode=557 reward=0.776611 (492.17 it/sec) -training >> step=3338900, episode=557 reward=0.7657551 (469.38 it/sec) -training >> step=3339000, episode=557 reward=0.7609218 (431.21 it/sec) -training >> step=3339100, episode=557 reward=0.7762153 (426.95 it/sec) -training >> step=3339200, episode=557 reward=0.7636795 (438.60 it/sec) -training >> step=3339300, episode=557 reward=0.7785466 (450.22 it/sec) -training >> step=3339400, episode=557 reward=0.7492421 (436.65 it/sec) -training >> step=3339500, episode=557 reward=0.7640871 (405.27 it/sec) -training >> step=3339600, episode=557 reward=0.7839788 (390.94 it/sec) -training >> step=3339700, episode=557 reward=0.7747104 (449.30 it/sec) -training >> step=3339800, episode=557 reward=0.7937468 (504.12 it/sec) -training >> step=3339900, episode=557 reward=0.7538916 (481.87 it/sec) -training >> step=3340000, episode=557 reward=0.7738416 (444.82 it/sec) -training >> step=3340100, episode=557 reward=0.7627701 (426.20 it/sec) -training >> step=3340200, episode=557 reward=0.799998 (431.66 it/sec) -training >> step=3340300, episode=557 reward=0.7547833 (466.62 it/sec) -training >> step=3340400, episode=557 reward=0.773914 (459.78 it/sec) -training >> step=3340500, episode=557 reward=0.7660088 (490.57 it/sec) -training >> step=3340600, episode=557 reward=0.7554258 (462.62 it/sec) -training >> step=3340700, episode=557 reward=0.7748377 (464.87 it/sec) -training >> step=3340800, episode=557 reward=0.7680943 (461.35 it/sec) -training >> step=3340900, episode=557 reward=0.7831033 (450.81 it/sec) -training >> step=3341000, episode=557 reward=0.7641392 (437.75 it/sec) -training >> step=3341100, episode=557 reward=0.7850038 (452.35 it/sec) -training >> step=3341200, episode=557 reward=0.7526876 (506.96 it/sec) -training >> step=3341300, episode=558 reward=0.7725681 (103.61 it/sec) -training >> step=3341400, episode=558 reward=0.7684176 (516.10 it/sec) -training >> step=3341500, episode=558 reward=0.7621939 (482.57 it/sec) -training >> step=3341600, episode=558 reward=0.7804711 (529.81 it/sec) -training >> step=3341700, episode=558 reward=0.7667903 (505.44 it/sec) -training >> step=3341800, episode=558 reward=0.7746052 (466.84 it/sec) -training >> step=3341900, episode=558 reward=0.7665426 (548.03 it/sec) -training >> step=3342000, episode=558 reward=0.7668316 (468.78 it/sec) -training >> step=3342100, episode=558 reward=0.7712325 (515.90 it/sec) -training >> step=3342200, episode=558 reward=0.7867567 (506.69 it/sec) -training >> step=3342300, episode=558 reward=0.7789251 (487.00 it/sec) -training >> step=3342400, episode=558 reward=0.7761115 (481.83 it/sec) -training >> step=3342500, episode=558 reward=0.780261 (500.67 it/sec) -training >> step=3342600, episode=558 reward=0.7747557 (496.36 it/sec) -training >> step=3342700, episode=558 reward=0.7795138 (556.89 it/sec) -training >> step=3342800, episode=558 reward=0.7901508 (397.81 it/sec) -training >> step=3342900, episode=558 reward=0.7828144 (537.52 it/sec) -training >> step=3343000, episode=558 reward=0.7848276 (495.66 it/sec) -training >> step=3343100, episode=558 reward=0.7699812 (496.65 it/sec) -training >> step=3343200, episode=558 reward=0.7856851 (502.38 it/sec) -training >> step=3343300, episode=558 reward=0.7787922 (510.15 it/sec) -training >> step=3343400, episode=558 reward=0.744835 (542.24 it/sec) -training >> step=3343500, episode=558 reward=0.7688387 (459.36 it/sec) -training >> step=3343600, episode=558 reward=0.7928519 (494.68 it/sec) -training >> step=3343700, episode=558 reward=0.7576948 (526.05 it/sec) -training >> step=3343800, episode=558 reward=0.7550965 (513.59 it/sec) -training >> step=3343900, episode=558 reward=0.771903 (539.63 it/sec) -training >> step=3344000, episode=558 reward=0.7742436 (541.27 it/sec) -training >> step=3344100, episode=558 reward=0.7652636 (520.54 it/sec) -training >> step=3344200, episode=558 reward=0.7686149 (518.09 it/sec) -training >> step=3344300, episode=558 reward=0.7839085 (503.36 it/sec) -training >> step=3344400, episode=558 reward=0.778232 (496.93 it/sec) -training >> step=3344500, episode=558 reward=0.7733116 (531.83 it/sec) -training >> step=3344600, episode=558 reward=0.7547697 (491.30 it/sec) -training >> step=3344700, episode=558 reward=0.7870228 (491.73 it/sec) -training >> step=3344800, episode=558 reward=0.778529 (522.55 it/sec) -training >> step=3344900, episode=558 reward=0.7811607 (499.90 it/sec) -training >> step=3345000, episode=558 reward=0.7705412 (529.49 it/sec) -training >> step=3345100, episode=558 reward=0.7887144 (488.84 it/sec) -training >> step=3345200, episode=558 reward=0.76643 (525.65 it/sec) -training >> step=3345300, episode=558 reward=0.783738 (524.49 it/sec) -training >> step=3345400, episode=558 reward=0.7777329 (524.50 it/sec) -training >> step=3345500, episode=558 reward=0.7832374 (503.44 it/sec) -training >> step=3345600, episode=558 reward=0.7883312 (523.34 it/sec) -training >> step=3345700, episode=558 reward=0.7730669 (509.37 it/sec) -training >> step=3345800, episode=558 reward=0.7705272 (533.23 it/sec) -training >> step=3345900, episode=558 reward=0.7691159 (497.93 it/sec) -training >> step=3346000, episode=558 reward=0.7590305 (514.77 it/sec) -training >> step=3346100, episode=558 reward=0.7636912 (483.80 it/sec) -training >> step=3346200, episode=558 reward=0.7811131 (464.46 it/sec) -training >> step=3346300, episode=558 reward=0.7808266 (448.88 it/sec) -training >> step=3346400, episode=558 reward=0.7746447 (390.18 it/sec) -training >> step=3346500, episode=558 reward=0.7562315 (454.56 it/sec) -training >> step=3346600, episode=558 reward=0.7880966 (493.08 it/sec) -training >> step=3346700, episode=558 reward=0.7749217 (485.41 it/sec) -training >> step=3346800, episode=558 reward=0.7377916 (533.90 it/sec) -training >> step=3346900, episode=558 reward=0.7542681 (506.21 it/sec) -training >> step=3347000, episode=558 reward=0.7708892 (482.86 it/sec) -training >> step=3347100, episode=558 reward=0.7584416 (466.97 it/sec) -training >> step=3347200, episode=558 reward=0.744105 (511.69 it/sec) -training >> step=3347300, episode=559 reward=0.7557881 (117.05 it/sec) -training >> step=3347400, episode=559 reward=0.7693017 (537.91 it/sec) -training >> step=3347500, episode=559 reward=0.7698432 (470.02 it/sec) -training >> step=3347600, episode=559 reward=0.7742389 (483.45 it/sec) -training >> step=3347700, episode=559 reward=0.7762254 (484.76 it/sec) -training >> step=3347800, episode=559 reward=0.7849948 (445.62 it/sec) -training >> step=3347900, episode=559 reward=0.7745032 (469.23 it/sec) -training >> step=3348000, episode=559 reward=0.7850723 (468.07 it/sec) -training >> step=3348100, episode=559 reward=0.7721774 (443.59 it/sec) -training >> step=3348200, episode=559 reward=0.7657621 (458.20 it/sec) -training >> step=3348300, episode=559 reward=0.7604563 (464.06 it/sec) -training >> step=3348400, episode=559 reward=0.7925221 (527.18 it/sec) -training >> step=3348500, episode=559 reward=0.7724258 (524.93 it/sec) -training >> step=3348600, episode=559 reward=0.7789171 (522.42 it/sec) -training >> step=3348700, episode=559 reward=0.7818813 (539.30 it/sec) -training >> step=3348800, episode=559 reward=0.7620295 (532.05 it/sec) -training >> step=3348900, episode=559 reward=0.7748976 (337.47 it/sec) -training >> step=3349000, episode=559 reward=0.7667886 (504.96 it/sec) -training >> step=3349100, episode=559 reward=0.7638655 (496.52 it/sec) -training >> step=3349200, episode=559 reward=0.7786563 (497.08 it/sec) -training >> step=3349300, episode=559 reward=0.7720746 (447.59 it/sec) -training >> step=3349400, episode=559 reward=0.768554 (403.49 it/sec) -training >> step=3349500, episode=559 reward=0.7438916 (419.42 it/sec) -training >> step=3349600, episode=559 reward=0.7739508 (442.24 it/sec) -training >> step=3349700, episode=559 reward=0.7758858 (409.70 it/sec) -training >> step=3349800, episode=559 reward=0.7917597 (459.44 it/sec) -training >> step=3349900, episode=559 reward=0.7768496 (534.56 it/sec) -training >> step=3350000, episode=559 reward=0.7570856 (515.75 it/sec) -training >> step=3350100, episode=559 reward=0.7830102 (540.00 it/sec) -training >> step=3350200, episode=559 reward=0.785111 (493.19 it/sec) -training >> step=3350300, episode=559 reward=0.7752514 (486.33 it/sec) -training >> step=3350400, episode=559 reward=0.7749788 (413.93 it/sec) -training >> step=3350500, episode=559 reward=0.8016543 (450.98 it/sec) -training >> step=3350600, episode=559 reward=0.7482943 (498.48 it/sec) -training >> step=3350700, episode=559 reward=0.7523102 (475.42 it/sec) -training >> step=3350800, episode=559 reward=0.7677721 (490.66 it/sec) -training >> step=3350900, episode=559 reward=0.7717245 (472.07 it/sec) -training >> step=3351000, episode=559 reward=0.7763348 (532.72 it/sec) -training >> step=3351100, episode=559 reward=0.7764328 (490.23 it/sec) -training >> step=3351200, episode=559 reward=0.7860888 (485.35 it/sec) -training >> step=3351300, episode=559 reward=0.7828628 (502.41 it/sec) -training >> step=3351400, episode=559 reward=0.7891629 (469.73 it/sec) -training >> step=3351500, episode=559 reward=0.7718806 (537.27 it/sec) -training >> step=3351600, episode=559 reward=0.782911 (497.42 it/sec) -training >> step=3351700, episode=559 reward=0.7801887 (485.85 it/sec) -training >> step=3351800, episode=559 reward=0.7861005 (519.30 it/sec) -training >> step=3351900, episode=559 reward=0.7851931 (480.03 it/sec) -training >> step=3352000, episode=559 reward=0.768625 (488.31 it/sec) -training >> step=3352100, episode=559 reward=0.7724632 (496.55 it/sec) -training >> step=3352200, episode=559 reward=0.7585901 (513.10 it/sec) -training >> step=3352300, episode=559 reward=0.7858967 (534.91 it/sec) -training >> step=3352400, episode=559 reward=0.7593958 (483.86 it/sec) -training >> step=3352500, episode=559 reward=0.7912567 (491.21 it/sec) -training >> step=3352600, episode=559 reward=0.7559252 (499.00 it/sec) -training >> step=3352700, episode=559 reward=0.7360332 (497.07 it/sec) -training >> step=3352800, episode=559 reward=0.733471 (514.30 it/sec) -training >> step=3352900, episode=559 reward=0.7623658 (490.96 it/sec) -training >> step=3353000, episode=559 reward=0.7251517 (475.79 it/sec) -training >> step=3353100, episode=559 reward=0.7401555 (423.67 it/sec) -training >> step=3353200, episode=559 reward=0.751292 (427.64 it/sec) -training >> step=3353300, episode=560 reward=0.7832486 (54.55 it/sec) -training >> step=3353400, episode=560 reward=0.7865013 (458.79 it/sec) -training >> step=3353500, episode=560 reward=0.7529414 (471.42 it/sec) -training >> step=3353600, episode=560 reward=0.7912815 (470.32 it/sec) -training >> step=3353700, episode=560 reward=0.7829757 (415.78 it/sec) -training >> step=3353800, episode=560 reward=0.7620654 (449.82 it/sec) -training >> step=3353900, episode=560 reward=0.7518291 (502.06 it/sec) -training >> step=3354000, episode=560 reward=0.7998993 (513.19 it/sec) -training >> step=3354100, episode=560 reward=0.7818258 (497.65 it/sec) -training >> step=3354200, episode=560 reward=0.7628149 (487.49 it/sec) -training >> step=3354300, episode=560 reward=0.7751996 (456.69 it/sec) -training >> step=3354400, episode=560 reward=0.7567045 (446.42 it/sec) -training >> step=3354500, episode=560 reward=0.7904825 (507.26 it/sec) -training >> step=3354600, episode=560 reward=0.7788717 (488.37 it/sec) -training >> step=3354700, episode=560 reward=0.7788662 (494.90 it/sec) -training >> step=3354800, episode=560 reward=0.7791749 (506.49 it/sec) -training >> step=3354900, episode=560 reward=0.7689632 (476.42 it/sec) -training >> step=3355000, episode=560 reward=0.7824839 (472.50 it/sec) -training >> step=3355100, episode=560 reward=0.7855785 (348.39 it/sec) -training >> step=3355200, episode=560 reward=0.7742125 (502.11 it/sec) -training >> step=3355300, episode=560 reward=0.775664 (495.04 it/sec) -training >> step=3355400, episode=560 reward=0.7594722 (507.14 it/sec) -training >> step=3355500, episode=560 reward=0.7883503 (446.65 it/sec) -training >> step=3355600, episode=560 reward=0.7633884 (504.69 it/sec) -training >> step=3355700, episode=560 reward=0.773881 (495.26 it/sec) -training >> step=3355800, episode=560 reward=0.7596009 (475.32 it/sec) -training >> step=3355900, episode=560 reward=0.7690478 (453.68 it/sec) -training >> step=3356000, episode=560 reward=0.7794687 (467.29 it/sec) -training >> step=3356100, episode=560 reward=0.7810808 (492.54 it/sec) -training >> step=3356200, episode=560 reward=0.7493793 (470.16 it/sec) -training >> step=3356300, episode=560 reward=0.772818 (472.95 it/sec) -training >> step=3356400, episode=560 reward=0.7468414 (494.28 it/sec) -training >> step=3356500, episode=560 reward=0.7780498 (466.95 it/sec) -training >> step=3356600, episode=560 reward=0.7524566 (502.26 it/sec) -training >> step=3356700, episode=560 reward=0.7823303 (533.39 it/sec) -training >> step=3356800, episode=560 reward=0.7480434 (494.05 it/sec) -training >> step=3356900, episode=560 reward=0.7903284 (485.93 it/sec) -training >> step=3357000, episode=560 reward=0.7768214 (459.62 it/sec) -training >> step=3357100, episode=560 reward=0.7799202 (484.54 it/sec) -training >> step=3357200, episode=560 reward=0.7540965 (512.47 it/sec) -training >> step=3357300, episode=560 reward=0.7779824 (490.57 it/sec) -training >> step=3357400, episode=560 reward=0.7684265 (513.56 it/sec) -training >> step=3357500, episode=560 reward=0.7861956 (465.10 it/sec) -training >> step=3357600, episode=560 reward=0.7690747 (462.90 it/sec) -training >> step=3357700, episode=560 reward=0.7722877 (466.54 it/sec) -training >> step=3357800, episode=560 reward=0.7724838 (489.07 it/sec) -training >> step=3357900, episode=560 reward=0.7759245 (422.64 it/sec) -training >> step=3358000, episode=560 reward=0.7830117 (482.24 it/sec) -training >> step=3358100, episode=560 reward=0.7550578 (540.36 it/sec) -training >> step=3358200, episode=560 reward=0.7881232 (496.31 it/sec) -training >> step=3358300, episode=560 reward=0.8000411 (455.49 it/sec) -training >> step=3358400, episode=560 reward=0.783116 (469.17 it/sec) -training >> step=3358500, episode=560 reward=0.7550029 (509.14 it/sec) -training >> step=3358600, episode=560 reward=0.7775911 (513.14 it/sec) -training >> step=3358700, episode=560 reward=0.7547187 (508.75 it/sec) -training >> step=3358800, episode=560 reward=0.7648386 (456.72 it/sec) -training >> step=3358900, episode=560 reward=0.7716217 (478.04 it/sec) -training >> step=3359000, episode=560 reward=0.7407671 (496.44 it/sec) -training >> step=3359100, episode=560 reward=0.7297608 (463.22 it/sec) -training >> step=3359200, episode=560 reward=0.7440723 (475.91 it/sec) -training >> step=3359300, episode=561 reward=0.782482 (92.00 it/sec) -training >> step=3359400, episode=561 reward=0.7776091 (517.79 it/sec) -training >> step=3359500, episode=561 reward=0.7488195 (516.76 it/sec) -training >> step=3359600, episode=561 reward=0.7575797 (515.38 it/sec) -training >> step=3359700, episode=561 reward=0.7516046 (496.46 it/sec) -training >> step=3359800, episode=561 reward=0.7825569 (524.28 it/sec) -training >> step=3359900, episode=561 reward=0.788148 (524.53 it/sec) -training >> step=3360000, episode=561 reward=0.7706196 (546.60 it/sec) -training >> step=3360100, episode=561 reward=0.7673376 (521.21 it/sec) -training >> step=3360200, episode=561 reward=0.8011094 (519.03 it/sec) -training >> step=3360300, episode=561 reward=0.7755632 (498.73 it/sec) -training >> step=3360400, episode=561 reward=0.7764212 (469.79 it/sec) -training >> step=3360500, episode=561 reward=0.7791459 (501.91 it/sec) -training >> step=3360600, episode=561 reward=0.7787355 (565.69 it/sec) -training >> step=3360700, episode=561 reward=0.7719489 (476.19 it/sec) -training >> step=3360800, episode=561 reward=0.7787622 (427.35 it/sec) -training >> step=3360900, episode=561 reward=0.77748 (498.02 it/sec) -training >> step=3361000, episode=561 reward=0.7764809 (481.44 it/sec) -training >> step=3361100, episode=561 reward=0.7981343 (521.59 it/sec) -training >> step=3361200, episode=561 reward=0.7545598 (512.15 it/sec) -training >> step=3361300, episode=561 reward=0.772845 (389.41 it/sec) -training >> step=3361400, episode=561 reward=0.7757002 (480.07 it/sec) -training >> step=3361500, episode=561 reward=0.7677081 (432.14 it/sec) -training >> step=3361600, episode=561 reward=0.7802842 (495.13 it/sec) -training >> step=3361700, episode=561 reward=0.7634013 (485.66 it/sec) -training >> step=3361800, episode=561 reward=0.7691426 (472.88 it/sec) -training >> step=3361900, episode=561 reward=0.7704347 (491.14 it/sec) -training >> step=3362000, episode=561 reward=0.794804 (511.10 it/sec) -training >> step=3362100, episode=561 reward=0.7766587 (482.10 it/sec) -training >> step=3362200, episode=561 reward=0.786089 (485.35 it/sec) -training >> step=3362300, episode=561 reward=0.7690536 (472.58 it/sec) -training >> step=3362400, episode=561 reward=0.772581 (509.49 it/sec) -training >> step=3362500, episode=561 reward=0.7726136 (483.74 it/sec) -training >> step=3362600, episode=561 reward=0.7782204 (502.56 it/sec) -training >> step=3362700, episode=561 reward=0.7765505 (533.97 it/sec) -training >> step=3362800, episode=561 reward=0.7704776 (495.41 it/sec) -training >> step=3362900, episode=561 reward=0.7647087 (528.07 it/sec) -training >> step=3363000, episode=561 reward=0.7683865 (554.34 it/sec) -training >> step=3363100, episode=561 reward=0.778248 (577.49 it/sec) -training >> step=3363200, episode=561 reward=0.7896816 (539.89 it/sec) -training >> step=3363300, episode=561 reward=0.7720945 (512.49 it/sec) -training >> step=3363400, episode=561 reward=0.7914976 (475.03 it/sec) -training >> step=3363500, episode=561 reward=0.7754702 (482.53 it/sec) -training >> step=3363600, episode=561 reward=0.7592735 (486.14 it/sec) -training >> step=3363700, episode=561 reward=0.7685865 (526.75 it/sec) -training >> step=3363800, episode=561 reward=0.7824234 (548.07 it/sec) -training >> step=3363900, episode=561 reward=0.7653462 (538.69 it/sec) -training >> step=3364000, episode=561 reward=0.7615989 (520.23 it/sec) -training >> step=3364100, episode=561 reward=0.7752604 (497.90 it/sec) -training >> step=3364200, episode=561 reward=0.7846624 (526.97 it/sec) -training >> step=3364300, episode=561 reward=0.7774667 (522.31 it/sec) -training >> step=3364400, episode=561 reward=0.7631109 (529.20 it/sec) -training >> step=3364500, episode=561 reward=0.7773405 (529.76 it/sec) -training >> step=3364600, episode=561 reward=0.760094 (466.20 it/sec) -training >> step=3364700, episode=561 reward=0.7605335 (500.64 it/sec) -training >> step=3364800, episode=561 reward=0.7513409 (523.35 it/sec) -training >> step=3364900, episode=561 reward=0.7594339 (520.39 it/sec) -training >> step=3365000, episode=561 reward=0.7645916 (534.04 it/sec) -training >> step=3365100, episode=561 reward=0.7731156 (523.73 it/sec) -training >> step=3365200, episode=561 reward=0.7662971 (530.65 it/sec) -training >> step=3365300, episode=562 reward=0.7766629 (112.71 it/sec) -training >> step=3365400, episode=562 reward=0.7726545 (495.80 it/sec) -training >> step=3365500, episode=562 reward=0.7561401 (495.98 it/sec) -training >> step=3365600, episode=562 reward=0.7491348 (543.81 it/sec) -training >> step=3365700, episode=562 reward=0.7704381 (549.75 it/sec) -training >> step=3365800, episode=562 reward=0.7644847 (465.97 it/sec) -training >> step=3365900, episode=562 reward=0.7779246 (530.83 it/sec) -training >> step=3366000, episode=562 reward=0.7918757 (537.21 it/sec) -training >> step=3366100, episode=562 reward=0.7836961 (484.26 it/sec) -training >> step=3366200, episode=562 reward=0.756183 (465.97 it/sec) -training >> step=3366300, episode=562 reward=0.7944863 (485.44 it/sec) -training >> step=3366400, episode=562 reward=0.7804062 (422.81 it/sec) -training >> step=3366500, episode=562 reward=0.7762936 (495.85 it/sec) -training >> step=3366600, episode=562 reward=0.7633406 (436.50 it/sec) -training >> step=3366700, episode=562 reward=0.7799749 (462.13 it/sec) -training >> step=3366800, episode=562 reward=0.7773093 (409.44 it/sec) -training >> step=3366900, episode=562 reward=0.7540579 (479.85 it/sec) -training >> step=3367000, episode=562 reward=0.778342 (489.26 it/sec) -training >> step=3367100, episode=562 reward=0.7756166 (554.47 it/sec) -training >> step=3367200, episode=562 reward=0.7667801 (543.65 it/sec) -training >> step=3367300, episode=562 reward=0.7429504 (555.27 it/sec) -training >> step=3367400, episode=562 reward=0.7934275 (567.49 it/sec) -training >> step=3367500, episode=562 reward=0.790219 (379.62 it/sec) -training >> step=3367600, episode=562 reward=0.7779175 (530.95 it/sec) -training >> step=3367700, episode=562 reward=0.7575015 (568.66 it/sec) -training >> step=3367800, episode=562 reward=0.7569817 (556.77 it/sec) -training >> step=3367900, episode=562 reward=0.763182 (514.82 it/sec) -training >> step=3368000, episode=562 reward=0.7680252 (484.34 it/sec) -training >> step=3368100, episode=562 reward=0.762917 (571.67 it/sec) -training >> step=3368200, episode=562 reward=0.8000392 (536.25 it/sec) -training >> step=3368300, episode=562 reward=0.7663747 (563.42 it/sec) -training >> step=3368400, episode=562 reward=0.7864053 (555.34 it/sec) -training >> step=3368500, episode=562 reward=0.759463 (538.29 it/sec) -training >> step=3368600, episode=562 reward=0.7654315 (527.82 it/sec) -training >> step=3368700, episode=562 reward=0.7790425 (486.82 it/sec) -training >> step=3368800, episode=562 reward=0.7762467 (537.81 it/sec) -training >> step=3368900, episode=562 reward=0.7707125 (512.07 it/sec) -training >> step=3369000, episode=562 reward=0.7657177 (487.67 it/sec) -training >> step=3369100, episode=562 reward=0.7793781 (534.78 it/sec) -training >> step=3369200, episode=562 reward=0.7837585 (550.89 it/sec) -training >> step=3369300, episode=562 reward=0.7788063 (492.88 it/sec) -training >> step=3369400, episode=562 reward=0.7802538 (481.92 it/sec) -training >> step=3369500, episode=562 reward=0.7577798 (501.05 it/sec) -training >> step=3369600, episode=562 reward=0.7811627 (506.93 it/sec) -training >> step=3369700, episode=562 reward=0.77659 (509.87 it/sec) -training >> step=3369800, episode=562 reward=0.7636163 (487.29 it/sec) -training >> step=3369900, episode=562 reward=0.7663651 (479.08 it/sec) -training >> step=3370000, episode=562 reward=0.7668024 (489.64 it/sec) -training >> step=3370100, episode=562 reward=0.7914603 (487.89 it/sec) -training >> step=3370200, episode=562 reward=0.7666844 (479.55 it/sec) -training >> step=3370300, episode=562 reward=0.7498296 (489.06 it/sec) -training >> step=3370400, episode=562 reward=0.7668118 (494.02 it/sec) -training >> step=3370500, episode=562 reward=0.7768468 (502.50 it/sec) -training >> step=3370600, episode=562 reward=0.7749228 (513.39 it/sec) -training >> step=3370700, episode=562 reward=0.7689289 (442.42 it/sec) -training >> step=3370800, episode=562 reward=0.7608467 (499.15 it/sec) -training >> step=3370900, episode=562 reward=0.7834438 (514.69 it/sec) -training >> step=3371000, episode=562 reward=0.7500744 (517.61 it/sec) -training >> step=3371100, episode=562 reward=0.7532342 (476.38 it/sec) -training >> step=3371200, episode=562 reward=0.755988 (440.98 it/sec) -training >> step=3371300, episode=563 reward=0.7690428 (67.05 it/sec) -training >> step=3371400, episode=563 reward=0.7955531 (482.99 it/sec) -training >> step=3371500, episode=563 reward=0.7883022 (460.06 it/sec) -training >> step=3371600, episode=563 reward=0.7692283 (466.46 it/sec) -training >> step=3371700, episode=563 reward=0.7456992 (469.40 it/sec) -training >> step=3371800, episode=563 reward=0.7712489 (516.88 it/sec) -training >> step=3371900, episode=563 reward=0.7724595 (526.09 it/sec) -training >> step=3372000, episode=563 reward=0.7860599 (523.86 it/sec) -training >> step=3372100, episode=563 reward=0.7933903 (485.13 it/sec) -training >> step=3372200, episode=563 reward=0.7730544 (466.22 it/sec) -training >> step=3372300, episode=563 reward=0.7594652 (507.98 it/sec) -training >> step=3372400, episode=563 reward=0.7708904 (504.24 it/sec) -training >> step=3372500, episode=563 reward=0.7848989 (539.99 it/sec) -training >> step=3372600, episode=563 reward=0.7678659 (484.49 it/sec) -training >> step=3372700, episode=563 reward=0.7812715 (428.83 it/sec) -training >> step=3372800, episode=563 reward=0.7736021 (542.68 it/sec) -training >> step=3372900, episode=563 reward=0.7488467 (516.57 it/sec) -training >> step=3373000, episode=563 reward=0.7714064 (499.34 it/sec) -training >> step=3373100, episode=563 reward=0.7494042 (495.88 it/sec) -training >> step=3373200, episode=563 reward=0.7419586 (537.57 it/sec) -training >> step=3373300, episode=563 reward=0.7757196 (548.23 it/sec) -training >> step=3373400, episode=563 reward=0.7542782 (513.70 it/sec) -training >> step=3373500, episode=563 reward=0.7881711 (516.20 it/sec) -training >> step=3373600, episode=563 reward=0.7633324 (365.20 it/sec) -training >> step=3373700, episode=563 reward=0.7602972 (510.93 it/sec) -training >> step=3373800, episode=563 reward=0.7607564 (520.37 it/sec) -training >> step=3373900, episode=563 reward=0.7748911 (521.32 it/sec) -training >> step=3374000, episode=563 reward=0.7445593 (529.92 it/sec) -training >> step=3374100, episode=563 reward=0.7896301 (477.97 it/sec) -training >> step=3374200, episode=563 reward=0.7747273 (535.52 it/sec) -training >> step=3374300, episode=563 reward=0.7807812 (530.25 it/sec) -training >> step=3374400, episode=563 reward=0.7717151 (472.96 it/sec) -training >> step=3374500, episode=563 reward=0.7734067 (479.42 it/sec) -training >> step=3374600, episode=563 reward=0.7945121 (533.59 it/sec) -training >> step=3374700, episode=563 reward=0.7868604 (477.15 it/sec) -training >> step=3374800, episode=563 reward=0.7643797 (466.06 it/sec) -training >> step=3374900, episode=563 reward=0.7746111 (528.72 it/sec) -training >> step=3375000, episode=563 reward=0.7653138 (492.33 it/sec) -training >> step=3375100, episode=563 reward=0.7614451 (556.42 it/sec) -training >> step=3375200, episode=563 reward=0.752892 (483.89 it/sec) -training >> step=3375300, episode=563 reward=0.7578414 (509.06 it/sec) -training >> step=3375400, episode=563 reward=0.7890068 (508.36 it/sec) -training >> step=3375500, episode=563 reward=0.7699256 (507.06 it/sec) -training >> step=3375600, episode=563 reward=0.7750798 (466.94 it/sec) -training >> step=3375700, episode=563 reward=0.7488299 (512.79 it/sec) -training >> step=3375800, episode=563 reward=0.7798505 (519.59 it/sec) -training >> step=3375900, episode=563 reward=0.761542 (509.84 it/sec) -training >> step=3376000, episode=563 reward=0.7717988 (499.23 it/sec) -training >> step=3376100, episode=563 reward=0.7625062 (516.04 it/sec) -training >> step=3376200, episode=563 reward=0.7845404 (518.31 it/sec) -training >> step=3376300, episode=563 reward=0.7687029 (498.32 it/sec) -training >> step=3376400, episode=563 reward=0.7662179 (490.34 it/sec) -training >> step=3376500, episode=563 reward=0.7461494 (491.87 it/sec) -training >> step=3376600, episode=563 reward=0.7609239 (478.87 it/sec) -training >> step=3376700, episode=563 reward=0.7738833 (506.99 it/sec) -training >> step=3376800, episode=563 reward=0.7563282 (490.45 it/sec) -training >> step=3376900, episode=563 reward=0.7865717 (497.58 it/sec) -training >> step=3377000, episode=563 reward=0.7660657 (507.14 it/sec) -training >> step=3377100, episode=563 reward=0.7605751 (518.78 it/sec) -training >> step=3377200, episode=563 reward=0.7680783 (440.85 it/sec) -training >> step=3377300, episode=564 reward=0.7657812 (94.43 it/sec) -training >> step=3377400, episode=564 reward=0.7586436 (540.74 it/sec) -training >> step=3377500, episode=564 reward=0.7818555 (514.70 it/sec) -training >> step=3377600, episode=564 reward=0.7681579 (484.51 it/sec) -training >> step=3377700, episode=564 reward=0.7715614 (545.04 it/sec) -training >> step=3377800, episode=564 reward=0.778289 (542.88 it/sec) -training >> step=3377900, episode=564 reward=0.7911009 (511.98 it/sec) -training >> step=3378000, episode=564 reward=0.7819992 (478.09 it/sec) -training >> step=3378100, episode=564 reward=0.7763605 (532.50 it/sec) -training >> step=3378200, episode=564 reward=0.7932689 (468.40 it/sec) -training >> step=3378300, episode=564 reward=0.7724553 (521.41 it/sec) -training >> step=3378400, episode=564 reward=0.7718084 (461.13 it/sec) -training >> step=3378500, episode=564 reward=0.7786279 (506.13 it/sec) -training >> step=3378600, episode=564 reward=0.7818265 (513.17 it/sec) -training >> step=3378700, episode=564 reward=0.7601293 (512.59 it/sec) -training >> step=3378800, episode=564 reward=0.7696374 (528.25 it/sec) -training >> step=3378900, episode=564 reward=0.7881382 (456.41 it/sec) -training >> step=3379000, episode=564 reward=0.7767789 (527.15 it/sec) -training >> step=3379100, episode=564 reward=0.7745007 (506.57 it/sec) -training >> step=3379200, episode=564 reward=0.7803802 (453.70 it/sec) -training >> step=3379300, episode=564 reward=0.7972977 (496.75 it/sec) -training >> step=3379400, episode=564 reward=0.789777 (469.53 it/sec) -training >> step=3379500, episode=564 reward=0.8001984 (472.74 it/sec) -training >> step=3379600, episode=564 reward=0.7774855 (510.47 it/sec) -training >> step=3379700, episode=564 reward=0.7573133 (485.00 it/sec) -training >> step=3379800, episode=564 reward=0.7449281 (522.92 it/sec) -training >> step=3379900, episode=564 reward=0.7577879 (361.83 it/sec) -training >> step=3380000, episode=564 reward=0.7770811 (469.34 it/sec) -training >> step=3380100, episode=564 reward=0.7658881 (469.86 it/sec) -training >> step=3380200, episode=564 reward=0.7676111 (458.39 it/sec) -training >> step=3380300, episode=564 reward=0.7829321 (512.45 it/sec) -training >> step=3380400, episode=564 reward=0.7862095 (497.80 it/sec) -training >> step=3380500, episode=564 reward=0.7561003 (461.51 it/sec) -training >> step=3380600, episode=564 reward=0.789544 (431.81 it/sec) -training >> step=3380700, episode=564 reward=0.7849816 (502.58 it/sec) -training >> step=3380800, episode=564 reward=0.7671878 (495.99 it/sec) -training >> step=3380900, episode=564 reward=0.7290737 (508.65 it/sec) -training >> step=3381000, episode=564 reward=0.7905409 (490.92 it/sec) -training >> step=3381100, episode=564 reward=0.761768 (503.10 it/sec) -training >> step=3381200, episode=564 reward=0.7769523 (549.28 it/sec) -training >> step=3381300, episode=564 reward=0.7779791 (498.16 it/sec) -training >> step=3381400, episode=564 reward=0.7738989 (495.30 it/sec) -training >> step=3381500, episode=564 reward=0.7565371 (533.07 it/sec) -training >> step=3381600, episode=564 reward=0.7656187 (503.88 it/sec) -training >> step=3381700, episode=564 reward=0.7662023 (496.97 it/sec) -training >> step=3381800, episode=564 reward=0.759526 (528.63 it/sec) -training >> step=3381900, episode=564 reward=0.7797813 (532.27 it/sec) -training >> step=3382000, episode=564 reward=0.7747535 (516.21 it/sec) -training >> step=3382100, episode=564 reward=0.7554214 (498.35 it/sec) -training >> step=3382200, episode=564 reward=0.7667159 (478.14 it/sec) -training >> step=3382300, episode=564 reward=0.7665421 (573.07 it/sec) -training >> step=3382400, episode=564 reward=0.783681 (500.08 it/sec) -training >> step=3382500, episode=564 reward=0.7744831 (514.08 it/sec) -training >> step=3382600, episode=564 reward=0.7591062 (541.60 it/sec) -training >> step=3382700, episode=564 reward=0.7744644 (508.89 it/sec) -training >> step=3382800, episode=564 reward=0.7560672 (530.06 it/sec) -training >> step=3382900, episode=564 reward=0.7740584 (475.68 it/sec) -training >> step=3383000, episode=564 reward=0.7832053 (540.63 it/sec) -training >> step=3383100, episode=564 reward=0.7531492 (489.41 it/sec) -training >> step=3383200, episode=564 reward=0.7598929 (499.72 it/sec) -training >> step=3383300, episode=565 reward=0.7834584 (91.58 it/sec) -training >> step=3383400, episode=565 reward=0.7926633 (259.58 it/sec) -training >> step=3383500, episode=565 reward=0.7663359 (520.38 it/sec) -training >> step=3383600, episode=565 reward=0.7748896 (497.81 it/sec) -training >> step=3383700, episode=565 reward=0.7699511 (528.33 it/sec) -training >> step=3383800, episode=565 reward=0.7918767 (507.94 it/sec) -training >> step=3383900, episode=565 reward=0.7811871 (539.02 it/sec) -training >> step=3384000, episode=565 reward=0.7490581 (502.56 it/sec) -training >> step=3384100, episode=565 reward=0.7917754 (502.62 it/sec) -training >> step=3384200, episode=565 reward=0.7611802 (518.32 it/sec) -training >> step=3384300, episode=565 reward=0.7828197 (509.34 it/sec) -training >> step=3384400, episode=565 reward=0.7709969 (492.25 it/sec) -training >> step=3384500, episode=565 reward=0.7910854 (475.99 it/sec) -training >> step=3384600, episode=565 reward=0.7799011 (500.75 it/sec) -training >> step=3384700, episode=565 reward=0.766117 (461.78 it/sec) -training >> step=3384800, episode=565 reward=0.7985346 (501.58 it/sec) -training >> step=3384900, episode=565 reward=0.7745858 (536.72 it/sec) -training >> step=3385000, episode=565 reward=0.797321 (502.31 it/sec) -training >> step=3385100, episode=565 reward=0.7692844 (531.49 it/sec) -training >> step=3385200, episode=565 reward=0.7568254 (523.95 it/sec) -training >> step=3385300, episode=565 reward=0.7548933 (519.28 it/sec) -training >> step=3385400, episode=565 reward=0.7807871 (533.06 it/sec) -training >> step=3385500, episode=565 reward=0.7682639 (477.06 it/sec) -training >> step=3385600, episode=565 reward=0.7641575 (525.74 it/sec) -training >> step=3385700, episode=565 reward=0.7767256 (455.30 it/sec) -training >> step=3385800, episode=565 reward=0.7586984 (523.96 it/sec) -training >> step=3385900, episode=565 reward=0.7833915 (394.35 it/sec) -training >> step=3386000, episode=565 reward=0.7621188 (498.28 it/sec) -training >> step=3386100, episode=565 reward=0.7582914 (525.81 it/sec) -training >> step=3386200, episode=565 reward=0.78325 (529.23 it/sec) -training >> step=3386300, episode=565 reward=0.753972 (488.01 it/sec) -training >> step=3386400, episode=565 reward=0.7809555 (456.64 it/sec) -training >> step=3386500, episode=565 reward=0.7719225 (433.91 it/sec) -training >> step=3386600, episode=565 reward=0.7604244 (552.31 it/sec) -training >> step=3386700, episode=565 reward=0.7912806 (531.50 it/sec) -training >> step=3386800, episode=565 reward=0.7670563 (507.95 it/sec) -training >> step=3386900, episode=565 reward=0.7569414 (517.43 it/sec) -training >> step=3387000, episode=565 reward=0.758704 (527.25 it/sec) -training >> step=3387100, episode=565 reward=0.7751931 (509.57 it/sec) -training >> step=3387200, episode=565 reward=0.7531332 (500.82 it/sec) -training >> step=3387300, episode=565 reward=0.7510871 (509.02 it/sec) -training >> step=3387400, episode=565 reward=0.785242 (501.59 it/sec) -training >> step=3387500, episode=565 reward=0.7670156 (446.95 it/sec) -training >> step=3387600, episode=565 reward=0.7669327 (522.74 it/sec) -training >> step=3387700, episode=565 reward=0.759851 (497.83 it/sec) -training >> step=3387800, episode=565 reward=0.7869568 (513.07 it/sec) -training >> step=3387900, episode=565 reward=0.7836189 (512.52 it/sec) -training >> step=3388000, episode=565 reward=0.7440533 (521.73 it/sec) -training >> step=3388100, episode=565 reward=0.7665994 (422.96 it/sec) -training >> step=3388200, episode=565 reward=0.7692903 (504.96 it/sec) -training >> step=3388300, episode=565 reward=0.7834032 (521.04 it/sec) -training >> step=3388400, episode=565 reward=0.777178 (548.40 it/sec) -training >> step=3388500, episode=565 reward=0.7569008 (504.13 it/sec) -training >> step=3388600, episode=565 reward=0.772615 (483.16 it/sec) -training >> step=3388700, episode=565 reward=0.7717078 (538.17 it/sec) -training >> step=3388800, episode=565 reward=0.7704543 (544.11 it/sec) -training >> step=3388900, episode=565 reward=0.7597229 (520.43 it/sec) -training >> step=3389000, episode=565 reward=0.7730429 (508.51 it/sec) -training >> step=3389100, episode=565 reward=0.7634076 (509.74 it/sec) -training >> step=3389200, episode=565 reward=0.7641361 (532.16 it/sec) -training >> step=3389300, episode=566 reward=0.7582896 (95.91 it/sec) -training >> step=3389400, episode=566 reward=0.7699553 (535.41 it/sec) -training >> step=3389500, episode=566 reward=0.7696455 (534.84 it/sec) -training >> step=3389600, episode=566 reward=0.7837995 (554.89 it/sec) -training >> step=3389700, episode=566 reward=0.7682871 (522.42 it/sec) -training >> step=3389800, episode=566 reward=0.782096 (542.63 it/sec) -training >> step=3389900, episode=566 reward=0.7829611 (512.66 it/sec) -training >> step=3390000, episode=566 reward=0.7800477 (549.15 it/sec) -training >> step=3390100, episode=566 reward=0.7618095 (549.81 it/sec) -training >> step=3390200, episode=566 reward=0.7802031 (525.46 it/sec) -training >> step=3390300, episode=566 reward=0.7840581 (517.35 it/sec) -training >> step=3390400, episode=566 reward=0.7884377 (576.17 it/sec) -training >> step=3390500, episode=566 reward=0.7644963 (518.00 it/sec) -training >> step=3390600, episode=566 reward=0.7742106 (514.95 it/sec) -training >> step=3390700, episode=566 reward=0.7593858 (542.09 it/sec) -training >> step=3390800, episode=566 reward=0.757941 (512.07 it/sec) -training >> step=3390900, episode=566 reward=0.7625397 (517.67 it/sec) -training >> step=3391000, episode=566 reward=0.7889861 (535.92 it/sec) -training >> step=3391100, episode=566 reward=0.7782663 (542.41 it/sec) -training >> step=3391200, episode=566 reward=0.7676353 (515.70 it/sec) -training >> step=3391300, episode=566 reward=0.752647 (531.90 it/sec) -training >> step=3391400, episode=566 reward=0.7710933 (492.16 it/sec) -training >> step=3391500, episode=566 reward=0.7761807 (565.47 it/sec) -training >> step=3391600, episode=566 reward=0.7785237 (538.16 it/sec) -training >> step=3391700, episode=566 reward=0.7791808 (503.72 it/sec) -training >> step=3391800, episode=566 reward=0.7434661 (537.98 it/sec) -training >> step=3391900, episode=566 reward=0.766256 (535.64 it/sec) -training >> step=3392000, episode=566 reward=0.7752728 (531.40 it/sec) -training >> step=3392100, episode=566 reward=0.7873063 (527.75 it/sec) -training >> step=3392200, episode=566 reward=0.7743508 (406.89 it/sec) -training >> step=3392300, episode=566 reward=0.7788625 (518.95 it/sec) -training >> step=3392400, episode=566 reward=0.7448161 (509.55 it/sec) -training >> step=3392500, episode=566 reward=0.7604244 (507.67 it/sec) -training >> step=3392600, episode=566 reward=0.7727399 (553.30 it/sec) -training >> step=3392700, episode=566 reward=0.7739863 (533.14 it/sec) -training >> step=3392800, episode=566 reward=0.7611786 (517.78 it/sec) -training >> step=3392900, episode=566 reward=0.765445 (534.55 it/sec) -training >> step=3393000, episode=566 reward=0.7640204 (522.00 it/sec) -training >> step=3393100, episode=566 reward=0.7625839 (512.12 it/sec) -training >> step=3393200, episode=566 reward=0.7687879 (544.39 it/sec) -training >> step=3393300, episode=566 reward=0.7541127 (534.08 it/sec) -training >> step=3393400, episode=566 reward=0.7824448 (493.84 it/sec) -training >> step=3393500, episode=566 reward=0.7762829 (505.34 it/sec) -training >> step=3393600, episode=566 reward=0.7726744 (557.46 it/sec) -training >> step=3393700, episode=566 reward=0.7667713 (564.15 it/sec) -training >> step=3393800, episode=566 reward=0.7577585 (517.89 it/sec) -training >> step=3393900, episode=566 reward=0.7967703 (543.52 it/sec) -training >> step=3394000, episode=566 reward=0.7747638 (532.85 it/sec) -training >> step=3394100, episode=566 reward=0.7713986 (537.25 it/sec) -training >> step=3394200, episode=566 reward=0.7554711 (547.06 it/sec) -training >> step=3394300, episode=566 reward=0.773246 (549.57 it/sec) -training >> step=3394400, episode=566 reward=0.7758054 (516.52 it/sec) -training >> step=3394500, episode=566 reward=0.7985753 (511.98 it/sec) -training >> step=3394600, episode=566 reward=0.8039998 (451.22 it/sec) -training >> step=3394700, episode=566 reward=0.7768741 (572.92 it/sec) -training >> step=3394800, episode=566 reward=0.7582327 (555.53 it/sec) -training >> step=3394900, episode=566 reward=0.7555794 (518.87 it/sec) -training >> step=3395000, episode=566 reward=0.7875727 (536.37 it/sec) -training >> step=3395100, episode=566 reward=0.7791854 (541.90 it/sec) -training >> step=3395200, episode=566 reward=0.7589523 (512.78 it/sec) -training >> step=3395300, episode=567 reward=0.7746724 (108.52 it/sec) -training >> step=3395400, episode=567 reward=0.773841 (545.08 it/sec) -training >> step=3395500, episode=567 reward=0.7633367 (560.84 it/sec) -training >> step=3395600, episode=567 reward=0.7883724 (512.75 it/sec) -training >> step=3395700, episode=567 reward=0.7739938 (529.74 it/sec) -training >> step=3395800, episode=567 reward=0.7725832 (513.66 it/sec) -training >> step=3395900, episode=567 reward=0.7655854 (519.06 it/sec) -training >> step=3396000, episode=567 reward=0.7701969 (527.51 it/sec) -training >> step=3396100, episode=567 reward=0.7676551 (527.95 it/sec) -training >> step=3396200, episode=567 reward=0.7859385 (512.72 it/sec) -training >> step=3396300, episode=567 reward=0.7772287 (521.40 it/sec) -training >> step=3396400, episode=567 reward=0.7802059 (481.82 it/sec) -training >> step=3396500, episode=567 reward=0.7769859 (551.22 it/sec) -training >> step=3396600, episode=567 reward=0.7768511 (572.77 it/sec) -training >> step=3396700, episode=567 reward=0.7734548 (495.89 it/sec) -training >> step=3396800, episode=567 reward=0.7718595 (537.81 it/sec) -training >> step=3396900, episode=567 reward=0.782589 (540.92 it/sec) -training >> step=3397000, episode=567 reward=0.788569 (524.36 it/sec) -training >> step=3397100, episode=567 reward=0.7757311 (551.20 it/sec) -training >> step=3397200, episode=567 reward=0.7733538 (513.19 it/sec) -training >> step=3397300, episode=567 reward=0.7540337 (517.70 it/sec) -training >> step=3397400, episode=567 reward=0.7726927 (462.29 it/sec) -training >> step=3397500, episode=567 reward=0.770834 (507.77 it/sec) -training >> step=3397600, episode=567 reward=0.7848861 (556.90 it/sec) -training >> step=3397700, episode=567 reward=0.7641333 (539.86 it/sec) -training >> step=3397800, episode=567 reward=0.7742007 (484.47 it/sec) -training >> step=3397900, episode=567 reward=0.7612344 (482.61 it/sec) -training >> step=3398000, episode=567 reward=0.7797141 (541.96 it/sec) -training >> step=3398100, episode=567 reward=0.7844264 (502.80 it/sec) -training >> step=3398200, episode=567 reward=0.750111 (539.78 it/sec) -training >> step=3398300, episode=567 reward=0.7656146 (367.15 it/sec) -training >> step=3398400, episode=567 reward=0.7558341 (501.53 it/sec) -training >> step=3398500, episode=567 reward=0.7769704 (503.56 it/sec) -training >> step=3398600, episode=567 reward=0.7730964 (498.30 it/sec) -training >> step=3398700, episode=567 reward=0.7801272 (536.07 it/sec) -training >> step=3398800, episode=567 reward=0.7711269 (524.94 it/sec) -training >> step=3398900, episode=567 reward=0.7540122 (511.36 it/sec) -training >> step=3399000, episode=567 reward=0.7873056 (510.51 it/sec) -training >> step=3399100, episode=567 reward=0.7715415 (531.35 it/sec) -training >> step=3399200, episode=567 reward=0.756851 (524.42 it/sec) -training >> step=3399300, episode=567 reward=0.7650885 (477.38 it/sec) -training >> step=3399400, episode=567 reward=0.7889045 (459.32 it/sec) -training >> step=3399500, episode=567 reward=0.7510349 (551.77 it/sec) -training >> step=3399600, episode=567 reward=0.7646195 (527.41 it/sec) -training >> step=3399700, episode=567 reward=0.7947148 (533.20 it/sec) -training >> step=3399800, episode=567 reward=0.7898496 (538.01 it/sec) -training >> step=3399900, episode=567 reward=0.7922328 (540.24 it/sec) -training >> step=3400000, episode=567 reward=0.7641191 (491.93 it/sec) -training >> step=3400100, episode=567 reward=0.7399817 (532.99 it/sec) -training >> step=3400200, episode=567 reward=0.7793514 (538.49 it/sec) -training >> step=3400300, episode=567 reward=0.7879812 (522.17 it/sec) -training >> step=3400400, episode=567 reward=0.7600163 (493.80 it/sec) -training >> step=3400500, episode=567 reward=0.7886034 (558.13 it/sec) -training >> step=3400600, episode=567 reward=0.7557891 (524.07 it/sec) -training >> step=3400700, episode=567 reward=0.7754618 (505.17 it/sec) -training >> step=3400800, episode=567 reward=0.7711076 (510.52 it/sec) -training >> step=3400900, episode=567 reward=0.7929957 (541.53 it/sec) -training >> step=3401000, episode=567 reward=0.7796513 (479.75 it/sec) -training >> step=3401100, episode=567 reward=0.7670121 (499.46 it/sec) -training >> step=3401200, episode=567 reward=0.7692031 (531.29 it/sec) -training >> step=3401300, episode=568 reward=0.7754069 (114.52 it/sec) -training >> step=3401400, episode=568 reward=0.7715701 (514.84 it/sec) -training >> step=3401500, episode=568 reward=0.7461587 (522.11 it/sec) -training >> step=3401600, episode=568 reward=0.773856 (523.21 it/sec) -training >> step=3401700, episode=568 reward=0.7721988 (523.39 it/sec) -training >> step=3401800, episode=568 reward=0.7737512 (499.11 it/sec) -training >> step=3401900, episode=568 reward=0.7888887 (527.06 it/sec) -training >> step=3402000, episode=568 reward=0.790571 (554.33 it/sec) -training >> step=3402100, episode=568 reward=0.7975258 (535.45 it/sec) -training >> step=3402200, episode=568 reward=0.7710835 (511.87 it/sec) -training >> step=3402300, episode=568 reward=0.7596576 (549.21 it/sec) -training >> step=3402400, episode=568 reward=0.780425 (510.60 it/sec) -training >> step=3402500, episode=568 reward=0.7885748 (554.39 it/sec) -training >> step=3402600, episode=568 reward=0.7823513 (551.70 it/sec) -training >> step=3402700, episode=568 reward=0.7421519 (539.23 it/sec) -training >> step=3402800, episode=568 reward=0.7793404 (469.08 it/sec) -training >> step=3402900, episode=568 reward=0.783197 (493.56 it/sec) -training >> step=3403000, episode=568 reward=0.7799752 (501.18 it/sec) -training >> step=3403100, episode=568 reward=0.7691666 (574.46 it/sec) -training >> step=3403200, episode=568 reward=0.7869794 (504.27 it/sec) -training >> step=3403300, episode=568 reward=0.787443 (499.75 it/sec) -training >> step=3403400, episode=568 reward=0.7982864 (522.87 it/sec) -training >> step=3403500, episode=568 reward=0.7511996 (535.56 it/sec) -training >> step=3403600, episode=568 reward=0.778747 (543.37 it/sec) -training >> step=3403700, episode=568 reward=0.7628179 (534.74 it/sec) -training >> step=3403800, episode=568 reward=0.7903274 (519.62 it/sec) -training >> step=3403900, episode=568 reward=0.7773016 (503.51 it/sec) -training >> step=3404000, episode=568 reward=0.7504099 (524.51 it/sec) -training >> step=3404100, episode=568 reward=0.7680469 (526.89 it/sec) -training >> step=3404200, episode=568 reward=0.7643788 (537.73 it/sec) -training >> step=3404300, episode=568 reward=0.7801034 (479.22 it/sec) -training >> step=3404400, episode=568 reward=0.7739183 (412.69 it/sec) -training >> step=3404500, episode=568 reward=0.7838215 (519.91 it/sec) -training >> step=3404600, episode=568 reward=0.7811294 (515.52 it/sec) -training >> step=3404700, episode=568 reward=0.7805461 (547.26 it/sec) -training >> step=3404800, episode=568 reward=0.7721648 (521.58 it/sec) -training >> step=3404900, episode=568 reward=0.7602061 (538.68 it/sec) -training >> step=3405000, episode=568 reward=0.7710935 (532.26 it/sec) -training >> step=3405100, episode=568 reward=0.7845904 (475.51 it/sec) -training >> step=3405200, episode=568 reward=0.7764918 (531.64 it/sec) -training >> step=3405300, episode=568 reward=0.8007327 (536.57 it/sec) -training >> step=3405400, episode=568 reward=0.7704819 (536.78 it/sec) -training >> step=3405500, episode=568 reward=0.7799954 (493.95 it/sec) -training >> step=3405600, episode=568 reward=0.781492 (541.13 it/sec) -training >> step=3405700, episode=568 reward=0.7797344 (516.19 it/sec) -training >> step=3405800, episode=568 reward=0.7801531 (532.20 it/sec) -training >> step=3405900, episode=568 reward=0.7667564 (509.67 it/sec) -training >> step=3406000, episode=568 reward=0.7652253 (515.63 it/sec) -training >> step=3406100, episode=568 reward=0.7740959 (463.41 it/sec) -training >> step=3406200, episode=568 reward=0.767369 (436.38 it/sec) -training >> step=3406300, episode=568 reward=0.7824183 (462.89 it/sec) -training >> step=3406400, episode=568 reward=0.7700403 (483.20 it/sec) -training >> step=3406500, episode=568 reward=0.7807304 (485.28 it/sec) -training >> step=3406600, episode=568 reward=0.7579341 (498.08 it/sec) -training >> step=3406700, episode=568 reward=0.7589558 (481.56 it/sec) -training >> step=3406800, episode=568 reward=0.7849448 (496.75 it/sec) -training >> step=3406900, episode=568 reward=0.7520726 (468.70 it/sec) -training >> step=3407000, episode=568 reward=0.7608138 (510.71 it/sec) -training >> step=3407100, episode=568 reward=0.7608067 (517.26 it/sec) -training >> step=3407200, episode=568 reward=0.7573494 (479.84 it/sec) -training >> step=3407300, episode=569 reward=0.7607083 (111.17 it/sec) -training >> step=3407400, episode=569 reward=0.7770733 (554.72 it/sec) -training >> step=3407500, episode=569 reward=0.7795179 (500.26 it/sec) -training >> step=3407600, episode=569 reward=0.7490145 (495.61 it/sec) -training >> step=3407700, episode=569 reward=0.7423871 (540.01 it/sec) -training >> step=3407800, episode=569 reward=0.7639056 (558.70 it/sec) -training >> step=3407900, episode=569 reward=0.7919078 (515.05 it/sec) -training >> step=3408000, episode=569 reward=0.7719826 (548.97 it/sec) -training >> step=3408100, episode=569 reward=0.76205 (521.74 it/sec) -training >> step=3408200, episode=569 reward=0.7857874 (519.47 it/sec) -training >> step=3408300, episode=569 reward=0.7512798 (523.11 it/sec) -training >> step=3408400, episode=569 reward=0.7369822 (545.89 it/sec) -training >> step=3408500, episode=569 reward=0.7640992 (546.25 it/sec) -training >> step=3408600, episode=569 reward=0.7904506 (536.20 it/sec) -training >> step=3408700, episode=569 reward=0.7617378 (526.58 it/sec) -training >> step=3408800, episode=569 reward=0.780961 (504.98 it/sec) -training >> step=3408900, episode=569 reward=0.8013322 (577.11 it/sec) -training >> step=3409000, episode=569 reward=0.7747669 (476.30 it/sec) -training >> step=3409100, episode=569 reward=0.785345 (477.84 it/sec) -training >> step=3409200, episode=569 reward=0.7787145 (541.26 it/sec) -training >> step=3409300, episode=569 reward=0.7474897 (511.42 it/sec) -training >> step=3409400, episode=569 reward=0.7765475 (512.84 it/sec) -training >> step=3409500, episode=569 reward=0.7858251 (486.46 it/sec) -training >> step=3409600, episode=569 reward=0.7681093 (519.49 it/sec) -training >> step=3409700, episode=569 reward=0.774788 (490.67 it/sec) -training >> step=3409800, episode=569 reward=0.7631804 (516.35 it/sec) -training >> step=3409900, episode=569 reward=0.765396 (533.66 it/sec) -training >> step=3410000, episode=569 reward=0.7742211 (568.94 it/sec) -training >> step=3410100, episode=569 reward=0.7712188 (533.47 it/sec) -training >> step=3410200, episode=569 reward=0.7641114 (491.33 it/sec) -training >> step=3410300, episode=569 reward=0.7910197 (504.19 it/sec) -training >> step=3410400, episode=569 reward=0.7597986 (415.29 it/sec) -training >> step=3410500, episode=569 reward=0.7598612 (497.27 it/sec) -training >> step=3410600, episode=569 reward=0.7746701 (520.68 it/sec) -training >> step=3410700, episode=569 reward=0.7675193 (551.33 it/sec) -training >> step=3410800, episode=569 reward=0.7761012 (466.79 it/sec) -training >> step=3410900, episode=569 reward=0.74562 (519.59 it/sec) -training >> step=3411000, episode=569 reward=0.7768715 (503.40 it/sec) -training >> step=3411100, episode=569 reward=0.77305 (552.34 it/sec) -training >> step=3411200, episode=569 reward=0.7894445 (513.13 it/sec) -training >> step=3411300, episode=569 reward=0.7684122 (453.07 it/sec) -training >> step=3411400, episode=569 reward=0.7773058 (515.42 it/sec) -training >> step=3411500, episode=569 reward=0.7698606 (504.41 it/sec) -training >> step=3411600, episode=569 reward=0.7633356 (511.71 it/sec) -training >> step=3411700, episode=569 reward=0.7726989 (517.04 it/sec) -training >> step=3411800, episode=569 reward=0.7538226 (451.11 it/sec) -training >> step=3411900, episode=569 reward=0.7472256 (463.43 it/sec) -training >> step=3412000, episode=569 reward=0.7637208 (482.68 it/sec) -training >> step=3412100, episode=569 reward=0.7752016 (537.77 it/sec) -training >> step=3412200, episode=569 reward=0.7608709 (542.56 it/sec) -training >> step=3412300, episode=569 reward=0.7757995 (511.60 it/sec) -training >> step=3412400, episode=569 reward=0.7780669 (522.37 it/sec) -training >> step=3412500, episode=569 reward=0.7518448 (538.98 it/sec) -training >> step=3412600, episode=569 reward=0.7741061 (406.98 it/sec) -training >> step=3412700, episode=569 reward=0.7781485 (458.84 it/sec) -training >> step=3412800, episode=569 reward=0.7864809 (453.99 it/sec) -training >> step=3412900, episode=569 reward=0.7828603 (473.79 it/sec) -training >> step=3413000, episode=569 reward=0.7653531 (477.61 it/sec) -training >> step=3413100, episode=569 reward=0.7596951 (495.06 it/sec) -training >> step=3413200, episode=569 reward=0.7378021 (513.29 it/sec) -training >> step=3413300, episode=570 reward=0.7553676 (127.27 it/sec) -training >> step=3413400, episode=570 reward=0.7644733 (548.90 it/sec) -training >> step=3413500, episode=570 reward=0.756562 (519.38 it/sec) -training >> step=3413600, episode=570 reward=0.7769702 (563.28 it/sec) -training >> step=3413700, episode=570 reward=0.791944 (485.88 it/sec) -training >> step=3413800, episode=570 reward=0.7654335 (465.16 it/sec) -training >> step=3413900, episode=570 reward=0.7668464 (577.60 it/sec) -training >> step=3414000, episode=570 reward=0.7573926 (529.46 it/sec) -training >> step=3414100, episode=570 reward=0.7676276 (518.99 it/sec) -training >> step=3414200, episode=570 reward=0.7947785 (520.43 it/sec) -training >> step=3414300, episode=570 reward=0.765502 (584.03 it/sec) -training >> step=3414400, episode=570 reward=0.7644805 (524.94 it/sec) -training >> step=3414500, episode=570 reward=0.7887555 (510.02 it/sec) -training >> step=3414600, episode=570 reward=0.7549396 (514.85 it/sec) -training >> step=3414700, episode=570 reward=0.7917822 (543.03 it/sec) -training >> step=3414800, episode=570 reward=0.8022792 (567.24 it/sec) -training >> step=3414900, episode=570 reward=0.7657993 (542.70 it/sec) -training >> step=3415000, episode=570 reward=0.7623634 (545.47 it/sec) -training >> step=3415100, episode=570 reward=0.794103 (474.40 it/sec) -training >> step=3415200, episode=570 reward=0.760726 (505.22 it/sec) -training >> step=3415300, episode=570 reward=0.7745765 (503.13 it/sec) -training >> step=3415400, episode=570 reward=0.7943645 (511.78 it/sec) -training >> step=3415500, episode=570 reward=0.7745326 (479.23 it/sec) -training >> step=3415600, episode=570 reward=0.7756298 (511.49 it/sec) -training >> step=3415700, episode=570 reward=0.7585247 (538.91 it/sec) -training >> step=3415800, episode=570 reward=0.7694368 (484.90 it/sec) -training >> step=3415900, episode=570 reward=0.7729432 (474.75 it/sec) -training >> step=3416000, episode=570 reward=0.7692694 (489.57 it/sec) -training >> step=3416100, episode=570 reward=0.7759194 (488.08 it/sec) -training >> step=3416200, episode=570 reward=0.7781342 (448.57 it/sec) -training >> step=3416300, episode=570 reward=0.7740617 (437.48 it/sec) -training >> step=3416400, episode=570 reward=0.7787835 (498.35 it/sec) -training >> step=3416500, episode=570 reward=0.7704089 (531.85 it/sec) -training >> step=3416600, episode=570 reward=0.7907897 (387.40 it/sec) -training >> step=3416700, episode=570 reward=0.7582231 (479.71 it/sec) -training >> step=3416800, episode=570 reward=0.7753382 (480.08 it/sec) -training >> step=3416900, episode=570 reward=0.7729772 (467.53 it/sec) -training >> step=3417000, episode=570 reward=0.7683472 (501.69 it/sec) -training >> step=3417100, episode=570 reward=0.7797967 (535.58 it/sec) -training >> step=3417200, episode=570 reward=0.7759951 (480.28 it/sec) -training >> step=3417300, episode=570 reward=0.752571 (510.06 it/sec) -training >> step=3417400, episode=570 reward=0.7774692 (513.90 it/sec) -training >> step=3417500, episode=570 reward=0.7903323 (500.19 it/sec) -training >> step=3417600, episode=570 reward=0.7979683 (592.77 it/sec) -training >> step=3417700, episode=570 reward=0.7692068 (529.08 it/sec) -training >> step=3417800, episode=570 reward=0.7778717 (480.11 it/sec) -training >> step=3417900, episode=570 reward=0.7756745 (495.28 it/sec) -training >> step=3418000, episode=570 reward=0.7589356 (459.87 it/sec) -training >> step=3418100, episode=570 reward=0.7866107 (510.05 it/sec) -training >> step=3418200, episode=570 reward=0.7580231 (465.94 it/sec) -training >> step=3418300, episode=570 reward=0.788255 (471.45 it/sec) -training >> step=3418400, episode=570 reward=0.7633415 (474.12 it/sec) -training >> step=3418500, episode=570 reward=0.7546089 (464.22 it/sec) -training >> step=3418600, episode=570 reward=0.7595094 (506.72 it/sec) -training >> step=3418700, episode=570 reward=0.7913898 (459.33 it/sec) -training >> step=3418800, episode=570 reward=0.7649898 (415.65 it/sec) -training >> step=3418900, episode=570 reward=0.7770627 (481.83 it/sec) -training >> step=3419000, episode=570 reward=0.7838557 (447.29 it/sec) -training >> step=3419100, episode=570 reward=0.7923384 (418.11 it/sec) -training >> step=3419200, episode=570 reward=0.7940685 (453.32 it/sec) -training >> step=3419300, episode=571 reward=0.7747722 (111.50 it/sec) -training >> step=3419400, episode=571 reward=0.7679819 (367.46 it/sec) -training >> step=3419500, episode=571 reward=0.7775569 (507.38 it/sec) -training >> step=3419600, episode=571 reward=0.7708532 (518.82 it/sec) -training >> step=3419700, episode=571 reward=0.77811 (548.87 it/sec) -training >> step=3419800, episode=571 reward=0.761915 (493.12 it/sec) -training >> step=3419900, episode=571 reward=0.7871948 (518.43 it/sec) -training >> step=3420000, episode=571 reward=0.7713965 (548.05 it/sec) -training >> step=3420100, episode=571 reward=0.7746457 (546.11 it/sec) -training >> step=3420200, episode=571 reward=0.7710553 (526.44 it/sec) -training >> step=3420300, episode=571 reward=0.7586884 (553.26 it/sec) -training >> step=3420400, episode=571 reward=0.7713659 (487.02 it/sec) -training >> step=3420500, episode=571 reward=0.7733382 (533.09 it/sec) -training >> step=3420600, episode=571 reward=0.7658982 (537.51 it/sec) -training >> step=3420700, episode=571 reward=0.7752396 (561.50 it/sec) -training >> step=3420800, episode=571 reward=0.7911044 (550.01 it/sec) -training >> step=3420900, episode=571 reward=0.7491599 (532.34 it/sec) -training >> step=3421000, episode=571 reward=0.8112491 (545.82 it/sec) -training >> step=3421100, episode=571 reward=0.7679481 (516.58 it/sec) -training >> step=3421200, episode=571 reward=0.7889073 (572.29 it/sec) -training >> step=3421300, episode=571 reward=0.7602468 (542.10 it/sec) -training >> step=3421400, episode=571 reward=0.7609323 (572.63 it/sec) -training >> step=3421500, episode=571 reward=0.7787015 (520.63 it/sec) -training >> step=3421600, episode=571 reward=0.7806867 (538.58 it/sec) -training >> step=3421700, episode=571 reward=0.7540301 (560.05 it/sec) -training >> step=3421800, episode=571 reward=0.7732591 (549.22 it/sec) -training >> step=3421900, episode=571 reward=0.7927722 (559.96 it/sec) -training >> step=3422000, episode=571 reward=0.7712172 (534.90 it/sec) -training >> step=3422100, episode=571 reward=0.7770801 (551.40 it/sec) -training >> step=3422200, episode=571 reward=0.7635032 (492.04 it/sec) -training >> step=3422300, episode=571 reward=0.759096 (510.75 it/sec) -training >> step=3422400, episode=571 reward=0.7756381 (539.35 it/sec) -training >> step=3422500, episode=571 reward=0.768667 (540.32 it/sec) -training >> step=3422600, episode=571 reward=0.7884089 (397.22 it/sec) -training >> step=3422700, episode=571 reward=0.7631946 (551.54 it/sec) -training >> step=3422800, episode=571 reward=0.7841828 (521.24 it/sec) -training >> step=3422900, episode=571 reward=0.7468315 (496.42 it/sec) -training >> step=3423000, episode=571 reward=0.7540569 (573.90 it/sec) -training >> step=3423100, episode=571 reward=0.7622777 (514.06 it/sec) -training >> step=3423200, episode=571 reward=0.7614682 (550.89 it/sec) -training >> step=3423300, episode=571 reward=0.7733827 (564.71 it/sec) -training >> step=3423400, episode=571 reward=0.762582 (547.84 it/sec) -training >> step=3423500, episode=571 reward=0.7532397 (569.51 it/sec) -training >> step=3423600, episode=571 reward=0.7471233 (518.63 it/sec) -training >> step=3423700, episode=571 reward=0.7491215 (523.07 it/sec) -training >> step=3423800, episode=571 reward=0.745469 (528.87 it/sec) -training >> step=3423900, episode=571 reward=0.7657762 (586.85 it/sec) -training >> step=3424000, episode=571 reward=0.7569115 (504.80 it/sec) -training >> step=3424100, episode=571 reward=0.7745139 (549.23 it/sec) -training >> step=3424200, episode=571 reward=0.7799677 (505.06 it/sec) -training >> step=3424300, episode=571 reward=0.775925 (526.20 it/sec) -training >> step=3424400, episode=571 reward=0.7735028 (505.83 it/sec) -training >> step=3424500, episode=571 reward=0.7717733 (556.84 it/sec) -training >> step=3424600, episode=571 reward=0.7836286 (567.75 it/sec) -training >> step=3424700, episode=571 reward=0.799514 (541.00 it/sec) -training >> step=3424800, episode=571 reward=0.7678424 (548.29 it/sec) -training >> step=3424900, episode=571 reward=0.7608112 (540.45 it/sec) -training >> step=3425000, episode=571 reward=0.7744967 (580.44 it/sec) -training >> step=3425100, episode=571 reward=0.7795299 (521.48 it/sec) -training >> step=3425200, episode=571 reward=0.7710816 (541.07 it/sec) -training >> step=3425300, episode=572 reward=0.7883339 (122.74 it/sec) -training >> step=3425400, episode=572 reward=0.7726397 (555.80 it/sec) -training >> step=3425500, episode=572 reward=0.7926437 (545.42 it/sec) -training >> step=3425600, episode=572 reward=0.7949376 (520.91 it/sec) -training >> step=3425700, episode=572 reward=0.7740986 (492.93 it/sec) -training >> step=3425800, episode=572 reward=0.7651917 (523.90 it/sec) -training >> step=3425900, episode=572 reward=0.7943254 (535.92 it/sec) -training >> step=3426000, episode=572 reward=0.7674968 (558.16 it/sec) -training >> step=3426100, episode=572 reward=0.7678682 (558.42 it/sec) -training >> step=3426200, episode=572 reward=0.7674324 (546.10 it/sec) -training >> step=3426300, episode=572 reward=0.7744151 (534.73 it/sec) -training >> step=3426400, episode=572 reward=0.7597424 (554.17 it/sec) -training >> step=3426500, episode=572 reward=0.7644437 (544.09 it/sec) -training >> step=3426600, episode=572 reward=0.7676911 (562.75 it/sec) -training >> step=3426700, episode=572 reward=0.7844371 (534.75 it/sec) -training >> step=3426800, episode=572 reward=0.7634987 (577.38 it/sec) -training >> step=3426900, episode=572 reward=0.7707504 (571.27 it/sec) -training >> step=3427000, episode=572 reward=0.7899991 (528.50 it/sec) -training >> step=3427100, episode=572 reward=0.7677687 (526.27 it/sec) -training >> step=3427200, episode=572 reward=0.7873759 (528.93 it/sec) -training >> step=3427300, episode=572 reward=0.7706445 (541.65 it/sec) -training >> step=3427400, episode=572 reward=0.7774401 (547.04 it/sec) -training >> step=3427500, episode=572 reward=0.7881333 (577.07 it/sec) -training >> step=3427600, episode=572 reward=0.7814012 (530.18 it/sec) -training >> step=3427700, episode=572 reward=0.7648014 (508.58 it/sec) -training >> step=3427800, episode=572 reward=0.7826246 (551.62 it/sec) -training >> step=3427900, episode=572 reward=0.7612174 (589.89 it/sec) -training >> step=3428000, episode=572 reward=0.759907 (558.90 it/sec) -training >> step=3428100, episode=572 reward=0.7908031 (522.67 it/sec) -training >> step=3428200, episode=572 reward=0.7641796 (554.28 it/sec) -training >> step=3428300, episode=572 reward=0.7761157 (541.62 it/sec) -training >> step=3428400, episode=572 reward=0.7626818 (540.25 it/sec) -training >> step=3428500, episode=572 reward=0.7840856 (536.72 it/sec) -training >> step=3428600, episode=572 reward=0.7715088 (549.97 it/sec) -training >> step=3428700, episode=572 reward=0.8027301 (513.41 it/sec) -training >> step=3428800, episode=572 reward=0.7642111 (403.55 it/sec) -training >> step=3428900, episode=572 reward=0.7997831 (537.80 it/sec) -training >> step=3429000, episode=572 reward=0.7845209 (586.36 it/sec) -training >> step=3429100, episode=572 reward=0.7897336 (544.93 it/sec) -training >> step=3429200, episode=572 reward=0.7749475 (537.00 it/sec) -training >> step=3429300, episode=572 reward=0.7730058 (573.62 it/sec) -training >> step=3429400, episode=572 reward=0.7630181 (520.85 it/sec) -training >> step=3429500, episode=572 reward=0.7680904 (548.72 it/sec) -training >> step=3429600, episode=572 reward=0.7702355 (567.50 it/sec) -training >> step=3429700, episode=572 reward=0.7662492 (593.68 it/sec) -training >> step=3429800, episode=572 reward=0.7510929 (492.97 it/sec) -training >> step=3429900, episode=572 reward=0.7679864 (502.06 it/sec) -training >> step=3430000, episode=572 reward=0.7775084 (541.74 it/sec) -training >> step=3430100, episode=572 reward=0.7656219 (558.41 it/sec) -training >> step=3430200, episode=572 reward=0.7604041 (567.19 it/sec) -training >> step=3430300, episode=572 reward=0.780167 (542.85 it/sec) -training >> step=3430400, episode=572 reward=0.7429772 (567.81 it/sec) -training >> step=3430500, episode=572 reward=0.7798511 (537.59 it/sec) -training >> step=3430600, episode=572 reward=0.7633587 (467.59 it/sec) -training >> step=3430700, episode=572 reward=0.7662207 (549.70 it/sec) -training >> step=3430800, episode=572 reward=0.7596395 (577.96 it/sec) -training >> step=3430900, episode=572 reward=0.7946712 (558.11 it/sec) -training >> step=3431000, episode=572 reward=0.7810659 (548.69 it/sec) -training >> step=3431100, episode=572 reward=0.7669134 (576.77 it/sec) -training >> step=3431200, episode=572 reward=0.7810388 (533.44 it/sec) -training >> step=3431300, episode=573 reward=0.7637483 (112.02 it/sec) -training >> step=3431400, episode=573 reward=0.7610802 (552.08 it/sec) -training >> step=3431500, episode=573 reward=0.7818003 (593.14 it/sec) -training >> step=3431600, episode=573 reward=0.7763963 (521.03 it/sec) -training >> step=3431700, episode=573 reward=0.7693922 (488.34 it/sec) -training >> step=3431800, episode=573 reward=0.7678049 (533.65 it/sec) -training >> step=3431900, episode=573 reward=0.7446288 (543.16 it/sec) -training >> step=3432000, episode=573 reward=0.7699275 (522.82 it/sec) -training >> step=3432100, episode=573 reward=0.7822788 (555.35 it/sec) -training >> step=3432200, episode=573 reward=0.7788374 (572.46 it/sec) -training >> step=3432300, episode=573 reward=0.7704602 (503.27 it/sec) -training >> step=3432400, episode=573 reward=0.7916344 (539.06 it/sec) -training >> step=3432500, episode=573 reward=0.7866285 (535.19 it/sec) -training >> step=3432600, episode=573 reward=0.7741359 (589.01 it/sec) -training >> step=3432700, episode=573 reward=0.7765202 (536.89 it/sec) -training >> step=3432800, episode=573 reward=0.7707582 (482.49 it/sec) -training >> step=3432900, episode=573 reward=0.7688733 (590.79 it/sec) -training >> step=3433000, episode=573 reward=0.7696822 (526.98 it/sec) -training >> step=3433100, episode=573 reward=0.7720674 (476.32 it/sec) -training >> step=3433200, episode=573 reward=0.7758921 (553.47 it/sec) -training >> step=3433300, episode=573 reward=0.7678522 (516.42 it/sec) -training >> step=3433400, episode=573 reward=0.7678302 (520.43 it/sec) -training >> step=3433500, episode=573 reward=0.7758976 (532.39 it/sec) -training >> step=3433600, episode=573 reward=0.7745073 (483.73 it/sec) -training >> step=3433700, episode=573 reward=0.7702296 (519.18 it/sec) -training >> step=3433800, episode=573 reward=0.770176 (525.62 it/sec) -training >> step=3433900, episode=573 reward=0.7615725 (540.84 it/sec) -training >> step=3434000, episode=573 reward=0.7812347 (578.52 it/sec) -training >> step=3434100, episode=573 reward=0.7688568 (557.96 it/sec) -training >> step=3434200, episode=573 reward=0.7825122 (563.55 it/sec) -training >> step=3434300, episode=573 reward=0.7876345 (517.79 it/sec) -training >> step=3434400, episode=573 reward=0.7634147 (541.17 it/sec) -training >> step=3434500, episode=573 reward=0.7711121 (509.99 it/sec) -training >> step=3434600, episode=573 reward=0.7923072 (518.23 it/sec) -training >> step=3434700, episode=573 reward=0.770153 (531.90 it/sec) -training >> step=3434800, episode=573 reward=0.7801103 (575.27 it/sec) -training >> step=3434900, episode=573 reward=0.7600924 (388.10 it/sec) -training >> step=3435000, episode=573 reward=0.7787998 (556.21 it/sec) -training >> step=3435100, episode=573 reward=0.7729335 (555.19 it/sec) -training >> step=3435200, episode=573 reward=0.7683412 (529.07 it/sec) -training >> step=3435300, episode=573 reward=0.7882304 (565.63 it/sec) -training >> step=3435400, episode=573 reward=0.7880942 (515.45 it/sec) -training >> step=3435500, episode=573 reward=0.771614 (559.95 it/sec) -training >> step=3435600, episode=573 reward=0.7738879 (523.13 it/sec) -training >> step=3435700, episode=573 reward=0.7869836 (525.55 it/sec) -training >> step=3435800, episode=573 reward=0.7767953 (523.89 it/sec) -training >> step=3435900, episode=573 reward=0.7732018 (528.73 it/sec) -training >> step=3436000, episode=573 reward=0.7756457 (539.39 it/sec) -training >> step=3436100, episode=573 reward=0.767942 (562.42 it/sec) -training >> step=3436200, episode=573 reward=0.7627811 (513.96 it/sec) -training >> step=3436300, episode=573 reward=0.7901194 (510.49 it/sec) -training >> step=3436400, episode=573 reward=0.7799677 (544.07 it/sec) -training >> step=3436500, episode=573 reward=0.7535041 (570.57 it/sec) -training >> step=3436600, episode=573 reward=0.7825146 (556.50 it/sec) -training >> step=3436700, episode=573 reward=0.7644746 (566.12 it/sec) -training >> step=3436800, episode=573 reward=0.7818155 (550.11 it/sec) -training >> step=3436900, episode=573 reward=0.7900001 (568.44 it/sec) -training >> step=3437000, episode=573 reward=0.7621356 (507.27 it/sec) -training >> step=3437100, episode=573 reward=0.7647282 (528.21 it/sec) -training >> step=3437200, episode=573 reward=0.8009964 (561.48 it/sec) -training >> step=3437300, episode=574 reward=0.783811 (123.27 it/sec) -training >> step=3437400, episode=574 reward=0.7644136 (527.75 it/sec) -training >> step=3437500, episode=574 reward=0.7522925 (488.14 it/sec) -training >> step=3437600, episode=574 reward=0.7666367 (564.77 it/sec) -training >> step=3437700, episode=574 reward=0.7641776 (549.11 it/sec) -training >> step=3437800, episode=574 reward=0.8092232 (554.67 it/sec) -training >> step=3437900, episode=574 reward=0.78562 (563.07 it/sec) -training >> step=3438000, episode=574 reward=0.7814383 (544.92 it/sec) -training >> step=3438100, episode=574 reward=0.7441406 (558.17 it/sec) -training >> step=3438200, episode=574 reward=0.775572 (523.33 it/sec) -training >> step=3438300, episode=574 reward=0.7728027 (543.48 it/sec) -training >> step=3438400, episode=574 reward=0.7763335 (520.60 it/sec) -training >> step=3438500, episode=574 reward=0.7772065 (538.21 it/sec) -training >> step=3438600, episode=574 reward=0.7914642 (508.92 it/sec) -training >> step=3438700, episode=574 reward=0.7887898 (505.19 it/sec) -training >> step=3438800, episode=574 reward=0.7881191 (515.03 it/sec) -training >> step=3438900, episode=574 reward=0.8082972 (484.56 it/sec) -training >> step=3439000, episode=574 reward=0.7802419 (533.77 it/sec) -training >> step=3439100, episode=574 reward=0.767226 (499.25 it/sec) -training >> step=3439200, episode=574 reward=0.7725724 (536.41 it/sec) -training >> step=3439300, episode=574 reward=0.7586032 (545.78 it/sec) -training >> step=3439400, episode=574 reward=0.7707029 (504.52 it/sec) -training >> step=3439500, episode=574 reward=0.7738927 (476.25 it/sec) -training >> step=3439600, episode=574 reward=0.7667767 (427.88 it/sec) -training >> step=3439700, episode=574 reward=0.7845879 (518.61 it/sec) -training >> step=3439800, episode=574 reward=0.7705482 (524.36 it/sec) -training >> step=3439900, episode=574 reward=0.7731692 (493.85 it/sec) -training >> step=3440000, episode=574 reward=0.7823854 (500.39 it/sec) -training >> step=3440100, episode=574 reward=0.780175 (523.17 it/sec) -training >> step=3440200, episode=574 reward=0.7772926 (527.40 it/sec) -training >> step=3440300, episode=574 reward=0.7828336 (534.23 it/sec) -training >> step=3440400, episode=574 reward=0.8042708 (503.00 it/sec) -training >> step=3440500, episode=574 reward=0.7880498 (566.48 it/sec) -training >> step=3440600, episode=574 reward=0.7715173 (539.52 it/sec) -training >> step=3440700, episode=574 reward=0.7769915 (476.00 it/sec) -training >> step=3440800, episode=574 reward=0.7766519 (501.75 it/sec) -training >> step=3440900, episode=574 reward=0.7724425 (516.98 it/sec) -training >> step=3441000, episode=574 reward=0.7649463 (423.61 it/sec) -training >> step=3441100, episode=574 reward=0.7877955 (537.82 it/sec) -training >> step=3441200, episode=574 reward=0.7800941 (548.81 it/sec) -training >> step=3441300, episode=574 reward=0.768632 (506.63 it/sec) -training >> step=3441400, episode=574 reward=0.7627736 (508.62 it/sec) -training >> step=3441500, episode=574 reward=0.7729257 (446.67 it/sec) -training >> step=3441600, episode=574 reward=0.7789087 (510.94 it/sec) -training >> step=3441700, episode=574 reward=0.7530351 (501.05 it/sec) -training >> step=3441800, episode=574 reward=0.7783224 (458.25 it/sec) -training >> step=3441900, episode=574 reward=0.7599276 (564.75 it/sec) -training >> step=3442000, episode=574 reward=0.7867093 (440.54 it/sec) -training >> step=3442100, episode=574 reward=0.7413425 (477.19 it/sec) -training >> step=3442200, episode=574 reward=0.7760944 (495.47 it/sec) -training >> step=3442300, episode=574 reward=0.7803585 (499.73 it/sec) -training >> step=3442400, episode=574 reward=0.7699052 (490.31 it/sec) -training >> step=3442500, episode=574 reward=0.7661324 (472.74 it/sec) -training >> step=3442600, episode=574 reward=0.7767093 (488.27 it/sec) -training >> step=3442700, episode=574 reward=0.7920983 (444.69 it/sec) -training >> step=3442800, episode=574 reward=0.7760889 (465.49 it/sec) -training >> step=3442900, episode=574 reward=0.7807579 (402.60 it/sec) -training >> step=3443000, episode=574 reward=0.7912306 (468.97 it/sec) -training >> step=3443100, episode=574 reward=0.762053 (475.42 it/sec) -training >> step=3443200, episode=574 reward=0.7544675 (513.04 it/sec) -training >> step=3443300, episode=575 reward=0.7743647 (97.36 it/sec) -training >> step=3443400, episode=575 reward=0.7759339 (413.78 it/sec) -training >> step=3443500, episode=575 reward=0.7693678 (442.30 it/sec) -training >> step=3443600, episode=575 reward=0.7882887 (496.43 it/sec) -training >> step=3443700, episode=575 reward=0.7842473 (474.80 it/sec) -training >> step=3443800, episode=575 reward=0.7772821 (505.23 it/sec) -training >> step=3443900, episode=575 reward=0.792577 (521.37 it/sec) -training >> step=3444000, episode=575 reward=0.7539815 (529.62 it/sec) -training >> step=3444100, episode=575 reward=0.7800362 (515.18 it/sec) -training >> step=3444200, episode=575 reward=0.758808 (472.85 it/sec) -training >> step=3444300, episode=575 reward=0.7485262 (456.01 it/sec) -training >> step=3444400, episode=575 reward=0.7702231 (470.63 it/sec) -training >> step=3444500, episode=575 reward=0.7706136 (494.44 it/sec) -training >> step=3444600, episode=575 reward=0.7522075 (517.65 it/sec) -training >> step=3444700, episode=575 reward=0.7823622 (466.00 it/sec) -training >> step=3444800, episode=575 reward=0.7872882 (474.37 it/sec) -training >> step=3444900, episode=575 reward=0.7680168 (440.58 it/sec) -training >> step=3445000, episode=575 reward=0.7813616 (486.74 it/sec) -training >> step=3445100, episode=575 reward=0.7770161 (513.65 it/sec) -training >> step=3445200, episode=575 reward=0.7846475 (523.52 it/sec) -training >> step=3445300, episode=575 reward=0.7841497 (524.37 it/sec) -training >> step=3445400, episode=575 reward=0.7493544 (551.93 it/sec) -training >> step=3445500, episode=575 reward=0.7915699 (503.46 it/sec) -training >> step=3445600, episode=575 reward=0.7957547 (528.30 it/sec) -training >> step=3445700, episode=575 reward=0.7577523 (478.19 it/sec) -training >> step=3445800, episode=575 reward=0.782103 (505.16 it/sec) -training >> step=3445900, episode=575 reward=0.7759866 (505.45 it/sec) -training >> step=3446000, episode=575 reward=0.7634618 (483.19 it/sec) -training >> step=3446100, episode=575 reward=0.7808377 (544.21 it/sec) -training >> step=3446200, episode=575 reward=0.7667475 (488.15 it/sec) -training >> step=3446300, episode=575 reward=0.7785686 (483.81 it/sec) -training >> step=3446400, episode=575 reward=0.7675716 (444.93 it/sec) -training >> step=3446500, episode=575 reward=0.7553894 (525.12 it/sec) -training >> step=3446600, episode=575 reward=0.7749403 (498.67 it/sec) -training >> step=3446700, episode=575 reward=0.7941561 (474.47 it/sec) -training >> step=3446800, episode=575 reward=0.7656325 (501.56 it/sec) -training >> step=3446900, episode=575 reward=0.776412 (487.73 it/sec) -training >> step=3447000, episode=575 reward=0.7842723 (489.08 it/sec) -training >> step=3447100, episode=575 reward=0.7685063 (352.56 it/sec) -training >> step=3447200, episode=575 reward=0.7586574 (533.08 it/sec) -training >> step=3447300, episode=575 reward=0.7512584 (477.67 it/sec) -training >> step=3447400, episode=575 reward=0.7598152 (430.78 it/sec) -training >> step=3447500, episode=575 reward=0.7734745 (499.77 it/sec) -training >> step=3447600, episode=575 reward=0.7807713 (519.16 it/sec) -training >> step=3447700, episode=575 reward=0.7784495 (486.83 it/sec) -training >> step=3447800, episode=575 reward=0.772191 (438.48 it/sec) -training >> step=3447900, episode=575 reward=0.7610112 (478.05 it/sec) -training >> step=3448000, episode=575 reward=0.7737785 (474.65 it/sec) -training >> step=3448100, episode=575 reward=0.7681343 (463.02 it/sec) -training >> step=3448200, episode=575 reward=0.7722951 (438.94 it/sec) -training >> step=3448300, episode=575 reward=0.7692956 (450.84 it/sec) -training >> step=3448400, episode=575 reward=0.7704371 (454.20 it/sec) -training >> step=3448500, episode=575 reward=0.7730932 (435.48 it/sec) -training >> step=3448600, episode=575 reward=0.7504348 (492.31 it/sec) -training >> step=3448700, episode=575 reward=0.7760187 (447.14 it/sec) -training >> step=3448800, episode=575 reward=0.7511597 (469.50 it/sec) -training >> step=3448900, episode=575 reward=0.7606397 (458.05 it/sec) -training >> step=3449000, episode=575 reward=0.7784175 (504.84 it/sec) -training >> step=3449100, episode=575 reward=0.7678789 (443.29 it/sec) -training >> step=3449200, episode=575 reward=0.7678909 (456.20 it/sec) -training >> step=3449300, episode=576 reward=0.7592037 (97.35 it/sec) -training >> step=3449400, episode=576 reward=0.7596965 (475.88 it/sec) -training >> step=3449500, episode=576 reward=0.7892661 (466.65 it/sec) -training >> step=3449600, episode=576 reward=0.7944656 (446.03 it/sec) -training >> step=3449700, episode=576 reward=0.7842401 (473.41 it/sec) -training >> step=3449800, episode=576 reward=0.7947677 (455.96 it/sec) -training >> step=3449900, episode=576 reward=0.7604282 (457.91 it/sec) -training >> step=3450000, episode=576 reward=0.7730632 (470.49 it/sec) -training >> step=3450100, episode=576 reward=0.7650345 (470.16 it/sec) -training >> step=3450200, episode=576 reward=0.7561229 (493.74 it/sec) -training >> step=3450300, episode=576 reward=0.7798535 (482.33 it/sec) -training >> step=3450400, episode=576 reward=0.782723 (436.15 it/sec) -training >> step=3450500, episode=576 reward=0.7832741 (477.63 it/sec) -training >> step=3450600, episode=576 reward=0.7630588 (454.96 it/sec) -training >> step=3450700, episode=576 reward=0.7810439 (488.27 it/sec) -training >> step=3450800, episode=576 reward=0.7752173 (472.67 it/sec) -training >> step=3450900, episode=576 reward=0.795154 (459.93 it/sec) -training >> step=3451000, episode=576 reward=0.7543238 (469.89 it/sec) -training >> step=3451100, episode=576 reward=0.7711172 (456.55 it/sec) -training >> step=3451200, episode=576 reward=0.7887912 (462.13 it/sec) -training >> step=3451300, episode=576 reward=0.7747207 (472.62 it/sec) -training >> step=3451400, episode=576 reward=0.763151 (503.11 it/sec) -training >> step=3451500, episode=576 reward=0.7504948 (469.80 it/sec) -training >> step=3451600, episode=576 reward=0.7707193 (473.21 it/sec) -training >> step=3451700, episode=576 reward=0.7923969 (476.80 it/sec) -training >> step=3451800, episode=576 reward=0.7864037 (432.51 it/sec) -training >> step=3451900, episode=576 reward=0.7811591 (453.71 it/sec) -training >> step=3452000, episode=576 reward=0.7819046 (473.80 it/sec) -training >> step=3452100, episode=576 reward=0.7385292 (466.90 it/sec) -training >> step=3452200, episode=576 reward=0.753618 (474.83 it/sec) -training >> step=3452300, episode=576 reward=0.7705157 (436.32 it/sec) -training >> step=3452400, episode=576 reward=0.7566515 (487.74 it/sec) -training >> step=3452500, episode=576 reward=0.7834293 (475.57 it/sec) -training >> step=3452600, episode=576 reward=0.7876358 (396.38 it/sec) -training >> step=3452700, episode=576 reward=0.762485 (461.07 it/sec) -training >> step=3452800, episode=576 reward=0.7796003 (480.78 it/sec) -training >> step=3452900, episode=576 reward=0.792137 (451.84 it/sec) -training >> step=3453000, episode=576 reward=0.7874049 (469.10 it/sec) -training >> step=3453100, episode=576 reward=0.7566336 (459.68 it/sec) -training >> step=3453200, episode=576 reward=0.7574392 (493.28 it/sec) -training >> step=3453300, episode=576 reward=0.7324051 (465.30 it/sec) -training >> step=3453400, episode=576 reward=0.7595294 (362.00 it/sec) -training >> step=3453500, episode=576 reward=0.769829 (458.28 it/sec) -training >> step=3453600, episode=576 reward=0.7540178 (501.22 it/sec) -training >> step=3453700, episode=576 reward=0.7863817 (438.00 it/sec) -training >> step=3453800, episode=576 reward=0.769504 (477.22 it/sec) -training >> step=3453900, episode=576 reward=0.7702503 (483.15 it/sec) -training >> step=3454000, episode=576 reward=0.7780146 (475.99 it/sec) -training >> step=3454100, episode=576 reward=0.7696421 (436.44 it/sec) -training >> step=3454200, episode=576 reward=0.7622737 (469.69 it/sec) -training >> step=3454300, episode=576 reward=0.7869512 (470.70 it/sec) -training >> step=3454400, episode=576 reward=0.7681481 (459.97 it/sec) -training >> step=3454500, episode=576 reward=0.7581474 (454.49 it/sec) -training >> step=3454600, episode=576 reward=0.7711974 (493.66 it/sec) -training >> step=3454700, episode=576 reward=0.7783881 (464.26 it/sec) -training >> step=3454800, episode=576 reward=0.7713025 (476.31 it/sec) -training >> step=3454900, episode=576 reward=0.7487714 (457.93 it/sec) -training >> step=3455000, episode=576 reward=0.7691314 (465.79 it/sec) -training >> step=3455100, episode=576 reward=0.7548065 (450.49 it/sec) -training >> step=3455200, episode=576 reward=0.7777478 (458.42 it/sec) -training >> step=3455300, episode=577 reward=0.7692892 (71.09 it/sec) -training >> step=3455400, episode=577 reward=0.7810035 (486.24 it/sec) -training >> step=3455500, episode=577 reward=0.7568796 (495.01 it/sec) -training >> step=3455600, episode=577 reward=0.7625061 (459.00 it/sec) -training >> step=3455700, episode=577 reward=0.7785991 (500.57 it/sec) -training >> step=3455800, episode=577 reward=0.7823867 (499.70 it/sec) -training >> step=3455900, episode=577 reward=0.769852 (475.45 it/sec) -training >> step=3456000, episode=577 reward=0.7726774 (480.85 it/sec) -training >> step=3456100, episode=577 reward=0.7775022 (477.25 it/sec) -training >> step=3456200, episode=577 reward=0.7705975 (470.18 it/sec) -training >> step=3456300, episode=577 reward=0.7696886 (476.17 it/sec) -training >> step=3456400, episode=577 reward=0.7816304 (500.05 it/sec) -training >> step=3456500, episode=577 reward=0.7622421 (457.37 it/sec) -training >> step=3456600, episode=577 reward=0.7658268 (470.26 it/sec) -training >> step=3456700, episode=577 reward=0.7706238 (433.87 it/sec) -training >> step=3456800, episode=577 reward=0.7709689 (462.19 it/sec) -training >> step=3456900, episode=577 reward=0.7800533 (477.29 it/sec) -training >> step=3457000, episode=577 reward=0.7793836 (489.31 it/sec) -training >> step=3457100, episode=577 reward=0.7418526 (440.46 it/sec) -training >> step=3457200, episode=577 reward=0.7704993 (471.96 it/sec) -training >> step=3457300, episode=577 reward=0.7819304 (465.51 it/sec) -training >> step=3457400, episode=577 reward=0.7700127 (487.57 it/sec) -training >> step=3457500, episode=577 reward=0.7733994 (477.41 it/sec) -training >> step=3457600, episode=577 reward=0.7693648 (463.62 it/sec) -training >> step=3457700, episode=577 reward=0.7914982 (499.28 it/sec) -training >> step=3457800, episode=577 reward=0.7769394 (458.09 it/sec) -training >> step=3457900, episode=577 reward=0.7796935 (468.16 it/sec) -training >> step=3458000, episode=577 reward=0.7859223 (468.67 it/sec) -training >> step=3458100, episode=577 reward=0.7810836 (512.86 it/sec) -training >> step=3458200, episode=577 reward=0.7912554 (433.43 it/sec) -training >> step=3458300, episode=577 reward=0.7824361 (472.26 it/sec) -training >> step=3458400, episode=577 reward=0.7749501 (453.88 it/sec) -training >> step=3458500, episode=577 reward=0.7718165 (490.66 it/sec) -training >> step=3458600, episode=577 reward=0.7814902 (485.85 it/sec) -training >> step=3458700, episode=577 reward=0.7723373 (501.69 it/sec) -training >> step=3458800, episode=577 reward=0.7703235 (502.23 it/sec) -training >> step=3458900, episode=577 reward=0.760871 (439.74 it/sec) -training >> step=3459000, episode=577 reward=0.782536 (461.62 it/sec) -training >> step=3459100, episode=577 reward=0.8029447 (476.48 it/sec) -training >> step=3459200, episode=577 reward=0.7711105 (483.91 it/sec) -training >> step=3459300, episode=577 reward=0.7649844 (467.58 it/sec) -training >> step=3459400, episode=577 reward=0.7707455 (467.44 it/sec) -training >> step=3459500, episode=577 reward=0.7651661 (472.55 it/sec) -training >> step=3459600, episode=577 reward=0.7665353 (341.79 it/sec) -training >> step=3459700, episode=577 reward=0.7723438 (487.90 it/sec) -training >> step=3459800, episode=577 reward=0.7961342 (489.84 it/sec) -training >> step=3459900, episode=577 reward=0.7602159 (493.32 it/sec) -training >> step=3460000, episode=577 reward=0.761964 (471.02 it/sec) -training >> step=3460100, episode=577 reward=0.7778754 (443.80 it/sec) -training >> step=3460200, episode=577 reward=0.746309 (481.48 it/sec) -training >> step=3460300, episode=577 reward=0.7749282 (487.93 it/sec) -training >> step=3460400, episode=577 reward=0.7791777 (469.46 it/sec) -training >> step=3460500, episode=577 reward=0.7328011 (489.39 it/sec) -training >> step=3460600, episode=577 reward=0.7828778 (480.36 it/sec) -training >> step=3460700, episode=577 reward=0.7729458 (448.27 it/sec) -training >> step=3460800, episode=577 reward=0.777805 (466.47 it/sec) -training >> step=3460900, episode=577 reward=0.7787711 (458.38 it/sec) -training >> step=3461000, episode=577 reward=0.7552496 (472.28 it/sec) -training >> step=3461100, episode=577 reward=0.7666947 (470.78 it/sec) -training >> step=3461200, episode=577 reward=0.7785631 (424.27 it/sec) -training >> step=3461300, episode=578 reward=0.7819384 (69.56 it/sec) -training >> step=3461400, episode=578 reward=0.7630024 (501.63 it/sec) -training >> step=3461500, episode=578 reward=0.7934754 (508.05 it/sec) -training >> step=3461600, episode=578 reward=0.779433 (534.83 it/sec) -training >> step=3461700, episode=578 reward=0.769095 (495.88 it/sec) -training >> step=3461800, episode=578 reward=0.7786567 (492.33 it/sec) -training >> step=3461900, episode=578 reward=0.7998326 (478.04 it/sec) -training >> step=3462000, episode=578 reward=0.7646659 (503.63 it/sec) -training >> step=3462100, episode=578 reward=0.7438394 (495.91 it/sec) -training >> step=3462200, episode=578 reward=0.7782532 (506.78 it/sec) -training >> step=3462300, episode=578 reward=0.7736442 (452.86 it/sec) -training >> step=3462400, episode=578 reward=0.7666969 (483.89 it/sec) -training >> step=3462500, episode=578 reward=0.7734499 (460.27 it/sec) -training >> step=3462600, episode=578 reward=0.7762737 (468.68 it/sec) -training >> step=3462700, episode=578 reward=0.7850174 (475.79 it/sec) -training >> step=3462800, episode=578 reward=0.7700756 (483.41 it/sec) -training >> step=3462900, episode=578 reward=0.7828089 (492.51 it/sec) -training >> step=3463000, episode=578 reward=0.7696806 (443.60 it/sec) -training >> step=3463100, episode=578 reward=0.7740485 (481.41 it/sec) -training >> step=3463200, episode=578 reward=0.7770074 (497.73 it/sec) -training >> step=3463300, episode=578 reward=0.7745168 (505.40 it/sec) -training >> step=3463400, episode=578 reward=0.7772684 (476.90 it/sec) -training >> step=3463500, episode=578 reward=0.7662919 (471.60 it/sec) -training >> step=3463600, episode=578 reward=0.7681001 (458.74 it/sec) -training >> step=3463700, episode=578 reward=0.7698756 (464.29 it/sec) -training >> step=3463800, episode=578 reward=0.7858035 (488.26 it/sec) -training >> step=3463900, episode=578 reward=0.7684597 (474.47 it/sec) -training >> step=3464000, episode=578 reward=0.7604654 (490.84 it/sec) -training >> step=3464100, episode=578 reward=0.7684368 (466.41 it/sec) -training >> step=3464200, episode=578 reward=0.7627472 (434.52 it/sec) -training >> step=3464300, episode=578 reward=0.778824 (518.87 it/sec) -training >> step=3464400, episode=578 reward=0.769706 (486.39 it/sec) -training >> step=3464500, episode=578 reward=0.7687215 (471.39 it/sec) -training >> step=3464600, episode=578 reward=0.7825149 (452.15 it/sec) -training >> step=3464700, episode=578 reward=0.7939643 (491.29 it/sec) -training >> step=3464800, episode=578 reward=0.7809084 (457.00 it/sec) -training >> step=3464900, episode=578 reward=0.8007178 (480.06 it/sec) -training >> step=3465000, episode=578 reward=0.7649288 (494.48 it/sec) -training >> step=3465100, episode=578 reward=0.753507 (481.33 it/sec) -training >> step=3465200, episode=578 reward=0.7788327 (473.81 it/sec) -training >> step=3465300, episode=578 reward=0.7731785 (484.01 it/sec) -training >> step=3465400, episode=578 reward=0.7951511 (497.26 it/sec) -training >> step=3465500, episode=578 reward=0.7704895 (499.90 it/sec) -training >> step=3465600, episode=578 reward=0.7607103 (449.39 it/sec) -training >> step=3465700, episode=578 reward=0.7600802 (456.81 it/sec) -training >> step=3465800, episode=578 reward=0.7773443 (497.13 it/sec) -training >> step=3465900, episode=578 reward=0.7475281 (458.63 it/sec) -training >> step=3466000, episode=578 reward=0.7807437 (385.08 it/sec) -training >> step=3466100, episode=578 reward=0.7664722 (503.28 it/sec) -training >> step=3466200, episode=578 reward=0.7662888 (469.07 it/sec) -training >> step=3466300, episode=578 reward=0.7706581 (475.21 it/sec) -training >> step=3466400, episode=578 reward=0.7892364 (475.08 it/sec) -training >> step=3466500, episode=578 reward=0.7780622 (472.03 it/sec) -training >> step=3466600, episode=578 reward=0.7594351 (477.25 it/sec) -training >> step=3466700, episode=578 reward=0.7823248 (483.54 it/sec) -training >> step=3466800, episode=578 reward=0.7870387 (484.41 it/sec) -training >> step=3466900, episode=578 reward=0.7602863 (516.36 it/sec) -training >> step=3467000, episode=578 reward=0.7724125 (452.06 it/sec) -training >> step=3467100, episode=578 reward=0.7677422 (473.35 it/sec) -training >> step=3467200, episode=578 reward=0.7724023 (510.37 it/sec) -training >> step=3467300, episode=579 reward=0.7705131 (73.43 it/sec) -training >> step=3467400, episode=579 reward=0.7906002 (492.49 it/sec) -training >> step=3467500, episode=579 reward=0.7669005 (513.08 it/sec) -training >> step=3467600, episode=579 reward=0.745744 (488.32 it/sec) -training >> step=3467700, episode=579 reward=0.7800191 (496.98 it/sec) -training >> step=3467800, episode=579 reward=0.7535087 (439.63 it/sec) -training >> step=3467900, episode=579 reward=0.7772888 (506.23 it/sec) -training >> step=3468000, episode=579 reward=0.7858811 (505.46 it/sec) -training >> step=3468100, episode=579 reward=0.7661307 (486.58 it/sec) -training >> step=3468200, episode=579 reward=0.7722988 (471.01 it/sec) -training >> step=3468300, episode=579 reward=0.771643 (480.41 it/sec) -training >> step=3468400, episode=579 reward=0.7816511 (514.58 it/sec) -training >> step=3468500, episode=579 reward=0.7710344 (457.06 it/sec) -training >> step=3468600, episode=579 reward=0.7889592 (486.93 it/sec) -training >> step=3468700, episode=579 reward=0.7851531 (489.51 it/sec) -training >> step=3468800, episode=579 reward=0.7762246 (478.01 it/sec) -training >> step=3468900, episode=579 reward=0.7458484 (497.11 it/sec) -training >> step=3469000, episode=579 reward=0.7575006 (506.97 it/sec) -training >> step=3469100, episode=579 reward=0.7677962 (455.08 it/sec) -training >> step=3469200, episode=579 reward=0.7616966 (472.67 it/sec) -training >> step=3469300, episode=579 reward=0.7745367 (475.39 it/sec) -training >> step=3469400, episode=579 reward=0.7784882 (497.19 it/sec) -training >> step=3469500, episode=579 reward=0.7574305 (444.93 it/sec) -training >> step=3469600, episode=579 reward=0.7646101 (492.56 it/sec) -training >> step=3469700, episode=579 reward=0.7792926 (468.78 it/sec) -training >> step=3469800, episode=579 reward=0.7815748 (428.36 it/sec) -training >> step=3469900, episode=579 reward=0.771238 (439.31 it/sec) -training >> step=3470000, episode=579 reward=0.7694071 (459.77 it/sec) -training >> step=3470100, episode=579 reward=0.7735667 (505.93 it/sec) -training >> step=3470200, episode=579 reward=0.7879303 (475.40 it/sec) -training >> step=3470300, episode=579 reward=0.7610396 (455.77 it/sec) -training >> step=3470400, episode=579 reward=0.7745923 (473.78 it/sec) -training >> step=3470500, episode=579 reward=0.7687329 (446.46 it/sec) -training >> step=3470600, episode=579 reward=0.7901388 (481.84 it/sec) -training >> step=3470700, episode=579 reward=0.7878276 (479.49 it/sec) -training >> step=3470800, episode=579 reward=0.7770732 (501.10 it/sec) -training >> step=3470900, episode=579 reward=0.7786911 (459.16 it/sec) -training >> step=3471000, episode=579 reward=0.769529 (398.94 it/sec) -training >> step=3471100, episode=579 reward=0.7656547 (475.12 it/sec) -training >> step=3471200, episode=579 reward=0.7728922 (480.76 it/sec) -training >> step=3471300, episode=579 reward=0.7619718 (489.07 it/sec) -training >> step=3471400, episode=579 reward=0.7852896 (483.18 it/sec) -training >> step=3471500, episode=579 reward=0.7714111 (463.93 it/sec) -training >> step=3471600, episode=579 reward=0.7668428 (460.13 it/sec) -training >> step=3471700, episode=579 reward=0.7836944 (459.89 it/sec) -training >> step=3471800, episode=579 reward=0.7641831 (510.04 it/sec) -training >> step=3471900, episode=579 reward=0.7851779 (475.21 it/sec) -training >> step=3472000, episode=579 reward=0.7756904 (349.59 it/sec) -training >> step=3472100, episode=579 reward=0.7616438 (504.38 it/sec) -training >> step=3472200, episode=579 reward=0.7751972 (483.15 it/sec) -training >> step=3472300, episode=579 reward=0.7733625 (458.93 it/sec) -training >> step=3472400, episode=579 reward=0.7564069 (496.49 it/sec) -training >> step=3472500, episode=579 reward=0.7684449 (457.08 it/sec) -training >> step=3472600, episode=579 reward=0.7715392 (485.11 it/sec) -training >> step=3472700, episode=579 reward=0.7889429 (484.31 it/sec) -training >> step=3472800, episode=579 reward=0.7570903 (466.87 it/sec) -training >> step=3472900, episode=579 reward=0.7857497 (458.68 it/sec) -training >> step=3473000, episode=579 reward=0.7855926 (473.58 it/sec) -training >> step=3473100, episode=579 reward=0.7532427 (488.75 it/sec) -training >> step=3473200, episode=579 reward=0.7687699 (490.83 it/sec) -training >> step=3473300, episode=580 reward=0.7783563 (71.79 it/sec) -training >> step=3473400, episode=580 reward=0.7482558 (445.43 it/sec) -training >> step=3473500, episode=580 reward=0.7672892 (534.47 it/sec) -training >> step=3473600, episode=580 reward=0.7515507 (498.15 it/sec) -training >> step=3473700, episode=580 reward=0.7676282 (491.32 it/sec) -training >> step=3473800, episode=580 reward=0.7828321 (472.51 it/sec) -training >> step=3473900, episode=580 reward=0.8080286 (479.55 it/sec) -training >> step=3474000, episode=580 reward=0.7776757 (457.20 it/sec) -training >> step=3474100, episode=580 reward=0.7901818 (462.27 it/sec) -training >> step=3474200, episode=580 reward=0.785311 (467.83 it/sec) -training >> step=3474300, episode=580 reward=0.7891979 (479.96 it/sec) -training >> step=3474400, episode=580 reward=0.7504273 (429.72 it/sec) -training >> step=3474500, episode=580 reward=0.7706057 (488.97 it/sec) -training >> step=3474600, episode=580 reward=0.7629971 (485.09 it/sec) -training >> step=3474700, episode=580 reward=0.7710223 (474.63 it/sec) -training >> step=3474800, episode=580 reward=0.7707852 (454.05 it/sec) -training >> step=3474900, episode=580 reward=0.8043656 (493.65 it/sec) -training >> step=3475000, episode=580 reward=0.7769971 (464.46 it/sec) -training >> step=3475100, episode=580 reward=0.7559761 (468.17 it/sec) -training >> step=3475200, episode=580 reward=0.7663124 (449.83 it/sec) -training >> step=3475300, episode=580 reward=0.7738098 (454.77 it/sec) -training >> step=3475400, episode=580 reward=0.7649905 (415.21 it/sec) -training >> step=3475500, episode=580 reward=0.7666275 (461.26 it/sec) -training >> step=3475600, episode=580 reward=0.7783472 (467.28 it/sec) -training >> step=3475700, episode=580 reward=0.7635173 (479.25 it/sec) -training >> step=3475800, episode=580 reward=0.7842121 (463.10 it/sec) -training >> step=3475900, episode=580 reward=0.7776982 (521.16 it/sec) -training >> step=3476000, episode=580 reward=0.7658697 (447.98 it/sec) -training >> step=3476100, episode=580 reward=0.7942023 (445.27 it/sec) -training >> step=3476200, episode=580 reward=0.7720463 (472.15 it/sec) -training >> step=3476300, episode=580 reward=0.745531 (497.17 it/sec) -training >> step=3476400, episode=580 reward=0.7709854 (472.56 it/sec) -training >> step=3476500, episode=580 reward=0.7789259 (465.95 it/sec) -training >> step=3476600, episode=580 reward=0.7739791 (444.67 it/sec) -training >> step=3476700, episode=580 reward=0.7724572 (463.74 it/sec) -training >> step=3476800, episode=580 reward=0.7851008 (453.05 it/sec) -training >> step=3476900, episode=580 reward=0.7918813 (485.74 it/sec) -training >> step=3477000, episode=580 reward=0.7807558 (508.23 it/sec) -training >> step=3477100, episode=580 reward=0.781901 (468.64 it/sec) -training >> step=3477200, episode=580 reward=0.7439076 (445.77 it/sec) -training >> step=3477300, episode=580 reward=0.772192 (474.05 it/sec) -training >> step=3477400, episode=580 reward=0.7587563 (480.20 it/sec) -training >> step=3477500, episode=580 reward=0.7884263 (495.25 it/sec) -training >> step=3477600, episode=580 reward=0.7544501 (458.53 it/sec) -training >> step=3477700, episode=580 reward=0.7713829 (475.42 it/sec) -training >> step=3477800, episode=580 reward=0.7787049 (448.51 it/sec) -training >> step=3477900, episode=580 reward=0.7685035 (448.23 it/sec) -training >> step=3478000, episode=580 reward=0.7760867 (504.28 it/sec) -training >> step=3478100, episode=580 reward=0.776032 (460.42 it/sec) -training >> step=3478200, episode=580 reward=0.7748734 (474.24 it/sec) -training >> step=3478300, episode=580 reward=0.7914859 (341.15 it/sec) -training >> step=3478400, episode=580 reward=0.7696756 (474.53 it/sec) -training >> step=3478500, episode=580 reward=0.7781521 (431.49 it/sec) -training >> step=3478600, episode=580 reward=0.7673129 (477.18 it/sec) -training >> step=3478700, episode=580 reward=0.7685404 (437.27 it/sec) -training >> step=3478800, episode=580 reward=0.77306 (465.15 it/sec) -training >> step=3478900, episode=580 reward=0.7543662 (469.49 it/sec) -training >> step=3479000, episode=580 reward=0.7622436 (434.35 it/sec) -training >> step=3479100, episode=580 reward=0.7657497 (512.80 it/sec) -training >> step=3479200, episode=580 reward=0.7604309 (493.12 it/sec) -training >> step=3479300, episode=581 reward=0.782885 (93.58 it/sec) -training >> step=3479400, episode=581 reward=0.7809063 (482.12 it/sec) -training >> step=3479500, episode=581 reward=0.7586082 (464.74 it/sec) -training >> step=3479600, episode=581 reward=0.7760014 (526.49 it/sec) -training >> step=3479700, episode=581 reward=0.7585554 (393.20 it/sec) -training >> step=3479800, episode=581 reward=0.7752752 (463.11 it/sec) -training >> step=3479900, episode=581 reward=0.7822356 (492.55 it/sec) -training >> step=3480000, episode=581 reward=0.7820873 (481.69 it/sec) -training >> step=3480100, episode=581 reward=0.7770953 (462.35 it/sec) -training >> step=3480200, episode=581 reward=0.7830961 (448.99 it/sec) -training >> step=3480300, episode=581 reward=0.7837604 (499.98 it/sec) -training >> step=3480400, episode=581 reward=0.7580581 (484.09 it/sec) -training >> step=3480500, episode=581 reward=0.7723136 (447.55 it/sec) -training >> step=3480600, episode=581 reward=0.7583648 (458.42 it/sec) -training >> step=3480700, episode=581 reward=0.7628787 (497.02 it/sec) -training >> step=3480800, episode=581 reward=0.7584496 (435.31 it/sec) -training >> step=3480900, episode=581 reward=0.7686461 (453.32 it/sec) -training >> step=3481000, episode=581 reward=0.7733996 (490.75 it/sec) -training >> step=3481100, episode=581 reward=0.7958572 (510.97 it/sec) -training >> step=3481200, episode=581 reward=0.7714121 (454.96 it/sec) -training >> step=3481300, episode=581 reward=0.7932523 (449.80 it/sec) -training >> step=3481400, episode=581 reward=0.793887 (494.87 it/sec) -training >> step=3481500, episode=581 reward=0.7884501 (457.44 it/sec) -training >> step=3481600, episode=581 reward=0.7779084 (477.61 it/sec) -training >> step=3481700, episode=581 reward=0.7553396 (484.09 it/sec) -training >> step=3481800, episode=581 reward=0.7785244 (500.10 it/sec) -training >> step=3481900, episode=581 reward=0.7518567 (463.57 it/sec) -training >> step=3482000, episode=581 reward=0.7911839 (476.60 it/sec) -training >> step=3482100, episode=581 reward=0.7695158 (495.83 it/sec) -training >> step=3482200, episode=581 reward=0.7906725 (473.81 it/sec) -training >> step=3482300, episode=581 reward=0.7648745 (477.01 it/sec) -training >> step=3482400, episode=581 reward=0.787008 (470.86 it/sec) -training >> step=3482500, episode=581 reward=0.7717116 (474.62 it/sec) -training >> step=3482600, episode=581 reward=0.7855705 (441.51 it/sec) -training >> step=3482700, episode=581 reward=0.7654689 (479.47 it/sec) -training >> step=3482800, episode=581 reward=0.7696714 (484.88 it/sec) -training >> step=3482900, episode=581 reward=0.7755346 (459.16 it/sec) -training >> step=3483000, episode=581 reward=0.7750009 (468.21 it/sec) -training >> step=3483100, episode=581 reward=0.7890114 (445.51 it/sec) -training >> step=3483200, episode=581 reward=0.7764773 (499.81 it/sec) -training >> step=3483300, episode=581 reward=0.7756011 (455.34 it/sec) -training >> step=3483400, episode=581 reward=0.7814276 (470.67 it/sec) -training >> step=3483500, episode=581 reward=0.7913845 (465.94 it/sec) -training >> step=3483600, episode=581 reward=0.7788732 (472.45 it/sec) -training >> step=3483700, episode=581 reward=0.7801788 (462.29 it/sec) -training >> step=3483800, episode=581 reward=0.7913685 (444.08 it/sec) -training >> step=3483900, episode=581 reward=0.7437449 (495.03 it/sec) -training >> step=3484000, episode=581 reward=0.7767437 (482.66 it/sec) -training >> step=3484100, episode=581 reward=0.7597087 (477.32 it/sec) -training >> step=3484200, episode=581 reward=0.7611994 (474.76 it/sec) -training >> step=3484300, episode=581 reward=0.7714901 (486.35 it/sec) -training >> step=3484400, episode=581 reward=0.7542821 (475.36 it/sec) -training >> step=3484500, episode=581 reward=0.7945341 (464.57 it/sec) -training >> step=3484600, episode=581 reward=0.7603546 (337.33 it/sec) -training >> step=3484700, episode=581 reward=0.7726747 (478.91 it/sec) -training >> step=3484800, episode=581 reward=0.7481852 (456.38 it/sec) -training >> step=3484900, episode=581 reward=0.7727174 (443.92 it/sec) -training >> step=3485000, episode=581 reward=0.7770115 (503.52 it/sec) -training >> step=3485100, episode=581 reward=0.7781037 (444.57 it/sec) -training >> step=3485200, episode=581 reward=0.7687931 (471.47 it/sec) -training >> step=3485300, episode=582 reward=0.7857123 (71.08 it/sec) -training >> step=3485400, episode=582 reward=0.749826 (505.34 it/sec) -training >> step=3485500, episode=582 reward=0.7670808 (523.91 it/sec) -training >> step=3485600, episode=582 reward=0.77225 (514.58 it/sec) -training >> step=3485700, episode=582 reward=0.7602677 (490.60 it/sec) -training >> step=3485800, episode=582 reward=0.7665906 (471.98 it/sec) -training >> step=3485900, episode=582 reward=0.7571779 (515.04 it/sec) -training >> step=3486000, episode=582 reward=0.7606939 (514.64 it/sec) -training >> step=3486100, episode=582 reward=0.7868932 (483.98 it/sec) -training >> step=3486200, episode=582 reward=0.7727817 (500.55 it/sec) -training >> step=3486300, episode=582 reward=0.7822077 (435.14 it/sec) -training >> step=3486400, episode=582 reward=0.7625396 (494.51 it/sec) -training >> step=3486500, episode=582 reward=0.7721539 (419.77 it/sec) -training >> step=3486600, episode=582 reward=0.7701929 (478.96 it/sec) -training >> step=3486700, episode=582 reward=0.7891594 (477.03 it/sec) -training >> step=3486800, episode=582 reward=0.7638456 (474.15 it/sec) -training >> step=3486900, episode=582 reward=0.7710735 (468.41 it/sec) -training >> step=3487000, episode=582 reward=0.7557215 (451.75 it/sec) -training >> step=3487100, episode=582 reward=0.7895883 (469.27 it/sec) -training >> step=3487200, episode=582 reward=0.7826549 (469.80 it/sec) -training >> step=3487300, episode=582 reward=0.7488017 (473.55 it/sec) -training >> step=3487400, episode=582 reward=0.7840303 (462.84 it/sec) -training >> step=3487500, episode=582 reward=0.796944 (462.30 it/sec) -training >> step=3487600, episode=582 reward=0.7922059 (434.80 it/sec) -training >> step=3487700, episode=582 reward=0.7554882 (462.60 it/sec) -training >> step=3487800, episode=582 reward=0.7757583 (503.41 it/sec) -training >> step=3487900, episode=582 reward=0.7665063 (490.74 it/sec) -training >> step=3488000, episode=582 reward=0.7603781 (466.49 it/sec) -training >> step=3488100, episode=582 reward=0.7863047 (455.96 it/sec) -training >> step=3488200, episode=582 reward=0.7859125 (477.15 it/sec) -training >> step=3488300, episode=582 reward=0.7833902 (487.78 it/sec) -training >> step=3488400, episode=582 reward=0.7724398 (469.66 it/sec) -training >> step=3488500, episode=582 reward=0.7509642 (468.20 it/sec) -training >> step=3488600, episode=582 reward=0.7751855 (459.92 it/sec) -training >> step=3488700, episode=582 reward=0.7704403 (476.17 it/sec) -training >> step=3488800, episode=582 reward=0.7807734 (442.28 it/sec) -training >> step=3488900, episode=582 reward=0.803171 (496.05 it/sec) -training >> step=3489000, episode=582 reward=0.7811888 (439.52 it/sec) -training >> step=3489100, episode=582 reward=0.7690303 (449.56 it/sec) -training >> step=3489200, episode=582 reward=0.7635127 (458.17 it/sec) -training >> step=3489300, episode=582 reward=0.773563 (489.72 it/sec) -training >> step=3489400, episode=582 reward=0.7663959 (442.26 it/sec) -training >> step=3489500, episode=582 reward=0.7788382 (469.10 it/sec) -training >> step=3489600, episode=582 reward=0.765708 (478.88 it/sec) -training >> step=3489700, episode=582 reward=0.7783295 (502.66 it/sec) -training >> step=3489800, episode=582 reward=0.7662696 (470.20 it/sec) -training >> step=3489900, episode=582 reward=0.7544003 (457.49 it/sec) -training >> step=3490000, episode=582 reward=0.7621757 (488.40 it/sec) -training >> step=3490100, episode=582 reward=0.7664446 (466.19 it/sec) -training >> step=3490200, episode=582 reward=0.7779686 (475.43 it/sec) -training >> step=3490300, episode=582 reward=0.7463518 (463.41 it/sec) -training >> step=3490400, episode=582 reward=0.7868496 (487.52 it/sec) -training >> step=3490500, episode=582 reward=0.7899452 (483.15 it/sec) -training >> step=3490600, episode=582 reward=0.7578351 (464.53 it/sec) -training >> step=3490700, episode=582 reward=0.7608488 (473.95 it/sec) -training >> step=3490800, episode=582 reward=0.7923173 (459.92 it/sec) -training >> step=3490900, episode=582 reward=0.7947778 (344.57 it/sec) -training >> step=3491000, episode=582 reward=0.7841623 (476.66 it/sec) -training >> step=3491100, episode=582 reward=0.7613551 (486.90 it/sec) -training >> step=3491200, episode=582 reward=0.7561619 (494.27 it/sec) -training >> step=3491300, episode=583 reward=0.7767892 (74.36 it/sec) -training >> step=3491400, episode=583 reward=0.7809547 (497.09 it/sec) -training >> step=3491500, episode=583 reward=0.7653778 (504.83 it/sec) -training >> step=3491600, episode=583 reward=0.7751721 (492.14 it/sec) -training >> step=3491700, episode=583 reward=0.7607009 (512.64 it/sec) -training >> step=3491800, episode=583 reward=0.7734443 (512.13 it/sec) -training >> step=3491900, episode=583 reward=0.77784 (478.23 it/sec) -training >> step=3492000, episode=583 reward=0.7535891 (467.82 it/sec) -training >> step=3492100, episode=583 reward=0.7771947 (483.18 it/sec) -training >> step=3492200, episode=583 reward=0.7699966 (467.85 it/sec) -training >> step=3492300, episode=583 reward=0.7776096 (485.57 it/sec) -training >> step=3492400, episode=583 reward=0.7800592 (509.69 it/sec) -training >> step=3492500, episode=583 reward=0.7702172 (437.25 it/sec) -training >> step=3492600, episode=583 reward=0.7753034 (466.30 it/sec) -training >> step=3492700, episode=583 reward=0.7782671 (462.46 it/sec) -training >> step=3492800, episode=583 reward=0.7667667 (452.69 it/sec) -training >> step=3492900, episode=583 reward=0.7751247 (459.04 it/sec) -training >> step=3493000, episode=583 reward=0.7751473 (454.62 it/sec) -training >> step=3493100, episode=583 reward=0.7773808 (453.23 it/sec) -training >> step=3493200, episode=583 reward=0.7600731 (443.24 it/sec) -training >> step=3493300, episode=583 reward=0.7768939 (473.56 it/sec) -training >> step=3493400, episode=583 reward=0.7760368 (488.63 it/sec) -training >> step=3493500, episode=583 reward=0.7654901 (485.79 it/sec) -training >> step=3493600, episode=583 reward=0.7661114 (481.74 it/sec) -training >> step=3493700, episode=583 reward=0.7848051 (461.48 it/sec) -training >> step=3493800, episode=583 reward=0.765594 (409.37 it/sec) -training >> step=3493900, episode=583 reward=0.7941452 (466.73 it/sec) -training >> step=3494000, episode=583 reward=0.7714032 (480.65 it/sec) -training >> step=3494100, episode=583 reward=0.7719113 (493.90 it/sec) -training >> step=3494200, episode=583 reward=0.7631441 (471.84 it/sec) -training >> step=3494300, episode=583 reward=0.7553135 (452.86 it/sec) -training >> step=3494400, episode=583 reward=0.75178 (469.06 it/sec) -training >> step=3494500, episode=583 reward=0.792531 (454.78 it/sec) -training >> step=3494600, episode=583 reward=0.7850336 (443.30 it/sec) -training >> step=3494700, episode=583 reward=0.7799774 (495.48 it/sec) -training >> step=3494800, episode=583 reward=0.7564309 (510.42 it/sec) -training >> step=3494900, episode=583 reward=0.7844428 (448.11 it/sec) -training >> step=3495000, episode=583 reward=0.7725855 (477.67 it/sec) -training >> step=3495100, episode=583 reward=0.7661857 (461.87 it/sec) -training >> step=3495200, episode=583 reward=0.7797423 (512.73 it/sec) -training >> step=3495300, episode=583 reward=0.7954342 (475.03 it/sec) -training >> step=3495400, episode=583 reward=0.7861124 (467.93 it/sec) -training >> step=3495500, episode=583 reward=0.7603063 (475.04 it/sec) -training >> step=3495600, episode=583 reward=0.7583522 (455.16 it/sec) -training >> step=3495700, episode=583 reward=0.7852343 (479.02 it/sec) -training >> step=3495800, episode=583 reward=0.7745629 (479.61 it/sec) -training >> step=3495900, episode=583 reward=0.775021 (511.81 it/sec) -training >> step=3496000, episode=583 reward=0.7822523 (468.44 it/sec) -training >> step=3496100, episode=583 reward=0.7822581 (430.62 it/sec) -training >> step=3496200, episode=583 reward=0.7806293 (470.70 it/sec) -training >> step=3496300, episode=583 reward=0.7703785 (476.98 it/sec) -training >> step=3496400, episode=583 reward=0.7663285 (483.07 it/sec) -training >> step=3496500, episode=583 reward=0.7376192 (473.46 it/sec) -training >> step=3496600, episode=583 reward=0.7869014 (477.66 it/sec) -training >> step=3496700, episode=583 reward=0.7898116 (446.04 it/sec) -training >> step=3496800, episode=583 reward=0.7650492 (456.60 it/sec) -training >> step=3496900, episode=583 reward=0.7797915 (461.93 it/sec) -training >> step=3497000, episode=583 reward=0.7839187 (362.36 it/sec) -training >> step=3497100, episode=583 reward=0.7665527 (474.75 it/sec) -training >> step=3497200, episode=583 reward=0.7867313 (464.11 it/sec) -training >> step=3497300, episode=584 reward=0.7877145 (85.42 it/sec) -training >> step=3497400, episode=584 reward=0.772188 (510.79 it/sec) -training >> step=3497500, episode=584 reward=0.7625974 (510.04 it/sec) -training >> step=3497600, episode=584 reward=0.7460325 (527.94 it/sec) -training >> step=3497700, episode=584 reward=0.7782588 (520.68 it/sec) -training >> step=3497800, episode=584 reward=0.7621852 (489.23 it/sec) -training >> step=3497900, episode=584 reward=0.7788479 (499.54 it/sec) -training >> step=3498000, episode=584 reward=0.7710892 (509.14 it/sec) -training >> step=3498100, episode=584 reward=0.7848122 (511.06 it/sec) -training >> step=3498200, episode=584 reward=0.7735083 (460.88 it/sec) -training >> step=3498300, episode=584 reward=0.7569197 (482.25 it/sec) -training >> step=3498400, episode=584 reward=0.7615982 (423.51 it/sec) -training >> step=3498500, episode=584 reward=0.7798302 (460.49 it/sec) -training >> step=3498600, episode=584 reward=0.7623997 (482.44 it/sec) -training >> step=3498700, episode=584 reward=0.7767473 (475.17 it/sec) -training >> step=3498800, episode=584 reward=0.7842932 (488.24 it/sec) -training >> step=3498900, episode=584 reward=0.7895377 (472.48 it/sec) -training >> step=3499000, episode=584 reward=0.7803106 (509.53 it/sec) -training >> step=3499100, episode=584 reward=0.7735338 (446.48 it/sec) -training >> step=3499200, episode=584 reward=0.7693542 (501.03 it/sec) -training >> step=3499300, episode=584 reward=0.763485 (459.37 it/sec) -training >> step=3499400, episode=584 reward=0.7869267 (502.55 it/sec) -training >> step=3499500, episode=584 reward=0.7634579 (442.15 it/sec) -training >> step=3499600, episode=584 reward=0.7998132 (469.78 it/sec) -training >> step=3499700, episode=584 reward=0.7693525 (391.31 it/sec) -training >> step=3499800, episode=584 reward=0.7803946 (419.26 it/sec) -training >> step=3499900, episode=584 reward=0.772065 (490.47 it/sec) -training >> step=3500000, episode=584 reward=0.7726286 (484.72 it/sec) -training >> step=3500100, episode=584 reward=0.7564784 (454.91 it/sec) -training >> step=3500200, episode=584 reward=0.7613441 (548.03 it/sec) -training >> step=3500300, episode=584 reward=0.7705598 (476.13 it/sec) -training >> step=3500400, episode=584 reward=0.7596838 (472.99 it/sec) -training >> step=3500500, episode=584 reward=0.760187 (468.92 it/sec) -training >> step=3500600, episode=584 reward=0.7464125 (508.78 it/sec) -training >> step=3500700, episode=584 reward=0.7800655 (494.61 it/sec) -training >> step=3500800, episode=584 reward=0.7702335 (498.55 it/sec) -training >> step=3500900, episode=584 reward=0.7727908 (485.70 it/sec) -training >> step=3501000, episode=584 reward=0.7512432 (546.69 it/sec) -training >> step=3501100, episode=584 reward=0.7713804 (500.75 it/sec) -training >> step=3501200, episode=584 reward=0.779651 (487.90 it/sec) -training >> step=3501300, episode=584 reward=0.7701575 (521.68 it/sec) -training >> step=3501400, episode=584 reward=0.7663887 (525.60 it/sec) -training >> step=3501500, episode=584 reward=0.7756506 (485.68 it/sec) -training >> step=3501600, episode=584 reward=0.7834962 (472.47 it/sec) -training >> step=3501700, episode=584 reward=0.7825892 (485.65 it/sec) -training >> step=3501800, episode=584 reward=0.7784608 (503.36 it/sec) -training >> step=3501900, episode=584 reward=0.7716045 (508.62 it/sec) -training >> step=3502000, episode=584 reward=0.7697809 (465.05 it/sec) -training >> step=3502100, episode=584 reward=0.7667258 (517.95 it/sec) -training >> step=3502200, episode=584 reward=0.7767518 (474.77 it/sec) -training >> step=3502300, episode=584 reward=0.742112 (502.77 it/sec) -training >> step=3502400, episode=584 reward=0.7708449 (515.33 it/sec) -training >> step=3502500, episode=584 reward=0.7474177 (479.05 it/sec) -training >> step=3502600, episode=584 reward=0.7717541 (513.61 it/sec) -training >> step=3502700, episode=584 reward=0.7771224 (497.30 it/sec) -training >> step=3502800, episode=584 reward=0.7571421 (506.76 it/sec) -training >> step=3502900, episode=584 reward=0.7603141 (494.12 it/sec) -training >> step=3503000, episode=584 reward=0.7716033 (471.19 it/sec) -training >> step=3503100, episode=584 reward=0.7531059 (439.70 it/sec) -training >> step=3503200, episode=584 reward=0.7685119 (360.12 it/sec) -training >> step=3503300, episode=585 reward=0.762944 (94.77 it/sec) -training >> step=3503400, episode=585 reward=0.7467241 (490.50 it/sec) -training >> step=3503500, episode=585 reward=0.7659783 (491.68 it/sec) -training >> step=3503600, episode=585 reward=0.7696051 (461.85 it/sec) -training >> step=3503700, episode=585 reward=0.7751014 (476.12 it/sec) -training >> step=3503800, episode=585 reward=0.7840404 (507.91 it/sec) -training >> step=3503900, episode=585 reward=0.7693198 (524.05 it/sec) -training >> step=3504000, episode=585 reward=0.7897499 (492.87 it/sec) -training >> step=3504100, episode=585 reward=0.7546493 (458.02 it/sec) -training >> step=3504200, episode=585 reward=0.7834511 (501.01 it/sec) -training >> step=3504300, episode=585 reward=0.7817326 (458.45 it/sec) -training >> step=3504400, episode=585 reward=0.7752888 (522.83 it/sec) -training >> step=3504500, episode=585 reward=0.764383 (524.56 it/sec) -training >> step=3504600, episode=585 reward=0.7511764 (470.19 it/sec) -training >> step=3504700, episode=585 reward=0.7736807 (440.52 it/sec) -training >> step=3504800, episode=585 reward=0.7653397 (503.03 it/sec) -training >> step=3504900, episode=585 reward=0.7717496 (452.46 it/sec) -training >> step=3505000, episode=585 reward=0.7586348 (456.19 it/sec) -training >> step=3505100, episode=585 reward=0.7754508 (439.13 it/sec) -training >> step=3505200, episode=585 reward=0.7588951 (522.21 it/sec) -training >> step=3505300, episode=585 reward=0.7866029 (452.58 it/sec) -training >> step=3505400, episode=585 reward=0.7692649 (501.09 it/sec) -training >> step=3505500, episode=585 reward=0.7773436 (498.33 it/sec) -training >> step=3505600, episode=585 reward=0.7722056 (476.98 it/sec) -training >> step=3505700, episode=585 reward=0.7861163 (512.68 it/sec) -training >> step=3505800, episode=585 reward=0.7808741 (524.86 it/sec) -training >> step=3505900, episode=585 reward=0.7802752 (555.93 it/sec) -training >> step=3506000, episode=585 reward=0.7808424 (541.34 it/sec) -training >> step=3506100, episode=585 reward=0.7672791 (488.66 it/sec) -training >> step=3506200, episode=585 reward=0.8022186 (491.39 it/sec) -training >> step=3506300, episode=585 reward=0.7807821 (494.76 it/sec) -training >> step=3506400, episode=585 reward=0.7767231 (439.34 it/sec) -training >> step=3506500, episode=585 reward=0.7723988 (514.96 it/sec) -training >> step=3506600, episode=585 reward=0.7699737 (561.16 it/sec) -training >> step=3506700, episode=585 reward=0.7872 (493.73 it/sec) -training >> step=3506800, episode=585 reward=0.7651322 (517.75 it/sec) -training >> step=3506900, episode=585 reward=0.7648993 (501.50 it/sec) -training >> step=3507000, episode=585 reward=0.8058411 (519.38 it/sec) -training >> step=3507100, episode=585 reward=0.7726751 (538.65 it/sec) -training >> step=3507200, episode=585 reward=0.7698658 (505.00 it/sec) -training >> step=3507300, episode=585 reward=0.76277 (526.22 it/sec) -training >> step=3507400, episode=585 reward=0.7908849 (513.69 it/sec) -training >> step=3507500, episode=585 reward=0.7786381 (518.22 it/sec) -training >> step=3507600, episode=585 reward=0.7745329 (540.85 it/sec) -training >> step=3507700, episode=585 reward=0.7635004 (538.42 it/sec) -training >> step=3507800, episode=585 reward=0.7643648 (508.76 it/sec) -training >> step=3507900, episode=585 reward=0.7836109 (510.37 it/sec) -training >> step=3508000, episode=585 reward=0.7857115 (493.25 it/sec) -training >> step=3508100, episode=585 reward=0.7790706 (497.92 it/sec) -training >> step=3508200, episode=585 reward=0.7843195 (505.41 it/sec) -training >> step=3508300, episode=585 reward=0.766961 (533.52 it/sec) -training >> step=3508400, episode=585 reward=0.7789909 (493.81 it/sec) -training >> step=3508500, episode=585 reward=0.7612849 (523.62 it/sec) -training >> step=3508600, episode=585 reward=0.7699351 (531.78 it/sec) -training >> step=3508700, episode=585 reward=0.7580963 (510.18 it/sec) -training >> step=3508800, episode=585 reward=0.7628641 (534.43 it/sec) -training >> step=3508900, episode=585 reward=0.7689008 (472.43 it/sec) -training >> step=3509000, episode=585 reward=0.774237 (497.53 it/sec) -training >> step=3509100, episode=585 reward=0.77132 (486.53 it/sec) -training >> step=3509200, episode=585 reward=0.7827151 (515.82 it/sec) -training >> step=3509300, episode=586 reward=0.7670421 (139.80 it/sec) -training >> step=3509400, episode=586 reward=0.745766 (534.07 it/sec) -training >> step=3509500, episode=586 reward=0.7708628 (508.87 it/sec) -training >> step=3509600, episode=586 reward=0.7649072 (495.76 it/sec) -training >> step=3509700, episode=586 reward=0.7637444 (482.33 it/sec) -training >> step=3509800, episode=586 reward=0.7577084 (540.85 it/sec) -training >> step=3509900, episode=586 reward=0.7631762 (516.32 it/sec) -training >> step=3510000, episode=586 reward=0.7851171 (507.18 it/sec) -training >> step=3510100, episode=586 reward=0.7851818 (497.00 it/sec) -training >> step=3510200, episode=586 reward=0.7782782 (539.84 it/sec) -training >> step=3510300, episode=586 reward=0.7608654 (516.10 it/sec) -training >> step=3510400, episode=586 reward=0.7780458 (529.68 it/sec) -training >> step=3510500, episode=586 reward=0.7649682 (537.41 it/sec) -training >> step=3510600, episode=586 reward=0.7589479 (545.57 it/sec) -training >> step=3510700, episode=586 reward=0.771038 (508.15 it/sec) -training >> step=3510800, episode=586 reward=0.7560554 (464.19 it/sec) -training >> step=3510900, episode=586 reward=0.7786764 (545.32 it/sec) -training >> step=3511000, episode=586 reward=0.7727478 (529.80 it/sec) -training >> step=3511100, episode=586 reward=0.7894816 (494.69 it/sec) -training >> step=3511200, episode=586 reward=0.7500644 (523.39 it/sec) -training >> step=3511300, episode=586 reward=0.7716586 (493.34 it/sec) -training >> step=3511400, episode=586 reward=0.7569565 (478.83 it/sec) -training >> step=3511500, episode=586 reward=0.7611827 (500.67 it/sec) -training >> step=3511600, episode=586 reward=0.7821658 (527.10 it/sec) -training >> step=3511700, episode=586 reward=0.760906 (549.98 it/sec) -training >> step=3511800, episode=586 reward=0.7825156 (501.53 it/sec) -training >> step=3511900, episode=586 reward=0.7712734 (503.94 it/sec) -training >> step=3512000, episode=586 reward=0.7628586 (531.14 it/sec) -training >> step=3512100, episode=586 reward=0.7816923 (483.86 it/sec) -training >> step=3512200, episode=586 reward=0.7872371 (526.95 it/sec) -training >> step=3512300, episode=586 reward=0.789285 (532.38 it/sec) -training >> step=3512400, episode=586 reward=0.774489 (546.87 it/sec) -training >> step=3512500, episode=586 reward=0.7779548 (494.36 it/sec) -training >> step=3512600, episode=586 reward=0.7685198 (443.70 it/sec) -training >> step=3512700, episode=586 reward=0.776786 (558.56 it/sec) -training >> step=3512800, episode=586 reward=0.7858554 (543.38 it/sec) -training >> step=3512900, episode=586 reward=0.8041991 (535.75 it/sec) -training >> step=3513000, episode=586 reward=0.769504 (518.67 it/sec) -training >> step=3513100, episode=586 reward=0.7756829 (523.26 it/sec) -training >> step=3513200, episode=586 reward=0.7392966 (502.41 it/sec) -training >> step=3513300, episode=586 reward=0.7875522 (522.29 it/sec) -training >> step=3513400, episode=586 reward=0.7753916 (542.16 it/sec) -training >> step=3513500, episode=586 reward=0.7704441 (562.30 it/sec) -training >> step=3513600, episode=586 reward=0.757919 (560.26 it/sec) -training >> step=3513700, episode=586 reward=0.7697935 (532.62 it/sec) -training >> step=3513800, episode=586 reward=0.7432875 (543.44 it/sec) -training >> step=3513900, episode=586 reward=0.7860578 (534.08 it/sec) -training >> step=3514000, episode=586 reward=0.7627425 (503.51 it/sec) -training >> step=3514100, episode=586 reward=0.7700531 (467.74 it/sec) -training >> step=3514200, episode=586 reward=0.7760613 (565.13 it/sec) -training >> step=3514300, episode=586 reward=0.7773994 (506.19 it/sec) -training >> step=3514400, episode=586 reward=0.7656436 (517.53 it/sec) -training >> step=3514500, episode=586 reward=0.797295 (563.84 it/sec) -training >> step=3514600, episode=586 reward=0.7795038 (544.28 it/sec) -training >> step=3514700, episode=586 reward=0.7855491 (541.84 it/sec) -training >> step=3514800, episode=586 reward=0.7814728 (505.28 it/sec) -training >> step=3514900, episode=586 reward=0.761529 (525.35 it/sec) -training >> step=3515000, episode=586 reward=0.7737424 (479.15 it/sec) -training >> step=3515100, episode=586 reward=0.7795914 (556.01 it/sec) -training >> step=3515200, episode=586 reward=0.7850317 (500.00 it/sec) -training >> step=3515300, episode=587 reward=0.762656 (114.13 it/sec) -training >> step=3515400, episode=587 reward=0.794994 (550.01 it/sec) -training >> step=3515500, episode=587 reward=0.7714772 (494.59 it/sec) -training >> step=3515600, episode=587 reward=0.7584242 (524.06 it/sec) -training >> step=3515700, episode=587 reward=0.7963322 (507.04 it/sec) -training >> step=3515800, episode=587 reward=0.760291 (505.62 it/sec) -training >> step=3515900, episode=587 reward=0.7784597 (544.82 it/sec) -training >> step=3516000, episode=587 reward=0.7700839 (586.91 it/sec) -training >> step=3516100, episode=587 reward=0.7785645 (455.95 it/sec) -training >> step=3516200, episode=587 reward=0.8018237 (501.86 it/sec) -training >> step=3516300, episode=587 reward=0.7701234 (546.12 it/sec) -training >> step=3516400, episode=587 reward=0.7629189 (541.94 it/sec) -training >> step=3516500, episode=587 reward=0.7615838 (523.99 it/sec) -training >> step=3516600, episode=587 reward=0.7735286 (474.68 it/sec) -training >> step=3516700, episode=587 reward=0.7587843 (553.70 it/sec) -training >> step=3516800, episode=587 reward=0.7630492 (516.03 it/sec) -training >> step=3516900, episode=587 reward=0.769958 (495.70 it/sec) -training >> step=3517000, episode=587 reward=0.7904511 (523.72 it/sec) -training >> step=3517100, episode=587 reward=0.7685475 (493.34 it/sec) -training >> step=3517200, episode=587 reward=0.787993 (492.81 it/sec) -training >> step=3517300, episode=587 reward=0.7866558 (503.36 it/sec) -training >> step=3517400, episode=587 reward=0.7721047 (521.32 it/sec) -training >> step=3517500, episode=587 reward=0.7391086 (529.33 it/sec) -training >> step=3517600, episode=587 reward=0.7814395 (514.07 it/sec) -training >> step=3517700, episode=587 reward=0.7420892 (498.78 it/sec) -training >> step=3517800, episode=587 reward=0.7797318 (543.14 it/sec) -training >> step=3517900, episode=587 reward=0.7640452 (521.79 it/sec) -training >> step=3518000, episode=587 reward=0.7755132 (497.46 it/sec) -training >> step=3518100, episode=587 reward=0.7754517 (491.22 it/sec) -training >> step=3518200, episode=587 reward=0.7875247 (550.03 it/sec) -training >> step=3518300, episode=587 reward=0.7679601 (511.78 it/sec) -training >> step=3518400, episode=587 reward=0.7789872 (488.12 it/sec) -training >> step=3518500, episode=587 reward=0.7681718 (471.15 it/sec) -training >> step=3518600, episode=587 reward=0.7738068 (517.47 it/sec) -training >> step=3518700, episode=587 reward=0.7822552 (475.66 it/sec) -training >> step=3518800, episode=587 reward=0.7796862 (493.06 it/sec) -training >> step=3518900, episode=587 reward=0.7866259 (531.90 it/sec) -training >> step=3519000, episode=587 reward=0.7829337 (510.94 it/sec) -training >> step=3519100, episode=587 reward=0.7967238 (495.69 it/sec) -training >> step=3519200, episode=587 reward=0.7779563 (487.95 it/sec) -training >> step=3519300, episode=587 reward=0.7668874 (473.37 it/sec) -training >> step=3519400, episode=587 reward=0.7480599 (476.91 it/sec) -training >> step=3519500, episode=587 reward=0.7735116 (459.30 it/sec) -training >> step=3519600, episode=587 reward=0.7588584 (500.34 it/sec) -training >> step=3519700, episode=587 reward=0.7629666 (488.48 it/sec) -training >> step=3519800, episode=587 reward=0.7900485 (484.84 it/sec) -training >> step=3519900, episode=587 reward=0.7891164 (535.28 it/sec) -training >> step=3520000, episode=587 reward=0.7797473 (551.80 it/sec) -training >> step=3520100, episode=587 reward=0.7484574 (517.40 it/sec) -training >> step=3520200, episode=587 reward=0.757543 (472.31 it/sec) -training >> step=3520300, episode=587 reward=0.7617978 (485.55 it/sec) -training >> step=3520400, episode=587 reward=0.7648336 (521.47 it/sec) -training >> step=3520500, episode=587 reward=0.7796192 (496.74 it/sec) -training >> step=3520600, episode=587 reward=0.784388 (539.58 it/sec) -training >> step=3520700, episode=587 reward=0.7630629 (568.14 it/sec) -training >> step=3520800, episode=587 reward=0.7738364 (484.54 it/sec) -training >> step=3520900, episode=587 reward=0.7848734 (505.92 it/sec) -training >> step=3521000, episode=587 reward=0.7569933 (533.92 it/sec) -training >> step=3521100, episode=587 reward=0.7394714 (552.13 it/sec) -training >> step=3521200, episode=587 reward=0.7763361 (527.43 it/sec) -training >> step=3521300, episode=588 reward=0.759165 (112.57 it/sec) -training >> step=3521400, episode=588 reward=0.7643238 (520.44 it/sec) -training >> step=3521500, episode=588 reward=0.7807516 (445.98 it/sec) -training >> step=3521600, episode=588 reward=0.7695128 (507.16 it/sec) -training >> step=3521700, episode=588 reward=0.7702761 (542.14 it/sec) -training >> step=3521800, episode=588 reward=0.7516912 (533.93 it/sec) -training >> step=3521900, episode=588 reward=0.788491 (500.51 it/sec) -training >> step=3522000, episode=588 reward=0.7849359 (524.78 it/sec) -training >> step=3522100, episode=588 reward=0.7682274 (511.58 it/sec) -training >> step=3522200, episode=588 reward=0.7704053 (527.00 it/sec) -training >> step=3522300, episode=588 reward=0.7697858 (531.95 it/sec) -training >> step=3522400, episode=588 reward=0.7833954 (483.57 it/sec) -training >> step=3522500, episode=588 reward=0.788286 (511.71 it/sec) -training >> step=3522600, episode=588 reward=0.7710419 (476.03 it/sec) -training >> step=3522700, episode=588 reward=0.7806698 (506.44 it/sec) -training >> step=3522800, episode=588 reward=0.7625655 (554.67 it/sec) -training >> step=3522900, episode=588 reward=0.7815656 (523.92 it/sec) -training >> step=3523000, episode=588 reward=0.7736065 (516.19 it/sec) -training >> step=3523100, episode=588 reward=0.7805563 (487.16 it/sec) -training >> step=3523200, episode=588 reward=0.7581199 (540.30 it/sec) -training >> step=3523300, episode=588 reward=0.7641807 (502.52 it/sec) -training >> step=3523400, episode=588 reward=0.7726922 (524.38 it/sec) -training >> step=3523500, episode=588 reward=0.7883709 (537.33 it/sec) -training >> step=3523600, episode=588 reward=0.7517613 (537.85 it/sec) -training >> step=3523700, episode=588 reward=0.7693618 (488.99 it/sec) -training >> step=3523800, episode=588 reward=0.7633871 (525.76 it/sec) -training >> step=3523900, episode=588 reward=0.7840588 (479.38 it/sec) -training >> step=3524000, episode=588 reward=0.7820752 (530.91 it/sec) -training >> step=3524100, episode=588 reward=0.8087561 (511.44 it/sec) -training >> step=3524200, episode=588 reward=0.7675713 (478.65 it/sec) -training >> step=3524300, episode=588 reward=0.7664455 (572.35 it/sec) -training >> step=3524400, episode=588 reward=0.7620018 (463.01 it/sec) -training >> step=3524500, episode=588 reward=0.766739 (493.54 it/sec) -training >> step=3524600, episode=588 reward=0.7695969 (529.73 it/sec) -training >> step=3524700, episode=588 reward=0.7841801 (526.46 it/sec) -training >> step=3524800, episode=588 reward=0.7921753 (518.38 it/sec) -training >> step=3524900, episode=588 reward=0.800177 (512.01 it/sec) -training >> step=3525000, episode=588 reward=0.7819106 (532.60 it/sec) -training >> step=3525100, episode=588 reward=0.7688401 (522.79 it/sec) -training >> step=3525200, episode=588 reward=0.752452 (496.39 it/sec) -training >> step=3525300, episode=588 reward=0.7873209 (501.64 it/sec) -training >> step=3525400, episode=588 reward=0.7461952 (530.88 it/sec) -training >> step=3525500, episode=588 reward=0.7663867 (513.41 it/sec) -training >> step=3525600, episode=588 reward=0.7762388 (528.70 it/sec) -training >> step=3525700, episode=588 reward=0.7618304 (519.15 it/sec) -training >> step=3525800, episode=588 reward=0.7894633 (503.26 it/sec) -training >> step=3525900, episode=588 reward=0.778908 (525.36 it/sec) -training >> step=3526000, episode=588 reward=0.7752741 (513.54 it/sec) -training >> step=3526100, episode=588 reward=0.7707859 (520.82 it/sec) -training >> step=3526200, episode=588 reward=0.7614416 (515.82 it/sec) -training >> step=3526300, episode=588 reward=0.7745676 (497.78 it/sec) -training >> step=3526400, episode=588 reward=0.7765619 (525.65 it/sec) -training >> step=3526500, episode=588 reward=0.7787725 (494.21 it/sec) -training >> step=3526600, episode=588 reward=0.7774507 (521.48 it/sec) -training >> step=3526700, episode=588 reward=0.7755201 (542.70 it/sec) -training >> step=3526800, episode=588 reward=0.7665059 (505.70 it/sec) -training >> step=3526900, episode=588 reward=0.7615955 (500.14 it/sec) -training >> step=3527000, episode=588 reward=0.7482238 (523.38 it/sec) -training >> step=3527100, episode=588 reward=0.771771 (421.53 it/sec) -training >> step=3527200, episode=588 reward=0.794721 (549.92 it/sec) -training >> step=3527300, episode=589 reward=0.7850971 (113.10 it/sec) -training >> step=3527400, episode=589 reward=0.7659506 (517.41 it/sec) -training >> step=3527500, episode=589 reward=0.7843922 (449.72 it/sec) -training >> step=3527600, episode=589 reward=0.7643007 (535.32 it/sec) -training >> step=3527700, episode=589 reward=0.7764535 (479.27 it/sec) -training >> step=3527800, episode=589 reward=0.7804843 (537.94 it/sec) -training >> step=3527900, episode=589 reward=0.7792262 (508.07 it/sec) -training >> step=3528000, episode=589 reward=0.7753693 (520.63 it/sec) -training >> step=3528100, episode=589 reward=0.7542577 (557.94 it/sec) -training >> step=3528200, episode=589 reward=0.7882308 (549.50 it/sec) -training >> step=3528300, episode=589 reward=0.7622318 (474.91 it/sec) -training >> step=3528400, episode=589 reward=0.7657176 (502.03 it/sec) -training >> step=3528500, episode=589 reward=0.7858621 (472.17 it/sec) -training >> step=3528600, episode=589 reward=0.7758388 (479.73 it/sec) -training >> step=3528700, episode=589 reward=0.780487 (467.84 it/sec) -training >> step=3528800, episode=589 reward=0.7583646 (466.62 it/sec) -training >> step=3528900, episode=589 reward=0.7795048 (527.33 it/sec) -training >> step=3529000, episode=589 reward=0.7635641 (502.46 it/sec) -training >> step=3529100, episode=589 reward=0.769992 (436.89 it/sec) -training >> step=3529200, episode=589 reward=0.7623464 (475.10 it/sec) -training >> step=3529300, episode=589 reward=0.7761599 (469.30 it/sec) -training >> step=3529400, episode=589 reward=0.7818061 (510.91 it/sec) -training >> step=3529500, episode=589 reward=0.7616834 (478.88 it/sec) -training >> step=3529600, episode=589 reward=0.7415739 (486.16 it/sec) -training >> step=3529700, episode=589 reward=0.7695089 (491.88 it/sec) -training >> step=3529800, episode=589 reward=0.7628565 (469.23 it/sec) -training >> step=3529900, episode=589 reward=0.7696545 (477.02 it/sec) -training >> step=3530000, episode=589 reward=0.7782987 (447.41 it/sec) -training >> step=3530100, episode=589 reward=0.7773193 (486.84 it/sec) -training >> step=3530200, episode=589 reward=0.7737999 (493.06 it/sec) -training >> step=3530300, episode=589 reward=0.7584254 (475.56 it/sec) -training >> step=3530400, episode=589 reward=0.777351 (437.14 it/sec) -training >> step=3530500, episode=589 reward=0.7809231 (476.53 it/sec) -training >> step=3530600, episode=589 reward=0.7936296 (493.01 it/sec) -training >> step=3530700, episode=589 reward=0.754949 (475.47 it/sec) -training >> step=3530800, episode=589 reward=0.7757279 (484.52 it/sec) -training >> step=3530900, episode=589 reward=0.7571694 (459.99 it/sec) -training >> step=3531000, episode=589 reward=0.7888619 (505.40 it/sec) -training >> step=3531100, episode=589 reward=0.7792563 (508.22 it/sec) -training >> step=3531200, episode=589 reward=0.7731429 (507.91 it/sec) -training >> step=3531300, episode=589 reward=0.7743165 (476.28 it/sec) -training >> step=3531400, episode=589 reward=0.7605074 (493.30 it/sec) -training >> step=3531500, episode=589 reward=0.761646 (490.46 it/sec) -training >> step=3531600, episode=589 reward=0.7785885 (488.49 it/sec) -training >> step=3531700, episode=589 reward=0.7788259 (512.99 it/sec) -training >> step=3531800, episode=589 reward=0.7784923 (483.28 it/sec) -training >> step=3531900, episode=589 reward=0.7553393 (514.37 it/sec) -training >> step=3532000, episode=589 reward=0.7685419 (489.67 it/sec) -training >> step=3532100, episode=589 reward=0.7627752 (483.15 it/sec) -training >> step=3532200, episode=589 reward=0.7544863 (479.82 it/sec) -training >> step=3532300, episode=589 reward=0.7740661 (488.76 it/sec) -training >> step=3532400, episode=589 reward=0.7783829 (511.57 it/sec) -training >> step=3532500, episode=589 reward=0.7659071 (516.54 it/sec) -training >> step=3532600, episode=589 reward=0.7640055 (490.87 it/sec) -training >> step=3532700, episode=589 reward=0.7737187 (492.96 it/sec) -training >> step=3532800, episode=589 reward=0.7797838 (516.86 it/sec) -training >> step=3532900, episode=589 reward=0.7648661 (483.07 it/sec) -training >> step=3533000, episode=589 reward=0.748414 (501.08 it/sec) -training >> step=3533100, episode=589 reward=0.752338 (474.56 it/sec) -training >> step=3533200, episode=589 reward=0.7628139 (517.20 it/sec) -training >> step=3533300, episode=590 reward=0.7677602 (104.48 it/sec) -training >> step=3533400, episode=590 reward=0.7826406 (389.14 it/sec) -training >> step=3533500, episode=590 reward=0.7806286 (455.88 it/sec) -training >> step=3533600, episode=590 reward=0.7576128 (458.04 it/sec) -training >> step=3533700, episode=590 reward=0.7726723 (410.73 it/sec) -training >> step=3533800, episode=590 reward=0.7664237 (414.00 it/sec) -training >> step=3533900, episode=590 reward=0.7638082 (457.65 it/sec) -training >> step=3534000, episode=590 reward=0.7502606 (455.51 it/sec) -training >> step=3534100, episode=590 reward=0.7776601 (465.88 it/sec) -training >> step=3534200, episode=590 reward=0.7745486 (444.33 it/sec) -training >> step=3534300, episode=590 reward=0.7871539 (483.07 it/sec) -training >> step=3534400, episode=590 reward=0.7629405 (452.78 it/sec) -training >> step=3534500, episode=590 reward=0.761413 (465.49 it/sec) -training >> step=3534600, episode=590 reward=0.8008706 (437.13 it/sec) -training >> step=3534700, episode=590 reward=0.7620885 (474.84 it/sec) -training >> step=3534800, episode=590 reward=0.7992229 (471.19 it/sec) -training >> step=3534900, episode=590 reward=0.7813461 (449.55 it/sec) -training >> step=3535000, episode=590 reward=0.7500899 (509.31 it/sec) -training >> step=3535100, episode=590 reward=0.7783581 (419.63 it/sec) -training >> step=3535200, episode=590 reward=0.7798906 (470.82 it/sec) -training >> step=3535300, episode=590 reward=0.7567276 (499.18 it/sec) -training >> step=3535400, episode=590 reward=0.768059 (486.86 it/sec) -training >> step=3535500, episode=590 reward=0.7763944 (509.74 it/sec) -training >> step=3535600, episode=590 reward=0.7824903 (476.73 it/sec) -training >> step=3535700, episode=590 reward=0.7684003 (529.72 it/sec) -training >> step=3535800, episode=590 reward=0.780683 (460.15 it/sec) -training >> step=3535900, episode=590 reward=0.7557075 (496.51 it/sec) -training >> step=3536000, episode=590 reward=0.7569838 (482.95 it/sec) -training >> step=3536100, episode=590 reward=0.7649968 (494.62 it/sec) -training >> step=3536200, episode=590 reward=0.760892 (474.39 it/sec) -training >> step=3536300, episode=590 reward=0.7552668 (418.34 it/sec) -training >> step=3536400, episode=590 reward=0.774901 (479.65 it/sec) -training >> step=3536500, episode=590 reward=0.7546686 (447.60 it/sec) -training >> step=3536600, episode=590 reward=0.7803978 (459.43 it/sec) -training >> step=3536700, episode=590 reward=0.75915 (453.10 it/sec) -training >> step=3536800, episode=590 reward=0.7840512 (505.27 it/sec) -training >> step=3536900, episode=590 reward=0.7678623 (476.16 it/sec) -training >> step=3537000, episode=590 reward=0.7805157 (451.01 it/sec) -training >> step=3537100, episode=590 reward=0.7654497 (496.00 it/sec) -training >> step=3537200, episode=590 reward=0.7590472 (469.70 it/sec) -training >> step=3537300, episode=590 reward=0.756591 (496.37 it/sec) -training >> step=3537400, episode=590 reward=0.7621918 (473.02 it/sec) -training >> step=3537500, episode=590 reward=0.7759124 (491.61 it/sec) -training >> step=3537600, episode=590 reward=0.769067 (454.74 it/sec) -training >> step=3537700, episode=590 reward=0.7656043 (476.62 it/sec) -training >> step=3537800, episode=590 reward=0.7683413 (461.66 it/sec) -training >> step=3537900, episode=590 reward=0.7936722 (538.68 it/sec) -training >> step=3538000, episode=590 reward=0.7891092 (478.05 it/sec) -training >> step=3538100, episode=590 reward=0.7699911 (496.23 it/sec) -training >> step=3538200, episode=590 reward=0.7679968 (472.33 it/sec) -training >> step=3538300, episode=590 reward=0.7546622 (484.90 it/sec) -training >> step=3538400, episode=590 reward=0.7752909 (422.99 it/sec) -training >> step=3538500, episode=590 reward=0.7761201 (452.01 it/sec) -training >> step=3538600, episode=590 reward=0.7817504 (500.26 it/sec) -training >> step=3538700, episode=590 reward=0.7475863 (478.81 it/sec) -training >> step=3538800, episode=590 reward=0.7700803 (477.98 it/sec) -training >> step=3538900, episode=590 reward=0.7856621 (490.65 it/sec) -training >> step=3539000, episode=590 reward=0.7330201 (464.69 it/sec) -training >> step=3539100, episode=590 reward=0.7689522 (504.08 it/sec) -training >> step=3539200, episode=590 reward=0.7487977 (460.37 it/sec) -training >> step=3539300, episode=591 reward=0.7719887 (69.09 it/sec) -training >> step=3539400, episode=591 reward=0.7720762 (412.91 it/sec) -training >> step=3539500, episode=591 reward=0.7805051 (452.10 it/sec) -training >> step=3539600, episode=591 reward=0.7760093 (496.49 it/sec) -training >> step=3539700, episode=591 reward=0.7802312 (479.25 it/sec) -training >> step=3539800, episode=591 reward=0.7957844 (492.45 it/sec) -training >> step=3539900, episode=591 reward=0.771544 (545.53 it/sec) -training >> step=3540000, episode=591 reward=0.758203 (410.81 it/sec) -training >> step=3540100, episode=591 reward=0.7772319 (468.59 it/sec) -training >> step=3540200, episode=591 reward=0.7699126 (531.85 it/sec) -training >> step=3540300, episode=591 reward=0.7728109 (520.85 it/sec) -training >> step=3540400, episode=591 reward=0.7715508 (490.49 it/sec) -training >> step=3540500, episode=591 reward=0.7662132 (522.84 it/sec) -training >> step=3540600, episode=591 reward=0.756193 (529.94 it/sec) -training >> step=3540700, episode=591 reward=0.7703283 (507.74 it/sec) -training >> step=3540800, episode=591 reward=0.761054 (514.81 it/sec) -training >> step=3540900, episode=591 reward=0.7976187 (496.12 it/sec) -training >> step=3541000, episode=591 reward=0.7580019 (511.77 it/sec) -training >> step=3541100, episode=591 reward=0.7737359 (514.21 it/sec) -training >> step=3541200, episode=591 reward=0.782624 (505.61 it/sec) -training >> step=3541300, episode=591 reward=0.7672672 (486.92 it/sec) -training >> step=3541400, episode=591 reward=0.755295 (534.05 it/sec) -training >> step=3541500, episode=591 reward=0.7791497 (546.38 it/sec) -training >> step=3541600, episode=591 reward=0.7466518 (564.88 it/sec) -training >> step=3541700, episode=591 reward=0.7772075 (489.91 it/sec) -training >> step=3541800, episode=591 reward=0.7762077 (511.66 it/sec) -training >> step=3541900, episode=591 reward=0.7888733 (556.51 it/sec) -training >> step=3542000, episode=591 reward=0.7643183 (534.27 it/sec) -training >> step=3542100, episode=591 reward=0.7874619 (538.06 it/sec) -training >> step=3542200, episode=591 reward=0.7669477 (501.83 it/sec) -training >> step=3542300, episode=591 reward=0.7487826 (514.92 it/sec) -training >> step=3542400, episode=591 reward=0.793934 (513.97 it/sec) -training >> step=3542500, episode=591 reward=0.768088 (501.01 it/sec) -training >> step=3542600, episode=591 reward=0.7692114 (517.11 it/sec) -training >> step=3542700, episode=591 reward=0.767346 (504.88 it/sec) -training >> step=3542800, episode=591 reward=0.7681884 (504.42 it/sec) -training >> step=3542900, episode=591 reward=0.7998073 (501.10 it/sec) -training >> step=3543000, episode=591 reward=0.7705044 (511.38 it/sec) -training >> step=3543100, episode=591 reward=0.7872036 (535.21 it/sec) -training >> step=3543200, episode=591 reward=0.7638097 (516.08 it/sec) -training >> step=3543300, episode=591 reward=0.7547866 (527.83 it/sec) -training >> step=3543400, episode=591 reward=0.7953582 (538.13 it/sec) -training >> step=3543500, episode=591 reward=0.7623442 (527.63 it/sec) -training >> step=3543600, episode=591 reward=0.7697345 (508.22 it/sec) -training >> step=3543700, episode=591 reward=0.7974849 (548.83 it/sec) -training >> step=3543800, episode=591 reward=0.7710403 (518.57 it/sec) -training >> step=3543900, episode=591 reward=0.7685702 (522.86 it/sec) -training >> step=3544000, episode=591 reward=0.7448037 (517.89 it/sec) -training >> step=3544100, episode=591 reward=0.7593637 (488.53 it/sec) -training >> step=3544200, episode=591 reward=0.7471677 (469.33 it/sec) -training >> step=3544300, episode=591 reward=0.7938055 (529.93 it/sec) -training >> step=3544400, episode=591 reward=0.7578111 (533.31 it/sec) -training >> step=3544500, episode=591 reward=0.7783785 (506.15 it/sec) -training >> step=3544600, episode=591 reward=0.7335722 (514.85 it/sec) -training >> step=3544700, episode=591 reward=0.7723115 (498.91 it/sec) -training >> step=3544800, episode=591 reward=0.783481 (539.85 it/sec) -training >> step=3544900, episode=591 reward=0.7712772 (535.57 it/sec) -training >> step=3545000, episode=591 reward=0.7592725 (495.86 it/sec) -training >> step=3545100, episode=591 reward=0.7814003 (515.06 it/sec) -training >> step=3545200, episode=591 reward=0.758091 (541.01 it/sec) -training >> step=3545300, episode=592 reward=0.7574593 (50.20 it/sec) -training >> step=3545400, episode=592 reward=0.7583812 (457.80 it/sec) -training >> step=3545500, episode=592 reward=0.786303 (519.06 it/sec) -training >> step=3545600, episode=592 reward=0.7719492 (511.82 it/sec) -training >> step=3545700, episode=592 reward=0.7895359 (489.72 it/sec) -training >> step=3545800, episode=592 reward=0.7880818 (494.33 it/sec) -training >> step=3545900, episode=592 reward=0.7510422 (509.53 it/sec) -training >> step=3546000, episode=592 reward=0.7755879 (537.67 it/sec) -training >> step=3546100, episode=592 reward=0.8011646 (526.59 it/sec) -training >> step=3546200, episode=592 reward=0.7616013 (510.14 it/sec) -training >> step=3546300, episode=592 reward=0.7787645 (498.07 it/sec) -training >> step=3546400, episode=592 reward=0.7656534 (489.93 it/sec) -training >> step=3546500, episode=592 reward=0.7871192 (519.43 it/sec) -training >> step=3546600, episode=592 reward=0.7946377 (523.01 it/sec) -training >> step=3546700, episode=592 reward=0.7777745 (550.73 it/sec) -training >> step=3546800, episode=592 reward=0.7729068 (523.77 it/sec) -training >> step=3546900, episode=592 reward=0.7805665 (565.80 it/sec) -training >> step=3547000, episode=592 reward=0.7889736 (472.54 it/sec) -training >> step=3547100, episode=592 reward=0.7690827 (521.95 it/sec) -training >> step=3547200, episode=592 reward=0.7808991 (567.42 it/sec) -training >> step=3547300, episode=592 reward=0.7682347 (482.36 it/sec) -training >> step=3547400, episode=592 reward=0.7609299 (499.27 it/sec) -training >> step=3547500, episode=592 reward=0.7799517 (517.20 it/sec) -training >> step=3547600, episode=592 reward=0.7748108 (557.84 it/sec) -training >> step=3547700, episode=592 reward=0.7557293 (511.21 it/sec) -training >> step=3547800, episode=592 reward=0.7765218 (539.78 it/sec) -training >> step=3547900, episode=592 reward=0.7686496 (539.41 it/sec) -training >> step=3548000, episode=592 reward=0.7741729 (552.43 it/sec) -training >> step=3548100, episode=592 reward=0.743816 (525.78 it/sec) -training >> step=3548200, episode=592 reward=0.7704276 (513.17 it/sec) -training >> step=3548300, episode=592 reward=0.7750543 (531.96 it/sec) -training >> step=3548400, episode=592 reward=0.7808632 (523.10 it/sec) -training >> step=3548500, episode=592 reward=0.7802525 (524.61 it/sec) -training >> step=3548600, episode=592 reward=0.7639609 (512.70 it/sec) -training >> step=3548700, episode=592 reward=0.7803947 (480.93 it/sec) -training >> step=3548800, episode=592 reward=0.7610666 (475.59 it/sec) -training >> step=3548900, episode=592 reward=0.7688474 (455.74 it/sec) -training >> step=3549000, episode=592 reward=0.7706918 (472.46 it/sec) -training >> step=3549100, episode=592 reward=0.7785792 (507.92 it/sec) -training >> step=3549200, episode=592 reward=0.7615711 (545.13 it/sec) -training >> step=3549300, episode=592 reward=0.7604028 (526.90 it/sec) -training >> step=3549400, episode=592 reward=0.769796 (550.71 it/sec) -training >> step=3549500, episode=592 reward=0.7847627 (484.54 it/sec) -training >> step=3549600, episode=592 reward=0.7797019 (526.03 it/sec) -training >> step=3549700, episode=592 reward=0.788294 (437.35 it/sec) -training >> step=3549800, episode=592 reward=0.7479674 (509.59 it/sec) -training >> step=3549900, episode=592 reward=0.7763281 (524.70 it/sec) -training >> step=3550000, episode=592 reward=0.7767841 (523.87 it/sec) -training >> step=3550100, episode=592 reward=0.7613167 (493.50 it/sec) -training >> step=3550200, episode=592 reward=0.7814885 (512.91 it/sec) -training >> step=3550300, episode=592 reward=0.7698725 (541.72 it/sec) -training >> step=3550400, episode=592 reward=0.7555415 (559.39 it/sec) -training >> step=3550500, episode=592 reward=0.7736847 (564.26 it/sec) -training >> step=3550600, episode=592 reward=0.7579232 (542.09 it/sec) -training >> step=3550700, episode=592 reward=0.7832953 (500.93 it/sec) -training >> step=3550800, episode=592 reward=0.7765636 (544.24 it/sec) -training >> step=3550900, episode=592 reward=0.7915478 (520.85 it/sec) -training >> step=3551000, episode=592 reward=0.7767425 (505.39 it/sec) -training >> step=3551100, episode=592 reward=0.7640693 (500.90 it/sec) -training >> step=3551200, episode=592 reward=0.7807442 (565.57 it/sec) -training >> step=3551300, episode=593 reward=0.7575396 (64.53 it/sec) -training >> step=3551400, episode=593 reward=0.8042732 (481.17 it/sec) -training >> step=3551500, episode=593 reward=0.7574236 (478.36 it/sec) -training >> step=3551600, episode=593 reward=0.7842904 (505.97 it/sec) -training >> step=3551700, episode=593 reward=0.7666639 (478.02 it/sec) -training >> step=3551800, episode=593 reward=0.7700755 (453.91 it/sec) -training >> step=3551900, episode=593 reward=0.7855778 (441.73 it/sec) -training >> step=3552000, episode=593 reward=0.7766759 (443.08 it/sec) -training >> step=3552100, episode=593 reward=0.7515302 (524.37 it/sec) -training >> step=3552200, episode=593 reward=0.7761021 (522.37 it/sec) -training >> step=3552300, episode=593 reward=0.7559249 (473.29 it/sec) -training >> step=3552400, episode=593 reward=0.7742648 (492.96 it/sec) -training >> step=3552500, episode=593 reward=0.7826006 (515.71 it/sec) -training >> step=3552600, episode=593 reward=0.7821699 (516.19 it/sec) -training >> step=3552700, episode=593 reward=0.7853787 (506.99 it/sec) -training >> step=3552800, episode=593 reward=0.7803554 (524.18 it/sec) -training >> step=3552900, episode=593 reward=0.7689155 (494.45 it/sec) -training >> step=3553000, episode=593 reward=0.7567837 (522.91 it/sec) -training >> step=3553100, episode=593 reward=0.7599218 (516.25 it/sec) -training >> step=3553200, episode=593 reward=0.7788426 (537.90 it/sec) -training >> step=3553300, episode=593 reward=0.7619219 (547.56 it/sec) -training >> step=3553400, episode=593 reward=0.7863003 (516.38 it/sec) -training >> step=3553500, episode=593 reward=0.7613518 (514.94 it/sec) -training >> step=3553600, episode=593 reward=0.7858055 (519.11 it/sec) -training >> step=3553700, episode=593 reward=0.7741243 (493.88 it/sec) -training >> step=3553800, episode=593 reward=0.7761398 (471.96 it/sec) -training >> step=3553900, episode=593 reward=0.7689738 (502.74 it/sec) -training >> step=3554000, episode=593 reward=0.7852263 (518.46 it/sec) -training >> step=3554100, episode=593 reward=0.7839966 (493.69 it/sec) -training >> step=3554200, episode=593 reward=0.7892725 (521.25 it/sec) -training >> step=3554300, episode=593 reward=0.7824124 (514.14 it/sec) -training >> step=3554400, episode=593 reward=0.7879312 (525.19 it/sec) -training >> step=3554500, episode=593 reward=0.7686102 (526.36 it/sec) -training >> step=3554600, episode=593 reward=0.7634908 (528.08 it/sec) -training >> step=3554700, episode=593 reward=0.771679 (526.40 it/sec) -training >> step=3554800, episode=593 reward=0.7840097 (528.03 it/sec) -training >> step=3554900, episode=593 reward=0.774906 (513.90 it/sec) -training >> step=3555000, episode=593 reward=0.7556354 (490.49 it/sec) -training >> step=3555100, episode=593 reward=0.7715891 (501.59 it/sec) -training >> step=3555200, episode=593 reward=0.7516032 (515.01 it/sec) -training >> step=3555300, episode=593 reward=0.7923268 (514.41 it/sec) -training >> step=3555400, episode=593 reward=0.7692447 (539.05 it/sec) -training >> step=3555500, episode=593 reward=0.7584088 (527.65 it/sec) -training >> step=3555600, episode=593 reward=0.779721 (478.20 it/sec) -training >> step=3555700, episode=593 reward=0.7818673 (526.01 it/sec) -training >> step=3555800, episode=593 reward=0.7382682 (569.93 it/sec) -training >> step=3555900, episode=593 reward=0.7845621 (480.64 it/sec) -training >> step=3556000, episode=593 reward=0.767518 (526.15 it/sec) -training >> step=3556100, episode=593 reward=0.7494538 (518.42 it/sec) -training >> step=3556200, episode=593 reward=0.7451861 (543.56 it/sec) -training >> step=3556300, episode=593 reward=0.7740805 (509.96 it/sec) -training >> step=3556400, episode=593 reward=0.7892 (502.68 it/sec) -training >> step=3556500, episode=593 reward=0.7602016 (560.87 it/sec) -training >> step=3556600, episode=593 reward=0.7874356 (487.60 it/sec) -training >> step=3556700, episode=593 reward=0.787654 (495.84 it/sec) -training >> step=3556800, episode=593 reward=0.7843422 (504.81 it/sec) -training >> step=3556900, episode=593 reward=0.7638338 (557.52 it/sec) -training >> step=3557000, episode=593 reward=0.7750554 (496.37 it/sec) -training >> step=3557100, episode=593 reward=0.7717566 (501.52 it/sec) -training >> step=3557200, episode=593 reward=0.7651511 (528.74 it/sec) -training >> step=3557300, episode=594 reward=0.7910558 (133.04 it/sec) -training >> step=3557400, episode=594 reward=0.7905651 (544.92 it/sec) -training >> step=3557500, episode=594 reward=0.7565515 (525.21 it/sec) -training >> step=3557600, episode=594 reward=0.7654026 (528.28 it/sec) -training >> step=3557700, episode=594 reward=0.7799687 (544.83 it/sec) -training >> step=3557800, episode=594 reward=0.7759368 (540.49 it/sec) -training >> step=3557900, episode=594 reward=0.7493402 (504.59 it/sec) -training >> step=3558000, episode=594 reward=0.7892908 (530.70 it/sec) -training >> step=3558100, episode=594 reward=0.7739421 (524.93 it/sec) -training >> step=3558200, episode=594 reward=0.7559789 (481.77 it/sec) -training >> step=3558300, episode=594 reward=0.7697566 (566.46 it/sec) -training >> step=3558400, episode=594 reward=0.7713779 (510.34 it/sec) -training >> step=3558500, episode=594 reward=0.7731593 (525.74 it/sec) -training >> step=3558600, episode=594 reward=0.7955782 (504.47 it/sec) -training >> step=3558700, episode=594 reward=0.7756409 (561.81 it/sec) -training >> step=3558800, episode=594 reward=0.7976264 (473.90 it/sec) -training >> step=3558900, episode=594 reward=0.7502708 (514.61 it/sec) -training >> step=3559000, episode=594 reward=0.7688742 (555.56 it/sec) -training >> step=3559100, episode=594 reward=0.7587442 (551.33 it/sec) -training >> step=3559200, episode=594 reward=0.7727397 (491.15 it/sec) -training >> step=3559300, episode=594 reward=0.7500549 (528.88 it/sec) -training >> step=3559400, episode=594 reward=0.7796811 (498.14 it/sec) -training >> step=3559500, episode=594 reward=0.7693865 (522.01 it/sec) -training >> step=3559600, episode=594 reward=0.7762095 (504.61 it/sec) -training >> step=3559700, episode=594 reward=0.7620987 (531.38 it/sec) -training >> step=3559800, episode=594 reward=0.7781171 (541.87 it/sec) -training >> step=3559900, episode=594 reward=0.7885364 (481.63 it/sec) -training >> step=3560000, episode=594 reward=0.751204 (514.18 it/sec) -training >> step=3560100, episode=594 reward=0.7789339 (487.35 it/sec) -training >> step=3560200, episode=594 reward=0.7747525 (517.65 it/sec) -training >> step=3560300, episode=594 reward=0.7738541 (534.68 it/sec) -training >> step=3560400, episode=594 reward=0.7753912 (527.47 it/sec) -training >> step=3560500, episode=594 reward=0.767877 (543.48 it/sec) -training >> step=3560600, episode=594 reward=0.7541229 (504.43 it/sec) -training >> step=3560700, episode=594 reward=0.7647853 (540.51 it/sec) -training >> step=3560800, episode=594 reward=0.7797082 (437.51 it/sec) -training >> step=3560900, episode=594 reward=0.7705906 (495.47 it/sec) -training >> step=3561000, episode=594 reward=0.7789235 (477.85 it/sec) -training >> step=3561100, episode=594 reward=0.7511345 (462.79 it/sec) -training >> step=3561200, episode=594 reward=0.763917 (457.53 it/sec) -training >> step=3561300, episode=594 reward=0.7643306 (507.82 it/sec) -training >> step=3561400, episode=594 reward=0.7858241 (467.06 it/sec) -training >> step=3561500, episode=594 reward=0.7884785 (505.03 it/sec) -training >> step=3561600, episode=594 reward=0.7670999 (478.79 it/sec) -training >> step=3561700, episode=594 reward=0.7651557 (432.95 it/sec) -training >> step=3561800, episode=594 reward=0.76068 (516.74 it/sec) -training >> step=3561900, episode=594 reward=0.7636663 (492.16 it/sec) -training >> step=3562000, episode=594 reward=0.7834604 (469.81 it/sec) -training >> step=3562100, episode=594 reward=0.7527521 (502.18 it/sec) -training >> step=3562200, episode=594 reward=0.7771436 (512.98 it/sec) -training >> step=3562300, episode=594 reward=0.757196 (508.81 it/sec) -training >> step=3562400, episode=594 reward=0.7631925 (440.17 it/sec) -training >> step=3562500, episode=594 reward=0.7382016 (521.99 it/sec) -training >> step=3562600, episode=594 reward=0.7544192 (494.15 it/sec) -training >> step=3562700, episode=594 reward=0.7579143 (500.82 it/sec) -training >> step=3562800, episode=594 reward=0.7792259 (461.97 it/sec) -training >> step=3562900, episode=594 reward=0.776202 (469.04 it/sec) -training >> step=3563000, episode=594 reward=0.7513129 (509.98 it/sec) -training >> step=3563100, episode=594 reward=0.7561829 (483.80 it/sec) -training >> step=3563200, episode=594 reward=0.7936606 (466.91 it/sec) -training >> step=3563300, episode=595 reward=0.763383 (115.31 it/sec) -training >> step=3563400, episode=595 reward=0.7677519 (503.62 it/sec) -training >> step=3563500, episode=595 reward=0.7845863 (510.20 it/sec) -training >> step=3563600, episode=595 reward=0.7906418 (543.64 it/sec) -training >> step=3563700, episode=595 reward=0.7791579 (530.35 it/sec) -training >> step=3563800, episode=595 reward=0.7645991 (534.20 it/sec) -training >> step=3563900, episode=595 reward=0.77324 (516.72 it/sec) -training >> step=3564000, episode=595 reward=0.777512 (549.79 it/sec) -training >> step=3564100, episode=595 reward=0.7834754 (496.94 it/sec) -training >> step=3564200, episode=595 reward=0.7678002 (489.37 it/sec) -training >> step=3564300, episode=595 reward=0.772199 (500.39 it/sec) -training >> step=3564400, episode=595 reward=0.7700548 (544.20 it/sec) -training >> step=3564500, episode=595 reward=0.7951332 (535.44 it/sec) -training >> step=3564600, episode=595 reward=0.7754089 (556.04 it/sec) -training >> step=3564700, episode=595 reward=0.7646638 (518.67 it/sec) -training >> step=3564800, episode=595 reward=0.753619 (534.33 it/sec) -training >> step=3564900, episode=595 reward=0.7804857 (497.07 it/sec) -training >> step=3565000, episode=595 reward=0.7664308 (523.76 it/sec) -training >> step=3565100, episode=595 reward=0.7676771 (515.20 it/sec) -training >> step=3565200, episode=595 reward=0.786956 (491.05 it/sec) -training >> step=3565300, episode=595 reward=0.7751641 (530.67 it/sec) -training >> step=3565400, episode=595 reward=0.7575428 (543.20 it/sec) -training >> step=3565500, episode=595 reward=0.7657781 (504.43 it/sec) -training >> step=3565600, episode=595 reward=0.7730885 (514.99 it/sec) -training >> step=3565700, episode=595 reward=0.7788225 (534.28 it/sec) -training >> step=3565800, episode=595 reward=0.7757024 (527.99 it/sec) -training >> step=3565900, episode=595 reward=0.7748546 (529.39 it/sec) -training >> step=3566000, episode=595 reward=0.764658 (539.08 it/sec) -training >> step=3566100, episode=595 reward=0.7815286 (458.78 it/sec) -training >> step=3566200, episode=595 reward=0.7748368 (535.97 it/sec) -training >> step=3566300, episode=595 reward=0.789875 (517.42 it/sec) -training >> step=3566400, episode=595 reward=0.7855488 (502.93 it/sec) -training >> step=3566500, episode=595 reward=0.776809 (579.87 it/sec) -training >> step=3566600, episode=595 reward=0.7601628 (496.38 it/sec) -training >> step=3566700, episode=595 reward=0.7737494 (499.95 it/sec) -training >> step=3566800, episode=595 reward=0.7716661 (537.57 it/sec) -training >> step=3566900, episode=595 reward=0.7660931 (482.83 it/sec) -training >> step=3567000, episode=595 reward=0.7822531 (468.45 it/sec) -training >> step=3567100, episode=595 reward=0.7769457 (539.66 it/sec) -training >> step=3567200, episode=595 reward=0.7697513 (517.33 it/sec) -training >> step=3567300, episode=595 reward=0.764125 (536.53 it/sec) -training >> step=3567400, episode=595 reward=0.7846973 (521.90 it/sec) -training >> step=3567500, episode=595 reward=0.7744601 (517.06 it/sec) -training >> step=3567600, episode=595 reward=0.7684856 (561.76 it/sec) -training >> step=3567700, episode=595 reward=0.7892435 (521.33 it/sec) -training >> step=3567800, episode=595 reward=0.7654718 (501.43 it/sec) -training >> step=3567900, episode=595 reward=0.7307482 (539.27 it/sec) -training >> step=3568000, episode=595 reward=0.7628604 (578.54 it/sec) -training >> step=3568100, episode=595 reward=0.774439 (513.24 it/sec) -training >> step=3568200, episode=595 reward=0.7744687 (539.79 it/sec) -training >> step=3568300, episode=595 reward=0.7633355 (483.14 it/sec) -training >> step=3568400, episode=595 reward=0.766633 (549.33 it/sec) -training >> step=3568500, episode=595 reward=0.7795812 (542.36 it/sec) -training >> step=3568600, episode=595 reward=0.7616633 (546.88 it/sec) -training >> step=3568700, episode=595 reward=0.7907594 (548.27 it/sec) -training >> step=3568800, episode=595 reward=0.7766074 (540.81 it/sec) -training >> step=3568900, episode=595 reward=0.7740678 (503.00 it/sec) -training >> step=3569000, episode=595 reward=0.7644105 (527.07 it/sec) -training >> step=3569100, episode=595 reward=0.7565539 (571.04 it/sec) -training >> step=3569200, episode=595 reward=0.7642726 (518.93 it/sec) -training >> step=3569300, episode=596 reward=0.7875851 (44.88 it/sec) -training >> step=3569400, episode=596 reward=0.7516639 (491.54 it/sec) -training >> step=3569500, episode=596 reward=0.7811015 (535.59 it/sec) -training >> step=3569600, episode=596 reward=0.7932964 (492.42 it/sec) -training >> step=3569700, episode=596 reward=0.7868146 (542.53 it/sec) -training >> step=3569800, episode=596 reward=0.7599535 (480.96 it/sec) -training >> step=3569900, episode=596 reward=0.7837479 (515.95 it/sec) -training >> step=3570000, episode=596 reward=0.7637909 (549.60 it/sec) -training >> step=3570100, episode=596 reward=0.7819249 (540.18 it/sec) -training >> step=3570200, episode=596 reward=0.783895 (520.04 it/sec) -training >> step=3570300, episode=596 reward=0.7601495 (504.72 it/sec) -training >> step=3570400, episode=596 reward=0.7710667 (481.80 it/sec) -training >> step=3570500, episode=596 reward=0.7981269 (517.83 it/sec) -training >> step=3570600, episode=596 reward=0.7768937 (572.88 it/sec) -training >> step=3570700, episode=596 reward=0.7589833 (549.37 it/sec) -training >> step=3570800, episode=596 reward=0.7618927 (560.06 it/sec) -training >> step=3570900, episode=596 reward=0.7621026 (468.14 it/sec) -training >> step=3571000, episode=596 reward=0.7550455 (530.34 it/sec) -training >> step=3571100, episode=596 reward=0.7744923 (541.98 it/sec) -training >> step=3571200, episode=596 reward=0.7738397 (500.30 it/sec) -training >> step=3571300, episode=596 reward=0.7879839 (545.54 it/sec) -training >> step=3571400, episode=596 reward=0.7744735 (493.77 it/sec) -training >> step=3571500, episode=596 reward=0.7762647 (479.81 it/sec) -training >> step=3571600, episode=596 reward=0.784817 (534.57 it/sec) -training >> step=3571700, episode=596 reward=0.7606809 (530.55 it/sec) -training >> step=3571800, episode=596 reward=0.7759672 (523.80 it/sec) -training >> step=3571900, episode=596 reward=0.7949843 (458.60 it/sec) -training >> step=3572000, episode=596 reward=0.7848606 (488.98 it/sec) -training >> step=3572100, episode=596 reward=0.7868809 (516.75 it/sec) -training >> step=3572200, episode=596 reward=0.764035 (521.71 it/sec) -training >> step=3572300, episode=596 reward=0.7875787 (536.46 it/sec) -training >> step=3572400, episode=596 reward=0.7669783 (543.77 it/sec) -training >> step=3572500, episode=596 reward=0.7768924 (448.66 it/sec) -training >> step=3572600, episode=596 reward=0.7719914 (502.08 it/sec) -training >> step=3572700, episode=596 reward=0.7593215 (530.71 it/sec) -training >> step=3572800, episode=596 reward=0.7862272 (518.54 it/sec) -training >> step=3572900, episode=596 reward=0.7723682 (522.24 it/sec) -training >> step=3573000, episode=596 reward=0.7521003 (525.52 it/sec) -training >> step=3573100, episode=596 reward=0.7848176 (500.42 it/sec) -training >> step=3573200, episode=596 reward=0.7723833 (512.10 it/sec) -training >> step=3573300, episode=596 reward=0.7740913 (549.95 it/sec) -training >> step=3573400, episode=596 reward=0.7612462 (543.10 it/sec) -training >> step=3573500, episode=596 reward=0.77065 (524.24 it/sec) -training >> step=3573600, episode=596 reward=0.7710903 (511.56 it/sec) -training >> step=3573700, episode=596 reward=0.7662288 (506.66 it/sec) -training >> step=3573800, episode=596 reward=0.7603682 (539.00 it/sec) -training >> step=3573900, episode=596 reward=0.7650084 (555.14 it/sec) -training >> step=3574000, episode=596 reward=0.7971917 (470.95 it/sec) -training >> step=3574100, episode=596 reward=0.7372972 (503.66 it/sec) -training >> step=3574200, episode=596 reward=0.7717501 (551.47 it/sec) -training >> step=3574300, episode=596 reward=0.756679 (532.84 it/sec) -training >> step=3574400, episode=596 reward=0.7656478 (517.15 it/sec) -training >> step=3574500, episode=596 reward=0.7797878 (521.39 it/sec) -training >> step=3574600, episode=596 reward=0.7624993 (541.02 it/sec) -training >> step=3574700, episode=596 reward=0.779046 (509.40 it/sec) -training >> step=3574800, episode=596 reward=0.7688901 (540.36 it/sec) -training >> step=3574900, episode=596 reward=0.7733525 (550.28 it/sec) -training >> step=3575000, episode=596 reward=0.7905881 (521.40 it/sec) -training >> step=3575100, episode=596 reward=0.7804993 (518.58 it/sec) -training >> step=3575200, episode=596 reward=0.7893455 (519.18 it/sec) -training >> step=3575300, episode=597 reward=0.7647564 (45.95 it/sec) -training >> step=3575400, episode=597 reward=0.7816647 (506.26 it/sec) -training >> step=3575500, episode=597 reward=0.7597296 (532.68 it/sec) -training >> step=3575600, episode=597 reward=0.7802591 (518.47 it/sec) -training >> step=3575700, episode=597 reward=0.7620964 (525.90 it/sec) -training >> step=3575800, episode=597 reward=0.7539749 (510.26 it/sec) -training >> step=3575900, episode=597 reward=0.7614027 (549.06 it/sec) -training >> step=3576000, episode=597 reward=0.7617987 (531.88 it/sec) -training >> step=3576100, episode=597 reward=0.7618054 (540.28 it/sec) -training >> step=3576200, episode=597 reward=0.7778439 (530.53 it/sec) -training >> step=3576300, episode=597 reward=0.761098 (534.72 it/sec) -training >> step=3576400, episode=597 reward=0.7516272 (504.55 it/sec) -training >> step=3576500, episode=597 reward=0.7456661 (550.77 it/sec) -training >> step=3576600, episode=597 reward=0.7689283 (545.65 it/sec) -training >> step=3576700, episode=597 reward=0.7791506 (542.15 it/sec) -training >> step=3576800, episode=597 reward=0.7758805 (528.65 it/sec) -training >> step=3576900, episode=597 reward=0.7741347 (481.12 it/sec) -training >> step=3577000, episode=597 reward=0.7803146 (522.99 it/sec) -training >> step=3577100, episode=597 reward=0.7731041 (533.21 it/sec) -training >> step=3577200, episode=597 reward=0.7897094 (540.99 it/sec) -training >> step=3577300, episode=597 reward=0.7729834 (495.97 it/sec) -training >> step=3577400, episode=597 reward=0.7912614 (509.22 it/sec) -training >> step=3577500, episode=597 reward=0.7647879 (553.08 it/sec) -training >> step=3577600, episode=597 reward=0.7554466 (513.73 it/sec) -training >> step=3577700, episode=597 reward=0.7640651 (525.48 it/sec) -training >> step=3577800, episode=597 reward=0.7928262 (530.65 it/sec) -training >> step=3577900, episode=597 reward=0.7848808 (525.39 it/sec) -training >> step=3578000, episode=597 reward=0.762975 (496.29 it/sec) -training >> step=3578100, episode=597 reward=0.7836605 (523.55 it/sec) -training >> step=3578200, episode=597 reward=0.7933675 (524.92 it/sec) -training >> step=3578300, episode=597 reward=0.795963 (564.06 it/sec) -training >> step=3578400, episode=597 reward=0.7718289 (543.40 it/sec) -training >> step=3578500, episode=597 reward=0.7612896 (523.42 it/sec) -training >> step=3578600, episode=597 reward=0.7733627 (543.27 it/sec) -training >> step=3578700, episode=597 reward=0.7796572 (538.01 it/sec) -training >> step=3578800, episode=597 reward=0.7676305 (508.08 it/sec) -training >> step=3578900, episode=597 reward=0.7812869 (521.66 it/sec) -training >> step=3579000, episode=597 reward=0.7720968 (573.10 it/sec) -training >> step=3579100, episode=597 reward=0.7552128 (531.56 it/sec) -training >> step=3579200, episode=597 reward=0.7807313 (498.40 it/sec) -training >> step=3579300, episode=597 reward=0.7742839 (562.71 it/sec) -training >> step=3579400, episode=597 reward=0.7754652 (551.40 it/sec) -training >> step=3579500, episode=597 reward=0.7674151 (535.16 it/sec) -training >> step=3579600, episode=597 reward=0.7591535 (492.89 it/sec) -training >> step=3579700, episode=597 reward=0.7680808 (534.96 it/sec) -training >> step=3579800, episode=597 reward=0.7909589 (547.45 it/sec) -training >> step=3579900, episode=597 reward=0.7769746 (515.14 it/sec) -training >> step=3580000, episode=597 reward=0.7726123 (517.89 it/sec) -training >> step=3580100, episode=597 reward=0.7570411 (548.49 it/sec) -training >> step=3580200, episode=597 reward=0.7701992 (538.78 it/sec) -training >> step=3580300, episode=597 reward=0.7814883 (517.83 it/sec) -training >> step=3580400, episode=597 reward=0.7641395 (568.04 it/sec) -training >> step=3580500, episode=597 reward=0.770084 (538.63 it/sec) -training >> step=3580600, episode=597 reward=0.7637593 (526.60 it/sec) -training >> step=3580700, episode=597 reward=0.7494918 (516.16 it/sec) -training >> step=3580800, episode=597 reward=0.7878736 (537.21 it/sec) -training >> step=3580900, episode=597 reward=0.7713593 (563.26 it/sec) -training >> step=3581000, episode=597 reward=0.7432799 (524.24 it/sec) -training >> step=3581100, episode=597 reward=0.7569187 (509.69 it/sec) -training >> step=3581200, episode=597 reward=0.7735202 (539.14 it/sec) -training >> step=3581300, episode=598 reward=0.7671647 (43.31 it/sec) -training >> step=3581400, episode=598 reward=0.7899266 (512.60 it/sec) -training >> step=3581500, episode=598 reward=0.7798373 (510.21 it/sec) -training >> step=3581600, episode=598 reward=0.7628832 (469.30 it/sec) -training >> step=3581700, episode=598 reward=0.7552341 (541.27 it/sec) -training >> step=3581800, episode=598 reward=0.7729692 (496.02 it/sec) -training >> step=3581900, episode=598 reward=0.7736033 (497.57 it/sec) -training >> step=3582000, episode=598 reward=0.7721313 (487.66 it/sec) -training >> step=3582100, episode=598 reward=0.8053503 (564.63 it/sec) -training >> step=3582200, episode=598 reward=0.8037702 (459.11 it/sec) -training >> step=3582300, episode=598 reward=0.7871655 (496.89 it/sec) -training >> step=3582400, episode=598 reward=0.7689038 (566.90 it/sec) -training >> step=3582500, episode=598 reward=0.7644089 (529.45 it/sec) -training >> step=3582600, episode=598 reward=0.770838 (502.22 it/sec) -training >> step=3582700, episode=598 reward=0.7752732 (539.90 it/sec) -training >> step=3582800, episode=598 reward=0.7624533 (495.91 it/sec) -training >> step=3582900, episode=598 reward=0.7571192 (543.57 it/sec) -training >> step=3583000, episode=598 reward=0.7587989 (502.49 it/sec) -training >> step=3583100, episode=598 reward=0.7753616 (564.80 it/sec) -training >> step=3583200, episode=598 reward=0.7635038 (535.39 it/sec) -training >> step=3583300, episode=598 reward=0.7643131 (542.31 it/sec) -training >> step=3583400, episode=598 reward=0.7623234 (529.38 it/sec) -training >> step=3583500, episode=598 reward=0.7519294 (531.40 it/sec) -training >> step=3583600, episode=598 reward=0.7716433 (546.71 it/sec) -training >> step=3583700, episode=598 reward=0.7871244 (543.22 it/sec) -training >> step=3583800, episode=598 reward=0.794475 (513.23 it/sec) -training >> step=3583900, episode=598 reward=0.7779785 (507.79 it/sec) -training >> step=3584000, episode=598 reward=0.7669283 (494.41 it/sec) -training >> step=3584100, episode=598 reward=0.7615696 (471.21 it/sec) -training >> step=3584200, episode=598 reward=0.7781216 (571.52 it/sec) -training >> step=3584300, episode=598 reward=0.7869854 (482.43 it/sec) -training >> step=3584400, episode=598 reward=0.769807 (525.00 it/sec) -training >> step=3584500, episode=598 reward=0.7659507 (542.56 it/sec) -training >> step=3584600, episode=598 reward=0.7862254 (512.40 it/sec) -training >> step=3584700, episode=598 reward=0.7805694 (517.31 it/sec) -training >> step=3584800, episode=598 reward=0.7676335 (504.06 it/sec) -training >> step=3584900, episode=598 reward=0.7924331 (559.06 it/sec) -training >> step=3585000, episode=598 reward=0.7652828 (519.63 it/sec) -training >> step=3585100, episode=598 reward=0.7562108 (563.19 it/sec) -training >> step=3585200, episode=598 reward=0.7606554 (546.17 it/sec) -training >> step=3585300, episode=598 reward=0.7569178 (564.06 it/sec) -training >> step=3585400, episode=598 reward=0.7541685 (535.30 it/sec) -training >> step=3585500, episode=598 reward=0.7816572 (543.07 it/sec) -training >> step=3585600, episode=598 reward=0.7651838 (539.22 it/sec) -training >> step=3585700, episode=598 reward=0.7622689 (573.21 it/sec) -training >> step=3585800, episode=598 reward=0.775858 (492.31 it/sec) -training >> step=3585900, episode=598 reward=0.7487711 (540.66 it/sec) -training >> step=3586000, episode=598 reward=0.7540848 (519.21 it/sec) -training >> step=3586100, episode=598 reward=0.7877001 (548.36 it/sec) -training >> step=3586200, episode=598 reward=0.78667 (556.59 it/sec) -training >> step=3586300, episode=598 reward=0.7794932 (521.16 it/sec) -training >> step=3586400, episode=598 reward=0.7804286 (546.43 it/sec) -training >> step=3586500, episode=598 reward=0.7716571 (511.95 it/sec) -training >> step=3586600, episode=598 reward=0.7873775 (530.45 it/sec) -training >> step=3586700, episode=598 reward=0.7535981 (516.41 it/sec) -training >> step=3586800, episode=598 reward=0.7790044 (576.27 it/sec) -training >> step=3586900, episode=598 reward=0.7672732 (482.62 it/sec) -training >> step=3587000, episode=598 reward=0.7597555 (517.12 it/sec) -training >> step=3587100, episode=598 reward=0.7619447 (539.90 it/sec) -training >> step=3587200, episode=598 reward=0.7821587 (574.45 it/sec) -training >> step=3587300, episode=599 reward=0.7763209 (75.78 it/sec) -training >> step=3587400, episode=599 reward=0.7761567 (530.94 it/sec) -training >> step=3587500, episode=599 reward=0.7756516 (493.69 it/sec) -training >> step=3587600, episode=599 reward=0.7719827 (531.64 it/sec) -training >> step=3587700, episode=599 reward=0.7820925 (522.33 it/sec) -training >> step=3587800, episode=599 reward=0.7595584 (556.29 it/sec) -training >> step=3587900, episode=599 reward=0.7718639 (527.50 it/sec) -training >> step=3588000, episode=599 reward=0.7929608 (522.90 it/sec) -training >> step=3588100, episode=599 reward=0.7292717 (532.06 it/sec) -training >> step=3588200, episode=599 reward=0.7874159 (556.80 it/sec) -training >> step=3588300, episode=599 reward=0.801415 (538.26 it/sec) -training >> step=3588400, episode=599 reward=0.7922322 (573.18 it/sec) -training >> step=3588500, episode=599 reward=0.7724462 (531.38 it/sec) -training >> step=3588600, episode=599 reward=0.7768487 (465.14 it/sec) -training >> step=3588700, episode=599 reward=0.7741023 (528.50 it/sec) -training >> step=3588800, episode=599 reward=0.7828141 (507.63 it/sec) -training >> step=3588900, episode=599 reward=0.7892669 (523.04 it/sec) -training >> step=3589000, episode=599 reward=0.7703422 (505.71 it/sec) -training >> step=3589100, episode=599 reward=0.7861047 (544.18 it/sec) -training >> step=3589200, episode=599 reward=0.7759558 (515.44 it/sec) -training >> step=3589300, episode=599 reward=0.7750005 (513.97 it/sec) -training >> step=3589400, episode=599 reward=0.785496 (563.63 it/sec) -training >> step=3589500, episode=599 reward=0.7683706 (539.23 it/sec) -training >> step=3589600, episode=599 reward=0.7577642 (517.51 it/sec) -training >> step=3589700, episode=599 reward=0.775575 (508.43 it/sec) -training >> step=3589800, episode=599 reward=0.779415 (521.60 it/sec) -training >> step=3589900, episode=599 reward=0.7755349 (534.88 it/sec) -training >> step=3590000, episode=599 reward=0.7758226 (537.95 it/sec) -training >> step=3590100, episode=599 reward=0.7937571 (510.99 it/sec) -training >> step=3590200, episode=599 reward=0.773155 (533.85 it/sec) -training >> step=3590300, episode=599 reward=0.7628656 (518.23 it/sec) -training >> step=3590400, episode=599 reward=0.7740831 (507.12 it/sec) -training >> step=3590500, episode=599 reward=0.7673162 (545.36 it/sec) -training >> step=3590600, episode=599 reward=0.7793846 (506.54 it/sec) -training >> step=3590700, episode=599 reward=0.7756567 (511.20 it/sec) -training >> step=3590800, episode=599 reward=0.7646031 (529.08 it/sec) -training >> step=3590900, episode=599 reward=0.7702211 (567.51 it/sec) -training >> step=3591000, episode=599 reward=0.7793452 (499.45 it/sec) -training >> step=3591100, episode=599 reward=0.7954309 (537.73 it/sec) -training >> step=3591200, episode=599 reward=0.7726062 (568.14 it/sec) -training >> step=3591300, episode=599 reward=0.7682456 (508.57 it/sec) -training >> step=3591400, episode=599 reward=0.7771872 (524.51 it/sec) -training >> step=3591500, episode=599 reward=0.763243 (536.62 it/sec) -training >> step=3591600, episode=599 reward=0.7467549 (554.03 it/sec) -training >> step=3591700, episode=599 reward=0.7531584 (529.84 it/sec) -training >> step=3591800, episode=599 reward=0.7546787 (532.26 it/sec) -training >> step=3591900, episode=599 reward=0.7693395 (547.23 it/sec) -training >> step=3592000, episode=599 reward=0.7709259 (552.67 it/sec) -training >> step=3592100, episode=599 reward=0.7482675 (501.29 it/sec) -training >> step=3592200, episode=599 reward=0.7826818 (539.57 it/sec) -training >> step=3592300, episode=599 reward=0.7803183 (526.58 it/sec) -training >> step=3592400, episode=599 reward=0.7756339 (532.33 it/sec) -training >> step=3592500, episode=599 reward=0.7597683 (551.87 it/sec) -training >> step=3592600, episode=599 reward=0.7801769 (535.42 it/sec) -training >> step=3592700, episode=599 reward=0.7908323 (516.84 it/sec) -training >> step=3592800, episode=599 reward=0.7585118 (503.29 it/sec) -training >> step=3592900, episode=599 reward=0.7792846 (543.38 it/sec) -training >> step=3593000, episode=599 reward=0.7807992 (564.79 it/sec) -training >> step=3593100, episode=599 reward=0.7664807 (531.75 it/sec) -training >> step=3593200, episode=599 reward=0.7574686 (536.69 it/sec) -training >> step=3593300, episode=600 reward=0.7735643 (124.41 it/sec) -training >> step=3593400, episode=600 reward=0.7567535 (484.90 it/sec) -training >> step=3593500, episode=600 reward=0.7767629 (501.14 it/sec) -training >> step=3593600, episode=600 reward=0.780197 (516.20 it/sec) -training >> step=3593700, episode=600 reward=0.7932461 (508.72 it/sec) -training >> step=3593800, episode=600 reward=0.7720529 (480.54 it/sec) -training >> step=3593900, episode=600 reward=0.7697715 (511.58 it/sec) -training >> step=3594000, episode=600 reward=0.7891846 (471.77 it/sec) -training >> step=3594100, episode=600 reward=0.7718493 (456.84 it/sec) -training >> step=3594200, episode=600 reward=0.7849437 (442.85 it/sec) -training >> step=3594300, episode=600 reward=0.7811888 (342.92 it/sec) -training >> step=3594400, episode=600 reward=0.79107 (452.32 it/sec) -training >> step=3594500, episode=600 reward=0.7770892 (468.03 it/sec) -training >> step=3594600, episode=600 reward=0.7716032 (472.00 it/sec) -training >> step=3594700, episode=600 reward=0.7741204 (453.91 it/sec) -training >> step=3594800, episode=600 reward=0.7540059 (373.39 it/sec) -training >> step=3594900, episode=600 reward=0.7699814 (388.29 it/sec) -training >> step=3595000, episode=600 reward=0.7956784 (418.19 it/sec) -training >> step=3595100, episode=600 reward=0.7633305 (498.50 it/sec) -training >> step=3595200, episode=600 reward=0.7694129 (403.17 it/sec) -training >> step=3595300, episode=600 reward=0.7801017 (521.05 it/sec) -training >> step=3595400, episode=600 reward=0.7628927 (499.39 it/sec) -training >> step=3595500, episode=600 reward=0.7763312 (458.21 it/sec) -training >> step=3595600, episode=600 reward=0.7841385 (461.86 it/sec) -training >> step=3595700, episode=600 reward=0.787527 (471.01 it/sec) -training >> step=3595800, episode=600 reward=0.7855988 (449.04 it/sec) -training >> step=3595900, episode=600 reward=0.7784988 (410.92 it/sec) -training >> step=3596000, episode=600 reward=0.7710064 (456.35 it/sec) -training >> step=3596100, episode=600 reward=0.7709347 (490.88 it/sec) -training >> step=3596200, episode=600 reward=0.7582151 (517.33 it/sec) -training >> step=3596300, episode=600 reward=0.777042 (477.52 it/sec) -training >> step=3596400, episode=600 reward=0.7976573 (360.27 it/sec) -training >> step=3596500, episode=600 reward=0.7719075 (382.02 it/sec) -training >> step=3596600, episode=600 reward=0.7789809 (406.01 it/sec) -training >> step=3596700, episode=600 reward=0.7818978 (432.66 it/sec) -training >> step=3596800, episode=600 reward=0.7731635 (393.21 it/sec) -training >> step=3596900, episode=600 reward=0.7913113 (406.82 it/sec) -training >> step=3597000, episode=600 reward=0.786185 (459.21 it/sec) -training >> step=3597100, episode=600 reward=0.7733614 (470.48 it/sec) -training >> step=3597200, episode=600 reward=0.793003 (514.79 it/sec) -training >> step=3597300, episode=600 reward=0.7981768 (474.21 it/sec) -training >> step=3597400, episode=600 reward=0.7769878 (499.83 it/sec) -training >> step=3597500, episode=600 reward=0.7813042 (378.77 it/sec) -training >> step=3597600, episode=600 reward=0.7810783 (457.99 it/sec) -training >> step=3597700, episode=600 reward=0.7702019 (456.74 it/sec) -training >> step=3597800, episode=600 reward=0.7708426 (491.62 it/sec) -training >> step=3597900, episode=600 reward=0.7630911 (486.71 it/sec) -training >> step=3598000, episode=600 reward=0.7996569 (458.34 it/sec) -training >> step=3598100, episode=600 reward=0.7507727 (466.18 it/sec) -training >> step=3598200, episode=600 reward=0.7824369 (481.17 it/sec) -training >> step=3598300, episode=600 reward=0.7836024 (446.16 it/sec) -training >> step=3598400, episode=600 reward=0.7828761 (486.76 it/sec) -training >> step=3598500, episode=600 reward=0.7565243 (476.29 it/sec) -training >> step=3598600, episode=600 reward=0.7665671 (499.18 it/sec) -training >> step=3598700, episode=600 reward=0.7833465 (537.89 it/sec) -training >> step=3598800, episode=600 reward=0.7773482 (488.80 it/sec) -training >> step=3598900, episode=600 reward=0.7646537 (499.37 it/sec) -training >> step=3599000, episode=600 reward=0.7742634 (515.30 it/sec) -training >> step=3599100, episode=600 reward=0.7576563 (459.35 it/sec) -training >> step=3599200, episode=600 reward=0.7281343 (401.81 it/sec) -training >> step=3599300, episode=601 reward=0.7514964 (70.00 it/sec) -training >> step=3599400, episode=601 reward=0.7899328 (464.90 it/sec) -training >> step=3599500, episode=601 reward=0.7717053 (489.65 it/sec) -training >> step=3599600, episode=601 reward=0.7727955 (496.98 it/sec) -training >> step=3599700, episode=601 reward=0.7861246 (513.52 it/sec) -training >> step=3599800, episode=601 reward=0.7631764 (530.12 it/sec) -training >> step=3599900, episode=601 reward=0.7744943 (470.01 it/sec) -training >> step=3600000, episode=601 reward=0.7773331 (529.08 it/sec) -training >> step=3600100, episode=601 reward=0.7836967 (497.37 it/sec) -training >> step=3600200, episode=601 reward=0.7777498 (510.69 it/sec) -training >> step=3600300, episode=601 reward=0.7640723 (498.70 it/sec) -training >> step=3600400, episode=601 reward=0.7759053 (515.62 it/sec) -training >> step=3600500, episode=601 reward=0.7901427 (508.56 it/sec) -training >> step=3600600, episode=601 reward=0.801201 (569.65 it/sec) -training >> step=3600700, episode=601 reward=0.8048195 (527.20 it/sec) -training >> step=3600800, episode=601 reward=0.7771912 (538.91 it/sec) -training >> step=3600900, episode=601 reward=0.7811663 (530.10 it/sec) -training >> step=3601000, episode=601 reward=0.772498 (549.56 it/sec) -training >> step=3601100, episode=601 reward=0.773671 (541.11 it/sec) -training >> step=3601200, episode=601 reward=0.7705826 (528.95 it/sec) -training >> step=3601300, episode=601 reward=0.7816231 (550.26 it/sec) -training >> step=3601400, episode=601 reward=0.7615465 (547.83 it/sec) -training >> step=3601500, episode=601 reward=0.7601785 (535.26 it/sec) -training >> step=3601600, episode=601 reward=0.7680877 (503.13 it/sec) -training >> step=3601700, episode=601 reward=0.8011764 (522.79 it/sec) -training >> step=3601800, episode=601 reward=0.7852165 (504.53 it/sec) -training >> step=3601900, episode=601 reward=0.7816402 (564.97 it/sec) -training >> step=3602000, episode=601 reward=0.7702554 (515.39 it/sec) -training >> step=3602100, episode=601 reward=0.7977229 (546.37 it/sec) -training >> step=3602200, episode=601 reward=0.769771 (527.93 it/sec) -training >> step=3602300, episode=601 reward=0.7766455 (517.88 it/sec) -training >> step=3602400, episode=601 reward=0.7661132 (470.11 it/sec) -training >> step=3602500, episode=601 reward=0.778001 (478.83 it/sec) -training >> step=3602600, episode=601 reward=0.7835605 (471.11 it/sec) -training >> step=3602700, episode=601 reward=0.7800059 (468.09 it/sec) -training >> step=3602800, episode=601 reward=0.7921221 (429.01 it/sec) -training >> step=3602900, episode=601 reward=0.7882406 (489.93 it/sec) -training >> step=3603000, episode=601 reward=0.7626488 (481.40 it/sec) -training >> step=3603100, episode=601 reward=0.7948487 (463.50 it/sec) -training >> step=3603200, episode=601 reward=0.7809645 (403.21 it/sec) -training >> step=3603300, episode=601 reward=0.7815793 (486.67 it/sec) -training >> step=3603400, episode=601 reward=0.7721595 (447.46 it/sec) -training >> step=3603500, episode=601 reward=0.784927 (429.54 it/sec) -training >> step=3603600, episode=601 reward=0.7565331 (491.45 it/sec) -training >> step=3603700, episode=601 reward=0.7809999 (499.09 it/sec) -training >> step=3603800, episode=601 reward=0.7740073 (473.95 it/sec) -training >> step=3603900, episode=601 reward=0.766725 (506.55 it/sec) -training >> step=3604000, episode=601 reward=0.7865745 (533.11 it/sec) -training >> step=3604100, episode=601 reward=0.7576941 (426.98 it/sec) -training >> step=3604200, episode=601 reward=0.7585962 (475.95 it/sec) -training >> step=3604300, episode=601 reward=0.7773387 (480.18 it/sec) -training >> step=3604400, episode=601 reward=0.7863379 (538.61 it/sec) -training >> step=3604500, episode=601 reward=0.7741043 (476.24 it/sec) -training >> step=3604600, episode=601 reward=0.7939532 (509.06 it/sec) -training >> step=3604700, episode=601 reward=0.7576107 (471.48 it/sec) -training >> step=3604800, episode=601 reward=0.783315 (446.00 it/sec) -training >> step=3604900, episode=601 reward=0.7777447 (489.27 it/sec) -training >> step=3605000, episode=601 reward=0.7934222 (517.71 it/sec) -training >> step=3605100, episode=601 reward=0.7576191 (557.37 it/sec) -training >> step=3605200, episode=601 reward=0.8048484 (462.32 it/sec) -training >> step=3605300, episode=602 reward=0.7834439 (103.11 it/sec) -training >> step=3605400, episode=602 reward=0.7799084 (491.74 it/sec) -training >> step=3605500, episode=602 reward=0.760084 (538.15 it/sec) -training >> step=3605600, episode=602 reward=0.7640806 (511.05 it/sec) -training >> step=3605700, episode=602 reward=0.7884245 (539.06 it/sec) -training >> step=3605800, episode=602 reward=0.7921334 (484.19 it/sec) -training >> step=3605900, episode=602 reward=0.7870124 (447.96 it/sec) -training >> step=3606000, episode=602 reward=0.7560244 (525.50 it/sec) -training >> step=3606100, episode=602 reward=0.7643163 (494.22 it/sec) -training >> step=3606200, episode=602 reward=0.7666386 (530.94 it/sec) -training >> step=3606300, episode=602 reward=0.7657549 (462.51 it/sec) -training >> step=3606400, episode=602 reward=0.7770163 (440.75 it/sec) -training >> step=3606500, episode=602 reward=0.7574419 (534.47 it/sec) -training >> step=3606600, episode=602 reward=0.7770728 (497.16 it/sec) -training >> step=3606700, episode=602 reward=0.7799857 (487.86 it/sec) -training >> step=3606800, episode=602 reward=0.7812635 (486.33 it/sec) -training >> step=3606900, episode=602 reward=0.7587759 (499.92 it/sec) -training >> step=3607000, episode=602 reward=0.7655749 (415.61 it/sec) -training >> step=3607100, episode=602 reward=0.7601504 (483.85 it/sec) -training >> step=3607200, episode=602 reward=0.7752712 (493.06 it/sec) -training >> step=3607300, episode=602 reward=0.7636428 (532.98 it/sec) -training >> step=3607400, episode=602 reward=0.7871816 (460.68 it/sec) -training >> step=3607500, episode=602 reward=0.7613559 (445.62 it/sec) -training >> step=3607600, episode=602 reward=0.7649231 (444.85 it/sec) -training >> step=3607700, episode=602 reward=0.7542567 (474.46 it/sec) -training >> step=3607800, episode=602 reward=0.7789907 (435.02 it/sec) -training >> step=3607900, episode=602 reward=0.7927857 (432.16 it/sec) -training >> step=3608000, episode=602 reward=0.7707846 (537.05 it/sec) -training >> step=3608100, episode=602 reward=0.7816902 (513.15 it/sec) -training >> step=3608200, episode=602 reward=0.7837545 (463.36 it/sec) -training >> step=3608300, episode=602 reward=0.7707356 (462.03 it/sec) -training >> step=3608400, episode=602 reward=0.7872561 (446.62 it/sec) -training >> step=3608500, episode=602 reward=0.76884 (470.02 it/sec) -training >> step=3608600, episode=602 reward=0.7893904 (495.52 it/sec) -training >> step=3608700, episode=602 reward=0.7702642 (534.71 it/sec) -training >> step=3608800, episode=602 reward=0.7930245 (511.63 it/sec) -training >> step=3608900, episode=602 reward=0.7753139 (526.81 it/sec) -training >> step=3609000, episode=602 reward=0.7723959 (517.73 it/sec) -training >> step=3609100, episode=602 reward=0.7859965 (528.40 it/sec) -training >> step=3609200, episode=602 reward=0.7917901 (537.38 it/sec) -training >> step=3609300, episode=602 reward=0.7670248 (533.76 it/sec) -training >> step=3609400, episode=602 reward=0.7481568 (549.94 it/sec) -training >> step=3609500, episode=602 reward=0.7829082 (526.31 it/sec) -training >> step=3609600, episode=602 reward=0.7622488 (543.25 it/sec) -training >> step=3609700, episode=602 reward=0.7736754 (528.47 it/sec) -training >> step=3609800, episode=602 reward=0.7776042 (551.32 it/sec) -training >> step=3609900, episode=602 reward=0.7806334 (538.54 it/sec) -training >> step=3610000, episode=602 reward=0.7821735 (508.48 it/sec) -training >> step=3610100, episode=602 reward=0.7653545 (551.89 it/sec) -training >> step=3610200, episode=602 reward=0.7702848 (537.90 it/sec) -training >> step=3610300, episode=602 reward=0.753589 (538.22 it/sec) -training >> step=3610400, episode=602 reward=0.7577558 (520.14 it/sec) -training >> step=3610500, episode=602 reward=0.7805375 (584.42 it/sec) -training >> step=3610600, episode=602 reward=0.7794081 (534.36 it/sec) -training >> step=3610700, episode=602 reward=0.7737339 (548.26 it/sec) -training >> step=3610800, episode=602 reward=0.7664591 (523.97 it/sec) -training >> step=3610900, episode=602 reward=0.7878307 (576.04 it/sec) -training >> step=3611000, episode=602 reward=0.7764863 (520.43 it/sec) -training >> step=3611100, episode=602 reward=0.7749624 (493.95 it/sec) -training >> step=3611200, episode=602 reward=0.7634904 (507.82 it/sec) -training >> step=3611300, episode=603 reward=0.7765704 (74.95 it/sec) -training >> step=3611400, episode=603 reward=0.7854609 (529.84 it/sec) -training >> step=3611500, episode=603 reward=0.7832797 (485.69 it/sec) -training >> step=3611600, episode=603 reward=0.7744685 (504.95 it/sec) -training >> step=3611700, episode=603 reward=0.7982981 (522.06 it/sec) -training >> step=3611800, episode=603 reward=0.7711363 (501.30 it/sec) -training >> step=3611900, episode=603 reward=0.7806427 (537.52 it/sec) -training >> step=3612000, episode=603 reward=0.7771016 (500.86 it/sec) -training >> step=3612100, episode=603 reward=0.7788376 (517.65 it/sec) -training >> step=3612200, episode=603 reward=0.7746719 (553.70 it/sec) -training >> step=3612300, episode=603 reward=0.7536615 (498.34 it/sec) -training >> step=3612400, episode=603 reward=0.7599625 (567.50 it/sec) -training >> step=3612500, episode=603 reward=0.7868505 (508.34 it/sec) -training >> step=3612600, episode=603 reward=0.7638588 (540.04 it/sec) -training >> step=3612700, episode=603 reward=0.7909347 (504.58 it/sec) -training >> step=3612800, episode=603 reward=0.7684986 (568.61 it/sec) -training >> step=3612900, episode=603 reward=0.7772223 (490.34 it/sec) -training >> step=3613000, episode=603 reward=0.7654793 (504.53 it/sec) -training >> step=3613100, episode=603 reward=0.7667682 (567.11 it/sec) -training >> step=3613200, episode=603 reward=0.7637077 (526.53 it/sec) -training >> step=3613300, episode=603 reward=0.7624003 (549.96 it/sec) -training >> step=3613400, episode=603 reward=0.7867371 (524.32 it/sec) -training >> step=3613500, episode=603 reward=0.7502602 (560.41 it/sec) -training >> step=3613600, episode=603 reward=0.7752319 (517.28 it/sec) -training >> step=3613700, episode=603 reward=0.7644878 (534.60 it/sec) -training >> step=3613800, episode=603 reward=0.7694194 (499.66 it/sec) -training >> step=3613900, episode=603 reward=0.7792839 (572.30 it/sec) -training >> step=3614000, episode=603 reward=0.7640582 (531.61 it/sec) -training >> step=3614100, episode=603 reward=0.7682272 (547.77 it/sec) -training >> step=3614200, episode=603 reward=0.7801835 (561.37 it/sec) -training >> step=3614300, episode=603 reward=0.7919272 (507.50 it/sec) -training >> step=3614400, episode=603 reward=0.7889192 (556.57 it/sec) -training >> step=3614500, episode=603 reward=0.7749819 (532.61 it/sec) -training >> step=3614600, episode=603 reward=0.8036383 (550.61 it/sec) -training >> step=3614700, episode=603 reward=0.7779968 (542.72 it/sec) -training >> step=3614800, episode=603 reward=0.7525573 (547.12 it/sec) -training >> step=3614900, episode=603 reward=0.7815987 (540.69 it/sec) -training >> step=3615000, episode=603 reward=0.7695777 (552.69 it/sec) -training >> step=3615100, episode=603 reward=0.7849846 (499.45 it/sec) -training >> step=3615200, episode=603 reward=0.7772961 (485.76 it/sec) -training >> step=3615300, episode=603 reward=0.7564257 (558.62 it/sec) -training >> step=3615400, episode=603 reward=0.7938883 (513.68 it/sec) -training >> step=3615500, episode=603 reward=0.7831924 (518.68 it/sec) -training >> step=3615600, episode=603 reward=0.7846926 (550.73 it/sec) -training >> step=3615700, episode=603 reward=0.7666501 (525.09 it/sec) -training >> step=3615800, episode=603 reward=0.7828865 (502.20 it/sec) -training >> step=3615900, episode=603 reward=0.7828888 (439.58 it/sec) -training >> step=3616000, episode=603 reward=0.7833714 (517.81 it/sec) -training >> step=3616100, episode=603 reward=0.7582144 (474.12 it/sec) -training >> step=3616200, episode=603 reward=0.7621594 (494.25 it/sec) -training >> step=3616300, episode=603 reward=0.7626728 (534.54 it/sec) -training >> step=3616400, episode=603 reward=0.7572854 (520.84 it/sec) -training >> step=3616500, episode=603 reward=0.7791141 (517.49 it/sec) -training >> step=3616600, episode=603 reward=0.7760209 (548.50 it/sec) -training >> step=3616700, episode=603 reward=0.7635414 (533.71 it/sec) -training >> step=3616800, episode=603 reward=0.7728217 (506.10 it/sec) -training >> step=3616900, episode=603 reward=0.7545254 (532.60 it/sec) -training >> step=3617000, episode=603 reward=0.7723048 (507.99 it/sec) -training >> step=3617100, episode=603 reward=0.7643179 (504.18 it/sec) -training >> step=3617200, episode=603 reward=0.7840214 (526.22 it/sec) -training >> step=3617300, episode=604 reward=0.7721202 (112.08 it/sec) -training >> step=3617400, episode=604 reward=0.7960984 (457.66 it/sec) -training >> step=3617500, episode=604 reward=0.773636 (525.67 it/sec) -training >> step=3617600, episode=604 reward=0.7943757 (496.76 it/sec) -training >> step=3617700, episode=604 reward=0.7787441 (508.23 it/sec) -training >> step=3617800, episode=604 reward=0.7715245 (546.54 it/sec) -training >> step=3617900, episode=604 reward=0.798862 (488.97 it/sec) -training >> step=3618000, episode=604 reward=0.7533436 (500.63 it/sec) -training >> step=3618100, episode=604 reward=0.77901 (521.46 it/sec) -training >> step=3618200, episode=604 reward=0.7513302 (508.65 it/sec) -training >> step=3618300, episode=604 reward=0.7750922 (547.91 it/sec) -training >> step=3618400, episode=604 reward=0.7678706 (522.56 it/sec) -training >> step=3618500, episode=604 reward=0.7546683 (541.01 it/sec) -training >> step=3618600, episode=604 reward=0.7891032 (502.58 it/sec) -training >> step=3618700, episode=604 reward=0.7691835 (478.44 it/sec) -training >> step=3618800, episode=604 reward=0.7677727 (538.14 it/sec) -training >> step=3618900, episode=604 reward=0.7846166 (528.99 it/sec) -training >> step=3619000, episode=604 reward=0.7427052 (480.70 it/sec) -training >> step=3619100, episode=604 reward=0.7658903 (504.63 it/sec) -training >> step=3619200, episode=604 reward=0.7770215 (505.14 it/sec) -training >> step=3619300, episode=604 reward=0.7847064 (486.87 it/sec) -training >> step=3619400, episode=604 reward=0.7602714 (518.87 it/sec) -training >> step=3619500, episode=604 reward=0.7924544 (500.41 it/sec) -training >> step=3619600, episode=604 reward=0.787877 (527.61 it/sec) -training >> step=3619700, episode=604 reward=0.7797065 (517.16 it/sec) -training >> step=3619800, episode=604 reward=0.7780167 (510.85 it/sec) -training >> step=3619900, episode=604 reward=0.7492142 (529.36 it/sec) -training >> step=3620000, episode=604 reward=0.7741237 (482.89 it/sec) -training >> step=3620100, episode=604 reward=0.7741287 (484.68 it/sec) -training >> step=3620200, episode=604 reward=0.7532488 (559.07 it/sec) -training >> step=3620300, episode=604 reward=0.7665266 (461.18 it/sec) -training >> step=3620400, episode=604 reward=0.7829715 (498.94 it/sec) -training >> step=3620500, episode=604 reward=0.7671306 (501.50 it/sec) -training >> step=3620600, episode=604 reward=0.7764638 (499.44 it/sec) -training >> step=3620700, episode=604 reward=0.7897958 (509.96 it/sec) -training >> step=3620800, episode=604 reward=0.7733096 (503.32 it/sec) -training >> step=3620900, episode=604 reward=0.7770834 (552.26 it/sec) -training >> step=3621000, episode=604 reward=0.7763299 (495.27 it/sec) -training >> step=3621100, episode=604 reward=0.7696297 (505.80 it/sec) -training >> step=3621200, episode=604 reward=0.7884237 (502.04 it/sec) -training >> step=3621300, episode=604 reward=0.7656357 (521.36 it/sec) -training >> step=3621400, episode=604 reward=0.7745991 (508.94 it/sec) -training >> step=3621500, episode=604 reward=0.7735493 (516.88 it/sec) -training >> step=3621600, episode=604 reward=0.7723152 (490.01 it/sec) -training >> step=3621700, episode=604 reward=0.7448663 (498.73 it/sec) -training >> step=3621800, episode=604 reward=0.786581 (477.93 it/sec) -training >> step=3621900, episode=604 reward=0.7816072 (482.79 it/sec) -training >> step=3622000, episode=604 reward=0.7544824 (549.82 it/sec) -training >> step=3622100, episode=604 reward=0.783534 (476.41 it/sec) -training >> step=3622200, episode=604 reward=0.7777487 (501.01 it/sec) -training >> step=3622300, episode=604 reward=0.769048 (544.88 it/sec) -training >> step=3622400, episode=604 reward=0.7844318 (517.46 it/sec) -training >> step=3622500, episode=604 reward=0.7656748 (547.32 it/sec) -training >> step=3622600, episode=604 reward=0.7668468 (487.18 it/sec) -training >> step=3622700, episode=604 reward=0.7627667 (523.48 it/sec) -training >> step=3622800, episode=604 reward=0.7606541 (493.64 it/sec) -training >> step=3622900, episode=604 reward=0.7836984 (501.77 it/sec) -training >> step=3623000, episode=604 reward=0.7619412 (525.16 it/sec) -training >> step=3623100, episode=604 reward=0.7306973 (504.86 it/sec) -training >> step=3623200, episode=604 reward=0.7734783 (510.40 it/sec) -training >> step=3623300, episode=605 reward=0.7630147 (91.85 it/sec) -training >> step=3623400, episode=605 reward=0.7640448 (523.04 it/sec) -training >> step=3623500, episode=605 reward=0.7681293 (498.67 it/sec) -training >> step=3623600, episode=605 reward=0.7707 (516.83 it/sec) -training >> step=3623700, episode=605 reward=0.771126 (475.73 it/sec) -training >> step=3623800, episode=605 reward=0.7689015 (534.61 it/sec) -training >> step=3623900, episode=605 reward=0.7845924 (468.74 it/sec) -training >> step=3624000, episode=605 reward=0.8055921 (484.86 it/sec) -training >> step=3624100, episode=605 reward=0.7737945 (450.64 it/sec) -training >> step=3624200, episode=605 reward=0.7811332 (524.60 it/sec) -training >> step=3624300, episode=605 reward=0.7821572 (513.83 it/sec) -training >> step=3624400, episode=605 reward=0.7661723 (517.65 it/sec) -training >> step=3624500, episode=605 reward=0.7646903 (497.66 it/sec) -training >> step=3624600, episode=605 reward=0.7685827 (517.65 it/sec) -training >> step=3624700, episode=605 reward=0.7583236 (511.41 it/sec) -training >> step=3624800, episode=605 reward=0.7860429 (511.70 it/sec) -training >> step=3624900, episode=605 reward=0.7792669 (562.85 it/sec) -training >> step=3625000, episode=605 reward=0.7803129 (506.83 it/sec) -training >> step=3625100, episode=605 reward=0.7611604 (475.68 it/sec) -training >> step=3625200, episode=605 reward=0.7806656 (516.72 it/sec) -training >> step=3625300, episode=605 reward=0.7435198 (490.99 it/sec) -training >> step=3625400, episode=605 reward=0.7611166 (500.97 it/sec) -training >> step=3625500, episode=605 reward=0.7786537 (543.85 it/sec) -training >> step=3625600, episode=605 reward=0.7894376 (502.46 it/sec) -training >> step=3625700, episode=605 reward=0.7733171 (438.55 it/sec) -training >> step=3625800, episode=605 reward=0.7790453 (485.65 it/sec) -training >> step=3625900, episode=605 reward=0.7608126 (511.76 it/sec) -training >> step=3626000, episode=605 reward=0.7853857 (511.12 it/sec) -training >> step=3626100, episode=605 reward=0.7821887 (476.36 it/sec) -training >> step=3626200, episode=605 reward=0.7806661 (528.41 it/sec) -training >> step=3626300, episode=605 reward=0.7799156 (496.49 it/sec) -training >> step=3626400, episode=605 reward=0.7929897 (467.96 it/sec) -training >> step=3626500, episode=605 reward=0.779336 (512.31 it/sec) -training >> step=3626600, episode=605 reward=0.7935207 (504.88 it/sec) -training >> step=3626700, episode=605 reward=0.7738166 (556.76 it/sec) -training >> step=3626800, episode=605 reward=0.7992365 (559.72 it/sec) -training >> step=3626900, episode=605 reward=0.7821288 (573.23 it/sec) -training >> step=3627000, episode=605 reward=0.7911629 (565.60 it/sec) -training >> step=3627100, episode=605 reward=0.76786 (551.13 it/sec) -training >> step=3627200, episode=605 reward=0.7731333 (541.11 it/sec) -training >> step=3627300, episode=605 reward=0.7790989 (569.89 it/sec) -training >> step=3627400, episode=605 reward=0.773836 (534.46 it/sec) -training >> step=3627500, episode=605 reward=0.7547533 (539.73 it/sec) -training >> step=3627600, episode=605 reward=0.781339 (593.30 it/sec) -training >> step=3627700, episode=605 reward=0.7682636 (557.13 it/sec) -training >> step=3627800, episode=605 reward=0.760424 (485.70 it/sec) -training >> step=3627900, episode=605 reward=0.7742291 (556.34 it/sec) -training >> step=3628000, episode=605 reward=0.7786847 (575.85 it/sec) -training >> step=3628100, episode=605 reward=0.7614567 (542.76 it/sec) -training >> step=3628200, episode=605 reward=0.7539228 (527.88 it/sec) -training >> step=3628300, episode=605 reward=0.7879197 (534.48 it/sec) -training >> step=3628400, episode=605 reward=0.7799316 (584.65 it/sec) -training >> step=3628500, episode=605 reward=0.7706425 (505.35 it/sec) -training >> step=3628600, episode=605 reward=0.7540783 (533.22 it/sec) -training >> step=3628700, episode=605 reward=0.7742916 (531.61 it/sec) -training >> step=3628800, episode=605 reward=0.7630951 (548.58 it/sec) -training >> step=3628900, episode=605 reward=0.7851452 (498.29 it/sec) -training >> step=3629000, episode=605 reward=0.7775392 (532.44 it/sec) -training >> step=3629100, episode=605 reward=0.7592495 (499.30 it/sec) -training >> step=3629200, episode=605 reward=0.7697026 (540.16 it/sec) -training >> step=3629300, episode=606 reward=0.7534235 (100.59 it/sec) -training >> step=3629400, episode=606 reward=0.7704295 (572.50 it/sec) -training >> step=3629500, episode=606 reward=0.7703639 (558.72 it/sec) -training >> step=3629600, episode=606 reward=0.785104 (513.20 it/sec) -training >> step=3629700, episode=606 reward=0.7603415 (558.11 it/sec) -training >> step=3629800, episode=606 reward=0.7843251 (539.94 it/sec) -training >> step=3629900, episode=606 reward=0.7811912 (538.38 it/sec) -training >> step=3630000, episode=606 reward=0.7790706 (562.85 it/sec) -training >> step=3630100, episode=606 reward=0.7953759 (541.31 it/sec) -training >> step=3630200, episode=606 reward=0.7651028 (546.59 it/sec) -training >> step=3630300, episode=606 reward=0.7795319 (534.39 it/sec) -training >> step=3630400, episode=606 reward=0.7633784 (556.97 it/sec) -training >> step=3630500, episode=606 reward=0.7772424 (531.45 it/sec) -training >> step=3630600, episode=606 reward=0.7726776 (551.21 it/sec) -training >> step=3630700, episode=606 reward=0.7691535 (522.10 it/sec) -training >> step=3630800, episode=606 reward=0.7810838 (559.89 it/sec) -training >> step=3630900, episode=606 reward=0.7737594 (522.34 it/sec) -training >> step=3631000, episode=606 reward=0.7691939 (534.50 it/sec) -training >> step=3631100, episode=606 reward=0.7854323 (562.60 it/sec) -training >> step=3631200, episode=606 reward=0.7542999 (527.37 it/sec) -training >> step=3631300, episode=606 reward=0.7903365 (550.76 it/sec) -training >> step=3631400, episode=606 reward=0.7634119 (463.45 it/sec) -training >> step=3631500, episode=606 reward=0.761562 (544.24 it/sec) -training >> step=3631600, episode=606 reward=0.7848724 (532.47 it/sec) -training >> step=3631700, episode=606 reward=0.7816439 (544.02 it/sec) -training >> step=3631800, episode=606 reward=0.7660184 (501.04 it/sec) -training >> step=3631900, episode=606 reward=0.7694299 (557.38 it/sec) -training >> step=3632000, episode=606 reward=0.7541015 (556.30 it/sec) -training >> step=3632100, episode=606 reward=0.7605385 (558.71 it/sec) -training >> step=3632200, episode=606 reward=0.7568011 (562.54 it/sec) -training >> step=3632300, episode=606 reward=0.7884185 (473.20 it/sec) -training >> step=3632400, episode=606 reward=0.757349 (533.15 it/sec) -training >> step=3632500, episode=606 reward=0.7591416 (514.05 it/sec) -training >> step=3632600, episode=606 reward=0.7898062 (583.83 it/sec) -training >> step=3632700, episode=606 reward=0.7763852 (553.19 it/sec) -training >> step=3632800, episode=606 reward=0.7787217 (551.66 it/sec) -training >> step=3632900, episode=606 reward=0.7890795 (551.50 it/sec) -training >> step=3633000, episode=606 reward=0.772307 (578.48 it/sec) -training >> step=3633100, episode=606 reward=0.7525876 (528.58 it/sec) -training >> step=3633200, episode=606 reward=0.7795906 (573.43 it/sec) -training >> step=3633300, episode=606 reward=0.7726526 (576.70 it/sec) -training >> step=3633400, episode=606 reward=0.7636999 (528.55 it/sec) -training >> step=3633500, episode=606 reward=0.7806808 (506.65 it/sec) -training >> step=3633600, episode=606 reward=0.7610193 (511.36 it/sec) -training >> step=3633700, episode=606 reward=0.7652988 (571.97 it/sec) -training >> step=3633800, episode=606 reward=0.7494187 (552.66 it/sec) -training >> step=3633900, episode=606 reward=0.7912341 (569.11 it/sec) -training >> step=3634000, episode=606 reward=0.7453334 (558.56 it/sec) -training >> step=3634100, episode=606 reward=0.7712313 (550.37 it/sec) -training >> step=3634200, episode=606 reward=0.793275 (537.25 it/sec) -training >> step=3634300, episode=606 reward=0.7596764 (536.60 it/sec) -training >> step=3634400, episode=606 reward=0.7692168 (586.04 it/sec) -training >> step=3634500, episode=606 reward=0.777746 (543.77 it/sec) -training >> step=3634600, episode=606 reward=0.78306 (495.92 it/sec) -training >> step=3634700, episode=606 reward=0.7567954 (551.70 it/sec) -training >> step=3634800, episode=606 reward=0.7458568 (534.20 it/sec) -training >> step=3634900, episode=606 reward=0.7574964 (520.78 it/sec) -training >> step=3635000, episode=606 reward=0.7851787 (558.62 it/sec) -training >> step=3635100, episode=606 reward=0.7850758 (529.08 it/sec) -training >> step=3635200, episode=606 reward=0.7512541 (552.59 it/sec) -training >> step=3635300, episode=607 reward=0.7750558 (116.35 it/sec) -training >> step=3635400, episode=607 reward=0.7580905 (597.49 it/sec) -training >> step=3635500, episode=607 reward=0.7950467 (543.44 it/sec) -training >> step=3635600, episode=607 reward=0.7927142 (544.43 it/sec) -training >> step=3635700, episode=607 reward=0.7867667 (557.53 it/sec) -training >> step=3635800, episode=607 reward=0.7736863 (580.55 it/sec) -training >> step=3635900, episode=607 reward=0.7507817 (492.92 it/sec) -training >> step=3636000, episode=607 reward=0.7789221 (570.15 it/sec) -training >> step=3636100, episode=607 reward=0.7784727 (508.37 it/sec) -training >> step=3636200, episode=607 reward=0.7577069 (591.09 it/sec) -training >> step=3636300, episode=607 reward=0.7917354 (536.47 it/sec) -training >> step=3636400, episode=607 reward=0.7791095 (540.87 it/sec) -training >> step=3636500, episode=607 reward=0.7654185 (479.35 it/sec) -training >> step=3636600, episode=607 reward=0.7690976 (505.41 it/sec) -training >> step=3636700, episode=607 reward=0.7877783 (539.93 it/sec) -training >> step=3636800, episode=607 reward=0.7942505 (543.53 it/sec) -training >> step=3636900, episode=607 reward=0.777651 (570.00 it/sec) -training >> step=3637000, episode=607 reward=0.754499 (526.11 it/sec) -training >> step=3637100, episode=607 reward=0.7934839 (543.56 it/sec) -training >> step=3637200, episode=607 reward=0.7889616 (567.20 it/sec) -training >> step=3637300, episode=607 reward=0.7762607 (546.01 it/sec) -training >> step=3637400, episode=607 reward=0.7642469 (558.69 it/sec) -training >> step=3637500, episode=607 reward=0.7734092 (544.40 it/sec) -training >> step=3637600, episode=607 reward=0.7631557 (528.02 it/sec) -training >> step=3637700, episode=607 reward=0.7715192 (505.86 it/sec) -training >> step=3637800, episode=607 reward=0.7539076 (528.11 it/sec) -training >> step=3637900, episode=607 reward=0.7658678 (554.64 it/sec) -training >> step=3638000, episode=607 reward=0.7873563 (581.12 it/sec) -training >> step=3638100, episode=607 reward=0.7826431 (508.70 it/sec) -training >> step=3638200, episode=607 reward=0.7595083 (557.24 it/sec) -training >> step=3638300, episode=607 reward=0.7695425 (522.91 it/sec) -training >> step=3638400, episode=607 reward=0.7675853 (514.10 it/sec) -training >> step=3638500, episode=607 reward=0.7811072 (531.55 it/sec) -training >> step=3638600, episode=607 reward=0.7777209 (539.60 it/sec) -training >> step=3638700, episode=607 reward=0.7610099 (489.17 it/sec) -training >> step=3638800, episode=607 reward=0.8014088 (498.40 it/sec) -training >> step=3638900, episode=607 reward=0.7605755 (528.91 it/sec) -training >> step=3639000, episode=607 reward=0.758968 (573.10 it/sec) -training >> step=3639100, episode=607 reward=0.7714511 (571.26 it/sec) -training >> step=3639200, episode=607 reward=0.7854881 (520.52 it/sec) -training >> step=3639300, episode=607 reward=0.7814085 (548.19 it/sec) -training >> step=3639400, episode=607 reward=0.76306 (520.82 it/sec) -training >> step=3639500, episode=607 reward=0.7704331 (520.25 it/sec) -training >> step=3639600, episode=607 reward=0.794081 (518.55 it/sec) -training >> step=3639700, episode=607 reward=0.7587792 (542.14 it/sec) -training >> step=3639800, episode=607 reward=0.7851455 (545.60 it/sec) -training >> step=3639900, episode=607 reward=0.7834196 (428.13 it/sec) -training >> step=3640000, episode=607 reward=0.7666955 (496.26 it/sec) -training >> step=3640100, episode=607 reward=0.7868463 (508.02 it/sec) -training >> step=3640200, episode=607 reward=0.7618001 (518.69 it/sec) -training >> step=3640300, episode=607 reward=0.7669514 (489.31 it/sec) -training >> step=3640400, episode=607 reward=0.7314458 (507.33 it/sec) -training >> step=3640500, episode=607 reward=0.7672396 (523.90 it/sec) -training >> step=3640600, episode=607 reward=0.7678545 (496.90 it/sec) -training >> step=3640700, episode=607 reward=0.7631912 (475.86 it/sec) -training >> step=3640800, episode=607 reward=0.7882243 (517.21 it/sec) -training >> step=3640900, episode=607 reward=0.7699631 (505.25 it/sec) -training >> step=3641000, episode=607 reward=0.7683145 (490.65 it/sec) -training >> step=3641100, episode=607 reward=0.7885033 (502.92 it/sec) -training >> step=3641200, episode=607 reward=0.7533017 (544.87 it/sec) -training >> step=3641300, episode=608 reward=0.7777899 (101.30 it/sec) -training >> step=3641400, episode=608 reward=0.7714457 (482.19 it/sec) -training >> step=3641500, episode=608 reward=0.770332 (469.64 it/sec) -training >> step=3641600, episode=608 reward=0.7756736 (496.79 it/sec) -training >> step=3641700, episode=608 reward=0.7830027 (484.81 it/sec) -training >> step=3641800, episode=608 reward=0.766777 (512.78 it/sec) -training >> step=3641900, episode=608 reward=0.791395 (524.55 it/sec) -training >> step=3642000, episode=608 reward=0.7868406 (498.68 it/sec) -training >> step=3642100, episode=608 reward=0.7520501 (502.18 it/sec) -training >> step=3642200, episode=608 reward=0.7613316 (455.79 it/sec) -training >> step=3642300, episode=608 reward=0.770564 (538.33 it/sec) -training >> step=3642400, episode=608 reward=0.757648 (500.25 it/sec) -training >> step=3642500, episode=608 reward=0.7449459 (498.29 it/sec) -training >> step=3642600, episode=608 reward=0.7803937 (508.23 it/sec) -training >> step=3642700, episode=608 reward=0.7717918 (510.94 it/sec) -training >> step=3642800, episode=608 reward=0.7813895 (491.89 it/sec) -training >> step=3642900, episode=608 reward=0.7939007 (483.30 it/sec) -training >> step=3643000, episode=608 reward=0.7623168 (481.33 it/sec) -training >> step=3643100, episode=608 reward=0.7910848 (417.26 it/sec) -training >> step=3643200, episode=608 reward=0.7590222 (505.72 it/sec) -training >> step=3643300, episode=608 reward=0.7885737 (496.17 it/sec) -training >> step=3643400, episode=608 reward=0.7686683 (473.18 it/sec) -training >> step=3643500, episode=608 reward=0.7802035 (482.39 it/sec) -training >> step=3643600, episode=608 reward=0.7631543 (498.45 it/sec) -training >> step=3643700, episode=608 reward=0.7885097 (528.82 it/sec) -training >> step=3643800, episode=608 reward=0.7633683 (462.36 it/sec) -training >> step=3643900, episode=608 reward=0.7526892 (471.75 it/sec) -training >> step=3644000, episode=608 reward=0.7672178 (488.93 it/sec) -training >> step=3644100, episode=608 reward=0.7695864 (473.33 it/sec) -training >> step=3644200, episode=608 reward=0.7635147 (503.58 it/sec) -training >> step=3644300, episode=608 reward=0.7667826 (513.47 it/sec) -training >> step=3644400, episode=608 reward=0.7646134 (483.24 it/sec) -training >> step=3644500, episode=608 reward=0.7899144 (490.00 it/sec) -training >> step=3644600, episode=608 reward=0.7667215 (486.55 it/sec) -training >> step=3644700, episode=608 reward=0.7670119 (482.71 it/sec) -training >> step=3644800, episode=608 reward=0.7728437 (551.66 it/sec) -training >> step=3644900, episode=608 reward=0.7805179 (469.23 it/sec) -training >> step=3645000, episode=608 reward=0.7626663 (487.73 it/sec) -training >> step=3645100, episode=608 reward=0.7795997 (519.94 it/sec) -training >> step=3645200, episode=608 reward=0.7728211 (499.91 it/sec) -training >> step=3645300, episode=608 reward=0.7734321 (508.69 it/sec) -training >> step=3645400, episode=608 reward=0.7744623 (486.78 it/sec) -training >> step=3645500, episode=608 reward=0.7907093 (528.68 it/sec) -training >> step=3645600, episode=608 reward=0.7472486 (490.65 it/sec) -training >> step=3645700, episode=608 reward=0.7836332 (507.01 it/sec) -training >> step=3645800, episode=608 reward=0.7582443 (513.56 it/sec) -training >> step=3645900, episode=608 reward=0.7663428 (535.73 it/sec) -training >> step=3646000, episode=608 reward=0.7648997 (474.13 it/sec) -training >> step=3646100, episode=608 reward=0.7883758 (488.12 it/sec) -training >> step=3646200, episode=608 reward=0.7868524 (520.83 it/sec) -training >> step=3646300, episode=608 reward=0.7792302 (491.30 it/sec) -training >> step=3646400, episode=608 reward=0.7792853 (496.99 it/sec) -training >> step=3646500, episode=608 reward=0.7764819 (489.34 it/sec) -training >> step=3646600, episode=608 reward=0.7753029 (533.15 it/sec) -training >> step=3646700, episode=608 reward=0.7648846 (481.27 it/sec) -training >> step=3646800, episode=608 reward=0.7796289 (519.09 it/sec) -training >> step=3646900, episode=608 reward=0.7634009 (534.92 it/sec) -training >> step=3647000, episode=608 reward=0.7608605 (506.35 it/sec) -training >> step=3647100, episode=608 reward=0.7711958 (470.35 it/sec) -training >> step=3647200, episode=608 reward=0.7679543 (497.83 it/sec) -training >> step=3647300, episode=609 reward=0.7758473 (135.10 it/sec) -training >> step=3647400, episode=609 reward=0.768939 (481.96 it/sec) -training >> step=3647500, episode=609 reward=0.7818162 (472.80 it/sec) -training >> step=3647600, episode=609 reward=0.7751039 (480.89 it/sec) -training >> step=3647700, episode=609 reward=0.7618777 (539.39 it/sec) -training >> step=3647800, episode=609 reward=0.7660635 (453.58 it/sec) -training >> step=3647900, episode=609 reward=0.806276 (456.91 it/sec) -training >> step=3648000, episode=609 reward=0.76802 (452.93 it/sec) -training >> step=3648100, episode=609 reward=0.7646085 (463.99 it/sec) -training >> step=3648200, episode=609 reward=0.7717535 (491.24 it/sec) -training >> step=3648300, episode=609 reward=0.7835131 (434.87 it/sec) -training >> step=3648400, episode=609 reward=0.7601255 (520.47 it/sec) -training >> step=3648500, episode=609 reward=0.7729575 (496.52 it/sec) -training >> step=3648600, episode=609 reward=0.7699845 (528.42 it/sec) -training >> step=3648700, episode=609 reward=0.7623702 (412.78 it/sec) -training >> step=3648800, episode=609 reward=0.7728704 (472.76 it/sec) -training >> step=3648900, episode=609 reward=0.7979651 (511.15 it/sec) -training >> step=3649000, episode=609 reward=0.7713643 (474.27 it/sec) -training >> step=3649100, episode=609 reward=0.7864632 (492.23 it/sec) -training >> step=3649200, episode=609 reward=0.7672933 (501.54 it/sec) -training >> step=3649300, episode=609 reward=0.7843984 (508.37 it/sec) -training >> step=3649400, episode=609 reward=0.7639066 (477.96 it/sec) -training >> step=3649500, episode=609 reward=0.7585651 (480.41 it/sec) -training >> step=3649600, episode=609 reward=0.772028 (460.43 it/sec) -training >> step=3649700, episode=609 reward=0.7832392 (510.37 it/sec) -training >> step=3649800, episode=609 reward=0.7807016 (475.65 it/sec) -training >> step=3649900, episode=609 reward=0.7543954 (492.49 it/sec) -training >> step=3650000, episode=609 reward=0.7982519 (528.05 it/sec) -training >> step=3650100, episode=609 reward=0.757923 (525.45 it/sec) -training >> step=3650200, episode=609 reward=0.7639635 (490.51 it/sec) -training >> step=3650300, episode=609 reward=0.7879848 (489.04 it/sec) -training >> step=3650400, episode=609 reward=0.7895559 (525.99 it/sec) -training >> step=3650500, episode=609 reward=0.7814883 (482.66 it/sec) -training >> step=3650600, episode=609 reward=0.7860515 (518.68 it/sec) -training >> step=3650700, episode=609 reward=0.767538 (470.82 it/sec) -training >> step=3650800, episode=609 reward=0.7570205 (459.29 it/sec) -training >> step=3650900, episode=609 reward=0.7780374 (475.86 it/sec) -training >> step=3651000, episode=609 reward=0.7769492 (460.93 it/sec) -training >> step=3651100, episode=609 reward=0.8011392 (476.51 it/sec) -training >> step=3651200, episode=609 reward=0.7758988 (462.53 it/sec) -training >> step=3651300, episode=609 reward=0.756187 (472.83 it/sec) -training >> step=3651400, episode=609 reward=0.7785029 (511.74 it/sec) -training >> step=3651500, episode=609 reward=0.7658443 (514.08 it/sec) -training >> step=3651600, episode=609 reward=0.7680824 (497.96 it/sec) -training >> step=3651700, episode=609 reward=0.7853916 (487.68 it/sec) -training >> step=3651800, episode=609 reward=0.761029 (483.26 it/sec) -training >> step=3651900, episode=609 reward=0.7787294 (504.12 it/sec) -training >> step=3652000, episode=609 reward=0.7909423 (461.52 it/sec) -training >> step=3652100, episode=609 reward=0.7673427 (431.34 it/sec) -training >> step=3652200, episode=609 reward=0.8007481 (462.13 it/sec) -training >> step=3652300, episode=609 reward=0.7708057 (496.68 it/sec) -training >> step=3652400, episode=609 reward=0.7770407 (519.12 it/sec) -training >> step=3652500, episode=609 reward=0.793872 (499.08 it/sec) -training >> step=3652600, episode=609 reward=0.7702278 (516.23 it/sec) -training >> step=3652700, episode=609 reward=0.7856305 (435.59 it/sec) -training >> step=3652800, episode=609 reward=0.761808 (483.44 it/sec) -training >> step=3652900, episode=609 reward=0.7654344 (535.46 it/sec) -training >> step=3653000, episode=609 reward=0.7787851 (509.34 it/sec) -training >> step=3653100, episode=609 reward=0.7614754 (508.38 it/sec) -training >> step=3653200, episode=609 reward=0.7890213 (494.37 it/sec) -training >> step=3653300, episode=610 reward=0.7651054 (143.74 it/sec) -training >> step=3653400, episode=610 reward=0.7635291 (497.12 it/sec) -training >> step=3653500, episode=610 reward=0.7819936 (494.63 it/sec) -training >> step=3653600, episode=610 reward=0.7500886 (502.87 it/sec) -training >> step=3653700, episode=610 reward=0.7605735 (512.73 it/sec) -training >> step=3653800, episode=610 reward=0.7822936 (490.83 it/sec) -training >> step=3653900, episode=610 reward=0.8035315 (473.91 it/sec) -training >> step=3654000, episode=610 reward=0.7795282 (498.80 it/sec) -training >> step=3654100, episode=610 reward=0.7623005 (469.99 it/sec) -training >> step=3654200, episode=610 reward=0.7948611 (503.11 it/sec) -training >> step=3654300, episode=610 reward=0.7592875 (512.92 it/sec) -training >> step=3654400, episode=610 reward=0.7587757 (501.59 it/sec) -training >> step=3654500, episode=610 reward=0.7792888 (496.27 it/sec) -training >> step=3654600, episode=610 reward=0.7829898 (487.46 it/sec) -training >> step=3654700, episode=610 reward=0.7751242 (494.49 it/sec) -training >> step=3654800, episode=610 reward=0.7871324 (479.75 it/sec) -training >> step=3654900, episode=610 reward=0.7621538 (511.48 it/sec) -training >> step=3655000, episode=610 reward=0.7693599 (497.10 it/sec) -training >> step=3655100, episode=610 reward=0.7921299 (520.25 it/sec) -training >> step=3655200, episode=610 reward=0.7880856 (511.60 it/sec) -training >> step=3655300, episode=610 reward=0.7785568 (497.12 it/sec) -training >> step=3655400, episode=610 reward=0.7720127 (531.93 it/sec) -training >> step=3655500, episode=610 reward=0.8003898 (521.03 it/sec) -training >> step=3655600, episode=610 reward=0.7834443 (480.47 it/sec) -training >> step=3655700, episode=610 reward=0.7506024 (439.30 it/sec) -training >> step=3655800, episode=610 reward=0.7905494 (521.28 it/sec) -training >> step=3655900, episode=610 reward=0.7724464 (488.24 it/sec) -training >> step=3656000, episode=610 reward=0.7672874 (514.61 it/sec) -training >> step=3656100, episode=610 reward=0.7678671 (468.02 it/sec) -training >> step=3656200, episode=610 reward=0.7706388 (472.17 it/sec) -training >> step=3656300, episode=610 reward=0.7846733 (471.87 it/sec) -training >> step=3656400, episode=610 reward=0.7752531 (487.39 it/sec) -training >> step=3656500, episode=610 reward=0.7652802 (530.74 it/sec) -training >> step=3656600, episode=610 reward=0.7709548 (451.98 it/sec) -training >> step=3656700, episode=610 reward=0.7646631 (521.65 it/sec) -training >> step=3656800, episode=610 reward=0.7771272 (484.97 it/sec) -training >> step=3656900, episode=610 reward=0.7850185 (521.16 it/sec) -training >> step=3657000, episode=610 reward=0.7692348 (459.41 it/sec) -training >> step=3657100, episode=610 reward=0.7970929 (443.95 it/sec) -training >> step=3657200, episode=610 reward=0.7798709 (532.65 it/sec) -training >> step=3657300, episode=610 reward=0.7688173 (529.66 it/sec) -training >> step=3657400, episode=610 reward=0.7699225 (508.20 it/sec) -training >> step=3657500, episode=610 reward=0.7688392 (492.28 it/sec) -training >> step=3657600, episode=610 reward=0.7894397 (468.27 it/sec) -training >> step=3657700, episode=610 reward=0.7883012 (494.48 it/sec) -training >> step=3657800, episode=610 reward=0.7640517 (501.94 it/sec) -training >> step=3657900, episode=610 reward=0.7697108 (471.58 it/sec) -training >> step=3658000, episode=610 reward=0.7800301 (538.13 it/sec) -training >> step=3658100, episode=610 reward=0.7744943 (489.52 it/sec) -training >> step=3658200, episode=610 reward=0.7674195 (462.76 it/sec) -training >> step=3658300, episode=610 reward=0.7777542 (542.23 it/sec) -training >> step=3658400, episode=610 reward=0.7649179 (522.13 it/sec) -training >> step=3658500, episode=610 reward=0.7418073 (516.66 it/sec) -training >> step=3658600, episode=610 reward=0.7914566 (474.17 it/sec) -training >> step=3658700, episode=610 reward=0.7652491 (540.10 it/sec) -training >> step=3658800, episode=610 reward=0.7490514 (489.09 it/sec) -training >> step=3658900, episode=610 reward=0.7772686 (506.63 it/sec) -training >> step=3659000, episode=610 reward=0.7748775 (478.16 it/sec) -training >> step=3659100, episode=610 reward=0.7751805 (539.61 it/sec) -training >> step=3659200, episode=610 reward=0.7725507 (509.29 it/sec) -training >> step=3659300, episode=611 reward=0.7744269 (159.31 it/sec) -training >> step=3659400, episode=611 reward=0.7900819 (393.77 it/sec) -training >> step=3659500, episode=611 reward=0.7723796 (504.14 it/sec) -training >> step=3659600, episode=611 reward=0.7556344 (515.15 it/sec) -training >> step=3659700, episode=611 reward=0.7801609 (498.21 it/sec) -training >> step=3659800, episode=611 reward=0.7717181 (536.09 it/sec) -training >> step=3659900, episode=611 reward=0.7786419 (451.94 it/sec) -training >> step=3660000, episode=611 reward=0.7861685 (511.67 it/sec) -training >> step=3660100, episode=611 reward=0.7520545 (467.48 it/sec) -training >> step=3660200, episode=611 reward=0.7710234 (528.50 it/sec) -training >> step=3660300, episode=611 reward=0.7584609 (514.54 it/sec) -training >> step=3660400, episode=611 reward=0.770545 (478.04 it/sec) -training >> step=3660500, episode=611 reward=0.7709637 (453.05 it/sec) -training >> step=3660600, episode=611 reward=0.7668471 (416.52 it/sec) -training >> step=3660700, episode=611 reward=0.7795193 (502.97 it/sec) -training >> step=3660800, episode=611 reward=0.7723875 (487.43 it/sec) -training >> step=3660900, episode=611 reward=0.7836003 (444.93 it/sec) -training >> step=3661000, episode=611 reward=0.7767777 (442.66 it/sec) -training >> step=3661100, episode=611 reward=0.7861867 (476.23 it/sec) -training >> step=3661200, episode=611 reward=0.7640302 (481.00 it/sec) -training >> step=3661300, episode=611 reward=0.7918254 (504.32 it/sec) -training >> step=3661400, episode=611 reward=0.7671696 (497.43 it/sec) -training >> step=3661500, episode=611 reward=0.7792377 (493.03 it/sec) -training >> step=3661600, episode=611 reward=0.7934142 (476.00 it/sec) -training >> step=3661700, episode=611 reward=0.7674858 (452.19 it/sec) -training >> step=3661800, episode=611 reward=0.7832444 (541.14 it/sec) -training >> step=3661900, episode=611 reward=0.7654927 (483.92 it/sec) -training >> step=3662000, episode=611 reward=0.7666959 (528.14 it/sec) -training >> step=3662100, episode=611 reward=0.7807457 (460.51 it/sec) -training >> step=3662200, episode=611 reward=0.7691587 (493.51 it/sec) -training >> step=3662300, episode=611 reward=0.8019347 (479.81 it/sec) -training >> step=3662400, episode=611 reward=0.7781063 (484.21 it/sec) -training >> step=3662500, episode=611 reward=0.7654485 (482.72 it/sec) -training >> step=3662600, episode=611 reward=0.768529 (490.78 it/sec) -training >> step=3662700, episode=611 reward=0.7696227 (516.90 it/sec) -training >> step=3662800, episode=611 reward=0.7695231 (511.16 it/sec) -training >> step=3662900, episode=611 reward=0.8020851 (489.18 it/sec) -training >> step=3663000, episode=611 reward=0.7667218 (499.77 it/sec) -training >> step=3663100, episode=611 reward=0.7891528 (467.09 it/sec) -training >> step=3663200, episode=611 reward=0.7852136 (500.88 it/sec) -training >> step=3663300, episode=611 reward=0.7826891 (473.93 it/sec) -training >> step=3663400, episode=611 reward=0.7595392 (509.36 it/sec) -training >> step=3663500, episode=611 reward=0.7612616 (490.09 it/sec) -training >> step=3663600, episode=611 reward=0.7631585 (487.17 it/sec) -training >> step=3663700, episode=611 reward=0.7659073 (499.50 it/sec) -training >> step=3663800, episode=611 reward=0.7879469 (539.96 it/sec) -training >> step=3663900, episode=611 reward=0.754873 (481.37 it/sec) -training >> step=3664000, episode=611 reward=0.7786944 (461.81 it/sec) -training >> step=3664100, episode=611 reward=0.7695897 (509.38 it/sec) -training >> step=3664200, episode=611 reward=0.7649986 (469.80 it/sec) -training >> step=3664300, episode=611 reward=0.7710685 (502.67 it/sec) -training >> step=3664400, episode=611 reward=0.7799985 (455.94 it/sec) -training >> step=3664500, episode=611 reward=0.7510584 (452.98 it/sec) -training >> step=3664600, episode=611 reward=0.7794658 (484.95 it/sec) -training >> step=3664700, episode=611 reward=0.757255 (492.05 it/sec) -training >> step=3664800, episode=611 reward=0.7924259 (493.54 it/sec) -training >> step=3664900, episode=611 reward=0.7798478 (486.84 it/sec) -training >> step=3665000, episode=611 reward=0.7840838 (500.31 it/sec) -training >> step=3665100, episode=611 reward=0.7768685 (487.01 it/sec) -training >> step=3665200, episode=611 reward=0.7781278 (530.30 it/sec) -training >> step=3665300, episode=612 reward=0.7563812 (133.32 it/sec) -training >> step=3665400, episode=612 reward=0.7769579 (474.58 it/sec) -training >> step=3665500, episode=612 reward=0.7727652 (422.89 it/sec) -training >> step=3665600, episode=612 reward=0.7909559 (539.32 it/sec) -training >> step=3665700, episode=612 reward=0.7748644 (456.32 it/sec) -training >> step=3665800, episode=612 reward=0.755631 (485.16 it/sec) -training >> step=3665900, episode=612 reward=0.7725741 (454.25 it/sec) -training >> step=3666000, episode=612 reward=0.7753042 (499.41 it/sec) -training >> step=3666100, episode=612 reward=0.7724515 (513.05 it/sec) -training >> step=3666200, episode=612 reward=0.7730201 (489.62 it/sec) -training >> step=3666300, episode=612 reward=0.7822902 (487.76 it/sec) -training >> step=3666400, episode=612 reward=0.7876896 (482.13 it/sec) -training >> step=3666500, episode=612 reward=0.7896867 (472.44 it/sec) -training >> step=3666600, episode=612 reward=0.7736884 (486.03 it/sec) -training >> step=3666700, episode=612 reward=0.7877692 (500.11 it/sec) -training >> step=3666800, episode=612 reward=0.7698377 (497.54 it/sec) -training >> step=3666900, episode=612 reward=0.7563422 (486.70 it/sec) -training >> step=3667000, episode=612 reward=0.7759892 (462.64 it/sec) -training >> step=3667100, episode=612 reward=0.7493972 (480.91 it/sec) -training >> step=3667200, episode=612 reward=0.7934883 (476.39 it/sec) -training >> step=3667300, episode=612 reward=0.7657508 (446.82 it/sec) -training >> step=3667400, episode=612 reward=0.7831542 (509.67 it/sec) -training >> step=3667500, episode=612 reward=0.7469122 (524.12 it/sec) -training >> step=3667600, episode=612 reward=0.7806718 (480.00 it/sec) -training >> step=3667700, episode=612 reward=0.777786 (481.56 it/sec) -training >> step=3667800, episode=612 reward=0.7845978 (505.73 it/sec) -training >> step=3667900, episode=612 reward=0.7833906 (465.06 it/sec) -training >> step=3668000, episode=612 reward=0.7577559 (505.06 it/sec) -training >> step=3668100, episode=612 reward=0.7788833 (507.05 it/sec) -training >> step=3668200, episode=612 reward=0.7708888 (462.09 it/sec) -training >> step=3668300, episode=612 reward=0.772881 (481.54 it/sec) -training >> step=3668400, episode=612 reward=0.7668279 (515.67 it/sec) -training >> step=3668500, episode=612 reward=0.7942415 (517.96 it/sec) -training >> step=3668600, episode=612 reward=0.7740377 (481.82 it/sec) -training >> step=3668700, episode=612 reward=0.7755585 (468.80 it/sec) -training >> step=3668800, episode=612 reward=0.7751558 (513.11 it/sec) -training >> step=3668900, episode=612 reward=0.774619 (476.96 it/sec) -training >> step=3669000, episode=612 reward=0.7866184 (533.50 it/sec) -training >> step=3669100, episode=612 reward=0.7814673 (515.15 it/sec) -training >> step=3669200, episode=612 reward=0.7710394 (499.77 it/sec) -training >> step=3669300, episode=612 reward=0.786817 (484.80 it/sec) -training >> step=3669400, episode=612 reward=0.7705823 (509.89 it/sec) -training >> step=3669500, episode=612 reward=0.7983372 (526.29 it/sec) -training >> step=3669600, episode=612 reward=0.7814778 (481.41 it/sec) -training >> step=3669700, episode=612 reward=0.7728335 (480.18 it/sec) -training >> step=3669800, episode=612 reward=0.7690604 (476.32 it/sec) -training >> step=3669900, episode=612 reward=0.7666711 (454.12 it/sec) -training >> step=3670000, episode=612 reward=0.76079 (503.16 it/sec) -training >> step=3670100, episode=612 reward=0.7689434 (482.98 it/sec) -training >> step=3670200, episode=612 reward=0.7701887 (481.87 it/sec) -training >> step=3670300, episode=612 reward=0.7875611 (492.83 it/sec) -training >> step=3670400, episode=612 reward=0.7509315 (512.42 it/sec) -training >> step=3670500, episode=612 reward=0.7939324 (505.53 it/sec) -training >> step=3670600, episode=612 reward=0.7797509 (495.76 it/sec) -training >> step=3670700, episode=612 reward=0.7703503 (473.59 it/sec) -training >> step=3670800, episode=612 reward=0.7861129 (500.97 it/sec) -training >> step=3670900, episode=612 reward=0.761324 (507.31 it/sec) -training >> step=3671000, episode=612 reward=0.777107 (510.89 it/sec) -training >> step=3671100, episode=612 reward=0.7631277 (475.75 it/sec) -training >> step=3671200, episode=612 reward=0.7410216 (457.09 it/sec) -training >> step=3671300, episode=613 reward=0.7707665 (146.30 it/sec) -training >> step=3671400, episode=613 reward=0.7792853 (491.73 it/sec) -training >> step=3671500, episode=613 reward=0.7817395 (477.57 it/sec) -training >> step=3671600, episode=613 reward=0.7857918 (497.66 it/sec) -training >> step=3671700, episode=613 reward=0.7682683 (337.26 it/sec) -training >> step=3671800, episode=613 reward=0.7845789 (519.14 it/sec) -training >> step=3671900, episode=613 reward=0.7775268 (482.98 it/sec) -training >> step=3672000, episode=613 reward=0.763242 (510.84 it/sec) -training >> step=3672100, episode=613 reward=0.7885327 (511.94 it/sec) -training >> step=3672200, episode=613 reward=0.792265 (449.28 it/sec) -training >> step=3672300, episode=613 reward=0.7628545 (485.78 it/sec) -training >> step=3672400, episode=613 reward=0.7657539 (485.52 it/sec) -training >> step=3672500, episode=613 reward=0.774714 (473.47 it/sec) -training >> step=3672600, episode=613 reward=0.7761766 (505.50 it/sec) -training >> step=3672700, episode=613 reward=0.7637791 (476.82 it/sec) -training >> step=3672800, episode=613 reward=0.7954406 (520.78 it/sec) -training >> step=3672900, episode=613 reward=0.770824 (505.27 it/sec) -training >> step=3673000, episode=613 reward=0.7745797 (488.49 it/sec) -training >> step=3673100, episode=613 reward=0.781663 (481.17 it/sec) -training >> step=3673200, episode=613 reward=0.7739198 (475.00 it/sec) -training >> step=3673300, episode=613 reward=0.7691069 (503.20 it/sec) -training >> step=3673400, episode=613 reward=0.7739533 (489.64 it/sec) -training >> step=3673500, episode=613 reward=0.7553748 (486.52 it/sec) -training >> step=3673600, episode=613 reward=0.7784843 (453.64 it/sec) -training >> step=3673700, episode=613 reward=0.7489804 (496.53 it/sec) -training >> step=3673800, episode=613 reward=0.7581983 (486.86 it/sec) -training >> step=3673900, episode=613 reward=0.7762721 (497.27 it/sec) -training >> step=3674000, episode=613 reward=0.7770359 (469.15 it/sec) -training >> step=3674100, episode=613 reward=0.7772764 (502.54 it/sec) -training >> step=3674200, episode=613 reward=0.779245 (465.07 it/sec) -training >> step=3674300, episode=613 reward=0.7786441 (451.39 it/sec) -training >> step=3674400, episode=613 reward=0.7761822 (495.37 it/sec) -training >> step=3674500, episode=613 reward=0.7899092 (455.79 it/sec) -training >> step=3674600, episode=613 reward=0.7914485 (500.05 it/sec) -training >> step=3674700, episode=613 reward=0.7822165 (474.98 it/sec) -training >> step=3674800, episode=613 reward=0.7748728 (512.34 it/sec) -training >> step=3674900, episode=613 reward=0.7790865 (515.86 it/sec) -training >> step=3675000, episode=613 reward=0.7596711 (462.07 it/sec) -training >> step=3675100, episode=613 reward=0.7548816 (487.58 it/sec) -training >> step=3675200, episode=613 reward=0.784804 (527.27 it/sec) -training >> step=3675300, episode=613 reward=0.7637919 (508.12 it/sec) -training >> step=3675400, episode=613 reward=0.752386 (521.33 it/sec) -training >> step=3675500, episode=613 reward=0.777612 (432.62 it/sec) -training >> step=3675600, episode=613 reward=0.7863616 (529.40 it/sec) -training >> step=3675700, episode=613 reward=0.7634629 (505.41 it/sec) -training >> step=3675800, episode=613 reward=0.7911335 (508.85 it/sec) -training >> step=3675900, episode=613 reward=0.7744792 (528.36 it/sec) -training >> step=3676000, episode=613 reward=0.7925703 (493.23 it/sec) -training >> step=3676100, episode=613 reward=0.8071004 (421.10 it/sec) -training >> step=3676200, episode=613 reward=0.782917 (474.75 it/sec) -training >> step=3676300, episode=613 reward=0.7659944 (502.61 it/sec) -training >> step=3676400, episode=613 reward=0.7655039 (527.03 it/sec) -training >> step=3676500, episode=613 reward=0.7675223 (480.27 it/sec) -training >> step=3676600, episode=613 reward=0.7683277 (486.91 it/sec) -training >> step=3676700, episode=613 reward=0.7617572 (497.26 it/sec) -training >> step=3676800, episode=613 reward=0.7653628 (438.05 it/sec) -training >> step=3676900, episode=613 reward=0.7566742 (511.50 it/sec) -training >> step=3677000, episode=613 reward=0.7836878 (465.48 it/sec) -training >> step=3677100, episode=613 reward=0.7879053 (514.86 it/sec) -training >> step=3677200, episode=613 reward=0.7719667 (483.59 it/sec) -training >> step=3677300, episode=614 reward=0.7710904 (135.07 it/sec) -training >> step=3677400, episode=614 reward=0.7625439 (421.32 it/sec) -training >> step=3677500, episode=614 reward=0.7858958 (437.15 it/sec) -training >> step=3677600, episode=614 reward=0.7793162 (475.07 it/sec) -training >> step=3677700, episode=614 reward=0.7645872 (516.83 it/sec) -training >> step=3677800, episode=614 reward=0.74788 (436.56 it/sec) -training >> step=3677900, episode=614 reward=0.7872361 (370.22 it/sec) -training >> step=3678000, episode=614 reward=0.7867529 (495.64 it/sec) -training >> step=3678100, episode=614 reward=0.7661148 (517.17 it/sec) -training >> step=3678200, episode=614 reward=0.7902917 (466.19 it/sec) -training >> step=3678300, episode=614 reward=0.7672412 (461.45 it/sec) -training >> step=3678400, episode=614 reward=0.7638927 (456.98 it/sec) -training >> step=3678500, episode=614 reward=0.7842698 (453.91 it/sec) -training >> step=3678600, episode=614 reward=0.7662107 (485.64 it/sec) -training >> step=3678700, episode=614 reward=0.776831 (484.33 it/sec) -training >> step=3678800, episode=614 reward=0.7848136 (479.53 it/sec) -training >> step=3678900, episode=614 reward=0.7881233 (514.66 it/sec) -training >> step=3679000, episode=614 reward=0.7681512 (502.30 it/sec) -training >> step=3679100, episode=614 reward=0.7845681 (480.13 it/sec) -training >> step=3679200, episode=614 reward=0.7980852 (473.38 it/sec) -training >> step=3679300, episode=614 reward=0.7637767 (485.38 it/sec) -training >> step=3679400, episode=614 reward=0.7648038 (468.10 it/sec) -training >> step=3679500, episode=614 reward=0.7581149 (501.80 it/sec) -training >> step=3679600, episode=614 reward=0.7568604 (464.13 it/sec) -training >> step=3679700, episode=614 reward=0.7662145 (475.66 it/sec) -training >> step=3679800, episode=614 reward=0.7748406 (478.02 it/sec) -training >> step=3679900, episode=614 reward=0.7579627 (518.17 it/sec) -training >> step=3680000, episode=614 reward=0.791904 (515.02 it/sec) -training >> step=3680100, episode=614 reward=0.7808338 (446.44 it/sec) -training >> step=3680200, episode=614 reward=0.7671003 (427.90 it/sec) -training >> step=3680300, episode=614 reward=0.7674803 (507.97 it/sec) -training >> step=3680400, episode=614 reward=0.7658839 (501.07 it/sec) -training >> step=3680500, episode=614 reward=0.7801844 (478.32 it/sec) -training >> step=3680600, episode=614 reward=0.7881919 (490.65 it/sec) -training >> step=3680700, episode=614 reward=0.7653971 (472.72 it/sec) -training >> step=3680800, episode=614 reward=0.7781999 (492.55 it/sec) -training >> step=3680900, episode=614 reward=0.790232 (416.10 it/sec) -training >> step=3681000, episode=614 reward=0.7721642 (461.53 it/sec) -training >> step=3681100, episode=614 reward=0.7666088 (478.49 it/sec) -training >> step=3681200, episode=614 reward=0.7789757 (515.72 it/sec) -training >> step=3681300, episode=614 reward=0.755451 (493.58 it/sec) -training >> step=3681400, episode=614 reward=0.7742859 (555.50 it/sec) -training >> step=3681500, episode=614 reward=0.7765083 (485.84 it/sec) -training >> step=3681600, episode=614 reward=0.7544693 (511.87 it/sec) -training >> step=3681700, episode=614 reward=0.7727222 (496.57 it/sec) -training >> step=3681800, episode=614 reward=0.7936953 (456.53 it/sec) -training >> step=3681900, episode=614 reward=0.7492509 (509.14 it/sec) -training >> step=3682000, episode=614 reward=0.7733812 (408.83 it/sec) -training >> step=3682100, episode=614 reward=0.7810277 (467.74 it/sec) -training >> step=3682200, episode=614 reward=0.7607436 (408.21 it/sec) -training >> step=3682300, episode=614 reward=0.7736286 (447.35 it/sec) -training >> step=3682400, episode=614 reward=0.7727833 (494.73 it/sec) -training >> step=3682500, episode=614 reward=0.7643397 (492.69 it/sec) -training >> step=3682600, episode=614 reward=0.7650945 (447.09 it/sec) -training >> step=3682700, episode=614 reward=0.7587288 (518.14 it/sec) -training >> step=3682800, episode=614 reward=0.7672068 (510.95 it/sec) -training >> step=3682900, episode=614 reward=0.7788103 (548.06 it/sec) -training >> step=3683000, episode=614 reward=0.7754948 (520.13 it/sec) -training >> step=3683100, episode=614 reward=0.7813709 (506.80 it/sec) -training >> step=3683200, episode=614 reward=0.756414 (494.55 it/sec) -training >> step=3683300, episode=615 reward=0.7801353 (65.12 it/sec) -training >> step=3683400, episode=615 reward=0.772092 (454.49 it/sec) -training >> step=3683500, episode=615 reward=0.7743517 (458.96 it/sec) -training >> step=3683600, episode=615 reward=0.7818988 (425.03 it/sec) -training >> step=3683700, episode=615 reward=0.783236 (415.00 it/sec) -training >> step=3683800, episode=615 reward=0.7520438 (491.79 it/sec) -training >> step=3683900, episode=615 reward=0.7713705 (419.21 it/sec) -training >> step=3684000, episode=615 reward=0.7704686 (446.24 it/sec) -training >> step=3684100, episode=615 reward=0.7711065 (291.95 it/sec) -training >> step=3684200, episode=615 reward=0.7789577 (406.17 it/sec) -training >> step=3684300, episode=615 reward=0.7776113 (434.37 it/sec) -training >> step=3684400, episode=615 reward=0.8057569 (449.77 it/sec) -training >> step=3684500, episode=615 reward=0.7701422 (442.46 it/sec) -training >> step=3684600, episode=615 reward=0.7961419 (546.46 it/sec) -training >> step=3684700, episode=615 reward=0.7847518 (559.93 it/sec) -training >> step=3684800, episode=615 reward=0.7740295 (451.99 it/sec) -training >> step=3684900, episode=615 reward=0.7760506 (506.57 it/sec) -training >> step=3685000, episode=615 reward=0.775853 (491.04 it/sec) -training >> step=3685100, episode=615 reward=0.7641109 (479.20 it/sec) -training >> step=3685200, episode=615 reward=0.7651292 (534.02 it/sec) -training >> step=3685300, episode=615 reward=0.7786872 (498.12 it/sec) -training >> step=3685400, episode=615 reward=0.7660127 (467.28 it/sec) -training >> step=3685500, episode=615 reward=0.7670806 (507.29 it/sec) -training >> step=3685600, episode=615 reward=0.7744122 (485.87 it/sec) -training >> step=3685700, episode=615 reward=0.7748995 (561.00 it/sec) -training >> step=3685800, episode=615 reward=0.7782698 (529.43 it/sec) -training >> step=3685900, episode=615 reward=0.7763487 (477.50 it/sec) -training >> step=3686000, episode=615 reward=0.7735571 (499.46 it/sec) -training >> step=3686100, episode=615 reward=0.7613144 (520.88 it/sec) -training >> step=3686200, episode=615 reward=0.7861245 (501.20 it/sec) -training >> step=3686300, episode=615 reward=0.77787 (453.21 it/sec) -training >> step=3686400, episode=615 reward=0.7884294 (456.77 it/sec) -training >> step=3686500, episode=615 reward=0.7868395 (512.33 it/sec) -training >> step=3686600, episode=615 reward=0.7649119 (508.00 it/sec) -training >> step=3686700, episode=615 reward=0.7581568 (515.48 it/sec) -training >> step=3686800, episode=615 reward=0.7694495 (500.93 it/sec) -training >> step=3686900, episode=615 reward=0.7638353 (482.83 it/sec) -training >> step=3687000, episode=615 reward=0.7914857 (457.32 it/sec) -training >> step=3687100, episode=615 reward=0.7802994 (435.33 it/sec) -training >> step=3687200, episode=615 reward=0.768579 (469.39 it/sec) -training >> step=3687300, episode=615 reward=0.7759725 (463.71 it/sec) -training >> step=3687400, episode=615 reward=0.7812034 (434.12 it/sec) -training >> step=3687500, episode=615 reward=0.756589 (517.21 it/sec) -training >> step=3687600, episode=615 reward=0.7809147 (507.86 it/sec) -training >> step=3687700, episode=615 reward=0.7706511 (536.00 it/sec) -training >> step=3687800, episode=615 reward=0.7648998 (472.85 it/sec) -training >> step=3687900, episode=615 reward=0.761388 (462.20 it/sec) -training >> step=3688000, episode=615 reward=0.7771837 (497.67 it/sec) -training >> step=3688100, episode=615 reward=0.784586 (446.71 it/sec) -training >> step=3688200, episode=615 reward=0.7767568 (498.17 it/sec) -training >> step=3688300, episode=615 reward=0.7816956 (524.42 it/sec) -training >> step=3688400, episode=615 reward=0.7941299 (485.90 it/sec) -training >> step=3688500, episode=615 reward=0.7586076 (515.48 it/sec) -training >> step=3688600, episode=615 reward=0.7863198 (538.43 it/sec) -training >> step=3688700, episode=615 reward=0.796766 (529.17 it/sec) -training >> step=3688800, episode=615 reward=0.7742457 (470.19 it/sec) -training >> step=3688900, episode=615 reward=0.7687156 (498.39 it/sec) -training >> step=3689000, episode=615 reward=0.7761067 (531.83 it/sec) -training >> step=3689100, episode=615 reward=0.7577687 (486.45 it/sec) -training >> step=3689200, episode=615 reward=0.7816479 (530.70 it/sec) -training >> step=3689300, episode=616 reward=0.7717614 (141.85 it/sec) -training >> step=3689400, episode=616 reward=0.776354 (475.66 it/sec) -training >> step=3689500, episode=616 reward=0.7533994 (485.88 it/sec) -training >> step=3689600, episode=616 reward=0.7811618 (491.14 it/sec) -training >> step=3689700, episode=616 reward=0.769519 (514.98 it/sec) -training >> step=3689800, episode=616 reward=0.7691236 (521.42 it/sec) -training >> step=3689900, episode=616 reward=0.7629777 (529.38 it/sec) -training >> step=3690000, episode=616 reward=0.7541828 (510.52 it/sec) -training >> step=3690100, episode=616 reward=0.7808614 (498.96 it/sec) -training >> step=3690200, episode=616 reward=0.7875401 (384.37 it/sec) -training >> step=3690300, episode=616 reward=0.77266 (527.76 it/sec) -training >> step=3690400, episode=616 reward=0.778416 (527.27 it/sec) -training >> step=3690500, episode=616 reward=0.7899804 (507.02 it/sec) -training >> step=3690600, episode=616 reward=0.7781144 (500.25 it/sec) -training >> step=3690700, episode=616 reward=0.7819363 (530.89 it/sec) -training >> step=3690800, episode=616 reward=0.7675682 (531.15 it/sec) -training >> step=3690900, episode=616 reward=0.7699758 (508.58 it/sec) -training >> step=3691000, episode=616 reward=0.7664204 (562.93 it/sec) -training >> step=3691100, episode=616 reward=0.7501095 (563.11 it/sec) -training >> step=3691200, episode=616 reward=0.7849818 (490.21 it/sec) -training >> step=3691300, episode=616 reward=0.7635185 (530.27 it/sec) -training >> step=3691400, episode=616 reward=0.7753289 (521.61 it/sec) -training >> step=3691500, episode=616 reward=0.765733 (547.76 it/sec) -training >> step=3691600, episode=616 reward=0.7699251 (534.87 it/sec) -training >> step=3691700, episode=616 reward=0.7956131 (447.72 it/sec) -training >> step=3691800, episode=616 reward=0.7699311 (444.80 it/sec) -training >> step=3691900, episode=616 reward=0.7888073 (449.41 it/sec) -training >> step=3692000, episode=616 reward=0.7948518 (507.52 it/sec) -training >> step=3692100, episode=616 reward=0.770963 (510.06 it/sec) -training >> step=3692200, episode=616 reward=0.7806 (519.03 it/sec) -training >> step=3692300, episode=616 reward=0.7644822 (508.05 it/sec) -training >> step=3692400, episode=616 reward=0.7832362 (487.05 it/sec) -training >> step=3692500, episode=616 reward=0.7681001 (493.83 it/sec) -training >> step=3692600, episode=616 reward=0.7800059 (486.98 it/sec) -training >> step=3692700, episode=616 reward=0.7694228 (438.01 it/sec) -training >> step=3692800, episode=616 reward=0.7891515 (420.10 it/sec) -training >> step=3692900, episode=616 reward=0.7725319 (445.20 it/sec) -training >> step=3693000, episode=616 reward=0.7780284 (494.02 it/sec) -training >> step=3693100, episode=616 reward=0.7662404 (501.45 it/sec) -training >> step=3693200, episode=616 reward=0.7605038 (490.26 it/sec) -training >> step=3693300, episode=616 reward=0.7642124 (530.54 it/sec) -training >> step=3693400, episode=616 reward=0.7801848 (495.06 it/sec) -training >> step=3693500, episode=616 reward=0.7773402 (527.44 it/sec) -training >> step=3693600, episode=616 reward=0.7807223 (512.20 it/sec) -training >> step=3693700, episode=616 reward=0.768963 (452.93 it/sec) -training >> step=3693800, episode=616 reward=0.7667522 (485.54 it/sec) -training >> step=3693900, episode=616 reward=0.7608218 (498.78 it/sec) -training >> step=3694000, episode=616 reward=0.7843789 (560.76 it/sec) -training >> step=3694100, episode=616 reward=0.7514339 (513.94 it/sec) -training >> step=3694200, episode=616 reward=0.7606328 (492.99 it/sec) -training >> step=3694300, episode=616 reward=0.7858081 (528.76 it/sec) -training >> step=3694400, episode=616 reward=0.751748 (500.54 it/sec) -training >> step=3694500, episode=616 reward=0.773329 (501.48 it/sec) -training >> step=3694600, episode=616 reward=0.7841914 (502.96 it/sec) -training >> step=3694700, episode=616 reward=0.7811624 (496.23 it/sec) -training >> step=3694800, episode=616 reward=0.7770694 (491.35 it/sec) -training >> step=3694900, episode=616 reward=0.7553105 (486.55 it/sec) -training >> step=3695000, episode=616 reward=0.7903067 (533.29 it/sec) -training >> step=3695100, episode=616 reward=0.7818059 (557.31 it/sec) -training >> step=3695200, episode=616 reward=0.7622615 (489.68 it/sec) -training >> step=3695300, episode=617 reward=0.7807447 (109.42 it/sec) -training >> step=3695400, episode=617 reward=0.7815334 (441.15 it/sec) -training >> step=3695500, episode=617 reward=0.7623163 (432.39 it/sec) -training >> step=3695600, episode=617 reward=0.7762349 (499.16 it/sec) -training >> step=3695700, episode=617 reward=0.7800401 (508.71 it/sec) -training >> step=3695800, episode=617 reward=0.7810913 (477.16 it/sec) -training >> step=3695900, episode=617 reward=0.7664712 (505.25 it/sec) -training >> step=3696000, episode=617 reward=0.7550566 (507.64 it/sec) -training >> step=3696100, episode=617 reward=0.7825592 (508.34 it/sec) -training >> step=3696200, episode=617 reward=0.7801633 (500.98 it/sec) -training >> step=3696300, episode=617 reward=0.7555639 (507.65 it/sec) -training >> step=3696400, episode=617 reward=0.7708726 (351.76 it/sec) -training >> step=3696500, episode=617 reward=0.768263 (513.75 it/sec) -training >> step=3696600, episode=617 reward=0.7798804 (485.55 it/sec) -training >> step=3696700, episode=617 reward=0.7898161 (455.26 it/sec) -training >> step=3696800, episode=617 reward=0.7664597 (415.17 it/sec) -training >> step=3696900, episode=617 reward=0.7718875 (380.28 it/sec) -training >> step=3697000, episode=617 reward=0.7753106 (420.01 it/sec) -training >> step=3697100, episode=617 reward=0.759371 (490.85 it/sec) -training >> step=3697200, episode=617 reward=0.7802674 (473.98 it/sec) -training >> step=3697300, episode=617 reward=0.774712 (491.71 it/sec) -training >> step=3697400, episode=617 reward=0.7801057 (474.23 it/sec) -training >> step=3697500, episode=617 reward=0.7861115 (461.70 it/sec) -training >> step=3697600, episode=617 reward=0.788466 (504.31 it/sec) -training >> step=3697700, episode=617 reward=0.7661319 (434.76 it/sec) -training >> step=3697800, episode=617 reward=0.7697654 (451.00 it/sec) -training >> step=3697900, episode=617 reward=0.763123 (517.89 it/sec) -training >> step=3698000, episode=617 reward=0.7687261 (535.76 it/sec) -training >> step=3698100, episode=617 reward=0.7777553 (535.37 it/sec) -training >> step=3698200, episode=617 reward=0.7811487 (539.82 it/sec) -training >> step=3698300, episode=617 reward=0.7836927 (523.91 it/sec) -training >> step=3698400, episode=617 reward=0.7612818 (468.05 it/sec) -training >> step=3698500, episode=617 reward=0.7697253 (529.46 it/sec) -training >> step=3698600, episode=617 reward=0.7636955 (468.79 it/sec) -training >> step=3698700, episode=617 reward=0.799803 (458.06 it/sec) -training >> step=3698800, episode=617 reward=0.7916459 (512.38 it/sec) -training >> step=3698900, episode=617 reward=0.7510909 (500.46 it/sec) -training >> step=3699000, episode=617 reward=0.7558511 (543.74 it/sec) -training >> step=3699100, episode=617 reward=0.7729114 (509.33 it/sec) -training >> step=3699200, episode=617 reward=0.7583638 (452.71 it/sec) -training >> step=3699300, episode=617 reward=0.7642823 (477.81 it/sec) -training >> step=3699400, episode=617 reward=0.7883855 (488.26 it/sec) -training >> step=3699500, episode=617 reward=0.7636555 (486.67 it/sec) -training >> step=3699600, episode=617 reward=0.7602019 (512.14 it/sec) -training >> step=3699700, episode=617 reward=0.7878616 (534.39 it/sec) -training >> step=3699800, episode=617 reward=0.7684901 (551.83 it/sec) -training >> step=3699900, episode=617 reward=0.778549 (510.94 it/sec) -training >> step=3700000, episode=617 reward=0.7587389 (506.69 it/sec) -training >> step=3700100, episode=617 reward=0.7714435 (560.92 it/sec) -training >> step=3700200, episode=617 reward=0.7839144 (503.48 it/sec) -training >> step=3700300, episode=617 reward=0.7710115 (551.45 it/sec) -training >> step=3700400, episode=617 reward=0.7835088 (504.81 it/sec) -training >> step=3700500, episode=617 reward=0.7616126 (495.83 it/sec) -training >> step=3700600, episode=617 reward=0.770488 (432.56 it/sec) -training >> step=3700700, episode=617 reward=0.773124 (466.03 it/sec) -training >> step=3700800, episode=617 reward=0.7630516 (493.87 it/sec) -training >> step=3700900, episode=617 reward=0.7582406 (496.34 it/sec) -training >> step=3701000, episode=617 reward=0.7863137 (526.04 it/sec) -training >> step=3701100, episode=617 reward=0.7678938 (491.40 it/sec) -training >> step=3701200, episode=617 reward=0.7819952 (527.05 it/sec) -training >> step=3701300, episode=618 reward=0.7615936 (101.38 it/sec) -training >> step=3701400, episode=618 reward=0.7761607 (518.16 it/sec) -training >> step=3701500, episode=618 reward=0.750999 (492.36 it/sec) -training >> step=3701600, episode=618 reward=0.7643731 (513.58 it/sec) -training >> step=3701700, episode=618 reward=0.7967255 (515.77 it/sec) -training >> step=3701800, episode=618 reward=0.7606109 (533.23 it/sec) -training >> step=3701900, episode=618 reward=0.7909454 (509.51 it/sec) -training >> step=3702000, episode=618 reward=0.776246 (469.96 it/sec) -training >> step=3702100, episode=618 reward=0.7774416 (498.70 it/sec) -training >> step=3702200, episode=618 reward=0.7571474 (512.82 it/sec) -training >> step=3702300, episode=618 reward=0.7377187 (546.69 it/sec) -training >> step=3702400, episode=618 reward=0.7574307 (534.65 it/sec) -training >> step=3702500, episode=618 reward=0.7901341 (357.55 it/sec) -training >> step=3702600, episode=618 reward=0.7675881 (507.67 it/sec) -training >> step=3702700, episode=618 reward=0.784178 (518.40 it/sec) -training >> step=3702800, episode=618 reward=0.7789932 (546.75 it/sec) -training >> step=3702900, episode=618 reward=0.7546078 (542.23 it/sec) -training >> step=3703000, episode=618 reward=0.7874696 (535.12 it/sec) -training >> step=3703100, episode=618 reward=0.7928042 (527.12 it/sec) -training >> step=3703200, episode=618 reward=0.7709763 (491.43 it/sec) -training >> step=3703300, episode=618 reward=0.7577202 (513.08 it/sec) -training >> step=3703400, episode=618 reward=0.796963 (467.69 it/sec) -training >> step=3703500, episode=618 reward=0.7907829 (496.76 it/sec) -training >> step=3703600, episode=618 reward=0.767916 (506.18 it/sec) -training >> step=3703700, episode=618 reward=0.7694878 (423.15 it/sec) -training >> step=3703800, episode=618 reward=0.7498476 (515.18 it/sec) -training >> step=3703900, episode=618 reward=0.7709219 (558.18 it/sec) -training >> step=3704000, episode=618 reward=0.7589934 (533.60 it/sec) -training >> step=3704100, episode=618 reward=0.7869334 (531.89 it/sec) -training >> step=3704200, episode=618 reward=0.7726002 (531.89 it/sec) -training >> step=3704300, episode=618 reward=0.7794069 (487.65 it/sec) -training >> step=3704400, episode=618 reward=0.7675657 (498.48 it/sec) -training >> step=3704500, episode=618 reward=0.7521769 (478.95 it/sec) -training >> step=3704600, episode=618 reward=0.7761424 (485.39 it/sec) -training >> step=3704700, episode=618 reward=0.7838293 (461.70 it/sec) -training >> step=3704800, episode=618 reward=0.7776858 (471.32 it/sec) -training >> step=3704900, episode=618 reward=0.7885811 (574.67 it/sec) -training >> step=3705000, episode=618 reward=0.7597663 (534.61 it/sec) -training >> step=3705100, episode=618 reward=0.7789927 (521.71 it/sec) -training >> step=3705200, episode=618 reward=0.7623693 (516.05 it/sec) -training >> step=3705300, episode=618 reward=0.7737038 (551.15 it/sec) -training >> step=3705400, episode=618 reward=0.7699016 (524.56 it/sec) -training >> step=3705500, episode=618 reward=0.7784123 (532.07 it/sec) -training >> step=3705600, episode=618 reward=0.7707303 (538.47 it/sec) -training >> step=3705700, episode=618 reward=0.7694756 (536.08 it/sec) -training >> step=3705800, episode=618 reward=0.7798627 (522.40 it/sec) -training >> step=3705900, episode=618 reward=0.7627892 (507.05 it/sec) -training >> step=3706000, episode=618 reward=0.7766429 (535.22 it/sec) -training >> step=3706100, episode=618 reward=0.794661 (559.59 it/sec) -training >> step=3706200, episode=618 reward=0.7680648 (519.68 it/sec) -training >> step=3706300, episode=618 reward=0.7738096 (509.10 it/sec) -training >> step=3706400, episode=618 reward=0.7686939 (517.78 it/sec) -training >> step=3706500, episode=618 reward=0.7593741 (534.42 it/sec) -training >> step=3706600, episode=618 reward=0.7974306 (524.51 it/sec) -training >> step=3706700, episode=618 reward=0.7685947 (482.18 it/sec) -training >> step=3706800, episode=618 reward=0.7991621 (506.18 it/sec) -training >> step=3706900, episode=618 reward=0.7842467 (496.98 it/sec) -training >> step=3707000, episode=618 reward=0.7686709 (528.76 it/sec) -training >> step=3707100, episode=618 reward=0.7678817 (506.23 it/sec) -training >> step=3707200, episode=618 reward=0.7904438 (501.45 it/sec) -training >> step=3707300, episode=619 reward=0.7834467 (136.98 it/sec) -training >> step=3707400, episode=619 reward=0.7988178 (521.52 it/sec) -training >> step=3707500, episode=619 reward=0.73666 (529.44 it/sec) -training >> step=3707600, episode=619 reward=0.7459382 (469.73 it/sec) -training >> step=3707700, episode=619 reward=0.7715687 (506.43 it/sec) -training >> step=3707800, episode=619 reward=0.7701219 (547.49 it/sec) -training >> step=3707900, episode=619 reward=0.7677034 (517.68 it/sec) -training >> step=3708000, episode=619 reward=0.7473543 (439.97 it/sec) -training >> step=3708100, episode=619 reward=0.7604414 (480.98 it/sec) -training >> step=3708200, episode=619 reward=0.7812791 (482.58 it/sec) -training >> step=3708300, episode=619 reward=0.7871845 (463.94 it/sec) -training >> step=3708400, episode=619 reward=0.768055 (453.93 it/sec) -training >> step=3708500, episode=619 reward=0.7617264 (464.34 it/sec) -training >> step=3708600, episode=619 reward=0.7700611 (453.19 it/sec) -training >> step=3708700, episode=619 reward=0.7513323 (292.56 it/sec) -training >> step=3708800, episode=619 reward=0.7879437 (434.72 it/sec) -training >> step=3708900, episode=619 reward=0.7845224 (492.85 it/sec) -training >> step=3709000, episode=619 reward=0.7800967 (418.48 it/sec) -training >> step=3709100, episode=619 reward=0.7823747 (487.14 it/sec) -training >> step=3709200, episode=619 reward=0.7688481 (542.32 it/sec) -training >> step=3709300, episode=619 reward=0.7750433 (531.13 it/sec) -training >> step=3709400, episode=619 reward=0.7770057 (524.46 it/sec) -training >> step=3709500, episode=619 reward=0.7668525 (455.27 it/sec) -training >> step=3709600, episode=619 reward=0.7674028 (499.90 it/sec) -training >> step=3709700, episode=619 reward=0.7692418 (517.40 it/sec) -training >> step=3709800, episode=619 reward=0.7601677 (518.12 it/sec) -training >> step=3709900, episode=619 reward=0.7677935 (524.63 it/sec) -training >> step=3710000, episode=619 reward=0.7797803 (499.86 it/sec) -training >> step=3710100, episode=619 reward=0.7762512 (472.61 it/sec) -training >> step=3710200, episode=619 reward=0.7528385 (537.52 it/sec) -training >> step=3710300, episode=619 reward=0.7738675 (525.00 it/sec) -training >> step=3710400, episode=619 reward=0.7689152 (578.85 it/sec) -training >> step=3710500, episode=619 reward=0.7952459 (505.99 it/sec) -training >> step=3710600, episode=619 reward=0.7902308 (511.93 it/sec) -training >> step=3710700, episode=619 reward=0.7596066 (531.60 it/sec) -training >> step=3710800, episode=619 reward=0.7821812 (522.39 it/sec) -training >> step=3710900, episode=619 reward=0.7802848 (498.88 it/sec) -training >> step=3711000, episode=619 reward=0.7698201 (460.43 it/sec) -training >> step=3711100, episode=619 reward=0.7751121 (428.06 it/sec) -training >> step=3711200, episode=619 reward=0.7761201 (453.27 it/sec) -training >> step=3711300, episode=619 reward=0.7617379 (466.63 it/sec) -training >> step=3711400, episode=619 reward=0.7846696 (514.70 it/sec) -training >> step=3711500, episode=619 reward=0.7781835 (538.76 it/sec) -training >> step=3711600, episode=619 reward=0.7956493 (508.72 it/sec) -training >> step=3711700, episode=619 reward=0.8042151 (487.26 it/sec) -training >> step=3711800, episode=619 reward=0.7888767 (535.79 it/sec) -training >> step=3711900, episode=619 reward=0.769572 (506.23 it/sec) -training >> step=3712000, episode=619 reward=0.7935834 (537.27 it/sec) -training >> step=3712100, episode=619 reward=0.7758074 (447.13 it/sec) -training >> step=3712200, episode=619 reward=0.7651027 (553.74 it/sec) -training >> step=3712300, episode=619 reward=0.7742787 (497.61 it/sec) -training >> step=3712400, episode=619 reward=0.7648173 (480.35 it/sec) -training >> step=3712500, episode=619 reward=0.7911121 (553.15 it/sec) -training >> step=3712600, episode=619 reward=0.7636193 (514.60 it/sec) -training >> step=3712700, episode=619 reward=0.77086 (505.60 it/sec) -training >> step=3712800, episode=619 reward=0.7686184 (545.30 it/sec) -training >> step=3712900, episode=619 reward=0.7639601 (538.45 it/sec) -training >> step=3713000, episode=619 reward=0.7663669 (515.60 it/sec) -training >> step=3713100, episode=619 reward=0.7816785 (506.86 it/sec) -training >> step=3713200, episode=619 reward=0.7650715 (498.23 it/sec) -training >> step=3713300, episode=620 reward=0.7739083 (68.87 it/sec) -training >> step=3713400, episode=620 reward=0.7779296 (535.28 it/sec) -training >> step=3713500, episode=620 reward=0.7810459 (493.45 it/sec) -training >> step=3713600, episode=620 reward=0.7860886 (500.06 it/sec) -training >> step=3713700, episode=620 reward=0.7461596 (527.27 it/sec) -training >> step=3713800, episode=620 reward=0.7933735 (562.10 it/sec) -training >> step=3713900, episode=620 reward=0.7866896 (525.87 it/sec) -training >> step=3714000, episode=620 reward=0.7673997 (546.27 it/sec) -training >> step=3714100, episode=620 reward=0.7816107 (480.18 it/sec) -training >> step=3714200, episode=620 reward=0.7860848 (532.39 it/sec) -training >> step=3714300, episode=620 reward=0.7950581 (514.00 it/sec) -training >> step=3714400, episode=620 reward=0.7730575 (535.99 it/sec) -training >> step=3714500, episode=620 reward=0.7578387 (519.66 it/sec) -training >> step=3714600, episode=620 reward=0.7862154 (491.19 it/sec) -training >> step=3714700, episode=620 reward=0.7664529 (517.24 it/sec) -training >> step=3714800, episode=620 reward=0.7760281 (498.08 it/sec) -training >> step=3714900, episode=620 reward=0.7727486 (387.15 it/sec) -training >> step=3715000, episode=620 reward=0.7562217 (537.61 it/sec) -training >> step=3715100, episode=620 reward=0.7662501 (477.31 it/sec) -training >> step=3715200, episode=620 reward=0.7761308 (531.05 it/sec) -training >> step=3715300, episode=620 reward=0.7756134 (498.15 it/sec) -training >> step=3715400, episode=620 reward=0.7578792 (499.40 it/sec) -training >> step=3715500, episode=620 reward=0.7811189 (535.51 it/sec) -training >> step=3715600, episode=620 reward=0.7652585 (479.44 it/sec) -training >> step=3715700, episode=620 reward=0.7774131 (515.92 it/sec) -training >> step=3715800, episode=620 reward=0.75755 (535.77 it/sec) -training >> step=3715900, episode=620 reward=0.7895273 (501.00 it/sec) -training >> step=3716000, episode=620 reward=0.7773871 (507.92 it/sec) -training >> step=3716100, episode=620 reward=0.7880121 (491.91 it/sec) -training >> step=3716200, episode=620 reward=0.7676532 (500.58 it/sec) -training >> step=3716300, episode=620 reward=0.7632695 (509.06 it/sec) -training >> step=3716400, episode=620 reward=0.7532194 (504.82 it/sec) -training >> step=3716500, episode=620 reward=0.7722731 (468.17 it/sec) -training >> step=3716600, episode=620 reward=0.7861633 (537.95 it/sec) -training >> step=3716700, episode=620 reward=0.7874276 (494.32 it/sec) -training >> step=3716800, episode=620 reward=0.7799615 (531.73 it/sec) -training >> step=3716900, episode=620 reward=0.7871184 (528.43 it/sec) -training >> step=3717000, episode=620 reward=0.7794325 (554.12 it/sec) -training >> step=3717100, episode=620 reward=0.7637841 (512.17 it/sec) -training >> step=3717200, episode=620 reward=0.765946 (502.14 it/sec) -training >> step=3717300, episode=620 reward=0.7680066 (515.22 it/sec) -training >> step=3717400, episode=620 reward=0.7748283 (554.61 it/sec) -training >> step=3717500, episode=620 reward=0.7834532 (535.47 it/sec) -training >> step=3717600, episode=620 reward=0.7754739 (534.70 it/sec) -training >> step=3717700, episode=620 reward=0.7625059 (494.33 it/sec) -training >> step=3717800, episode=620 reward=0.7528936 (493.03 it/sec) -training >> step=3717900, episode=620 reward=0.7877536 (527.86 it/sec) -training >> step=3718000, episode=620 reward=0.7733824 (541.47 it/sec) -training >> step=3718100, episode=620 reward=0.7715446 (527.51 it/sec) -training >> step=3718200, episode=620 reward=0.7791915 (520.94 it/sec) -training >> step=3718300, episode=620 reward=0.7706621 (491.82 it/sec) -training >> step=3718400, episode=620 reward=0.7520845 (522.35 it/sec) -training >> step=3718500, episode=620 reward=0.7725213 (539.38 it/sec) -training >> step=3718600, episode=620 reward=0.7693955 (493.95 it/sec) -training >> step=3718700, episode=620 reward=0.7839534 (512.21 it/sec) -training >> step=3718800, episode=620 reward=0.7537956 (502.99 it/sec) -training >> step=3718900, episode=620 reward=0.7689893 (486.26 it/sec) -training >> step=3719000, episode=620 reward=0.7792149 (529.53 it/sec) -training >> step=3719100, episode=620 reward=0.762092 (528.40 it/sec) -training >> step=3719200, episode=620 reward=0.7870788 (488.48 it/sec) -training >> step=3719300, episode=621 reward=0.7781375 (67.28 it/sec) -training >> step=3719400, episode=621 reward=0.771691 (534.33 it/sec) -training >> step=3719500, episode=621 reward=0.7473178 (522.63 it/sec) -training >> step=3719600, episode=621 reward=0.7802517 (530.07 it/sec) -training >> step=3719700, episode=621 reward=0.788848 (529.86 it/sec) -training >> step=3719800, episode=621 reward=0.7603199 (519.41 it/sec) -training >> step=3719900, episode=621 reward=0.7970584 (544.17 it/sec) -training >> step=3720000, episode=621 reward=0.7638775 (511.59 it/sec) -training >> step=3720100, episode=621 reward=0.7488657 (542.60 it/sec) -training >> step=3720200, episode=621 reward=0.7707221 (464.42 it/sec) -training >> step=3720300, episode=621 reward=0.7589818 (521.82 it/sec) -training >> step=3720400, episode=621 reward=0.7877542 (575.47 it/sec) -training >> step=3720500, episode=621 reward=0.782468 (519.80 it/sec) -training >> step=3720600, episode=621 reward=0.7615566 (525.87 it/sec) -training >> step=3720700, episode=621 reward=0.7595645 (491.23 it/sec) -training >> step=3720800, episode=621 reward=0.800979 (442.32 it/sec) -training >> step=3720900, episode=621 reward=0.7732332 (469.89 it/sec) -training >> step=3721000, episode=621 reward=0.7698829 (520.94 it/sec) -training >> step=3721100, episode=621 reward=0.7715004 (580.46 it/sec) -training >> step=3721200, episode=621 reward=0.7839288 (399.98 it/sec) -training >> step=3721300, episode=621 reward=0.7658101 (505.02 it/sec) -training >> step=3721400, episode=621 reward=0.784512 (535.14 it/sec) -training >> step=3721500, episode=621 reward=0.7739202 (552.27 it/sec) -training >> step=3721600, episode=621 reward=0.754057 (530.41 it/sec) -training >> step=3721700, episode=621 reward=0.7840711 (514.53 it/sec) -training >> step=3721800, episode=621 reward=0.7591763 (514.04 it/sec) -training >> step=3721900, episode=621 reward=0.7689603 (520.11 it/sec) -training >> step=3722000, episode=621 reward=0.7675462 (461.10 it/sec) -training >> step=3722100, episode=621 reward=0.7871804 (495.47 it/sec) -training >> step=3722200, episode=621 reward=0.7563556 (533.31 it/sec) -training >> step=3722300, episode=621 reward=0.7860143 (505.77 it/sec) -training >> step=3722400, episode=621 reward=0.7654592 (516.84 it/sec) -training >> step=3722500, episode=621 reward=0.7795403 (538.99 it/sec) -training >> step=3722600, episode=621 reward=0.7775002 (461.70 it/sec) -training >> step=3722700, episode=621 reward=0.7771872 (527.50 it/sec) -training >> step=3722800, episode=621 reward=0.7792024 (520.99 it/sec) -training >> step=3722900, episode=621 reward=0.7867176 (541.25 it/sec) -training >> step=3723000, episode=621 reward=0.7668515 (524.58 it/sec) -training >> step=3723100, episode=621 reward=0.7763759 (509.90 it/sec) -training >> step=3723200, episode=621 reward=0.7709591 (570.74 it/sec) -training >> step=3723300, episode=621 reward=0.7624073 (484.19 it/sec) -training >> step=3723400, episode=621 reward=0.7783283 (515.44 it/sec) -training >> step=3723500, episode=621 reward=0.7726537 (516.03 it/sec) -training >> step=3723600, episode=621 reward=0.795007 (544.02 it/sec) -training >> step=3723700, episode=621 reward=0.7543046 (518.14 it/sec) -training >> step=3723800, episode=621 reward=0.7749225 (500.94 it/sec) -training >> step=3723900, episode=621 reward=0.7620593 (561.92 it/sec) -training >> step=3724000, episode=621 reward=0.7685224 (535.24 it/sec) -training >> step=3724100, episode=621 reward=0.7778547 (495.73 it/sec) -training >> step=3724200, episode=621 reward=0.7664077 (518.11 it/sec) -training >> step=3724300, episode=621 reward=0.7618932 (476.10 it/sec) -training >> step=3724400, episode=621 reward=0.7858196 (460.31 it/sec) -training >> step=3724500, episode=621 reward=0.7767982 (475.17 it/sec) -training >> step=3724600, episode=621 reward=0.7896863 (446.64 it/sec) -training >> step=3724700, episode=621 reward=0.7794345 (478.77 it/sec) -training >> step=3724800, episode=621 reward=0.7734541 (444.98 it/sec) -training >> step=3724900, episode=621 reward=0.7616244 (426.41 it/sec) -training >> step=3725000, episode=621 reward=0.7837471 (465.15 it/sec) -training >> step=3725100, episode=621 reward=0.7715347 (420.53 it/sec) -training >> step=3725200, episode=621 reward=0.7691682 (360.53 it/sec) -training >> step=3725300, episode=622 reward=0.775112 (56.34 it/sec) -training >> step=3725400, episode=622 reward=0.7656202 (446.43 it/sec) -training >> step=3725500, episode=622 reward=0.7698212 (440.36 it/sec) -training >> step=3725600, episode=622 reward=0.7772179 (465.39 it/sec) -training >> step=3725700, episode=622 reward=0.766596 (442.60 it/sec) -training >> step=3725800, episode=622 reward=0.7741925 (462.05 it/sec) -training >> step=3725900, episode=622 reward=0.7804657 (463.68 it/sec) -training >> step=3726000, episode=622 reward=0.7715335 (501.63 it/sec) -training >> step=3726100, episode=622 reward=0.7695405 (492.48 it/sec) -training >> step=3726200, episode=622 reward=0.7547123 (530.70 it/sec) -training >> step=3726300, episode=622 reward=0.7635228 (470.68 it/sec) -training >> step=3726400, episode=622 reward=0.7736278 (425.50 it/sec) -training >> step=3726500, episode=622 reward=0.7426749 (487.47 it/sec) -training >> step=3726600, episode=622 reward=0.7777078 (474.30 it/sec) -training >> step=3726700, episode=622 reward=0.7879905 (491.54 it/sec) -training >> step=3726800, episode=622 reward=0.7868375 (466.62 it/sec) -training >> step=3726900, episode=622 reward=0.7851096 (516.01 it/sec) -training >> step=3727000, episode=622 reward=0.7828655 (516.34 it/sec) -training >> step=3727100, episode=622 reward=0.7863681 (473.91 it/sec) -training >> step=3727200, episode=622 reward=0.7742342 (461.00 it/sec) -training >> step=3727300, episode=622 reward=0.7661665 (341.30 it/sec) -training >> step=3727400, episode=622 reward=0.7563575 (464.70 it/sec) -training >> step=3727500, episode=622 reward=0.7651964 (451.53 it/sec) -training >> step=3727600, episode=622 reward=0.7801914 (446.16 it/sec) -training >> step=3727700, episode=622 reward=0.7598675 (490.80 it/sec) -training >> step=3727800, episode=622 reward=0.7659484 (527.49 it/sec) -training >> step=3727900, episode=622 reward=0.7786262 (450.62 it/sec) -training >> step=3728000, episode=622 reward=0.7877817 (494.70 it/sec) -training >> step=3728100, episode=622 reward=0.7654253 (525.00 it/sec) -training >> step=3728200, episode=622 reward=0.7596186 (493.60 it/sec) -training >> step=3728300, episode=622 reward=0.8063975 (519.73 it/sec) -training >> step=3728400, episode=622 reward=0.7589408 (544.06 it/sec) -training >> step=3728500, episode=622 reward=0.7805771 (498.92 it/sec) -training >> step=3728600, episode=622 reward=0.7687425 (507.86 it/sec) -training >> step=3728700, episode=622 reward=0.7806055 (492.59 it/sec) -training >> step=3728800, episode=622 reward=0.784641 (516.57 it/sec) -training >> step=3728900, episode=622 reward=0.7671714 (482.32 it/sec) -training >> step=3729000, episode=622 reward=0.7614683 (432.30 it/sec) -training >> step=3729100, episode=622 reward=0.7741294 (475.60 it/sec) -training >> step=3729200, episode=622 reward=0.8068849 (490.51 it/sec) -training >> step=3729300, episode=622 reward=0.7691784 (517.47 it/sec) -training >> step=3729400, episode=622 reward=0.7678423 (507.59 it/sec) -training >> step=3729500, episode=622 reward=0.7611167 (469.67 it/sec) -training >> step=3729600, episode=622 reward=0.7725307 (478.90 it/sec) -training >> step=3729700, episode=622 reward=0.8036005 (416.61 it/sec) -training >> step=3729800, episode=622 reward=0.7831024 (450.13 it/sec) -training >> step=3729900, episode=622 reward=0.791366 (462.47 it/sec) -training >> step=3730000, episode=622 reward=0.7403492 (452.13 it/sec) -training >> step=3730100, episode=622 reward=0.7771302 (439.20 it/sec) -training >> step=3730200, episode=622 reward=0.7690023 (501.50 it/sec) -training >> step=3730300, episode=622 reward=0.7921655 (467.01 it/sec) -training >> step=3730400, episode=622 reward=0.7802423 (460.74 it/sec) -training >> step=3730500, episode=622 reward=0.7623615 (431.21 it/sec) -training >> step=3730600, episode=622 reward=0.7764878 (435.10 it/sec) -training >> step=3730700, episode=622 reward=0.7703291 (466.55 it/sec) -training >> step=3730800, episode=622 reward=0.7675225 (447.87 it/sec) -training >> step=3730900, episode=622 reward=0.7578584 (470.82 it/sec) -training >> step=3731000, episode=622 reward=0.7663946 (500.80 it/sec) -training >> step=3731100, episode=622 reward=0.7910146 (468.18 it/sec) -training >> step=3731200, episode=622 reward=0.7743549 (433.87 it/sec) -training >> step=3731300, episode=623 reward=0.7668657 (85.75 it/sec) -training >> step=3731400, episode=623 reward=0.7769415 (468.84 it/sec) -training >> step=3731500, episode=623 reward=0.7622081 (522.82 it/sec) -training >> step=3731600, episode=623 reward=0.7685666 (502.86 it/sec) -training >> step=3731700, episode=623 reward=0.7971874 (558.55 it/sec) -training >> step=3731800, episode=623 reward=0.7629982 (541.35 it/sec) -training >> step=3731900, episode=623 reward=0.7614982 (497.10 it/sec) -training >> step=3732000, episode=623 reward=0.7815273 (512.98 it/sec) -training >> step=3732100, episode=623 reward=0.7657598 (510.58 it/sec) -training >> step=3732200, episode=623 reward=0.7946219 (504.04 it/sec) -training >> step=3732300, episode=623 reward=0.7807509 (503.68 it/sec) -training >> step=3732400, episode=623 reward=0.7569475 (491.56 it/sec) -training >> step=3732500, episode=623 reward=0.7498065 (511.74 it/sec) -training >> step=3732600, episode=623 reward=0.7968913 (525.11 it/sec) -training >> step=3732700, episode=623 reward=0.7811254 (541.35 it/sec) -training >> step=3732800, episode=623 reward=0.7664629 (502.99 it/sec) -training >> step=3732900, episode=623 reward=0.7714553 (520.98 it/sec) -training >> step=3733000, episode=623 reward=0.7764717 (524.04 it/sec) -training >> step=3733100, episode=623 reward=0.7666506 (544.86 it/sec) -training >> step=3733200, episode=623 reward=0.7755011 (517.07 it/sec) -training >> step=3733300, episode=623 reward=0.7641827 (525.95 it/sec) -training >> step=3733400, episode=623 reward=0.7646399 (499.46 it/sec) -training >> step=3733500, episode=623 reward=0.7628831 (384.17 it/sec) -training >> step=3733600, episode=623 reward=0.7812503 (441.59 it/sec) -training >> step=3733700, episode=623 reward=0.777455 (506.33 it/sec) -training >> step=3733800, episode=623 reward=0.7794697 (479.87 it/sec) -training >> step=3733900, episode=623 reward=0.7646082 (529.32 it/sec) -training >> step=3734000, episode=623 reward=0.7791069 (549.68 it/sec) -training >> step=3734100, episode=623 reward=0.7757922 (492.30 it/sec) -training >> step=3734200, episode=623 reward=0.7755975 (457.87 it/sec) -training >> step=3734300, episode=623 reward=0.7676248 (471.27 it/sec) -training >> step=3734400, episode=623 reward=0.7912907 (490.71 it/sec) -training >> step=3734500, episode=623 reward=0.7729189 (495.03 it/sec) -training >> step=3734600, episode=623 reward=0.7760044 (538.39 it/sec) -training >> step=3734700, episode=623 reward=0.7704808 (533.12 it/sec) -training >> step=3734800, episode=623 reward=0.7809399 (561.65 it/sec) -training >> step=3734900, episode=623 reward=0.7537702 (537.40 it/sec) -training >> step=3735000, episode=623 reward=0.7833979 (523.94 it/sec) -training >> step=3735100, episode=623 reward=0.7826933 (549.97 it/sec) -training >> step=3735200, episode=623 reward=0.770318 (497.68 it/sec) -training >> step=3735300, episode=623 reward=0.7847085 (511.13 it/sec) -training >> step=3735400, episode=623 reward=0.7734199 (545.50 it/sec) -training >> step=3735500, episode=623 reward=0.7904456 (538.00 it/sec) -training >> step=3735600, episode=623 reward=0.7886546 (509.41 it/sec) -training >> step=3735700, episode=623 reward=0.7789868 (491.04 it/sec) -training >> step=3735800, episode=623 reward=0.7819444 (555.75 it/sec) -training >> step=3735900, episode=623 reward=0.7786915 (525.38 it/sec) -training >> step=3736000, episode=623 reward=0.7745234 (536.85 it/sec) -training >> step=3736100, episode=623 reward=0.7821498 (520.50 it/sec) -training >> step=3736200, episode=623 reward=0.7555286 (536.04 it/sec) -training >> step=3736300, episode=623 reward=0.7778444 (529.18 it/sec) -training >> step=3736400, episode=623 reward=0.7652006 (551.94 it/sec) -training >> step=3736500, episode=623 reward=0.7817842 (513.73 it/sec) -training >> step=3736600, episode=623 reward=0.7853293 (520.01 it/sec) -training >> step=3736700, episode=623 reward=0.7692121 (504.58 it/sec) -training >> step=3736800, episode=623 reward=0.7626185 (546.69 it/sec) -training >> step=3736900, episode=623 reward=0.7798715 (532.93 it/sec) -training >> step=3737000, episode=623 reward=0.7628826 (557.66 it/sec) -training >> step=3737100, episode=623 reward=0.7463329 (509.10 it/sec) -training >> step=3737200, episode=623 reward=0.7819567 (506.14 it/sec) -training >> step=3737300, episode=624 reward=0.7890661 (129.48 it/sec) -training >> step=3737400, episode=624 reward=0.7842655 (546.08 it/sec) -training >> step=3737500, episode=624 reward=0.7783439 (526.57 it/sec) -training >> step=3737600, episode=624 reward=0.7794139 (532.47 it/sec) -training >> step=3737700, episode=624 reward=0.7956668 (569.59 it/sec) -training >> step=3737800, episode=624 reward=0.7689122 (499.47 it/sec) -training >> step=3737900, episode=624 reward=0.7776621 (515.60 it/sec) -training >> step=3738000, episode=624 reward=0.7931984 (568.90 it/sec) -training >> step=3738100, episode=624 reward=0.7770368 (519.76 it/sec) -training >> step=3738200, episode=624 reward=0.7610924 (465.49 it/sec) -training >> step=3738300, episode=624 reward=0.7679318 (474.14 it/sec) -training >> step=3738400, episode=624 reward=0.7781256 (486.89 it/sec) -training >> step=3738500, episode=624 reward=0.7591408 (424.56 it/sec) -training >> step=3738600, episode=624 reward=0.7529266 (512.23 it/sec) -training >> step=3738700, episode=624 reward=0.7761395 (497.87 it/sec) -training >> step=3738800, episode=624 reward=0.7732176 (568.84 it/sec) -training >> step=3738900, episode=624 reward=0.7787581 (536.25 it/sec) -training >> step=3739000, episode=624 reward=0.7810476 (496.40 it/sec) -training >> step=3739100, episode=624 reward=0.767471 (507.12 it/sec) -training >> step=3739200, episode=624 reward=0.7278412 (478.78 it/sec) -training >> step=3739300, episode=624 reward=0.7592943 (525.97 it/sec) -training >> step=3739400, episode=624 reward=0.745024 (546.89 it/sec) -training >> step=3739500, episode=624 reward=0.7600569 (505.92 it/sec) -training >> step=3739600, episode=624 reward=0.7846586 (371.85 it/sec) -training >> step=3739700, episode=624 reward=0.7703862 (520.70 it/sec) -training >> step=3739800, episode=624 reward=0.755355 (547.02 it/sec) -training >> step=3739900, episode=624 reward=0.7656596 (548.64 it/sec) -training >> step=3740000, episode=624 reward=0.7915302 (537.43 it/sec) -training >> step=3740100, episode=624 reward=0.7932419 (517.44 it/sec) -training >> step=3740200, episode=624 reward=0.7709538 (582.96 it/sec) -training >> step=3740300, episode=624 reward=0.7461085 (498.00 it/sec) -training >> step=3740400, episode=624 reward=0.7788994 (531.75 it/sec) -training >> step=3740500, episode=624 reward=0.7714875 (489.02 it/sec) -training >> step=3740600, episode=624 reward=0.7889302 (578.69 it/sec) -training >> step=3740700, episode=624 reward=0.7672007 (533.86 it/sec) -training >> step=3740800, episode=624 reward=0.768234 (550.84 it/sec) -training >> step=3740900, episode=624 reward=0.7807852 (502.95 it/sec) -training >> step=3741000, episode=624 reward=0.7722797 (512.05 it/sec) -training >> step=3741100, episode=624 reward=0.7433045 (501.33 it/sec) -training >> step=3741200, episode=624 reward=0.7846544 (522.40 it/sec) -training >> step=3741300, episode=624 reward=0.7723092 (485.76 it/sec) -training >> step=3741400, episode=624 reward=0.7363408 (509.21 it/sec) -training >> step=3741500, episode=624 reward=0.7902267 (516.71 it/sec) -training >> step=3741600, episode=624 reward=0.7793505 (516.27 it/sec) -training >> step=3741700, episode=624 reward=0.7588227 (568.21 it/sec) -training >> step=3741800, episode=624 reward=0.7828683 (500.37 it/sec) -training >> step=3741900, episode=624 reward=0.788551 (544.88 it/sec) -training >> step=3742000, episode=624 reward=0.7878405 (515.93 it/sec) -training >> step=3742100, episode=624 reward=0.7750171 (549.51 it/sec) -training >> step=3742200, episode=624 reward=0.7794048 (541.70 it/sec) -training >> step=3742300, episode=624 reward=0.7823633 (516.25 it/sec) -training >> step=3742400, episode=624 reward=0.7771761 (565.01 it/sec) -training >> step=3742500, episode=624 reward=0.7733719 (487.45 it/sec) -training >> step=3742600, episode=624 reward=0.7796579 (531.88 it/sec) -training >> step=3742700, episode=624 reward=0.7804952 (455.05 it/sec) -training >> step=3742800, episode=624 reward=0.7760658 (553.19 it/sec) -training >> step=3742900, episode=624 reward=0.7666439 (553.68 it/sec) -training >> step=3743000, episode=624 reward=0.7590948 (489.79 it/sec) -training >> step=3743100, episode=624 reward=0.7637089 (537.53 it/sec) -training >> step=3743200, episode=624 reward=0.7681617 (535.48 it/sec) -training >> step=3743300, episode=625 reward=0.7887617 (138.88 it/sec) -training >> step=3743400, episode=625 reward=0.7903573 (508.68 it/sec) -training >> step=3743500, episode=625 reward=0.7675143 (544.21 it/sec) -training >> step=3743600, episode=625 reward=0.7739481 (499.34 it/sec) -training >> step=3743700, episode=625 reward=0.7716109 (528.65 it/sec) -training >> step=3743800, episode=625 reward=0.7766146 (535.38 it/sec) -training >> step=3743900, episode=625 reward=0.7618176 (537.69 it/sec) -training >> step=3744000, episode=625 reward=0.7551991 (509.87 it/sec) -training >> step=3744100, episode=625 reward=0.7704104 (480.69 it/sec) -training >> step=3744200, episode=625 reward=0.7801036 (572.31 it/sec) -training >> step=3744300, episode=625 reward=0.7701343 (502.07 it/sec) -training >> step=3744400, episode=625 reward=0.7540147 (491.37 it/sec) -training >> step=3744500, episode=625 reward=0.7632851 (555.82 it/sec) -training >> step=3744600, episode=625 reward=0.7843983 (527.12 it/sec) -training >> step=3744700, episode=625 reward=0.7674261 (526.47 it/sec) -training >> step=3744800, episode=625 reward=0.7661377 (515.04 it/sec) -training >> step=3744900, episode=625 reward=0.7671096 (529.33 it/sec) -training >> step=3745000, episode=625 reward=0.7865362 (536.65 it/sec) -training >> step=3745100, episode=625 reward=0.7820493 (535.01 it/sec) -training >> step=3745200, episode=625 reward=0.7891936 (527.12 it/sec) -training >> step=3745300, episode=625 reward=0.7693685 (521.39 it/sec) -training >> step=3745400, episode=625 reward=0.7841085 (517.83 it/sec) -training >> step=3745500, episode=625 reward=0.7483671 (522.53 it/sec) -training >> step=3745600, episode=625 reward=0.7685477 (519.86 it/sec) -training >> step=3745700, episode=625 reward=0.777312 (551.26 it/sec) -training >> step=3745800, episode=625 reward=0.7756265 (520.26 it/sec) -training >> step=3745900, episode=625 reward=0.784207 (552.02 it/sec) -training >> step=3746000, episode=625 reward=0.7885814 (343.48 it/sec) -training >> step=3746100, episode=625 reward=0.7806123 (545.95 it/sec) -training >> step=3746200, episode=625 reward=0.7908642 (537.55 it/sec) -training >> step=3746300, episode=625 reward=0.7901786 (518.41 it/sec) -training >> step=3746400, episode=625 reward=0.7919596 (505.32 it/sec) -training >> step=3746500, episode=625 reward=0.7594857 (534.55 it/sec) -training >> step=3746600, episode=625 reward=0.7773312 (501.20 it/sec) -training >> step=3746700, episode=625 reward=0.773936 (550.18 it/sec) -training >> step=3746800, episode=625 reward=0.7704077 (489.35 it/sec) -training >> step=3746900, episode=625 reward=0.7714118 (528.06 it/sec) -training >> step=3747000, episode=625 reward=0.7629076 (534.69 it/sec) -training >> step=3747100, episode=625 reward=0.7613021 (573.14 it/sec) -training >> step=3747200, episode=625 reward=0.7697442 (500.26 it/sec) -training >> step=3747300, episode=625 reward=0.7659778 (516.28 it/sec) -training >> step=3747400, episode=625 reward=0.7533426 (565.49 it/sec) -training >> step=3747500, episode=625 reward=0.7784554 (512.58 it/sec) -training >> step=3747600, episode=625 reward=0.7902154 (536.99 it/sec) -training >> step=3747700, episode=625 reward=0.7829134 (497.64 it/sec) -training >> step=3747800, episode=625 reward=0.7804446 (592.15 it/sec) -training >> step=3747900, episode=625 reward=0.7897664 (463.04 it/sec) -training >> step=3748000, episode=625 reward=0.7885982 (551.77 it/sec) -training >> step=3748100, episode=625 reward=0.7835568 (506.67 it/sec) -training >> step=3748200, episode=625 reward=0.761351 (526.03 it/sec) -training >> step=3748300, episode=625 reward=0.7771829 (552.43 it/sec) -training >> step=3748400, episode=625 reward=0.7806073 (532.21 it/sec) -training >> step=3748500, episode=625 reward=0.7620944 (537.96 it/sec) -training >> step=3748600, episode=625 reward=0.7878447 (519.27 it/sec) -training >> step=3748700, episode=625 reward=0.8127732 (498.65 it/sec) -training >> step=3748800, episode=625 reward=0.7997245 (526.41 it/sec) -training >> step=3748900, episode=625 reward=0.7699153 (534.59 it/sec) -training >> step=3749000, episode=625 reward=0.781689 (488.93 it/sec) -training >> step=3749100, episode=625 reward=0.7932625 (461.26 it/sec) -training >> step=3749200, episode=625 reward=0.7545725 (567.39 it/sec) -training >> step=3749300, episode=626 reward=0.7546282 (142.08 it/sec) -training >> step=3749400, episode=626 reward=0.7654899 (518.32 it/sec) -training >> step=3749500, episode=626 reward=0.7592523 (509.89 it/sec) -training >> step=3749600, episode=626 reward=0.7965472 (566.14 it/sec) -training >> step=3749700, episode=626 reward=0.7773684 (522.76 it/sec) -training >> step=3749800, episode=626 reward=0.7913912 (541.96 it/sec) -training >> step=3749900, episode=626 reward=0.7596309 (505.58 it/sec) -training >> step=3750000, episode=626 reward=0.7647609 (535.73 it/sec) -training >> step=3750100, episode=626 reward=0.7663866 (469.45 it/sec) -training >> step=3750200, episode=626 reward=0.7686747 (542.95 it/sec) -training >> step=3750300, episode=626 reward=0.775375 (539.23 it/sec) -training >> step=3750400, episode=626 reward=0.7719536 (544.64 it/sec) -training >> step=3750500, episode=626 reward=0.7634143 (485.42 it/sec) -training >> step=3750600, episode=626 reward=0.7654008 (547.70 it/sec) -training >> step=3750700, episode=626 reward=0.7862554 (560.36 it/sec) -training >> step=3750800, episode=626 reward=0.767899 (509.68 it/sec) -training >> step=3750900, episode=626 reward=0.7532952 (516.24 it/sec) -training >> step=3751000, episode=626 reward=0.7799472 (508.68 it/sec) -training >> step=3751100, episode=626 reward=0.7904506 (565.19 it/sec) -training >> step=3751200, episode=626 reward=0.7659614 (507.29 it/sec) -training >> step=3751300, episode=626 reward=0.7977049 (521.41 it/sec) -training >> step=3751400, episode=626 reward=0.774501 (518.09 it/sec) -training >> step=3751500, episode=626 reward=0.780456 (519.56 it/sec) -training >> step=3751600, episode=626 reward=0.7681718 (547.94 it/sec) -training >> step=3751700, episode=626 reward=0.8031064 (567.83 it/sec) -training >> step=3751800, episode=626 reward=0.7786921 (514.86 it/sec) -training >> step=3751900, episode=626 reward=0.8093467 (517.65 it/sec) -training >> step=3752000, episode=626 reward=0.7661175 (509.86 it/sec) -training >> step=3752100, episode=626 reward=0.7786563 (524.00 it/sec) -training >> step=3752200, episode=626 reward=0.7877554 (443.01 it/sec) -training >> step=3752300, episode=626 reward=0.7916875 (533.26 it/sec) -training >> step=3752400, episode=626 reward=0.7587693 (489.12 it/sec) -training >> step=3752500, episode=626 reward=0.773272 (559.52 it/sec) -training >> step=3752600, episode=626 reward=0.7826697 (507.24 it/sec) -training >> step=3752700, episode=626 reward=0.7606956 (546.46 it/sec) -training >> step=3752800, episode=626 reward=0.7520307 (489.87 it/sec) -training >> step=3752900, episode=626 reward=0.7891446 (521.39 it/sec) -training >> step=3753000, episode=626 reward=0.7505231 (525.60 it/sec) -training >> step=3753100, episode=626 reward=0.7434438 (486.19 it/sec) -training >> step=3753200, episode=626 reward=0.7850538 (455.40 it/sec) -training >> step=3753300, episode=626 reward=0.775449 (498.86 it/sec) -training >> step=3753400, episode=626 reward=0.7928103 (468.32 it/sec) -training >> step=3753500, episode=626 reward=0.77412 (414.66 it/sec) -training >> step=3753600, episode=626 reward=0.7775527 (533.48 it/sec) -training >> step=3753700, episode=626 reward=0.7648563 (546.43 it/sec) -training >> step=3753800, episode=626 reward=0.7844406 (535.21 it/sec) -training >> step=3753900, episode=626 reward=0.8016398 (514.94 it/sec) -training >> step=3754000, episode=626 reward=0.7584875 (507.12 it/sec) -training >> step=3754100, episode=626 reward=0.7734304 (513.33 it/sec) -training >> step=3754200, episode=626 reward=0.7810193 (494.71 it/sec) -training >> step=3754300, episode=626 reward=0.7611091 (497.63 it/sec) -training >> step=3754400, episode=626 reward=0.7933307 (551.22 it/sec) -training >> step=3754500, episode=626 reward=0.7665816 (514.15 it/sec) -training >> step=3754600, episode=626 reward=0.7701278 (537.35 it/sec) -training >> step=3754700, episode=626 reward=0.7889333 (516.91 it/sec) -training >> step=3754800, episode=626 reward=0.758563 (519.94 it/sec) -training >> step=3754900, episode=626 reward=0.76657 (490.07 it/sec) -training >> step=3755000, episode=626 reward=0.7553195 (532.94 it/sec) -training >> step=3755100, episode=626 reward=0.7543018 (500.10 it/sec) -training >> step=3755200, episode=626 reward=0.7755232 (439.54 it/sec) -training >> step=3755300, episode=627 reward=0.7915533 (87.74 it/sec) -training >> step=3755400, episode=627 reward=0.7480456 (480.51 it/sec) -training >> step=3755500, episode=627 reward=0.7737399 (475.57 it/sec) -training >> step=3755600, episode=627 reward=0.7786158 (467.73 it/sec) -training >> step=3755700, episode=627 reward=0.7616332 (459.73 it/sec) -training >> step=3755800, episode=627 reward=0.7915759 (462.80 it/sec) -training >> step=3755900, episode=627 reward=0.7807983 (449.29 it/sec) -training >> step=3756000, episode=627 reward=0.776078 (514.73 it/sec) -training >> step=3756100, episode=627 reward=0.7641795 (554.76 it/sec) -training >> step=3756200, episode=627 reward=0.7657239 (478.33 it/sec) -training >> step=3756300, episode=627 reward=0.8035181 (535.37 it/sec) -training >> step=3756400, episode=627 reward=0.7682787 (530.87 it/sec) -training >> step=3756500, episode=627 reward=0.7677674 (461.50 it/sec) -training >> step=3756600, episode=627 reward=0.7738184 (438.87 it/sec) -training >> step=3756700, episode=627 reward=0.7654768 (510.49 it/sec) -training >> step=3756800, episode=627 reward=0.7605055 (484.87 it/sec) -training >> step=3756900, episode=627 reward=0.7604555 (531.06 it/sec) -training >> step=3757000, episode=627 reward=0.7828028 (494.64 it/sec) -training >> step=3757100, episode=627 reward=0.7777815 (529.00 it/sec) -training >> step=3757200, episode=627 reward=0.7612053 (476.94 it/sec) -training >> step=3757300, episode=627 reward=0.794188 (521.15 it/sec) -training >> step=3757400, episode=627 reward=0.7682759 (556.76 it/sec) -training >> step=3757500, episode=627 reward=0.7692202 (502.23 it/sec) -training >> step=3757600, episode=627 reward=0.7675439 (478.78 it/sec) -training >> step=3757700, episode=627 reward=0.7780001 (469.01 it/sec) -training >> step=3757800, episode=627 reward=0.7791092 (491.52 it/sec) -training >> step=3757900, episode=627 reward=0.7805854 (461.50 it/sec) -training >> step=3758000, episode=627 reward=0.7684767 (475.66 it/sec) -training >> step=3758100, episode=627 reward=0.7893347 (527.68 it/sec) -training >> step=3758200, episode=627 reward=0.7911956 (342.03 it/sec) -training >> step=3758300, episode=627 reward=0.7799334 (462.27 it/sec) -training >> step=3758400, episode=627 reward=0.7663861 (509.15 it/sec) -training >> step=3758500, episode=627 reward=0.7655223 (473.31 it/sec) -training >> step=3758600, episode=627 reward=0.7886204 (448.93 it/sec) -training >> step=3758700, episode=627 reward=0.7723128 (431.76 it/sec) -training >> step=3758800, episode=627 reward=0.7900171 (519.25 it/sec) -training >> step=3758900, episode=627 reward=0.7691391 (515.48 it/sec) -training >> step=3759000, episode=627 reward=0.7694792 (472.03 it/sec) -training >> step=3759100, episode=627 reward=0.7806754 (506.55 it/sec) -training >> step=3759200, episode=627 reward=0.7605915 (513.23 it/sec) -training >> step=3759300, episode=627 reward=0.7859194 (508.22 it/sec) -training >> step=3759400, episode=627 reward=0.778013 (493.00 it/sec) -training >> step=3759500, episode=627 reward=0.7686666 (513.62 it/sec) -training >> step=3759600, episode=627 reward=0.7677227 (515.60 it/sec) -training >> step=3759700, episode=627 reward=0.8059779 (476.04 it/sec) -training >> step=3759800, episode=627 reward=0.7739921 (532.45 it/sec) -training >> step=3759900, episode=627 reward=0.7907243 (524.69 it/sec) -training >> step=3760000, episode=627 reward=0.7879628 (498.73 it/sec) -training >> step=3760100, episode=627 reward=0.7764039 (529.08 it/sec) -training >> step=3760200, episode=627 reward=0.7869344 (517.79 it/sec) -training >> step=3760300, episode=627 reward=0.7712334 (493.52 it/sec) -training >> step=3760400, episode=627 reward=0.7790669 (502.65 it/sec) -training >> step=3760500, episode=627 reward=0.7884988 (505.24 it/sec) -training >> step=3760600, episode=627 reward=0.7867676 (493.07 it/sec) -training >> step=3760700, episode=627 reward=0.7749845 (525.43 it/sec) -training >> step=3760800, episode=627 reward=0.7541779 (473.72 it/sec) -training >> step=3760900, episode=627 reward=0.7731511 (525.43 it/sec) -training >> step=3761000, episode=627 reward=0.7588795 (522.84 it/sec) -training >> step=3761100, episode=627 reward=0.7992972 (473.73 it/sec) -training >> step=3761200, episode=627 reward=0.7847289 (480.37 it/sec) -training >> step=3761300, episode=628 reward=0.7578015 (55.13 it/sec) -training >> step=3761400, episode=628 reward=0.7686273 (520.90 it/sec) -training >> step=3761500, episode=628 reward=0.7727132 (499.04 it/sec) -training >> step=3761600, episode=628 reward=0.7721341 (565.67 it/sec) -training >> step=3761700, episode=628 reward=0.7790562 (513.93 it/sec) -training >> step=3761800, episode=628 reward=0.7609485 (538.37 it/sec) -training >> step=3761900, episode=628 reward=0.7866706 (488.38 it/sec) -training >> step=3762000, episode=628 reward=0.7637373 (524.02 it/sec) -training >> step=3762100, episode=628 reward=0.7774533 (536.38 it/sec) -training >> step=3762200, episode=628 reward=0.7745076 (571.35 it/sec) -training >> step=3762300, episode=628 reward=0.7680643 (486.09 it/sec) -training >> step=3762400, episode=628 reward=0.7598199 (521.10 it/sec) -training >> step=3762500, episode=628 reward=0.7755724 (525.33 it/sec) -training >> step=3762600, episode=628 reward=0.7641762 (492.82 it/sec) -training >> step=3762700, episode=628 reward=0.7806179 (515.44 it/sec) -training >> step=3762800, episode=628 reward=0.7903647 (523.73 it/sec) -training >> step=3762900, episode=628 reward=0.7831846 (524.76 it/sec) -training >> step=3763000, episode=628 reward=0.7887238 (506.68 it/sec) -training >> step=3763100, episode=628 reward=0.7943456 (487.28 it/sec) -training >> step=3763200, episode=628 reward=0.7725401 (570.38 it/sec) -training >> step=3763300, episode=628 reward=0.7751278 (560.54 it/sec) -training >> step=3763400, episode=628 reward=0.7925027 (487.42 it/sec) -training >> step=3763500, episode=628 reward=0.7926725 (531.45 it/sec) -training >> step=3763600, episode=628 reward=0.7695089 (529.58 it/sec) -training >> step=3763700, episode=628 reward=0.7588862 (498.64 it/sec) -training >> step=3763800, episode=628 reward=0.7919952 (533.69 it/sec) -training >> step=3763900, episode=628 reward=0.7768295 (542.99 it/sec) -training >> step=3764000, episode=628 reward=0.7787774 (562.77 it/sec) -training >> step=3764100, episode=628 reward=0.7835035 (483.30 it/sec) -training >> step=3764200, episode=628 reward=0.7963333 (494.79 it/sec) -training >> step=3764300, episode=628 reward=0.7563067 (553.45 it/sec) -training >> step=3764400, episode=628 reward=0.7852363 (380.34 it/sec) -training >> step=3764500, episode=628 reward=0.786474 (508.96 it/sec) -training >> step=3764600, episode=628 reward=0.7596068 (520.26 it/sec) -training >> step=3764700, episode=628 reward=0.7917446 (533.96 it/sec) -training >> step=3764800, episode=628 reward=0.7713191 (457.22 it/sec) -training >> step=3764900, episode=628 reward=0.7651475 (502.83 it/sec) -training >> step=3765000, episode=628 reward=0.801607 (518.61 it/sec) -training >> step=3765100, episode=628 reward=0.7690501 (552.10 it/sec) -training >> step=3765200, episode=628 reward=0.7588579 (508.72 it/sec) -training >> step=3765300, episode=628 reward=0.7788679 (511.87 it/sec) -training >> step=3765400, episode=628 reward=0.7590257 (484.10 it/sec) -training >> step=3765500, episode=628 reward=0.7835247 (542.02 it/sec) -training >> step=3765600, episode=628 reward=0.7664933 (506.81 it/sec) -training >> step=3765700, episode=628 reward=0.7610506 (518.02 it/sec) -training >> step=3765800, episode=628 reward=0.7459552 (553.49 it/sec) -training >> step=3765900, episode=628 reward=0.7681336 (511.19 it/sec) -training >> step=3766000, episode=628 reward=0.7760115 (494.74 it/sec) -training >> step=3766100, episode=628 reward=0.7698225 (438.38 it/sec) -training >> step=3766200, episode=628 reward=0.7779104 (525.58 it/sec) -training >> step=3766300, episode=628 reward=0.77637 (508.04 it/sec) -training >> step=3766400, episode=628 reward=0.7706897 (512.21 it/sec) -training >> step=3766500, episode=628 reward=0.7806435 (538.42 it/sec) -training >> step=3766600, episode=628 reward=0.7655593 (490.17 it/sec) -training >> step=3766700, episode=628 reward=0.7793853 (488.57 it/sec) -training >> step=3766800, episode=628 reward=0.7695736 (470.80 it/sec) -training >> step=3766900, episode=628 reward=0.7859363 (523.10 it/sec) -training >> step=3767000, episode=628 reward=0.7691059 (468.27 it/sec) -training >> step=3767100, episode=628 reward=0.7728018 (508.75 it/sec) -training >> step=3767200, episode=628 reward=0.75475 (461.14 it/sec) -training >> step=3767300, episode=629 reward=0.7844552 (45.78 it/sec) -training >> step=3767400, episode=629 reward=0.7732124 (512.48 it/sec) -training >> step=3767500, episode=629 reward=0.78203 (511.36 it/sec) -training >> step=3767600, episode=629 reward=0.7546297 (511.70 it/sec) -training >> step=3767700, episode=629 reward=0.7884865 (500.40 it/sec) -training >> step=3767800, episode=629 reward=0.7666751 (502.78 it/sec) -training >> step=3767900, episode=629 reward=0.7881466 (528.75 it/sec) -training >> step=3768000, episode=629 reward=0.7778766 (503.19 it/sec) -training >> step=3768100, episode=629 reward=0.7729065 (522.30 it/sec) -training >> step=3768200, episode=629 reward=0.7662848 (540.02 it/sec) -training >> step=3768300, episode=629 reward=0.791452 (476.83 it/sec) -training >> step=3768400, episode=629 reward=0.7588323 (542.30 it/sec) -training >> step=3768500, episode=629 reward=0.7740279 (535.60 it/sec) -training >> step=3768600, episode=629 reward=0.7624495 (508.18 it/sec) -training >> step=3768700, episode=629 reward=0.7761494 (538.34 it/sec) -training >> step=3768800, episode=629 reward=0.7970623 (540.82 it/sec) -training >> step=3768900, episode=629 reward=0.7574397 (487.19 it/sec) -training >> step=3769000, episode=629 reward=0.7679538 (512.37 it/sec) -training >> step=3769100, episode=629 reward=0.7693803 (513.96 it/sec) -training >> step=3769200, episode=629 reward=0.7922857 (525.99 it/sec) -training >> step=3769300, episode=629 reward=0.7871275 (480.03 it/sec) -training >> step=3769400, episode=629 reward=0.7599822 (518.15 it/sec) -training >> step=3769500, episode=629 reward=0.7679895 (515.90 it/sec) -training >> step=3769600, episode=629 reward=0.771422 (501.98 it/sec) -training >> step=3769700, episode=629 reward=0.7806605 (492.08 it/sec) -training >> step=3769800, episode=629 reward=0.7809113 (498.04 it/sec) -training >> step=3769900, episode=629 reward=0.7807894 (531.14 it/sec) -training >> step=3770000, episode=629 reward=0.7808977 (543.04 it/sec) -training >> step=3770100, episode=629 reward=0.7758723 (523.81 it/sec) -training >> step=3770200, episode=629 reward=0.7819215 (490.81 it/sec) -training >> step=3770300, episode=629 reward=0.7586725 (506.90 it/sec) -training >> step=3770400, episode=629 reward=0.7552064 (509.55 it/sec) -training >> step=3770500, episode=629 reward=0.7641452 (525.69 it/sec) -training >> step=3770600, episode=629 reward=0.7853165 (342.44 it/sec) -training >> step=3770700, episode=629 reward=0.7719622 (470.22 it/sec) -training >> step=3770800, episode=629 reward=0.773411 (463.73 it/sec) -training >> step=3770900, episode=629 reward=0.7753062 (456.41 it/sec) -training >> step=3771000, episode=629 reward=0.7853563 (496.64 it/sec) -training >> step=3771100, episode=629 reward=0.7621383 (422.97 it/sec) -training >> step=3771200, episode=629 reward=0.7543051 (421.38 it/sec) -training >> step=3771300, episode=629 reward=0.7727261 (432.21 it/sec) -training >> step=3771400, episode=629 reward=0.801775 (462.56 it/sec) -training >> step=3771500, episode=629 reward=0.7795303 (485.14 it/sec) -training >> step=3771600, episode=629 reward=0.7825836 (461.06 it/sec) -training >> step=3771700, episode=629 reward=0.778415 (466.85 it/sec) -training >> step=3771800, episode=629 reward=0.7751146 (479.49 it/sec) -training >> step=3771900, episode=629 reward=0.7667372 (510.86 it/sec) -training >> step=3772000, episode=629 reward=0.7722626 (464.27 it/sec) -training >> step=3772100, episode=629 reward=0.7769225 (462.08 it/sec) -training >> step=3772200, episode=629 reward=0.77713 (510.36 it/sec) -training >> step=3772300, episode=629 reward=0.763233 (501.95 it/sec) -training >> step=3772400, episode=629 reward=0.7649925 (479.85 it/sec) -training >> step=3772500, episode=629 reward=0.7839066 (467.43 it/sec) -training >> step=3772600, episode=629 reward=0.7553166 (513.11 it/sec) -training >> step=3772700, episode=629 reward=0.762076 (426.78 it/sec) -training >> step=3772800, episode=629 reward=0.7803416 (485.47 it/sec) -training >> step=3772900, episode=629 reward=0.7923232 (511.43 it/sec) -training >> step=3773000, episode=629 reward=0.7810338 (533.92 it/sec) -training >> step=3773100, episode=629 reward=0.7677507 (463.56 it/sec) -training >> step=3773200, episode=629 reward=0.7691957 (467.15 it/sec) -training >> step=3773300, episode=630 reward=0.7785757 (69.81 it/sec) -training >> step=3773400, episode=630 reward=0.7640129 (499.27 it/sec) -training >> step=3773500, episode=630 reward=0.7694371 (502.13 it/sec) -training >> step=3773600, episode=630 reward=0.7719371 (524.45 it/sec) -training >> step=3773700, episode=630 reward=0.7804499 (493.36 it/sec) -training >> step=3773800, episode=630 reward=0.7686987 (473.44 it/sec) -training >> step=3773900, episode=630 reward=0.7961108 (518.03 it/sec) -training >> step=3774000, episode=630 reward=0.7694657 (511.98 it/sec) -training >> step=3774100, episode=630 reward=0.784942 (507.80 it/sec) -training >> step=3774200, episode=630 reward=0.759701 (485.04 it/sec) -training >> step=3774300, episode=630 reward=0.7761885 (504.86 it/sec) -training >> step=3774400, episode=630 reward=0.7834964 (469.67 it/sec) -training >> step=3774500, episode=630 reward=0.7777843 (478.94 it/sec) -training >> step=3774600, episode=630 reward=0.7670139 (455.05 it/sec) -training >> step=3774700, episode=630 reward=0.737123 (506.99 it/sec) -training >> step=3774800, episode=630 reward=0.7945138 (507.29 it/sec) -training >> step=3774900, episode=630 reward=0.7649927 (540.18 it/sec) -training >> step=3775000, episode=630 reward=0.779888 (499.52 it/sec) -training >> step=3775100, episode=630 reward=0.7858054 (509.20 it/sec) -training >> step=3775200, episode=630 reward=0.7773894 (512.27 it/sec) -training >> step=3775300, episode=630 reward=0.7639101 (502.72 it/sec) -training >> step=3775400, episode=630 reward=0.790291 (501.89 it/sec) -training >> step=3775500, episode=630 reward=0.7586707 (499.04 it/sec) -training >> step=3775600, episode=630 reward=0.7830544 (519.89 it/sec) -training >> step=3775700, episode=630 reward=0.7716727 (463.38 it/sec) -training >> step=3775800, episode=630 reward=0.7923838 (484.46 it/sec) -training >> step=3775900, episode=630 reward=0.7642663 (436.20 it/sec) -training >> step=3776000, episode=630 reward=0.7719212 (481.27 it/sec) -training >> step=3776100, episode=630 reward=0.7719259 (471.74 it/sec) -training >> step=3776200, episode=630 reward=0.7659388 (486.25 it/sec) -training >> step=3776300, episode=630 reward=0.739936 (435.51 it/sec) -training >> step=3776400, episode=630 reward=0.7724806 (313.87 it/sec) -training >> step=3776500, episode=630 reward=0.7720485 (428.86 it/sec) -training >> step=3776600, episode=630 reward=0.7764285 (430.68 it/sec) -training >> step=3776700, episode=630 reward=0.7645206 (451.27 it/sec) -training >> step=3776800, episode=630 reward=0.7759883 (505.26 it/sec) -training >> step=3776900, episode=630 reward=0.7788496 (491.95 it/sec) -training >> step=3777000, episode=630 reward=0.7589776 (513.67 it/sec) -training >> step=3777100, episode=630 reward=0.7618776 (514.19 it/sec) -training >> step=3777200, episode=630 reward=0.7799689 (516.65 it/sec) -training >> step=3777300, episode=630 reward=0.7589602 (494.34 it/sec) -training >> step=3777400, episode=630 reward=0.7625785 (521.11 it/sec) -training >> step=3777500, episode=630 reward=0.788213 (490.69 it/sec) -training >> step=3777600, episode=630 reward=0.7565964 (452.68 it/sec) -training >> step=3777700, episode=630 reward=0.7771971 (508.22 it/sec) -training >> step=3777800, episode=630 reward=0.7808917 (523.39 it/sec) -training >> step=3777900, episode=630 reward=0.7997779 (500.49 it/sec) -training >> step=3778000, episode=630 reward=0.7659836 (497.72 it/sec) -training >> step=3778100, episode=630 reward=0.7625649 (508.17 it/sec) -training >> step=3778200, episode=630 reward=0.7764578 (502.17 it/sec) -training >> step=3778300, episode=630 reward=0.7738599 (511.06 it/sec) -training >> step=3778400, episode=630 reward=0.7445322 (512.54 it/sec) -training >> step=3778500, episode=630 reward=0.7890082 (465.71 it/sec) -training >> step=3778600, episode=630 reward=0.7836955 (494.16 it/sec) -training >> step=3778700, episode=630 reward=0.7495283 (475.44 it/sec) -training >> step=3778800, episode=630 reward=0.7862278 (462.71 it/sec) -training >> step=3778900, episode=630 reward=0.7814779 (524.39 it/sec) -training >> step=3779000, episode=630 reward=0.7794484 (511.03 it/sec) -training >> step=3779100, episode=630 reward=0.7807917 (502.32 it/sec) -training >> step=3779200, episode=630 reward=0.7672162 (544.27 it/sec) -training >> step=3779300, episode=631 reward=0.7593693 (113.86 it/sec) -training >> step=3779400, episode=631 reward=0.761533 (474.95 it/sec) -training >> step=3779500, episode=631 reward=0.7524257 (503.00 it/sec) -training >> step=3779600, episode=631 reward=0.7607762 (493.67 it/sec) -training >> step=3779700, episode=631 reward=0.775295 (463.01 it/sec) -training >> step=3779800, episode=631 reward=0.7923741 (451.55 it/sec) -training >> step=3779900, episode=631 reward=0.7675918 (398.22 it/sec) -training >> step=3780000, episode=631 reward=0.7785464 (441.35 it/sec) -training >> step=3780100, episode=631 reward=0.7830296 (471.12 it/sec) -training >> step=3780200, episode=631 reward=0.7683952 (449.86 it/sec) -training >> step=3780300, episode=631 reward=0.760764 (422.43 it/sec) -training >> step=3780400, episode=631 reward=0.7604086 (473.20 it/sec) -training >> step=3780500, episode=631 reward=0.7844743 (479.67 it/sec) -training >> step=3780600, episode=631 reward=0.7903197 (535.96 it/sec) -training >> step=3780700, episode=631 reward=0.7706224 (481.59 it/sec) -training >> step=3780800, episode=631 reward=0.7644851 (447.00 it/sec) -training >> step=3780900, episode=631 reward=0.7660593 (480.61 it/sec) -training >> step=3781000, episode=631 reward=0.7729853 (534.83 it/sec) -training >> step=3781100, episode=631 reward=0.7703203 (486.92 it/sec) -training >> step=3781200, episode=631 reward=0.7877594 (450.74 it/sec) -training >> step=3781300, episode=631 reward=0.7766613 (437.97 it/sec) -training >> step=3781400, episode=631 reward=0.7485207 (478.43 it/sec) -training >> step=3781500, episode=631 reward=0.7800069 (467.35 it/sec) -training >> step=3781600, episode=631 reward=0.7879454 (403.38 it/sec) -training >> step=3781700, episode=631 reward=0.780727 (411.46 it/sec) -training >> step=3781800, episode=631 reward=0.7701063 (462.82 it/sec) -training >> step=3781900, episode=631 reward=0.7855023 (471.68 it/sec) -training >> step=3782000, episode=631 reward=0.7831173 (414.63 it/sec) -training >> step=3782100, episode=631 reward=0.7815678 (424.94 it/sec) -training >> step=3782200, episode=631 reward=0.7873247 (458.11 it/sec) -training >> step=3782300, episode=631 reward=0.7587839 (480.48 it/sec) -training >> step=3782400, episode=631 reward=0.755234 (512.25 it/sec) -training >> step=3782500, episode=631 reward=0.7922217 (363.87 it/sec) -training >> step=3782600, episode=631 reward=0.7721665 (456.88 it/sec) -training >> step=3782700, episode=631 reward=0.7862053 (496.43 it/sec) -training >> step=3782800, episode=631 reward=0.7926884 (505.50 it/sec) -training >> step=3782900, episode=631 reward=0.7778588 (461.63 it/sec) -training >> step=3783000, episode=631 reward=0.753503 (483.09 it/sec) -training >> step=3783100, episode=631 reward=0.7726493 (545.42 it/sec) -training >> step=3783200, episode=631 reward=0.7742185 (446.19 it/sec) -training >> step=3783300, episode=631 reward=0.7875015 (433.21 it/sec) -training >> step=3783400, episode=631 reward=0.7827585 (519.99 it/sec) -training >> step=3783500, episode=631 reward=0.7714411 (516.87 it/sec) -training >> step=3783600, episode=631 reward=0.739239 (503.50 it/sec) -training >> step=3783700, episode=631 reward=0.7620911 (531.69 it/sec) -training >> step=3783800, episode=631 reward=0.784758 (510.79 it/sec) -training >> step=3783900, episode=631 reward=0.738988 (475.85 it/sec) -training >> step=3784000, episode=631 reward=0.7893127 (499.02 it/sec) -training >> step=3784100, episode=631 reward=0.7802482 (470.26 it/sec) -training >> step=3784200, episode=631 reward=0.7556756 (489.67 it/sec) -training >> step=3784300, episode=631 reward=0.74237 (505.61 it/sec) -training >> step=3784400, episode=631 reward=0.7787917 (475.72 it/sec) -training >> step=3784500, episode=631 reward=0.7858971 (496.21 it/sec) -training >> step=3784600, episode=631 reward=0.7693735 (481.26 it/sec) -training >> step=3784700, episode=631 reward=0.7715399 (531.26 it/sec) -training >> step=3784800, episode=631 reward=0.7780425 (480.79 it/sec) -training >> step=3784900, episode=631 reward=0.7684399 (516.47 it/sec) -training >> step=3785000, episode=631 reward=0.7727487 (515.66 it/sec) -training >> step=3785100, episode=631 reward=0.7725635 (482.87 it/sec) -training >> step=3785200, episode=631 reward=0.7748553 (432.87 it/sec) -training >> step=3785300, episode=632 reward=0.7881689 (64.22 it/sec) -training >> step=3785400, episode=632 reward=0.7824672 (382.35 it/sec) -training >> step=3785500, episode=632 reward=0.7765498 (424.25 it/sec) -training >> step=3785600, episode=632 reward=0.7767671 (461.75 it/sec) -training >> step=3785700, episode=632 reward=0.7805684 (442.39 it/sec) -training >> step=3785800, episode=632 reward=0.7724867 (499.53 it/sec) -training >> step=3785900, episode=632 reward=0.8038824 (455.68 it/sec) -training >> step=3786000, episode=632 reward=0.7745053 (488.30 it/sec) -training >> step=3786100, episode=632 reward=0.7739984 (512.98 it/sec) -training >> step=3786200, episode=632 reward=0.7887599 (494.39 it/sec) -training >> step=3786300, episode=632 reward=0.7712311 (410.41 it/sec) -training >> step=3786400, episode=632 reward=0.7838572 (453.30 it/sec) -training >> step=3786500, episode=632 reward=0.7820033 (385.73 it/sec) -training >> step=3786600, episode=632 reward=0.7648086 (506.90 it/sec) -training >> step=3786700, episode=632 reward=0.7755605 (452.49 it/sec) -training >> step=3786800, episode=632 reward=0.7827801 (504.35 it/sec) -training >> step=3786900, episode=632 reward=0.7689366 (532.27 it/sec) -training >> step=3787000, episode=632 reward=0.7658283 (495.77 it/sec) -training >> step=3787100, episode=632 reward=0.7624255 (523.02 it/sec) -training >> step=3787200, episode=632 reward=0.770744 (486.04 it/sec) -training >> step=3787300, episode=632 reward=0.7810549 (459.14 it/sec) -training >> step=3787400, episode=632 reward=0.7837991 (437.92 it/sec) -training >> step=3787500, episode=632 reward=0.7700851 (515.28 it/sec) -training >> step=3787600, episode=632 reward=0.7774873 (530.59 it/sec) -training >> step=3787700, episode=632 reward=0.7770127 (484.32 it/sec) -training >> step=3787800, episode=632 reward=0.7842464 (504.69 it/sec) -training >> step=3787900, episode=632 reward=0.786288 (506.68 it/sec) -training >> step=3788000, episode=632 reward=0.7788257 (508.52 it/sec) -training >> step=3788100, episode=632 reward=0.786186 (515.80 it/sec) -training >> step=3788200, episode=632 reward=0.7744737 (543.30 it/sec) -training >> step=3788300, episode=632 reward=0.7963818 (510.89 it/sec) -training >> step=3788400, episode=632 reward=0.8013855 (508.13 it/sec) -training >> step=3788500, episode=632 reward=0.7994705 (506.79 it/sec) -training >> step=3788600, episode=632 reward=0.7616561 (412.53 it/sec) -training >> step=3788700, episode=632 reward=0.7763653 (530.03 it/sec) -training >> step=3788800, episode=632 reward=0.7861385 (495.59 it/sec) -training >> step=3788900, episode=632 reward=0.7728611 (529.24 it/sec) -training >> step=3789000, episode=632 reward=0.7542362 (518.61 it/sec) -training >> step=3789100, episode=632 reward=0.7766998 (424.63 it/sec) -training >> step=3789200, episode=632 reward=0.7612928 (511.16 it/sec) -training >> step=3789300, episode=632 reward=0.7603409 (557.86 it/sec) -training >> step=3789400, episode=632 reward=0.7504882 (516.89 it/sec) -training >> step=3789500, episode=632 reward=0.7845224 (522.13 it/sec) -training >> step=3789600, episode=632 reward=0.7640474 (526.11 it/sec) -training >> step=3789700, episode=632 reward=0.776892 (551.27 it/sec) -training >> step=3789800, episode=632 reward=0.7760925 (494.73 it/sec) -training >> step=3789900, episode=632 reward=0.7535864 (484.33 it/sec) -training >> step=3790000, episode=632 reward=0.7772647 (537.60 it/sec) -training >> step=3790100, episode=632 reward=0.7735898 (517.48 it/sec) -training >> step=3790200, episode=632 reward=0.7620432 (530.84 it/sec) -training >> step=3790300, episode=632 reward=0.7521091 (550.73 it/sec) -training >> step=3790400, episode=632 reward=0.7829571 (531.02 it/sec) -training >> step=3790500, episode=632 reward=0.7852058 (541.16 it/sec) -training >> step=3790600, episode=632 reward=0.771312 (518.42 it/sec) -training >> step=3790700, episode=632 reward=0.7852891 (538.67 it/sec) -training >> step=3790800, episode=632 reward=0.7823725 (531.01 it/sec) -training >> step=3790900, episode=632 reward=0.7616593 (563.37 it/sec) -training >> step=3791000, episode=632 reward=0.775873 (506.67 it/sec) -training >> step=3791100, episode=632 reward=0.7937343 (572.92 it/sec) -training >> step=3791200, episode=632 reward=0.798492 (519.37 it/sec) -training >> step=3791300, episode=633 reward=0.770798 (109.65 it/sec) -training >> step=3791400, episode=633 reward=0.7832338 (556.66 it/sec) -training >> step=3791500, episode=633 reward=0.8048304 (543.17 it/sec) -training >> step=3791600, episode=633 reward=0.763123 (560.85 it/sec) -training >> step=3791700, episode=633 reward=0.7582393 (509.51 it/sec) -training >> step=3791800, episode=633 reward=0.7879567 (544.48 it/sec) -training >> step=3791900, episode=633 reward=0.7652962 (526.57 it/sec) -training >> step=3792000, episode=633 reward=0.7617458 (482.43 it/sec) -training >> step=3792100, episode=633 reward=0.7839456 (524.18 it/sec) -training >> step=3792200, episode=633 reward=0.7931873 (519.33 it/sec) -training >> step=3792300, episode=633 reward=0.7562554 (498.59 it/sec) -training >> step=3792400, episode=633 reward=0.7777681 (523.37 it/sec) -training >> step=3792500, episode=633 reward=0.7547857 (532.52 it/sec) -training >> step=3792600, episode=633 reward=0.7886713 (541.47 it/sec) -training >> step=3792700, episode=633 reward=0.7674114 (565.45 it/sec) -training >> step=3792800, episode=633 reward=0.7827467 (526.18 it/sec) -training >> step=3792900, episode=633 reward=0.7685817 (510.86 it/sec) -training >> step=3793000, episode=633 reward=0.7948279 (410.55 it/sec) -training >> step=3793100, episode=633 reward=0.7934905 (519.16 it/sec) -training >> step=3793200, episode=633 reward=0.7657607 (508.60 it/sec) -training >> step=3793300, episode=633 reward=0.766537 (448.92 it/sec) -training >> step=3793400, episode=633 reward=0.7786828 (371.63 it/sec) -training >> step=3793500, episode=633 reward=0.7583101 (388.41 it/sec) -training >> step=3793600, episode=633 reward=0.7680664 (528.50 it/sec) -training >> step=3793700, episode=633 reward=0.7754605 (480.31 it/sec) -training >> step=3793800, episode=633 reward=0.7679487 (368.31 it/sec) -training >> step=3793900, episode=633 reward=0.7980481 (403.40 it/sec) -training >> step=3794000, episode=633 reward=0.7693824 (421.06 it/sec) -training >> step=3794100, episode=633 reward=0.7615423 (377.45 it/sec) -training >> step=3794200, episode=633 reward=0.7935337 (421.76 it/sec) -training >> step=3794300, episode=633 reward=0.7669373 (401.27 it/sec) -training >> step=3794400, episode=633 reward=0.7853349 (449.28 it/sec) -training >> step=3794500, episode=633 reward=0.7854527 (372.34 it/sec) -training >> step=3794600, episode=633 reward=0.7822734 (349.28 it/sec) -training >> step=3794700, episode=633 reward=0.7759354 (516.94 it/sec) -training >> step=3794800, episode=633 reward=0.7754185 (538.20 it/sec) -training >> step=3794900, episode=633 reward=0.7961628 (488.18 it/sec) -training >> step=3795000, episode=633 reward=0.7836515 (368.85 it/sec) -training >> step=3795100, episode=633 reward=0.7935231 (451.49 it/sec) -training >> step=3795200, episode=633 reward=0.7807367 (454.60 it/sec) -training >> step=3795300, episode=633 reward=0.7568876 (472.52 it/sec) -training >> step=3795400, episode=633 reward=0.7630703 (478.03 it/sec) -training >> step=3795500, episode=633 reward=0.789521 (485.26 it/sec) -training >> step=3795600, episode=633 reward=0.7726117 (529.34 it/sec) -training >> step=3795700, episode=633 reward=0.763669 (503.23 it/sec) -training >> step=3795800, episode=633 reward=0.7831258 (469.43 it/sec) -training >> step=3795900, episode=633 reward=0.7655067 (484.56 it/sec) -training >> step=3796000, episode=633 reward=0.7686547 (468.30 it/sec) -training >> step=3796100, episode=633 reward=0.7710428 (499.56 it/sec) -training >> step=3796200, episode=633 reward=0.7800493 (516.79 it/sec) -training >> step=3796300, episode=633 reward=0.7717561 (474.28 it/sec) -training >> step=3796400, episode=633 reward=0.7765026 (483.24 it/sec) -training >> step=3796500, episode=633 reward=0.7650015 (523.11 it/sec) -training >> step=3796600, episode=633 reward=0.7785425 (469.01 it/sec) -training >> step=3796700, episode=633 reward=0.7850164 (523.94 it/sec) -training >> step=3796800, episode=633 reward=0.7659267 (499.16 it/sec) -training >> step=3796900, episode=633 reward=0.7763768 (504.68 it/sec) -training >> step=3797000, episode=633 reward=0.7680454 (489.33 it/sec) -training >> step=3797100, episode=633 reward=0.7926549 (464.59 it/sec) -training >> step=3797200, episode=633 reward=0.7692255 (523.44 it/sec) -training >> step=3797300, episode=634 reward=0.7676126 (99.33 it/sec) -training >> step=3797400, episode=634 reward=0.7737399 (506.79 it/sec) -training >> step=3797500, episode=634 reward=0.765694 (504.39 it/sec) -training >> step=3797600, episode=634 reward=0.7669958 (514.42 it/sec) -training >> step=3797700, episode=634 reward=0.795027 (451.46 it/sec) -training >> step=3797800, episode=634 reward=0.7876392 (515.41 it/sec) -training >> step=3797900, episode=634 reward=0.7712207 (510.67 it/sec) -training >> step=3798000, episode=634 reward=0.7919559 (472.35 it/sec) -training >> step=3798100, episode=634 reward=0.7578948 (468.31 it/sec) -training >> step=3798200, episode=634 reward=0.7731118 (466.50 it/sec) -training >> step=3798300, episode=634 reward=0.7810208 (465.71 it/sec) -training >> step=3798400, episode=634 reward=0.7656813 (486.23 it/sec) -training >> step=3798500, episode=634 reward=0.8024096 (474.35 it/sec) -training >> step=3798600, episode=634 reward=0.7809833 (495.12 it/sec) -training >> step=3798700, episode=634 reward=0.7545797 (460.85 it/sec) -training >> step=3798800, episode=634 reward=0.7754696 (446.03 it/sec) -training >> step=3798900, episode=634 reward=0.7701269 (467.17 it/sec) -training >> step=3799000, episode=634 reward=0.7701905 (490.81 it/sec) -training >> step=3799100, episode=634 reward=0.7778889 (487.49 it/sec) -training >> step=3799200, episode=634 reward=0.7849472 (470.49 it/sec) -training >> step=3799300, episode=634 reward=0.7931592 (495.24 it/sec) -training >> step=3799400, episode=634 reward=0.7835672 (470.17 it/sec) -training >> step=3799500, episode=634 reward=0.7820305 (487.81 it/sec) -training >> step=3799600, episode=634 reward=0.7685851 (488.25 it/sec) -training >> step=3799700, episode=634 reward=0.7672027 (474.45 it/sec) -training >> step=3799800, episode=634 reward=0.7568837 (446.56 it/sec) -training >> step=3799900, episode=634 reward=0.7835487 (486.21 it/sec) -training >> step=3800000, episode=634 reward=0.7645923 (514.56 it/sec) -training >> step=3800100, episode=634 reward=0.79659 (469.92 it/sec) -training >> step=3800200, episode=634 reward=0.7857829 (480.58 it/sec) -training >> step=3800300, episode=634 reward=0.7740909 (522.58 it/sec) -training >> step=3800400, episode=634 reward=0.7735585 (503.98 it/sec) -training >> step=3800500, episode=634 reward=0.7750739 (500.86 it/sec) -training >> step=3800600, episode=634 reward=0.7855941 (501.11 it/sec) -training >> step=3800700, episode=634 reward=0.7663964 (490.95 it/sec) -training >> step=3800800, episode=634 reward=0.7832142 (372.60 it/sec) -training >> step=3800900, episode=634 reward=0.7727538 (500.77 it/sec) -training >> step=3801000, episode=634 reward=0.7774835 (488.82 it/sec) -training >> step=3801100, episode=634 reward=0.7853417 (524.24 it/sec) -training >> step=3801200, episode=634 reward=0.795733 (492.25 it/sec) -training >> step=3801300, episode=634 reward=0.7783995 (521.33 it/sec) -training >> step=3801400, episode=634 reward=0.7716716 (505.30 it/sec) -training >> step=3801500, episode=634 reward=0.7714737 (503.25 it/sec) -training >> step=3801600, episode=634 reward=0.7788182 (489.80 it/sec) -training >> step=3801700, episode=634 reward=0.7923945 (488.09 it/sec) -training >> step=3801800, episode=634 reward=0.7626358 (533.13 it/sec) -training >> step=3801900, episode=634 reward=0.7751552 (490.09 it/sec) -training >> step=3802000, episode=634 reward=0.7856717 (487.66 it/sec) -training >> step=3802100, episode=634 reward=0.7844403 (496.47 it/sec) -training >> step=3802200, episode=634 reward=0.789453 (507.44 it/sec) -training >> step=3802300, episode=634 reward=0.7677528 (502.74 it/sec) -training >> step=3802400, episode=634 reward=0.7821265 (482.32 it/sec) -training >> step=3802500, episode=634 reward=0.7720026 (502.18 it/sec) -training >> step=3802600, episode=634 reward=0.7580257 (507.09 it/sec) -training >> step=3802700, episode=634 reward=0.7882889 (463.12 it/sec) -training >> step=3802800, episode=634 reward=0.7615175 (495.81 it/sec) -training >> step=3802900, episode=634 reward=0.788011 (520.93 it/sec) -training >> step=3803000, episode=634 reward=0.7852362 (495.48 it/sec) -training >> step=3803100, episode=634 reward=0.7817421 (477.81 it/sec) -training >> step=3803200, episode=634 reward=0.7856096 (497.60 it/sec) -training >> step=3803300, episode=635 reward=0.7811113 (91.85 it/sec) -training >> step=3803400, episode=635 reward=0.7740867 (501.46 it/sec) -training >> step=3803500, episode=635 reward=0.7841894 (509.13 it/sec) -training >> step=3803600, episode=635 reward=0.7635882 (487.02 it/sec) -training >> step=3803700, episode=635 reward=0.7781086 (503.40 it/sec) -training >> step=3803800, episode=635 reward=0.7865088 (502.33 it/sec) -training >> step=3803900, episode=635 reward=0.75551 (486.24 it/sec) -training >> step=3804000, episode=635 reward=0.7798719 (544.39 it/sec) -training >> step=3804100, episode=635 reward=0.7758192 (472.82 it/sec) -training >> step=3804200, episode=635 reward=0.7727286 (492.97 it/sec) -training >> step=3804300, episode=635 reward=0.7789758 (496.69 it/sec) -training >> step=3804400, episode=635 reward=0.7799107 (490.09 it/sec) -training >> step=3804500, episode=635 reward=0.7682862 (518.17 it/sec) -training >> step=3804600, episode=635 reward=0.7703548 (498.29 it/sec) -training >> step=3804700, episode=635 reward=0.7617788 (508.12 it/sec) -training >> step=3804800, episode=635 reward=0.7831681 (460.01 it/sec) -training >> step=3804900, episode=635 reward=0.7982031 (528.52 it/sec) -training >> step=3805000, episode=635 reward=0.7756431 (526.00 it/sec) -training >> step=3805100, episode=635 reward=0.7872886 (476.96 it/sec) -training >> step=3805200, episode=635 reward=0.7587892 (499.91 it/sec) -training >> step=3805300, episode=635 reward=0.7760028 (483.33 it/sec) -training >> step=3805400, episode=635 reward=0.7539937 (498.68 it/sec) -training >> step=3805500, episode=635 reward=0.781314 (505.98 it/sec) -training >> step=3805600, episode=635 reward=0.7676118 (493.17 it/sec) -training >> step=3805700, episode=635 reward=0.7707814 (527.21 it/sec) -training >> step=3805800, episode=635 reward=0.7773466 (490.09 it/sec) -training >> step=3805900, episode=635 reward=0.771751 (473.34 it/sec) -training >> step=3806000, episode=635 reward=0.773418 (539.42 it/sec) -training >> step=3806100, episode=635 reward=0.775034 (473.38 it/sec) -training >> step=3806200, episode=635 reward=0.7626899 (458.62 it/sec) -training >> step=3806300, episode=635 reward=0.780464 (476.07 it/sec) -training >> step=3806400, episode=635 reward=0.7786282 (507.75 it/sec) -training >> step=3806500, episode=635 reward=0.7882332 (518.02 it/sec) -training >> step=3806600, episode=635 reward=0.7831508 (475.04 it/sec) -training >> step=3806700, episode=635 reward=0.7807024 (483.80 it/sec) -training >> step=3806800, episode=635 reward=0.7874504 (483.32 it/sec) -training >> step=3806900, episode=635 reward=0.7639966 (484.90 it/sec) -training >> step=3807000, episode=635 reward=0.7783277 (372.03 it/sec) -training >> step=3807100, episode=635 reward=0.7611055 (479.32 it/sec) -training >> step=3807200, episode=635 reward=0.78813 (469.61 it/sec) -training >> step=3807300, episode=635 reward=0.7752728 (468.50 it/sec) -training >> step=3807400, episode=635 reward=0.7591357 (495.87 it/sec) -training >> step=3807500, episode=635 reward=0.7747421 (522.26 it/sec) -training >> step=3807600, episode=635 reward=0.764815 (474.26 it/sec) -training >> step=3807700, episode=635 reward=0.7840806 (478.09 it/sec) -training >> step=3807800, episode=635 reward=0.7746336 (509.42 it/sec) -training >> step=3807900, episode=635 reward=0.778021 (466.82 it/sec) -training >> step=3808000, episode=635 reward=0.7530328 (493.72 it/sec) -training >> step=3808100, episode=635 reward=0.7652008 (485.02 it/sec) -training >> step=3808200, episode=635 reward=0.7691684 (476.00 it/sec) -training >> step=3808300, episode=635 reward=0.7737195 (446.54 it/sec) -training >> step=3808400, episode=635 reward=0.7784017 (515.04 it/sec) -training >> step=3808500, episode=635 reward=0.765285 (488.95 it/sec) -training >> step=3808600, episode=635 reward=0.7603844 (474.25 it/sec) -training >> step=3808700, episode=635 reward=0.7854242 (492.49 it/sec) -training >> step=3808800, episode=635 reward=0.7855141 (505.64 it/sec) -training >> step=3808900, episode=635 reward=0.7671914 (529.26 it/sec) -training >> step=3809000, episode=635 reward=0.7735249 (510.31 it/sec) -training >> step=3809100, episode=635 reward=0.7753889 (497.44 it/sec) -training >> step=3809200, episode=635 reward=0.7737657 (451.62 it/sec) -training >> step=3809300, episode=636 reward=0.7604522 (74.77 it/sec) -training >> step=3809400, episode=636 reward=0.7995489 (478.67 it/sec) -training >> step=3809500, episode=636 reward=0.7823571 (504.35 it/sec) -training >> step=3809600, episode=636 reward=0.789435 (489.32 it/sec) -training >> step=3809700, episode=636 reward=0.7751841 (501.46 it/sec) -training >> step=3809800, episode=636 reward=0.7521588 (450.18 it/sec) -training >> step=3809900, episode=636 reward=0.7729188 (525.70 it/sec) -training >> step=3810000, episode=636 reward=0.7760647 (497.16 it/sec) -training >> step=3810100, episode=636 reward=0.7864777 (512.54 it/sec) -training >> step=3810200, episode=636 reward=0.7542757 (485.29 it/sec) -training >> step=3810300, episode=636 reward=0.7765848 (468.53 it/sec) -training >> step=3810400, episode=636 reward=0.7828919 (491.07 it/sec) -training >> step=3810500, episode=636 reward=0.7871881 (498.73 it/sec) -training >> step=3810600, episode=636 reward=0.7558919 (481.75 it/sec) -training >> step=3810700, episode=636 reward=0.7930721 (539.99 it/sec) -training >> step=3810800, episode=636 reward=0.7764706 (504.02 it/sec) -training >> step=3810900, episode=636 reward=0.7905447 (463.92 it/sec) -training >> step=3811000, episode=636 reward=0.76476 (492.75 it/sec) -training >> step=3811100, episode=636 reward=0.7812985 (494.17 it/sec) -training >> step=3811200, episode=636 reward=0.7855639 (498.86 it/sec) -training >> step=3811300, episode=636 reward=0.7945529 (487.56 it/sec) -training >> step=3811400, episode=636 reward=0.765328 (529.32 it/sec) -training >> step=3811500, episode=636 reward=0.7955907 (504.21 it/sec) -training >> step=3811600, episode=636 reward=0.7492064 (472.28 it/sec) -training >> step=3811700, episode=636 reward=0.7685534 (528.29 it/sec) -training >> step=3811800, episode=636 reward=0.7635582 (479.56 it/sec) -training >> step=3811900, episode=636 reward=0.7612646 (475.14 it/sec) -training >> step=3812000, episode=636 reward=0.7778665 (462.11 it/sec) -training >> step=3812100, episode=636 reward=0.7954879 (440.52 it/sec) -training >> step=3812200, episode=636 reward=0.7772411 (443.32 it/sec) -training >> step=3812300, episode=636 reward=0.7648156 (494.43 it/sec) -training >> step=3812400, episode=636 reward=0.7329862 (499.85 it/sec) -training >> step=3812500, episode=636 reward=0.7805718 (520.84 it/sec) -training >> step=3812600, episode=636 reward=0.7696032 (440.99 it/sec) -training >> step=3812700, episode=636 reward=0.7898356 (494.01 it/sec) -training >> step=3812800, episode=636 reward=0.7874244 (504.55 it/sec) -training >> step=3812900, episode=636 reward=0.7822031 (528.33 it/sec) -training >> step=3813000, episode=636 reward=0.7606226 (526.97 it/sec) -training >> step=3813100, episode=636 reward=0.7813745 (518.09 it/sec) -training >> step=3813200, episode=636 reward=0.7687218 (410.16 it/sec) -training >> step=3813300, episode=636 reward=0.7744887 (512.15 it/sec) -training >> step=3813400, episode=636 reward=0.7943406 (513.36 it/sec) -training >> step=3813500, episode=636 reward=0.7825416 (568.65 it/sec) -training >> step=3813600, episode=636 reward=0.780795 (533.52 it/sec) -training >> step=3813700, episode=636 reward=0.781902 (493.63 it/sec) -training >> step=3813800, episode=636 reward=0.7564646 (500.70 it/sec) -training >> step=3813900, episode=636 reward=0.7739707 (551.91 it/sec) -training >> step=3814000, episode=636 reward=0.7599866 (521.05 it/sec) -training >> step=3814100, episode=636 reward=0.7541896 (520.53 it/sec) -training >> step=3814200, episode=636 reward=0.7527736 (518.65 it/sec) -training >> step=3814300, episode=636 reward=0.7767403 (521.62 it/sec) -training >> step=3814400, episode=636 reward=0.7459142 (510.47 it/sec) -training >> step=3814500, episode=636 reward=0.7639149 (537.61 it/sec) -training >> step=3814600, episode=636 reward=0.7686301 (536.74 it/sec) -training >> step=3814700, episode=636 reward=0.7560438 (513.93 it/sec) -training >> step=3814800, episode=636 reward=0.7610016 (491.52 it/sec) -training >> step=3814900, episode=636 reward=0.7402983 (512.25 it/sec) -training >> step=3815000, episode=636 reward=0.7644585 (558.25 it/sec) -training >> step=3815100, episode=636 reward=0.7743965 (518.52 it/sec) -training >> step=3815200, episode=636 reward=0.7577687 (547.03 it/sec) -training >> step=3815300, episode=637 reward=0.7627527 (75.92 it/sec) -training >> step=3815400, episode=637 reward=0.7816826 (505.32 it/sec) -training >> step=3815500, episode=637 reward=0.7783265 (534.13 it/sec) -training >> step=3815600, episode=637 reward=0.7674467 (568.14 it/sec) -training >> step=3815700, episode=637 reward=0.7776279 (542.23 it/sec) -training >> step=3815800, episode=637 reward=0.7680057 (482.10 it/sec) -training >> step=3815900, episode=637 reward=0.7646629 (558.13 it/sec) -training >> step=3816000, episode=637 reward=0.7644325 (544.55 it/sec) -training >> step=3816100, episode=637 reward=0.7691635 (528.63 it/sec) -training >> step=3816200, episode=637 reward=0.7730108 (533.38 it/sec) -training >> step=3816300, episode=637 reward=0.7782025 (523.78 it/sec) -training >> step=3816400, episode=637 reward=0.7573566 (514.89 it/sec) -training >> step=3816500, episode=637 reward=0.7692848 (540.11 it/sec) -training >> step=3816600, episode=637 reward=0.7746698 (534.60 it/sec) -training >> step=3816700, episode=637 reward=0.7687504 (526.88 it/sec) -training >> step=3816800, episode=637 reward=0.7700157 (554.59 it/sec) -training >> step=3816900, episode=637 reward=0.7720717 (495.95 it/sec) -training >> step=3817000, episode=637 reward=0.7830682 (522.34 it/sec) -training >> step=3817100, episode=637 reward=0.7564179 (553.18 it/sec) -training >> step=3817200, episode=637 reward=0.7905679 (476.69 it/sec) -training >> step=3817300, episode=637 reward=0.7726607 (539.52 it/sec) -training >> step=3817400, episode=637 reward=0.7928121 (505.35 it/sec) -training >> step=3817500, episode=637 reward=0.7668488 (496.36 it/sec) -training >> step=3817600, episode=637 reward=0.7662004 (541.51 it/sec) -training >> step=3817700, episode=637 reward=0.7655473 (535.01 it/sec) -training >> step=3817800, episode=637 reward=0.786808 (526.06 it/sec) -training >> step=3817900, episode=637 reward=0.7948711 (554.29 it/sec) -training >> step=3818000, episode=637 reward=0.7715901 (471.25 it/sec) -training >> step=3818100, episode=637 reward=0.7672278 (517.38 it/sec) -training >> step=3818200, episode=637 reward=0.7826708 (536.26 it/sec) -training >> step=3818300, episode=637 reward=0.7760766 (534.72 it/sec) -training >> step=3818400, episode=637 reward=0.7685264 (527.26 it/sec) -training >> step=3818500, episode=637 reward=0.776947 (456.38 it/sec) -training >> step=3818600, episode=637 reward=0.7690909 (501.09 it/sec) -training >> step=3818700, episode=637 reward=0.7775848 (587.33 it/sec) -training >> step=3818800, episode=637 reward=0.7672464 (542.34 it/sec) -training >> step=3818900, episode=637 reward=0.7418083 (546.48 it/sec) -training >> step=3819000, episode=637 reward=0.7754107 (551.75 it/sec) -training >> step=3819100, episode=637 reward=0.7784597 (448.40 it/sec) -training >> step=3819200, episode=637 reward=0.7740594 (540.04 it/sec) -training >> step=3819300, episode=637 reward=0.7649346 (548.16 it/sec) -training >> step=3819400, episode=637 reward=0.7899159 (404.94 it/sec) -training >> step=3819500, episode=637 reward=0.7770991 (516.48 it/sec) -training >> step=3819600, episode=637 reward=0.7840847 (491.30 it/sec) -training >> step=3819700, episode=637 reward=0.7871276 (553.95 it/sec) -training >> step=3819800, episode=637 reward=0.7730765 (558.93 it/sec) -training >> step=3819900, episode=637 reward=0.7467831 (529.98 it/sec) -training >> step=3820000, episode=637 reward=0.7810802 (543.78 it/sec) -training >> step=3820100, episode=637 reward=0.7754992 (540.98 it/sec) -training >> step=3820200, episode=637 reward=0.7833934 (489.55 it/sec) -training >> step=3820300, episode=637 reward=0.7730408 (499.26 it/sec) -training >> step=3820400, episode=637 reward=0.753307 (555.47 it/sec) -training >> step=3820500, episode=637 reward=0.7863014 (541.80 it/sec) -training >> step=3820600, episode=637 reward=0.7732116 (496.74 it/sec) -training >> step=3820700, episode=637 reward=0.7814808 (504.06 it/sec) -training >> step=3820800, episode=637 reward=0.7893049 (568.72 it/sec) -training >> step=3820900, episode=637 reward=0.7699928 (518.03 it/sec) -training >> step=3821000, episode=637 reward=0.7513208 (522.23 it/sec) -training >> step=3821100, episode=637 reward=0.7593229 (462.81 it/sec) -training >> step=3821200, episode=637 reward=0.7765978 (511.35 it/sec) -training >> step=3821300, episode=638 reward=0.7802202 (134.92 it/sec) -training >> step=3821400, episode=638 reward=0.7811738 (486.50 it/sec) -training >> step=3821500, episode=638 reward=0.7659968 (520.73 it/sec) -training >> step=3821600, episode=638 reward=0.7706349 (557.45 it/sec) -training >> step=3821700, episode=638 reward=0.7814114 (518.39 it/sec) -training >> step=3821800, episode=638 reward=0.7651318 (533.47 it/sec) -training >> step=3821900, episode=638 reward=0.7933688 (490.98 it/sec) -training >> step=3822000, episode=638 reward=0.7808785 (487.01 it/sec) -training >> step=3822100, episode=638 reward=0.7843544 (501.07 it/sec) -training >> step=3822200, episode=638 reward=0.7650287 (504.87 it/sec) -training >> step=3822300, episode=638 reward=0.7807257 (540.28 it/sec) -training >> step=3822400, episode=638 reward=0.7682829 (498.32 it/sec) -training >> step=3822500, episode=638 reward=0.7644174 (501.71 it/sec) -training >> step=3822600, episode=638 reward=0.7867535 (554.07 it/sec) -training >> step=3822700, episode=638 reward=0.7744774 (489.50 it/sec) -training >> step=3822800, episode=638 reward=0.7594313 (529.09 it/sec) -training >> step=3822900, episode=638 reward=0.7275941 (517.03 it/sec) -training >> step=3823000, episode=638 reward=0.7729385 (508.57 it/sec) -training >> step=3823100, episode=638 reward=0.7667943 (508.27 it/sec) -training >> step=3823200, episode=638 reward=0.7711016 (517.25 it/sec) -training >> step=3823300, episode=638 reward=0.7637945 (507.34 it/sec) -training >> step=3823400, episode=638 reward=0.7635421 (498.46 it/sec) -training >> step=3823500, episode=638 reward=0.7668304 (505.25 it/sec) -training >> step=3823600, episode=638 reward=0.7716109 (520.05 it/sec) -training >> step=3823700, episode=638 reward=0.7821819 (550.97 it/sec) -training >> step=3823800, episode=638 reward=0.7939051 (501.99 it/sec) -training >> step=3823900, episode=638 reward=0.769534 (489.30 it/sec) -training >> step=3824000, episode=638 reward=0.7671369 (552.08 it/sec) -training >> step=3824100, episode=638 reward=0.7512336 (530.44 it/sec) -training >> step=3824200, episode=638 reward=0.7734004 (524.50 it/sec) -training >> step=3824300, episode=638 reward=0.7653409 (532.24 it/sec) -training >> step=3824400, episode=638 reward=0.7762069 (510.10 it/sec) -training >> step=3824500, episode=638 reward=0.7614319 (502.57 it/sec) -training >> step=3824600, episode=638 reward=0.773663 (484.74 it/sec) -training >> step=3824700, episode=638 reward=0.7906372 (526.09 it/sec) -training >> step=3824800, episode=638 reward=0.7941806 (545.07 it/sec) -training >> step=3824900, episode=638 reward=0.7839279 (514.47 it/sec) -training >> step=3825000, episode=638 reward=0.7704206 (499.79 it/sec) -training >> step=3825100, episode=638 reward=0.7784823 (484.03 it/sec) -training >> step=3825200, episode=638 reward=0.781182 (514.98 it/sec) -training >> step=3825300, episode=638 reward=0.7617171 (536.20 it/sec) -training >> step=3825400, episode=638 reward=0.7735513 (531.59 it/sec) -training >> step=3825500, episode=638 reward=0.771634 (510.32 it/sec) -training >> step=3825600, episode=638 reward=0.7704701 (358.50 it/sec) -training >> step=3825700, episode=638 reward=0.8035998 (500.21 it/sec) -training >> step=3825800, episode=638 reward=0.77432 (542.17 it/sec) -training >> step=3825900, episode=638 reward=0.7819232 (499.51 it/sec) -training >> step=3826000, episode=638 reward=0.7597045 (502.96 it/sec) -training >> step=3826100, episode=638 reward=0.7683245 (489.16 it/sec) -training >> step=3826200, episode=638 reward=0.7604726 (511.41 it/sec) -training >> step=3826300, episode=638 reward=0.7902412 (434.91 it/sec) -training >> step=3826400, episode=638 reward=0.7790192 (506.56 it/sec) -training >> step=3826500, episode=638 reward=0.7712542 (491.71 it/sec) -training >> step=3826600, episode=638 reward=0.7933242 (502.98 it/sec) -training >> step=3826700, episode=638 reward=0.7849235 (468.31 it/sec) -training >> step=3826800, episode=638 reward=0.7666482 (413.91 it/sec) -training >> step=3826900, episode=638 reward=0.7834617 (512.33 it/sec) -training >> step=3827000, episode=638 reward=0.7596874 (514.49 it/sec) -training >> step=3827100, episode=638 reward=0.7630587 (444.23 it/sec) -training >> step=3827200, episode=638 reward=0.7591715 (452.54 it/sec) -training >> step=3827300, episode=639 reward=0.7952594 (68.74 it/sec) -training >> step=3827400, episode=639 reward=0.7714895 (513.36 it/sec) -training >> step=3827500, episode=639 reward=0.76244 (515.09 it/sec) -training >> step=3827600, episode=639 reward=0.7745638 (491.41 it/sec) -training >> step=3827700, episode=639 reward=0.7860693 (496.06 it/sec) -training >> step=3827800, episode=639 reward=0.7694616 (516.18 it/sec) -training >> step=3827900, episode=639 reward=0.7725019 (548.01 it/sec) -training >> step=3828000, episode=639 reward=0.7788836 (511.20 it/sec) -training >> step=3828100, episode=639 reward=0.7600613 (506.27 it/sec) -training >> step=3828200, episode=639 reward=0.7820663 (497.09 it/sec) -training >> step=3828300, episode=639 reward=0.7683738 (498.84 it/sec) -training >> step=3828400, episode=639 reward=0.7821552 (521.25 it/sec) -training >> step=3828500, episode=639 reward=0.7865996 (500.21 it/sec) -training >> step=3828600, episode=639 reward=0.7650487 (365.05 it/sec) -training >> step=3828700, episode=639 reward=0.7854829 (400.50 it/sec) -training >> step=3828800, episode=639 reward=0.7760354 (499.08 it/sec) -training >> step=3828900, episode=639 reward=0.7559581 (487.55 it/sec) -training >> step=3829000, episode=639 reward=0.7718498 (405.03 it/sec) -training >> step=3829100, episode=639 reward=0.8111162 (417.04 it/sec) -training >> step=3829200, episode=639 reward=0.776691 (426.16 it/sec) -training >> step=3829300, episode=639 reward=0.7835842 (426.49 it/sec) -training >> step=3829400, episode=639 reward=0.7817324 (477.42 it/sec) -training >> step=3829500, episode=639 reward=0.7796739 (476.54 it/sec) -training >> step=3829600, episode=639 reward=0.7860856 (434.84 it/sec) -training >> step=3829700, episode=639 reward=0.7733739 (458.61 it/sec) -training >> step=3829800, episode=639 reward=0.7923355 (479.35 it/sec) -training >> step=3829900, episode=639 reward=0.7683349 (497.27 it/sec) -training >> step=3830000, episode=639 reward=0.7893696 (499.71 it/sec) -training >> step=3830100, episode=639 reward=0.7765337 (464.89 it/sec) -training >> step=3830200, episode=639 reward=0.7935928 (487.98 it/sec) -training >> step=3830300, episode=639 reward=0.7726015 (483.82 it/sec) -training >> step=3830400, episode=639 reward=0.7685274 (466.82 it/sec) -training >> step=3830500, episode=639 reward=0.7804219 (428.94 it/sec) -training >> step=3830600, episode=639 reward=0.7798789 (487.86 it/sec) -training >> step=3830700, episode=639 reward=0.7566111 (463.24 it/sec) -training >> step=3830800, episode=639 reward=0.7871436 (401.31 it/sec) -training >> step=3830900, episode=639 reward=0.7580939 (412.71 it/sec) -training >> step=3831000, episode=639 reward=0.7780877 (379.36 it/sec) -training >> step=3831100, episode=639 reward=0.7804045 (389.32 it/sec) -training >> step=3831200, episode=639 reward=0.7692599 (405.96 it/sec) -training >> step=3831300, episode=639 reward=0.7765403 (334.52 it/sec) -training >> step=3831400, episode=639 reward=0.7676615 (418.05 it/sec) -training >> step=3831500, episode=639 reward=0.7816625 (418.15 it/sec) -training >> step=3831600, episode=639 reward=0.7843884 (429.01 it/sec) -training >> step=3831700, episode=639 reward=0.7739505 (457.02 it/sec) -training >> step=3831800, episode=639 reward=0.7649262 (347.47 it/sec) -training >> step=3831900, episode=639 reward=0.7653843 (527.48 it/sec) -training >> step=3832000, episode=639 reward=0.7699122 (448.01 it/sec) -training >> step=3832100, episode=639 reward=0.7783665 (454.96 it/sec) -training >> step=3832200, episode=639 reward=0.7856618 (462.76 it/sec) -training >> step=3832300, episode=639 reward=0.7675475 (491.71 it/sec) -training >> step=3832400, episode=639 reward=0.7832755 (481.14 it/sec) -training >> step=3832500, episode=639 reward=0.7621883 (487.42 it/sec) -training >> step=3832600, episode=639 reward=0.764603 (466.88 it/sec) -training >> step=3832700, episode=639 reward=0.792302 (477.28 it/sec) -training >> step=3832800, episode=639 reward=0.7936114 (471.33 it/sec) -training >> step=3832900, episode=639 reward=0.7510378 (520.41 it/sec) -training >> step=3833000, episode=639 reward=0.7528688 (444.90 it/sec) -training >> step=3833100, episode=639 reward=0.7616978 (471.62 it/sec) -training >> step=3833200, episode=639 reward=0.7726688 (472.27 it/sec) -training >> step=3833300, episode=640 reward=0.7835674 (84.25 it/sec) -training >> step=3833400, episode=640 reward=0.7747676 (463.18 it/sec) -training >> step=3833500, episode=640 reward=0.7544172 (499.14 it/sec) -training >> step=3833600, episode=640 reward=0.7784204 (494.41 it/sec) -training >> step=3833700, episode=640 reward=0.7822623 (447.70 it/sec) -training >> step=3833800, episode=640 reward=0.7884908 (467.06 it/sec) -training >> step=3833900, episode=640 reward=0.7764598 (480.13 it/sec) -training >> step=3834000, episode=640 reward=0.7802773 (465.86 it/sec) -training >> step=3834100, episode=640 reward=0.7831434 (471.41 it/sec) -training >> step=3834200, episode=640 reward=0.7575861 (497.54 it/sec) -training >> step=3834300, episode=640 reward=0.7801226 (483.13 it/sec) -training >> step=3834400, episode=640 reward=0.772609 (518.68 it/sec) -training >> step=3834500, episode=640 reward=0.7731017 (471.80 it/sec) -training >> step=3834600, episode=640 reward=0.7757196 (482.28 it/sec) -training >> step=3834700, episode=640 reward=0.7706748 (516.46 it/sec) -training >> step=3834800, episode=640 reward=0.7751653 (444.70 it/sec) -training >> step=3834900, episode=640 reward=0.7881471 (489.84 it/sec) -training >> step=3835000, episode=640 reward=0.7569886 (483.00 it/sec) -training >> step=3835100, episode=640 reward=0.7669958 (497.27 it/sec) -training >> step=3835200, episode=640 reward=0.7966099 (463.80 it/sec) -training >> step=3835300, episode=640 reward=0.7953805 (477.30 it/sec) -training >> step=3835400, episode=640 reward=0.781355 (474.12 it/sec) -training >> step=3835500, episode=640 reward=0.7901796 (467.63 it/sec) -training >> step=3835600, episode=640 reward=0.7644156 (464.02 it/sec) -training >> step=3835700, episode=640 reward=0.7922164 (492.98 it/sec) -training >> step=3835800, episode=640 reward=0.7851716 (485.67 it/sec) -training >> step=3835900, episode=640 reward=0.7687047 (490.61 it/sec) -training >> step=3836000, episode=640 reward=0.777999 (482.61 it/sec) -training >> step=3836100, episode=640 reward=0.7849178 (457.57 it/sec) -training >> step=3836200, episode=640 reward=0.7811381 (487.79 it/sec) -training >> step=3836300, episode=640 reward=0.7682972 (471.90 it/sec) -training >> step=3836400, episode=640 reward=0.7621147 (498.60 it/sec) -training >> step=3836500, episode=640 reward=0.7743158 (477.48 it/sec) -training >> step=3836600, episode=640 reward=0.7765659 (474.76 it/sec) -training >> step=3836700, episode=640 reward=0.7690787 (455.16 it/sec) -training >> step=3836800, episode=640 reward=0.777317 (432.36 it/sec) -training >> step=3836900, episode=640 reward=0.771008 (461.24 it/sec) -training >> step=3837000, episode=640 reward=0.781214 (497.93 it/sec) -training >> step=3837100, episode=640 reward=0.7721231 (417.64 it/sec) -training >> step=3837200, episode=640 reward=0.7543424 (453.64 it/sec) -training >> step=3837300, episode=640 reward=0.797583 (499.90 it/sec) -training >> step=3837400, episode=640 reward=0.7746395 (449.60 it/sec) -training >> step=3837500, episode=640 reward=0.743783 (499.41 it/sec) -training >> step=3837600, episode=640 reward=0.7679987 (504.72 it/sec) -training >> step=3837700, episode=640 reward=0.7838713 (488.66 it/sec) -training >> step=3837800, episode=640 reward=0.7815056 (526.23 it/sec) -training >> step=3837900, episode=640 reward=0.7672983 (501.45 it/sec) -training >> step=3838000, episode=640 reward=0.7685122 (459.01 it/sec) -training >> step=3838100, episode=640 reward=0.7656568 (321.26 it/sec) -training >> step=3838200, episode=640 reward=0.774414 (536.19 it/sec) -training >> step=3838300, episode=640 reward=0.7791606 (525.79 it/sec) -training >> step=3838400, episode=640 reward=0.7638804 (504.31 it/sec) -training >> step=3838500, episode=640 reward=0.7612317 (460.15 it/sec) -training >> step=3838600, episode=640 reward=0.7852451 (486.96 it/sec) -training >> step=3838700, episode=640 reward=0.7551495 (469.55 it/sec) -training >> step=3838800, episode=640 reward=0.7524732 (503.28 it/sec) -training >> step=3838900, episode=640 reward=0.7749438 (485.56 it/sec) -training >> step=3839000, episode=640 reward=0.7690917 (469.58 it/sec) -training >> step=3839100, episode=640 reward=0.774245 (480.49 it/sec) -training >> step=3839200, episode=640 reward=0.7676588 (496.70 it/sec) -training >> step=3839300, episode=641 reward=0.7825494 (70.83 it/sec) -training >> step=3839400, episode=641 reward=0.761635 (457.57 it/sec) -training >> step=3839500, episode=641 reward=0.7523125 (519.23 it/sec) -training >> step=3839600, episode=641 reward=0.757252 (530.86 it/sec) -training >> step=3839700, episode=641 reward=0.7531781 (483.31 it/sec) -training >> step=3839800, episode=641 reward=0.7578514 (469.32 it/sec) -training >> step=3839900, episode=641 reward=0.7663776 (520.78 it/sec) -training >> step=3840000, episode=641 reward=0.7842139 (517.03 it/sec) -training >> step=3840100, episode=641 reward=0.7850971 (453.46 it/sec) -training >> step=3840200, episode=641 reward=0.7814252 (517.49 it/sec) -training >> step=3840300, episode=641 reward=0.7401721 (504.18 it/sec) -training >> step=3840400, episode=641 reward=0.7725577 (492.32 it/sec) -training >> step=3840500, episode=641 reward=0.774489 (542.72 it/sec) -training >> step=3840600, episode=641 reward=0.7792361 (506.40 it/sec) -training >> step=3840700, episode=641 reward=0.7498993 (492.87 it/sec) -training >> step=3840800, episode=641 reward=0.7690848 (521.56 it/sec) -training >> step=3840900, episode=641 reward=0.7316529 (479.24 it/sec) -training >> step=3841000, episode=641 reward=0.7740321 (478.57 it/sec) -training >> step=3841100, episode=641 reward=0.7509515 (540.58 it/sec) -training >> step=3841200, episode=641 reward=0.7982168 (523.53 it/sec) -training >> step=3841300, episode=641 reward=0.7888557 (450.56 it/sec) -training >> step=3841400, episode=641 reward=0.784397 (496.71 it/sec) -training >> step=3841500, episode=641 reward=0.7747778 (534.64 it/sec) -training >> step=3841600, episode=641 reward=0.7663563 (511.22 it/sec) -training >> step=3841700, episode=641 reward=0.773573 (517.38 it/sec) -training >> step=3841800, episode=641 reward=0.7847866 (523.62 it/sec) -training >> step=3841900, episode=641 reward=0.786518 (532.85 it/sec) -training >> step=3842000, episode=641 reward=0.777302 (444.35 it/sec) -training >> step=3842100, episode=641 reward=0.7882856 (486.91 it/sec) -training >> step=3842200, episode=641 reward=0.7803457 (531.38 it/sec) -training >> step=3842300, episode=641 reward=0.7708297 (505.07 it/sec) -training >> step=3842400, episode=641 reward=0.7625115 (504.14 it/sec) -training >> step=3842500, episode=641 reward=0.7689094 (491.54 it/sec) -training >> step=3842600, episode=641 reward=0.771443 (538.89 it/sec) -training >> step=3842700, episode=641 reward=0.7547995 (526.00 it/sec) -training >> step=3842800, episode=641 reward=0.7706048 (443.73 it/sec) -training >> step=3842900, episode=641 reward=0.7787111 (491.03 it/sec) -training >> step=3843000, episode=641 reward=0.7632626 (527.44 it/sec) -training >> step=3843100, episode=641 reward=0.768869 (515.56 it/sec) -training >> step=3843200, episode=641 reward=0.7856649 (508.71 it/sec) -training >> step=3843300, episode=641 reward=0.7718109 (521.02 it/sec) -training >> step=3843400, episode=641 reward=0.7799444 (505.79 it/sec) -training >> step=3843500, episode=641 reward=0.7917513 (481.22 it/sec) -training >> step=3843600, episode=641 reward=0.7778643 (556.70 it/sec) -training >> step=3843700, episode=641 reward=0.7810221 (531.17 it/sec) -training >> step=3843800, episode=641 reward=0.7957361 (531.42 it/sec) -training >> step=3843900, episode=641 reward=0.7742856 (501.50 it/sec) -training >> step=3844000, episode=641 reward=0.779777 (535.27 it/sec) -training >> step=3844100, episode=641 reward=0.7827586 (472.71 it/sec) -training >> step=3844200, episode=641 reward=0.7418621 (499.30 it/sec) -training >> step=3844300, episode=641 reward=0.7640287 (332.44 it/sec) -training >> step=3844400, episode=641 reward=0.7781337 (500.49 it/sec) -training >> step=3844500, episode=641 reward=0.79185 (515.29 it/sec) -training >> step=3844600, episode=641 reward=0.7733752 (443.39 it/sec) -training >> step=3844700, episode=641 reward=0.7904904 (499.66 it/sec) -training >> step=3844800, episode=641 reward=0.7852711 (532.86 it/sec) -training >> step=3844900, episode=641 reward=0.7747095 (506.84 it/sec) -training >> step=3845000, episode=641 reward=0.7758904 (489.19 it/sec) -training >> step=3845100, episode=641 reward=0.7841114 (530.50 it/sec) -training >> step=3845200, episode=641 reward=0.7676197 (496.26 it/sec) -training >> step=3845300, episode=642 reward=0.7561215 (46.04 it/sec) -training >> step=3845400, episode=642 reward=0.7826171 (477.92 it/sec) -training >> step=3845500, episode=642 reward=0.7801276 (530.41 it/sec) -training >> step=3845600, episode=642 reward=0.7997458 (460.98 it/sec) -training >> step=3845700, episode=642 reward=0.7760194 (532.94 it/sec) -training >> step=3845800, episode=642 reward=0.7403297 (500.35 it/sec) -training >> step=3845900, episode=642 reward=0.750797 (461.52 it/sec) -training >> step=3846000, episode=642 reward=0.7724785 (525.17 it/sec) -training >> step=3846100, episode=642 reward=0.7456763 (506.36 it/sec) -training >> step=3846200, episode=642 reward=0.7781978 (485.70 it/sec) -training >> step=3846300, episode=642 reward=0.7777293 (547.60 it/sec) -training >> step=3846400, episode=642 reward=0.7710595 (523.59 it/sec) -training >> step=3846500, episode=642 reward=0.7753045 (501.49 it/sec) -training >> step=3846600, episode=642 reward=0.7830109 (500.97 it/sec) -training >> step=3846700, episode=642 reward=0.7805383 (502.82 it/sec) -training >> step=3846800, episode=642 reward=0.7716078 (493.57 it/sec) -training >> step=3846900, episode=642 reward=0.7777051 (502.88 it/sec) -training >> step=3847000, episode=642 reward=0.7562826 (510.01 it/sec) -training >> step=3847100, episode=642 reward=0.7900742 (515.44 it/sec) -training >> step=3847200, episode=642 reward=0.7890455 (477.36 it/sec) -training >> step=3847300, episode=642 reward=0.7968738 (517.01 it/sec) -training >> step=3847400, episode=642 reward=0.7671688 (518.84 it/sec) -training >> step=3847500, episode=642 reward=0.7736187 (510.18 it/sec) -training >> step=3847600, episode=642 reward=0.7762122 (544.46 it/sec) -training >> step=3847700, episode=642 reward=0.7670398 (510.88 it/sec) -training >> step=3847800, episode=642 reward=0.7740887 (488.00 it/sec) -training >> step=3847900, episode=642 reward=0.7767255 (525.59 it/sec) -training >> step=3848000, episode=642 reward=0.7468683 (483.03 it/sec) -training >> step=3848100, episode=642 reward=0.7772313 (522.02 it/sec) -training >> step=3848200, episode=642 reward=0.7865462 (515.70 it/sec) -training >> step=3848300, episode=642 reward=0.7706103 (473.01 it/sec) -training >> step=3848400, episode=642 reward=0.7949865 (431.18 it/sec) -training >> step=3848500, episode=642 reward=0.7748867 (448.60 it/sec) -training >> step=3848600, episode=642 reward=0.7742333 (537.17 it/sec) -training >> step=3848700, episode=642 reward=0.772096 (459.97 it/sec) -training >> step=3848800, episode=642 reward=0.7699727 (483.58 it/sec) -training >> step=3848900, episode=642 reward=0.7642314 (486.32 it/sec) -training >> step=3849000, episode=642 reward=0.789118 (499.71 it/sec) -training >> step=3849100, episode=642 reward=0.787016 (476.60 it/sec) -training >> step=3849200, episode=642 reward=0.7799502 (487.90 it/sec) -training >> step=3849300, episode=642 reward=0.7822458 (478.42 it/sec) -training >> step=3849400, episode=642 reward=0.7807356 (518.66 it/sec) -training >> step=3849500, episode=642 reward=0.7750769 (479.03 it/sec) -training >> step=3849600, episode=642 reward=0.7878042 (536.20 it/sec) -training >> step=3849700, episode=642 reward=0.7913186 (495.44 it/sec) -training >> step=3849800, episode=642 reward=0.775713 (491.92 it/sec) -training >> step=3849900, episode=642 reward=0.7758266 (493.31 it/sec) -training >> step=3850000, episode=642 reward=0.7810288 (497.41 it/sec) -training >> step=3850100, episode=642 reward=0.778324 (513.61 it/sec) -training >> step=3850200, episode=642 reward=0.7489483 (504.16 it/sec) -training >> step=3850300, episode=642 reward=0.7988918 (484.98 it/sec) -training >> step=3850400, episode=642 reward=0.762762 (493.78 it/sec) -training >> step=3850500, episode=642 reward=0.7800048 (435.74 it/sec) -training >> step=3850600, episode=642 reward=0.7862896 (542.11 it/sec) -training >> step=3850700, episode=642 reward=0.7803208 (479.96 it/sec) -training >> step=3850800, episode=642 reward=0.7600729 (501.12 it/sec) -training >> step=3850900, episode=642 reward=0.7671122 (504.44 it/sec) -training >> step=3851000, episode=642 reward=0.756088 (503.96 it/sec) -training >> step=3851100, episode=642 reward=0.7649144 (512.25 it/sec) -training >> step=3851200, episode=642 reward=0.7582141 (529.10 it/sec) -training >> step=3851300, episode=643 reward=0.7861403 (130.97 it/sec) -training >> step=3851400, episode=643 reward=0.7681687 (556.98 it/sec) -training >> step=3851500, episode=643 reward=0.7630275 (520.35 it/sec) -training >> step=3851600, episode=643 reward=0.783888 (459.60 it/sec) -training >> step=3851700, episode=643 reward=0.7688065 (479.00 it/sec) -training >> step=3851800, episode=643 reward=0.7566378 (490.33 it/sec) -training >> step=3851900, episode=643 reward=0.7855986 (504.67 it/sec) -training >> step=3852000, episode=643 reward=0.7697722 (508.97 it/sec) -training >> step=3852100, episode=643 reward=0.7562773 (477.97 it/sec) -training >> step=3852200, episode=643 reward=0.7703389 (518.52 it/sec) -training >> step=3852300, episode=643 reward=0.7577789 (547.50 it/sec) -training >> step=3852400, episode=643 reward=0.7817289 (516.54 it/sec) -training >> step=3852500, episode=643 reward=0.774866 (503.01 it/sec) -training >> step=3852600, episode=643 reward=0.7831672 (495.41 it/sec) -training >> step=3852700, episode=643 reward=0.7955668 (519.66 it/sec) -training >> step=3852800, episode=643 reward=0.7744249 (498.53 it/sec) -training >> step=3852900, episode=643 reward=0.7605485 (490.67 it/sec) -training >> step=3853000, episode=643 reward=0.7741735 (511.40 it/sec) -training >> step=3853100, episode=643 reward=0.7670946 (421.72 it/sec) -training >> step=3853200, episode=643 reward=0.7712078 (480.90 it/sec) -training >> step=3853300, episode=643 reward=0.7711171 (527.47 it/sec) -training >> step=3853400, episode=643 reward=0.7701751 (528.14 it/sec) -training >> step=3853500, episode=643 reward=0.7803769 (471.71 it/sec) -training >> step=3853600, episode=643 reward=0.7551042 (473.43 it/sec) -training >> step=3853700, episode=643 reward=0.7721136 (515.12 it/sec) -training >> step=3853800, episode=643 reward=0.7680751 (509.06 it/sec) -training >> step=3853900, episode=643 reward=0.7798319 (457.83 it/sec) -training >> step=3854000, episode=643 reward=0.7758508 (512.96 it/sec) -training >> step=3854100, episode=643 reward=0.773842 (521.83 it/sec) -training >> step=3854200, episode=643 reward=0.7758511 (532.46 it/sec) -training >> step=3854300, episode=643 reward=0.7869116 (489.60 it/sec) -training >> step=3854400, episode=643 reward=0.7911823 (530.55 it/sec) -training >> step=3854500, episode=643 reward=0.7552204 (549.61 it/sec) -training >> step=3854600, episode=643 reward=0.7628149 (525.59 it/sec) -training >> step=3854700, episode=643 reward=0.7719536 (519.85 it/sec) -training >> step=3854800, episode=643 reward=0.8000937 (536.98 it/sec) -training >> step=3854900, episode=643 reward=0.7661178 (508.78 it/sec) -training >> step=3855000, episode=643 reward=0.7895092 (533.86 it/sec) -training >> step=3855100, episode=643 reward=0.7744464 (556.68 it/sec) -training >> step=3855200, episode=643 reward=0.7675403 (527.68 it/sec) -training >> step=3855300, episode=643 reward=0.7700709 (524.97 it/sec) -training >> step=3855400, episode=643 reward=0.7835373 (522.18 it/sec) -training >> step=3855500, episode=643 reward=0.7646747 (553.94 it/sec) -training >> step=3855600, episode=643 reward=0.7676014 (522.65 it/sec) -training >> step=3855700, episode=643 reward=0.7851459 (552.64 it/sec) -training >> step=3855800, episode=643 reward=0.7693708 (513.90 it/sec) -training >> step=3855900, episode=643 reward=0.7752356 (518.37 it/sec) -training >> step=3856000, episode=643 reward=0.7784632 (505.71 it/sec) -training >> step=3856100, episode=643 reward=0.7635184 (537.20 it/sec) -training >> step=3856200, episode=643 reward=0.7698733 (551.66 it/sec) -training >> step=3856300, episode=643 reward=0.7599249 (517.35 it/sec) -training >> step=3856400, episode=643 reward=0.7670571 (527.26 it/sec) -training >> step=3856500, episode=643 reward=0.7787425 (411.45 it/sec) -training >> step=3856600, episode=643 reward=0.7896457 (557.18 it/sec) -training >> step=3856700, episode=643 reward=0.7754766 (540.03 it/sec) -training >> step=3856800, episode=643 reward=0.7833154 (508.83 it/sec) -training >> step=3856900, episode=643 reward=0.7935261 (508.78 it/sec) -training >> step=3857000, episode=643 reward=0.7590201 (561.49 it/sec) -training >> step=3857100, episode=643 reward=0.7621931 (544.82 it/sec) -training >> step=3857200, episode=643 reward=0.7845489 (523.47 it/sec) -training >> step=3857300, episode=644 reward=0.7719423 (119.62 it/sec) -training >> step=3857400, episode=644 reward=0.7635284 (545.08 it/sec) -training >> step=3857500, episode=644 reward=0.7413205 (530.27 it/sec) -training >> step=3857600, episode=644 reward=0.7742771 (510.63 it/sec) -training >> step=3857700, episode=644 reward=0.7668651 (501.04 it/sec) -training >> step=3857800, episode=644 reward=0.7983782 (553.66 it/sec) -training >> step=3857900, episode=644 reward=0.7932521 (535.47 it/sec) -training >> step=3858000, episode=644 reward=0.7678081 (538.77 it/sec) -training >> step=3858100, episode=644 reward=0.7744588 (514.01 it/sec) -training >> step=3858200, episode=644 reward=0.7652471 (544.66 it/sec) -training >> step=3858300, episode=644 reward=0.7970579 (489.13 it/sec) -training >> step=3858400, episode=644 reward=0.754409 (477.21 it/sec) -training >> step=3858500, episode=644 reward=0.7867974 (540.19 it/sec) -training >> step=3858600, episode=644 reward=0.7786294 (515.68 it/sec) -training >> step=3858700, episode=644 reward=0.7792653 (500.53 it/sec) -training >> step=3858800, episode=644 reward=0.7569398 (543.35 it/sec) -training >> step=3858900, episode=644 reward=0.7749979 (567.00 it/sec) -training >> step=3859000, episode=644 reward=0.7776105 (502.69 it/sec) -training >> step=3859100, episode=644 reward=0.7890736 (520.35 it/sec) -training >> step=3859200, episode=644 reward=0.7830949 (525.93 it/sec) -training >> step=3859300, episode=644 reward=0.7590553 (514.87 it/sec) -training >> step=3859400, episode=644 reward=0.7664181 (510.91 it/sec) -training >> step=3859500, episode=644 reward=0.785243 (531.74 it/sec) -training >> step=3859600, episode=644 reward=0.7823215 (541.82 it/sec) -training >> step=3859700, episode=644 reward=0.7745486 (537.55 it/sec) -training >> step=3859800, episode=644 reward=0.7794868 (520.72 it/sec) -training >> step=3859900, episode=644 reward=0.7560893 (517.39 it/sec) -training >> step=3860000, episode=644 reward=0.7580745 (577.39 it/sec) -training >> step=3860100, episode=644 reward=0.7978385 (538.75 it/sec) -training >> step=3860200, episode=644 reward=0.7772744 (508.69 it/sec) -training >> step=3860300, episode=644 reward=0.7921056 (534.38 it/sec) -training >> step=3860400, episode=644 reward=0.7536948 (530.44 it/sec) -training >> step=3860500, episode=644 reward=0.7520173 (564.00 it/sec) -training >> step=3860600, episode=644 reward=0.7705277 (515.30 it/sec) -training >> step=3860700, episode=644 reward=0.7633679 (573.57 it/sec) -training >> step=3860800, episode=644 reward=0.8009569 (509.99 it/sec) -training >> step=3860900, episode=644 reward=0.7783498 (520.34 it/sec) -training >> step=3861000, episode=644 reward=0.790348 (526.36 it/sec) -training >> step=3861100, episode=644 reward=0.7732683 (555.20 it/sec) -training >> step=3861200, episode=644 reward=0.7886795 (540.02 it/sec) -training >> step=3861300, episode=644 reward=0.7862391 (525.36 it/sec) -training >> step=3861400, episode=644 reward=0.7832406 (506.93 it/sec) -training >> step=3861500, episode=644 reward=0.7870794 (536.20 it/sec) -training >> step=3861600, episode=644 reward=0.7947079 (535.22 it/sec) -training >> step=3861700, episode=644 reward=0.8072338 (528.88 it/sec) -training >> step=3861800, episode=644 reward=0.7761588 (538.69 it/sec) -training >> step=3861900, episode=644 reward=0.7670364 (516.42 it/sec) -training >> step=3862000, episode=644 reward=0.7593406 (497.19 it/sec) -training >> step=3862100, episode=644 reward=0.7886136 (552.42 it/sec) -training >> step=3862200, episode=644 reward=0.761665 (563.18 it/sec) -training >> step=3862300, episode=644 reward=0.7730938 (539.81 it/sec) -training >> step=3862400, episode=644 reward=0.7740024 (526.01 it/sec) -training >> step=3862500, episode=644 reward=0.7689332 (347.17 it/sec) -training >> step=3862600, episode=644 reward=0.7599009 (509.22 it/sec) -training >> step=3862700, episode=644 reward=0.7584013 (540.99 it/sec) -training >> step=3862800, episode=644 reward=0.777204 (517.35 it/sec) -training >> step=3862900, episode=644 reward=0.7884167 (566.61 it/sec) -training >> step=3863000, episode=644 reward=0.7766544 (496.63 it/sec) -training >> step=3863100, episode=644 reward=0.7888328 (516.08 it/sec) -training >> step=3863200, episode=644 reward=0.7604768 (529.09 it/sec) -training >> step=3863300, episode=645 reward=0.7691245 (121.20 it/sec) -training >> step=3863400, episode=645 reward=0.7706005 (513.84 it/sec) -training >> step=3863500, episode=645 reward=0.7856407 (536.60 it/sec) -training >> step=3863600, episode=645 reward=0.7661707 (548.49 it/sec) -training >> step=3863700, episode=645 reward=0.7675896 (529.47 it/sec) -training >> step=3863800, episode=645 reward=0.766017 (488.49 it/sec) -training >> step=3863900, episode=645 reward=0.7935251 (566.50 it/sec) -training >> step=3864000, episode=645 reward=0.7462779 (539.15 it/sec) -training >> step=3864100, episode=645 reward=0.768598 (524.96 it/sec) -training >> step=3864200, episode=645 reward=0.7716236 (534.55 it/sec) -training >> step=3864300, episode=645 reward=0.7915797 (500.27 it/sec) -training >> step=3864400, episode=645 reward=0.7756864 (518.91 it/sec) -training >> step=3864500, episode=645 reward=0.7483048 (512.20 it/sec) -training >> step=3864600, episode=645 reward=0.790499 (533.80 it/sec) -training >> step=3864700, episode=645 reward=0.7784982 (548.69 it/sec) -training >> step=3864800, episode=645 reward=0.7654496 (513.83 it/sec) -training >> step=3864900, episode=645 reward=0.7900147 (534.72 it/sec) -training >> step=3865000, episode=645 reward=0.8037252 (568.52 it/sec) -training >> step=3865100, episode=645 reward=0.7796348 (520.11 it/sec) -training >> step=3865200, episode=645 reward=0.7539964 (523.16 it/sec) -training >> step=3865300, episode=645 reward=0.7692053 (548.89 it/sec) -training >> step=3865400, episode=645 reward=0.7982497 (524.50 it/sec) -training >> step=3865500, episode=645 reward=0.7832488 (538.14 it/sec) -training >> step=3865600, episode=645 reward=0.7567642 (529.81 it/sec) -training >> step=3865700, episode=645 reward=0.7867329 (567.12 it/sec) -training >> step=3865800, episode=645 reward=0.7671757 (528.02 it/sec) -training >> step=3865900, episode=645 reward=0.787519 (525.74 it/sec) -training >> step=3866000, episode=645 reward=0.7714971 (523.10 it/sec) -training >> step=3866100, episode=645 reward=0.7748206 (585.25 it/sec) -training >> step=3866200, episode=645 reward=0.7781513 (531.77 it/sec) -training >> step=3866300, episode=645 reward=0.7672324 (504.07 it/sec) -training >> step=3866400, episode=645 reward=0.7700274 (533.83 it/sec) -training >> step=3866500, episode=645 reward=0.7842014 (516.09 it/sec) -training >> step=3866600, episode=645 reward=0.7771644 (519.09 it/sec) -training >> step=3866700, episode=645 reward=0.7951256 (511.43 it/sec) -training >> step=3866800, episode=645 reward=0.7759577 (513.90 it/sec) -training >> step=3866900, episode=645 reward=0.7848933 (507.49 it/sec) -training >> step=3867000, episode=645 reward=0.7939946 (457.56 it/sec) -training >> step=3867100, episode=645 reward=0.7509659 (503.68 it/sec) -training >> step=3867200, episode=645 reward=0.7772253 (563.05 it/sec) -training >> step=3867300, episode=645 reward=0.7793081 (507.11 it/sec) -training >> step=3867400, episode=645 reward=0.7906134 (459.74 it/sec) -training >> step=3867500, episode=645 reward=0.7756764 (459.84 it/sec) -training >> step=3867600, episode=645 reward=0.7698431 (504.79 it/sec) -training >> step=3867700, episode=645 reward=0.7736169 (529.42 it/sec) -training >> step=3867800, episode=645 reward=0.7878978 (536.05 it/sec) -training >> step=3867900, episode=645 reward=0.7655002 (476.08 it/sec) -training >> step=3868000, episode=645 reward=0.7693198 (411.20 it/sec) -training >> step=3868100, episode=645 reward=0.7790248 (462.79 it/sec) -training >> step=3868200, episode=645 reward=0.7747846 (535.15 it/sec) -training >> step=3868300, episode=645 reward=0.7551972 (552.39 it/sec) -training >> step=3868400, episode=645 reward=0.7526628 (501.83 it/sec) -training >> step=3868500, episode=645 reward=0.7728109 (454.04 it/sec) -training >> step=3868600, episode=645 reward=0.774446 (485.79 it/sec) -training >> step=3868700, episode=645 reward=0.781894 (392.83 it/sec) -training >> step=3868800, episode=645 reward=0.762508 (504.89 it/sec) -training >> step=3868900, episode=645 reward=0.7883706 (492.84 it/sec) -training >> step=3869000, episode=645 reward=0.7544793 (525.38 it/sec) -training >> step=3869100, episode=645 reward=0.8112037 (464.73 it/sec) -training >> step=3869200, episode=645 reward=0.7467147 (524.36 it/sec) -training >> step=3869300, episode=646 reward=0.7877234 (66.90 it/sec) -training >> step=3869400, episode=646 reward=0.7648793 (509.98 it/sec) -training >> step=3869500, episode=646 reward=0.7586022 (440.14 it/sec) -training >> step=3869600, episode=646 reward=0.7466122 (518.10 it/sec) -training >> step=3869700, episode=646 reward=0.770611 (528.79 it/sec) -training >> step=3869800, episode=646 reward=0.7647738 (505.62 it/sec) -training >> step=3869900, episode=646 reward=0.7862719 (566.64 it/sec) -training >> step=3870000, episode=646 reward=0.752263 (492.53 it/sec) -training >> step=3870100, episode=646 reward=0.7545362 (522.39 it/sec) -training >> step=3870200, episode=646 reward=0.7650728 (510.46 it/sec) -training >> step=3870300, episode=646 reward=0.7786086 (464.38 it/sec) -training >> step=3870400, episode=646 reward=0.7848073 (497.05 it/sec) -training >> step=3870500, episode=646 reward=0.774043 (483.93 it/sec) -training >> step=3870600, episode=646 reward=0.7773467 (482.87 it/sec) -training >> step=3870700, episode=646 reward=0.7796606 (504.54 it/sec) -training >> step=3870800, episode=646 reward=0.7752439 (514.28 it/sec) -training >> step=3870900, episode=646 reward=0.7736565 (464.50 it/sec) -training >> step=3871000, episode=646 reward=0.7569331 (475.83 it/sec) -training >> step=3871100, episode=646 reward=0.7650414 (508.26 it/sec) -training >> step=3871200, episode=646 reward=0.790536 (501.43 it/sec) -training >> step=3871300, episode=646 reward=0.7983717 (454.11 it/sec) -training >> step=3871400, episode=646 reward=0.7758968 (448.02 it/sec) -training >> step=3871500, episode=646 reward=0.7567581 (502.76 it/sec) -training >> step=3871600, episode=646 reward=0.7781613 (521.92 it/sec) -training >> step=3871700, episode=646 reward=0.7600774 (484.21 it/sec) -training >> step=3871800, episode=646 reward=0.7843487 (509.09 it/sec) -training >> step=3871900, episode=646 reward=0.763281 (492.46 it/sec) -training >> step=3872000, episode=646 reward=0.782575 (477.10 it/sec) -training >> step=3872100, episode=646 reward=0.7737932 (459.55 it/sec) -training >> step=3872200, episode=646 reward=0.7907783 (510.25 it/sec) -training >> step=3872300, episode=646 reward=0.775006 (543.40 it/sec) -training >> step=3872400, episode=646 reward=0.789241 (493.34 it/sec) -training >> step=3872500, episode=646 reward=0.7847378 (486.92 it/sec) -training >> step=3872600, episode=646 reward=0.7743573 (487.34 it/sec) -training >> step=3872700, episode=646 reward=0.7769785 (408.99 it/sec) -training >> step=3872800, episode=646 reward=0.7799835 (457.98 it/sec) -training >> step=3872900, episode=646 reward=0.7777429 (479.57 it/sec) -training >> step=3873000, episode=646 reward=0.7752387 (510.87 it/sec) -training >> step=3873100, episode=646 reward=0.785808 (478.33 it/sec) -training >> step=3873200, episode=646 reward=0.7625404 (443.65 it/sec) -training >> step=3873300, episode=646 reward=0.7728878 (476.62 it/sec) -training >> step=3873400, episode=646 reward=0.7939149 (494.55 it/sec) -training >> step=3873500, episode=646 reward=0.7636562 (479.84 it/sec) -training >> step=3873600, episode=646 reward=0.7766041 (469.00 it/sec) -training >> step=3873700, episode=646 reward=0.7921067 (442.35 it/sec) -training >> step=3873800, episode=646 reward=0.7694658 (413.79 it/sec) -training >> step=3873900, episode=646 reward=0.7704999 (465.11 it/sec) -training >> step=3874000, episode=646 reward=0.7797142 (475.38 it/sec) -training >> step=3874100, episode=646 reward=0.7630681 (474.77 it/sec) -training >> step=3874200, episode=646 reward=0.7674758 (464.55 it/sec) -training >> step=3874300, episode=646 reward=0.7730964 (508.34 it/sec) -training >> step=3874400, episode=646 reward=0.7714009 (544.89 it/sec) -training >> step=3874500, episode=646 reward=0.7640499 (527.19 it/sec) -training >> step=3874600, episode=646 reward=0.7595354 (480.97 it/sec) -training >> step=3874700, episode=646 reward=0.7765696 (476.61 it/sec) -training >> step=3874800, episode=646 reward=0.784463 (439.37 it/sec) -training >> step=3874900, episode=646 reward=0.7779366 (332.60 it/sec) -training >> step=3875000, episode=646 reward=0.7768089 (502.47 it/sec) -training >> step=3875100, episode=646 reward=0.7590022 (511.06 it/sec) -training >> step=3875200, episode=646 reward=0.7820451 (554.33 it/sec) -training >> step=3875300, episode=647 reward=0.7536058 (98.58 it/sec) -training >> step=3875400, episode=647 reward=0.776145 (522.85 it/sec) -training >> step=3875500, episode=647 reward=0.7599243 (478.84 it/sec) -training >> step=3875600, episode=647 reward=0.7876768 (531.99 it/sec) -training >> step=3875700, episode=647 reward=0.774809 (493.96 it/sec) -training >> step=3875800, episode=647 reward=0.7943887 (531.41 it/sec) -training >> step=3875900, episode=647 reward=0.7767026 (456.43 it/sec) -training >> step=3876000, episode=647 reward=0.790563 (471.04 it/sec) -training >> step=3876100, episode=647 reward=0.7523617 (533.72 it/sec) -training >> step=3876200, episode=647 reward=0.7904192 (512.95 it/sec) -training >> step=3876300, episode=647 reward=0.7822098 (503.46 it/sec) -training >> step=3876400, episode=647 reward=0.7701401 (514.41 it/sec) -training >> step=3876500, episode=647 reward=0.7636614 (537.15 it/sec) -training >> step=3876600, episode=647 reward=0.7708966 (487.12 it/sec) -training >> step=3876700, episode=647 reward=0.7607728 (516.03 it/sec) -training >> step=3876800, episode=647 reward=0.7744445 (475.07 it/sec) -training >> step=3876900, episode=647 reward=0.7678326 (480.06 it/sec) -training >> step=3877000, episode=647 reward=0.7690765 (547.96 it/sec) -training >> step=3877100, episode=647 reward=0.7778209 (499.67 it/sec) -training >> step=3877200, episode=647 reward=0.7755776 (507.26 it/sec) -training >> step=3877300, episode=647 reward=0.770592 (457.10 it/sec) -training >> step=3877400, episode=647 reward=0.7566569 (489.40 it/sec) -training >> step=3877500, episode=647 reward=0.7597646 (523.67 it/sec) -training >> step=3877600, episode=647 reward=0.7723494 (486.04 it/sec) -training >> step=3877700, episode=647 reward=0.7873327 (468.84 it/sec) -training >> step=3877800, episode=647 reward=0.765013 (464.92 it/sec) -training >> step=3877900, episode=647 reward=0.7869192 (465.81 it/sec) -training >> step=3878000, episode=647 reward=0.7845356 (542.66 it/sec) -training >> step=3878100, episode=647 reward=0.758574 (478.95 it/sec) -training >> step=3878200, episode=647 reward=0.7830666 (479.06 it/sec) -training >> step=3878300, episode=647 reward=0.7913209 (489.48 it/sec) -training >> step=3878400, episode=647 reward=0.7709721 (469.94 it/sec) -training >> step=3878500, episode=647 reward=0.783964 (552.57 it/sec) -training >> step=3878600, episode=647 reward=0.7980486 (453.91 it/sec) -training >> step=3878700, episode=647 reward=0.7569761 (542.17 it/sec) -training >> step=3878800, episode=647 reward=0.7775434 (508.79 it/sec) -training >> step=3878900, episode=647 reward=0.7806318 (515.63 it/sec) -training >> step=3879000, episode=647 reward=0.7868515 (564.35 it/sec) -training >> step=3879100, episode=647 reward=0.7951771 (528.70 it/sec) -training >> step=3879200, episode=647 reward=0.7679849 (522.09 it/sec) -training >> step=3879300, episode=647 reward=0.7546028 (531.97 it/sec) -training >> step=3879400, episode=647 reward=0.7866706 (566.10 it/sec) -training >> step=3879500, episode=647 reward=0.7822663 (498.37 it/sec) -training >> step=3879600, episode=647 reward=0.7901093 (536.06 it/sec) -training >> step=3879700, episode=647 reward=0.7752615 (523.13 it/sec) -training >> step=3879800, episode=647 reward=0.7700303 (537.13 it/sec) -training >> step=3879900, episode=647 reward=0.765348 (489.36 it/sec) -training >> step=3880000, episode=647 reward=0.7597027 (530.94 it/sec) -training >> step=3880100, episode=647 reward=0.7749935 (548.09 it/sec) -training >> step=3880200, episode=647 reward=0.765031 (523.38 it/sec) -training >> step=3880300, episode=647 reward=0.7597518 (496.42 it/sec) -training >> step=3880400, episode=647 reward=0.7768325 (488.91 it/sec) -training >> step=3880500, episode=647 reward=0.7838848 (539.21 it/sec) -training >> step=3880600, episode=647 reward=0.7636853 (526.85 it/sec) -training >> step=3880700, episode=647 reward=0.782289 (490.00 it/sec) -training >> step=3880800, episode=647 reward=0.758953 (518.22 it/sec) -training >> step=3880900, episode=647 reward=0.7714103 (510.83 it/sec) -training >> step=3881000, episode=647 reward=0.7846602 (391.84 it/sec) -training >> step=3881100, episode=647 reward=0.7621673 (541.73 it/sec) -training >> step=3881200, episode=647 reward=0.7865754 (552.89 it/sec) -training >> step=3881300, episode=648 reward=0.7667159 (139.53 it/sec) -training >> step=3881400, episode=648 reward=0.764686 (534.65 it/sec) -training >> step=3881500, episode=648 reward=0.7755388 (507.76 it/sec) -training >> step=3881600, episode=648 reward=0.7803724 (550.25 it/sec) -training >> step=3881700, episode=648 reward=0.7778332 (508.91 it/sec) -training >> step=3881800, episode=648 reward=0.7838728 (500.00 it/sec) -training >> step=3881900, episode=648 reward=0.763386 (544.03 it/sec) -training >> step=3882000, episode=648 reward=0.77091 (546.95 it/sec) -training >> step=3882100, episode=648 reward=0.7797543 (446.15 it/sec) -training >> step=3882200, episode=648 reward=0.7879689 (473.96 it/sec) -training >> step=3882300, episode=648 reward=0.7830516 (525.18 it/sec) -training >> step=3882400, episode=648 reward=0.7821837 (475.50 it/sec) -training >> step=3882500, episode=648 reward=0.7760352 (514.43 it/sec) -training >> step=3882600, episode=648 reward=0.781186 (513.58 it/sec) -training >> step=3882700, episode=648 reward=0.7716978 (501.88 it/sec) -training >> step=3882800, episode=648 reward=0.7726049 (512.40 it/sec) -training >> step=3882900, episode=648 reward=0.7632014 (513.23 it/sec) -training >> step=3883000, episode=648 reward=0.7754892 (530.15 it/sec) -training >> step=3883100, episode=648 reward=0.7797549 (466.19 it/sec) -training >> step=3883200, episode=648 reward=0.7962914 (507.55 it/sec) -training >> step=3883300, episode=648 reward=0.7658793 (538.96 it/sec) -training >> step=3883400, episode=648 reward=0.757126 (540.18 it/sec) -training >> step=3883500, episode=648 reward=0.7676327 (377.20 it/sec) -training >> step=3883600, episode=648 reward=0.7808497 (473.54 it/sec) -training >> step=3883700, episode=648 reward=0.7707031 (500.70 it/sec) -training >> step=3883800, episode=648 reward=0.7707858 (476.75 it/sec) -training >> step=3883900, episode=648 reward=0.7840329 (415.48 it/sec) -training >> step=3884000, episode=648 reward=0.7660273 (432.28 it/sec) -training >> step=3884100, episode=648 reward=0.7931847 (378.88 it/sec) -training >> step=3884200, episode=648 reward=0.7714344 (407.43 it/sec) -training >> step=3884300, episode=648 reward=0.7742771 (448.95 it/sec) -training >> step=3884400, episode=648 reward=0.7765436 (431.10 it/sec) -training >> step=3884500, episode=648 reward=0.7680435 (415.58 it/sec) -training >> step=3884600, episode=648 reward=0.7830323 (491.83 it/sec) -training >> step=3884700, episode=648 reward=0.775354 (490.35 it/sec) -training >> step=3884800, episode=648 reward=0.7713752 (542.28 it/sec) -training >> step=3884900, episode=648 reward=0.762968 (521.88 it/sec) -training >> step=3885000, episode=648 reward=0.7677491 (506.29 it/sec) -training >> step=3885100, episode=648 reward=0.7594735 (504.83 it/sec) -training >> step=3885200, episode=648 reward=0.7520994 (541.23 it/sec) -training >> step=3885300, episode=648 reward=0.7636901 (503.84 it/sec) -training >> step=3885400, episode=648 reward=0.7697038 (512.26 it/sec) -training >> step=3885500, episode=648 reward=0.7802331 (426.93 it/sec) -training >> step=3885600, episode=648 reward=0.7711776 (516.43 it/sec) -training >> step=3885700, episode=648 reward=0.7535417 (517.19 it/sec) -training >> step=3885800, episode=648 reward=0.7847388 (507.04 it/sec) -training >> step=3885900, episode=648 reward=0.7628821 (516.53 it/sec) -training >> step=3886000, episode=648 reward=0.7691206 (437.80 it/sec) -training >> step=3886100, episode=648 reward=0.7657375 (512.24 it/sec) -training >> step=3886200, episode=648 reward=0.7942427 (464.74 it/sec) -training >> step=3886300, episode=648 reward=0.7679951 (465.16 it/sec) -training >> step=3886400, episode=648 reward=0.7714302 (531.24 it/sec) -training >> step=3886500, episode=648 reward=0.7814565 (489.85 it/sec) -training >> step=3886600, episode=648 reward=0.793063 (526.96 it/sec) -training >> step=3886700, episode=648 reward=0.77353 (537.28 it/sec) -training >> step=3886800, episode=648 reward=0.7759492 (517.34 it/sec) -training >> step=3886900, episode=648 reward=0.7767211 (524.46 it/sec) -training >> step=3887000, episode=648 reward=0.7785481 (480.36 it/sec) -training >> step=3887100, episode=648 reward=0.7693951 (467.62 it/sec) -training >> step=3887200, episode=648 reward=0.7608824 (368.81 it/sec) -training >> step=3887300, episode=649 reward=0.7898288 (58.29 it/sec) -training >> step=3887400, episode=649 reward=0.7711262 (505.36 it/sec) -training >> step=3887500, episode=649 reward=0.7732639 (496.69 it/sec) -training >> step=3887600, episode=649 reward=0.7821591 (526.48 it/sec) -training >> step=3887700, episode=649 reward=0.7526439 (518.26 it/sec) -training >> step=3887800, episode=649 reward=0.7844599 (473.92 it/sec) -training >> step=3887900, episode=649 reward=0.7801961 (511.39 it/sec) -training >> step=3888000, episode=649 reward=0.7784531 (527.19 it/sec) -training >> step=3888100, episode=649 reward=0.7735692 (530.98 it/sec) -training >> step=3888200, episode=649 reward=0.7668167 (539.03 it/sec) -training >> step=3888300, episode=649 reward=0.7580322 (467.19 it/sec) -training >> step=3888400, episode=649 reward=0.7740039 (514.45 it/sec) -training >> step=3888500, episode=649 reward=0.7765819 (544.80 it/sec) -training >> step=3888600, episode=649 reward=0.7678291 (498.38 it/sec) -training >> step=3888700, episode=649 reward=0.7943083 (533.97 it/sec) -training >> step=3888800, episode=649 reward=0.7679107 (541.53 it/sec) -training >> step=3888900, episode=649 reward=0.7799715 (498.42 it/sec) -training >> step=3889000, episode=649 reward=0.7847728 (514.78 it/sec) -training >> step=3889100, episode=649 reward=0.7669052 (541.13 it/sec) -training >> step=3889200, episode=649 reward=0.7716303 (539.97 it/sec) -training >> step=3889300, episode=649 reward=0.7720896 (523.68 it/sec) -training >> step=3889400, episode=649 reward=0.7481644 (509.97 it/sec) -training >> step=3889500, episode=649 reward=0.7750165 (549.38 it/sec) -training >> step=3889600, episode=649 reward=0.7636999 (531.19 it/sec) -training >> step=3889700, episode=649 reward=0.7742609 (511.91 it/sec) -training >> step=3889800, episode=649 reward=0.7693807 (554.28 it/sec) -training >> step=3889900, episode=649 reward=0.767374 (513.98 it/sec) -training >> step=3890000, episode=649 reward=0.7831701 (501.89 it/sec) -training >> step=3890100, episode=649 reward=0.7516028 (531.37 it/sec) -training >> step=3890200, episode=649 reward=0.7921128 (541.16 it/sec) -training >> step=3890300, episode=649 reward=0.7834564 (521.11 it/sec) -training >> step=3890400, episode=649 reward=0.7870404 (532.34 it/sec) -training >> step=3890500, episode=649 reward=0.7670012 (480.07 it/sec) -training >> step=3890600, episode=649 reward=0.7925451 (520.41 it/sec) -training >> step=3890700, episode=649 reward=0.770131 (506.23 it/sec) -training >> step=3890800, episode=649 reward=0.7846895 (432.79 it/sec) -training >> step=3890900, episode=649 reward=0.7784674 (541.72 it/sec) -training >> step=3891000, episode=649 reward=0.7639114 (511.55 it/sec) -training >> step=3891100, episode=649 reward=0.7840888 (494.01 it/sec) -training >> step=3891200, episode=649 reward=0.7826726 (486.86 it/sec) -training >> step=3891300, episode=649 reward=0.7721822 (537.95 it/sec) -training >> step=3891400, episode=649 reward=0.7696342 (502.85 it/sec) -training >> step=3891500, episode=649 reward=0.7917073 (508.47 it/sec) -training >> step=3891600, episode=649 reward=0.7695365 (515.05 it/sec) -training >> step=3891700, episode=649 reward=0.7652687 (515.69 it/sec) -training >> step=3891800, episode=649 reward=0.7539792 (538.75 it/sec) -training >> step=3891900, episode=649 reward=0.7845237 (516.13 it/sec) -training >> step=3892000, episode=649 reward=0.7812775 (535.26 it/sec) -training >> step=3892100, episode=649 reward=0.7772359 (485.64 it/sec) -training >> step=3892200, episode=649 reward=0.7763325 (524.38 it/sec) -training >> step=3892300, episode=649 reward=0.7701851 (491.46 it/sec) -training >> step=3892400, episode=649 reward=0.7631464 (519.40 it/sec) -training >> step=3892500, episode=649 reward=0.7665263 (525.65 it/sec) -training >> step=3892600, episode=649 reward=0.7783338 (476.13 it/sec) -training >> step=3892700, episode=649 reward=0.7520836 (530.13 it/sec) -training >> step=3892800, episode=649 reward=0.7579045 (543.73 it/sec) -training >> step=3892900, episode=649 reward=0.7724375 (535.80 it/sec) -training >> step=3893000, episode=649 reward=0.7491126 (546.18 it/sec) -training >> step=3893100, episode=649 reward=0.7812308 (557.85 it/sec) -training >> step=3893200, episode=649 reward=0.7719172 (484.73 it/sec) -training >> step=3893300, episode=650 reward=0.7631272 (70.33 it/sec) -training >> step=3893400, episode=650 reward=0.7637584 (511.82 it/sec) -training >> step=3893500, episode=650 reward=0.7688486 (448.16 it/sec) -training >> step=3893600, episode=650 reward=0.7839102 (493.78 it/sec) -training >> step=3893700, episode=650 reward=0.7839314 (522.30 it/sec) -training >> step=3893800, episode=650 reward=0.7837164 (485.37 it/sec) -training >> step=3893900, episode=650 reward=0.7775661 (516.46 it/sec) -training >> step=3894000, episode=650 reward=0.7763417 (544.16 it/sec) -training >> step=3894100, episode=650 reward=0.7735316 (501.64 it/sec) -training >> step=3894200, episode=650 reward=0.7811627 (504.97 it/sec) -training >> step=3894300, episode=650 reward=0.7699926 (489.58 it/sec) -training >> step=3894400, episode=650 reward=0.7770653 (509.43 it/sec) -training >> step=3894500, episode=650 reward=0.7656203 (507.91 it/sec) -training >> step=3894600, episode=650 reward=0.7763528 (475.71 it/sec) -training >> step=3894700, episode=650 reward=0.7795593 (503.92 it/sec) -training >> step=3894800, episode=650 reward=0.7800057 (475.94 it/sec) -training >> step=3894900, episode=650 reward=0.7752507 (525.42 it/sec) -training >> step=3895000, episode=650 reward=0.7642931 (514.76 it/sec) -training >> step=3895100, episode=650 reward=0.7745215 (481.17 it/sec) -training >> step=3895200, episode=650 reward=0.7656984 (528.05 it/sec) -training >> step=3895300, episode=650 reward=0.773645 (532.52 it/sec) -training >> step=3895400, episode=650 reward=0.7645056 (521.28 it/sec) -training >> step=3895500, episode=650 reward=0.768687 (520.62 it/sec) -training >> step=3895600, episode=650 reward=0.790193 (526.11 it/sec) -training >> step=3895700, episode=650 reward=0.7749721 (500.00 it/sec) -training >> step=3895800, episode=650 reward=0.7578123 (467.11 it/sec) -training >> step=3895900, episode=650 reward=0.7963065 (495.81 it/sec) -training >> step=3896000, episode=650 reward=0.793615 (527.74 it/sec) -training >> step=3896100, episode=650 reward=0.7734846 (502.64 it/sec) -training >> step=3896200, episode=650 reward=0.7914369 (498.56 it/sec) -training >> step=3896300, episode=650 reward=0.7702036 (527.29 it/sec) -training >> step=3896400, episode=650 reward=0.7749901 (511.71 it/sec) -training >> step=3896500, episode=650 reward=0.7823058 (520.74 it/sec) -training >> step=3896600, episode=650 reward=0.785846 (516.37 it/sec) -training >> step=3896700, episode=650 reward=0.7637134 (529.72 it/sec) -training >> step=3896800, episode=650 reward=0.7864366 (467.44 it/sec) -training >> step=3896900, episode=650 reward=0.7586857 (525.33 it/sec) -training >> step=3897000, episode=650 reward=0.7600742 (483.36 it/sec) -training >> step=3897100, episode=650 reward=0.7967845 (479.87 it/sec) -training >> step=3897200, episode=650 reward=0.772632 (458.29 it/sec) -training >> step=3897300, episode=650 reward=0.7839945 (471.60 it/sec) -training >> step=3897400, episode=650 reward=0.7764767 (515.91 it/sec) -training >> step=3897500, episode=650 reward=0.7672042 (530.09 it/sec) -training >> step=3897600, episode=650 reward=0.775278 (493.60 it/sec) -training >> step=3897700, episode=650 reward=0.7802709 (442.59 it/sec) -training >> step=3897800, episode=650 reward=0.7834437 (539.39 it/sec) -training >> step=3897900, episode=650 reward=0.7862594 (473.62 it/sec) -training >> step=3898000, episode=650 reward=0.7878465 (552.38 it/sec) -training >> step=3898100, episode=650 reward=0.7777379 (526.70 it/sec) -training >> step=3898200, episode=650 reward=0.7714574 (550.44 it/sec) -training >> step=3898300, episode=650 reward=0.768883 (507.93 it/sec) -training >> step=3898400, episode=650 reward=0.7580116 (517.74 it/sec) -training >> step=3898500, episode=650 reward=0.7811785 (547.93 it/sec) -training >> step=3898600, episode=650 reward=0.7778357 (536.56 it/sec) -training >> step=3898700, episode=650 reward=0.7827188 (515.63 it/sec) -training >> step=3898800, episode=650 reward=0.7672146 (496.05 it/sec) -training >> step=3898900, episode=650 reward=0.7561359 (574.11 it/sec) -training >> step=3899000, episode=650 reward=0.7563379 (510.97 it/sec) -training >> step=3899100, episode=650 reward=0.7478927 (526.51 it/sec) -training >> step=3899200, episode=650 reward=0.7868572 (528.06 it/sec) -training >> step=3899300, episode=651 reward=0.77123 (55.79 it/sec) -training >> step=3899400, episode=651 reward=0.7683449 (491.03 it/sec) -training >> step=3899500, episode=651 reward=0.7753624 (503.59 it/sec) -training >> step=3899600, episode=651 reward=0.7805482 (436.28 it/sec) -training >> step=3899700, episode=651 reward=0.7465256 (524.53 it/sec) -training >> step=3899800, episode=651 reward=0.7784942 (527.62 it/sec) -training >> step=3899900, episode=651 reward=0.7830917 (467.04 it/sec) -training >> step=3900000, episode=651 reward=0.7610054 (472.63 it/sec) -training >> step=3900100, episode=651 reward=0.7663839 (432.24 it/sec) -training >> step=3900200, episode=651 reward=0.7830037 (471.63 it/sec) -training >> step=3900300, episode=651 reward=0.7974156 (505.96 it/sec) -training >> step=3900400, episode=651 reward=0.7776807 (474.08 it/sec) -training >> step=3900500, episode=651 reward=0.7635993 (435.92 it/sec) -training >> step=3900600, episode=651 reward=0.7643166 (492.66 it/sec) -training >> step=3900700, episode=651 reward=0.7897448 (500.80 it/sec) -training >> step=3900800, episode=651 reward=0.7803995 (545.91 it/sec) -training >> step=3900900, episode=651 reward=0.7741299 (465.87 it/sec) -training >> step=3901000, episode=651 reward=0.7770712 (498.07 it/sec) -training >> step=3901100, episode=651 reward=0.7740023 (461.26 it/sec) -training >> step=3901200, episode=651 reward=0.77693 (440.30 it/sec) -training >> step=3901300, episode=651 reward=0.7554007 (468.39 it/sec) -training >> step=3901400, episode=651 reward=0.7724017 (539.52 it/sec) -training >> step=3901500, episode=651 reward=0.7628363 (456.79 it/sec) -training >> step=3901600, episode=651 reward=0.7725736 (451.63 it/sec) -training >> step=3901700, episode=651 reward=0.774019 (513.05 it/sec) -training >> step=3901800, episode=651 reward=0.7595356 (507.52 it/sec) -training >> step=3901900, episode=651 reward=0.766751 (470.62 it/sec) -training >> step=3902000, episode=651 reward=0.772841 (438.43 it/sec) -training >> step=3902100, episode=651 reward=0.7830746 (465.16 it/sec) -training >> step=3902200, episode=651 reward=0.7724929 (442.45 it/sec) -training >> step=3902300, episode=651 reward=0.7776527 (470.51 it/sec) -training >> step=3902400, episode=651 reward=0.7689836 (497.05 it/sec) -training >> step=3902500, episode=651 reward=0.7678588 (472.03 it/sec) -training >> step=3902600, episode=651 reward=0.7788115 (473.62 it/sec) -training >> step=3902700, episode=651 reward=0.7709936 (472.04 it/sec) -training >> step=3902800, episode=651 reward=0.7914251 (473.73 it/sec) -training >> step=3902900, episode=651 reward=0.7830031 (461.04 it/sec) -training >> step=3903000, episode=651 reward=0.759667 (445.90 it/sec) -training >> step=3903100, episode=651 reward=0.778501 (487.83 it/sec) -training >> step=3903200, episode=651 reward=0.7927375 (474.78 it/sec) -training >> step=3903300, episode=651 reward=0.7777604 (466.72 it/sec) -training >> step=3903400, episode=651 reward=0.7797963 (423.82 it/sec) -training >> step=3903500, episode=651 reward=0.7822478 (459.80 it/sec) -training >> step=3903600, episode=651 reward=0.7854893 (479.90 it/sec) -training >> step=3903700, episode=651 reward=0.7809678 (493.39 it/sec) -training >> step=3903800, episode=651 reward=0.7907382 (475.76 it/sec) -training >> step=3903900, episode=651 reward=0.7707504 (472.39 it/sec) -training >> step=3904000, episode=651 reward=0.7604532 (443.11 it/sec) -training >> step=3904100, episode=651 reward=0.7711376 (480.49 it/sec) -training >> step=3904200, episode=651 reward=0.7524841 (512.29 it/sec) -training >> step=3904300, episode=651 reward=0.7919555 (474.78 it/sec) -training >> step=3904400, episode=651 reward=0.7691181 (445.09 it/sec) -training >> step=3904500, episode=651 reward=0.7796379 (433.56 it/sec) -training >> step=3904600, episode=651 reward=0.7627211 (406.58 it/sec) -training >> step=3904700, episode=651 reward=0.7602739 (481.18 it/sec) -training >> step=3904800, episode=651 reward=0.7791182 (474.64 it/sec) -training >> step=3904900, episode=651 reward=0.772207 (451.26 it/sec) -training >> step=3905000, episode=651 reward=0.7661607 (471.13 it/sec) -training >> step=3905100, episode=651 reward=0.7431192 (461.94 it/sec) -training >> step=3905200, episode=651 reward=0.781287 (524.91 it/sec) -training >> step=3905300, episode=652 reward=0.7622556 (52.64 it/sec) -training >> step=3905400, episode=652 reward=0.7691991 (501.11 it/sec) -training >> step=3905500, episode=652 reward=0.7908549 (544.13 it/sec) -training >> step=3905600, episode=652 reward=0.7816058 (521.91 it/sec) -training >> step=3905700, episode=652 reward=0.7494375 (431.39 it/sec) -training >> step=3905800, episode=652 reward=0.7841212 (508.49 it/sec) -training >> step=3905900, episode=652 reward=0.77525 (529.21 it/sec) -training >> step=3906000, episode=652 reward=0.7681277 (485.33 it/sec) -training >> step=3906100, episode=652 reward=0.7820035 (500.69 it/sec) -training >> step=3906200, episode=652 reward=0.7559211 (481.51 it/sec) -training >> step=3906300, episode=652 reward=0.7657628 (566.43 it/sec) -training >> step=3906400, episode=652 reward=0.7824984 (493.91 it/sec) -training >> step=3906500, episode=652 reward=0.759475 (516.17 it/sec) -training >> step=3906600, episode=652 reward=0.7677901 (515.50 it/sec) -training >> step=3906700, episode=652 reward=0.7768767 (522.82 it/sec) -training >> step=3906800, episode=652 reward=0.7736084 (497.10 it/sec) -training >> step=3906900, episode=652 reward=0.768656 (482.86 it/sec) -training >> step=3907000, episode=652 reward=0.7796364 (523.75 it/sec) -training >> step=3907100, episode=652 reward=0.7597762 (470.86 it/sec) -training >> step=3907200, episode=652 reward=0.7816368 (493.16 it/sec) -training >> step=3907300, episode=652 reward=0.7772228 (481.07 it/sec) -training >> step=3907400, episode=652 reward=0.7700441 (494.74 it/sec) -training >> step=3907500, episode=652 reward=0.7712241 (473.21 it/sec) -training >> step=3907600, episode=652 reward=0.7977818 (457.10 it/sec) -training >> step=3907700, episode=652 reward=0.7870015 (493.84 it/sec) -training >> step=3907800, episode=652 reward=0.759616 (495.17 it/sec) -training >> step=3907900, episode=652 reward=0.7698509 (521.89 it/sec) -training >> step=3908000, episode=652 reward=0.7598742 (462.43 it/sec) -training >> step=3908100, episode=652 reward=0.7528741 (471.03 it/sec) -training >> step=3908200, episode=652 reward=0.7793633 (503.46 it/sec) -training >> step=3908300, episode=652 reward=0.7774543 (497.10 it/sec) -training >> step=3908400, episode=652 reward=0.7832517 (484.64 it/sec) -training >> step=3908500, episode=652 reward=0.77878 (466.48 it/sec) -training >> step=3908600, episode=652 reward=0.7741374 (360.96 it/sec) -training >> step=3908700, episode=652 reward=0.7524069 (498.38 it/sec) -training >> step=3908800, episode=652 reward=0.7635816 (480.92 it/sec) -training >> step=3908900, episode=652 reward=0.7732107 (477.00 it/sec) -training >> step=3909000, episode=652 reward=0.7794679 (447.43 it/sec) -training >> step=3909100, episode=652 reward=0.7735856 (476.25 it/sec) -training >> step=3909200, episode=652 reward=0.7750495 (503.99 it/sec) -training >> step=3909300, episode=652 reward=0.7769052 (493.31 it/sec) -training >> step=3909400, episode=652 reward=0.7819247 (477.77 it/sec) -training >> step=3909500, episode=652 reward=0.789234 (518.73 it/sec) -training >> step=3909600, episode=652 reward=0.8026908 (485.65 it/sec) -training >> step=3909700, episode=652 reward=0.7862512 (511.44 it/sec) -training >> step=3909800, episode=652 reward=0.7666154 (527.44 it/sec) -training >> step=3909900, episode=652 reward=0.7870214 (504.56 it/sec) -training >> step=3910000, episode=652 reward=0.75938 (480.43 it/sec) -training >> step=3910100, episode=652 reward=0.7812794 (493.32 it/sec) -training >> step=3910200, episode=652 reward=0.7573147 (523.39 it/sec) -training >> step=3910300, episode=652 reward=0.784317 (541.74 it/sec) -training >> step=3910400, episode=652 reward=0.7775647 (503.77 it/sec) -training >> step=3910500, episode=652 reward=0.7693015 (514.93 it/sec) -training >> step=3910600, episode=652 reward=0.8021858 (483.31 it/sec) -training >> step=3910700, episode=652 reward=0.7687525 (509.57 it/sec) -training >> step=3910800, episode=652 reward=0.7889246 (528.61 it/sec) -training >> step=3910900, episode=652 reward=0.778264 (492.49 it/sec) -training >> step=3911000, episode=652 reward=0.7651109 (518.29 it/sec) -training >> step=3911100, episode=652 reward=0.7640178 (502.19 it/sec) -training >> step=3911200, episode=652 reward=0.7778828 (448.45 it/sec) -training >> step=3911300, episode=653 reward=0.7865235 (76.36 it/sec) -training >> step=3911400, episode=653 reward=0.7660637 (491.63 it/sec) -training >> step=3911500, episode=653 reward=0.7594238 (501.61 it/sec) -training >> step=3911600, episode=653 reward=0.7910731 (476.91 it/sec) -training >> step=3911700, episode=653 reward=0.7797168 (523.58 it/sec) -training >> step=3911800, episode=653 reward=0.7855712 (478.42 it/sec) -training >> step=3911900, episode=653 reward=0.7650034 (510.96 it/sec) -training >> step=3912000, episode=653 reward=0.7887348 (517.48 it/sec) -training >> step=3912100, episode=653 reward=0.7610284 (473.09 it/sec) -training >> step=3912200, episode=653 reward=0.7715531 (533.91 it/sec) -training >> step=3912300, episode=653 reward=0.7746394 (514.91 it/sec) -training >> step=3912400, episode=653 reward=0.7608874 (489.13 it/sec) -training >> step=3912500, episode=653 reward=0.8082734 (457.38 it/sec) -training >> step=3912600, episode=653 reward=0.7993214 (410.88 it/sec) -training >> step=3912700, episode=653 reward=0.7643356 (473.46 it/sec) -training >> step=3912800, episode=653 reward=0.7621776 (548.51 it/sec) -training >> step=3912900, episode=653 reward=0.789497 (484.41 it/sec) -training >> step=3913000, episode=653 reward=0.7795684 (472.13 it/sec) -training >> step=3913100, episode=653 reward=0.7579002 (448.06 it/sec) -training >> step=3913200, episode=653 reward=0.774101 (498.13 it/sec) -training >> step=3913300, episode=653 reward=0.790345 (483.42 it/sec) -training >> step=3913400, episode=653 reward=0.767373 (505.16 it/sec) -training >> step=3913500, episode=653 reward=0.7781122 (552.16 it/sec) -training >> step=3913600, episode=653 reward=0.7823504 (509.02 it/sec) -training >> step=3913700, episode=653 reward=0.7673504 (493.88 it/sec) -training >> step=3913800, episode=653 reward=0.7845231 (507.96 it/sec) -training >> step=3913900, episode=653 reward=0.7820438 (515.07 it/sec) -training >> step=3914000, episode=653 reward=0.7544375 (509.81 it/sec) -training >> step=3914100, episode=653 reward=0.7616371 (510.18 it/sec) -training >> step=3914200, episode=653 reward=0.7703517 (512.94 it/sec) -training >> step=3914300, episode=653 reward=0.7605942 (478.46 it/sec) -training >> step=3914400, episode=653 reward=0.7581965 (514.86 it/sec) -training >> step=3914500, episode=653 reward=0.7552139 (505.82 it/sec) -training >> step=3914600, episode=653 reward=0.7750216 (539.56 it/sec) -training >> step=3914700, episode=653 reward=0.7549117 (507.18 it/sec) -training >> step=3914800, episode=653 reward=0.7693145 (513.42 it/sec) -training >> step=3914900, episode=653 reward=0.7831922 (525.96 it/sec) -training >> step=3915000, episode=653 reward=0.7710199 (472.04 it/sec) -training >> step=3915100, episode=653 reward=0.7613223 (514.64 it/sec) -training >> step=3915200, episode=653 reward=0.775009 (486.24 it/sec) -training >> step=3915300, episode=653 reward=0.773185 (482.71 it/sec) -training >> step=3915400, episode=653 reward=0.7684727 (454.37 it/sec) -training >> step=3915500, episode=653 reward=0.7732592 (490.70 it/sec) -training >> step=3915600, episode=653 reward=0.7830618 (528.13 it/sec) -training >> step=3915700, episode=653 reward=0.7821019 (489.87 it/sec) -training >> step=3915800, episode=653 reward=0.8012543 (534.26 it/sec) -training >> step=3915900, episode=653 reward=0.7845327 (492.85 it/sec) -training >> step=3916000, episode=653 reward=0.7783995 (520.34 it/sec) -training >> step=3916100, episode=653 reward=0.7923485 (432.60 it/sec) -training >> step=3916200, episode=653 reward=0.7588257 (514.28 it/sec) -training >> step=3916300, episode=653 reward=0.7903958 (501.10 it/sec) -training >> step=3916400, episode=653 reward=0.7620811 (475.14 it/sec) -training >> step=3916500, episode=653 reward=0.7691592 (481.55 it/sec) -training >> step=3916600, episode=653 reward=0.7665435 (475.57 it/sec) -training >> step=3916700, episode=653 reward=0.7747895 (457.87 it/sec) -training >> step=3916800, episode=653 reward=0.776759 (474.29 it/sec) -training >> step=3916900, episode=653 reward=0.7740104 (537.21 it/sec) -training >> step=3917000, episode=653 reward=0.7801591 (554.61 it/sec) -training >> step=3917100, episode=653 reward=0.7650005 (565.36 it/sec) -training >> step=3917200, episode=653 reward=0.7651045 (536.52 it/sec) -training >> step=3917300, episode=654 reward=0.792756 (98.03 it/sec) -training >> step=3917400, episode=654 reward=0.7652346 (519.05 it/sec) -training >> step=3917500, episode=654 reward=0.7842678 (522.60 it/sec) -training >> step=3917600, episode=654 reward=0.7726164 (568.71 it/sec) -training >> step=3917700, episode=654 reward=0.7729366 (521.40 it/sec) -training >> step=3917800, episode=654 reward=0.7722559 (566.53 it/sec) -training >> step=3917900, episode=654 reward=0.780947 (508.30 it/sec) -training >> step=3918000, episode=654 reward=0.7767034 (476.85 it/sec) -training >> step=3918100, episode=654 reward=0.7725338 (505.78 it/sec) -training >> step=3918200, episode=654 reward=0.7668661 (534.34 it/sec) -training >> step=3918300, episode=654 reward=0.7813379 (533.86 it/sec) -training >> step=3918400, episode=654 reward=0.7682864 (511.64 it/sec) -training >> step=3918500, episode=654 reward=0.7558541 (512.12 it/sec) -training >> step=3918600, episode=654 reward=0.7826306 (514.93 it/sec) -training >> step=3918700, episode=654 reward=0.7712613 (537.66 it/sec) -training >> step=3918800, episode=654 reward=0.7582194 (518.91 it/sec) -training >> step=3918900, episode=654 reward=0.7707123 (569.53 it/sec) -training >> step=3919000, episode=654 reward=0.7891133 (548.29 it/sec) -training >> step=3919100, episode=654 reward=0.78175 (514.49 it/sec) -training >> step=3919200, episode=654 reward=0.7965382 (510.37 it/sec) -training >> step=3919300, episode=654 reward=0.7722259 (535.55 it/sec) -training >> step=3919400, episode=654 reward=0.7721037 (557.02 it/sec) -training >> step=3919500, episode=654 reward=0.78214 (505.73 it/sec) -training >> step=3919600, episode=654 reward=0.7926981 (561.31 it/sec) -training >> step=3919700, episode=654 reward=0.7672011 (568.81 it/sec) -training >> step=3919800, episode=654 reward=0.7772089 (535.19 it/sec) -training >> step=3919900, episode=654 reward=0.7813407 (548.96 it/sec) -training >> step=3920000, episode=654 reward=0.7760076 (543.40 it/sec) -training >> step=3920100, episode=654 reward=0.7897748 (560.59 it/sec) -training >> step=3920200, episode=654 reward=0.7888334 (546.91 it/sec) -training >> step=3920300, episode=654 reward=0.7866142 (516.40 it/sec) -training >> step=3920400, episode=654 reward=0.7763035 (582.94 it/sec) -training >> step=3920500, episode=654 reward=0.7639601 (539.10 it/sec) -training >> step=3920600, episode=654 reward=0.7637441 (536.78 it/sec) -training >> step=3920700, episode=654 reward=0.788345 (556.25 it/sec) -training >> step=3920800, episode=654 reward=0.755596 (560.19 it/sec) -training >> step=3920900, episode=654 reward=0.7407136 (532.84 it/sec) -training >> step=3921000, episode=654 reward=0.786794 (548.16 it/sec) -training >> step=3921100, episode=654 reward=0.7925411 (538.29 it/sec) -training >> step=3921200, episode=654 reward=0.7820517 (556.29 it/sec) -training >> step=3921300, episode=654 reward=0.774482 (529.62 it/sec) -training >> step=3921400, episode=654 reward=0.7656323 (538.00 it/sec) -training >> step=3921500, episode=654 reward=0.791664 (494.04 it/sec) -training >> step=3921600, episode=654 reward=0.8100152 (511.38 it/sec) -training >> step=3921700, episode=654 reward=0.765413 (504.49 it/sec) -training >> step=3921800, episode=654 reward=0.7791555 (513.68 it/sec) -training >> step=3921900, episode=654 reward=0.7757205 (572.71 it/sec) -training >> step=3922000, episode=654 reward=0.7711347 (494.48 it/sec) -training >> step=3922100, episode=654 reward=0.7820376 (494.81 it/sec) -training >> step=3922200, episode=654 reward=0.7622325 (490.30 it/sec) -training >> step=3922300, episode=654 reward=0.7460139 (472.84 it/sec) -training >> step=3922400, episode=654 reward=0.7913571 (467.60 it/sec) -training >> step=3922500, episode=654 reward=0.7513909 (494.82 it/sec) -training >> step=3922600, episode=654 reward=0.7720862 (460.22 it/sec) -training >> step=3922700, episode=654 reward=0.7789119 (448.01 it/sec) -training >> step=3922800, episode=654 reward=0.7883638 (495.44 it/sec) -training >> step=3922900, episode=654 reward=0.7616486 (531.82 it/sec) -training >> step=3923000, episode=654 reward=0.773907 (540.05 it/sec) -training >> step=3923100, episode=654 reward=0.7755255 (522.63 it/sec) -training >> step=3923200, episode=654 reward=0.7867677 (492.34 it/sec) -training >> step=3923300, episode=655 reward=0.7670733 (111.96 it/sec) -training >> step=3923400, episode=655 reward=0.7764258 (493.31 it/sec) -training >> step=3923500, episode=655 reward=0.7631363 (486.96 it/sec) -training >> step=3923600, episode=655 reward=0.7629133 (520.46 it/sec) -training >> step=3923700, episode=655 reward=0.7878525 (533.38 it/sec) -training >> step=3923800, episode=655 reward=0.781029 (487.17 it/sec) -training >> step=3923900, episode=655 reward=0.7750127 (492.46 it/sec) -training >> step=3924000, episode=655 reward=0.7814165 (502.37 it/sec) -training >> step=3924100, episode=655 reward=0.7586733 (493.09 it/sec) -training >> step=3924200, episode=655 reward=0.782128 (443.72 it/sec) -training >> step=3924300, episode=655 reward=0.7542198 (494.65 it/sec) -training >> step=3924400, episode=655 reward=0.767251 (540.68 it/sec) -training >> step=3924500, episode=655 reward=0.7837212 (483.62 it/sec) -training >> step=3924600, episode=655 reward=0.7703847 (523.85 it/sec) -training >> step=3924700, episode=655 reward=0.7664297 (508.64 it/sec) -training >> step=3924800, episode=655 reward=0.7709692 (526.23 it/sec) -training >> step=3924900, episode=655 reward=0.7846822 (520.66 it/sec) -training >> step=3925000, episode=655 reward=0.786957 (464.64 it/sec) -training >> step=3925100, episode=655 reward=0.7683878 (483.80 it/sec) -training >> step=3925200, episode=655 reward=0.7668056 (475.75 it/sec) -training >> step=3925300, episode=655 reward=0.7725431 (498.45 it/sec) -training >> step=3925400, episode=655 reward=0.7923322 (505.94 it/sec) -training >> step=3925500, episode=655 reward=0.7787979 (447.80 it/sec) -training >> step=3925600, episode=655 reward=0.7850299 (422.93 it/sec) -training >> step=3925700, episode=655 reward=0.7505209 (472.80 it/sec) -training >> step=3925800, episode=655 reward=0.7704462 (474.80 it/sec) -training >> step=3925900, episode=655 reward=0.7694758 (506.81 it/sec) -training >> step=3926000, episode=655 reward=0.7812437 (433.83 it/sec) -training >> step=3926100, episode=655 reward=0.7733796 (475.52 it/sec) -training >> step=3926200, episode=655 reward=0.7810802 (443.06 it/sec) -training >> step=3926300, episode=655 reward=0.7840736 (448.15 it/sec) -training >> step=3926400, episode=655 reward=0.7718363 (415.74 it/sec) -training >> step=3926500, episode=655 reward=0.7678891 (447.73 it/sec) -training >> step=3926600, episode=655 reward=0.7711338 (479.64 it/sec) -training >> step=3926700, episode=655 reward=0.7902116 (467.95 it/sec) -training >> step=3926800, episode=655 reward=0.7751203 (468.20 it/sec) -training >> step=3926900, episode=655 reward=0.7771442 (461.86 it/sec) -training >> step=3927000, episode=655 reward=0.799343 (463.38 it/sec) -training >> step=3927100, episode=655 reward=0.7741428 (473.36 it/sec) -training >> step=3927200, episode=655 reward=0.7931147 (487.03 it/sec) -training >> step=3927300, episode=655 reward=0.7709986 (442.15 it/sec) -training >> step=3927400, episode=655 reward=0.7539445 (426.96 it/sec) -training >> step=3927500, episode=655 reward=0.7817951 (498.48 it/sec) -training >> step=3927600, episode=655 reward=0.7897722 (486.37 it/sec) -training >> step=3927700, episode=655 reward=0.7752645 (398.08 it/sec) -training >> step=3927800, episode=655 reward=0.7947736 (460.39 it/sec) -training >> step=3927900, episode=655 reward=0.7727333 (447.39 it/sec) -training >> step=3928000, episode=655 reward=0.7933398 (467.48 it/sec) -training >> step=3928100, episode=655 reward=0.7758639 (480.96 it/sec) -training >> step=3928200, episode=655 reward=0.7867995 (446.97 it/sec) -training >> step=3928300, episode=655 reward=0.7798434 (481.89 it/sec) -training >> step=3928400, episode=655 reward=0.7819391 (475.00 it/sec) -training >> step=3928500, episode=655 reward=0.7736539 (493.07 it/sec) -training >> step=3928600, episode=655 reward=0.7704288 (514.63 it/sec) -training >> step=3928700, episode=655 reward=0.7472781 (441.12 it/sec) -training >> step=3928800, episode=655 reward=0.7659988 (419.30 it/sec) -training >> step=3928900, episode=655 reward=0.7454665 (421.15 it/sec) -training >> step=3929000, episode=655 reward=0.7500035 (479.57 it/sec) -training >> step=3929100, episode=655 reward=0.7619797 (452.78 it/sec) -training >> step=3929200, episode=655 reward=0.7805349 (455.46 it/sec) -training >> step=3929300, episode=656 reward=0.7692945 (68.95 it/sec) -training >> step=3929400, episode=656 reward=0.7650265 (444.89 it/sec) -training >> step=3929500, episode=656 reward=0.7855871 (450.23 it/sec) -training >> step=3929600, episode=656 reward=0.7783824 (445.85 it/sec) -training >> step=3929700, episode=656 reward=0.7570044 (465.28 it/sec) -training >> step=3929800, episode=656 reward=0.765601 (460.78 it/sec) -training >> step=3929900, episode=656 reward=0.8002958 (454.90 it/sec) -training >> step=3930000, episode=656 reward=0.7779769 (465.57 it/sec) -training >> step=3930100, episode=656 reward=0.7684113 (455.40 it/sec) -training >> step=3930200, episode=656 reward=0.7851161 (439.86 it/sec) -training >> step=3930300, episode=656 reward=0.7785746 (513.44 it/sec) -training >> step=3930400, episode=656 reward=0.7769805 (436.24 it/sec) -training >> step=3930500, episode=656 reward=0.7759482 (460.78 it/sec) -training >> step=3930600, episode=656 reward=0.7706062 (406.76 it/sec) -training >> step=3930700, episode=656 reward=0.7868242 (441.09 it/sec) -training >> step=3930800, episode=656 reward=0.776949 (480.47 it/sec) -training >> step=3930900, episode=656 reward=0.7680549 (444.27 it/sec) -training >> step=3931000, episode=656 reward=0.8016259 (434.52 it/sec) -training >> step=3931100, episode=656 reward=0.7644407 (491.42 it/sec) -training >> step=3931200, episode=656 reward=0.7940823 (532.30 it/sec) -training >> step=3931300, episode=656 reward=0.7872644 (511.01 it/sec) -training >> step=3931400, episode=656 reward=0.7690332 (497.26 it/sec) -training >> step=3931500, episode=656 reward=0.7786586 (528.34 it/sec) -training >> step=3931600, episode=656 reward=0.7897853 (431.81 it/sec) -training >> step=3931700, episode=656 reward=0.7850446 (484.79 it/sec) -training >> step=3931800, episode=656 reward=0.771257 (473.54 it/sec) -training >> step=3931900, episode=656 reward=0.7818282 (492.96 it/sec) -training >> step=3932000, episode=656 reward=0.7805324 (462.74 it/sec) -training >> step=3932100, episode=656 reward=0.7837163 (475.71 it/sec) -training >> step=3932200, episode=656 reward=0.775514 (505.62 it/sec) -training >> step=3932300, episode=656 reward=0.783387 (431.12 it/sec) -training >> step=3932400, episode=656 reward=0.7813969 (490.02 it/sec) -training >> step=3932500, episode=656 reward=0.7704539 (514.46 it/sec) -training >> step=3932600, episode=656 reward=0.7650284 (518.22 it/sec) -training >> step=3932700, episode=656 reward=0.757681 (521.69 it/sec) -training >> step=3932800, episode=656 reward=0.8017779 (467.20 it/sec) -training >> step=3932900, episode=656 reward=0.7742909 (490.74 it/sec) -training >> step=3933000, episode=656 reward=0.7801039 (524.70 it/sec) -training >> step=3933100, episode=656 reward=0.7915058 (498.37 it/sec) -training >> step=3933200, episode=656 reward=0.7753261 (485.17 it/sec) -training >> step=3933300, episode=656 reward=0.8013634 (443.29 it/sec) -training >> step=3933400, episode=656 reward=0.7889028 (502.44 it/sec) -training >> step=3933500, episode=656 reward=0.7678118 (478.60 it/sec) -training >> step=3933600, episode=656 reward=0.7897179 (500.50 it/sec) -training >> step=3933700, episode=656 reward=0.7503211 (455.50 it/sec) -training >> step=3933800, episode=656 reward=0.7669595 (497.84 it/sec) -training >> step=3933900, episode=656 reward=0.774048 (503.29 it/sec) -training >> step=3934000, episode=656 reward=0.7692927 (517.61 it/sec) -training >> step=3934100, episode=656 reward=0.7837972 (466.78 it/sec) -training >> step=3934200, episode=656 reward=0.7661425 (445.87 it/sec) -training >> step=3934300, episode=656 reward=0.773954 (530.99 it/sec) -training >> step=3934400, episode=656 reward=0.7845458 (525.53 it/sec) -training >> step=3934500, episode=656 reward=0.7851011 (513.91 it/sec) -training >> step=3934600, episode=656 reward=0.7886497 (489.58 it/sec) -training >> step=3934700, episode=656 reward=0.7576929 (540.36 it/sec) -training >> step=3934800, episode=656 reward=0.7753184 (474.19 it/sec) -training >> step=3934900, episode=656 reward=0.7803035 (439.59 it/sec) -training >> step=3935000, episode=656 reward=0.752619 (534.23 it/sec) -training >> step=3935100, episode=656 reward=0.7581018 (473.02 it/sec) -training >> step=3935200, episode=656 reward=0.761917 (481.64 it/sec) -training >> step=3935300, episode=657 reward=0.7550182 (120.89 it/sec) -training >> step=3935400, episode=657 reward=0.7884654 (503.80 it/sec) -training >> step=3935500, episode=657 reward=0.7836516 (485.53 it/sec) -training >> step=3935600, episode=657 reward=0.7741107 (500.35 it/sec) -training >> step=3935700, episode=657 reward=0.7694581 (515.79 it/sec) -training >> step=3935800, episode=657 reward=0.7733546 (464.62 it/sec) -training >> step=3935900, episode=657 reward=0.7507795 (508.37 it/sec) -training >> step=3936000, episode=657 reward=0.7601651 (517.39 it/sec) -training >> step=3936100, episode=657 reward=0.7774995 (469.20 it/sec) -training >> step=3936200, episode=657 reward=0.7762287 (502.92 it/sec) -training >> step=3936300, episode=657 reward=0.7541789 (467.06 it/sec) -training >> step=3936400, episode=657 reward=0.7784847 (481.30 it/sec) -training >> step=3936500, episode=657 reward=0.7736276 (506.17 it/sec) -training >> step=3936600, episode=657 reward=0.7873187 (494.62 it/sec) -training >> step=3936700, episode=657 reward=0.7744755 (508.16 it/sec) -training >> step=3936800, episode=657 reward=0.7707102 (537.86 it/sec) -training >> step=3936900, episode=657 reward=0.7972692 (477.66 it/sec) -training >> step=3937000, episode=657 reward=0.7712123 (513.21 it/sec) -training >> step=3937100, episode=657 reward=0.7666435 (514.92 it/sec) -training >> step=3937200, episode=657 reward=0.7822501 (551.43 it/sec) -training >> step=3937300, episode=657 reward=0.7822675 (516.06 it/sec) -training >> step=3937400, episode=657 reward=0.7760298 (462.66 it/sec) -training >> step=3937500, episode=657 reward=0.7699404 (541.22 it/sec) -training >> step=3937600, episode=657 reward=0.7775949 (509.47 it/sec) -training >> step=3937700, episode=657 reward=0.7589421 (521.20 it/sec) -training >> step=3937800, episode=657 reward=0.7857827 (483.35 it/sec) -training >> step=3937900, episode=657 reward=0.7665351 (496.51 it/sec) -training >> step=3938000, episode=657 reward=0.793996 (523.30 it/sec) -training >> step=3938100, episode=657 reward=0.7837539 (480.83 it/sec) -training >> step=3938200, episode=657 reward=0.8016907 (513.31 it/sec) -training >> step=3938300, episode=657 reward=0.7899326 (520.77 it/sec) -training >> step=3938400, episode=657 reward=0.7864072 (497.92 it/sec) -training >> step=3938500, episode=657 reward=0.7714822 (495.43 it/sec) -training >> step=3938600, episode=657 reward=0.7793821 (491.75 it/sec) -training >> step=3938700, episode=657 reward=0.7741084 (490.85 it/sec) -training >> step=3938800, episode=657 reward=0.7783752 (522.23 it/sec) -training >> step=3938900, episode=657 reward=0.7776728 (484.69 it/sec) -training >> step=3939000, episode=657 reward=0.7853101 (435.01 it/sec) -training >> step=3939100, episode=657 reward=0.7718214 (470.10 it/sec) -training >> step=3939200, episode=657 reward=0.7799366 (505.46 it/sec) -training >> step=3939300, episode=657 reward=0.7957525 (490.09 it/sec) -training >> step=3939400, episode=657 reward=0.7703092 (505.44 it/sec) -training >> step=3939500, episode=657 reward=0.781803 (482.95 it/sec) -training >> step=3939600, episode=657 reward=0.7642522 (472.33 it/sec) -training >> step=3939700, episode=657 reward=0.7700658 (558.91 it/sec) -training >> step=3939800, episode=657 reward=0.7873235 (488.77 it/sec) -training >> step=3939900, episode=657 reward=0.7784789 (512.76 it/sec) -training >> step=3940000, episode=657 reward=0.7826484 (462.00 it/sec) -training >> step=3940100, episode=657 reward=0.7809746 (537.72 it/sec) -training >> step=3940200, episode=657 reward=0.7777058 (500.28 it/sec) -training >> step=3940300, episode=657 reward=0.7628325 (521.75 it/sec) -training >> step=3940400, episode=657 reward=0.7472259 (481.67 it/sec) -training >> step=3940500, episode=657 reward=0.7597179 (480.64 it/sec) -training >> step=3940600, episode=657 reward=0.777878 (452.79 it/sec) -training >> step=3940700, episode=657 reward=0.7464802 (508.40 it/sec) -training >> step=3940800, episode=657 reward=0.7562313 (538.17 it/sec) -training >> step=3940900, episode=657 reward=0.7737531 (509.00 it/sec) -training >> step=3941000, episode=657 reward=0.7364687 (465.95 it/sec) -training >> step=3941100, episode=657 reward=0.7615843 (496.34 it/sec) -training >> step=3941200, episode=657 reward=0.7491174 (492.71 it/sec) -training >> step=3941300, episode=658 reward=0.7742693 (117.69 it/sec) -training >> step=3941400, episode=658 reward=0.7768403 (471.41 it/sec) -training >> step=3941500, episode=658 reward=0.766754 (517.91 it/sec) -training >> step=3941600, episode=658 reward=0.7717404 (544.35 it/sec) -training >> step=3941700, episode=658 reward=0.773218 (497.73 it/sec) -training >> step=3941800, episode=658 reward=0.7745634 (475.22 it/sec) -training >> step=3941900, episode=658 reward=0.7651436 (553.36 it/sec) -training >> step=3942000, episode=658 reward=0.7793996 (514.30 it/sec) -training >> step=3942100, episode=658 reward=0.7668915 (522.73 it/sec) -training >> step=3942200, episode=658 reward=0.7754944 (526.27 it/sec) -training >> step=3942300, episode=658 reward=0.7841193 (498.97 it/sec) -training >> step=3942400, episode=658 reward=0.7862466 (505.15 it/sec) -training >> step=3942500, episode=658 reward=0.773246 (496.33 it/sec) -training >> step=3942600, episode=658 reward=0.7601683 (489.20 it/sec) -training >> step=3942700, episode=658 reward=0.7794718 (495.63 it/sec) -training >> step=3942800, episode=658 reward=0.768492 (476.48 it/sec) -training >> step=3942900, episode=658 reward=0.7672673 (515.40 it/sec) -training >> step=3943000, episode=658 reward=0.7811073 (552.23 it/sec) -training >> step=3943100, episode=658 reward=0.7881583 (511.28 it/sec) -training >> step=3943200, episode=658 reward=0.7948423 (521.13 it/sec) -training >> step=3943300, episode=658 reward=0.7897769 (499.78 it/sec) -training >> step=3943400, episode=658 reward=0.7668054 (448.06 it/sec) -training >> step=3943500, episode=658 reward=0.7578434 (515.95 it/sec) -training >> step=3943600, episode=658 reward=0.7747898 (474.16 it/sec) -training >> step=3943700, episode=658 reward=0.7962723 (481.37 it/sec) -training >> step=3943800, episode=658 reward=0.7641309 (456.18 it/sec) -training >> step=3943900, episode=658 reward=0.7569926 (485.12 it/sec) -training >> step=3944000, episode=658 reward=0.7793514 (449.74 it/sec) -training >> step=3944100, episode=658 reward=0.7767404 (417.61 it/sec) -training >> step=3944200, episode=658 reward=0.7698683 (426.63 it/sec) -training >> step=3944300, episode=658 reward=0.7719226 (459.38 it/sec) -training >> step=3944400, episode=658 reward=0.7729399 (459.33 it/sec) -training >> step=3944500, episode=658 reward=0.770479 (498.76 it/sec) -training >> step=3944600, episode=658 reward=0.7619113 (493.37 it/sec) -training >> step=3944700, episode=658 reward=0.7808115 (464.84 it/sec) -training >> step=3944800, episode=658 reward=0.7696036 (530.61 it/sec) -training >> step=3944900, episode=658 reward=0.7718249 (536.39 it/sec) -training >> step=3945000, episode=658 reward=0.7922453 (501.13 it/sec) -training >> step=3945100, episode=658 reward=0.7643621 (503.56 it/sec) -training >> step=3945200, episode=658 reward=0.7763082 (516.63 it/sec) -training >> step=3945300, episode=658 reward=0.7651926 (472.66 it/sec) -training >> step=3945400, episode=658 reward=0.762023 (499.27 it/sec) -training >> step=3945500, episode=658 reward=0.7887962 (468.55 it/sec) -training >> step=3945600, episode=658 reward=0.7588369 (528.38 it/sec) -training >> step=3945700, episode=658 reward=0.7740209 (517.28 it/sec) -training >> step=3945800, episode=658 reward=0.7906857 (482.97 it/sec) -training >> step=3945900, episode=658 reward=0.7736777 (534.13 it/sec) -training >> step=3946000, episode=658 reward=0.7762056 (473.54 it/sec) -training >> step=3946100, episode=658 reward=0.7764376 (459.69 it/sec) -training >> step=3946200, episode=658 reward=0.7717392 (520.91 it/sec) -training >> step=3946300, episode=658 reward=0.7903252 (540.58 it/sec) -training >> step=3946400, episode=658 reward=0.778899 (522.59 it/sec) -training >> step=3946500, episode=658 reward=0.7683878 (484.61 it/sec) -training >> step=3946600, episode=658 reward=0.7347712 (429.66 it/sec) -training >> step=3946700, episode=658 reward=0.7729405 (530.49 it/sec) -training >> step=3946800, episode=658 reward=0.7810606 (468.84 it/sec) -training >> step=3946900, episode=658 reward=0.7725811 (472.32 it/sec) -training >> step=3947000, episode=658 reward=0.7670918 (483.36 it/sec) -training >> step=3947100, episode=658 reward=0.7447534 (536.59 it/sec) -training >> step=3947200, episode=658 reward=0.7879645 (559.37 it/sec) -training >> step=3947300, episode=659 reward=0.7582606 (63.28 it/sec) -training >> step=3947400, episode=659 reward=0.7653278 (488.51 it/sec) -training >> step=3947500, episode=659 reward=0.7648447 (545.21 it/sec) -training >> step=3947600, episode=659 reward=0.7883549 (516.84 it/sec) -training >> step=3947700, episode=659 reward=0.7731798 (517.49 it/sec) -training >> step=3947800, episode=659 reward=0.781153 (523.59 it/sec) -training >> step=3947900, episode=659 reward=0.78193 (540.84 it/sec) -training >> step=3948000, episode=659 reward=0.7572886 (524.93 it/sec) -training >> step=3948100, episode=659 reward=0.7841642 (468.35 it/sec) -training >> step=3948200, episode=659 reward=0.7806934 (455.16 it/sec) -training >> step=3948300, episode=659 reward=0.7829211 (469.64 it/sec) -training >> step=3948400, episode=659 reward=0.7806007 (508.32 it/sec) -training >> step=3948500, episode=659 reward=0.7506248 (512.77 it/sec) -training >> step=3948600, episode=659 reward=0.7861991 (494.66 it/sec) -training >> step=3948700, episode=659 reward=0.7763647 (409.96 it/sec) -training >> step=3948800, episode=659 reward=0.7954559 (447.00 it/sec) -training >> step=3948900, episode=659 reward=0.7610323 (462.45 it/sec) -training >> step=3949000, episode=659 reward=0.7647641 (446.78 it/sec) -training >> step=3949100, episode=659 reward=0.7774933 (468.94 it/sec) -training >> step=3949200, episode=659 reward=0.7667219 (464.80 it/sec) -training >> step=3949300, episode=659 reward=0.7664917 (492.93 it/sec) -training >> step=3949400, episode=659 reward=0.7822724 (517.56 it/sec) -training >> step=3949500, episode=659 reward=0.8038147 (478.35 it/sec) -training >> step=3949600, episode=659 reward=0.7637656 (518.89 it/sec) -training >> step=3949700, episode=659 reward=0.7885808 (501.17 it/sec) -training >> step=3949800, episode=659 reward=0.7739481 (494.10 it/sec) -training >> step=3949900, episode=659 reward=0.792656 (489.53 it/sec) -training >> step=3950000, episode=659 reward=0.7768317 (522.81 it/sec) -training >> step=3950100, episode=659 reward=0.7776057 (490.11 it/sec) -training >> step=3950200, episode=659 reward=0.7939777 (496.73 it/sec) -training >> step=3950300, episode=659 reward=0.78901 (494.76 it/sec) -training >> step=3950400, episode=659 reward=0.7787498 (484.99 it/sec) -training >> step=3950500, episode=659 reward=0.7812521 (458.58 it/sec) -training >> step=3950600, episode=659 reward=0.777397 (460.40 it/sec) -training >> step=3950700, episode=659 reward=0.7562372 (453.93 it/sec) -training >> step=3950800, episode=659 reward=0.7648475 (505.00 it/sec) -training >> step=3950900, episode=659 reward=0.7947297 (461.79 it/sec) -training >> step=3951000, episode=659 reward=0.7685916 (470.12 it/sec) -training >> step=3951100, episode=659 reward=0.792226 (482.36 it/sec) -training >> step=3951200, episode=659 reward=0.78347 (429.62 it/sec) -training >> step=3951300, episode=659 reward=0.7640349 (475.64 it/sec) -training >> step=3951400, episode=659 reward=0.7796198 (533.81 it/sec) -training >> step=3951500, episode=659 reward=0.7567143 (485.54 it/sec) -training >> step=3951600, episode=659 reward=0.7666751 (460.01 it/sec) -training >> step=3951700, episode=659 reward=0.7767949 (424.83 it/sec) -training >> step=3951800, episode=659 reward=0.7710196 (488.30 it/sec) -training >> step=3951900, episode=659 reward=0.7724573 (478.47 it/sec) -training >> step=3952000, episode=659 reward=0.7856782 (465.49 it/sec) -training >> step=3952100, episode=659 reward=0.7698631 (445.27 it/sec) -training >> step=3952200, episode=659 reward=0.7922737 (480.55 it/sec) -training >> step=3952300, episode=659 reward=0.7889693 (469.16 it/sec) -training >> step=3952400, episode=659 reward=0.74727 (493.24 it/sec) -training >> step=3952500, episode=659 reward=0.754738 (482.77 it/sec) -training >> step=3952600, episode=659 reward=0.7669393 (470.88 it/sec) -training >> step=3952700, episode=659 reward=0.7716466 (438.42 it/sec) -training >> step=3952800, episode=659 reward=0.7619969 (488.86 it/sec) -training >> step=3952900, episode=659 reward=0.7443511 (470.22 it/sec) -training >> step=3953000, episode=659 reward=0.7405022 (447.01 it/sec) -training >> step=3953100, episode=659 reward=0.787948 (467.79 it/sec) -training >> step=3953200, episode=659 reward=0.7658836 (373.55 it/sec) -training >> step=3953300, episode=660 reward=0.7796609 (57.20 it/sec) -training >> step=3953400, episode=660 reward=0.789137 (520.22 it/sec) -training >> step=3953500, episode=660 reward=0.7793763 (506.49 it/sec) -training >> step=3953600, episode=660 reward=0.7817852 (513.69 it/sec) -training >> step=3953700, episode=660 reward=0.7670823 (485.55 it/sec) -training >> step=3953800, episode=660 reward=0.7588453 (502.37 it/sec) -training >> step=3953900, episode=660 reward=0.7954855 (466.30 it/sec) -training >> step=3954000, episode=660 reward=0.7826632 (489.56 it/sec) -training >> step=3954100, episode=660 reward=0.7773415 (518.79 it/sec) -training >> step=3954200, episode=660 reward=0.7767981 (492.68 it/sec) -training >> step=3954300, episode=660 reward=0.7675402 (492.86 it/sec) -training >> step=3954400, episode=660 reward=0.7638306 (458.47 it/sec) -training >> step=3954500, episode=660 reward=0.7594689 (483.84 it/sec) -training >> step=3954600, episode=660 reward=0.7689433 (477.15 it/sec) -training >> step=3954700, episode=660 reward=0.7708437 (491.23 it/sec) -training >> step=3954800, episode=660 reward=0.7670353 (486.03 it/sec) -training >> step=3954900, episode=660 reward=0.7842354 (455.35 it/sec) -training >> step=3955000, episode=660 reward=0.7798259 (482.54 it/sec) -training >> step=3955100, episode=660 reward=0.7633377 (472.00 it/sec) -training >> step=3955200, episode=660 reward=0.7794269 (414.69 it/sec) -training >> step=3955300, episode=660 reward=0.788557 (470.03 it/sec) -training >> step=3955400, episode=660 reward=0.7894614 (489.26 it/sec) -training >> step=3955500, episode=660 reward=0.7791014 (476.47 it/sec) -training >> step=3955600, episode=660 reward=0.7665543 (477.92 it/sec) -training >> step=3955700, episode=660 reward=0.7665498 (477.69 it/sec) -training >> step=3955800, episode=660 reward=0.7874136 (515.14 it/sec) -training >> step=3955900, episode=660 reward=0.7701213 (481.37 it/sec) -training >> step=3956000, episode=660 reward=0.7744073 (472.90 it/sec) -training >> step=3956100, episode=660 reward=0.7704479 (503.60 it/sec) -training >> step=3956200, episode=660 reward=0.7791383 (453.45 it/sec) -training >> step=3956300, episode=660 reward=0.7689504 (473.90 it/sec) -training >> step=3956400, episode=660 reward=0.7851903 (463.47 it/sec) -training >> step=3956500, episode=660 reward=0.768514 (491.00 it/sec) -training >> step=3956600, episode=660 reward=0.7662505 (482.22 it/sec) -training >> step=3956700, episode=660 reward=0.7518694 (452.79 it/sec) -training >> step=3956800, episode=660 reward=0.7937584 (467.62 it/sec) -training >> step=3956900, episode=660 reward=0.771562 (472.53 it/sec) -training >> step=3957000, episode=660 reward=0.7773175 (480.94 it/sec) -training >> step=3957100, episode=660 reward=0.7624505 (528.89 it/sec) -training >> step=3957200, episode=660 reward=0.7710851 (559.86 it/sec) -training >> step=3957300, episode=660 reward=0.7639341 (523.70 it/sec) -training >> step=3957400, episode=660 reward=0.7826008 (506.46 it/sec) -training >> step=3957500, episode=660 reward=0.7714244 (529.68 it/sec) -training >> step=3957600, episode=660 reward=0.7734438 (552.08 it/sec) -training >> step=3957700, episode=660 reward=0.7808414 (530.22 it/sec) -training >> step=3957800, episode=660 reward=0.7821764 (490.08 it/sec) -training >> step=3957900, episode=660 reward=0.7728918 (550.05 it/sec) -training >> step=3958000, episode=660 reward=0.7943517 (528.56 it/sec) -training >> step=3958100, episode=660 reward=0.781157 (528.74 it/sec) -training >> step=3958200, episode=660 reward=0.766453 (524.86 it/sec) -training >> step=3958300, episode=660 reward=0.781745 (458.68 it/sec) -training >> step=3958400, episode=660 reward=0.7884532 (480.16 it/sec) -training >> step=3958500, episode=660 reward=0.7701125 (462.95 it/sec) -training >> step=3958600, episode=660 reward=0.7647482 (506.32 it/sec) -training >> step=3958700, episode=660 reward=0.7620805 (466.68 it/sec) -training >> step=3958800, episode=660 reward=0.7746902 (464.45 it/sec) -training >> step=3958900, episode=660 reward=0.7667304 (465.40 it/sec) -training >> step=3959000, episode=660 reward=0.7765362 (511.07 it/sec) -training >> step=3959100, episode=660 reward=0.7851744 (478.13 it/sec) -training >> step=3959200, episode=660 reward=0.7731485 (420.94 it/sec) -training >> step=3959300, episode=661 reward=0.7635929 (100.05 it/sec) -training >> step=3959400, episode=661 reward=0.7892452 (441.71 it/sec) -training >> step=3959500, episode=661 reward=0.7793013 (438.38 it/sec) -training >> step=3959600, episode=661 reward=0.791237 (478.03 it/sec) -training >> step=3959700, episode=661 reward=0.7591808 (518.80 it/sec) -training >> step=3959800, episode=661 reward=0.7717351 (480.59 it/sec) -training >> step=3959900, episode=661 reward=0.7795479 (480.21 it/sec) -training >> step=3960000, episode=661 reward=0.7845684 (477.46 it/sec) -training >> step=3960100, episode=661 reward=0.7692496 (517.64 it/sec) -training >> step=3960200, episode=661 reward=0.7861757 (513.10 it/sec) -training >> step=3960300, episode=661 reward=0.7866474 (523.27 it/sec) -training >> step=3960400, episode=661 reward=0.7971933 (509.12 it/sec) -training >> step=3960500, episode=661 reward=0.7537491 (511.40 it/sec) -training >> step=3960600, episode=661 reward=0.7542168 (496.65 it/sec) -training >> step=3960700, episode=661 reward=0.7673518 (487.63 it/sec) -training >> step=3960800, episode=661 reward=0.7714712 (551.54 it/sec) -training >> step=3960900, episode=661 reward=0.7562093 (476.16 it/sec) -training >> step=3961000, episode=661 reward=0.7605153 (483.95 it/sec) -training >> step=3961100, episode=661 reward=0.770152 (515.24 it/sec) -training >> step=3961200, episode=661 reward=0.7896751 (492.27 it/sec) -training >> step=3961300, episode=661 reward=0.7713283 (519.33 it/sec) -training >> step=3961400, episode=661 reward=0.7813607 (529.47 it/sec) -training >> step=3961500, episode=661 reward=0.7769326 (513.67 it/sec) -training >> step=3961600, episode=661 reward=0.7911292 (488.45 it/sec) -training >> step=3961700, episode=661 reward=0.7862841 (438.71 it/sec) -training >> step=3961800, episode=661 reward=0.76208 (527.11 it/sec) -training >> step=3961900, episode=661 reward=0.8040248 (521.34 it/sec) -training >> step=3962000, episode=661 reward=0.7645311 (513.41 it/sec) -training >> step=3962100, episode=661 reward=0.7859254 (503.60 it/sec) -training >> step=3962200, episode=661 reward=0.7720761 (513.37 it/sec) -training >> step=3962300, episode=661 reward=0.773783 (506.70 it/sec) -training >> step=3962400, episode=661 reward=0.7735612 (493.74 it/sec) -training >> step=3962500, episode=661 reward=0.7842575 (493.42 it/sec) -training >> step=3962600, episode=661 reward=0.7615938 (530.83 it/sec) -training >> step=3962700, episode=661 reward=0.7599983 (500.13 it/sec) -training >> step=3962800, episode=661 reward=0.7820867 (528.52 it/sec) -training >> step=3962900, episode=661 reward=0.7815905 (534.77 it/sec) -training >> step=3963000, episode=661 reward=0.7506582 (489.09 it/sec) -training >> step=3963100, episode=661 reward=0.7419739 (485.95 it/sec) -training >> step=3963200, episode=661 reward=0.7782128 (521.70 it/sec) -training >> step=3963300, episode=661 reward=0.7666761 (544.66 it/sec) -training >> step=3963400, episode=661 reward=0.782003 (438.22 it/sec) -training >> step=3963500, episode=661 reward=0.7819842 (464.68 it/sec) -training >> step=3963600, episode=661 reward=0.7778043 (496.05 it/sec) -training >> step=3963700, episode=661 reward=0.7902966 (491.66 it/sec) -training >> step=3963800, episode=661 reward=0.7590859 (463.45 it/sec) -training >> step=3963900, episode=661 reward=0.7717388 (458.51 it/sec) -training >> step=3964000, episode=661 reward=0.7844175 (473.66 it/sec) -training >> step=3964100, episode=661 reward=0.768983 (515.72 it/sec) -training >> step=3964200, episode=661 reward=0.7758248 (475.41 it/sec) -training >> step=3964300, episode=661 reward=0.7983922 (525.55 it/sec) -training >> step=3964400, episode=661 reward=0.7778398 (547.87 it/sec) -training >> step=3964500, episode=661 reward=0.7902468 (461.41 it/sec) -training >> step=3964600, episode=661 reward=0.7722073 (466.41 it/sec) -training >> step=3964700, episode=661 reward=0.7575765 (490.49 it/sec) -training >> step=3964800, episode=661 reward=0.7718664 (485.17 it/sec) -training >> step=3964900, episode=661 reward=0.7837556 (502.45 it/sec) -training >> step=3965000, episode=661 reward=0.7781566 (491.78 it/sec) -training >> step=3965100, episode=661 reward=0.7425085 (548.70 it/sec) -training >> step=3965200, episode=661 reward=0.7783974 (506.49 it/sec) -training >> step=3965300, episode=662 reward=0.7887698 (88.28 it/sec) -training >> step=3965400, episode=662 reward=0.7723657 (494.00 it/sec) -training >> step=3965500, episode=662 reward=0.7722612 (495.70 it/sec) -training >> step=3965600, episode=662 reward=0.7547078 (401.34 it/sec) -training >> step=3965700, episode=662 reward=0.7828017 (473.08 it/sec) -training >> step=3965800, episode=662 reward=0.7901843 (515.42 it/sec) -training >> step=3965900, episode=662 reward=0.759242 (497.86 it/sec) -training >> step=3966000, episode=662 reward=0.7696552 (500.95 it/sec) -training >> step=3966100, episode=662 reward=0.7763821 (486.36 it/sec) -training >> step=3966200, episode=662 reward=0.7719264 (485.91 it/sec) -training >> step=3966300, episode=662 reward=0.7758024 (509.67 it/sec) -training >> step=3966400, episode=662 reward=0.7842459 (492.40 it/sec) -training >> step=3966500, episode=662 reward=0.7688972 (503.25 it/sec) -training >> step=3966600, episode=662 reward=0.7523345 (458.09 it/sec) -training >> step=3966700, episode=662 reward=0.7872945 (474.68 it/sec) -training >> step=3966800, episode=662 reward=0.7745035 (518.40 it/sec) -training >> step=3966900, episode=662 reward=0.7767192 (507.87 it/sec) -training >> step=3967000, episode=662 reward=0.7668322 (487.51 it/sec) -training >> step=3967100, episode=662 reward=0.7819508 (459.54 it/sec) -training >> step=3967200, episode=662 reward=0.746156 (458.48 it/sec) -training >> step=3967300, episode=662 reward=0.7828331 (462.47 it/sec) -training >> step=3967400, episode=662 reward=0.7957324 (470.99 it/sec) -training >> step=3967500, episode=662 reward=0.7801495 (519.57 it/sec) -training >> step=3967600, episode=662 reward=0.7669191 (450.37 it/sec) -training >> step=3967700, episode=662 reward=0.7666968 (430.88 it/sec) -training >> step=3967800, episode=662 reward=0.7672823 (488.16 it/sec) -training >> step=3967900, episode=662 reward=0.7761944 (473.14 it/sec) -training >> step=3968000, episode=662 reward=0.7737032 (514.12 it/sec) -training >> step=3968100, episode=662 reward=0.7894764 (458.65 it/sec) -training >> step=3968200, episode=662 reward=0.7689359 (508.37 it/sec) -training >> step=3968300, episode=662 reward=0.7812197 (441.21 it/sec) -training >> step=3968400, episode=662 reward=0.7843403 (495.03 it/sec) -training >> step=3968500, episode=662 reward=0.773916 (469.17 it/sec) -training >> step=3968600, episode=662 reward=0.7721012 (474.01 it/sec) -training >> step=3968700, episode=662 reward=0.7759057 (439.12 it/sec) -training >> step=3968800, episode=662 reward=0.7789671 (438.88 it/sec) -training >> step=3968900, episode=662 reward=0.7739961 (493.35 it/sec) -training >> step=3969000, episode=662 reward=0.7806052 (462.44 it/sec) -training >> step=3969100, episode=662 reward=0.7936749 (456.97 it/sec) -training >> step=3969200, episode=662 reward=0.7828813 (429.85 it/sec) -training >> step=3969300, episode=662 reward=0.75787 (478.76 it/sec) -training >> step=3969400, episode=662 reward=0.7709032 (489.93 it/sec) -training >> step=3969500, episode=662 reward=0.7747999 (445.74 it/sec) -training >> step=3969600, episode=662 reward=0.769933 (487.50 it/sec) -training >> step=3969700, episode=662 reward=0.7852129 (488.84 it/sec) -training >> step=3969800, episode=662 reward=0.7826931 (496.52 it/sec) -training >> step=3969900, episode=662 reward=0.7797797 (454.85 it/sec) -training >> step=3970000, episode=662 reward=0.7752412 (420.68 it/sec) -training >> step=3970100, episode=662 reward=0.7800059 (435.21 it/sec) -training >> step=3970200, episode=662 reward=0.763669 (465.30 it/sec) -training >> step=3970300, episode=662 reward=0.7879962 (479.92 it/sec) -training >> step=3970400, episode=662 reward=0.7648586 (490.66 it/sec) -training >> step=3970500, episode=662 reward=0.7637962 (451.43 it/sec) -training >> step=3970600, episode=662 reward=0.7749258 (470.70 it/sec) -training >> step=3970700, episode=662 reward=0.7689102 (466.13 it/sec) -training >> step=3970800, episode=662 reward=0.7650186 (489.23 it/sec) -training >> step=3970900, episode=662 reward=0.7753167 (413.55 it/sec) -training >> step=3971000, episode=662 reward=0.7655465 (413.43 it/sec) -training >> step=3971100, episode=662 reward=0.7873043 (474.31 it/sec) -training >> step=3971200, episode=662 reward=0.7663279 (453.66 it/sec) -training >> step=3971300, episode=663 reward=0.7522338 (37.53 it/sec) -training >> step=3971400, episode=663 reward=0.7721533 (464.44 it/sec) -training >> step=3971500, episode=663 reward=0.7876568 (474.26 it/sec) -training >> step=3971600, episode=663 reward=0.7671084 (445.87 it/sec) -training >> step=3971700, episode=663 reward=0.7706614 (458.82 it/sec) -training >> step=3971800, episode=663 reward=0.7649276 (432.96 it/sec) -training >> step=3971900, episode=663 reward=0.7748907 (492.36 it/sec) -training >> step=3972000, episode=663 reward=0.7509013 (468.50 it/sec) -training >> step=3972100, episode=663 reward=0.7672147 (464.00 it/sec) -training >> step=3972200, episode=663 reward=0.7790767 (385.63 it/sec) -training >> step=3972300, episode=663 reward=0.7786771 (489.75 it/sec) -training >> step=3972400, episode=663 reward=0.7824346 (462.38 it/sec) -training >> step=3972500, episode=663 reward=0.7665647 (430.90 it/sec) -training >> step=3972600, episode=663 reward=0.775913 (434.10 it/sec) -training >> step=3972700, episode=663 reward=0.7514141 (431.71 it/sec) -training >> step=3972800, episode=663 reward=0.7690538 (450.76 it/sec) -training >> step=3972900, episode=663 reward=0.756157 (496.19 it/sec) -training >> step=3973000, episode=663 reward=0.783597 (517.59 it/sec) -training >> step=3973100, episode=663 reward=0.7835171 (440.22 it/sec) -training >> step=3973200, episode=663 reward=0.7727201 (435.35 it/sec) -training >> step=3973300, episode=663 reward=0.7702615 (486.50 it/sec) -training >> step=3973400, episode=663 reward=0.7783601 (476.33 it/sec) -training >> step=3973500, episode=663 reward=0.7787972 (479.70 it/sec) -training >> step=3973600, episode=663 reward=0.7555085 (487.86 it/sec) -training >> step=3973700, episode=663 reward=0.7737926 (440.75 it/sec) -training >> step=3973800, episode=663 reward=0.7740788 (472.19 it/sec) -training >> step=3973900, episode=663 reward=0.7961578 (473.17 it/sec) -training >> step=3974000, episode=663 reward=0.768212 (474.75 it/sec) -training >> step=3974100, episode=663 reward=0.7769164 (496.20 it/sec) -training >> step=3974200, episode=663 reward=0.7807472 (446.98 it/sec) -training >> step=3974300, episode=663 reward=0.7918705 (472.04 it/sec) -training >> step=3974400, episode=663 reward=0.7827816 (459.25 it/sec) -training >> step=3974500, episode=663 reward=0.7922359 (464.38 it/sec) -training >> step=3974600, episode=663 reward=0.775593 (438.10 it/sec) -training >> step=3974700, episode=663 reward=0.7572284 (465.33 it/sec) -training >> step=3974800, episode=663 reward=0.7718373 (457.88 it/sec) -training >> step=3974900, episode=663 reward=0.7760899 (464.61 it/sec) -training >> step=3975000, episode=663 reward=0.778847 (491.70 it/sec) -training >> step=3975100, episode=663 reward=0.7866766 (448.00 it/sec) -training >> step=3975200, episode=663 reward=0.7601021 (475.42 it/sec) -training >> step=3975300, episode=663 reward=0.7491592 (508.73 it/sec) -training >> step=3975400, episode=663 reward=0.7978855 (501.12 it/sec) -training >> step=3975500, episode=663 reward=0.7895138 (449.15 it/sec) -training >> step=3975600, episode=663 reward=0.7704276 (465.14 it/sec) -training >> step=3975700, episode=663 reward=0.7765 (477.14 it/sec) -training >> step=3975800, episode=663 reward=0.7564031 (429.66 it/sec) -training >> step=3975900, episode=663 reward=0.7793715 (482.10 it/sec) -training >> step=3976000, episode=663 reward=0.769443 (462.86 it/sec) -training >> step=3976100, episode=663 reward=0.7728548 (503.38 it/sec) -training >> step=3976200, episode=663 reward=0.7640998 (470.55 it/sec) -training >> step=3976300, episode=663 reward=0.7686825 (503.14 it/sec) -training >> step=3976400, episode=663 reward=0.7741587 (503.44 it/sec) -training >> step=3976500, episode=663 reward=0.772282 (413.08 it/sec) -training >> step=3976600, episode=663 reward=0.7538981 (415.07 it/sec) -training >> step=3976700, episode=663 reward=0.7679453 (485.83 it/sec) -training >> step=3976800, episode=663 reward=0.7614014 (477.16 it/sec) -training >> step=3976900, episode=663 reward=0.7905821 (481.52 it/sec) -training >> step=3977000, episode=663 reward=0.7815896 (472.04 it/sec) -training >> step=3977100, episode=663 reward=0.7528353 (486.72 it/sec) -training >> step=3977200, episode=663 reward=0.7607105 (502.44 it/sec) -training >> step=3977300, episode=664 reward=0.7643321 (37.29 it/sec) -training >> step=3977400, episode=664 reward=0.7618137 (433.29 it/sec) -training >> step=3977500, episode=664 reward=0.7788868 (426.79 it/sec) -training >> step=3977600, episode=664 reward=0.7526656 (484.57 it/sec) -training >> step=3977700, episode=664 reward=0.7913568 (469.42 it/sec) -training >> step=3977800, episode=664 reward=0.7745678 (475.94 it/sec) -training >> step=3977900, episode=664 reward=0.7708631 (486.87 it/sec) -training >> step=3978000, episode=664 reward=0.7866225 (488.35 it/sec) -training >> step=3978100, episode=664 reward=0.7837443 (505.55 it/sec) -training >> step=3978200, episode=664 reward=0.7767138 (454.63 it/sec) -training >> step=3978300, episode=664 reward=0.7623629 (436.01 it/sec) -training >> step=3978400, episode=664 reward=0.7829456 (491.67 it/sec) -training >> step=3978500, episode=664 reward=0.7695386 (476.69 it/sec) -training >> step=3978600, episode=664 reward=0.7791734 (448.15 it/sec) -training >> step=3978700, episode=664 reward=0.763814 (497.55 it/sec) -training >> step=3978800, episode=664 reward=0.7642654 (481.71 it/sec) -training >> step=3978900, episode=664 reward=0.7851887 (484.61 it/sec) -training >> step=3979000, episode=664 reward=0.7737913 (496.34 it/sec) -training >> step=3979100, episode=664 reward=0.7689865 (409.33 it/sec) -training >> step=3979200, episode=664 reward=0.7671448 (399.95 it/sec) -training >> step=3979300, episode=664 reward=0.7647465 (469.43 it/sec) -training >> step=3979400, episode=664 reward=0.7747784 (483.93 it/sec) -training >> step=3979500, episode=664 reward=0.7811548 (458.26 it/sec) -training >> step=3979600, episode=664 reward=0.793534 (445.53 it/sec) -training >> step=3979700, episode=664 reward=0.7748026 (450.60 it/sec) -training >> step=3979800, episode=664 reward=0.7804231 (532.08 it/sec) -training >> step=3979900, episode=664 reward=0.7604561 (482.10 it/sec) -training >> step=3980000, episode=664 reward=0.7899109 (469.91 it/sec) -training >> step=3980100, episode=664 reward=0.7589803 (467.34 it/sec) -training >> step=3980200, episode=664 reward=0.7820477 (476.60 it/sec) -training >> step=3980300, episode=664 reward=0.774358 (495.45 it/sec) -training >> step=3980400, episode=664 reward=0.7863434 (521.52 it/sec) -training >> step=3980500, episode=664 reward=0.7491879 (539.75 it/sec) -training >> step=3980600, episode=664 reward=0.7650785 (542.25 it/sec) -training >> step=3980700, episode=664 reward=0.7796291 (499.00 it/sec) -training >> step=3980800, episode=664 reward=0.781063 (509.54 it/sec) -training >> step=3980900, episode=664 reward=0.7891729 (571.14 it/sec) -training >> step=3981000, episode=664 reward=0.788406 (509.67 it/sec) -training >> step=3981100, episode=664 reward=0.8004112 (501.19 it/sec) -training >> step=3981200, episode=664 reward=0.7688946 (437.39 it/sec) -training >> step=3981300, episode=664 reward=0.7781072 (538.35 it/sec) -training >> step=3981400, episode=664 reward=0.7816021 (505.38 it/sec) -training >> step=3981500, episode=664 reward=0.7759066 (540.17 it/sec) -training >> step=3981600, episode=664 reward=0.7573034 (514.98 it/sec) -training >> step=3981700, episode=664 reward=0.7704262 (475.12 it/sec) -training >> step=3981800, episode=664 reward=0.7847867 (511.10 it/sec) -training >> step=3981900, episode=664 reward=0.775489 (540.81 it/sec) -training >> step=3982000, episode=664 reward=0.7822449 (539.19 it/sec) -training >> step=3982100, episode=664 reward=0.7765622 (526.20 it/sec) -training >> step=3982200, episode=664 reward=0.7449096 (444.26 it/sec) -training >> step=3982300, episode=664 reward=0.7819909 (532.12 it/sec) -training >> step=3982400, episode=664 reward=0.7616605 (540.17 it/sec) -training >> step=3982500, episode=664 reward=0.80244 (521.69 it/sec) -training >> step=3982600, episode=664 reward=0.7821937 (504.61 it/sec) -training >> step=3982700, episode=664 reward=0.7740915 (510.61 it/sec) -training >> step=3982800, episode=664 reward=0.7606866 (474.70 it/sec) -training >> step=3982900, episode=664 reward=0.7836068 (505.92 it/sec) -training >> step=3983000, episode=664 reward=0.7568693 (456.90 it/sec) -training >> step=3983100, episode=664 reward=0.7749218 (491.36 it/sec) -training >> step=3983200, episode=664 reward=0.7972102 (455.86 it/sec) -training >> step=3983300, episode=665 reward=0.7768367 (46.06 it/sec) -training >> step=3983400, episode=665 reward=0.7734615 (433.96 it/sec) -training >> step=3983500, episode=665 reward=0.7958621 (405.40 it/sec) -training >> step=3983600, episode=665 reward=0.766252 (472.02 it/sec) -training >> step=3983700, episode=665 reward=0.7749417 (430.86 it/sec) -training >> step=3983800, episode=665 reward=0.7827253 (416.87 it/sec) -training >> step=3983900, episode=665 reward=0.7666349 (519.46 it/sec) -training >> step=3984000, episode=665 reward=0.7654265 (515.02 it/sec) -training >> step=3984100, episode=665 reward=0.7694237 (498.23 it/sec) -training >> step=3984200, episode=665 reward=0.7926158 (498.36 it/sec) -training >> step=3984300, episode=665 reward=0.7818661 (525.18 it/sec) -training >> step=3984400, episode=665 reward=0.8168868 (552.19 it/sec) -training >> step=3984500, episode=665 reward=0.7853392 (497.78 it/sec) -training >> step=3984600, episode=665 reward=0.7818173 (533.85 it/sec) -training >> step=3984700, episode=665 reward=0.7915562 (543.28 it/sec) -training >> step=3984800, episode=665 reward=0.7776259 (528.92 it/sec) -training >> step=3984900, episode=665 reward=0.7532778 (518.89 it/sec) -training >> step=3985000, episode=665 reward=0.7888421 (551.25 it/sec) -training >> step=3985100, episode=665 reward=0.787715 (515.63 it/sec) -training >> step=3985200, episode=665 reward=0.7750823 (504.11 it/sec) -training >> step=3985300, episode=665 reward=0.7752228 (459.80 it/sec) -training >> step=3985400, episode=665 reward=0.7764758 (479.90 it/sec) -training >> step=3985500, episode=665 reward=0.7946554 (449.80 it/sec) -training >> step=3985600, episode=665 reward=0.7632135 (519.57 it/sec) -training >> step=3985700, episode=665 reward=0.7924092 (511.16 it/sec) -training >> step=3985800, episode=665 reward=0.783606 (503.92 it/sec) -training >> step=3985900, episode=665 reward=0.7727165 (503.62 it/sec) -training >> step=3986000, episode=665 reward=0.7625265 (520.25 it/sec) -training >> step=3986100, episode=665 reward=0.7653291 (544.14 it/sec) -training >> step=3986200, episode=665 reward=0.7768403 (512.86 it/sec) -training >> step=3986300, episode=665 reward=0.7467247 (456.54 it/sec) -training >> step=3986400, episode=665 reward=0.7826507 (505.22 it/sec) -training >> step=3986500, episode=665 reward=0.7643182 (409.29 it/sec) -training >> step=3986600, episode=665 reward=0.7873175 (437.16 it/sec) -training >> step=3986700, episode=665 reward=0.7605975 (407.98 it/sec) -training >> step=3986800, episode=665 reward=0.746711 (449.09 it/sec) -training >> step=3986900, episode=665 reward=0.7703721 (372.31 it/sec) -training >> step=3987000, episode=665 reward=0.7869995 (464.10 it/sec) -training >> step=3987100, episode=665 reward=0.7765785 (581.10 it/sec) -training >> step=3987200, episode=665 reward=0.7831559 (528.40 it/sec) -training >> step=3987300, episode=665 reward=0.7651462 (518.59 it/sec) -training >> step=3987400, episode=665 reward=0.7909808 (546.56 it/sec) -training >> step=3987500, episode=665 reward=0.7784017 (511.85 it/sec) -training >> step=3987600, episode=665 reward=0.7660891 (541.69 it/sec) -training >> step=3987700, episode=665 reward=0.7731372 (521.48 it/sec) -training >> step=3987800, episode=665 reward=0.784812 (503.09 it/sec) -training >> step=3987900, episode=665 reward=0.7770755 (469.90 it/sec) -training >> step=3988000, episode=665 reward=0.7622677 (489.70 it/sec) -training >> step=3988100, episode=665 reward=0.7747431 (544.02 it/sec) -training >> step=3988200, episode=665 reward=0.7817413 (546.28 it/sec) -training >> step=3988300, episode=665 reward=0.7730013 (456.37 it/sec) -training >> step=3988400, episode=665 reward=0.7948821 (498.00 it/sec) -training >> step=3988500, episode=665 reward=0.7596496 (567.66 it/sec) -training >> step=3988600, episode=665 reward=0.7589481 (552.58 it/sec) -training >> step=3988700, episode=665 reward=0.7701185 (527.21 it/sec) -training >> step=3988800, episode=665 reward=0.7748768 (533.84 it/sec) -training >> step=3988900, episode=665 reward=0.7774403 (500.13 it/sec) -training >> step=3989000, episode=665 reward=0.7607108 (450.07 it/sec) -training >> step=3989100, episode=665 reward=0.774014 (473.50 it/sec) -training >> step=3989200, episode=665 reward=0.7588404 (580.14 it/sec) -training >> step=3989300, episode=666 reward=0.7437032 (122.45 it/sec) -training >> step=3989400, episode=666 reward=0.7766769 (559.93 it/sec) -training >> step=3989500, episode=666 reward=0.7623485 (510.86 it/sec) -training >> step=3989600, episode=666 reward=0.774838 (488.90 it/sec) -training >> step=3989700, episode=666 reward=0.7736883 (530.86 it/sec) -training >> step=3989800, episode=666 reward=0.7897161 (519.47 it/sec) -training >> step=3989900, episode=666 reward=0.7963245 (566.00 it/sec) -training >> step=3990000, episode=666 reward=0.7561859 (535.45 it/sec) -training >> step=3990100, episode=666 reward=0.7769026 (522.20 it/sec) -training >> step=3990200, episode=666 reward=0.7698408 (525.26 it/sec) -training >> step=3990300, episode=666 reward=0.7794071 (544.54 it/sec) -training >> step=3990400, episode=666 reward=0.7768648 (508.32 it/sec) -training >> step=3990500, episode=666 reward=0.7773677 (539.14 it/sec) -training >> step=3990600, episode=666 reward=0.7784699 (493.90 it/sec) -training >> step=3990700, episode=666 reward=0.7820176 (545.34 it/sec) -training >> step=3990800, episode=666 reward=0.7626283 (502.71 it/sec) -training >> step=3990900, episode=666 reward=0.7942354 (504.70 it/sec) -training >> step=3991000, episode=666 reward=0.7631715 (540.33 it/sec) -training >> step=3991100, episode=666 reward=0.7482293 (550.16 it/sec) -training >> step=3991200, episode=666 reward=0.7681463 (457.28 it/sec) -training >> step=3991300, episode=666 reward=0.746716 (524.01 it/sec) -training >> step=3991400, episode=666 reward=0.7851748 (546.30 it/sec) -training >> step=3991500, episode=666 reward=0.775981 (522.65 it/sec) -training >> step=3991600, episode=666 reward=0.7658905 (515.01 it/sec) -training >> step=3991700, episode=666 reward=0.7668183 (497.78 it/sec) -training >> step=3991800, episode=666 reward=0.7841311 (526.47 it/sec) -training >> step=3991900, episode=666 reward=0.7848566 (547.14 it/sec) -training >> step=3992000, episode=666 reward=0.7739775 (507.09 it/sec) -training >> step=3992100, episode=666 reward=0.7865151 (570.00 it/sec) -training >> step=3992200, episode=666 reward=0.7773373 (490.47 it/sec) -training >> step=3992300, episode=666 reward=0.772475 (505.59 it/sec) -training >> step=3992400, episode=666 reward=0.7637556 (480.32 it/sec) -training >> step=3992500, episode=666 reward=0.7587585 (519.85 it/sec) -training >> step=3992600, episode=666 reward=0.7943285 (491.89 it/sec) -training >> step=3992700, episode=666 reward=0.7724775 (471.50 it/sec) -training >> step=3992800, episode=666 reward=0.7627702 (505.00 it/sec) -training >> step=3992900, episode=666 reward=0.7852095 (430.24 it/sec) -training >> step=3993000, episode=666 reward=0.7679396 (400.81 it/sec) -training >> step=3993100, episode=666 reward=0.7846024 (406.38 it/sec) -training >> step=3993200, episode=666 reward=0.7743084 (457.48 it/sec) -training >> step=3993300, episode=666 reward=0.7521566 (466.98 it/sec) -training >> step=3993400, episode=666 reward=0.7649966 (503.82 it/sec) -training >> step=3993500, episode=666 reward=0.7793915 (488.53 it/sec) -training >> step=3993600, episode=666 reward=0.7767629 (502.81 it/sec) -training >> step=3993700, episode=666 reward=0.7926043 (496.47 it/sec) -training >> step=3993800, episode=666 reward=0.7900642 (494.05 it/sec) -training >> step=3993900, episode=666 reward=0.7625948 (481.26 it/sec) -training >> step=3994000, episode=666 reward=0.7732254 (473.84 it/sec) -training >> step=3994100, episode=666 reward=0.7838081 (415.75 it/sec) -training >> step=3994200, episode=666 reward=0.7780163 (396.25 it/sec) -training >> step=3994300, episode=666 reward=0.8007967 (495.38 it/sec) -training >> step=3994400, episode=666 reward=0.7873231 (476.12 it/sec) -training >> step=3994500, episode=666 reward=0.7443143 (506.07 it/sec) -training >> step=3994600, episode=666 reward=0.7615689 (476.04 it/sec) -training >> step=3994700, episode=666 reward=0.7772267 (493.27 it/sec) -training >> step=3994800, episode=666 reward=0.7851998 (494.90 it/sec) -training >> step=3994900, episode=666 reward=0.7739257 (459.21 it/sec) -training >> step=3995000, episode=666 reward=0.7656933 (490.64 it/sec) -training >> step=3995100, episode=666 reward=0.7747181 (377.00 it/sec) -training >> step=3995200, episode=666 reward=0.7926905 (377.86 it/sec) -training >> step=3995300, episode=667 reward=0.7829385 (60.84 it/sec) -training >> step=3995400, episode=667 reward=0.7686436 (428.75 it/sec) -training >> step=3995500, episode=667 reward=0.7688021 (488.22 it/sec) -training >> step=3995600, episode=667 reward=0.7603609 (527.63 it/sec) -training >> step=3995700, episode=667 reward=0.7570941 (509.62 it/sec) -training >> step=3995800, episode=667 reward=0.7825251 (469.76 it/sec) -training >> step=3995900, episode=667 reward=0.7528901 (463.43 it/sec) -training >> step=3996000, episode=667 reward=0.7770317 (414.36 it/sec) -training >> step=3996100, episode=667 reward=0.7702715 (473.80 it/sec) -training >> step=3996200, episode=667 reward=0.7617313 (481.23 it/sec) -training >> step=3996300, episode=667 reward=0.7698317 (491.32 it/sec) -training >> step=3996400, episode=667 reward=0.7851848 (500.16 it/sec) -training >> step=3996500, episode=667 reward=0.7785351 (505.99 it/sec) -training >> step=3996600, episode=667 reward=0.7769952 (507.87 it/sec) -training >> step=3996700, episode=667 reward=0.7588927 (540.42 it/sec) -training >> step=3996800, episode=667 reward=0.762456 (524.25 it/sec) -training >> step=3996900, episode=667 reward=0.770906 (499.30 it/sec) -training >> step=3997000, episode=667 reward=0.7528679 (500.56 it/sec) -training >> step=3997100, episode=667 reward=0.7670661 (471.30 it/sec) -training >> step=3997200, episode=667 reward=0.7593318 (425.05 it/sec) -training >> step=3997300, episode=667 reward=0.778346 (448.93 it/sec) -training >> step=3997400, episode=667 reward=0.7697674 (426.32 it/sec) -training >> step=3997500, episode=667 reward=0.7621656 (438.24 it/sec) -training >> step=3997600, episode=667 reward=0.7779092 (409.62 it/sec) -training >> step=3997700, episode=667 reward=0.7878681 (460.43 it/sec) -training >> step=3997800, episode=667 reward=0.7910582 (478.53 it/sec) -training >> step=3997900, episode=667 reward=0.7740985 (465.02 it/sec) -training >> step=3998000, episode=667 reward=0.773922 (491.65 it/sec) -training >> step=3998100, episode=667 reward=0.7782245 (488.51 it/sec) -training >> step=3998200, episode=667 reward=0.7756342 (517.07 it/sec) -training >> step=3998300, episode=667 reward=0.7780252 (464.29 it/sec) -training >> step=3998400, episode=667 reward=0.7994094 (394.53 it/sec) -training >> step=3998500, episode=667 reward=0.7748285 (392.41 it/sec) -training >> step=3998600, episode=667 reward=0.7944233 (459.77 it/sec) -training >> step=3998700, episode=667 reward=0.7769796 (489.61 it/sec) -training >> step=3998800, episode=667 reward=0.75335 (438.23 it/sec) -training >> step=3998900, episode=667 reward=0.7719663 (431.36 it/sec) -training >> step=3999000, episode=667 reward=0.7956011 (446.09 it/sec) -training >> step=3999100, episode=667 reward=0.7673022 (445.05 it/sec) -training >> step=3999200, episode=667 reward=0.7812955 (430.09 it/sec) -training >> step=3999300, episode=667 reward=0.7838626 (421.00 it/sec) -training >> step=3999400, episode=667 reward=0.7702182 (397.72 it/sec) -training >> step=3999500, episode=667 reward=0.7889934 (466.99 it/sec) -training >> step=3999600, episode=667 reward=0.7873188 (491.06 it/sec) -training >> step=3999700, episode=667 reward=0.7752391 (522.67 it/sec) -training >> step=3999800, episode=667 reward=0.7737395 (463.24 it/sec) -training >> step=3999900, episode=667 reward=0.7713411 (483.09 it/sec) -training >> step=4000000, episode=667 reward=0.7704931 (427.21 it/sec) -training >> step=4000100, episode=667 reward=0.7669966 (447.69 it/sec) -training >> step=4000200, episode=667 reward=0.7771837 (464.57 it/sec) -training >> step=4000300, episode=667 reward=0.7880197 (476.80 it/sec) -training >> step=4000400, episode=667 reward=0.7619976 (493.73 it/sec) -training >> step=4000500, episode=667 reward=0.7686114 (438.70 it/sec) -training >> step=4000600, episode=667 reward=0.7832778 (468.70 it/sec) -training >> step=4000700, episode=667 reward=0.7695934 (498.66 it/sec) -training >> step=4000800, episode=667 reward=0.7685348 (475.24 it/sec) -training >> step=4000900, episode=667 reward=0.7662652 (462.49 it/sec) -training >> step=4001000, episode=667 reward=0.7623901 (426.99 it/sec) -training >> step=4001100, episode=667 reward=0.7854582 (427.14 it/sec) -training >> step=4001200, episode=667 reward=0.7779914 (420.77 it/sec) -training >> step=4001300, episode=668 reward=0.7701908 (82.55 it/sec) -training >> step=4001400, episode=668 reward=0.7817376 (494.35 it/sec) -training >> step=4001500, episode=668 reward=0.7819195 (510.95 it/sec) -training >> step=4001600, episode=668 reward=0.8013995 (470.99 it/sec) -training >> step=4001700, episode=668 reward=0.7782217 (473.87 it/sec) -training >> step=4001800, episode=668 reward=0.7864184 (485.46 it/sec) -training >> step=4001900, episode=668 reward=0.7842301 (506.91 it/sec) -training >> step=4002000, episode=668 reward=0.7889997 (471.05 it/sec) -training >> step=4002100, episode=668 reward=0.7878897 (443.94 it/sec) -training >> step=4002200, episode=668 reward=0.7732702 (451.13 it/sec) -training >> step=4002300, episode=668 reward=0.7827076 (421.60 it/sec) -training >> step=4002400, episode=668 reward=0.7714059 (468.11 it/sec) -training >> step=4002500, episode=668 reward=0.7553459 (508.05 it/sec) -training >> step=4002600, episode=668 reward=0.7779666 (477.24 it/sec) -training >> step=4002700, episode=668 reward=0.7789333 (458.15 it/sec) -training >> step=4002800, episode=668 reward=0.7861456 (500.23 it/sec) -training >> step=4002900, episode=668 reward=0.7782094 (458.54 it/sec) -training >> step=4003000, episode=668 reward=0.7919229 (433.23 it/sec) -training >> step=4003100, episode=668 reward=0.7794201 (428.30 it/sec) -training >> step=4003200, episode=668 reward=0.7811207 (410.75 it/sec) -training >> step=4003300, episode=668 reward=0.7738321 (474.61 it/sec) -training >> step=4003400, episode=668 reward=0.7989734 (467.18 it/sec) -training >> step=4003500, episode=668 reward=0.7815605 (496.96 it/sec) -training >> step=4003600, episode=668 reward=0.7671084 (482.54 it/sec) -training >> step=4003700, episode=668 reward=0.7736712 (472.70 it/sec) -training >> step=4003800, episode=668 reward=0.7680864 (470.10 it/sec) -training >> step=4003900, episode=668 reward=0.7718342 (464.49 it/sec) -training >> step=4004000, episode=668 reward=0.7924978 (409.09 it/sec) -training >> step=4004100, episode=668 reward=0.7824629 (407.86 it/sec) -training >> step=4004200, episode=668 reward=0.765006 (492.96 it/sec) -training >> step=4004300, episode=668 reward=0.7903782 (449.44 it/sec) -training >> step=4004400, episode=668 reward=0.7695769 (474.44 it/sec) -training >> step=4004500, episode=668 reward=0.7687093 (484.17 it/sec) -training >> step=4004600, episode=668 reward=0.7726488 (467.00 it/sec) -training >> step=4004700, episode=668 reward=0.7457098 (474.71 it/sec) -training >> step=4004800, episode=668 reward=0.7851126 (458.99 it/sec) -training >> step=4004900, episode=668 reward=0.8003258 (484.59 it/sec) -training >> step=4005000, episode=668 reward=0.7853576 (487.46 it/sec) -training >> step=4005100, episode=668 reward=0.7957174 (415.04 it/sec) -training >> step=4005200, episode=668 reward=0.766939 (360.21 it/sec) -training >> step=4005300, episode=668 reward=0.7559458 (382.04 it/sec) -training >> step=4005400, episode=668 reward=0.7762337 (394.24 it/sec) -training >> step=4005500, episode=668 reward=0.7684537 (384.47 it/sec) -training >> step=4005600, episode=668 reward=0.7803462 (437.32 it/sec) -training >> step=4005700, episode=668 reward=0.7751441 (448.25 it/sec) -training >> step=4005800, episode=668 reward=0.7728535 (486.79 it/sec) -training >> step=4005900, episode=668 reward=0.7738393 (505.36 it/sec) -training >> step=4006000, episode=668 reward=0.7835081 (473.17 it/sec) -training >> step=4006100, episode=668 reward=0.7693645 (440.02 it/sec) -training >> step=4006200, episode=668 reward=0.7839545 (472.47 it/sec) -training >> step=4006300, episode=668 reward=0.765619 (485.82 it/sec) -training >> step=4006400, episode=668 reward=0.8047733 (499.06 it/sec) -training >> step=4006500, episode=668 reward=0.7681019 (400.04 it/sec) -training >> step=4006600, episode=668 reward=0.7660902 (426.41 it/sec) -training >> step=4006700, episode=668 reward=0.7762806 (473.79 it/sec) -training >> step=4006800, episode=668 reward=0.7875986 (388.28 it/sec) -training >> step=4006900, episode=668 reward=0.7710721 (413.52 it/sec) -training >> step=4007000, episode=668 reward=0.7601202 (519.81 it/sec) -training >> step=4007100, episode=668 reward=0.7716343 (502.97 it/sec) -training >> step=4007200, episode=668 reward=0.7769215 (503.88 it/sec) -training >> step=4007300, episode=669 reward=0.7790241 (108.40 it/sec) -training >> step=4007400, episode=669 reward=0.7569885 (537.06 it/sec) -training >> step=4007500, episode=669 reward=0.7699832 (491.82 it/sec) -training >> step=4007600, episode=669 reward=0.7789021 (452.17 it/sec) -training >> step=4007700, episode=669 reward=0.7620545 (491.30 it/sec) -training >> step=4007800, episode=669 reward=0.7533565 (493.91 it/sec) -training >> step=4007900, episode=669 reward=0.7673337 (491.80 it/sec) -training >> step=4008000, episode=669 reward=0.7777246 (461.43 it/sec) -training >> step=4008100, episode=669 reward=0.7919216 (462.22 it/sec) -training >> step=4008200, episode=669 reward=0.7811724 (491.38 it/sec) -training >> step=4008300, episode=669 reward=0.776587 (482.04 it/sec) -training >> step=4008400, episode=669 reward=0.7707046 (463.52 it/sec) -training >> step=4008500, episode=669 reward=0.7610986 (442.19 it/sec) -training >> step=4008600, episode=669 reward=0.7904769 (456.72 it/sec) -training >> step=4008700, episode=669 reward=0.7681946 (479.34 it/sec) -training >> step=4008800, episode=669 reward=0.7711266 (410.05 it/sec) -training >> step=4008900, episode=669 reward=0.7727029 (473.11 it/sec) -training >> step=4009000, episode=669 reward=0.774794 (473.79 it/sec) -training >> step=4009100, episode=669 reward=0.7953803 (471.19 it/sec) -training >> step=4009200, episode=669 reward=0.7678229 (511.33 it/sec) -training >> step=4009300, episode=669 reward=0.780341 (528.18 it/sec) -training >> step=4009400, episode=669 reward=0.7826419 (536.09 it/sec) -training >> step=4009500, episode=669 reward=0.7750915 (516.71 it/sec) -training >> step=4009600, episode=669 reward=0.7674503 (494.53 it/sec) -training >> step=4009700, episode=669 reward=0.7622766 (472.41 it/sec) -training >> step=4009800, episode=669 reward=0.7687595 (464.87 it/sec) -training >> step=4009900, episode=669 reward=0.7541842 (474.79 it/sec) -training >> step=4010000, episode=669 reward=0.7752776 (526.30 it/sec) -training >> step=4010100, episode=669 reward=0.7890565 (445.70 it/sec) -training >> step=4010200, episode=669 reward=0.7767649 (441.00 it/sec) -training >> step=4010300, episode=669 reward=0.793485 (476.77 it/sec) -training >> step=4010400, episode=669 reward=0.7897872 (489.24 it/sec) -training >> step=4010500, episode=669 reward=0.7749903 (519.49 it/sec) -training >> step=4010600, episode=669 reward=0.7971119 (491.67 it/sec) -training >> step=4010700, episode=669 reward=0.7678498 (498.48 it/sec) -training >> step=4010800, episode=669 reward=0.7912772 (499.43 it/sec) -training >> step=4010900, episode=669 reward=0.7721107 (452.35 it/sec) -training >> step=4011000, episode=669 reward=0.7750713 (387.62 it/sec) -training >> step=4011100, episode=669 reward=0.7964091 (408.56 it/sec) -training >> step=4011200, episode=669 reward=0.7746475 (442.88 it/sec) -training >> step=4011300, episode=669 reward=0.767857 (472.72 it/sec) -training >> step=4011400, episode=669 reward=0.7974685 (460.37 it/sec) -training >> step=4011500, episode=669 reward=0.7867059 (493.91 it/sec) -training >> step=4011600, episode=669 reward=0.7913762 (483.79 it/sec) -training >> step=4011700, episode=669 reward=0.7731602 (530.99 it/sec) -training >> step=4011800, episode=669 reward=0.7929758 (515.09 it/sec) -training >> step=4011900, episode=669 reward=0.754275 (475.22 it/sec) -training >> step=4012000, episode=669 reward=0.7695972 (481.41 it/sec) -training >> step=4012100, episode=669 reward=0.773325 (398.93 it/sec) -training >> step=4012200, episode=669 reward=0.7657734 (402.73 it/sec) -training >> step=4012300, episode=669 reward=0.7765674 (448.26 it/sec) -training >> step=4012400, episode=669 reward=0.7655411 (505.93 it/sec) -training >> step=4012500, episode=669 reward=0.7923419 (458.18 it/sec) -training >> step=4012600, episode=669 reward=0.7706714 (447.61 it/sec) -training >> step=4012700, episode=669 reward=0.7752168 (459.45 it/sec) -training >> step=4012800, episode=669 reward=0.7720578 (505.45 it/sec) -training >> step=4012900, episode=669 reward=0.7668727 (426.14 it/sec) -training >> step=4013000, episode=669 reward=0.7646542 (431.03 it/sec) -training >> step=4013100, episode=669 reward=0.7664551 (479.00 it/sec) -training >> step=4013200, episode=669 reward=0.7638381 (526.67 it/sec) -training >> step=4013300, episode=670 reward=0.774954 (61.36 it/sec) -training >> step=4013400, episode=670 reward=0.7739683 (519.48 it/sec) -training >> step=4013500, episode=670 reward=0.7799681 (417.75 it/sec) -training >> step=4013600, episode=670 reward=0.7740752 (460.50 it/sec) -training >> step=4013700, episode=670 reward=0.780794 (505.57 it/sec) -training >> step=4013800, episode=670 reward=0.7770523 (476.57 it/sec) -training >> step=4013900, episode=670 reward=0.7796577 (495.25 it/sec) -training >> step=4014000, episode=670 reward=0.7727023 (455.37 it/sec) -training >> step=4014100, episode=670 reward=0.7452556 (508.58 it/sec) -training >> step=4014200, episode=670 reward=0.7631528 (484.98 it/sec) -training >> step=4014300, episode=670 reward=0.7778341 (491.05 it/sec) -training >> step=4014400, episode=670 reward=0.7776561 (541.71 it/sec) -training >> step=4014500, episode=670 reward=0.7833021 (464.75 it/sec) -training >> step=4014600, episode=670 reward=0.7692077 (462.13 it/sec) -training >> step=4014700, episode=670 reward=0.7970568 (443.23 it/sec) -training >> step=4014800, episode=670 reward=0.7645829 (427.95 it/sec) -training >> step=4014900, episode=670 reward=0.7787617 (410.46 it/sec) -training >> step=4015000, episode=670 reward=0.7794712 (462.59 it/sec) -training >> step=4015100, episode=670 reward=0.7852044 (452.48 it/sec) -training >> step=4015200, episode=670 reward=0.7583742 (477.56 it/sec) -training >> step=4015300, episode=670 reward=0.7941389 (497.11 it/sec) -training >> step=4015400, episode=670 reward=0.768653 (511.30 it/sec) -training >> step=4015500, episode=670 reward=0.7771919 (500.89 it/sec) -training >> step=4015600, episode=670 reward=0.7638872 (480.64 it/sec) -training >> step=4015700, episode=670 reward=0.78259 (539.32 it/sec) -training >> step=4015800, episode=670 reward=0.7708487 (485.08 it/sec) -training >> step=4015900, episode=670 reward=0.7692609 (491.78 it/sec) -training >> step=4016000, episode=670 reward=0.7870985 (492.09 it/sec) -training >> step=4016100, episode=670 reward=0.7836068 (485.93 it/sec) -training >> step=4016200, episode=670 reward=0.7848389 (434.26 it/sec) -training >> step=4016300, episode=670 reward=0.7702007 (406.22 it/sec) -training >> step=4016400, episode=670 reward=0.7822603 (440.49 it/sec) -training >> step=4016500, episode=670 reward=0.7949012 (444.39 it/sec) -training >> step=4016600, episode=670 reward=0.7583576 (451.92 it/sec) -training >> step=4016700, episode=670 reward=0.7753083 (475.94 it/sec) -training >> step=4016800, episode=670 reward=0.7553804 (468.63 it/sec) -training >> step=4016900, episode=670 reward=0.7833794 (502.19 it/sec) -training >> step=4017000, episode=670 reward=0.7499645 (506.40 it/sec) -training >> step=4017100, episode=670 reward=0.7817771 (487.66 it/sec) -training >> step=4017200, episode=670 reward=0.77769 (509.98 it/sec) -training >> step=4017300, episode=670 reward=0.7770019 (491.32 it/sec) -training >> step=4017400, episode=670 reward=0.8000106 (471.14 it/sec) -training >> step=4017500, episode=670 reward=0.7837367 (504.01 it/sec) -training >> step=4017600, episode=670 reward=0.7903115 (505.82 it/sec) -training >> step=4017700, episode=670 reward=0.7564161 (487.85 it/sec) -training >> step=4017800, episode=670 reward=0.7848112 (477.62 it/sec) -training >> step=4017900, episode=670 reward=0.7794034 (528.22 it/sec) -training >> step=4018000, episode=670 reward=0.7628239 (483.25 it/sec) -training >> step=4018100, episode=670 reward=0.7793455 (507.22 it/sec) -training >> step=4018200, episode=670 reward=0.7911684 (485.37 it/sec) -training >> step=4018300, episode=670 reward=0.7744531 (518.46 it/sec) -training >> step=4018400, episode=670 reward=0.7747496 (508.88 it/sec) -training >> step=4018500, episode=670 reward=0.7724519 (486.37 it/sec) -training >> step=4018600, episode=670 reward=0.7712997 (487.08 it/sec) -training >> step=4018700, episode=670 reward=0.7780327 (411.30 it/sec) -training >> step=4018800, episode=670 reward=0.7815084 (418.65 it/sec) -training >> step=4018900, episode=670 reward=0.7592893 (401.38 it/sec) -training >> step=4019000, episode=670 reward=0.7581329 (386.97 it/sec) -training >> step=4019100, episode=670 reward=0.7759279 (377.35 it/sec) -training >> step=4019200, episode=670 reward=0.7738453 (420.44 it/sec) -training >> step=4019300, episode=671 reward=0.7837481 (49.36 it/sec) -training >> step=4019400, episode=671 reward=0.7857304 (469.50 it/sec) -training >> step=4019500, episode=671 reward=0.8059226 (462.67 it/sec) -training >> step=4019600, episode=671 reward=0.7480106 (410.98 it/sec) -training >> step=4019700, episode=671 reward=0.7981851 (376.05 it/sec) -training >> step=4019800, episode=671 reward=0.7812536 (394.42 it/sec) -training >> step=4019900, episode=671 reward=0.7762667 (402.17 it/sec) -training >> step=4020000, episode=671 reward=0.7790936 (414.15 it/sec) -training >> step=4020100, episode=671 reward=0.7854355 (388.27 it/sec) -training >> step=4020200, episode=671 reward=0.7824484 (430.44 it/sec) -training >> step=4020300, episode=671 reward=0.7664525 (476.49 it/sec) -training >> step=4020400, episode=671 reward=0.7608981 (469.82 it/sec) -training >> step=4020500, episode=671 reward=0.7399622 (435.93 it/sec) -training >> step=4020600, episode=671 reward=0.7764119 (446.14 it/sec) -training >> step=4020700, episode=671 reward=0.7620254 (426.52 it/sec) -training >> step=4020800, episode=671 reward=0.7864409 (461.64 it/sec) -training >> step=4020900, episode=671 reward=0.7631775 (427.79 it/sec) -training >> step=4021000, episode=671 reward=0.7827843 (370.30 it/sec) -training >> step=4021100, episode=671 reward=0.7748919 (403.45 it/sec) -training >> step=4021200, episode=671 reward=0.780506 (451.72 it/sec) -training >> step=4021300, episode=671 reward=0.7683744 (450.19 it/sec) -training >> step=4021400, episode=671 reward=0.7692057 (453.31 it/sec) -training >> step=4021500, episode=671 reward=0.7640507 (450.11 it/sec) -training >> step=4021600, episode=671 reward=0.7755997 (439.61 it/sec) -training >> step=4021700, episode=671 reward=0.7626337 (467.71 it/sec) -training >> step=4021800, episode=671 reward=0.7789966 (459.65 it/sec) -training >> step=4021900, episode=671 reward=0.7843442 (488.13 it/sec) -training >> step=4022000, episode=671 reward=0.7895585 (455.17 it/sec) -training >> step=4022100, episode=671 reward=0.7560902 (451.27 it/sec) -training >> step=4022200, episode=671 reward=0.7784645 (493.16 it/sec) -training >> step=4022300, episode=671 reward=0.7733999 (475.12 it/sec) -training >> step=4022400, episode=671 reward=0.7865391 (451.91 it/sec) -training >> step=4022500, episode=671 reward=0.7845687 (470.51 it/sec) -training >> step=4022600, episode=671 reward=0.7769412 (439.10 it/sec) -training >> step=4022700, episode=671 reward=0.7807906 (432.33 it/sec) -training >> step=4022800, episode=671 reward=0.7696106 (412.12 it/sec) -training >> step=4022900, episode=671 reward=0.7678496 (451.98 it/sec) -training >> step=4023000, episode=671 reward=0.7767367 (446.65 it/sec) -training >> step=4023100, episode=671 reward=0.765389 (448.92 it/sec) -training >> step=4023200, episode=671 reward=0.7700518 (474.54 it/sec) -training >> step=4023300, episode=671 reward=0.7961965 (429.94 it/sec) -training >> step=4023400, episode=671 reward=0.7917711 (389.94 it/sec) -training >> step=4023500, episode=671 reward=0.7775541 (446.42 it/sec) -training >> step=4023600, episode=671 reward=0.7798393 (457.94 it/sec) -training >> step=4023700, episode=671 reward=0.7789955 (459.01 it/sec) -training >> step=4023800, episode=671 reward=0.7845966 (467.89 it/sec) -training >> step=4023900, episode=671 reward=0.7828959 (431.47 it/sec) -training >> step=4024000, episode=671 reward=0.7757828 (495.94 it/sec) -training >> step=4024100, episode=671 reward=0.7785225 (473.78 it/sec) -training >> step=4024200, episode=671 reward=0.7776431 (478.51 it/sec) -training >> step=4024300, episode=671 reward=0.7682589 (425.79 it/sec) -training >> step=4024400, episode=671 reward=0.7810988 (479.38 it/sec) -training >> step=4024500, episode=671 reward=0.7756144 (454.47 it/sec) -training >> step=4024600, episode=671 reward=0.7669032 (486.33 it/sec) -training >> step=4024700, episode=671 reward=0.7774194 (440.98 it/sec) -training >> step=4024800, episode=671 reward=0.7763869 (434.72 it/sec) -training >> step=4024900, episode=671 reward=0.7791852 (459.87 it/sec) -training >> step=4025000, episode=671 reward=0.7924987 (437.27 it/sec) -training >> step=4025100, episode=671 reward=0.7883021 (469.50 it/sec) -training >> step=4025200, episode=671 reward=0.7833399 (461.16 it/sec) -training >> step=4025300, episode=672 reward=0.769949 (86.16 it/sec) -training >> step=4025400, episode=672 reward=0.7769926 (453.21 it/sec) -training >> step=4025500, episode=672 reward=0.7417362 (496.60 it/sec) -training >> step=4025600, episode=672 reward=0.7646047 (465.88 it/sec) -training >> step=4025700, episode=672 reward=0.7755244 (426.13 it/sec) -training >> step=4025800, episode=672 reward=0.7761897 (461.90 it/sec) -training >> step=4025900, episode=672 reward=0.7680736 (394.33 it/sec) -training >> step=4026000, episode=672 reward=0.7895974 (350.13 it/sec) -training >> step=4026100, episode=672 reward=0.7677813 (407.47 it/sec) -training >> step=4026200, episode=672 reward=0.7905856 (422.70 it/sec) -training >> step=4026300, episode=672 reward=0.7816047 (407.35 it/sec) -training >> step=4026400, episode=672 reward=0.7427415 (407.30 it/sec) -training >> step=4026500, episode=672 reward=0.7911335 (413.92 it/sec) -training >> step=4026600, episode=672 reward=0.7857817 (407.40 it/sec) -training >> step=4026700, episode=672 reward=0.7650458 (408.93 it/sec) -training >> step=4026800, episode=672 reward=0.785572 (449.73 it/sec) -training >> step=4026900, episode=672 reward=0.7968692 (414.20 it/sec) -training >> step=4027000, episode=672 reward=0.7820806 (445.03 it/sec) -training >> step=4027100, episode=672 reward=0.7607468 (502.20 it/sec) -training >> step=4027200, episode=672 reward=0.7587071 (464.80 it/sec) -training >> step=4027300, episode=672 reward=0.7773652 (449.73 it/sec) -training >> step=4027400, episode=672 reward=0.7910891 (423.45 it/sec) -training >> step=4027500, episode=672 reward=0.7782384 (480.50 it/sec) -training >> step=4027600, episode=672 reward=0.7653666 (420.78 it/sec) -training >> step=4027700, episode=672 reward=0.7936317 (450.06 it/sec) -training >> step=4027800, episode=672 reward=0.7728978 (466.34 it/sec) -training >> step=4027900, episode=672 reward=0.7874922 (485.21 it/sec) -training >> step=4028000, episode=672 reward=0.7853944 (439.69 it/sec) -training >> step=4028100, episode=672 reward=0.7708403 (486.23 it/sec) -training >> step=4028200, episode=672 reward=0.7703844 (445.14 it/sec) -training >> step=4028300, episode=672 reward=0.7863843 (479.51 it/sec) -training >> step=4028400, episode=672 reward=0.7751978 (443.23 it/sec) -training >> step=4028500, episode=672 reward=0.7787266 (431.75 it/sec) -training >> step=4028600, episode=672 reward=0.7895783 (452.75 it/sec) -training >> step=4028700, episode=672 reward=0.7820757 (455.66 it/sec) -training >> step=4028800, episode=672 reward=0.7732664 (483.98 it/sec) -training >> step=4028900, episode=672 reward=0.7735395 (473.49 it/sec) -training >> step=4029000, episode=672 reward=0.788714 (490.17 it/sec) -training >> step=4029100, episode=672 reward=0.7968732 (470.07 it/sec) -training >> step=4029200, episode=672 reward=0.7864663 (372.20 it/sec) -training >> step=4029300, episode=672 reward=0.7887535 (407.35 it/sec) -training >> step=4029400, episode=672 reward=0.771723 (369.47 it/sec) -training >> step=4029500, episode=672 reward=0.7799571 (379.89 it/sec) -training >> step=4029600, episode=672 reward=0.7744949 (387.83 it/sec) -training >> step=4029700, episode=672 reward=0.7632816 (405.41 it/sec) -training >> step=4029800, episode=672 reward=0.794472 (413.52 it/sec) -training >> step=4029900, episode=672 reward=0.7856472 (458.83 it/sec) -training >> step=4030000, episode=672 reward=0.7785072 (381.98 it/sec) -training >> step=4030100, episode=672 reward=0.7797819 (360.09 it/sec) -training >> step=4030200, episode=672 reward=0.7728231 (419.93 it/sec) -training >> step=4030300, episode=672 reward=0.7798159 (415.85 it/sec) -training >> step=4030400, episode=672 reward=0.7767339 (410.36 it/sec) -training >> step=4030500, episode=672 reward=0.7681053 (432.33 it/sec) -training >> step=4030600, episode=672 reward=0.7861788 (462.04 it/sec) -training >> step=4030700, episode=672 reward=0.7908515 (426.41 it/sec) -training >> step=4030800, episode=672 reward=0.7566872 (431.77 it/sec) -training >> step=4030900, episode=672 reward=0.7742118 (450.75 it/sec) -training >> step=4031000, episode=672 reward=0.7648134 (470.11 it/sec) -training >> step=4031100, episode=672 reward=0.7483913 (434.56 it/sec) -training >> step=4031200, episode=672 reward=0.8009487 (449.31 it/sec) -training >> step=4031300, episode=673 reward=0.7799727 (78.60 it/sec) -training >> step=4031400, episode=673 reward=0.7529491 (470.56 it/sec) -training >> step=4031500, episode=673 reward=0.7748973 (436.49 it/sec) -training >> step=4031600, episode=673 reward=0.7581338 (463.26 it/sec) -training >> step=4031700, episode=673 reward=0.7691358 (438.11 it/sec) -training >> step=4031800, episode=673 reward=0.7700971 (451.06 it/sec) -training >> step=4031900, episode=673 reward=0.7820688 (459.68 it/sec) -training >> step=4032000, episode=673 reward=0.7836171 (463.40 it/sec) -training >> step=4032100, episode=673 reward=0.7725427 (455.42 it/sec) -training >> step=4032200, episode=673 reward=0.7591451 (436.38 it/sec) -training >> step=4032300, episode=673 reward=0.7853117 (471.67 it/sec) -training >> step=4032400, episode=673 reward=0.7863299 (433.16 it/sec) -training >> step=4032500, episode=673 reward=0.7763973 (437.99 it/sec) -training >> step=4032600, episode=673 reward=0.7791086 (480.92 it/sec) -training >> step=4032700, episode=673 reward=0.784116 (442.74 it/sec) -training >> step=4032800, episode=673 reward=0.7599781 (453.11 it/sec) -training >> step=4032900, episode=673 reward=0.8057276 (438.70 it/sec) -training >> step=4033000, episode=673 reward=0.7791844 (400.08 it/sec) -training >> step=4033100, episode=673 reward=0.7730308 (421.93 it/sec) -training >> step=4033200, episode=673 reward=0.7862937 (432.84 it/sec) -training >> step=4033300, episode=673 reward=0.7744955 (441.87 it/sec) -training >> step=4033400, episode=673 reward=0.7604192 (438.71 it/sec) -training >> step=4033500, episode=673 reward=0.7858325 (461.39 it/sec) -training >> step=4033600, episode=673 reward=0.7862139 (425.09 it/sec) -training >> step=4033700, episode=673 reward=0.7650175 (406.71 it/sec) -training >> step=4033800, episode=673 reward=0.7862019 (409.98 it/sec) -training >> step=4033900, episode=673 reward=0.7734482 (421.81 it/sec) -training >> step=4034000, episode=673 reward=0.7576525 (429.90 it/sec) -training >> step=4034100, episode=673 reward=0.7691457 (414.81 it/sec) -training >> step=4034200, episode=673 reward=0.7799417 (422.92 it/sec) -training >> step=4034300, episode=673 reward=0.7844657 (412.71 it/sec) -training >> step=4034400, episode=673 reward=0.7715946 (409.99 it/sec) -training >> step=4034500, episode=673 reward=0.7683464 (409.20 it/sec) -training >> step=4034600, episode=673 reward=0.7861632 (404.23 it/sec) -training >> step=4034700, episode=673 reward=0.7684809 (411.84 it/sec) -training >> step=4034800, episode=673 reward=0.7677869 (410.51 it/sec) -training >> step=4034900, episode=673 reward=0.7685852 (463.84 it/sec) -training >> step=4035000, episode=673 reward=0.7839049 (398.02 it/sec) -training >> step=4035100, episode=673 reward=0.7833575 (384.51 it/sec) -training >> step=4035200, episode=673 reward=0.7838871 (417.13 it/sec) -training >> step=4035300, episode=673 reward=0.7865087 (395.85 it/sec) -training >> step=4035400, episode=673 reward=0.7743315 (405.99 it/sec) -training >> step=4035500, episode=673 reward=0.7713508 (414.02 it/sec) -training >> step=4035600, episode=673 reward=0.7805288 (433.30 it/sec) -training >> step=4035700, episode=673 reward=0.7765434 (448.74 it/sec) -training >> step=4035800, episode=673 reward=0.7637558 (418.93 it/sec) -training >> step=4035900, episode=673 reward=0.7905307 (398.44 it/sec) -training >> step=4036000, episode=673 reward=0.7309663 (420.67 it/sec) -training >> step=4036100, episode=673 reward=0.7594237 (424.87 it/sec) -training >> step=4036200, episode=673 reward=0.7888969 (449.39 it/sec) -training >> step=4036300, episode=673 reward=0.7539074 (431.78 it/sec) -training >> step=4036400, episode=673 reward=0.7771825 (454.20 it/sec) -training >> step=4036500, episode=673 reward=0.7619032 (416.30 it/sec) -training >> step=4036600, episode=673 reward=0.7710456 (436.00 it/sec) -training >> step=4036700, episode=673 reward=0.7664547 (414.91 it/sec) -training >> step=4036800, episode=673 reward=0.7880853 (463.10 it/sec) -training >> step=4036900, episode=673 reward=0.7552885 (436.89 it/sec) -training >> step=4037000, episode=673 reward=0.7605245 (450.80 it/sec) -training >> step=4037100, episode=673 reward=0.7648271 (536.04 it/sec) -training >> step=4037200, episode=673 reward=0.7694607 (466.49 it/sec) -training >> step=4037300, episode=674 reward=0.7968979 (90.60 it/sec) -training >> step=4037400, episode=674 reward=0.7763566 (510.23 it/sec) -training >> step=4037500, episode=674 reward=0.7789587 (503.53 it/sec) -training >> step=4037600, episode=674 reward=0.7585124 (502.46 it/sec) -training >> step=4037700, episode=674 reward=0.7770358 (489.99 it/sec) -training >> step=4037800, episode=674 reward=0.7761976 (508.88 it/sec) -training >> step=4037900, episode=674 reward=0.7862037 (485.76 it/sec) -training >> step=4038000, episode=674 reward=0.7780625 (517.04 it/sec) -training >> step=4038100, episode=674 reward=0.7804043 (490.84 it/sec) -training >> step=4038200, episode=674 reward=0.7743239 (493.37 it/sec) -training >> step=4038300, episode=674 reward=0.7651385 (510.44 it/sec) -training >> step=4038400, episode=674 reward=0.7718685 (503.13 it/sec) -training >> step=4038500, episode=674 reward=0.7690426 (453.02 it/sec) -training >> step=4038600, episode=674 reward=0.7591532 (450.45 it/sec) -training >> step=4038700, episode=674 reward=0.7847263 (498.74 it/sec) -training >> step=4038800, episode=674 reward=0.7948575 (508.33 it/sec) -training >> step=4038900, episode=674 reward=0.7716031 (512.29 it/sec) -training >> step=4039000, episode=674 reward=0.7615124 (507.63 it/sec) -training >> step=4039100, episode=674 reward=0.7461193 (516.48 it/sec) -training >> step=4039200, episode=674 reward=0.7658388 (504.06 it/sec) -training >> step=4039300, episode=674 reward=0.7783595 (503.95 it/sec) -training >> step=4039400, episode=674 reward=0.7853093 (491.65 it/sec) -training >> step=4039500, episode=674 reward=0.7911643 (542.52 it/sec) -training >> step=4039600, episode=674 reward=0.7800094 (449.68 it/sec) -training >> step=4039700, episode=674 reward=0.7768429 (511.86 it/sec) -training >> step=4039800, episode=674 reward=0.7829921 (516.62 it/sec) -training >> step=4039900, episode=674 reward=0.7756575 (492.79 it/sec) -training >> step=4040000, episode=674 reward=0.7733234 (476.88 it/sec) -training >> step=4040100, episode=674 reward=0.7661133 (509.73 it/sec) -training >> step=4040200, episode=674 reward=0.7660604 (513.85 it/sec) -training >> step=4040300, episode=674 reward=0.7831216 (466.09 it/sec) -training >> step=4040400, episode=674 reward=0.7604663 (450.31 it/sec) -training >> step=4040500, episode=674 reward=0.7796003 (526.57 it/sec) -training >> step=4040600, episode=674 reward=0.7586065 (542.16 it/sec) -training >> step=4040700, episode=674 reward=0.7644833 (516.23 it/sec) -training >> step=4040800, episode=674 reward=0.7758929 (529.99 it/sec) -training >> step=4040900, episode=674 reward=0.7949552 (491.44 it/sec) -training >> step=4041000, episode=674 reward=0.7759286 (501.33 it/sec) -training >> step=4041100, episode=674 reward=0.7601537 (507.38 it/sec) -training >> step=4041200, episode=674 reward=0.7620582 (525.44 it/sec) -training >> step=4041300, episode=674 reward=0.7963227 (528.20 it/sec) -training >> step=4041400, episode=674 reward=0.793084 (467.01 it/sec) -training >> step=4041500, episode=674 reward=0.7757475 (517.40 it/sec) -training >> step=4041600, episode=674 reward=0.7759604 (517.06 it/sec) -training >> step=4041700, episode=674 reward=0.7741303 (518.72 it/sec) -training >> step=4041800, episode=674 reward=0.7598321 (521.02 it/sec) -training >> step=4041900, episode=674 reward=0.7657648 (549.26 it/sec) -training >> step=4042000, episode=674 reward=0.7758303 (480.63 it/sec) -training >> step=4042100, episode=674 reward=0.7824822 (530.84 it/sec) -training >> step=4042200, episode=674 reward=0.785345 (501.84 it/sec) -training >> step=4042300, episode=674 reward=0.7565405 (541.12 it/sec) -training >> step=4042400, episode=674 reward=0.7699087 (506.80 it/sec) -training >> step=4042500, episode=674 reward=0.7754689 (485.99 it/sec) -training >> step=4042600, episode=674 reward=0.7584645 (540.08 it/sec) -training >> step=4042700, episode=674 reward=0.7887091 (490.66 it/sec) -training >> step=4042800, episode=674 reward=0.7514302 (489.83 it/sec) -training >> step=4042900, episode=674 reward=0.7900602 (493.28 it/sec) -training >> step=4043000, episode=674 reward=0.7724879 (478.82 it/sec) -training >> step=4043100, episode=674 reward=0.7669376 (497.59 it/sec) -training >> step=4043200, episode=674 reward=0.7868701 (533.28 it/sec) -training >> step=4043300, episode=675 reward=0.7720592 (99.64 it/sec) -training >> step=4043400, episode=675 reward=0.7577172 (492.40 it/sec) -training >> step=4043500, episode=675 reward=0.797585 (519.97 it/sec) -training >> step=4043600, episode=675 reward=0.7846472 (487.76 it/sec) -training >> step=4043700, episode=675 reward=0.7800617 (494.93 it/sec) -training >> step=4043800, episode=675 reward=0.793798 (496.85 it/sec) -training >> step=4043900, episode=675 reward=0.768862 (509.89 it/sec) -training >> step=4044000, episode=675 reward=0.7527826 (534.08 it/sec) -training >> step=4044100, episode=675 reward=0.7827851 (499.37 it/sec) -training >> step=4044200, episode=675 reward=0.7985173 (496.68 it/sec) -training >> step=4044300, episode=675 reward=0.7860715 (524.40 it/sec) -training >> step=4044400, episode=675 reward=0.7736977 (507.44 it/sec) -training >> step=4044500, episode=675 reward=0.7925682 (502.48 it/sec) -training >> step=4044600, episode=675 reward=0.752468 (463.80 it/sec) -training >> step=4044700, episode=675 reward=0.7586196 (463.87 it/sec) -training >> step=4044800, episode=675 reward=0.7958329 (521.02 it/sec) -training >> step=4044900, episode=675 reward=0.7970574 (462.50 it/sec) -training >> step=4045000, episode=675 reward=0.7714111 (491.66 it/sec) -training >> step=4045100, episode=675 reward=0.7906305 (497.94 it/sec) -training >> step=4045200, episode=675 reward=0.7795942 (477.28 it/sec) -training >> step=4045300, episode=675 reward=0.7865273 (549.33 it/sec) -training >> step=4045400, episode=675 reward=0.7929171 (508.30 it/sec) -training >> step=4045500, episode=675 reward=0.7752921 (512.25 it/sec) -training >> step=4045600, episode=675 reward=0.7970661 (521.72 it/sec) -training >> step=4045700, episode=675 reward=0.770341 (468.82 it/sec) -training >> step=4045800, episode=675 reward=0.7661188 (520.16 it/sec) -training >> step=4045900, episode=675 reward=0.7832687 (526.53 it/sec) -training >> step=4046000, episode=675 reward=0.7785299 (534.32 it/sec) -training >> step=4046100, episode=675 reward=0.7689877 (481.20 it/sec) -training >> step=4046200, episode=675 reward=0.7913345 (527.75 it/sec) -training >> step=4046300, episode=675 reward=0.7844942 (502.19 it/sec) -training >> step=4046400, episode=675 reward=0.7817476 (522.41 it/sec) -training >> step=4046500, episode=675 reward=0.7813487 (501.80 it/sec) -training >> step=4046600, episode=675 reward=0.7831632 (510.45 it/sec) -training >> step=4046700, episode=675 reward=0.7884118 (519.71 it/sec) -training >> step=4046800, episode=675 reward=0.7751702 (508.96 it/sec) -training >> step=4046900, episode=675 reward=0.7788067 (527.57 it/sec) -training >> step=4047000, episode=675 reward=0.7972824 (533.23 it/sec) -training >> step=4047100, episode=675 reward=0.7615095 (533.91 it/sec) -training >> step=4047200, episode=675 reward=0.777566 (491.52 it/sec) -training >> step=4047300, episode=675 reward=0.7836363 (498.66 it/sec) -training >> step=4047400, episode=675 reward=0.7936542 (553.28 it/sec) -training >> step=4047500, episode=675 reward=0.7837795 (525.09 it/sec) -training >> step=4047600, episode=675 reward=0.7554308 (494.56 it/sec) -training >> step=4047700, episode=675 reward=0.753697 (530.84 it/sec) -training >> step=4047800, episode=675 reward=0.7600395 (506.58 it/sec) -training >> step=4047900, episode=675 reward=0.7856412 (501.59 it/sec) -training >> step=4048000, episode=675 reward=0.7615699 (546.75 it/sec) -training >> step=4048100, episode=675 reward=0.7756068 (495.12 it/sec) -training >> step=4048200, episode=675 reward=0.7628159 (489.74 it/sec) -training >> step=4048300, episode=675 reward=0.7967194 (514.10 it/sec) -training >> step=4048400, episode=675 reward=0.7750222 (416.82 it/sec) -training >> step=4048500, episode=675 reward=0.7760011 (517.82 it/sec) -training >> step=4048600, episode=675 reward=0.7655752 (517.97 it/sec) -training >> step=4048700, episode=675 reward=0.7765527 (496.37 it/sec) -training >> step=4048800, episode=675 reward=0.7561151 (510.91 it/sec) -training >> step=4048900, episode=675 reward=0.7777045 (496.39 it/sec) -training >> step=4049000, episode=675 reward=0.7750956 (497.62 it/sec) -training >> step=4049100, episode=675 reward=0.7891022 (503.82 it/sec) -training >> step=4049200, episode=675 reward=0.7722372 (500.57 it/sec) -training >> step=4049300, episode=676 reward=0.7996409 (103.63 it/sec) -training >> step=4049400, episode=676 reward=0.7684091 (462.30 it/sec) -training >> step=4049500, episode=676 reward=0.766833 (473.79 it/sec) -training >> step=4049600, episode=676 reward=0.7686131 (536.52 it/sec) -training >> step=4049700, episode=676 reward=0.7818571 (535.08 it/sec) -training >> step=4049800, episode=676 reward=0.78414 (515.05 it/sec) -training >> step=4049900, episode=676 reward=0.7569114 (482.22 it/sec) -training >> step=4050000, episode=676 reward=0.7647313 (474.59 it/sec) -training >> step=4050100, episode=676 reward=0.7871314 (491.47 it/sec) -training >> step=4050200, episode=676 reward=0.7823386 (511.93 it/sec) -training >> step=4050300, episode=676 reward=0.7744572 (465.11 it/sec) -training >> step=4050400, episode=676 reward=0.7932702 (492.53 it/sec) -training >> step=4050500, episode=676 reward=0.7778163 (496.01 it/sec) -training >> step=4050600, episode=676 reward=0.7714348 (520.10 it/sec) -training >> step=4050700, episode=676 reward=0.761098 (535.90 it/sec) -training >> step=4050800, episode=676 reward=0.7917194 (481.84 it/sec) -training >> step=4050900, episode=676 reward=0.7668902 (493.68 it/sec) -training >> step=4051000, episode=676 reward=0.8019216 (501.90 it/sec) -training >> step=4051100, episode=676 reward=0.7905617 (538.20 it/sec) -training >> step=4051200, episode=676 reward=0.7828588 (524.33 it/sec) -training >> step=4051300, episode=676 reward=0.7761047 (481.67 it/sec) -training >> step=4051400, episode=676 reward=0.7951308 (532.62 it/sec) -training >> step=4051500, episode=676 reward=0.7797229 (462.28 it/sec) -training >> step=4051600, episode=676 reward=0.7700425 (514.49 it/sec) -training >> step=4051700, episode=676 reward=0.7972346 (498.34 it/sec) -training >> step=4051800, episode=676 reward=0.7715256 (525.09 it/sec) -training >> step=4051900, episode=676 reward=0.789302 (499.49 it/sec) -training >> step=4052000, episode=676 reward=0.7654532 (527.16 it/sec) -training >> step=4052100, episode=676 reward=0.7758468 (494.49 it/sec) -training >> step=4052200, episode=676 reward=0.7798805 (560.00 it/sec) -training >> step=4052300, episode=676 reward=0.7830577 (433.38 it/sec) -training >> step=4052400, episode=676 reward=0.769362 (497.49 it/sec) -training >> step=4052500, episode=676 reward=0.8000354 (497.16 it/sec) -training >> step=4052600, episode=676 reward=0.7783395 (490.27 it/sec) -training >> step=4052700, episode=676 reward=0.784052 (538.44 it/sec) -training >> step=4052800, episode=676 reward=0.7708834 (520.41 it/sec) -training >> step=4052900, episode=676 reward=0.7809696 (537.58 it/sec) -training >> step=4053000, episode=676 reward=0.8051183 (499.19 it/sec) -training >> step=4053100, episode=676 reward=0.7916952 (507.51 it/sec) -training >> step=4053200, episode=676 reward=0.7881234 (512.35 it/sec) -training >> step=4053300, episode=676 reward=0.7881683 (521.31 it/sec) -training >> step=4053400, episode=676 reward=0.7949217 (503.14 it/sec) -training >> step=4053500, episode=676 reward=0.769532 (504.34 it/sec) -training >> step=4053600, episode=676 reward=0.7679902 (554.77 it/sec) -training >> step=4053700, episode=676 reward=0.749352 (508.52 it/sec) -training >> step=4053800, episode=676 reward=0.7634293 (510.39 it/sec) -training >> step=4053900, episode=676 reward=0.7783196 (527.44 it/sec) -training >> step=4054000, episode=676 reward=0.7685627 (518.54 it/sec) -training >> step=4054100, episode=676 reward=0.7874848 (450.42 it/sec) -training >> step=4054200, episode=676 reward=0.7690684 (495.59 it/sec) -training >> step=4054300, episode=676 reward=0.7786044 (514.42 it/sec) -training >> step=4054400, episode=676 reward=0.7605849 (451.67 it/sec) -training >> step=4054500, episode=676 reward=0.7653987 (510.36 it/sec) -training >> step=4054600, episode=676 reward=0.7910637 (501.35 it/sec) -training >> step=4054700, episode=676 reward=0.7587423 (523.15 it/sec) -training >> step=4054800, episode=676 reward=0.7641841 (484.81 it/sec) -training >> step=4054900, episode=676 reward=0.7872916 (449.23 it/sec) -training >> step=4055000, episode=676 reward=0.7628564 (506.05 it/sec) -training >> step=4055100, episode=676 reward=0.7613746 (507.98 it/sec) -training >> step=4055200, episode=676 reward=0.776712 (504.12 it/sec) -training >> step=4055300, episode=677 reward=0.7738964 (114.71 it/sec) -training >> step=4055400, episode=677 reward=0.7654545 (532.73 it/sec) -training >> step=4055500, episode=677 reward=0.7592518 (508.32 it/sec) -training >> step=4055600, episode=677 reward=0.7691967 (503.62 it/sec) -training >> step=4055700, episode=677 reward=0.7752488 (510.17 it/sec) -training >> step=4055800, episode=677 reward=0.7575334 (564.19 it/sec) -training >> step=4055900, episode=677 reward=0.7545095 (495.69 it/sec) -training >> step=4056000, episode=677 reward=0.784403 (497.82 it/sec) -training >> step=4056100, episode=677 reward=0.7831478 (549.07 it/sec) -training >> step=4056200, episode=677 reward=0.7735583 (468.10 it/sec) -training >> step=4056300, episode=677 reward=0.7846978 (519.92 it/sec) -training >> step=4056400, episode=677 reward=0.7897847 (482.23 it/sec) -training >> step=4056500, episode=677 reward=0.7823737 (524.95 it/sec) -training >> step=4056600, episode=677 reward=0.7759626 (498.95 it/sec) -training >> step=4056700, episode=677 reward=0.7726307 (504.13 it/sec) -training >> step=4056800, episode=677 reward=0.7935008 (531.93 it/sec) -training >> step=4056900, episode=677 reward=0.7330835 (504.55 it/sec) -training >> step=4057000, episode=677 reward=0.7719943 (499.64 it/sec) -training >> step=4057100, episode=677 reward=0.7883198 (411.69 it/sec) -training >> step=4057200, episode=677 reward=0.7654493 (502.61 it/sec) -training >> step=4057300, episode=677 reward=0.7958545 (516.33 it/sec) -training >> step=4057400, episode=677 reward=0.7662101 (481.58 it/sec) -training >> step=4057500, episode=677 reward=0.7667716 (510.32 it/sec) -training >> step=4057600, episode=677 reward=0.767038 (537.76 it/sec) -training >> step=4057700, episode=677 reward=0.7959101 (503.41 it/sec) -training >> step=4057800, episode=677 reward=0.7587795 (523.13 it/sec) -training >> step=4057900, episode=677 reward=0.7810804 (521.59 it/sec) -training >> step=4058000, episode=677 reward=0.7560406 (502.33 it/sec) -training >> step=4058100, episode=677 reward=0.7748123 (542.41 it/sec) -training >> step=4058200, episode=677 reward=0.7788157 (486.78 it/sec) -training >> step=4058300, episode=677 reward=0.7846264 (491.16 it/sec) -training >> step=4058400, episode=677 reward=0.7797272 (494.07 it/sec) -training >> step=4058500, episode=677 reward=0.793318 (482.79 it/sec) -training >> step=4058600, episode=677 reward=0.7953331 (510.50 it/sec) -training >> step=4058700, episode=677 reward=0.7817574 (481.44 it/sec) -training >> step=4058800, episode=677 reward=0.7682557 (503.91 it/sec) -training >> step=4058900, episode=677 reward=0.7943387 (490.55 it/sec) -training >> step=4059000, episode=677 reward=0.7661544 (526.30 it/sec) -training >> step=4059100, episode=677 reward=0.7715706 (474.46 it/sec) -training >> step=4059200, episode=677 reward=0.7798276 (474.84 it/sec) -training >> step=4059300, episode=677 reward=0.7670526 (492.93 it/sec) -training >> step=4059400, episode=677 reward=0.7797056 (528.17 it/sec) -training >> step=4059500, episode=677 reward=0.7963247 (517.76 it/sec) -training >> step=4059600, episode=677 reward=0.7762209 (505.76 it/sec) -training >> step=4059700, episode=677 reward=0.7691042 (515.94 it/sec) -training >> step=4059800, episode=677 reward=0.7585614 (485.62 it/sec) -training >> step=4059900, episode=677 reward=0.7888278 (520.78 it/sec) -training >> step=4060000, episode=677 reward=0.7996563 (501.12 it/sec) -training >> step=4060100, episode=677 reward=0.7783085 (533.63 it/sec) -training >> step=4060200, episode=677 reward=0.7779108 (519.23 it/sec) -training >> step=4060300, episode=677 reward=0.7485206 (439.27 it/sec) -training >> step=4060400, episode=677 reward=0.7833226 (572.33 it/sec) -training >> step=4060500, episode=677 reward=0.769962 (496.32 it/sec) -training >> step=4060600, episode=677 reward=0.761547 (516.06 it/sec) -training >> step=4060700, episode=677 reward=0.7831744 (512.77 it/sec) -training >> step=4060800, episode=677 reward=0.7538565 (470.66 it/sec) -training >> step=4060900, episode=677 reward=0.7835459 (508.89 it/sec) -training >> step=4061000, episode=677 reward=0.7906206 (472.25 it/sec) -training >> step=4061100, episode=677 reward=0.7782623 (497.29 it/sec) -training >> step=4061200, episode=677 reward=0.7892001 (547.22 it/sec) -training >> step=4061300, episode=678 reward=0.7713734 (97.44 it/sec) -training >> step=4061400, episode=678 reward=0.7740369 (362.00 it/sec) -training >> step=4061500, episode=678 reward=0.7832912 (496.66 it/sec) -training >> step=4061600, episode=678 reward=0.7685964 (461.24 it/sec) -training >> step=4061700, episode=678 reward=0.791245 (499.81 it/sec) -training >> step=4061800, episode=678 reward=0.7750831 (541.70 it/sec) -training >> step=4061900, episode=678 reward=0.7926636 (486.97 it/sec) -training >> step=4062000, episode=678 reward=0.767231 (518.55 it/sec) -training >> step=4062100, episode=678 reward=0.7560578 (505.57 it/sec) -training >> step=4062200, episode=678 reward=0.7801083 (472.32 it/sec) -training >> step=4062300, episode=678 reward=0.7824299 (525.08 it/sec) -training >> step=4062400, episode=678 reward=0.757035 (459.01 it/sec) -training >> step=4062500, episode=678 reward=0.772506 (549.71 it/sec) -training >> step=4062600, episode=678 reward=0.7671044 (509.42 it/sec) -training >> step=4062700, episode=678 reward=0.7789571 (488.47 it/sec) -training >> step=4062800, episode=678 reward=0.7911157 (528.30 it/sec) -training >> step=4062900, episode=678 reward=0.7707234 (514.17 it/sec) -training >> step=4063000, episode=678 reward=0.7821604 (505.94 it/sec) -training >> step=4063100, episode=678 reward=0.7673424 (473.52 it/sec) -training >> step=4063200, episode=678 reward=0.7623979 (490.60 it/sec) -training >> step=4063300, episode=678 reward=0.7586228 (482.07 it/sec) -training >> step=4063400, episode=678 reward=0.7730345 (529.72 it/sec) -training >> step=4063500, episode=678 reward=0.7676497 (482.48 it/sec) -training >> step=4063600, episode=678 reward=0.7719595 (527.74 it/sec) -training >> step=4063700, episode=678 reward=0.7728621 (470.69 it/sec) -training >> step=4063800, episode=678 reward=0.7705652 (483.02 it/sec) -training >> step=4063900, episode=678 reward=0.7783322 (504.80 it/sec) -training >> step=4064000, episode=678 reward=0.7785623 (508.25 it/sec) -training >> step=4064100, episode=678 reward=0.7869638 (506.14 it/sec) -training >> step=4064200, episode=678 reward=0.7749165 (514.28 it/sec) -training >> step=4064300, episode=678 reward=0.7693039 (495.38 it/sec) -training >> step=4064400, episode=678 reward=0.7639587 (519.45 it/sec) -training >> step=4064500, episode=678 reward=0.7834503 (467.44 it/sec) -training >> step=4064600, episode=678 reward=0.7866785 (506.44 it/sec) -training >> step=4064700, episode=678 reward=0.7740303 (484.54 it/sec) -training >> step=4064800, episode=678 reward=0.7849514 (457.30 it/sec) -training >> step=4064900, episode=678 reward=0.7679748 (511.80 it/sec) -training >> step=4065000, episode=678 reward=0.7881213 (520.13 it/sec) -training >> step=4065100, episode=678 reward=0.7986383 (482.99 it/sec) -training >> step=4065200, episode=678 reward=0.7931574 (505.45 it/sec) -training >> step=4065300, episode=678 reward=0.7716406 (510.63 it/sec) -training >> step=4065400, episode=678 reward=0.7758111 (561.36 it/sec) -training >> step=4065500, episode=678 reward=0.7822088 (492.01 it/sec) -training >> step=4065600, episode=678 reward=0.7451067 (500.76 it/sec) -training >> step=4065700, episode=678 reward=0.7510595 (514.22 it/sec) -training >> step=4065800, episode=678 reward=0.7801927 (470.45 it/sec) -training >> step=4065900, episode=678 reward=0.7610234 (515.39 it/sec) -training >> step=4066000, episode=678 reward=0.7843226 (512.63 it/sec) -training >> step=4066100, episode=678 reward=0.788483 (533.14 it/sec) -training >> step=4066200, episode=678 reward=0.7738711 (499.72 it/sec) -training >> step=4066300, episode=678 reward=0.7969266 (476.73 it/sec) -training >> step=4066400, episode=678 reward=0.7750331 (510.55 it/sec) -training >> step=4066500, episode=678 reward=0.8181204 (560.00 it/sec) -training >> step=4066600, episode=678 reward=0.7518113 (501.39 it/sec) -training >> step=4066700, episode=678 reward=0.7595182 (473.14 it/sec) -training >> step=4066800, episode=678 reward=0.7769639 (550.31 it/sec) -training >> step=4066900, episode=678 reward=0.7611548 (516.21 it/sec) -training >> step=4067000, episode=678 reward=0.7736067 (522.96 it/sec) -training >> step=4067100, episode=678 reward=0.7644845 (482.57 it/sec) -training >> step=4067200, episode=678 reward=0.7702731 (503.23 it/sec) -training >> step=4067300, episode=679 reward=0.7699947 (116.91 it/sec) -training >> step=4067400, episode=679 reward=0.7584282 (509.85 it/sec) -training >> step=4067500, episode=679 reward=0.744574 (361.19 it/sec) -training >> step=4067600, episode=679 reward=0.7788851 (546.89 it/sec) -training >> step=4067700, episode=679 reward=0.78835 (515.22 it/sec) -training >> step=4067800, episode=679 reward=0.7677745 (471.12 it/sec) -training >> step=4067900, episode=679 reward=0.7795817 (475.77 it/sec) -training >> step=4068000, episode=679 reward=0.7826826 (531.04 it/sec) -training >> step=4068100, episode=679 reward=0.7942698 (502.59 it/sec) -training >> step=4068200, episode=679 reward=0.7769276 (515.75 it/sec) -training >> step=4068300, episode=679 reward=0.7761679 (518.05 it/sec) -training >> step=4068400, episode=679 reward=0.7750824 (450.48 it/sec) -training >> step=4068500, episode=679 reward=0.792173 (494.78 it/sec) -training >> step=4068600, episode=679 reward=0.7737711 (485.29 it/sec) -training >> step=4068700, episode=679 reward=0.7759663 (499.28 it/sec) -training >> step=4068800, episode=679 reward=0.7826728 (515.49 it/sec) -training >> step=4068900, episode=679 reward=0.7854243 (509.44 it/sec) -training >> step=4069000, episode=679 reward=0.7732501 (530.25 it/sec) -training >> step=4069100, episode=679 reward=0.7729108 (507.63 it/sec) -training >> step=4069200, episode=679 reward=0.7800781 (502.52 it/sec) -training >> step=4069300, episode=679 reward=0.7664596 (525.07 it/sec) -training >> step=4069400, episode=679 reward=0.7664667 (518.03 it/sec) -training >> step=4069500, episode=679 reward=0.744846 (462.99 it/sec) -training >> step=4069600, episode=679 reward=0.7808779 (457.55 it/sec) -training >> step=4069700, episode=679 reward=0.7613978 (545.84 it/sec) -training >> step=4069800, episode=679 reward=0.7782746 (501.62 it/sec) -training >> step=4069900, episode=679 reward=0.7828895 (470.31 it/sec) -training >> step=4070000, episode=679 reward=0.7702028 (516.59 it/sec) -training >> step=4070100, episode=679 reward=0.7780256 (516.17 it/sec) -training >> step=4070200, episode=679 reward=0.755652 (487.20 it/sec) -training >> step=4070300, episode=679 reward=0.7812474 (500.03 it/sec) -training >> step=4070400, episode=679 reward=0.7829539 (475.28 it/sec) -training >> step=4070500, episode=679 reward=0.7756895 (513.21 it/sec) -training >> step=4070600, episode=679 reward=0.7835116 (424.07 it/sec) -training >> step=4070700, episode=679 reward=0.7883011 (421.10 it/sec) -training >> step=4070800, episode=679 reward=0.7851183 (392.13 it/sec) -training >> step=4070900, episode=679 reward=0.769229 (452.86 it/sec) -training >> step=4071000, episode=679 reward=0.7680632 (448.92 it/sec) -training >> step=4071100, episode=679 reward=0.7692197 (475.96 it/sec) -training >> step=4071200, episode=679 reward=0.7656949 (481.57 it/sec) -training >> step=4071300, episode=679 reward=0.7786623 (458.55 it/sec) -training >> step=4071400, episode=679 reward=0.7908694 (415.40 it/sec) -training >> step=4071500, episode=679 reward=0.7514837 (471.06 it/sec) -training >> step=4071600, episode=679 reward=0.7715511 (483.81 it/sec) -training >> step=4071700, episode=679 reward=0.7755865 (490.19 it/sec) -training >> step=4071800, episode=679 reward=0.7827234 (460.82 it/sec) -training >> step=4071900, episode=679 reward=0.7838644 (507.92 it/sec) -training >> step=4072000, episode=679 reward=0.7687426 (476.64 it/sec) -training >> step=4072100, episode=679 reward=0.7862657 (531.02 it/sec) -training >> step=4072200, episode=679 reward=0.7812447 (445.40 it/sec) -training >> step=4072300, episode=679 reward=0.7416925 (508.55 it/sec) -training >> step=4072400, episode=679 reward=0.7564142 (480.10 it/sec) -training >> step=4072500, episode=679 reward=0.796905 (475.05 it/sec) -training >> step=4072600, episode=679 reward=0.7712286 (518.99 it/sec) -training >> step=4072700, episode=679 reward=0.786681 (537.84 it/sec) -training >> step=4072800, episode=679 reward=0.7777701 (495.68 it/sec) -training >> step=4072900, episode=679 reward=0.7648033 (538.17 it/sec) -training >> step=4073000, episode=679 reward=0.7628796 (544.82 it/sec) -training >> step=4073100, episode=679 reward=0.780292 (528.52 it/sec) -training >> step=4073200, episode=679 reward=0.7630224 (535.81 it/sec) -training >> step=4073300, episode=680 reward=0.7792811 (100.02 it/sec) -training >> step=4073400, episode=680 reward=0.7812539 (475.72 it/sec) -training >> step=4073500, episode=680 reward=0.7534792 (497.62 it/sec) -training >> step=4073600, episode=680 reward=0.7719386 (544.87 it/sec) -training >> step=4073700, episode=680 reward=0.7747265 (557.62 it/sec) -training >> step=4073800, episode=680 reward=0.7881174 (524.45 it/sec) -training >> step=4073900, episode=680 reward=0.7589234 (380.38 it/sec) -training >> step=4074000, episode=680 reward=0.7792441 (513.39 it/sec) -training >> step=4074100, episode=680 reward=0.7774088 (526.45 it/sec) -training >> step=4074200, episode=680 reward=0.7747403 (522.33 it/sec) -training >> step=4074300, episode=680 reward=0.785512 (557.98 it/sec) -training >> step=4074400, episode=680 reward=0.7621711 (512.91 it/sec) -training >> step=4074500, episode=680 reward=0.7836331 (477.51 it/sec) -training >> step=4074600, episode=680 reward=0.7832571 (552.88 it/sec) -training >> step=4074700, episode=680 reward=0.790919 (541.15 it/sec) -training >> step=4074800, episode=680 reward=0.7775068 (534.02 it/sec) -training >> step=4074900, episode=680 reward=0.7916711 (520.11 it/sec) -training >> step=4075000, episode=680 reward=0.7713041 (475.99 it/sec) -training >> step=4075100, episode=680 reward=0.7817745 (499.75 it/sec) -training >> step=4075200, episode=680 reward=0.7942263 (513.59 it/sec) -training >> step=4075300, episode=680 reward=0.7734877 (530.64 it/sec) -training >> step=4075400, episode=680 reward=0.786454 (569.20 it/sec) -training >> step=4075500, episode=680 reward=0.7798131 (521.99 it/sec) -training >> step=4075600, episode=680 reward=0.779452 (478.00 it/sec) -training >> step=4075700, episode=680 reward=0.7805247 (559.01 it/sec) -training >> step=4075800, episode=680 reward=0.7930463 (516.43 it/sec) -training >> step=4075900, episode=680 reward=0.7693313 (524.03 it/sec) -training >> step=4076000, episode=680 reward=0.7620293 (530.02 it/sec) -training >> step=4076100, episode=680 reward=0.7827609 (512.28 it/sec) -training >> step=4076200, episode=680 reward=0.7679883 (513.31 it/sec) -training >> step=4076300, episode=680 reward=0.7947405 (517.63 it/sec) -training >> step=4076400, episode=680 reward=0.7659122 (514.97 it/sec) -training >> step=4076500, episode=680 reward=0.7903083 (564.07 it/sec) -training >> step=4076600, episode=680 reward=0.7826782 (518.55 it/sec) -training >> step=4076700, episode=680 reward=0.781024 (513.74 it/sec) -training >> step=4076800, episode=680 reward=0.792599 (534.51 it/sec) -training >> step=4076900, episode=680 reward=0.7869007 (542.81 it/sec) -training >> step=4077000, episode=680 reward=0.7644674 (548.67 it/sec) -training >> step=4077100, episode=680 reward=0.7644045 (499.06 it/sec) -training >> step=4077200, episode=680 reward=0.7834536 (522.26 it/sec) -training >> step=4077300, episode=680 reward=0.795884 (560.79 it/sec) -training >> step=4077400, episode=680 reward=0.7891585 (469.18 it/sec) -training >> step=4077500, episode=680 reward=0.7742175 (475.17 it/sec) -training >> step=4077600, episode=680 reward=0.7861611 (547.40 it/sec) -training >> step=4077700, episode=680 reward=0.7887929 (472.30 it/sec) -training >> step=4077800, episode=680 reward=0.7705031 (551.40 it/sec) -training >> step=4077900, episode=680 reward=0.7664831 (543.54 it/sec) -training >> step=4078000, episode=680 reward=0.7733932 (462.29 it/sec) -training >> step=4078100, episode=680 reward=0.7710474 (488.03 it/sec) -training >> step=4078200, episode=680 reward=0.7702448 (486.95 it/sec) -training >> step=4078300, episode=680 reward=0.7676046 (516.71 it/sec) -training >> step=4078400, episode=680 reward=0.76186 (543.30 it/sec) -training >> step=4078500, episode=680 reward=0.7662928 (542.89 it/sec) -training >> step=4078600, episode=680 reward=0.7790614 (531.96 it/sec) -training >> step=4078700, episode=680 reward=0.7833332 (496.19 it/sec) -training >> step=4078800, episode=680 reward=0.7722353 (517.64 it/sec) -training >> step=4078900, episode=680 reward=0.7571252 (542.97 it/sec) -training >> step=4079000, episode=680 reward=0.7744742 (529.47 it/sec) -training >> step=4079100, episode=680 reward=0.7752675 (530.14 it/sec) -training >> step=4079200, episode=680 reward=0.7639087 (497.56 it/sec) -training >> step=4079300, episode=681 reward=0.7790219 (114.01 it/sec) -training >> step=4079400, episode=681 reward=0.7803514 (492.60 it/sec) -training >> step=4079500, episode=681 reward=0.7568825 (496.04 it/sec) -training >> step=4079600, episode=681 reward=0.7725596 (505.19 it/sec) -training >> step=4079700, episode=681 reward=0.7871577 (504.81 it/sec) -training >> step=4079800, episode=681 reward=0.7662374 (530.35 it/sec) -training >> step=4079900, episode=681 reward=0.7865078 (358.38 it/sec) -training >> step=4080000, episode=681 reward=0.767158 (508.74 it/sec) -training >> step=4080100, episode=681 reward=0.7837767 (519.54 it/sec) -training >> step=4080200, episode=681 reward=0.7708148 (487.70 it/sec) -training >> step=4080300, episode=681 reward=0.7663195 (517.64 it/sec) -training >> step=4080400, episode=681 reward=0.7652337 (511.98 it/sec) -training >> step=4080500, episode=681 reward=0.7847403 (510.51 it/sec) -training >> step=4080600, episode=681 reward=0.784687 (503.07 it/sec) -training >> step=4080700, episode=681 reward=0.770035 (468.35 it/sec) -training >> step=4080800, episode=681 reward=0.7744498 (532.67 it/sec) -training >> step=4080900, episode=681 reward=0.7670909 (467.15 it/sec) -training >> step=4081000, episode=681 reward=0.768249 (478.49 it/sec) -training >> step=4081100, episode=681 reward=0.7585513 (514.11 it/sec) -training >> step=4081200, episode=681 reward=0.7669092 (529.31 it/sec) -training >> step=4081300, episode=681 reward=0.7760901 (534.82 it/sec) -training >> step=4081400, episode=681 reward=0.7702612 (482.02 it/sec) -training >> step=4081500, episode=681 reward=0.7708688 (494.86 it/sec) -training >> step=4081600, episode=681 reward=0.7833639 (483.13 it/sec) -training >> step=4081700, episode=681 reward=0.7955793 (465.23 it/sec) -training >> step=4081800, episode=681 reward=0.7871474 (492.16 it/sec) -training >> step=4081900, episode=681 reward=0.7517481 (486.63 it/sec) -training >> step=4082000, episode=681 reward=0.7649541 (476.21 it/sec) -training >> step=4082100, episode=681 reward=0.7910391 (498.61 it/sec) -training >> step=4082200, episode=681 reward=0.7806072 (463.98 it/sec) -training >> step=4082300, episode=681 reward=0.7725034 (450.58 it/sec) -training >> step=4082400, episode=681 reward=0.7805054 (417.80 it/sec) -training >> step=4082500, episode=681 reward=0.7602594 (448.23 it/sec) -training >> step=4082600, episode=681 reward=0.7863542 (468.75 it/sec) -training >> step=4082700, episode=681 reward=0.7730861 (380.50 it/sec) -training >> step=4082800, episode=681 reward=0.766205 (468.28 it/sec) -training >> step=4082900, episode=681 reward=0.7752087 (431.13 it/sec) -training >> step=4083000, episode=681 reward=0.7738457 (510.45 it/sec) -training >> step=4083100, episode=681 reward=0.7841726 (521.57 it/sec) -training >> step=4083200, episode=681 reward=0.7842221 (506.35 it/sec) -training >> step=4083300, episode=681 reward=0.7776079 (542.25 it/sec) -training >> step=4083400, episode=681 reward=0.7733708 (506.80 it/sec) -training >> step=4083500, episode=681 reward=0.7723699 (496.25 it/sec) -training >> step=4083600, episode=681 reward=0.7656717 (508.65 it/sec) -training >> step=4083700, episode=681 reward=0.7697654 (504.97 it/sec) -training >> step=4083800, episode=681 reward=0.7760177 (488.88 it/sec) -training >> step=4083900, episode=681 reward=0.7631047 (453.34 it/sec) -training >> step=4084000, episode=681 reward=0.7781911 (484.94 it/sec) -training >> step=4084100, episode=681 reward=0.772064 (442.92 it/sec) -training >> step=4084200, episode=681 reward=0.7792145 (457.78 it/sec) -training >> step=4084300, episode=681 reward=0.7865245 (442.62 it/sec) -training >> step=4084400, episode=681 reward=0.782254 (488.80 it/sec) -training >> step=4084500, episode=681 reward=0.774093 (516.47 it/sec) -training >> step=4084600, episode=681 reward=0.7807454 (550.30 it/sec) -training >> step=4084700, episode=681 reward=0.7746265 (515.06 it/sec) -training >> step=4084800, episode=681 reward=0.7899104 (575.17 it/sec) -training >> step=4084900, episode=681 reward=0.7738013 (507.72 it/sec) -training >> step=4085000, episode=681 reward=0.7815917 (531.11 it/sec) -training >> step=4085100, episode=681 reward=0.772265 (556.66 it/sec) -training >> step=4085200, episode=681 reward=0.774936 (531.88 it/sec) -training >> step=4085300, episode=682 reward=0.7818421 (105.39 it/sec) -training >> step=4085400, episode=682 reward=0.7771382 (498.02 it/sec) -training >> step=4085500, episode=682 reward=0.7559189 (543.37 it/sec) -training >> step=4085600, episode=682 reward=0.7795519 (519.02 it/sec) -training >> step=4085700, episode=682 reward=0.7653452 (523.72 it/sec) -training >> step=4085800, episode=682 reward=0.8001032 (556.82 it/sec) -training >> step=4085900, episode=682 reward=0.7699919 (547.86 it/sec) -training >> step=4086000, episode=682 reward=0.7761234 (517.41 it/sec) -training >> step=4086100, episode=682 reward=0.7480035 (462.03 it/sec) -training >> step=4086200, episode=682 reward=0.7834198 (395.81 it/sec) -training >> step=4086300, episode=682 reward=0.7626849 (522.80 it/sec) -training >> step=4086400, episode=682 reward=0.7980068 (540.41 it/sec) -training >> step=4086500, episode=682 reward=0.7736567 (524.39 it/sec) -training >> step=4086600, episode=682 reward=0.7894041 (558.95 it/sec) -training >> step=4086700, episode=682 reward=0.7714332 (500.48 it/sec) -training >> step=4086800, episode=682 reward=0.757645 (540.10 it/sec) -training >> step=4086900, episode=682 reward=0.7761965 (560.92 it/sec) -training >> step=4087000, episode=682 reward=0.7718358 (544.76 it/sec) -training >> step=4087100, episode=682 reward=0.7770774 (542.25 it/sec) -training >> step=4087200, episode=682 reward=0.7621928 (517.41 it/sec) -training >> step=4087300, episode=682 reward=0.7802342 (544.81 it/sec) -training >> step=4087400, episode=682 reward=0.770104 (484.38 it/sec) -training >> step=4087500, episode=682 reward=0.7708238 (514.28 it/sec) -training >> step=4087600, episode=682 reward=0.7725002 (560.99 it/sec) -training >> step=4087700, episode=682 reward=0.7693221 (530.80 it/sec) -training >> step=4087800, episode=682 reward=0.7700602 (515.00 it/sec) -training >> step=4087900, episode=682 reward=0.7826188 (452.71 it/sec) -training >> step=4088000, episode=682 reward=0.7770595 (552.70 it/sec) -training >> step=4088100, episode=682 reward=0.75862 (517.45 it/sec) -training >> step=4088200, episode=682 reward=0.7743727 (451.56 it/sec) -training >> step=4088300, episode=682 reward=0.7666488 (507.72 it/sec) -training >> step=4088400, episode=682 reward=0.7971877 (556.30 it/sec) -training >> step=4088500, episode=682 reward=0.7861513 (549.00 it/sec) -training >> step=4088600, episode=682 reward=0.7642928 (537.28 it/sec) -training >> step=4088700, episode=682 reward=0.7773981 (565.65 it/sec) -training >> step=4088800, episode=682 reward=0.7724146 (520.60 it/sec) -training >> step=4088900, episode=682 reward=0.7815542 (506.16 it/sec) -training >> step=4089000, episode=682 reward=0.7797781 (527.45 it/sec) -training >> step=4089100, episode=682 reward=0.7728179 (550.39 it/sec) -training >> step=4089200, episode=682 reward=0.7663929 (522.12 it/sec) -training >> step=4089300, episode=682 reward=0.7964418 (483.39 it/sec) -training >> step=4089400, episode=682 reward=0.780801 (487.56 it/sec) -training >> step=4089500, episode=682 reward=0.7634407 (510.03 it/sec) -training >> step=4089600, episode=682 reward=0.7809517 (527.19 it/sec) -training >> step=4089700, episode=682 reward=0.7562256 (507.84 it/sec) -training >> step=4089800, episode=682 reward=0.7733111 (505.40 it/sec) -training >> step=4089900, episode=682 reward=0.7743706 (509.89 it/sec) -training >> step=4090000, episode=682 reward=0.7825904 (517.80 it/sec) -training >> step=4090100, episode=682 reward=0.7733758 (537.08 it/sec) -training >> step=4090200, episode=682 reward=0.7745099 (543.80 it/sec) -training >> step=4090300, episode=682 reward=0.7908813 (528.96 it/sec) -training >> step=4090400, episode=682 reward=0.7517747 (439.18 it/sec) -training >> step=4090500, episode=682 reward=0.7769221 (427.83 it/sec) -training >> step=4090600, episode=682 reward=0.7725026 (489.72 it/sec) -training >> step=4090700, episode=682 reward=0.771754 (461.02 it/sec) -training >> step=4090800, episode=682 reward=0.7499437 (498.14 it/sec) -training >> step=4090900, episode=682 reward=0.7902185 (462.46 it/sec) -training >> step=4091000, episode=682 reward=0.7685582 (548.81 it/sec) -training >> step=4091100, episode=682 reward=0.7570505 (529.79 it/sec) -training >> step=4091200, episode=682 reward=0.7731801 (542.58 it/sec) -training >> step=4091300, episode=683 reward=0.7863706 (124.18 it/sec) -training >> step=4091400, episode=683 reward=0.7822859 (388.39 it/sec) -training >> step=4091500, episode=683 reward=0.7747316 (496.16 it/sec) -training >> step=4091600, episode=683 reward=0.7818639 (437.82 it/sec) -training >> step=4091700, episode=683 reward=0.7699774 (497.78 it/sec) -training >> step=4091800, episode=683 reward=0.7754799 (534.16 it/sec) -training >> step=4091900, episode=683 reward=0.785674 (516.26 it/sec) -training >> step=4092000, episode=683 reward=0.769874 (523.92 it/sec) -training >> step=4092100, episode=683 reward=0.7647576 (487.13 it/sec) -training >> step=4092200, episode=683 reward=0.7629524 (463.22 it/sec) -training >> step=4092300, episode=683 reward=0.7809896 (363.82 it/sec) -training >> step=4092400, episode=683 reward=0.7828577 (485.26 it/sec) -training >> step=4092500, episode=683 reward=0.7811351 (493.60 it/sec) -training >> step=4092600, episode=683 reward=0.7915106 (498.29 it/sec) -training >> step=4092700, episode=683 reward=0.782779 (495.83 it/sec) -training >> step=4092800, episode=683 reward=0.7651047 (547.48 it/sec) -training >> step=4092900, episode=683 reward=0.7895883 (507.66 it/sec) -training >> step=4093000, episode=683 reward=0.764166 (493.49 it/sec) -training >> step=4093100, episode=683 reward=0.7833415 (448.30 it/sec) -training >> step=4093200, episode=683 reward=0.756928 (414.85 it/sec) -training >> step=4093300, episode=683 reward=0.7534345 (522.59 it/sec) -training >> step=4093400, episode=683 reward=0.7924647 (542.82 it/sec) -training >> step=4093500, episode=683 reward=0.7634531 (539.26 it/sec) -training >> step=4093600, episode=683 reward=0.7892662 (471.08 it/sec) -training >> step=4093700, episode=683 reward=0.7721891 (478.82 it/sec) -training >> step=4093800, episode=683 reward=0.7468446 (574.21 it/sec) -training >> step=4093900, episode=683 reward=0.7785862 (536.09 it/sec) -training >> step=4094000, episode=683 reward=0.7363624 (507.61 it/sec) -training >> step=4094100, episode=683 reward=0.7902527 (549.79 it/sec) -training >> step=4094200, episode=683 reward=0.8026191 (537.76 it/sec) -training >> step=4094300, episode=683 reward=0.800316 (545.49 it/sec) -training >> step=4094400, episode=683 reward=0.7966151 (500.30 it/sec) -training >> step=4094500, episode=683 reward=0.7977526 (578.32 it/sec) -training >> step=4094600, episode=683 reward=0.7616764 (509.86 it/sec) -training >> step=4094700, episode=683 reward=0.7926389 (425.07 it/sec) -training >> step=4094800, episode=683 reward=0.7823887 (452.49 it/sec) -training >> step=4094900, episode=683 reward=0.7633469 (527.22 it/sec) -training >> step=4095000, episode=683 reward=0.7646757 (542.50 it/sec) -training >> step=4095100, episode=683 reward=0.8022239 (534.81 it/sec) -training >> step=4095200, episode=683 reward=0.7515485 (522.78 it/sec) -training >> step=4095300, episode=683 reward=0.790691 (540.34 it/sec) -training >> step=4095400, episode=683 reward=0.7708411 (546.21 it/sec) -training >> step=4095500, episode=683 reward=0.7832703 (525.04 it/sec) -training >> step=4095600, episode=683 reward=0.7830264 (558.10 it/sec) -training >> step=4095700, episode=683 reward=0.7871126 (531.08 it/sec) -training >> step=4095800, episode=683 reward=0.790935 (516.33 it/sec) -training >> step=4095900, episode=683 reward=0.7884199 (556.50 it/sec) -training >> step=4096000, episode=683 reward=0.7726285 (548.86 it/sec) -training >> step=4096100, episode=683 reward=0.7575634 (560.42 it/sec) -training >> step=4096200, episode=683 reward=0.7996056 (523.87 it/sec) -training >> step=4096300, episode=683 reward=0.778682 (544.90 it/sec) -training >> step=4096400, episode=683 reward=0.7645253 (537.20 it/sec) -training >> step=4096500, episode=683 reward=0.7739612 (536.53 it/sec) -training >> step=4096600, episode=683 reward=0.7584061 (516.01 it/sec) -training >> step=4096700, episode=683 reward=0.7820448 (571.61 it/sec) -training >> step=4096800, episode=683 reward=0.7622623 (538.24 it/sec) -training >> step=4096900, episode=683 reward=0.7703896 (502.33 it/sec) -training >> step=4097000, episode=683 reward=0.7552487 (537.07 it/sec) -training >> step=4097100, episode=683 reward=0.7361241 (539.19 it/sec) -training >> step=4097200, episode=683 reward=0.7585256 (528.01 it/sec) -training >> step=4097300, episode=684 reward=0.7814324 (110.86 it/sec) -training >> step=4097400, episode=684 reward=0.7598503 (547.26 it/sec) -training >> step=4097500, episode=684 reward=0.7782006 (463.98 it/sec) -training >> step=4097600, episode=684 reward=0.7727624 (484.55 it/sec) -training >> step=4097700, episode=684 reward=0.7634439 (523.93 it/sec) -training >> step=4097800, episode=684 reward=0.7744316 (536.41 it/sec) -training >> step=4097900, episode=684 reward=0.7730027 (521.09 it/sec) -training >> step=4098000, episode=684 reward=0.7756573 (504.08 it/sec) -training >> step=4098100, episode=684 reward=0.7762415 (521.04 it/sec) -training >> step=4098200, episode=684 reward=0.7789473 (530.32 it/sec) -training >> step=4098300, episode=684 reward=0.7820575 (409.97 it/sec) -training >> step=4098400, episode=684 reward=0.776921 (426.12 it/sec) -training >> step=4098500, episode=684 reward=0.7598402 (547.34 it/sec) -training >> step=4098600, episode=684 reward=0.7657034 (506.45 it/sec) -training >> step=4098700, episode=684 reward=0.765814 (470.45 it/sec) -training >> step=4098800, episode=684 reward=0.7826527 (441.85 it/sec) -training >> step=4098900, episode=684 reward=0.7723383 (530.25 it/sec) -training >> step=4099000, episode=684 reward=0.7658272 (521.22 it/sec) -training >> step=4099100, episode=684 reward=0.7727706 (484.33 it/sec) -training >> step=4099200, episode=684 reward=0.7835539 (550.22 it/sec) -training >> step=4099300, episode=684 reward=0.7765625 (515.41 it/sec) -training >> step=4099400, episode=684 reward=0.7918712 (433.14 it/sec) -training >> step=4099500, episode=684 reward=0.7850863 (481.71 it/sec) -training >> step=4099600, episode=684 reward=0.7600207 (494.94 it/sec) -training >> step=4099700, episode=684 reward=0.778511 (506.45 it/sec) -training >> step=4099800, episode=684 reward=0.7977388 (495.42 it/sec) -training >> step=4099900, episode=684 reward=0.7835628 (526.84 it/sec) -training >> step=4100000, episode=684 reward=0.7546754 (509.53 it/sec) -training >> step=4100100, episode=684 reward=0.7658892 (493.44 it/sec) -training >> step=4100200, episode=684 reward=0.7811863 (500.39 it/sec) -training >> step=4100300, episode=684 reward=0.7709828 (512.51 it/sec) -training >> step=4100400, episode=684 reward=0.7644029 (432.85 it/sec) -training >> step=4100500, episode=684 reward=0.7824876 (524.20 it/sec) -training >> step=4100600, episode=684 reward=0.7904159 (533.04 it/sec) -training >> step=4100700, episode=684 reward=0.7823104 (520.62 it/sec) -training >> step=4100800, episode=684 reward=0.7615282 (544.70 it/sec) -training >> step=4100900, episode=684 reward=0.7640256 (491.59 it/sec) -training >> step=4101000, episode=684 reward=0.775787 (525.10 it/sec) -training >> step=4101100, episode=684 reward=0.779976 (465.87 it/sec) -training >> step=4101200, episode=684 reward=0.7838467 (495.55 it/sec) -training >> step=4101300, episode=684 reward=0.7692447 (490.43 it/sec) -training >> step=4101400, episode=684 reward=0.7960435 (483.66 it/sec) -training >> step=4101500, episode=684 reward=0.7773545 (498.98 it/sec) -training >> step=4101600, episode=684 reward=0.7945788 (385.37 it/sec) -training >> step=4101700, episode=684 reward=0.7664283 (435.89 it/sec) -training >> step=4101800, episode=684 reward=0.7716767 (493.69 it/sec) -training >> step=4101900, episode=684 reward=0.7799107 (452.78 it/sec) -training >> step=4102000, episode=684 reward=0.7624465 (448.58 it/sec) -training >> step=4102100, episode=684 reward=0.7715219 (472.26 it/sec) -training >> step=4102200, episode=684 reward=0.7627766 (447.88 it/sec) -training >> step=4102300, episode=684 reward=0.7748899 (352.64 it/sec) -training >> step=4102400, episode=684 reward=0.7626418 (445.19 it/sec) -training >> step=4102500, episode=684 reward=0.7808284 (479.78 it/sec) -training >> step=4102600, episode=684 reward=0.784318 (456.17 it/sec) -training >> step=4102700, episode=684 reward=0.7744138 (451.00 it/sec) -training >> step=4102800, episode=684 reward=0.7938958 (480.49 it/sec) -training >> step=4102900, episode=684 reward=0.7721078 (453.66 it/sec) -training >> step=4103000, episode=684 reward=0.7487896 (429.14 it/sec) -training >> step=4103100, episode=684 reward=0.7691765 (476.79 it/sec) -training >> step=4103200, episode=684 reward=0.7663291 (488.95 it/sec) -training >> step=4103300, episode=685 reward=0.7932785 (94.95 it/sec) -training >> step=4103400, episode=685 reward=0.7578873 (459.42 it/sec) -training >> step=4103500, episode=685 reward=0.7910573 (445.41 it/sec) -training >> step=4103600, episode=685 reward=0.7645755 (431.22 it/sec) -training >> step=4103700, episode=685 reward=0.780166 (463.62 it/sec) -training >> step=4103800, episode=685 reward=0.7898259 (438.90 it/sec) -training >> step=4103900, episode=685 reward=0.7600032 (526.49 it/sec) -training >> step=4104000, episode=685 reward=0.7835431 (447.02 it/sec) -training >> step=4104100, episode=685 reward=0.7709996 (428.75 it/sec) -training >> step=4104200, episode=685 reward=0.7671935 (497.35 it/sec) -training >> step=4104300, episode=685 reward=0.7724404 (483.66 it/sec) -training >> step=4104400, episode=685 reward=0.8026066 (440.17 it/sec) -training >> step=4104500, episode=685 reward=0.7587168 (391.43 it/sec) -training >> step=4104600, episode=685 reward=0.7584572 (336.16 it/sec) -training >> step=4104700, episode=685 reward=0.7799522 (447.14 it/sec) -training >> step=4104800, episode=685 reward=0.7628971 (449.55 it/sec) -training >> step=4104900, episode=685 reward=0.7627712 (418.18 it/sec) -training >> step=4105000, episode=685 reward=0.7804271 (498.71 it/sec) -training >> step=4105100, episode=685 reward=0.7800069 (510.71 it/sec) -training >> step=4105200, episode=685 reward=0.787443 (452.41 it/sec) -training >> step=4105300, episode=685 reward=0.7714932 (467.78 it/sec) -training >> step=4105400, episode=685 reward=0.7747693 (479.50 it/sec) -training >> step=4105500, episode=685 reward=0.7750117 (452.15 it/sec) -training >> step=4105600, episode=685 reward=0.7734979 (509.04 it/sec) -training >> step=4105700, episode=685 reward=0.7777076 (498.05 it/sec) -training >> step=4105800, episode=685 reward=0.7820505 (505.42 it/sec) -training >> step=4105900, episode=685 reward=0.7614166 (433.96 it/sec) -training >> step=4106000, episode=685 reward=0.7649353 (496.69 it/sec) -training >> step=4106100, episode=685 reward=0.792103 (440.72 it/sec) -training >> step=4106200, episode=685 reward=0.7786653 (468.82 it/sec) -training >> step=4106300, episode=685 reward=0.7807406 (480.64 it/sec) -training >> step=4106400, episode=685 reward=0.7695112 (529.63 it/sec) -training >> step=4106500, episode=685 reward=0.7974119 (498.75 it/sec) -training >> step=4106600, episode=685 reward=0.7813506 (490.45 it/sec) -training >> step=4106700, episode=685 reward=0.7635601 (481.68 it/sec) -training >> step=4106800, episode=685 reward=0.7897174 (474.12 it/sec) -training >> step=4106900, episode=685 reward=0.7634461 (500.93 it/sec) -training >> step=4107000, episode=685 reward=0.7595531 (484.13 it/sec) -training >> step=4107100, episode=685 reward=0.7783406 (498.95 it/sec) -training >> step=4107200, episode=685 reward=0.7713836 (470.72 it/sec) -training >> step=4107300, episode=685 reward=0.813951 (454.41 it/sec) -training >> step=4107400, episode=685 reward=0.7716829 (510.24 it/sec) -training >> step=4107500, episode=685 reward=0.7539924 (479.98 it/sec) -training >> step=4107600, episode=685 reward=0.7643427 (503.87 it/sec) -training >> step=4107700, episode=685 reward=0.7648521 (476.20 it/sec) -training >> step=4107800, episode=685 reward=0.7656466 (519.37 it/sec) -training >> step=4107900, episode=685 reward=0.8001379 (481.62 it/sec) -training >> step=4108000, episode=685 reward=0.768218 (499.81 it/sec) -training >> step=4108100, episode=685 reward=0.7956915 (496.22 it/sec) -training >> step=4108200, episode=685 reward=0.7725911 (486.45 it/sec) -training >> step=4108300, episode=685 reward=0.7763973 (493.81 it/sec) -training >> step=4108400, episode=685 reward=0.7543223 (411.65 it/sec) -training >> step=4108500, episode=685 reward=0.7674158 (485.09 it/sec) -training >> step=4108600, episode=685 reward=0.7606291 (534.23 it/sec) -training >> step=4108700, episode=685 reward=0.7596753 (430.49 it/sec) -training >> step=4108800, episode=685 reward=0.7470304 (470.84 it/sec) -training >> step=4108900, episode=685 reward=0.7857514 (523.43 it/sec) -training >> step=4109000, episode=685 reward=0.8052551 (459.87 it/sec) -training >> step=4109100, episode=685 reward=0.7843577 (483.99 it/sec) -training >> step=4109200, episode=685 reward=0.7594476 (516.19 it/sec) -training >> step=4109300, episode=686 reward=0.7566025 (112.40 it/sec) -training >> step=4109400, episode=686 reward=0.7660255 (396.76 it/sec) -training >> step=4109500, episode=686 reward=0.7772408 (434.69 it/sec) -training >> step=4109600, episode=686 reward=0.7703325 (504.22 it/sec) -training >> step=4109700, episode=686 reward=0.7760531 (498.88 it/sec) -training >> step=4109800, episode=686 reward=0.7788078 (430.46 it/sec) -training >> step=4109900, episode=686 reward=0.7877579 (475.57 it/sec) -training >> step=4110000, episode=686 reward=0.7696981 (497.64 it/sec) -training >> step=4110100, episode=686 reward=0.7777397 (487.37 it/sec) -training >> step=4110200, episode=686 reward=0.7773787 (465.72 it/sec) -training >> step=4110300, episode=686 reward=0.7445691 (415.13 it/sec) -training >> step=4110400, episode=686 reward=0.7837576 (452.08 it/sec) -training >> step=4110500, episode=686 reward=0.7816716 (437.93 it/sec) -training >> step=4110600, episode=686 reward=0.7877946 (419.11 it/sec) -training >> step=4110700, episode=686 reward=0.7863019 (306.53 it/sec) -training >> step=4110800, episode=686 reward=0.7907255 (448.99 it/sec) -training >> step=4110900, episode=686 reward=0.7739353 (455.46 it/sec) -training >> step=4111000, episode=686 reward=0.7939993 (457.63 it/sec) -training >> step=4111100, episode=686 reward=0.759459 (510.90 it/sec) -training >> step=4111200, episode=686 reward=0.7732512 (534.39 it/sec) -training >> step=4111300, episode=686 reward=0.7692479 (521.28 it/sec) -training >> step=4111400, episode=686 reward=0.8164147 (537.27 it/sec) -training >> step=4111500, episode=686 reward=0.7753484 (458.33 it/sec) -training >> step=4111600, episode=686 reward=0.7667653 (459.95 it/sec) -training >> step=4111700, episode=686 reward=0.7837825 (490.16 it/sec) -training >> step=4111800, episode=686 reward=0.7613186 (509.76 it/sec) -training >> step=4111900, episode=686 reward=0.7749214 (464.59 it/sec) -training >> step=4112000, episode=686 reward=0.7380877 (377.21 it/sec) -training >> step=4112100, episode=686 reward=0.7840905 (436.79 it/sec) -training >> step=4112200, episode=686 reward=0.7771207 (416.60 it/sec) -training >> step=4112300, episode=686 reward=0.7757435 (522.31 it/sec) -training >> step=4112400, episode=686 reward=0.7861242 (465.21 it/sec) -training >> step=4112500, episode=686 reward=0.7928739 (533.53 it/sec) -training >> step=4112600, episode=686 reward=0.777732 (448.18 it/sec) -training >> step=4112700, episode=686 reward=0.7762225 (463.43 it/sec) -training >> step=4112800, episode=686 reward=0.7605038 (477.20 it/sec) -training >> step=4112900, episode=686 reward=0.741357 (476.74 it/sec) -training >> step=4113000, episode=686 reward=0.7794988 (502.87 it/sec) -training >> step=4113100, episode=686 reward=0.7794662 (439.70 it/sec) -training >> step=4113200, episode=686 reward=0.7628685 (455.31 it/sec) -training >> step=4113300, episode=686 reward=0.7871851 (458.01 it/sec) -training >> step=4113400, episode=686 reward=0.8070987 (448.22 it/sec) -training >> step=4113500, episode=686 reward=0.7706007 (445.41 it/sec) -training >> step=4113600, episode=686 reward=0.7849147 (470.92 it/sec) -training >> step=4113700, episode=686 reward=0.7845954 (474.84 it/sec) -training >> step=4113800, episode=686 reward=0.7709722 (447.90 it/sec) -training >> step=4113900, episode=686 reward=0.7669364 (483.12 it/sec) -training >> step=4114000, episode=686 reward=0.7794025 (472.90 it/sec) -training >> step=4114100, episode=686 reward=0.7668298 (483.40 it/sec) -training >> step=4114200, episode=686 reward=0.7943916 (432.71 it/sec) -training >> step=4114300, episode=686 reward=0.780728 (407.49 it/sec) -training >> step=4114400, episode=686 reward=0.7754079 (401.07 it/sec) -training >> step=4114500, episode=686 reward=0.7800486 (443.08 it/sec) -training >> step=4114600, episode=686 reward=0.7611216 (412.87 it/sec) -training >> step=4114700, episode=686 reward=0.7743982 (429.25 it/sec) -training >> step=4114800, episode=686 reward=0.781889 (429.86 it/sec) -training >> step=4114900, episode=686 reward=0.74944 (472.79 it/sec) -training >> step=4115000, episode=686 reward=0.7749724 (515.18 it/sec) -training >> step=4115100, episode=686 reward=0.7837502 (473.27 it/sec) -training >> step=4115200, episode=686 reward=0.7776678 (458.49 it/sec) -training >> step=4115300, episode=687 reward=0.7574516 (137.64 it/sec) -training >> step=4115400, episode=687 reward=0.7652453 (461.08 it/sec) -training >> step=4115500, episode=687 reward=0.7931951 (442.96 it/sec) -training >> step=4115600, episode=687 reward=0.7699065 (426.51 it/sec) -training >> step=4115700, episode=687 reward=0.7646112 (471.24 it/sec) -training >> step=4115800, episode=687 reward=0.7788134 (523.48 it/sec) -training >> step=4115900, episode=687 reward=0.7783191 (435.90 it/sec) -training >> step=4116000, episode=687 reward=0.7845291 (487.74 it/sec) -training >> step=4116100, episode=687 reward=0.7717734 (504.07 it/sec) -training >> step=4116200, episode=687 reward=0.779891 (518.95 it/sec) -training >> step=4116300, episode=687 reward=0.77915 (462.09 it/sec) -training >> step=4116400, episode=687 reward=0.7819481 (471.90 it/sec) -training >> step=4116500, episode=687 reward=0.7758622 (499.09 it/sec) -training >> step=4116600, episode=687 reward=0.77392 (450.97 it/sec) -training >> step=4116700, episode=687 reward=0.7708015 (488.22 it/sec) -training >> step=4116800, episode=687 reward=0.7845499 (496.64 it/sec) -training >> step=4116900, episode=687 reward=0.7844793 (383.84 it/sec) -training >> step=4117000, episode=687 reward=0.7844676 (459.27 it/sec) -training >> step=4117100, episode=687 reward=0.7849303 (436.22 it/sec) -training >> step=4117200, episode=687 reward=0.7706384 (472.00 it/sec) -training >> step=4117300, episode=687 reward=0.7551066 (509.17 it/sec) -training >> step=4117400, episode=687 reward=0.7799111 (454.71 it/sec) -training >> step=4117500, episode=687 reward=0.7666266 (490.13 it/sec) -training >> step=4117600, episode=687 reward=0.7981508 (478.26 it/sec) -training >> step=4117700, episode=687 reward=0.7655472 (445.53 it/sec) -training >> step=4117800, episode=687 reward=0.7732878 (474.38 it/sec) -training >> step=4117900, episode=687 reward=0.7946193 (455.63 it/sec) -training >> step=4118000, episode=687 reward=0.7524225 (443.92 it/sec) -training >> step=4118100, episode=687 reward=0.8084705 (450.20 it/sec) -training >> step=4118200, episode=687 reward=0.7706236 (471.50 it/sec) -training >> step=4118300, episode=687 reward=0.7534141 (488.88 it/sec) -training >> step=4118400, episode=687 reward=0.774496 (459.04 it/sec) -training >> step=4118500, episode=687 reward=0.7929074 (433.45 it/sec) -training >> step=4118600, episode=687 reward=0.7602332 (451.98 it/sec) -training >> step=4118700, episode=687 reward=0.7654378 (459.89 it/sec) -training >> step=4118800, episode=687 reward=0.7731195 (460.80 it/sec) -training >> step=4118900, episode=687 reward=0.7690115 (468.69 it/sec) -training >> step=4119000, episode=687 reward=0.7629961 (492.59 it/sec) -training >> step=4119100, episode=687 reward=0.7830408 (471.90 it/sec) -training >> step=4119200, episode=687 reward=0.7860122 (468.91 it/sec) -training >> step=4119300, episode=687 reward=0.7675903 (463.60 it/sec) -training >> step=4119400, episode=687 reward=0.7840826 (499.45 it/sec) -training >> step=4119500, episode=687 reward=0.7738455 (529.22 it/sec) -training >> step=4119600, episode=687 reward=0.7793348 (511.49 it/sec) -training >> step=4119700, episode=687 reward=0.781864 (493.29 it/sec) -training >> step=4119800, episode=687 reward=0.7779755 (465.36 it/sec) -training >> step=4119900, episode=687 reward=0.7562607 (472.57 it/sec) -training >> step=4120000, episode=687 reward=0.774084 (431.60 it/sec) -training >> step=4120100, episode=687 reward=0.8061437 (514.71 it/sec) -training >> step=4120200, episode=687 reward=0.7738444 (489.23 it/sec) -training >> step=4120300, episode=687 reward=0.7731982 (450.25 it/sec) -training >> step=4120400, episode=687 reward=0.7566764 (503.26 it/sec) -training >> step=4120500, episode=687 reward=0.7868724 (491.78 it/sec) -training >> step=4120600, episode=687 reward=0.7855452 (472.58 it/sec) -training >> step=4120700, episode=687 reward=0.7691599 (508.40 it/sec) -training >> step=4120800, episode=687 reward=0.7755262 (485.71 it/sec) -training >> step=4120900, episode=687 reward=0.7764363 (494.26 it/sec) -training >> step=4121000, episode=687 reward=0.7765887 (445.36 it/sec) -training >> step=4121100, episode=687 reward=0.761992 (477.69 it/sec) -training >> step=4121200, episode=687 reward=0.7759615 (547.99 it/sec) -training >> step=4121300, episode=688 reward=0.7499934 (85.97 it/sec) -training >> step=4121400, episode=688 reward=0.7513418 (431.82 it/sec) -training >> step=4121500, episode=688 reward=0.7597805 (378.96 it/sec) -training >> step=4121600, episode=688 reward=0.7743108 (397.22 it/sec) -training >> step=4121700, episode=688 reward=0.787047 (463.01 it/sec) -training >> step=4121800, episode=688 reward=0.7505829 (445.82 it/sec) -training >> step=4121900, episode=688 reward=0.8008212 (481.74 it/sec) -training >> step=4122000, episode=688 reward=0.7977015 (462.37 it/sec) -training >> step=4122100, episode=688 reward=0.7730585 (454.97 it/sec) -training >> step=4122200, episode=688 reward=0.777668 (467.71 it/sec) -training >> step=4122300, episode=688 reward=0.7796957 (454.27 it/sec) -training >> step=4122400, episode=688 reward=0.7686659 (485.64 it/sec) -training >> step=4122500, episode=688 reward=0.7892986 (495.30 it/sec) -training >> step=4122600, episode=688 reward=0.7868498 (481.76 it/sec) -training >> step=4122700, episode=688 reward=0.7721718 (429.57 it/sec) -training >> step=4122800, episode=688 reward=0.7655618 (464.88 it/sec) -training >> step=4122900, episode=688 reward=0.7612395 (471.94 it/sec) -training >> step=4123000, episode=688 reward=0.7886063 (457.54 it/sec) -training >> step=4123100, episode=688 reward=0.7598251 (470.59 it/sec) -training >> step=4123200, episode=688 reward=0.7793429 (286.00 it/sec) -training >> step=4123300, episode=688 reward=0.7733123 (460.32 it/sec) -training >> step=4123400, episode=688 reward=0.8061741 (460.47 it/sec) -training >> step=4123500, episode=688 reward=0.773053 (439.87 it/sec) -training >> step=4123600, episode=688 reward=0.771982 (453.78 it/sec) -training >> step=4123700, episode=688 reward=0.7823787 (476.29 it/sec) -training >> step=4123800, episode=688 reward=0.7716533 (445.55 it/sec) -training >> step=4123900, episode=688 reward=0.7911047 (463.46 it/sec) -training >> step=4124000, episode=688 reward=0.7779845 (473.72 it/sec) -training >> step=4124100, episode=688 reward=0.7738919 (452.38 it/sec) -training >> step=4124200, episode=688 reward=0.7730414 (491.35 it/sec) -training >> step=4124300, episode=688 reward=0.7736915 (476.43 it/sec) -training >> step=4124400, episode=688 reward=0.7766734 (513.23 it/sec) -training >> step=4124500, episode=688 reward=0.7754535 (514.76 it/sec) -training >> step=4124600, episode=688 reward=0.7755505 (497.92 it/sec) -training >> step=4124700, episode=688 reward=0.7632393 (510.68 it/sec) -training >> step=4124800, episode=688 reward=0.7592655 (533.97 it/sec) -training >> step=4124900, episode=688 reward=0.7840064 (512.07 it/sec) -training >> step=4125000, episode=688 reward=0.7740949 (513.86 it/sec) -training >> step=4125100, episode=688 reward=0.7625685 (521.45 it/sec) -training >> step=4125200, episode=688 reward=0.7820085 (530.12 it/sec) -training >> step=4125300, episode=688 reward=0.759469 (408.50 it/sec) -training >> step=4125400, episode=688 reward=0.7566238 (511.35 it/sec) -training >> step=4125500, episode=688 reward=0.7676528 (487.78 it/sec) -training >> step=4125600, episode=688 reward=0.7722487 (440.44 it/sec) -training >> step=4125700, episode=688 reward=0.7759495 (498.18 it/sec) -training >> step=4125800, episode=688 reward=0.7748516 (516.73 it/sec) -training >> step=4125900, episode=688 reward=0.7766755 (454.10 it/sec) -training >> step=4126000, episode=688 reward=0.7777366 (519.86 it/sec) -training >> step=4126100, episode=688 reward=0.7650835 (488.44 it/sec) -training >> step=4126200, episode=688 reward=0.7794989 (503.15 it/sec) -training >> step=4126300, episode=688 reward=0.7601832 (454.79 it/sec) -training >> step=4126400, episode=688 reward=0.7624953 (470.03 it/sec) -training >> step=4126500, episode=688 reward=0.7726095 (549.62 it/sec) -training >> step=4126600, episode=688 reward=0.7699087 (504.17 it/sec) -training >> step=4126700, episode=688 reward=0.7727348 (493.91 it/sec) -training >> step=4126800, episode=688 reward=0.753521 (513.89 it/sec) -training >> step=4126900, episode=688 reward=0.7594594 (533.83 it/sec) -training >> step=4127000, episode=688 reward=0.7829239 (475.55 it/sec) -training >> step=4127100, episode=688 reward=0.7681845 (498.38 it/sec) -training >> step=4127200, episode=688 reward=0.774937 (540.87 it/sec) -training >> step=4127300, episode=689 reward=0.7480591 (73.89 it/sec) -training >> step=4127400, episode=689 reward=0.7775311 (502.54 it/sec) -training >> step=4127500, episode=689 reward=0.7566271 (530.75 it/sec) -training >> step=4127600, episode=689 reward=0.7809995 (490.10 it/sec) -training >> step=4127700, episode=689 reward=0.7720905 (498.07 it/sec) -training >> step=4127800, episode=689 reward=0.7750199 (503.46 it/sec) -training >> step=4127900, episode=689 reward=0.7630898 (489.45 it/sec) -training >> step=4128000, episode=689 reward=0.7782651 (461.88 it/sec) -training >> step=4128100, episode=689 reward=0.7830305 (486.52 it/sec) -training >> step=4128200, episode=689 reward=0.7640266 (505.95 it/sec) -training >> step=4128300, episode=689 reward=0.7630947 (460.56 it/sec) -training >> step=4128400, episode=689 reward=0.7793312 (482.05 it/sec) -training >> step=4128500, episode=689 reward=0.7997621 (517.20 it/sec) -training >> step=4128600, episode=689 reward=0.7515914 (483.20 it/sec) -training >> step=4128700, episode=689 reward=0.757405 (486.14 it/sec) -training >> step=4128800, episode=689 reward=0.7774404 (484.04 it/sec) -training >> step=4128900, episode=689 reward=0.7696034 (462.27 it/sec) -training >> step=4129000, episode=689 reward=0.7701707 (471.60 it/sec) -training >> step=4129100, episode=689 reward=0.7905837 (478.37 it/sec) -training >> step=4129200, episode=689 reward=0.7675624 (512.82 it/sec) -training >> step=4129300, episode=689 reward=0.7835885 (513.65 it/sec) -training >> step=4129400, episode=689 reward=0.7622992 (345.29 it/sec) -training >> step=4129500, episode=689 reward=0.7723997 (454.24 it/sec) -training >> step=4129600, episode=689 reward=0.784765 (474.59 it/sec) -training >> step=4129700, episode=689 reward=0.775429 (498.25 it/sec) -training >> step=4129800, episode=689 reward=0.7727501 (526.56 it/sec) -training >> step=4129900, episode=689 reward=0.7839767 (494.06 it/sec) -training >> step=4130000, episode=689 reward=0.777748 (498.88 it/sec) -training >> step=4130100, episode=689 reward=0.775417 (477.34 it/sec) -training >> step=4130200, episode=689 reward=0.7933109 (506.85 it/sec) -training >> step=4130300, episode=689 reward=0.7627447 (518.07 it/sec) -training >> step=4130400, episode=689 reward=0.7935863 (515.14 it/sec) -training >> step=4130500, episode=689 reward=0.7901646 (497.96 it/sec) -training >> step=4130600, episode=689 reward=0.7598636 (474.46 it/sec) -training >> step=4130700, episode=689 reward=0.7759845 (491.92 it/sec) -training >> step=4130800, episode=689 reward=0.7775964 (495.25 it/sec) -training >> step=4130900, episode=689 reward=0.7471354 (502.28 it/sec) -training >> step=4131000, episode=689 reward=0.7798399 (538.20 it/sec) -training >> step=4131100, episode=689 reward=0.772568 (480.13 it/sec) -training >> step=4131200, episode=689 reward=0.7496939 (473.81 it/sec) -training >> step=4131300, episode=689 reward=0.79282 (519.48 it/sec) -training >> step=4131400, episode=689 reward=0.7839633 (485.25 it/sec) -training >> step=4131500, episode=689 reward=0.7662531 (504.45 it/sec) -training >> step=4131600, episode=689 reward=0.7729863 (474.26 it/sec) -training >> step=4131700, episode=689 reward=0.782975 (508.06 it/sec) -training >> step=4131800, episode=689 reward=0.7746356 (475.89 it/sec) -training >> step=4131900, episode=689 reward=0.7654813 (490.54 it/sec) -training >> step=4132000, episode=689 reward=0.7775786 (499.09 it/sec) -training >> step=4132100, episode=689 reward=0.7487495 (536.84 it/sec) -training >> step=4132200, episode=689 reward=0.774244 (513.38 it/sec) -training >> step=4132300, episode=689 reward=0.7659422 (497.29 it/sec) -training >> step=4132400, episode=689 reward=0.7802947 (505.86 it/sec) -training >> step=4132500, episode=689 reward=0.7486427 (517.96 it/sec) -training >> step=4132600, episode=689 reward=0.7846143 (511.24 it/sec) -training >> step=4132700, episode=689 reward=0.7353608 (507.31 it/sec) -training >> step=4132800, episode=689 reward=0.7604163 (528.23 it/sec) -training >> step=4132900, episode=689 reward=0.7619332 (512.56 it/sec) -training >> step=4133000, episode=689 reward=0.7583153 (502.31 it/sec) -training >> step=4133100, episode=689 reward=0.76713 (502.90 it/sec) -training >> step=4133200, episode=689 reward=0.7838337 (497.20 it/sec) -training >> step=4133300, episode=690 reward=0.7826699 (116.72 it/sec) -training >> step=4133400, episode=690 reward=0.7900332 (482.12 it/sec) -training >> step=4133500, episode=690 reward=0.7817772 (517.83 it/sec) -training >> step=4133600, episode=690 reward=0.7745098 (439.53 it/sec) -training >> step=4133700, episode=690 reward=0.7842807 (531.03 it/sec) -training >> step=4133800, episode=690 reward=0.7719109 (539.41 it/sec) -training >> step=4133900, episode=690 reward=0.7649323 (574.24 it/sec) -training >> step=4134000, episode=690 reward=0.7780673 (549.68 it/sec) -training >> step=4134100, episode=690 reward=0.7993879 (537.91 it/sec) -training >> step=4134200, episode=690 reward=0.7801007 (545.15 it/sec) -training >> step=4134300, episode=690 reward=0.7704299 (513.50 it/sec) -training >> step=4134400, episode=690 reward=0.7787285 (537.12 it/sec) -training >> step=4134500, episode=690 reward=0.7659977 (553.94 it/sec) -training >> step=4134600, episode=690 reward=0.7856483 (559.51 it/sec) -training >> step=4134700, episode=690 reward=0.7752104 (539.91 it/sec) -training >> step=4134800, episode=690 reward=0.7595047 (498.71 it/sec) -training >> step=4134900, episode=690 reward=0.7589362 (523.60 it/sec) -training >> step=4135000, episode=690 reward=0.7614605 (470.22 it/sec) -training >> step=4135100, episode=690 reward=0.7959865 (510.17 it/sec) -training >> step=4135200, episode=690 reward=0.7898783 (482.41 it/sec) -training >> step=4135300, episode=690 reward=0.7948861 (562.76 it/sec) -training >> step=4135400, episode=690 reward=0.7686303 (504.36 it/sec) -training >> step=4135500, episode=690 reward=0.7693158 (357.89 it/sec) -training >> step=4135600, episode=690 reward=0.7729741 (501.69 it/sec) -training >> step=4135700, episode=690 reward=0.7771388 (567.03 it/sec) -training >> step=4135800, episode=690 reward=0.7643312 (556.65 it/sec) -training >> step=4135900, episode=690 reward=0.762115 (535.52 it/sec) -training >> step=4136000, episode=690 reward=0.7937691 (565.31 it/sec) -training >> step=4136100, episode=690 reward=0.7663812 (474.43 it/sec) -training >> step=4136200, episode=690 reward=0.7740514 (497.14 it/sec) -training >> step=4136300, episode=690 reward=0.7970333 (536.54 it/sec) -training >> step=4136400, episode=690 reward=0.77841 (596.24 it/sec) -training >> step=4136500, episode=690 reward=0.7970389 (510.47 it/sec) -training >> step=4136600, episode=690 reward=0.7695549 (477.05 it/sec) -training >> step=4136700, episode=690 reward=0.7685666 (524.79 it/sec) -training >> step=4136800, episode=690 reward=0.7736562 (490.21 it/sec) -training >> step=4136900, episode=690 reward=0.7620196 (506.24 it/sec) -training >> step=4137000, episode=690 reward=0.7764852 (502.38 it/sec) -training >> step=4137100, episode=690 reward=0.7463337 (498.13 it/sec) -training >> step=4137200, episode=690 reward=0.7661389 (466.41 it/sec) -training >> step=4137300, episode=690 reward=0.7771447 (497.11 it/sec) -training >> step=4137400, episode=690 reward=0.7676604 (508.98 it/sec) -training >> step=4137500, episode=690 reward=0.7699267 (534.26 it/sec) -training >> step=4137600, episode=690 reward=0.781385 (509.34 it/sec) -training >> step=4137700, episode=690 reward=0.7660436 (455.91 it/sec) -training >> step=4137800, episode=690 reward=0.7877533 (507.82 it/sec) -training >> step=4137900, episode=690 reward=0.7780163 (464.60 it/sec) -training >> step=4138000, episode=690 reward=0.7618685 (488.28 it/sec) -training >> step=4138100, episode=690 reward=0.7684048 (456.66 it/sec) -training >> step=4138200, episode=690 reward=0.7643048 (522.57 it/sec) -training >> step=4138300, episode=690 reward=0.7426097 (466.98 it/sec) -training >> step=4138400, episode=690 reward=0.7561311 (450.10 it/sec) -training >> step=4138500, episode=690 reward=0.7787154 (525.75 it/sec) -training >> step=4138600, episode=690 reward=0.7765159 (481.34 it/sec) -training >> step=4138700, episode=690 reward=0.7898608 (519.33 it/sec) -training >> step=4138800, episode=690 reward=0.7880364 (509.00 it/sec) -training >> step=4138900, episode=690 reward=0.757309 (548.73 it/sec) -training >> step=4139000, episode=690 reward=0.7561558 (461.86 it/sec) -training >> step=4139100, episode=690 reward=0.7746337 (508.48 it/sec) -training >> step=4139200, episode=690 reward=0.7733284 (463.67 it/sec) -training >> step=4139300, episode=691 reward=0.7619433 (93.03 it/sec) -training >> step=4139400, episode=691 reward=0.7757677 (489.73 it/sec) -training >> step=4139500, episode=691 reward=0.7719057 (537.00 it/sec) -training >> step=4139600, episode=691 reward=0.7459339 (497.29 it/sec) -training >> step=4139700, episode=691 reward=0.8022161 (487.58 it/sec) -training >> step=4139800, episode=691 reward=0.7821784 (494.42 it/sec) -training >> step=4139900, episode=691 reward=0.7784517 (522.64 it/sec) -training >> step=4140000, episode=691 reward=0.7909721 (497.48 it/sec) -training >> step=4140100, episode=691 reward=0.7621966 (543.26 it/sec) -training >> step=4140200, episode=691 reward=0.7769954 (500.58 it/sec) -training >> step=4140300, episode=691 reward=0.7764036 (493.28 it/sec) -training >> step=4140400, episode=691 reward=0.7795437 (496.17 it/sec) -training >> step=4140500, episode=691 reward=0.7766004 (482.42 it/sec) -training >> step=4140600, episode=691 reward=0.7815168 (526.03 it/sec) -training >> step=4140700, episode=691 reward=0.7508345 (444.68 it/sec) -training >> step=4140800, episode=691 reward=0.7744064 (493.77 it/sec) -training >> step=4140900, episode=691 reward=0.7650733 (511.72 it/sec) -training >> step=4141000, episode=691 reward=0.7688532 (494.08 it/sec) -training >> step=4141100, episode=691 reward=0.7825472 (496.89 it/sec) -training >> step=4141200, episode=691 reward=0.758256 (506.10 it/sec) -training >> step=4141300, episode=691 reward=0.7747638 (559.25 it/sec) -training >> step=4141400, episode=691 reward=0.7488703 (503.62 it/sec) -training >> step=4141500, episode=691 reward=0.777886 (525.37 it/sec) -training >> step=4141600, episode=691 reward=0.7732698 (335.86 it/sec) -training >> step=4141700, episode=691 reward=0.78789 (451.09 it/sec) -training >> step=4141800, episode=691 reward=0.793734 (492.67 it/sec) -training >> step=4141900, episode=691 reward=0.7878785 (486.61 it/sec) -training >> step=4142000, episode=691 reward=0.7824415 (529.68 it/sec) -training >> step=4142100, episode=691 reward=0.7606447 (456.77 it/sec) -training >> step=4142200, episode=691 reward=0.7581725 (473.57 it/sec) -training >> step=4142300, episode=691 reward=0.7816004 (485.90 it/sec) -training >> step=4142400, episode=691 reward=0.7546744 (513.26 it/sec) -training >> step=4142500, episode=691 reward=0.7849199 (492.92 it/sec) -training >> step=4142600, episode=691 reward=0.7805305 (416.56 it/sec) -training >> step=4142700, episode=691 reward=0.7754431 (472.25 it/sec) -training >> step=4142800, episode=691 reward=0.7545736 (483.56 it/sec) -training >> step=4142900, episode=691 reward=0.7699609 (511.62 it/sec) -training >> step=4143000, episode=691 reward=0.7588238 (462.48 it/sec) -training >> step=4143100, episode=691 reward=0.785392 (510.90 it/sec) -training >> step=4143200, episode=691 reward=0.7706624 (460.76 it/sec) -training >> step=4143300, episode=691 reward=0.766124 (451.54 it/sec) -training >> step=4143400, episode=691 reward=0.7911336 (482.14 it/sec) -training >> step=4143500, episode=691 reward=0.7875698 (461.40 it/sec) -training >> step=4143600, episode=691 reward=0.7716992 (522.49 it/sec) -training >> step=4143700, episode=691 reward=0.7886876 (416.21 it/sec) -training >> step=4143800, episode=691 reward=0.7826633 (494.46 it/sec) -training >> step=4143900, episode=691 reward=0.7852725 (452.14 it/sec) -training >> step=4144000, episode=691 reward=0.7668593 (457.97 it/sec) -training >> step=4144100, episode=691 reward=0.7869651 (481.21 it/sec) -training >> step=4144200, episode=691 reward=0.7597938 (480.79 it/sec) -training >> step=4144300, episode=691 reward=0.7655491 (493.96 it/sec) -training >> step=4144400, episode=691 reward=0.775143 (502.33 it/sec) -training >> step=4144500, episode=691 reward=0.7429449 (529.06 it/sec) -training >> step=4144600, episode=691 reward=0.7728614 (535.48 it/sec) -training >> step=4144700, episode=691 reward=0.7392955 (536.85 it/sec) -training >> step=4144800, episode=691 reward=0.7631052 (521.64 it/sec) -training >> step=4144900, episode=691 reward=0.7747324 (565.88 it/sec) -training >> step=4145000, episode=691 reward=0.7653143 (504.31 it/sec) -training >> step=4145100, episode=691 reward=0.7742098 (522.20 it/sec) -training >> step=4145200, episode=691 reward=0.7689122 (556.32 it/sec) -training >> step=4145300, episode=692 reward=0.7826338 (131.80 it/sec) -training >> step=4145400, episode=692 reward=0.7847585 (502.25 it/sec) -training >> step=4145500, episode=692 reward=0.7704698 (490.60 it/sec) -training >> step=4145600, episode=692 reward=0.7735429 (538.63 it/sec) -training >> step=4145700, episode=692 reward=0.7721429 (502.84 it/sec) -training >> step=4145800, episode=692 reward=0.7745687 (542.07 it/sec) -training >> step=4145900, episode=692 reward=0.7732974 (538.82 it/sec) -training >> step=4146000, episode=692 reward=0.7619789 (528.90 it/sec) -training >> step=4146100, episode=692 reward=0.7926366 (509.12 it/sec) -training >> step=4146200, episode=692 reward=0.7846159 (480.54 it/sec) -training >> step=4146300, episode=692 reward=0.7599564 (533.82 it/sec) -training >> step=4146400, episode=692 reward=0.7781126 (503.13 it/sec) -training >> step=4146500, episode=692 reward=0.7754401 (472.19 it/sec) -training >> step=4146600, episode=692 reward=0.790118 (508.75 it/sec) -training >> step=4146700, episode=692 reward=0.7793244 (500.33 it/sec) -training >> step=4146800, episode=692 reward=0.7834092 (450.19 it/sec) -training >> step=4146900, episode=692 reward=0.7745807 (514.31 it/sec) -training >> step=4147000, episode=692 reward=0.7797263 (498.97 it/sec) -training >> step=4147100, episode=692 reward=0.7643099 (435.36 it/sec) -training >> step=4147200, episode=692 reward=0.7892979 (466.31 it/sec) -training >> step=4147300, episode=692 reward=0.7469617 (501.58 it/sec) -training >> step=4147400, episode=692 reward=0.7618003 (531.35 it/sec) -training >> step=4147500, episode=692 reward=0.7869897 (510.90 it/sec) -training >> step=4147600, episode=692 reward=0.794068 (488.75 it/sec) -training >> step=4147700, episode=692 reward=0.7953609 (505.30 it/sec) -training >> step=4147800, episode=692 reward=0.7744296 (515.67 it/sec) -training >> step=4147900, episode=692 reward=0.7552245 (349.82 it/sec) -training >> step=4148000, episode=692 reward=0.7822853 (478.00 it/sec) -training >> step=4148100, episode=692 reward=0.7634179 (492.36 it/sec) -training >> step=4148200, episode=692 reward=0.7654889 (488.44 it/sec) -training >> step=4148300, episode=692 reward=0.7971419 (548.16 it/sec) -training >> step=4148400, episode=692 reward=0.7792057 (486.53 it/sec) -training >> step=4148500, episode=692 reward=0.7536447 (422.60 it/sec) -training >> step=4148600, episode=692 reward=0.7724312 (474.85 it/sec) -training >> step=4148700, episode=692 reward=0.7820582 (520.51 it/sec) -training >> step=4148800, episode=692 reward=0.7772242 (497.04 it/sec) -training >> step=4148900, episode=692 reward=0.7587653 (573.98 it/sec) -training >> step=4149000, episode=692 reward=0.7610956 (525.27 it/sec) -training >> step=4149100, episode=692 reward=0.7776014 (465.68 it/sec) -training >> step=4149200, episode=692 reward=0.7623445 (541.36 it/sec) -training >> step=4149300, episode=692 reward=0.7677892 (497.62 it/sec) -training >> step=4149400, episode=692 reward=0.7637786 (533.24 it/sec) -training >> step=4149500, episode=692 reward=0.7765113 (547.03 it/sec) -training >> step=4149600, episode=692 reward=0.7635915 (473.15 it/sec) -training >> step=4149700, episode=692 reward=0.7707979 (514.03 it/sec) -training >> step=4149800, episode=692 reward=0.7864909 (532.79 it/sec) -training >> step=4149900, episode=692 reward=0.7874456 (544.43 it/sec) -training >> step=4150000, episode=692 reward=0.7688065 (560.38 it/sec) -training >> step=4150100, episode=692 reward=0.762937 (492.83 it/sec) -training >> step=4150200, episode=692 reward=0.7736174 (437.67 it/sec) -training >> step=4150300, episode=692 reward=0.7688109 (551.64 it/sec) -training >> step=4150400, episode=692 reward=0.7551841 (525.85 it/sec) -training >> step=4150500, episode=692 reward=0.763136 (536.37 it/sec) -training >> step=4150600, episode=692 reward=0.7863731 (514.47 it/sec) -training >> step=4150700, episode=692 reward=0.773502 (496.50 it/sec) -training >> step=4150800, episode=692 reward=0.789242 (545.17 it/sec) -training >> step=4150900, episode=692 reward=0.7730157 (500.70 it/sec) -training >> step=4151000, episode=692 reward=0.7723069 (554.62 it/sec) -training >> step=4151100, episode=692 reward=0.7673821 (526.50 it/sec) -training >> step=4151200, episode=692 reward=0.7757778 (465.41 it/sec) -training >> step=4151300, episode=693 reward=0.7577628 (113.19 it/sec) -training >> step=4151400, episode=693 reward=0.763546 (502.61 it/sec) -training >> step=4151500, episode=693 reward=0.7846213 (510.51 it/sec) -training >> step=4151600, episode=693 reward=0.7720364 (468.63 it/sec) -training >> step=4151700, episode=693 reward=0.7950248 (537.95 it/sec) -training >> step=4151800, episode=693 reward=0.7805236 (505.70 it/sec) -training >> step=4151900, episode=693 reward=0.7872343 (398.12 it/sec) -training >> step=4152000, episode=693 reward=0.7849696 (481.10 it/sec) -training >> step=4152100, episode=693 reward=0.7765319 (488.02 it/sec) -training >> step=4152200, episode=693 reward=0.7707084 (454.01 it/sec) -training >> step=4152300, episode=693 reward=0.7818312 (465.13 it/sec) -training >> step=4152400, episode=693 reward=0.7731276 (436.63 it/sec) -training >> step=4152500, episode=693 reward=0.7764421 (484.65 it/sec) -training >> step=4152600, episode=693 reward=0.779119 (482.91 it/sec) -training >> step=4152700, episode=693 reward=0.7892702 (467.67 it/sec) -training >> step=4152800, episode=693 reward=0.7821953 (489.76 it/sec) -training >> step=4152900, episode=693 reward=0.7814599 (496.31 it/sec) -training >> step=4153000, episode=693 reward=0.7903636 (519.06 it/sec) -training >> step=4153100, episode=693 reward=0.7867286 (507.36 it/sec) -training >> step=4153200, episode=693 reward=0.7570239 (529.57 it/sec) -training >> step=4153300, episode=693 reward=0.7939969 (440.67 it/sec) -training >> step=4153400, episode=693 reward=0.7474645 (499.22 it/sec) -training >> step=4153500, episode=693 reward=0.746807 (509.03 it/sec) -training >> step=4153600, episode=693 reward=0.7740836 (512.99 it/sec) -training >> step=4153700, episode=693 reward=0.788124 (478.12 it/sec) -training >> step=4153800, episode=693 reward=0.7600577 (417.95 it/sec) -training >> step=4153900, episode=693 reward=0.7946647 (479.41 it/sec) -training >> step=4154000, episode=693 reward=0.7706166 (382.65 it/sec) -training >> step=4154100, episode=693 reward=0.7674392 (537.88 it/sec) -training >> step=4154200, episode=693 reward=0.7687606 (515.95 it/sec) -training >> step=4154300, episode=693 reward=0.7975816 (484.59 it/sec) -training >> step=4154400, episode=693 reward=0.7810559 (484.00 it/sec) -training >> step=4154500, episode=693 reward=0.7730588 (511.64 it/sec) -training >> step=4154600, episode=693 reward=0.7786666 (514.43 it/sec) -training >> step=4154700, episode=693 reward=0.7826161 (580.78 it/sec) -training >> step=4154800, episode=693 reward=0.7811304 (463.30 it/sec) -training >> step=4154900, episode=693 reward=0.7849738 (418.09 it/sec) -training >> step=4155000, episode=693 reward=0.7834464 (504.44 it/sec) -training >> step=4155100, episode=693 reward=0.7733485 (485.38 it/sec) -training >> step=4155200, episode=693 reward=0.7793813 (504.56 it/sec) -training >> step=4155300, episode=693 reward=0.7760924 (488.80 it/sec) -training >> step=4155400, episode=693 reward=0.7768869 (493.84 it/sec) -training >> step=4155500, episode=693 reward=0.761399 (492.26 it/sec) -training >> step=4155600, episode=693 reward=0.7805966 (506.79 it/sec) -training >> step=4155700, episode=693 reward=0.7937894 (489.89 it/sec) -training >> step=4155800, episode=693 reward=0.752711 (465.73 it/sec) -training >> step=4155900, episode=693 reward=0.787491 (476.42 it/sec) -training >> step=4156000, episode=693 reward=0.7830595 (494.94 it/sec) -training >> step=4156100, episode=693 reward=0.7726181 (529.94 it/sec) -training >> step=4156200, episode=693 reward=0.7546823 (492.98 it/sec) -training >> step=4156300, episode=693 reward=0.7756565 (458.02 it/sec) -training >> step=4156400, episode=693 reward=0.7575669 (464.28 it/sec) -training >> step=4156500, episode=693 reward=0.7635244 (488.87 it/sec) -training >> step=4156600, episode=693 reward=0.773742 (512.44 it/sec) -training >> step=4156700, episode=693 reward=0.7582963 (461.39 it/sec) -training >> step=4156800, episode=693 reward=0.7675998 (497.80 it/sec) -training >> step=4156900, episode=693 reward=0.7792718 (446.14 it/sec) -training >> step=4157000, episode=693 reward=0.7789407 (442.18 it/sec) -training >> step=4157100, episode=693 reward=0.7512317 (485.30 it/sec) -training >> step=4157200, episode=693 reward=0.7505221 (524.72 it/sec) -training >> step=4157300, episode=694 reward=0.764326 (111.65 it/sec) -training >> step=4157400, episode=694 reward=0.7776548 (472.50 it/sec) -training >> step=4157500, episode=694 reward=0.7680298 (528.01 it/sec) -training >> step=4157600, episode=694 reward=0.7699119 (505.81 it/sec) -training >> step=4157700, episode=694 reward=0.7659608 (493.41 it/sec) -training >> step=4157800, episode=694 reward=0.7936702 (506.92 it/sec) -training >> step=4157900, episode=694 reward=0.7645991 (512.48 it/sec) -training >> step=4158000, episode=694 reward=0.7696154 (497.71 it/sec) -training >> step=4158100, episode=694 reward=0.7669001 (426.78 it/sec) -training >> step=4158200, episode=694 reward=0.7633861 (520.90 it/sec) -training >> step=4158300, episode=694 reward=0.7799713 (535.90 it/sec) -training >> step=4158400, episode=694 reward=0.7731375 (497.92 it/sec) -training >> step=4158500, episode=694 reward=0.7755758 (532.05 it/sec) -training >> step=4158600, episode=694 reward=0.7821798 (498.02 it/sec) -training >> step=4158700, episode=694 reward=0.7707793 (510.10 it/sec) -training >> step=4158800, episode=694 reward=0.7633485 (511.88 it/sec) -training >> step=4158900, episode=694 reward=0.7569596 (507.16 it/sec) -training >> step=4159000, episode=694 reward=0.8062604 (526.19 it/sec) -training >> step=4159100, episode=694 reward=0.7295828 (505.45 it/sec) -training >> step=4159200, episode=694 reward=0.7898998 (490.01 it/sec) -training >> step=4159300, episode=694 reward=0.7782198 (513.39 it/sec) -training >> step=4159400, episode=694 reward=0.7497053 (539.37 it/sec) -training >> step=4159500, episode=694 reward=0.7849287 (517.77 it/sec) -training >> step=4159600, episode=694 reward=0.7737534 (486.42 it/sec) -training >> step=4159700, episode=694 reward=0.7889296 (502.02 it/sec) -training >> step=4159800, episode=694 reward=0.7674726 (511.67 it/sec) -training >> step=4159900, episode=694 reward=0.7694448 (472.93 it/sec) -training >> step=4160000, episode=694 reward=0.7815596 (507.73 it/sec) -training >> step=4160100, episode=694 reward=0.7506583 (505.25 it/sec) -training >> step=4160200, episode=694 reward=0.7806077 (340.75 it/sec) -training >> step=4160300, episode=694 reward=0.7882984 (511.49 it/sec) -training >> step=4160400, episode=694 reward=0.7774956 (515.14 it/sec) -training >> step=4160500, episode=694 reward=0.7751105 (497.29 it/sec) -training >> step=4160600, episode=694 reward=0.7712575 (486.39 it/sec) -training >> step=4160700, episode=694 reward=0.7801939 (504.29 it/sec) -training >> step=4160800, episode=694 reward=0.7658715 (525.59 it/sec) -training >> step=4160900, episode=694 reward=0.7568285 (476.84 it/sec) -training >> step=4161000, episode=694 reward=0.7711543 (483.79 it/sec) -training >> step=4161100, episode=694 reward=0.7613196 (491.39 it/sec) -training >> step=4161200, episode=694 reward=0.786062 (504.77 it/sec) -training >> step=4161300, episode=694 reward=0.7573345 (501.21 it/sec) -training >> step=4161400, episode=694 reward=0.7890683 (481.69 it/sec) -training >> step=4161500, episode=694 reward=0.7948508 (511.14 it/sec) -training >> step=4161600, episode=694 reward=0.760381 (453.68 it/sec) -training >> step=4161700, episode=694 reward=0.756251 (485.64 it/sec) -training >> step=4161800, episode=694 reward=0.7710013 (553.75 it/sec) -training >> step=4161900, episode=694 reward=0.7926627 (461.72 it/sec) -training >> step=4162000, episode=694 reward=0.7831892 (481.83 it/sec) -training >> step=4162100, episode=694 reward=0.7969023 (467.53 it/sec) -training >> step=4162200, episode=694 reward=0.7595653 (460.69 it/sec) -training >> step=4162300, episode=694 reward=0.780246 (480.41 it/sec) -training >> step=4162400, episode=694 reward=0.7694436 (500.24 it/sec) -training >> step=4162500, episode=694 reward=0.7837263 (535.17 it/sec) -training >> step=4162600, episode=694 reward=0.7797474 (475.37 it/sec) -training >> step=4162700, episode=694 reward=0.7789962 (499.08 it/sec) -training >> step=4162800, episode=694 reward=0.7420393 (494.27 it/sec) -training >> step=4162900, episode=694 reward=0.7662695 (487.55 it/sec) -training >> step=4163000, episode=694 reward=0.781646 (512.17 it/sec) -training >> step=4163100, episode=694 reward=0.7844983 (468.99 it/sec) -training >> step=4163200, episode=694 reward=0.7771893 (501.77 it/sec) -training >> step=4163300, episode=695 reward=0.7691503 (110.89 it/sec) -training >> step=4163400, episode=695 reward=0.7857946 (382.48 it/sec) -training >> step=4163500, episode=695 reward=0.7696953 (485.08 it/sec) -training >> step=4163600, episode=695 reward=0.7784369 (493.29 it/sec) -training >> step=4163700, episode=695 reward=0.772103 (510.12 it/sec) -training >> step=4163800, episode=695 reward=0.7829806 (458.39 it/sec) -training >> step=4163900, episode=695 reward=0.7675886 (503.46 it/sec) -training >> step=4164000, episode=695 reward=0.7758063 (505.03 it/sec) -training >> step=4164100, episode=695 reward=0.7817169 (472.68 it/sec) -training >> step=4164200, episode=695 reward=0.7533334 (521.99 it/sec) -training >> step=4164300, episode=695 reward=0.7688061 (462.47 it/sec) -training >> step=4164400, episode=695 reward=0.7600565 (503.43 it/sec) -training >> step=4164500, episode=695 reward=0.7768339 (522.53 it/sec) -training >> step=4164600, episode=695 reward=0.772372 (541.42 it/sec) -training >> step=4164700, episode=695 reward=0.7823817 (499.33 it/sec) -training >> step=4164800, episode=695 reward=0.7651529 (459.92 it/sec) -training >> step=4164900, episode=695 reward=0.7540385 (522.97 it/sec) -training >> step=4165000, episode=695 reward=0.7667632 (538.88 it/sec) -training >> step=4165100, episode=695 reward=0.7718067 (477.53 it/sec) -training >> step=4165200, episode=695 reward=0.7779415 (509.86 it/sec) -training >> step=4165300, episode=695 reward=0.7724762 (476.47 it/sec) -training >> step=4165400, episode=695 reward=0.7835575 (498.87 it/sec) -training >> step=4165500, episode=695 reward=0.7768432 (518.24 it/sec) -training >> step=4165600, episode=695 reward=0.7773783 (471.41 it/sec) -training >> step=4165700, episode=695 reward=0.779312 (502.29 it/sec) -training >> step=4165800, episode=695 reward=0.7912132 (458.86 it/sec) -training >> step=4165900, episode=695 reward=0.7610651 (450.56 it/sec) -training >> step=4166000, episode=695 reward=0.7801316 (491.71 it/sec) -training >> step=4166100, episode=695 reward=0.7726678 (505.39 it/sec) -training >> step=4166200, episode=695 reward=0.7819759 (502.88 it/sec) -training >> step=4166300, episode=695 reward=0.7700422 (312.71 it/sec) -training >> step=4166400, episode=695 reward=0.7702664 (526.86 it/sec) -training >> step=4166500, episode=695 reward=0.7846963 (470.96 it/sec) -training >> step=4166600, episode=695 reward=0.7842065 (483.87 it/sec) -training >> step=4166700, episode=695 reward=0.7532727 (510.40 it/sec) -training >> step=4166800, episode=695 reward=0.7712606 (449.67 it/sec) -training >> step=4166900, episode=695 reward=0.770553 (499.10 it/sec) -training >> step=4167000, episode=695 reward=0.7648495 (435.57 it/sec) -training >> step=4167100, episode=695 reward=0.7658961 (475.30 it/sec) -training >> step=4167200, episode=695 reward=0.7796673 (510.39 it/sec) -training >> step=4167300, episode=695 reward=0.7679839 (500.54 it/sec) -training >> step=4167400, episode=695 reward=0.7885712 (513.36 it/sec) -training >> step=4167500, episode=695 reward=0.7858174 (526.95 it/sec) -training >> step=4167600, episode=695 reward=0.7574168 (500.54 it/sec) -training >> step=4167700, episode=695 reward=0.7649346 (390.86 it/sec) -training >> step=4167800, episode=695 reward=0.743775 (519.09 it/sec) -training >> step=4167900, episode=695 reward=0.7723544 (536.78 it/sec) -training >> step=4168000, episode=695 reward=0.7910746 (496.78 it/sec) -training >> step=4168100, episode=695 reward=0.7919645 (508.60 it/sec) -training >> step=4168200, episode=695 reward=0.7606838 (517.88 it/sec) -training >> step=4168300, episode=695 reward=0.7808301 (463.49 it/sec) -training >> step=4168400, episode=695 reward=0.7760936 (528.69 it/sec) -training >> step=4168500, episode=695 reward=0.7528058 (490.25 it/sec) -training >> step=4168600, episode=695 reward=0.7657194 (542.44 it/sec) -training >> step=4168700, episode=695 reward=0.7631597 (489.90 it/sec) -training >> step=4168800, episode=695 reward=0.7907389 (472.21 it/sec) -training >> step=4168900, episode=695 reward=0.7534609 (480.84 it/sec) -training >> step=4169000, episode=695 reward=0.7852044 (507.95 it/sec) -training >> step=4169100, episode=695 reward=0.7631297 (507.00 it/sec) -training >> step=4169200, episode=695 reward=0.7623172 (480.93 it/sec) -training >> step=4169300, episode=696 reward=0.7619957 (115.31 it/sec) -training >> step=4169400, episode=696 reward=0.7767198 (504.54 it/sec) -training >> step=4169500, episode=696 reward=0.7669697 (431.16 it/sec) -training >> step=4169600, episode=696 reward=0.7804978 (511.03 it/sec) -training >> step=4169700, episode=696 reward=0.7750977 (493.61 it/sec) -training >> step=4169800, episode=696 reward=0.7790127 (500.70 it/sec) -training >> step=4169900, episode=696 reward=0.7738965 (497.10 it/sec) -training >> step=4170000, episode=696 reward=0.7853859 (473.30 it/sec) -training >> step=4170100, episode=696 reward=0.7884695 (467.07 it/sec) -training >> step=4170200, episode=696 reward=0.7920442 (484.04 it/sec) -training >> step=4170300, episode=696 reward=0.7696927 (463.07 it/sec) -training >> step=4170400, episode=696 reward=0.780308 (496.70 it/sec) -training >> step=4170500, episode=696 reward=0.7873782 (440.07 it/sec) -training >> step=4170600, episode=696 reward=0.7853225 (440.99 it/sec) -training >> step=4170700, episode=696 reward=0.759917 (456.35 it/sec) -training >> step=4170800, episode=696 reward=0.7972782 (448.39 it/sec) -training >> step=4170900, episode=696 reward=0.7805988 (465.51 it/sec) -training >> step=4171000, episode=696 reward=0.7627877 (462.00 it/sec) -training >> step=4171100, episode=696 reward=0.7954963 (448.59 it/sec) -training >> step=4171200, episode=696 reward=0.7566701 (470.29 it/sec) -training >> step=4171300, episode=696 reward=0.7860625 (387.80 it/sec) -training >> step=4171400, episode=696 reward=0.7549117 (464.83 it/sec) -training >> step=4171500, episode=696 reward=0.7609667 (490.51 it/sec) -training >> step=4171600, episode=696 reward=0.7709447 (493.04 it/sec) -training >> step=4171700, episode=696 reward=0.7771146 (481.04 it/sec) -training >> step=4171800, episode=696 reward=0.7701588 (515.23 it/sec) -training >> step=4171900, episode=696 reward=0.7823703 (423.86 it/sec) -training >> step=4172000, episode=696 reward=0.7510757 (487.59 it/sec) -training >> step=4172100, episode=696 reward=0.7676688 (494.98 it/sec) -training >> step=4172200, episode=696 reward=0.7949987 (496.27 it/sec) -training >> step=4172300, episode=696 reward=0.7982208 (482.42 it/sec) -training >> step=4172400, episode=696 reward=0.772186 (428.12 it/sec) -training >> step=4172500, episode=696 reward=0.7800978 (366.28 it/sec) -training >> step=4172600, episode=696 reward=0.7737942 (474.74 it/sec) -training >> step=4172700, episode=696 reward=0.7712359 (452.08 it/sec) -training >> step=4172800, episode=696 reward=0.7957533 (452.91 it/sec) -training >> step=4172900, episode=696 reward=0.7716321 (487.73 it/sec) -training >> step=4173000, episode=696 reward=0.7708138 (435.18 it/sec) -training >> step=4173100, episode=696 reward=0.7816862 (419.85 it/sec) -training >> step=4173200, episode=696 reward=0.7661387 (467.20 it/sec) -training >> step=4173300, episode=696 reward=0.7563957 (442.28 it/sec) -training >> step=4173400, episode=696 reward=0.7763228 (443.23 it/sec) -training >> step=4173500, episode=696 reward=0.7836499 (461.22 it/sec) -training >> step=4173600, episode=696 reward=0.7749779 (473.53 it/sec) -training >> step=4173700, episode=696 reward=0.7660203 (515.31 it/sec) -training >> step=4173800, episode=696 reward=0.7919773 (455.65 it/sec) -training >> step=4173900, episode=696 reward=0.7816926 (436.22 it/sec) -training >> step=4174000, episode=696 reward=0.7657778 (474.58 it/sec) -training >> step=4174100, episode=696 reward=0.7640842 (477.55 it/sec) -training >> step=4174200, episode=696 reward=0.7672718 (480.01 it/sec) -training >> step=4174300, episode=696 reward=0.7834609 (448.23 it/sec) -training >> step=4174400, episode=696 reward=0.7888986 (507.83 it/sec) -training >> step=4174500, episode=696 reward=0.7753651 (518.65 it/sec) -training >> step=4174600, episode=696 reward=0.7548382 (515.12 it/sec) -training >> step=4174700, episode=696 reward=0.7816038 (529.11 it/sec) -training >> step=4174800, episode=696 reward=0.7594926 (473.55 it/sec) -training >> step=4174900, episode=696 reward=0.775061 (517.11 it/sec) -training >> step=4175000, episode=696 reward=0.7712178 (539.79 it/sec) -training >> step=4175100, episode=696 reward=0.7472482 (520.90 it/sec) -training >> step=4175200, episode=696 reward=0.7511229 (530.30 it/sec) -training >> step=4175300, episode=697 reward=0.7726669 (65.43 it/sec) -training >> step=4175400, episode=697 reward=0.7612785 (515.81 it/sec) -training >> step=4175500, episode=697 reward=0.7838005 (464.50 it/sec) -training >> step=4175600, episode=697 reward=0.8017136 (570.16 it/sec) -training >> step=4175700, episode=697 reward=0.787833 (485.74 it/sec) -training >> step=4175800, episode=697 reward=0.7778887 (525.80 it/sec) -training >> step=4175900, episode=697 reward=0.7753776 (484.64 it/sec) -training >> step=4176000, episode=697 reward=0.7729616 (525.08 it/sec) -training >> step=4176100, episode=697 reward=0.7626319 (487.06 it/sec) -training >> step=4176200, episode=697 reward=0.7888639 (513.83 it/sec) -training >> step=4176300, episode=697 reward=0.7925375 (489.85 it/sec) -training >> step=4176400, episode=697 reward=0.7809343 (476.17 it/sec) -training >> step=4176500, episode=697 reward=0.7771651 (474.50 it/sec) -training >> step=4176600, episode=697 reward=0.7571002 (518.93 it/sec) -training >> step=4176700, episode=697 reward=0.7654538 (453.93 it/sec) -training >> step=4176800, episode=697 reward=0.7863035 (455.69 it/sec) -training >> step=4176900, episode=697 reward=0.7635292 (462.64 it/sec) -training >> step=4177000, episode=697 reward=0.7713397 (489.13 it/sec) -training >> step=4177100, episode=697 reward=0.7636576 (492.29 it/sec) -training >> step=4177200, episode=697 reward=0.7796049 (524.79 it/sec) -training >> step=4177300, episode=697 reward=0.7605338 (492.40 it/sec) -training >> step=4177400, episode=697 reward=0.7826047 (540.28 it/sec) -training >> step=4177500, episode=697 reward=0.7747176 (539.89 it/sec) -training >> step=4177600, episode=697 reward=0.7991834 (534.79 it/sec) -training >> step=4177700, episode=697 reward=0.7813699 (545.12 it/sec) -training >> step=4177800, episode=697 reward=0.7810326 (493.68 it/sec) -training >> step=4177900, episode=697 reward=0.7746758 (495.92 it/sec) -training >> step=4178000, episode=697 reward=0.7680206 (548.77 it/sec) -training >> step=4178100, episode=697 reward=0.7852897 (497.51 it/sec) -training >> step=4178200, episode=697 reward=0.7919356 (528.35 it/sec) -training >> step=4178300, episode=697 reward=0.7643576 (523.94 it/sec) -training >> step=4178400, episode=697 reward=0.7914341 (541.81 it/sec) -training >> step=4178500, episode=697 reward=0.7903634 (514.11 it/sec) -training >> step=4178600, episode=697 reward=0.7629712 (365.52 it/sec) -training >> step=4178700, episode=697 reward=0.7401258 (507.75 it/sec) -training >> step=4178800, episode=697 reward=0.776299 (530.92 it/sec) -training >> step=4178900, episode=697 reward=0.7822756 (509.47 it/sec) -training >> step=4179000, episode=697 reward=0.7754771 (531.29 it/sec) -training >> step=4179100, episode=697 reward=0.7778365 (527.22 it/sec) -training >> step=4179200, episode=697 reward=0.7597408 (490.87 it/sec) -training >> step=4179300, episode=697 reward=0.776751 (537.86 it/sec) -training >> step=4179400, episode=697 reward=0.7813297 (476.96 it/sec) -training >> step=4179500, episode=697 reward=0.7747945 (535.88 it/sec) -training >> step=4179600, episode=697 reward=0.7921404 (518.55 it/sec) -training >> step=4179700, episode=697 reward=0.7875099 (517.08 it/sec) -training >> step=4179800, episode=697 reward=0.7960964 (521.08 it/sec) -training >> step=4179900, episode=697 reward=0.7652245 (523.06 it/sec) -training >> step=4180000, episode=697 reward=0.7664233 (519.27 it/sec) -training >> step=4180100, episode=697 reward=0.7443948 (537.65 it/sec) -training >> step=4180200, episode=697 reward=0.7910901 (555.85 it/sec) -training >> step=4180300, episode=697 reward=0.7750631 (497.80 it/sec) -training >> step=4180400, episode=697 reward=0.7725272 (526.03 it/sec) -training >> step=4180500, episode=697 reward=0.7631692 (499.30 it/sec) -training >> step=4180600, episode=697 reward=0.7597589 (578.72 it/sec) -training >> step=4180700, episode=697 reward=0.7820024 (546.68 it/sec) -training >> step=4180800, episode=697 reward=0.7706603 (507.76 it/sec) -training >> step=4180900, episode=697 reward=0.7942729 (566.28 it/sec) -training >> step=4181000, episode=697 reward=0.7596546 (495.07 it/sec) -training >> step=4181100, episode=697 reward=0.7531183 (512.57 it/sec) -training >> step=4181200, episode=697 reward=0.7696888 (555.75 it/sec) -training >> step=4181300, episode=698 reward=0.7792149 (49.50 it/sec) -training >> step=4181400, episode=698 reward=0.7782729 (509.38 it/sec) -training >> step=4181500, episode=698 reward=0.7723424 (510.68 it/sec) -training >> step=4181600, episode=698 reward=0.7800176 (520.24 it/sec) -training >> step=4181700, episode=698 reward=0.7628137 (518.56 it/sec) -training >> step=4181800, episode=698 reward=0.786044 (545.80 it/sec) -training >> step=4181900, episode=698 reward=0.7797403 (556.77 it/sec) -training >> step=4182000, episode=698 reward=0.7672892 (491.20 it/sec) -training >> step=4182100, episode=698 reward=0.7842043 (544.71 it/sec) -training >> step=4182200, episode=698 reward=0.7782791 (519.11 it/sec) -training >> step=4182300, episode=698 reward=0.7570213 (536.77 it/sec) -training >> step=4182400, episode=698 reward=0.7598232 (532.13 it/sec) -training >> step=4182500, episode=698 reward=0.7634003 (537.52 it/sec) -training >> step=4182600, episode=698 reward=0.7821469 (548.35 it/sec) -training >> step=4182700, episode=698 reward=0.7861679 (500.54 it/sec) -training >> step=4182800, episode=698 reward=0.7698296 (510.29 it/sec) -training >> step=4182900, episode=698 reward=0.782326 (554.08 it/sec) -training >> step=4183000, episode=698 reward=0.7755167 (559.82 it/sec) -training >> step=4183100, episode=698 reward=0.7816105 (545.87 it/sec) -training >> step=4183200, episode=698 reward=0.7977827 (470.18 it/sec) -training >> step=4183300, episode=698 reward=0.7494349 (500.04 it/sec) -training >> step=4183400, episode=698 reward=0.7922567 (500.00 it/sec) -training >> step=4183500, episode=698 reward=0.7807546 (542.78 it/sec) -training >> step=4183600, episode=698 reward=0.7661874 (553.25 it/sec) -training >> step=4183700, episode=698 reward=0.7798492 (548.59 it/sec) -training >> step=4183800, episode=698 reward=0.7764609 (472.92 it/sec) -training >> step=4183900, episode=698 reward=0.7721021 (523.57 it/sec) -training >> step=4184000, episode=698 reward=0.8040205 (544.75 it/sec) -training >> step=4184100, episode=698 reward=0.7819713 (521.74 it/sec) -training >> step=4184200, episode=698 reward=0.7720177 (530.10 it/sec) -training >> step=4184300, episode=698 reward=0.7833371 (494.84 it/sec) -training >> step=4184400, episode=698 reward=0.7627732 (526.46 it/sec) -training >> step=4184500, episode=698 reward=0.7901916 (511.21 it/sec) -training >> step=4184600, episode=698 reward=0.7822717 (489.61 it/sec) -training >> step=4184700, episode=698 reward=0.768819 (506.93 it/sec) -training >> step=4184800, episode=698 reward=0.7755593 (384.44 it/sec) -training >> step=4184900, episode=698 reward=0.7565985 (485.91 it/sec) -training >> step=4185000, episode=698 reward=0.788771 (527.87 it/sec) -training >> step=4185100, episode=698 reward=0.7634277 (479.21 it/sec) -training >> step=4185200, episode=698 reward=0.7442012 (509.40 it/sec) -training >> step=4185300, episode=698 reward=0.7809291 (461.19 it/sec) -training >> step=4185400, episode=698 reward=0.7950967 (514.35 it/sec) -training >> step=4185500, episode=698 reward=0.7943573 (489.64 it/sec) -training >> step=4185600, episode=698 reward=0.7812221 (382.86 it/sec) -training >> step=4185700, episode=698 reward=0.7858413 (510.41 it/sec) -training >> step=4185800, episode=698 reward=0.7818571 (449.87 it/sec) -training >> step=4185900, episode=698 reward=0.7872577 (492.71 it/sec) -training >> step=4186000, episode=698 reward=0.7739949 (510.16 it/sec) -training >> step=4186100, episode=698 reward=0.755922 (538.84 it/sec) -training >> step=4186200, episode=698 reward=0.7433504 (481.17 it/sec) -training >> step=4186300, episode=698 reward=0.7651216 (483.09 it/sec) -training >> step=4186400, episode=698 reward=0.7586316 (496.07 it/sec) -training >> step=4186500, episode=698 reward=0.7877963 (526.26 it/sec) -training >> step=4186600, episode=698 reward=0.7699287 (483.37 it/sec) -training >> step=4186700, episode=698 reward=0.7553772 (503.79 it/sec) -training >> step=4186800, episode=698 reward=0.7669016 (418.80 it/sec) -training >> step=4186900, episode=698 reward=0.7800377 (500.97 it/sec) -training >> step=4187000, episode=698 reward=0.7861285 (482.80 it/sec) -training >> step=4187100, episode=698 reward=0.8016372 (518.95 it/sec) -training >> step=4187200, episode=698 reward=0.761767 (545.19 it/sec) -training >> step=4187300, episode=699 reward=0.7635829 (49.76 it/sec) -training >> step=4187400, episode=699 reward=0.758665 (451.60 it/sec) -training >> step=4187500, episode=699 reward=0.7352462 (483.72 it/sec) -training >> step=4187600, episode=699 reward=0.7578384 (452.45 it/sec) -training >> step=4187700, episode=699 reward=0.7907734 (488.92 it/sec) -training >> step=4187800, episode=699 reward=0.7740888 (500.84 it/sec) -training >> step=4187900, episode=699 reward=0.7810298 (449.17 it/sec) -training >> step=4188000, episode=699 reward=0.7601312 (428.37 it/sec) -training >> step=4188100, episode=699 reward=0.7673818 (519.16 it/sec) -training >> step=4188200, episode=699 reward=0.7788989 (514.53 it/sec) -training >> step=4188300, episode=699 reward=0.749315 (499.88 it/sec) -training >> step=4188400, episode=699 reward=0.7678152 (487.52 it/sec) -training >> step=4188500, episode=699 reward=0.7716362 (483.06 it/sec) -training >> step=4188600, episode=699 reward=0.7836077 (481.66 it/sec) -training >> step=4188700, episode=699 reward=0.7718515 (537.77 it/sec) -training >> step=4188800, episode=699 reward=0.7917301 (512.92 it/sec) -training >> step=4188900, episode=699 reward=0.7785367 (465.14 it/sec) -training >> step=4189000, episode=699 reward=0.7502442 (482.56 it/sec) -training >> step=4189100, episode=699 reward=0.7952986 (506.79 it/sec) -training >> step=4189200, episode=699 reward=0.7701328 (452.64 it/sec) -training >> step=4189300, episode=699 reward=0.7803097 (513.17 it/sec) -training >> step=4189400, episode=699 reward=0.7750987 (454.93 it/sec) -training >> step=4189500, episode=699 reward=0.7784525 (516.52 it/sec) -training >> step=4189600, episode=699 reward=0.7773585 (498.42 it/sec) -training >> step=4189700, episode=699 reward=0.7743809 (453.37 it/sec) -training >> step=4189800, episode=699 reward=0.7776434 (511.64 it/sec) -training >> step=4189900, episode=699 reward=0.7735085 (471.35 it/sec) -training >> step=4190000, episode=699 reward=0.7909698 (497.93 it/sec) -training >> step=4190100, episode=699 reward=0.7772006 (522.16 it/sec) -training >> step=4190200, episode=699 reward=0.7666023 (501.12 it/sec) -training >> step=4190300, episode=699 reward=0.8015574 (485.44 it/sec) -training >> step=4190400, episode=699 reward=0.7785853 (466.85 it/sec) -training >> step=4190500, episode=699 reward=0.7787468 (497.01 it/sec) -training >> step=4190600, episode=699 reward=0.7734301 (436.67 it/sec) -training >> step=4190700, episode=699 reward=0.7833142 (374.84 it/sec) -training >> step=4190800, episode=699 reward=0.8006905 (524.28 it/sec) -training >> step=4190900, episode=699 reward=0.7785743 (476.06 it/sec) -training >> step=4191000, episode=699 reward=0.7654468 (441.96 it/sec) -training >> step=4191100, episode=699 reward=0.7755901 (529.67 it/sec) -training >> step=4191200, episode=699 reward=0.7625117 (523.97 it/sec) -training >> step=4191300, episode=699 reward=0.7929674 (564.62 it/sec) -training >> step=4191400, episode=699 reward=0.7773621 (482.19 it/sec) -training >> step=4191500, episode=699 reward=0.7711629 (485.18 it/sec) -training >> step=4191600, episode=699 reward=0.7792063 (508.92 it/sec) -training >> step=4191700, episode=699 reward=0.769466 (475.57 it/sec) -training >> step=4191800, episode=699 reward=0.7962373 (482.62 it/sec) -training >> step=4191900, episode=699 reward=0.7772467 (423.76 it/sec) -training >> step=4192000, episode=699 reward=0.7667444 (432.92 it/sec) -training >> step=4192100, episode=699 reward=0.7842157 (393.60 it/sec) -training >> step=4192200, episode=699 reward=0.7658345 (490.46 it/sec) -training >> step=4192300, episode=699 reward=0.7631505 (478.26 it/sec) -training >> step=4192400, episode=699 reward=0.7638121 (463.74 it/sec) -training >> step=4192500, episode=699 reward=0.7717742 (474.91 it/sec) -training >> step=4192600, episode=699 reward=0.7837727 (452.41 it/sec) -training >> step=4192700, episode=699 reward=0.7863677 (452.98 it/sec) -training >> step=4192800, episode=699 reward=0.7764336 (452.79 it/sec) -training >> step=4192900, episode=699 reward=0.7610565 (425.50 it/sec) -training >> step=4193000, episode=699 reward=0.7964894 (475.38 it/sec) -training >> step=4193100, episode=699 reward=0.7823802 (525.19 it/sec) -training >> step=4193200, episode=699 reward=0.7724627 (424.61 it/sec) -training >> step=4193300, episode=700 reward=0.7840174 (65.32 it/sec) -training >> step=4193400, episode=700 reward=0.7554803 (458.40 it/sec) -training >> step=4193500, episode=700 reward=0.7587255 (489.20 it/sec) -training >> step=4193600, episode=700 reward=0.7728311 (496.94 it/sec) -training >> step=4193700, episode=700 reward=0.7969805 (453.30 it/sec) -training >> step=4193800, episode=700 reward=0.7719582 (453.39 it/sec) -training >> step=4193900, episode=700 reward=0.7797278 (461.64 it/sec) -training >> step=4194000, episode=700 reward=0.7808826 (491.62 it/sec) -training >> step=4194100, episode=700 reward=0.7785658 (449.36 it/sec) -training >> step=4194200, episode=700 reward=0.7728641 (457.21 it/sec) -training >> step=4194300, episode=700 reward=0.7978937 (507.42 it/sec) -training >> step=4194400, episode=700 reward=0.7808574 (440.51 it/sec) -training >> step=4194500, episode=700 reward=0.7901537 (462.50 it/sec) -training >> step=4194600, episode=700 reward=0.7587656 (445.53 it/sec) -training >> step=4194700, episode=700 reward=0.7862171 (452.78 it/sec) -training >> step=4194800, episode=700 reward=0.7623536 (503.08 it/sec) -training >> step=4194900, episode=700 reward=0.7688853 (509.55 it/sec) -training >> step=4195000, episode=700 reward=0.7716171 (462.69 it/sec) -training >> step=4195100, episode=700 reward=0.7874089 (437.57 it/sec) -training >> step=4195200, episode=700 reward=0.7729157 (446.81 it/sec) -training >> step=4195300, episode=700 reward=0.7759592 (493.13 it/sec) -training >> step=4195400, episode=700 reward=0.777702 (446.24 it/sec) -training >> step=4195500, episode=700 reward=0.7856203 (460.02 it/sec) -training >> step=4195600, episode=700 reward=0.7777463 (429.55 it/sec) -training >> step=4195700, episode=700 reward=0.7954382 (396.42 it/sec) -training >> step=4195800, episode=700 reward=0.7810512 (444.34 it/sec) -training >> step=4195900, episode=700 reward=0.7780948 (481.17 it/sec) -training >> step=4196000, episode=700 reward=0.7784725 (480.46 it/sec) -training >> step=4196100, episode=700 reward=0.7841185 (416.02 it/sec) -training >> step=4196200, episode=700 reward=0.79084 (431.37 it/sec) -training >> step=4196300, episode=700 reward=0.7655668 (459.23 it/sec) -training >> step=4196400, episode=700 reward=0.776696 (473.57 it/sec) -training >> step=4196500, episode=700 reward=0.7849734 (425.77 it/sec) -training >> step=4196600, episode=700 reward=0.7821638 (424.34 it/sec) -training >> step=4196700, episode=700 reward=0.7857939 (462.80 it/sec) -training >> step=4196800, episode=700 reward=0.7661798 (342.18 it/sec) -training >> step=4196900, episode=700 reward=0.780003 (448.37 it/sec) -training >> step=4197000, episode=700 reward=0.7840658 (411.61 it/sec) -training >> step=4197100, episode=700 reward=0.7753677 (448.31 it/sec) -training >> step=4197200, episode=700 reward=0.7879854 (473.01 it/sec) -training >> step=4197300, episode=700 reward=0.755783 (431.31 it/sec) -training >> step=4197400, episode=700 reward=0.7718089 (418.52 it/sec) -training >> step=4197500, episode=700 reward=0.7684075 (369.51 it/sec) -training >> step=4197600, episode=700 reward=0.7857651 (450.57 it/sec) -training >> step=4197700, episode=700 reward=0.7480643 (465.55 it/sec) -training >> step=4197800, episode=700 reward=0.7954023 (460.15 it/sec) -training >> step=4197900, episode=700 reward=0.7723649 (498.68 it/sec) -training >> step=4198000, episode=700 reward=0.7733366 (468.30 it/sec) -training >> step=4198100, episode=700 reward=0.7743346 (496.53 it/sec) -training >> step=4198200, episode=700 reward=0.7690959 (472.31 it/sec) -training >> step=4198300, episode=700 reward=0.7878519 (469.83 it/sec) -training >> step=4198400, episode=700 reward=0.7738512 (453.13 it/sec) -training >> step=4198500, episode=700 reward=0.7757193 (508.22 it/sec) -training >> step=4198600, episode=700 reward=0.7665425 (492.06 it/sec) -training >> step=4198700, episode=700 reward=0.7897047 (457.13 it/sec) -training >> step=4198800, episode=700 reward=0.7790951 (464.18 it/sec) -training >> step=4198900, episode=700 reward=0.7679334 (463.70 it/sec) -training >> step=4199000, episode=700 reward=0.7686092 (502.79 it/sec) -training >> step=4199100, episode=700 reward=0.7758335 (509.39 it/sec) -training >> step=4199200, episode=700 reward=0.7777722 (507.48 it/sec) -training >> step=4199300, episode=701 reward=0.7489563 (90.77 it/sec) -training >> step=4199400, episode=701 reward=0.7543661 (469.27 it/sec) -training >> step=4199500, episode=701 reward=0.7655361 (477.43 it/sec) -training >> step=4199600, episode=701 reward=0.7883143 (514.72 it/sec) -training >> step=4199700, episode=701 reward=0.7726223 (493.34 it/sec) -training >> step=4199800, episode=701 reward=0.7856646 (484.68 it/sec) -training >> step=4199900, episode=701 reward=0.792073 (493.19 it/sec) -training >> step=4200000, episode=701 reward=0.7837892 (443.76 it/sec) -training >> step=4200100, episode=701 reward=0.7846004 (501.06 it/sec) -training >> step=4200200, episode=701 reward=0.7564107 (472.81 it/sec) -training >> step=4200300, episode=701 reward=0.7793321 (510.90 it/sec) -training >> step=4200400, episode=701 reward=0.7764459 (490.19 it/sec) -training >> step=4200500, episode=701 reward=0.7763507 (459.74 it/sec) -training >> step=4200600, episode=701 reward=0.7931504 (498.45 it/sec) -training >> step=4200700, episode=701 reward=0.7594255 (479.85 it/sec) -training >> step=4200800, episode=701 reward=0.7811654 (489.37 it/sec) -training >> step=4200900, episode=701 reward=0.7668195 (465.17 it/sec) -training >> step=4201000, episode=701 reward=0.7674689 (460.18 it/sec) -training >> step=4201100, episode=701 reward=0.7774467 (465.10 it/sec) -training >> step=4201200, episode=701 reward=0.7814689 (500.96 it/sec) -training >> step=4201300, episode=701 reward=0.7795849 (481.72 it/sec) -training >> step=4201400, episode=701 reward=0.7684929 (524.27 it/sec) -training >> step=4201500, episode=701 reward=0.7709528 (495.34 it/sec) -training >> step=4201600, episode=701 reward=0.7865832 (528.71 it/sec) -training >> step=4201700, episode=701 reward=0.7778835 (523.86 it/sec) -training >> step=4201800, episode=701 reward=0.7756505 (477.64 it/sec) -training >> step=4201900, episode=701 reward=0.7685658 (496.77 it/sec) -training >> step=4202000, episode=701 reward=0.771244 (488.91 it/sec) -training >> step=4202100, episode=701 reward=0.7715759 (521.02 it/sec) -training >> step=4202200, episode=701 reward=0.7778829 (526.59 it/sec) -training >> step=4202300, episode=701 reward=0.7647409 (508.78 it/sec) -training >> step=4202400, episode=701 reward=0.7893427 (507.35 it/sec) -training >> step=4202500, episode=701 reward=0.7843108 (471.87 it/sec) -training >> step=4202600, episode=701 reward=0.7646548 (484.88 it/sec) -training >> step=4202700, episode=701 reward=0.7784888 (487.82 it/sec) -training >> step=4202800, episode=701 reward=0.7770113 (538.08 it/sec) -training >> step=4202900, episode=701 reward=0.7774637 (470.37 it/sec) -training >> step=4203000, episode=701 reward=0.7594184 (422.95 it/sec) -training >> step=4203100, episode=701 reward=0.787157 (379.00 it/sec) -training >> step=4203200, episode=701 reward=0.7747146 (536.35 it/sec) -training >> step=4203300, episode=701 reward=0.7928851 (476.54 it/sec) -training >> step=4203400, episode=701 reward=0.776378 (506.56 it/sec) -training >> step=4203500, episode=701 reward=0.7556642 (489.00 it/sec) -training >> step=4203600, episode=701 reward=0.794921 (490.35 it/sec) -training >> step=4203700, episode=701 reward=0.7723736 (499.80 it/sec) -training >> step=4203800, episode=701 reward=0.7759797 (460.04 it/sec) -training >> step=4203900, episode=701 reward=0.7766553 (518.97 it/sec) -training >> step=4204000, episode=701 reward=0.7629282 (469.86 it/sec) -training >> step=4204100, episode=701 reward=0.7752011 (507.92 it/sec) -training >> step=4204200, episode=701 reward=0.7746657 (530.96 it/sec) -training >> step=4204300, episode=701 reward=0.7691406 (510.15 it/sec) -training >> step=4204400, episode=701 reward=0.7721363 (504.77 it/sec) -training >> step=4204500, episode=701 reward=0.7586815 (473.25 it/sec) -training >> step=4204600, episode=701 reward=0.7745566 (510.03 it/sec) -training >> step=4204700, episode=701 reward=0.7698401 (523.19 it/sec) -training >> step=4204800, episode=701 reward=0.7829993 (493.66 it/sec) -training >> step=4204900, episode=701 reward=0.7511186 (507.36 it/sec) -training >> step=4205000, episode=701 reward=0.7599605 (517.85 it/sec) -training >> step=4205100, episode=701 reward=0.774187 (492.87 it/sec) -training >> step=4205200, episode=701 reward=0.7707431 (464.95 it/sec) -training >> step=4205300, episode=702 reward=0.7876938 (95.81 it/sec) -training >> step=4205400, episode=702 reward=0.7623762 (485.25 it/sec) -training >> step=4205500, episode=702 reward=0.7897076 (443.45 it/sec) -training >> step=4205600, episode=702 reward=0.7862128 (497.54 it/sec) -training >> step=4205700, episode=702 reward=0.7745133 (515.77 it/sec) -training >> step=4205800, episode=702 reward=0.7779995 (442.06 it/sec) -training >> step=4205900, episode=702 reward=0.790541 (499.17 it/sec) -training >> step=4206000, episode=702 reward=0.7838054 (519.91 it/sec) -training >> step=4206100, episode=702 reward=0.7743086 (467.41 it/sec) -training >> step=4206200, episode=702 reward=0.7814432 (501.12 it/sec) -training >> step=4206300, episode=702 reward=0.7759279 (482.65 it/sec) -training >> step=4206400, episode=702 reward=0.7640407 (480.34 it/sec) -training >> step=4206500, episode=702 reward=0.7986687 (496.89 it/sec) -training >> step=4206600, episode=702 reward=0.7954817 (508.15 it/sec) -training >> step=4206700, episode=702 reward=0.7740655 (526.76 it/sec) -training >> step=4206800, episode=702 reward=0.7813929 (508.79 it/sec) -training >> step=4206900, episode=702 reward=0.7684743 (513.39 it/sec) -training >> step=4207000, episode=702 reward=0.7574036 (493.96 it/sec) -training >> step=4207100, episode=702 reward=0.7521446 (477.53 it/sec) -training >> step=4207200, episode=702 reward=0.8060502 (496.04 it/sec) -training >> step=4207300, episode=702 reward=0.7895321 (501.85 it/sec) -training >> step=4207400, episode=702 reward=0.7981064 (525.67 it/sec) -training >> step=4207500, episode=702 reward=0.7631327 (492.68 it/sec) -training >> step=4207600, episode=702 reward=0.7522211 (498.31 it/sec) -training >> step=4207700, episode=702 reward=0.7770616 (505.35 it/sec) -training >> step=4207800, episode=702 reward=0.7712096 (505.77 it/sec) -training >> step=4207900, episode=702 reward=0.7912223 (487.45 it/sec) -training >> step=4208000, episode=702 reward=0.7705892 (493.52 it/sec) -training >> step=4208100, episode=702 reward=0.7561099 (530.40 it/sec) -training >> step=4208200, episode=702 reward=0.7770717 (493.48 it/sec) -training >> step=4208300, episode=702 reward=0.7646372 (488.32 it/sec) -training >> step=4208400, episode=702 reward=0.7721496 (504.83 it/sec) -training >> step=4208500, episode=702 reward=0.7839915 (491.39 it/sec) -training >> step=4208600, episode=702 reward=0.7630049 (495.97 it/sec) -training >> step=4208700, episode=702 reward=0.7603362 (454.07 it/sec) -training >> step=4208800, episode=702 reward=0.7796044 (518.13 it/sec) -training >> step=4208900, episode=702 reward=0.7657753 (513.19 it/sec) -training >> step=4209000, episode=702 reward=0.7807097 (486.06 it/sec) -training >> step=4209100, episode=702 reward=0.7768568 (368.35 it/sec) -training >> step=4209200, episode=702 reward=0.7631356 (504.10 it/sec) -training >> step=4209300, episode=702 reward=0.781342 (498.66 it/sec) -training >> step=4209400, episode=702 reward=0.7665733 (505.04 it/sec) -training >> step=4209500, episode=702 reward=0.7749726 (496.90 it/sec) -training >> step=4209600, episode=702 reward=0.769636 (516.69 it/sec) -training >> step=4209700, episode=702 reward=0.778038 (475.53 it/sec) -training >> step=4209800, episode=702 reward=0.79447 (531.96 it/sec) -training >> step=4209900, episode=702 reward=0.7804004 (493.07 it/sec) -training >> step=4210000, episode=702 reward=0.7809101 (476.64 it/sec) -training >> step=4210100, episode=702 reward=0.7888713 (500.05 it/sec) -training >> step=4210200, episode=702 reward=0.7760823 (481.66 it/sec) -training >> step=4210300, episode=702 reward=0.7612611 (546.89 it/sec) -training >> step=4210400, episode=702 reward=0.7645243 (496.64 it/sec) -training >> step=4210500, episode=702 reward=0.7534926 (435.13 it/sec) -training >> step=4210600, episode=702 reward=0.7710516 (516.99 it/sec) -training >> step=4210700, episode=702 reward=0.7784705 (476.65 it/sec) -training >> step=4210800, episode=702 reward=0.7462971 (513.73 it/sec) -training >> step=4210900, episode=702 reward=0.7826686 (503.41 it/sec) -training >> step=4211000, episode=702 reward=0.7952107 (477.20 it/sec) -training >> step=4211100, episode=702 reward=0.7734365 (485.64 it/sec) -training >> step=4211200, episode=702 reward=0.7846737 (463.20 it/sec) -training >> step=4211300, episode=703 reward=0.7929568 (86.12 it/sec) -training >> step=4211400, episode=703 reward=0.7966807 (468.32 it/sec) -training >> step=4211500, episode=703 reward=0.7556941 (466.95 it/sec) -training >> step=4211600, episode=703 reward=0.7780276 (503.25 it/sec) -training >> step=4211700, episode=703 reward=0.7691982 (519.74 it/sec) -training >> step=4211800, episode=703 reward=0.7650162 (489.63 it/sec) -training >> step=4211900, episode=703 reward=0.7638485 (501.01 it/sec) -training >> step=4212000, episode=703 reward=0.7780891 (506.34 it/sec) -training >> step=4212100, episode=703 reward=0.7711043 (467.78 it/sec) -training >> step=4212200, episode=703 reward=0.7727626 (503.64 it/sec) -training >> step=4212300, episode=703 reward=0.7737656 (522.86 it/sec) -training >> step=4212400, episode=703 reward=0.7938361 (503.38 it/sec) -training >> step=4212500, episode=703 reward=0.7738714 (473.35 it/sec) -training >> step=4212600, episode=703 reward=0.7808761 (512.58 it/sec) -training >> step=4212700, episode=703 reward=0.7867945 (497.46 it/sec) -training >> step=4212800, episode=703 reward=0.7856767 (494.87 it/sec) -training >> step=4212900, episode=703 reward=0.7890954 (517.63 it/sec) -training >> step=4213000, episode=703 reward=0.7566293 (531.79 it/sec) -training >> step=4213100, episode=703 reward=0.7845145 (497.61 it/sec) -training >> step=4213200, episode=703 reward=0.775044 (494.77 it/sec) -training >> step=4213300, episode=703 reward=0.7905188 (500.02 it/sec) -training >> step=4213400, episode=703 reward=0.7736464 (520.66 it/sec) -training >> step=4213500, episode=703 reward=0.7662551 (496.96 it/sec) -training >> step=4213600, episode=703 reward=0.7911757 (458.29 it/sec) -training >> step=4213700, episode=703 reward=0.7532683 (470.68 it/sec) -training >> step=4213800, episode=703 reward=0.7586447 (511.14 it/sec) -training >> step=4213900, episode=703 reward=0.7894411 (486.66 it/sec) -training >> step=4214000, episode=703 reward=0.7867663 (476.42 it/sec) -training >> step=4214100, episode=703 reward=0.7856687 (506.02 it/sec) -training >> step=4214200, episode=703 reward=0.793119 (493.08 it/sec) -training >> step=4214300, episode=703 reward=0.7670252 (500.27 it/sec) -training >> step=4214400, episode=703 reward=0.7648886 (495.65 it/sec) -training >> step=4214500, episode=703 reward=0.7713976 (456.35 it/sec) -training >> step=4214600, episode=703 reward=0.7774698 (440.79 it/sec) -training >> step=4214700, episode=703 reward=0.7766988 (497.72 it/sec) -training >> step=4214800, episode=703 reward=0.7957944 (485.80 it/sec) -training >> step=4214900, episode=703 reward=0.7915025 (497.97 it/sec) -training >> step=4215000, episode=703 reward=0.764208 (450.64 it/sec) -training >> step=4215100, episode=703 reward=0.8037986 (498.26 it/sec) -training >> step=4215200, episode=703 reward=0.7707824 (499.41 it/sec) -training >> step=4215300, episode=703 reward=0.7849559 (354.75 it/sec) -training >> step=4215400, episode=703 reward=0.7725767 (518.16 it/sec) -training >> step=4215500, episode=703 reward=0.7750148 (484.02 it/sec) -training >> step=4215600, episode=703 reward=0.7888342 (441.39 it/sec) -training >> step=4215700, episode=703 reward=0.7838863 (465.95 it/sec) -training >> step=4215800, episode=703 reward=0.7794434 (473.66 it/sec) -training >> step=4215900, episode=703 reward=0.7715251 (488.91 it/sec) -training >> step=4216000, episode=703 reward=0.7753536 (432.97 it/sec) -training >> step=4216100, episode=703 reward=0.7765248 (504.31 it/sec) -training >> step=4216200, episode=703 reward=0.7634526 (454.03 it/sec) -training >> step=4216300, episode=703 reward=0.7801058 (517.82 it/sec) -training >> step=4216400, episode=703 reward=0.7620093 (500.26 it/sec) -training >> step=4216500, episode=703 reward=0.7757969 (466.57 it/sec) -training >> step=4216600, episode=703 reward=0.7545194 (501.81 it/sec) -training >> step=4216700, episode=703 reward=0.769776 (540.75 it/sec) -training >> step=4216800, episode=703 reward=0.7512914 (498.36 it/sec) -training >> step=4216900, episode=703 reward=0.7324601 (524.57 it/sec) -training >> step=4217000, episode=703 reward=0.7759168 (543.46 it/sec) -training >> step=4217100, episode=703 reward=0.7703191 (479.06 it/sec) -training >> step=4217200, episode=703 reward=0.7643897 (522.35 it/sec) -training >> step=4217300, episode=704 reward=0.7851573 (90.47 it/sec) -training >> step=4217400, episode=704 reward=0.7732244 (500.08 it/sec) -training >> step=4217500, episode=704 reward=0.7582902 (504.95 it/sec) -training >> step=4217600, episode=704 reward=0.7875564 (515.33 it/sec) -training >> step=4217700, episode=704 reward=0.76175 (484.23 it/sec) -training >> step=4217800, episode=704 reward=0.7818844 (491.49 it/sec) -training >> step=4217900, episode=704 reward=0.7762415 (496.39 it/sec) -training >> step=4218000, episode=704 reward=0.8125181 (519.34 it/sec) -training >> step=4218100, episode=704 reward=0.7771761 (496.42 it/sec) -training >> step=4218200, episode=704 reward=0.7856152 (487.12 it/sec) -training >> step=4218300, episode=704 reward=0.7636232 (485.83 it/sec) -training >> step=4218400, episode=704 reward=0.7744703 (490.67 it/sec) -training >> step=4218500, episode=704 reward=0.7707936 (476.08 it/sec) -training >> step=4218600, episode=704 reward=0.7763984 (480.63 it/sec) -training >> step=4218700, episode=704 reward=0.764556 (476.22 it/sec) -training >> step=4218800, episode=704 reward=0.7639807 (494.47 it/sec) -training >> step=4218900, episode=704 reward=0.7849603 (514.86 it/sec) -training >> step=4219000, episode=704 reward=0.7609627 (474.56 it/sec) -training >> step=4219100, episode=704 reward=0.7785964 (457.57 it/sec) -training >> step=4219200, episode=704 reward=0.7862718 (453.29 it/sec) -training >> step=4219300, episode=704 reward=0.7663648 (510.05 it/sec) -training >> step=4219400, episode=704 reward=0.7774202 (450.49 it/sec) -training >> step=4219500, episode=704 reward=0.7714313 (497.48 it/sec) -training >> step=4219600, episode=704 reward=0.7783091 (460.11 it/sec) -training >> step=4219700, episode=704 reward=0.7885726 (448.54 it/sec) -training >> step=4219800, episode=704 reward=0.7718989 (510.66 it/sec) -training >> step=4219900, episode=704 reward=0.7830279 (436.07 it/sec) -training >> step=4220000, episode=704 reward=0.8067923 (466.46 it/sec) -training >> step=4220100, episode=704 reward=0.7834142 (497.96 it/sec) -training >> step=4220200, episode=704 reward=0.7695746 (466.71 it/sec) -training >> step=4220300, episode=704 reward=0.7691497 (480.98 it/sec) -training >> step=4220400, episode=704 reward=0.8081861 (505.64 it/sec) -training >> step=4220500, episode=704 reward=0.7672229 (481.66 it/sec) -training >> step=4220600, episode=704 reward=0.7542061 (464.19 it/sec) -training >> step=4220700, episode=704 reward=0.7818922 (433.74 it/sec) -training >> step=4220800, episode=704 reward=0.7706097 (464.36 it/sec) -training >> step=4220900, episode=704 reward=0.7716573 (489.62 it/sec) -training >> step=4221000, episode=704 reward=0.7714325 (480.43 it/sec) -training >> step=4221100, episode=704 reward=0.7668021 (449.51 it/sec) -training >> step=4221200, episode=704 reward=0.7682252 (443.03 it/sec) -training >> step=4221300, episode=704 reward=0.7672642 (496.72 it/sec) -training >> step=4221400, episode=704 reward=0.7919925 (454.51 it/sec) -training >> step=4221500, episode=704 reward=0.7579448 (415.39 it/sec) -training >> step=4221600, episode=704 reward=0.7633348 (378.27 it/sec) -training >> step=4221700, episode=704 reward=0.7708458 (518.83 it/sec) -training >> step=4221800, episode=704 reward=0.7713098 (463.77 it/sec) -training >> step=4221900, episode=704 reward=0.766686 (472.35 it/sec) -training >> step=4222000, episode=704 reward=0.7733102 (452.34 it/sec) -training >> step=4222100, episode=704 reward=0.7659381 (481.45 it/sec) -training >> step=4222200, episode=704 reward=0.7801423 (494.29 it/sec) -training >> step=4222300, episode=704 reward=0.7697774 (504.23 it/sec) -training >> step=4222400, episode=704 reward=0.7713145 (474.27 it/sec) -training >> step=4222500, episode=704 reward=0.7226628 (471.30 it/sec) -training >> step=4222600, episode=704 reward=0.7778255 (498.36 it/sec) -training >> step=4222700, episode=704 reward=0.7774496 (490.18 it/sec) -training >> step=4222800, episode=704 reward=0.7708848 (498.66 it/sec) -training >> step=4222900, episode=704 reward=0.772472 (468.18 it/sec) -training >> step=4223000, episode=704 reward=0.7776249 (502.88 it/sec) -training >> step=4223100, episode=704 reward=0.7592971 (460.33 it/sec) -training >> step=4223200, episode=704 reward=0.7638611 (453.69 it/sec) -training >> step=4223300, episode=705 reward=0.7453796 (54.65 it/sec) -training >> step=4223400, episode=705 reward=0.7849666 (476.85 it/sec) -training >> step=4223500, episode=705 reward=0.7770215 (460.10 it/sec) -training >> step=4223600, episode=705 reward=0.7774547 (501.71 it/sec) -training >> step=4223700, episode=705 reward=0.7762222 (464.70 it/sec) -training >> step=4223800, episode=705 reward=0.7674953 (507.68 it/sec) -training >> step=4223900, episode=705 reward=0.7954398 (521.89 it/sec) -training >> step=4224000, episode=705 reward=0.7884853 (510.70 it/sec) -training >> step=4224100, episode=705 reward=0.7690135 (478.38 it/sec) -training >> step=4224200, episode=705 reward=0.7716722 (514.19 it/sec) -training >> step=4224300, episode=705 reward=0.7842241 (430.44 it/sec) -training >> step=4224400, episode=705 reward=0.7464252 (492.74 it/sec) -training >> step=4224500, episode=705 reward=0.7430124 (481.15 it/sec) -training >> step=4224600, episode=705 reward=0.7825304 (525.19 it/sec) -training >> step=4224700, episode=705 reward=0.7682871 (485.05 it/sec) -training >> step=4224800, episode=705 reward=0.7805836 (474.97 it/sec) -training >> step=4224900, episode=705 reward=0.7641105 (485.97 it/sec) -training >> step=4225000, episode=705 reward=0.7916256 (502.36 it/sec) -training >> step=4225100, episode=705 reward=0.7747195 (503.22 it/sec) -training >> step=4225200, episode=705 reward=0.7617544 (453.75 it/sec) -training >> step=4225300, episode=705 reward=0.7928979 (487.47 it/sec) -training >> step=4225400, episode=705 reward=0.7986669 (472.17 it/sec) -training >> step=4225500, episode=705 reward=0.7858555 (478.12 it/sec) -training >> step=4225600, episode=705 reward=0.7812141 (493.38 it/sec) -training >> step=4225700, episode=705 reward=0.7879042 (490.47 it/sec) -training >> step=4225800, episode=705 reward=0.7638838 (382.31 it/sec) -training >> step=4225900, episode=705 reward=0.7629139 (459.93 it/sec) -training >> step=4226000, episode=705 reward=0.7670629 (515.57 it/sec) -training >> step=4226100, episode=705 reward=0.7729494 (483.85 it/sec) -training >> step=4226200, episode=705 reward=0.7980339 (455.55 it/sec) -training >> step=4226300, episode=705 reward=0.7736307 (500.55 it/sec) -training >> step=4226400, episode=705 reward=0.7802099 (506.59 it/sec) -training >> step=4226500, episode=705 reward=0.7973197 (489.26 it/sec) -training >> step=4226600, episode=705 reward=0.7720736 (498.86 it/sec) -training >> step=4226700, episode=705 reward=0.772051 (514.77 it/sec) -training >> step=4226800, episode=705 reward=0.7682285 (478.51 it/sec) -training >> step=4226900, episode=705 reward=0.7607549 (474.13 it/sec) -training >> step=4227000, episode=705 reward=0.7631109 (494.28 it/sec) -training >> step=4227100, episode=705 reward=0.7600638 (487.85 it/sec) -training >> step=4227200, episode=705 reward=0.7427462 (476.71 it/sec) -training >> step=4227300, episode=705 reward=0.764563 (439.48 it/sec) -training >> step=4227400, episode=705 reward=0.7422894 (456.61 it/sec) -training >> step=4227500, episode=705 reward=0.7712558 (508.35 it/sec) -training >> step=4227600, episode=705 reward=0.7461417 (487.95 it/sec) -training >> step=4227700, episode=705 reward=0.7721146 (466.78 it/sec) -training >> step=4227800, episode=705 reward=0.762403 (521.21 it/sec) -training >> step=4227900, episode=705 reward=0.7782971 (334.51 it/sec) -training >> step=4228000, episode=705 reward=0.7781407 (490.53 it/sec) -training >> step=4228100, episode=705 reward=0.7687837 (476.52 it/sec) -training >> step=4228200, episode=705 reward=0.7526518 (487.81 it/sec) -training >> step=4228300, episode=705 reward=0.7851248 (466.69 it/sec) -training >> step=4228400, episode=705 reward=0.739362 (494.59 it/sec) -training >> step=4228500, episode=705 reward=0.7812085 (503.99 it/sec) -training >> step=4228600, episode=705 reward=0.7734007 (497.14 it/sec) -training >> step=4228700, episode=705 reward=0.7910065 (472.13 it/sec) -training >> step=4228800, episode=705 reward=0.7911791 (471.55 it/sec) -training >> step=4228900, episode=705 reward=0.7833475 (495.75 it/sec) -training >> step=4229000, episode=705 reward=0.7625287 (448.40 it/sec) -training >> step=4229100, episode=705 reward=0.7688032 (442.61 it/sec) -training >> step=4229200, episode=705 reward=0.77246 (509.89 it/sec) -training >> step=4229300, episode=706 reward=0.7718857 (40.10 it/sec) -training >> step=4229400, episode=706 reward=0.7912754 (483.26 it/sec) -training >> step=4229500, episode=706 reward=0.7574224 (498.98 it/sec) -training >> step=4229600, episode=706 reward=0.7673991 (476.68 it/sec) -training >> step=4229700, episode=706 reward=0.7845778 (490.47 it/sec) -training >> step=4229800, episode=706 reward=0.787203 (500.66 it/sec) -training >> step=4229900, episode=706 reward=0.7816104 (501.36 it/sec) -training >> step=4230000, episode=706 reward=0.7937139 (516.11 it/sec) -training >> step=4230100, episode=706 reward=0.7923592 (495.49 it/sec) -training >> step=4230200, episode=706 reward=0.7857068 (492.31 it/sec) -training >> step=4230300, episode=706 reward=0.780091 (473.44 it/sec) -training >> step=4230400, episode=706 reward=0.7513012 (486.64 it/sec) -training >> step=4230500, episode=706 reward=0.774074 (506.76 it/sec) -training >> step=4230600, episode=706 reward=0.7841156 (493.94 it/sec) -training >> step=4230700, episode=706 reward=0.7801814 (514.67 it/sec) -training >> step=4230800, episode=706 reward=0.7679061 (503.06 it/sec) -training >> step=4230900, episode=706 reward=0.7741036 (475.27 it/sec) -training >> step=4231000, episode=706 reward=0.8104606 (483.24 it/sec) -training >> step=4231100, episode=706 reward=0.7838438 (458.06 it/sec) -training >> step=4231200, episode=706 reward=0.7778494 (473.12 it/sec) -training >> step=4231300, episode=706 reward=0.7902798 (487.36 it/sec) -training >> step=4231400, episode=706 reward=0.7725461 (548.26 it/sec) -training >> step=4231500, episode=706 reward=0.7895508 (478.54 it/sec) -training >> step=4231600, episode=706 reward=0.7871183 (495.99 it/sec) -training >> step=4231700, episode=706 reward=0.7681382 (486.94 it/sec) -training >> step=4231800, episode=706 reward=0.766709 (501.12 it/sec) -training >> step=4231900, episode=706 reward=0.7762142 (481.22 it/sec) -training >> step=4232000, episode=706 reward=0.7872346 (478.26 it/sec) -training >> step=4232100, episode=706 reward=0.7705073 (477.62 it/sec) -training >> step=4232200, episode=706 reward=0.7829842 (481.74 it/sec) -training >> step=4232300, episode=706 reward=0.7637314 (473.42 it/sec) -training >> step=4232400, episode=706 reward=0.7444654 (481.90 it/sec) -training >> step=4232500, episode=706 reward=0.7982612 (526.08 it/sec) -training >> step=4232600, episode=706 reward=0.7656084 (520.50 it/sec) -training >> step=4232700, episode=706 reward=0.7664806 (484.61 it/sec) -training >> step=4232800, episode=706 reward=0.7677431 (470.72 it/sec) -training >> step=4232900, episode=706 reward=0.7897978 (435.26 it/sec) -training >> step=4233000, episode=706 reward=0.7516285 (441.43 it/sec) -training >> step=4233100, episode=706 reward=0.7680143 (485.97 it/sec) -training >> step=4233200, episode=706 reward=0.7945862 (517.19 it/sec) -training >> step=4233300, episode=706 reward=0.765106 (467.73 it/sec) -training >> step=4233400, episode=706 reward=0.7814794 (499.02 it/sec) -training >> step=4233500, episode=706 reward=0.7571383 (535.84 it/sec) -training >> step=4233600, episode=706 reward=0.7768627 (525.48 it/sec) -training >> step=4233700, episode=706 reward=0.7886481 (491.63 it/sec) -training >> step=4233800, episode=706 reward=0.7827432 (521.56 it/sec) -training >> step=4233900, episode=706 reward=0.7716789 (500.55 it/sec) -training >> step=4234000, episode=706 reward=0.7701811 (545.40 it/sec) -training >> step=4234100, episode=706 reward=0.7739599 (519.03 it/sec) -training >> step=4234200, episode=706 reward=0.7666473 (390.82 it/sec) -training >> step=4234300, episode=706 reward=0.7727649 (509.44 it/sec) -training >> step=4234400, episode=706 reward=0.7791916 (496.03 it/sec) -training >> step=4234500, episode=706 reward=0.7766993 (492.97 it/sec) -training >> step=4234600, episode=706 reward=0.7806226 (510.54 it/sec) -training >> step=4234700, episode=706 reward=0.7692052 (455.79 it/sec) -training >> step=4234800, episode=706 reward=0.7530472 (506.79 it/sec) -training >> step=4234900, episode=706 reward=0.761182 (498.19 it/sec) -training >> step=4235000, episode=706 reward=0.7715134 (497.28 it/sec) -training >> step=4235100, episode=706 reward=0.7494184 (534.98 it/sec) -training >> step=4235200, episode=706 reward=0.7942589 (514.36 it/sec) -training >> step=4235300, episode=707 reward=0.7714826 (45.80 it/sec) -training >> step=4235400, episode=707 reward=0.7832579 (495.93 it/sec) -training >> step=4235500, episode=707 reward=0.7833855 (513.15 it/sec) -training >> step=4235600, episode=707 reward=0.7652436 (482.70 it/sec) -training >> step=4235700, episode=707 reward=0.7705884 (529.47 it/sec) -training >> step=4235800, episode=707 reward=0.7720077 (539.37 it/sec) -training >> step=4235900, episode=707 reward=0.7712234 (516.68 it/sec) -training >> step=4236000, episode=707 reward=0.7860807 (522.67 it/sec) -training >> step=4236100, episode=707 reward=0.7579267 (529.03 it/sec) -training >> step=4236200, episode=707 reward=0.7622643 (446.26 it/sec) -training >> step=4236300, episode=707 reward=0.7700264 (509.62 it/sec) -training >> step=4236400, episode=707 reward=0.7881551 (464.28 it/sec) -training >> step=4236500, episode=707 reward=0.7716122 (524.31 it/sec) -training >> step=4236600, episode=707 reward=0.7767373 (538.21 it/sec) -training >> step=4236700, episode=707 reward=0.7661244 (518.36 it/sec) -training >> step=4236800, episode=707 reward=0.7812513 (470.34 it/sec) -training >> step=4236900, episode=707 reward=0.7842585 (532.90 it/sec) -training >> step=4237000, episode=707 reward=0.7647465 (502.61 it/sec) -training >> step=4237100, episode=707 reward=0.7618614 (510.95 it/sec) -training >> step=4237200, episode=707 reward=0.7737552 (503.24 it/sec) -training >> step=4237300, episode=707 reward=0.7810087 (530.16 it/sec) -training >> step=4237400, episode=707 reward=0.770945 (476.45 it/sec) -training >> step=4237500, episode=707 reward=0.7619249 (477.02 it/sec) -training >> step=4237600, episode=707 reward=0.7822661 (491.81 it/sec) -training >> step=4237700, episode=707 reward=0.789027 (541.41 it/sec) -training >> step=4237800, episode=707 reward=0.7702629 (502.54 it/sec) -training >> step=4237900, episode=707 reward=0.7721865 (490.93 it/sec) -training >> step=4238000, episode=707 reward=0.7601581 (544.88 it/sec) -training >> step=4238100, episode=707 reward=0.7943767 (505.96 it/sec) -training >> step=4238200, episode=707 reward=0.7705754 (544.59 it/sec) -training >> step=4238300, episode=707 reward=0.7951193 (534.62 it/sec) -training >> step=4238400, episode=707 reward=0.7778851 (534.94 it/sec) -training >> step=4238500, episode=707 reward=0.766704 (502.15 it/sec) -training >> step=4238600, episode=707 reward=0.7702233 (494.78 it/sec) -training >> step=4238700, episode=707 reward=0.7854654 (542.91 it/sec) -training >> step=4238800, episode=707 reward=0.7740132 (515.59 it/sec) -training >> step=4238900, episode=707 reward=0.7525998 (511.27 it/sec) -training >> step=4239000, episode=707 reward=0.773176 (435.01 it/sec) -training >> step=4239100, episode=707 reward=0.7820362 (552.87 it/sec) -training >> step=4239200, episode=707 reward=0.7889442 (510.68 it/sec) -training >> step=4239300, episode=707 reward=0.7818444 (477.44 it/sec) -training >> step=4239400, episode=707 reward=0.7783181 (504.52 it/sec) -training >> step=4239500, episode=707 reward=0.7538698 (532.29 it/sec) -training >> step=4239600, episode=707 reward=0.793263 (526.47 it/sec) -training >> step=4239700, episode=707 reward=0.7823361 (522.36 it/sec) -training >> step=4239800, episode=707 reward=0.7723336 (498.91 it/sec) -training >> step=4239900, episode=707 reward=0.7904045 (482.02 it/sec) -training >> step=4240000, episode=707 reward=0.7531247 (517.35 it/sec) -training >> step=4240100, episode=707 reward=0.7753015 (514.67 it/sec) -training >> step=4240200, episode=707 reward=0.7767029 (533.68 it/sec) -training >> step=4240300, episode=707 reward=0.7805435 (471.99 it/sec) -training >> step=4240400, episode=707 reward=0.7547424 (496.98 it/sec) -training >> step=4240500, episode=707 reward=0.7796964 (524.58 it/sec) -training >> step=4240600, episode=707 reward=0.7496696 (389.26 it/sec) -training >> step=4240700, episode=707 reward=0.775362 (509.25 it/sec) -training >> step=4240800, episode=707 reward=0.7642665 (493.74 it/sec) -training >> step=4240900, episode=707 reward=0.7671815 (498.37 it/sec) -training >> step=4241000, episode=707 reward=0.7654647 (483.06 it/sec) -training >> step=4241100, episode=707 reward=0.7839175 (496.87 it/sec) -training >> step=4241200, episode=707 reward=0.7581723 (512.03 it/sec) -training >> step=4241300, episode=708 reward=0.7895395 (56.22 it/sec) -training >> step=4241400, episode=708 reward=0.7680798 (513.85 it/sec) -training >> step=4241500, episode=708 reward=0.7416977 (511.47 it/sec) -training >> step=4241600, episode=708 reward=0.7855977 (463.95 it/sec) -training >> step=4241700, episode=708 reward=0.7763833 (488.34 it/sec) -training >> step=4241800, episode=708 reward=0.7697361 (519.42 it/sec) -training >> step=4241900, episode=708 reward=0.794484 (540.63 it/sec) -training >> step=4242000, episode=708 reward=0.7648337 (513.82 it/sec) -training >> step=4242100, episode=708 reward=0.7710949 (528.70 it/sec) -training >> step=4242200, episode=708 reward=0.7763763 (501.14 it/sec) -training >> step=4242300, episode=708 reward=0.7860039 (517.95 it/sec) -training >> step=4242400, episode=708 reward=0.7924747 (500.06 it/sec) -training >> step=4242500, episode=708 reward=0.7739964 (515.70 it/sec) -training >> step=4242600, episode=708 reward=0.7826976 (495.48 it/sec) -training >> step=4242700, episode=708 reward=0.7989656 (508.67 it/sec) -training >> step=4242800, episode=708 reward=0.797821 (491.86 it/sec) -training >> step=4242900, episode=708 reward=0.7864291 (493.16 it/sec) -training >> step=4243000, episode=708 reward=0.7794545 (515.25 it/sec) -training >> step=4243100, episode=708 reward=0.7654234 (518.74 it/sec) -training >> step=4243200, episode=708 reward=0.7883217 (529.75 it/sec) -training >> step=4243300, episode=708 reward=0.7490239 (510.93 it/sec) -training >> step=4243400, episode=708 reward=0.782995 (497.14 it/sec) -training >> step=4243500, episode=708 reward=0.7733618 (540.99 it/sec) -training >> step=4243600, episode=708 reward=0.7773632 (467.29 it/sec) -training >> step=4243700, episode=708 reward=0.7776833 (482.63 it/sec) -training >> step=4243800, episode=708 reward=0.7940015 (473.14 it/sec) -training >> step=4243900, episode=708 reward=0.7693228 (467.10 it/sec) -training >> step=4244000, episode=708 reward=0.735656 (494.98 it/sec) -training >> step=4244100, episode=708 reward=0.7686945 (503.20 it/sec) -training >> step=4244200, episode=708 reward=0.7821653 (462.61 it/sec) -training >> step=4244300, episode=708 reward=0.7776614 (511.75 it/sec) -training >> step=4244400, episode=708 reward=0.7458462 (428.41 it/sec) -training >> step=4244500, episode=708 reward=0.7801414 (490.47 it/sec) -training >> step=4244600, episode=708 reward=0.7773376 (466.15 it/sec) -training >> step=4244700, episode=708 reward=0.7780919 (476.28 it/sec) -training >> step=4244800, episode=708 reward=0.770279 (466.33 it/sec) -training >> step=4244900, episode=708 reward=0.7619153 (489.79 it/sec) -training >> step=4245000, episode=708 reward=0.7853236 (525.56 it/sec) -training >> step=4245100, episode=708 reward=0.7734289 (469.69 it/sec) -training >> step=4245200, episode=708 reward=0.7643689 (453.68 it/sec) -training >> step=4245300, episode=708 reward=0.787911 (484.69 it/sec) -training >> step=4245400, episode=708 reward=0.7774157 (454.59 it/sec) -training >> step=4245500, episode=708 reward=0.7703821 (460.42 it/sec) -training >> step=4245600, episode=708 reward=0.7898193 (479.93 it/sec) -training >> step=4245700, episode=708 reward=0.7665802 (496.34 it/sec) -training >> step=4245800, episode=708 reward=0.7564222 (489.55 it/sec) -training >> step=4245900, episode=708 reward=0.7463297 (502.76 it/sec) -training >> step=4246000, episode=708 reward=0.7913612 (480.97 it/sec) -training >> step=4246100, episode=708 reward=0.7563931 (501.09 it/sec) -training >> step=4246200, episode=708 reward=0.7892203 (478.60 it/sec) -training >> step=4246300, episode=708 reward=0.7773879 (501.89 it/sec) -training >> step=4246400, episode=708 reward=0.7693414 (529.75 it/sec) -training >> step=4246500, episode=708 reward=0.7664906 (502.00 it/sec) -training >> step=4246600, episode=708 reward=0.7612747 (509.80 it/sec) -training >> step=4246700, episode=708 reward=0.7512389 (466.82 it/sec) -training >> step=4246800, episode=708 reward=0.768186 (525.11 it/sec) -training >> step=4246900, episode=708 reward=0.7747259 (320.15 it/sec) -training >> step=4247000, episode=708 reward=0.7674851 (454.03 it/sec) -training >> step=4247100, episode=708 reward=0.7654423 (473.55 it/sec) -training >> step=4247200, episode=708 reward=0.7696207 (517.66 it/sec) -training >> step=4247300, episode=709 reward=0.7482995 (50.24 it/sec) -training >> step=4247400, episode=709 reward=0.7753361 (506.75 it/sec) -training >> step=4247500, episode=709 reward=0.7874069 (439.47 it/sec) -training >> step=4247600, episode=709 reward=0.763788 (492.16 it/sec) -training >> step=4247700, episode=709 reward=0.7604937 (512.47 it/sec) -training >> step=4247800, episode=709 reward=0.7640502 (482.71 it/sec) -training >> step=4247900, episode=709 reward=0.7786094 (512.91 it/sec) -training >> step=4248000, episode=709 reward=0.7897726 (436.07 it/sec) -training >> step=4248100, episode=709 reward=0.7727364 (507.10 it/sec) -training >> step=4248200, episode=709 reward=0.7566666 (459.99 it/sec) -training >> step=4248300, episode=709 reward=0.7749582 (516.95 it/sec) -training >> step=4248400, episode=709 reward=0.7760783 (533.11 it/sec) -training >> step=4248500, episode=709 reward=0.7779715 (521.65 it/sec) -training >> step=4248600, episode=709 reward=0.7715619 (476.53 it/sec) -training >> step=4248700, episode=709 reward=0.767742 (503.72 it/sec) -training >> step=4248800, episode=709 reward=0.7773337 (493.32 it/sec) -training >> step=4248900, episode=709 reward=0.7648089 (458.42 it/sec) -training >> step=4249000, episode=709 reward=0.78067 (507.96 it/sec) -training >> step=4249100, episode=709 reward=0.7999581 (526.85 it/sec) -training >> step=4249200, episode=709 reward=0.7679003 (468.78 it/sec) -training >> step=4249300, episode=709 reward=0.7794583 (502.24 it/sec) -training >> step=4249400, episode=709 reward=0.7898164 (474.31 it/sec) -training >> step=4249500, episode=709 reward=0.7681581 (473.13 it/sec) -training >> step=4249600, episode=709 reward=0.7720696 (486.21 it/sec) -training >> step=4249700, episode=709 reward=0.7769775 (493.17 it/sec) -training >> step=4249800, episode=709 reward=0.7893902 (504.10 it/sec) -training >> step=4249900, episode=709 reward=0.7698427 (502.82 it/sec) -training >> step=4250000, episode=709 reward=0.7942511 (454.68 it/sec) -training >> step=4250100, episode=709 reward=0.7670844 (510.36 it/sec) -training >> step=4250200, episode=709 reward=0.7680194 (521.28 it/sec) -training >> step=4250300, episode=709 reward=0.7804624 (484.18 it/sec) -training >> step=4250400, episode=709 reward=0.7708758 (492.76 it/sec) -training >> step=4250500, episode=709 reward=0.7550228 (494.33 it/sec) -training >> step=4250600, episode=709 reward=0.7650236 (496.67 it/sec) -training >> step=4250700, episode=709 reward=0.7727031 (525.02 it/sec) -training >> step=4250800, episode=709 reward=0.7620853 (496.85 it/sec) -training >> step=4250900, episode=709 reward=0.7541031 (530.57 it/sec) -training >> step=4251000, episode=709 reward=0.7686532 (491.18 it/sec) -training >> step=4251100, episode=709 reward=0.7756626 (468.92 it/sec) -training >> step=4251200, episode=709 reward=0.7838131 (513.43 it/sec) -training >> step=4251300, episode=709 reward=0.7773989 (513.49 it/sec) -training >> step=4251400, episode=709 reward=0.7928262 (543.11 it/sec) -training >> step=4251500, episode=709 reward=0.7798868 (502.93 it/sec) -training >> step=4251600, episode=709 reward=0.7569432 (485.26 it/sec) -training >> step=4251700, episode=709 reward=0.7616423 (480.50 it/sec) -training >> step=4251800, episode=709 reward=0.7666728 (496.07 it/sec) -training >> step=4251900, episode=709 reward=0.7909131 (543.92 it/sec) -training >> step=4252000, episode=709 reward=0.7666949 (491.22 it/sec) -training >> step=4252100, episode=709 reward=0.7702128 (518.70 it/sec) -training >> step=4252200, episode=709 reward=0.7586565 (527.84 it/sec) -training >> step=4252300, episode=709 reward=0.7682125 (524.18 it/sec) -training >> step=4252400, episode=709 reward=0.7635077 (483.64 it/sec) -training >> step=4252500, episode=709 reward=0.7616228 (528.25 it/sec) -training >> step=4252600, episode=709 reward=0.767753 (514.71 it/sec) -training >> step=4252700, episode=709 reward=0.7627841 (538.84 it/sec) -training >> step=4252800, episode=709 reward=0.768967 (476.34 it/sec) -training >> step=4252900, episode=709 reward=0.7723794 (505.11 it/sec) -training >> step=4253000, episode=709 reward=0.7748107 (518.78 it/sec) -training >> step=4253100, episode=709 reward=0.7806842 (356.46 it/sec) -training >> step=4253200, episode=709 reward=0.7891176 (533.52 it/sec) -training >> step=4253300, episode=710 reward=0.7679874 (40.43 it/sec) -training >> step=4253400, episode=710 reward=0.7788801 (464.06 it/sec) -training >> step=4253500, episode=710 reward=0.7764754 (502.87 it/sec) -training >> step=4253600, episode=710 reward=0.7821726 (468.96 it/sec) -training >> step=4253700, episode=710 reward=0.7878159 (430.95 it/sec) -training >> step=4253800, episode=710 reward=0.7699078 (535.14 it/sec) -training >> step=4253900, episode=710 reward=0.7795241 (493.21 it/sec) -training >> step=4254000, episode=710 reward=0.7690505 (544.82 it/sec) -training >> step=4254100, episode=710 reward=0.7736368 (518.83 it/sec) -training >> step=4254200, episode=710 reward=0.8010534 (472.52 it/sec) -training >> step=4254300, episode=710 reward=0.7700058 (528.51 it/sec) -training >> step=4254400, episode=710 reward=0.7682824 (513.82 it/sec) -training >> step=4254500, episode=710 reward=0.79029 (511.82 it/sec) -training >> step=4254600, episode=710 reward=0.7809745 (547.97 it/sec) -training >> step=4254700, episode=710 reward=0.7807976 (520.82 it/sec) -training >> step=4254800, episode=710 reward=0.7777646 (499.28 it/sec) -training >> step=4254900, episode=710 reward=0.7737794 (548.84 it/sec) -training >> step=4255000, episode=710 reward=0.7802986 (513.53 it/sec) -training >> step=4255100, episode=710 reward=0.7836752 (523.94 it/sec) -training >> step=4255200, episode=710 reward=0.7875461 (489.44 it/sec) -training >> step=4255300, episode=710 reward=0.7897967 (508.12 it/sec) -training >> step=4255400, episode=710 reward=0.7672718 (505.70 it/sec) -training >> step=4255500, episode=710 reward=0.7756336 (507.52 it/sec) -training >> step=4255600, episode=710 reward=0.7735306 (517.98 it/sec) -training >> step=4255700, episode=710 reward=0.7677345 (517.76 it/sec) -training >> step=4255800, episode=710 reward=0.7907031 (511.66 it/sec) -training >> step=4255900, episode=710 reward=0.7640933 (531.93 it/sec) -training >> step=4256000, episode=710 reward=0.772007 (555.66 it/sec) -training >> step=4256100, episode=710 reward=0.7834717 (515.28 it/sec) -training >> step=4256200, episode=710 reward=0.7912627 (540.79 it/sec) -training >> step=4256300, episode=710 reward=0.7954937 (535.53 it/sec) -training >> step=4256400, episode=710 reward=0.7617527 (554.75 it/sec) -training >> step=4256500, episode=710 reward=0.7866624 (511.62 it/sec) -training >> step=4256600, episode=710 reward=0.7708177 (501.40 it/sec) -training >> step=4256700, episode=710 reward=0.7719492 (539.69 it/sec) -training >> step=4256800, episode=710 reward=0.7729611 (513.61 it/sec) -training >> step=4256900, episode=710 reward=0.7823506 (541.07 it/sec) -training >> step=4257000, episode=710 reward=0.7625176 (520.63 it/sec) -training >> step=4257100, episode=710 reward=0.7949746 (547.49 it/sec) -training >> step=4257200, episode=710 reward=0.7667685 (494.08 it/sec) -training >> step=4257300, episode=710 reward=0.7813169 (539.49 it/sec) -training >> step=4257400, episode=710 reward=0.7773411 (550.37 it/sec) -training >> step=4257500, episode=710 reward=0.7798804 (542.02 it/sec) -training >> step=4257600, episode=710 reward=0.7749951 (533.05 it/sec) -training >> step=4257700, episode=710 reward=0.7708606 (486.42 it/sec) -training >> step=4257800, episode=710 reward=0.761448 (561.72 it/sec) -training >> step=4257900, episode=710 reward=0.7824273 (516.26 it/sec) -training >> step=4258000, episode=710 reward=0.770057 (532.24 it/sec) -training >> step=4258100, episode=710 reward=0.7558014 (515.79 it/sec) -training >> step=4258200, episode=710 reward=0.7935724 (507.85 it/sec) -training >> step=4258300, episode=710 reward=0.7463328 (460.94 it/sec) -training >> step=4258400, episode=710 reward=0.7690463 (464.06 it/sec) -training >> step=4258500, episode=710 reward=0.762186 (504.62 it/sec) -training >> step=4258600, episode=710 reward=0.7627928 (539.89 it/sec) -training >> step=4258700, episode=710 reward=0.7639032 (525.52 it/sec) -training >> step=4258800, episode=710 reward=0.775688 (527.58 it/sec) -training >> step=4258900, episode=710 reward=0.7727476 (509.09 it/sec) -training >> step=4259000, episode=710 reward=0.7698658 (524.29 it/sec) -training >> step=4259100, episode=710 reward=0.7723508 (524.16 it/sec) -training >> step=4259200, episode=710 reward=0.7864211 (384.83 it/sec) -training >> step=4259300, episode=711 reward=0.7644867 (96.37 it/sec) -training >> step=4259400, episode=711 reward=0.7587717 (518.59 it/sec) -training >> step=4259500, episode=711 reward=0.7645052 (500.74 it/sec) -training >> step=4259600, episode=711 reward=0.7570425 (537.55 it/sec) -training >> step=4259700, episode=711 reward=0.7721053 (552.38 it/sec) -training >> step=4259800, episode=711 reward=0.7821202 (529.23 it/sec) -training >> step=4259900, episode=711 reward=0.7758096 (510.29 it/sec) -training >> step=4260000, episode=711 reward=0.8003932 (567.84 it/sec) -training >> step=4260100, episode=711 reward=0.7715112 (519.73 it/sec) -training >> step=4260200, episode=711 reward=0.7943448 (522.40 it/sec) -training >> step=4260300, episode=711 reward=0.7827662 (539.39 it/sec) -training >> step=4260400, episode=711 reward=0.7667422 (511.76 it/sec) -training >> step=4260500, episode=711 reward=0.7838739 (503.71 it/sec) -training >> step=4260600, episode=711 reward=0.7809887 (490.76 it/sec) -training >> step=4260700, episode=711 reward=0.7750198 (535.95 it/sec) -training >> step=4260800, episode=711 reward=0.7759061 (534.31 it/sec) -training >> step=4260900, episode=711 reward=0.7697258 (489.25 it/sec) -training >> step=4261000, episode=711 reward=0.7878469 (527.70 it/sec) -training >> step=4261100, episode=711 reward=0.7731888 (526.04 it/sec) -training >> step=4261200, episode=711 reward=0.7662978 (521.28 it/sec) -training >> step=4261300, episode=711 reward=0.7654008 (514.57 it/sec) -training >> step=4261400, episode=711 reward=0.7749953 (539.18 it/sec) -training >> step=4261500, episode=711 reward=0.7816614 (542.70 it/sec) -training >> step=4261600, episode=711 reward=0.7945098 (522.82 it/sec) -training >> step=4261700, episode=711 reward=0.7856598 (495.84 it/sec) -training >> step=4261800, episode=711 reward=0.7933661 (471.94 it/sec) -training >> step=4261900, episode=711 reward=0.7631598 (477.39 it/sec) -training >> step=4262000, episode=711 reward=0.7878573 (474.06 it/sec) -training >> step=4262100, episode=711 reward=0.7731726 (489.93 it/sec) -training >> step=4262200, episode=711 reward=0.7775841 (531.09 it/sec) -training >> step=4262300, episode=711 reward=0.7829289 (446.52 it/sec) -training >> step=4262400, episode=711 reward=0.7775836 (458.91 it/sec) -training >> step=4262500, episode=711 reward=0.7954543 (478.77 it/sec) -training >> step=4262600, episode=711 reward=0.769375 (499.95 it/sec) -training >> step=4262700, episode=711 reward=0.765725 (477.47 it/sec) -training >> step=4262800, episode=711 reward=0.7676998 (466.38 it/sec) -training >> step=4262900, episode=711 reward=0.7717769 (464.83 it/sec) -training >> step=4263000, episode=711 reward=0.783613 (433.93 it/sec) -training >> step=4263100, episode=711 reward=0.7810941 (425.29 it/sec) -training >> step=4263200, episode=711 reward=0.7783521 (469.01 it/sec) -training >> step=4263300, episode=711 reward=0.7790796 (496.83 it/sec) -training >> step=4263400, episode=711 reward=0.7909697 (468.06 it/sec) -training >> step=4263500, episode=711 reward=0.7628586 (448.66 it/sec) -training >> step=4263600, episode=711 reward=0.7917435 (508.59 it/sec) -training >> step=4263700, episode=711 reward=0.7814574 (458.46 it/sec) -training >> step=4263800, episode=711 reward=0.7861689 (495.02 it/sec) -training >> step=4263900, episode=711 reward=0.7720912 (436.17 it/sec) -training >> step=4264000, episode=711 reward=0.791339 (495.18 it/sec) -training >> step=4264100, episode=711 reward=0.7623218 (458.94 it/sec) -training >> step=4264200, episode=711 reward=0.7736323 (494.32 it/sec) -training >> step=4264300, episode=711 reward=0.7577274 (469.49 it/sec) -training >> step=4264400, episode=711 reward=0.7619579 (484.17 it/sec) -training >> step=4264500, episode=711 reward=0.7846179 (473.96 it/sec) -training >> step=4264600, episode=711 reward=0.7511776 (472.17 it/sec) -training >> step=4264700, episode=711 reward=0.7835878 (489.30 it/sec) -training >> step=4264800, episode=711 reward=0.7503046 (477.84 it/sec) -training >> step=4264900, episode=711 reward=0.7647522 (461.26 it/sec) -training >> step=4265000, episode=711 reward=0.7552583 (484.11 it/sec) -training >> step=4265100, episode=711 reward=0.770373 (501.45 it/sec) -training >> step=4265200, episode=711 reward=0.7568572 (474.55 it/sec) -training >> step=4265300, episode=712 reward=0.7590644 (90.63 it/sec) -training >> step=4265400, episode=712 reward=0.7613698 (466.49 it/sec) -training >> step=4265500, episode=712 reward=0.7690651 (470.77 it/sec) -training >> step=4265600, episode=712 reward=0.766563 (470.62 it/sec) -training >> step=4265700, episode=712 reward=0.7726676 (503.38 it/sec) -training >> step=4265800, episode=712 reward=0.7669355 (499.30 it/sec) -training >> step=4265900, episode=712 reward=0.7883119 (428.40 it/sec) -training >> step=4266000, episode=712 reward=0.7773446 (457.91 it/sec) -training >> step=4266100, episode=712 reward=0.7806056 (483.96 it/sec) -training >> step=4266200, episode=712 reward=0.7817018 (463.74 it/sec) -training >> step=4266300, episode=712 reward=0.7663924 (473.10 it/sec) -training >> step=4266400, episode=712 reward=0.8108352 (500.46 it/sec) -training >> step=4266500, episode=712 reward=0.7904848 (448.83 it/sec) -training >> step=4266600, episode=712 reward=0.7948937 (426.36 it/sec) -training >> step=4266700, episode=712 reward=0.7754803 (473.30 it/sec) -training >> step=4266800, episode=712 reward=0.7805506 (483.74 it/sec) -training >> step=4266900, episode=712 reward=0.7602671 (418.43 it/sec) -training >> step=4267000, episode=712 reward=0.7842135 (500.68 it/sec) -training >> step=4267100, episode=712 reward=0.7744503 (518.94 it/sec) -training >> step=4267200, episode=712 reward=0.7650587 (474.05 it/sec) -training >> step=4267300, episode=712 reward=0.7710755 (510.48 it/sec) -training >> step=4267400, episode=712 reward=0.7948343 (497.29 it/sec) -training >> step=4267500, episode=712 reward=0.7788113 (517.85 it/sec) -training >> step=4267600, episode=712 reward=0.7821209 (500.15 it/sec) -training >> step=4267700, episode=712 reward=0.7742653 (466.01 it/sec) -training >> step=4267800, episode=712 reward=0.7899548 (558.01 it/sec) -training >> step=4267900, episode=712 reward=0.7657005 (465.11 it/sec) -training >> step=4268000, episode=712 reward=0.7979808 (440.54 it/sec) -training >> step=4268100, episode=712 reward=0.7509184 (459.64 it/sec) -training >> step=4268200, episode=712 reward=0.7747193 (518.69 it/sec) -training >> step=4268300, episode=712 reward=0.7738057 (475.10 it/sec) -training >> step=4268400, episode=712 reward=0.7818854 (445.88 it/sec) -training >> step=4268500, episode=712 reward=0.7736703 (482.74 it/sec) -training >> step=4268600, episode=712 reward=0.7669687 (465.78 it/sec) -training >> step=4268700, episode=712 reward=0.7716507 (485.77 it/sec) -training >> step=4268800, episode=712 reward=0.7756898 (463.15 it/sec) -training >> step=4268900, episode=712 reward=0.7425687 (484.85 it/sec) -training >> step=4269000, episode=712 reward=0.7838038 (497.57 it/sec) -training >> step=4269100, episode=712 reward=0.7682393 (466.54 it/sec) -training >> step=4269200, episode=712 reward=0.7759575 (450.23 it/sec) -training >> step=4269300, episode=712 reward=0.7808337 (462.15 it/sec) -training >> step=4269400, episode=712 reward=0.7841544 (468.01 it/sec) -training >> step=4269500, episode=712 reward=0.7750602 (469.07 it/sec) -training >> step=4269600, episode=712 reward=0.7753855 (456.04 it/sec) -training >> step=4269700, episode=712 reward=0.7778692 (464.00 it/sec) -training >> step=4269800, episode=712 reward=0.7800216 (473.61 it/sec) -training >> step=4269900, episode=712 reward=0.7516317 (434.04 it/sec) -training >> step=4270000, episode=712 reward=0.7818684 (500.86 it/sec) -training >> step=4270100, episode=712 reward=0.7626776 (469.33 it/sec) -training >> step=4270200, episode=712 reward=0.7716774 (487.07 it/sec) -training >> step=4270300, episode=712 reward=0.7878494 (472.44 it/sec) -training >> step=4270400, episode=712 reward=0.7557262 (471.50 it/sec) -training >> step=4270500, episode=712 reward=0.7539526 (481.61 it/sec) -training >> step=4270600, episode=712 reward=0.768091 (468.95 it/sec) -training >> step=4270700, episode=712 reward=0.7734866 (491.43 it/sec) -training >> step=4270800, episode=712 reward=0.7714074 (459.14 it/sec) -training >> step=4270900, episode=712 reward=0.7747853 (485.26 it/sec) -training >> step=4271000, episode=712 reward=0.7852359 (492.22 it/sec) -training >> step=4271100, episode=712 reward=0.7811503 (474.55 it/sec) -training >> step=4271200, episode=712 reward=0.7535047 (454.83 it/sec) -training >> step=4271300, episode=713 reward=0.7693375 (68.27 it/sec) -training >> step=4271400, episode=713 reward=0.7650521 (397.10 it/sec) -training >> step=4271500, episode=713 reward=0.7417794 (452.77 it/sec) -training >> step=4271600, episode=713 reward=0.778735 (479.07 it/sec) -training >> step=4271700, episode=713 reward=0.7542771 (475.55 it/sec) -training >> step=4271800, episode=713 reward=0.7500058 (462.68 it/sec) -training >> step=4271900, episode=713 reward=0.7789119 (495.81 it/sec) -training >> step=4272000, episode=713 reward=0.7756345 (493.78 it/sec) -training >> step=4272100, episode=713 reward=0.7535702 (485.00 it/sec) -training >> step=4272200, episode=713 reward=0.7676073 (463.13 it/sec) -training >> step=4272300, episode=713 reward=0.7785114 (505.27 it/sec) -training >> step=4272400, episode=713 reward=0.7772058 (478.54 it/sec) -training >> step=4272500, episode=713 reward=0.7962266 (517.46 it/sec) -training >> step=4272600, episode=713 reward=0.7674111 (485.36 it/sec) -training >> step=4272700, episode=713 reward=0.783784 (468.15 it/sec) -training >> step=4272800, episode=713 reward=0.7664393 (509.76 it/sec) -training >> step=4272900, episode=713 reward=0.7694459 (511.83 it/sec) -training >> step=4273000, episode=713 reward=0.7742526 (490.43 it/sec) -training >> step=4273100, episode=713 reward=0.7822769 (514.45 it/sec) -training >> step=4273200, episode=713 reward=0.7727512 (452.71 it/sec) -training >> step=4273300, episode=713 reward=0.8025231 (499.93 it/sec) -training >> step=4273400, episode=713 reward=0.7728124 (491.12 it/sec) -training >> step=4273500, episode=713 reward=0.7656862 (511.03 it/sec) -training >> step=4273600, episode=713 reward=0.7783172 (477.81 it/sec) -training >> step=4273700, episode=713 reward=0.7499819 (475.11 it/sec) -training >> step=4273800, episode=713 reward=0.7712104 (473.41 it/sec) -training >> step=4273900, episode=713 reward=0.7828059 (474.69 it/sec) -training >> step=4274000, episode=713 reward=0.7837336 (471.23 it/sec) -training >> step=4274100, episode=713 reward=0.7813249 (491.03 it/sec) -training >> step=4274200, episode=713 reward=0.7625287 (487.72 it/sec) -training >> step=4274300, episode=713 reward=0.774512 (476.86 it/sec) -training >> step=4274400, episode=713 reward=0.7832331 (442.28 it/sec) -training >> step=4274500, episode=713 reward=0.7891436 (448.91 it/sec) -training >> step=4274600, episode=713 reward=0.7808957 (491.80 it/sec) -training >> step=4274700, episode=713 reward=0.7719092 (505.57 it/sec) -training >> step=4274800, episode=713 reward=0.8058006 (481.08 it/sec) -training >> step=4274900, episode=713 reward=0.7714545 (493.18 it/sec) -training >> step=4275000, episode=713 reward=0.7706211 (470.08 it/sec) -training >> step=4275100, episode=713 reward=0.7847869 (455.71 it/sec) -training >> step=4275200, episode=713 reward=0.7892194 (471.42 it/sec) -training >> step=4275300, episode=713 reward=0.7560158 (429.10 it/sec) -training >> step=4275400, episode=713 reward=0.7693306 (477.38 it/sec) -training >> step=4275500, episode=713 reward=0.776179 (480.14 it/sec) -training >> step=4275600, episode=713 reward=0.7676388 (440.28 it/sec) -training >> step=4275700, episode=713 reward=0.7848063 (460.47 it/sec) -training >> step=4275800, episode=713 reward=0.7694828 (493.35 it/sec) -training >> step=4275900, episode=713 reward=0.7882733 (499.37 it/sec) -training >> step=4276000, episode=713 reward=0.7689717 (474.86 it/sec) -training >> step=4276100, episode=713 reward=0.7736457 (479.92 it/sec) -training >> step=4276200, episode=713 reward=0.7715948 (476.99 it/sec) -training >> step=4276300, episode=713 reward=0.762048 (436.04 it/sec) -training >> step=4276400, episode=713 reward=0.7705206 (456.15 it/sec) -training >> step=4276500, episode=713 reward=0.7877583 (471.42 it/sec) -training >> step=4276600, episode=713 reward=0.767791 (497.38 it/sec) -training >> step=4276700, episode=713 reward=0.752287 (426.51 it/sec) -training >> step=4276800, episode=713 reward=0.7638685 (480.01 it/sec) -training >> step=4276900, episode=713 reward=0.7631295 (526.74 it/sec) -training >> step=4277000, episode=713 reward=0.7743569 (439.29 it/sec) -training >> step=4277100, episode=713 reward=0.7791932 (471.88 it/sec) -training >> step=4277200, episode=713 reward=0.7660804 (457.64 it/sec) -training >> step=4277300, episode=714 reward=0.7818271 (71.91 it/sec) -training >> step=4277400, episode=714 reward=0.7669103 (460.00 it/sec) -training >> step=4277500, episode=714 reward=0.755791 (509.98 it/sec) -training >> step=4277600, episode=714 reward=0.7666764 (446.91 it/sec) -training >> step=4277700, episode=714 reward=0.7611622 (468.90 it/sec) -training >> step=4277800, episode=714 reward=0.7730035 (451.06 it/sec) -training >> step=4277900, episode=714 reward=0.7806374 (488.07 it/sec) -training >> step=4278000, episode=714 reward=0.7669718 (518.69 it/sec) -training >> step=4278100, episode=714 reward=0.7820075 (456.10 it/sec) -training >> step=4278200, episode=714 reward=0.7957318 (462.73 it/sec) -training >> step=4278300, episode=714 reward=0.7952849 (480.09 it/sec) -training >> step=4278400, episode=714 reward=0.7613083 (487.39 it/sec) -training >> step=4278500, episode=714 reward=0.7944859 (471.41 it/sec) -training >> step=4278600, episode=714 reward=0.7790987 (459.98 it/sec) -training >> step=4278700, episode=714 reward=0.7866735 (434.32 it/sec) -training >> step=4278800, episode=714 reward=0.783223 (471.63 it/sec) -training >> step=4278900, episode=714 reward=0.7771518 (476.29 it/sec) -training >> step=4279000, episode=714 reward=0.756053 (475.81 it/sec) -training >> step=4279100, episode=714 reward=0.7657561 (486.93 it/sec) -training >> step=4279200, episode=714 reward=0.786974 (481.89 it/sec) -training >> step=4279300, episode=714 reward=0.7526649 (447.42 it/sec) -training >> step=4279400, episode=714 reward=0.7875883 (464.18 it/sec) -training >> step=4279500, episode=714 reward=0.777141 (439.84 it/sec) -training >> step=4279600, episode=714 reward=0.8040285 (464.72 it/sec) -training >> step=4279700, episode=714 reward=0.7676332 (432.75 it/sec) -training >> step=4279800, episode=714 reward=0.7887624 (481.03 it/sec) -training >> step=4279900, episode=714 reward=0.7784596 (462.76 it/sec) -training >> step=4280000, episode=714 reward=0.7742955 (458.81 it/sec) -training >> step=4280100, episode=714 reward=0.7737858 (450.95 it/sec) -training >> step=4280200, episode=714 reward=0.7645354 (455.30 it/sec) -training >> step=4280300, episode=714 reward=0.7869563 (455.71 it/sec) -training >> step=4280400, episode=714 reward=0.7861079 (434.21 it/sec) -training >> step=4280500, episode=714 reward=0.7870859 (435.88 it/sec) -training >> step=4280600, episode=714 reward=0.7825862 (435.78 it/sec) -training >> step=4280700, episode=714 reward=0.7875143 (476.59 it/sec) -training >> step=4280800, episode=714 reward=0.7782062 (510.07 it/sec) -training >> step=4280900, episode=714 reward=0.7810819 (496.63 it/sec) -training >> step=4281000, episode=714 reward=0.7839524 (486.42 it/sec) -training >> step=4281100, episode=714 reward=0.7550989 (434.49 it/sec) -training >> step=4281200, episode=714 reward=0.7656428 (455.03 it/sec) -training >> step=4281300, episode=714 reward=0.7651848 (496.89 it/sec) -training >> step=4281400, episode=714 reward=0.7693272 (498.88 it/sec) -training >> step=4281500, episode=714 reward=0.7759259 (451.15 it/sec) -training >> step=4281600, episode=714 reward=0.7716113 (447.02 it/sec) -training >> step=4281700, episode=714 reward=0.7739147 (480.65 it/sec) -training >> step=4281800, episode=714 reward=0.7866683 (442.39 it/sec) -training >> step=4281900, episode=714 reward=0.7730894 (502.75 it/sec) -training >> step=4282000, episode=714 reward=0.7937213 (439.86 it/sec) -training >> step=4282100, episode=714 reward=0.7614068 (475.61 it/sec) -training >> step=4282200, episode=714 reward=0.7637144 (480.79 it/sec) -training >> step=4282300, episode=714 reward=0.7754098 (452.99 it/sec) -training >> step=4282400, episode=714 reward=0.7441376 (445.40 it/sec) -training >> step=4282500, episode=714 reward=0.7614223 (497.90 it/sec) -training >> step=4282600, episode=714 reward=0.7673597 (472.62 it/sec) -training >> step=4282700, episode=714 reward=0.7544797 (489.32 it/sec) -training >> step=4282800, episode=714 reward=0.7704752 (486.31 it/sec) -training >> step=4282900, episode=714 reward=0.7702666 (441.71 it/sec) -training >> step=4283000, episode=714 reward=0.7776021 (469.26 it/sec) -training >> step=4283100, episode=714 reward=0.775536 (459.78 it/sec) -training >> step=4283200, episode=714 reward=0.7612722 (465.38 it/sec) -training >> step=4283300, episode=715 reward=0.7762845 (71.07 it/sec) -training >> step=4283400, episode=715 reward=0.7644973 (491.19 it/sec) -training >> step=4283500, episode=715 reward=0.7720745 (438.56 it/sec) -training >> step=4283600, episode=715 reward=0.7415426 (472.76 it/sec) -training >> step=4283700, episode=715 reward=0.7687299 (453.84 it/sec) -training >> step=4283800, episode=715 reward=0.7732464 (515.66 it/sec) -training >> step=4283900, episode=715 reward=0.7922763 (453.74 it/sec) -training >> step=4284000, episode=715 reward=0.7791861 (440.04 it/sec) -training >> step=4284100, episode=715 reward=0.7761025 (494.98 it/sec) -training >> step=4284200, episode=715 reward=0.7815745 (450.88 it/sec) -training >> step=4284300, episode=715 reward=0.7794701 (459.31 it/sec) -training >> step=4284400, episode=715 reward=0.7718367 (477.86 it/sec) -training >> step=4284500, episode=715 reward=0.7904133 (436.00 it/sec) -training >> step=4284600, episode=715 reward=0.769575 (411.01 it/sec) -training >> step=4284700, episode=715 reward=0.7805013 (522.57 it/sec) -training >> step=4284800, episode=715 reward=0.7515069 (456.96 it/sec) -training >> step=4284900, episode=715 reward=0.7896622 (457.70 it/sec) -training >> step=4285000, episode=715 reward=0.7785125 (478.61 it/sec) -training >> step=4285100, episode=715 reward=0.7509577 (499.89 it/sec) -training >> step=4285200, episode=715 reward=0.7733929 (451.99 it/sec) -training >> step=4285300, episode=715 reward=0.7690992 (454.99 it/sec) -training >> step=4285400, episode=715 reward=0.7869219 (442.97 it/sec) -training >> step=4285500, episode=715 reward=0.7804078 (463.41 it/sec) -training >> step=4285600, episode=715 reward=0.7677468 (454.94 it/sec) -training >> step=4285700, episode=715 reward=0.7620254 (473.34 it/sec) -training >> step=4285800, episode=715 reward=0.7600247 (484.26 it/sec) -training >> step=4285900, episode=715 reward=0.7892588 (462.73 it/sec) -training >> step=4286000, episode=715 reward=0.7755913 (420.53 it/sec) -training >> step=4286100, episode=715 reward=0.7773582 (422.34 it/sec) -training >> step=4286200, episode=715 reward=0.7680084 (471.94 it/sec) -training >> step=4286300, episode=715 reward=0.7726991 (459.02 it/sec) -training >> step=4286400, episode=715 reward=0.7710141 (469.32 it/sec) -training >> step=4286500, episode=715 reward=0.7719413 (489.29 it/sec) -training >> step=4286600, episode=715 reward=0.7626652 (482.18 it/sec) -training >> step=4286700, episode=715 reward=0.7501712 (481.73 it/sec) -training >> step=4286800, episode=715 reward=0.78501 (511.54 it/sec) -training >> step=4286900, episode=715 reward=0.7937541 (518.21 it/sec) -training >> step=4287000, episode=715 reward=0.7692428 (496.74 it/sec) -training >> step=4287100, episode=715 reward=0.7732792 (510.45 it/sec) -training >> step=4287200, episode=715 reward=0.7653709 (511.95 it/sec) -training >> step=4287300, episode=715 reward=0.7585676 (495.02 it/sec) -training >> step=4287400, episode=715 reward=0.775432 (468.12 it/sec) -training >> step=4287500, episode=715 reward=0.7655785 (504.10 it/sec) -training >> step=4287600, episode=715 reward=0.7787013 (475.80 it/sec) -training >> step=4287700, episode=715 reward=0.7703204 (501.12 it/sec) -training >> step=4287800, episode=715 reward=0.7866677 (455.44 it/sec) -training >> step=4287900, episode=715 reward=0.7919556 (502.10 it/sec) -training >> step=4288000, episode=715 reward=0.7652278 (484.94 it/sec) -training >> step=4288100, episode=715 reward=0.7765493 (481.69 it/sec) -training >> step=4288200, episode=715 reward=0.7721687 (485.24 it/sec) -training >> step=4288300, episode=715 reward=0.7846123 (491.15 it/sec) -training >> step=4288400, episode=715 reward=0.7691748 (447.55 it/sec) -training >> step=4288500, episode=715 reward=0.7607234 (486.10 it/sec) -training >> step=4288600, episode=715 reward=0.7527336 (460.24 it/sec) -training >> step=4288700, episode=715 reward=0.7821262 (510.64 it/sec) -training >> step=4288800, episode=715 reward=0.7535536 (485.09 it/sec) -training >> step=4288900, episode=715 reward=0.7628678 (465.29 it/sec) -training >> step=4289000, episode=715 reward=0.7513949 (494.15 it/sec) -training >> step=4289100, episode=715 reward=0.7517443 (495.68 it/sec) -training >> step=4289200, episode=715 reward=0.7500586 (516.82 it/sec) -training >> step=4289300, episode=716 reward=0.7711329 (50.69 it/sec) -training >> step=4289400, episode=716 reward=0.7633266 (473.06 it/sec) -training >> step=4289500, episode=716 reward=0.7571192 (493.86 it/sec) -training >> step=4289600, episode=716 reward=0.7688707 (487.64 it/sec) -training >> step=4289700, episode=716 reward=0.7611251 (460.97 it/sec) -training >> step=4289800, episode=716 reward=0.7758299 (486.51 it/sec) -training >> step=4289900, episode=716 reward=0.7566807 (464.86 it/sec) -training >> step=4290000, episode=716 reward=0.7794639 (495.20 it/sec) -training >> step=4290100, episode=716 reward=0.7702662 (475.93 it/sec) -training >> step=4290200, episode=716 reward=0.7714089 (510.90 it/sec) -training >> step=4290300, episode=716 reward=0.7981765 (490.23 it/sec) -training >> step=4290400, episode=716 reward=0.7490627 (484.85 it/sec) -training >> step=4290500, episode=716 reward=0.7710186 (493.76 it/sec) -training >> step=4290600, episode=716 reward=0.789796 (477.72 it/sec) -training >> step=4290700, episode=716 reward=0.7772459 (495.18 it/sec) -training >> step=4290800, episode=716 reward=0.782594 (488.80 it/sec) -training >> step=4290900, episode=716 reward=0.7830101 (495.21 it/sec) -training >> step=4291000, episode=716 reward=0.7949175 (447.69 it/sec) -training >> step=4291100, episode=716 reward=0.7808114 (506.94 it/sec) -training >> step=4291200, episode=716 reward=0.7902554 (505.31 it/sec) -training >> step=4291300, episode=716 reward=0.7815997 (553.73 it/sec) -training >> step=4291400, episode=716 reward=0.7683877 (492.21 it/sec) -training >> step=4291500, episode=716 reward=0.7703098 (486.23 it/sec) -training >> step=4291600, episode=716 reward=0.7611713 (527.70 it/sec) -training >> step=4291700, episode=716 reward=0.7747074 (529.96 it/sec) -training >> step=4291800, episode=716 reward=0.7726082 (494.32 it/sec) -training >> step=4291900, episode=716 reward=0.7864608 (490.58 it/sec) -training >> step=4292000, episode=716 reward=0.786463 (540.85 it/sec) -training >> step=4292100, episode=716 reward=0.7742102 (479.44 it/sec) -training >> step=4292200, episode=716 reward=0.765524 (531.59 it/sec) -training >> step=4292300, episode=716 reward=0.7788391 (473.74 it/sec) -training >> step=4292400, episode=716 reward=0.7567362 (429.18 it/sec) -training >> step=4292500, episode=716 reward=0.8108153 (443.58 it/sec) -training >> step=4292600, episode=716 reward=0.7812791 (481.84 it/sec) -training >> step=4292700, episode=716 reward=0.7883368 (522.90 it/sec) -training >> step=4292800, episode=716 reward=0.7624734 (483.76 it/sec) -training >> step=4292900, episode=716 reward=0.7677432 (473.28 it/sec) -training >> step=4293000, episode=716 reward=0.7579564 (479.80 it/sec) -training >> step=4293100, episode=716 reward=0.7818637 (492.24 it/sec) -training >> step=4293200, episode=716 reward=0.7766638 (482.52 it/sec) -training >> step=4293300, episode=716 reward=0.7661988 (496.43 it/sec) -training >> step=4293400, episode=716 reward=0.774775 (505.57 it/sec) -training >> step=4293500, episode=716 reward=0.7717861 (504.52 it/sec) -training >> step=4293600, episode=716 reward=0.7787869 (376.40 it/sec) -training >> step=4293700, episode=716 reward=0.7733059 (354.10 it/sec) -training >> step=4293800, episode=716 reward=0.7940921 (335.13 it/sec) -training >> step=4293900, episode=716 reward=0.7747238 (326.54 it/sec) -training >> step=4294000, episode=716 reward=0.7731291 (344.64 it/sec) -training >> step=4294100, episode=716 reward=0.7712901 (389.80 it/sec) -training >> step=4294200, episode=716 reward=0.7673705 (388.01 it/sec) -training >> step=4294300, episode=716 reward=0.7894287 (444.00 it/sec) -training >> step=4294400, episode=716 reward=0.7422439 (462.96 it/sec) -training >> step=4294500, episode=716 reward=0.7525903 (495.46 it/sec) -training >> step=4294600, episode=716 reward=0.7742073 (500.75 it/sec) -training >> step=4294700, episode=716 reward=0.7980556 (497.43 it/sec) -training >> step=4294800, episode=716 reward=0.7464079 (446.00 it/sec) -training >> step=4294900, episode=716 reward=0.7753035 (522.57 it/sec) -training >> step=4295000, episode=716 reward=0.7580723 (474.95 it/sec) -training >> step=4295100, episode=716 reward=0.7528611 (430.36 it/sec) -training >> step=4295200, episode=716 reward=0.7768309 (522.80 it/sec) -training >> step=4295300, episode=717 reward=0.7779335 (59.90 it/sec) -training >> step=4295400, episode=717 reward=0.747218 (443.11 it/sec) -training >> step=4295500, episode=717 reward=0.7846908 (506.96 it/sec) -training >> step=4295600, episode=717 reward=0.7536989 (501.53 it/sec) -training >> step=4295700, episode=717 reward=0.79129 (423.05 it/sec) -training >> step=4295800, episode=717 reward=0.7989194 (457.96 it/sec) -training >> step=4295900, episode=717 reward=0.7722421 (411.67 it/sec) -training >> step=4296000, episode=717 reward=0.7960885 (439.59 it/sec) -training >> step=4296100, episode=717 reward=0.7774524 (522.60 it/sec) -training >> step=4296200, episode=717 reward=0.7891573 (410.75 it/sec) -training >> step=4296300, episode=717 reward=0.7939367 (367.42 it/sec) -training >> step=4296400, episode=717 reward=0.7790528 (485.15 it/sec) -training >> step=4296500, episode=717 reward=0.7711301 (440.98 it/sec) -training >> step=4296600, episode=717 reward=0.7582417 (436.01 it/sec) -training >> step=4296700, episode=717 reward=0.7708273 (438.10 it/sec) -training >> step=4296800, episode=717 reward=0.7881054 (497.56 it/sec) -training >> step=4296900, episode=717 reward=0.7722879 (419.84 it/sec) -training >> step=4297000, episode=717 reward=0.7803094 (502.14 it/sec) -training >> step=4297100, episode=717 reward=0.7703984 (468.60 it/sec) -training >> step=4297200, episode=717 reward=0.7756248 (498.63 it/sec) -training >> step=4297300, episode=717 reward=0.7766169 (400.54 it/sec) -training >> step=4297400, episode=717 reward=0.7919268 (425.28 it/sec) -training >> step=4297500, episode=717 reward=0.7766282 (416.16 it/sec) -training >> step=4297600, episode=717 reward=0.7788141 (384.61 it/sec) -training >> step=4297700, episode=717 reward=0.7650574 (363.39 it/sec) -training >> step=4297800, episode=717 reward=0.7767739 (390.49 it/sec) -training >> step=4297900, episode=717 reward=0.787002 (474.39 it/sec) -training >> step=4298000, episode=717 reward=0.7844043 (484.65 it/sec) -training >> step=4298100, episode=717 reward=0.7742269 (459.55 it/sec) -training >> step=4298200, episode=717 reward=0.7879065 (467.38 it/sec) -training >> step=4298300, episode=717 reward=0.783469 (484.57 it/sec) -training >> step=4298400, episode=717 reward=0.7835614 (472.12 it/sec) -training >> step=4298500, episode=717 reward=0.7767345 (453.07 it/sec) -training >> step=4298600, episode=717 reward=0.790764 (488.30 it/sec) -training >> step=4298700, episode=717 reward=0.7577097 (438.39 it/sec) -training >> step=4298800, episode=717 reward=0.788246 (463.08 it/sec) -training >> step=4298900, episode=717 reward=0.7697374 (486.30 it/sec) -training >> step=4299000, episode=717 reward=0.7741998 (512.78 it/sec) -training >> step=4299100, episode=717 reward=0.7765116 (482.26 it/sec) -training >> step=4299200, episode=717 reward=0.7722443 (483.82 it/sec) -training >> step=4299300, episode=717 reward=0.791647 (494.33 it/sec) -training >> step=4299400, episode=717 reward=0.7853583 (491.69 it/sec) -training >> step=4299500, episode=717 reward=0.7787196 (494.34 it/sec) -training >> step=4299600, episode=717 reward=0.7921088 (457.35 it/sec) -training >> step=4299700, episode=717 reward=0.7605061 (491.85 it/sec) -training >> step=4299800, episode=717 reward=0.7629729 (475.62 it/sec) -training >> step=4299900, episode=717 reward=0.7780156 (476.95 it/sec) -training >> step=4300000, episode=717 reward=0.7863333 (486.65 it/sec) -training >> step=4300100, episode=717 reward=0.7610838 (491.64 it/sec) -training >> step=4300200, episode=717 reward=0.7726541 (514.41 it/sec) -training >> step=4300300, episode=717 reward=0.7761342 (481.57 it/sec) -training >> step=4300400, episode=717 reward=0.7986025 (509.32 it/sec) -training >> step=4300500, episode=717 reward=0.7766492 (485.70 it/sec) -training >> step=4300600, episode=717 reward=0.7688575 (481.23 it/sec) -training >> step=4300700, episode=717 reward=0.7695225 (483.87 it/sec) -training >> step=4300800, episode=717 reward=0.7691201 (539.27 it/sec) -training >> step=4300900, episode=717 reward=0.7850855 (508.52 it/sec) -training >> step=4301000, episode=717 reward=0.7608594 (510.47 it/sec) -training >> step=4301100, episode=717 reward=0.7488496 (463.46 it/sec) -training >> step=4301200, episode=717 reward=0.7886106 (527.80 it/sec) -training >> step=4301300, episode=718 reward=0.7847494 (58.85 it/sec) -training >> step=4301400, episode=718 reward=0.7478043 (473.49 it/sec) -training >> step=4301500, episode=718 reward=0.7701129 (514.12 it/sec) -training >> step=4301600, episode=718 reward=0.7714955 (503.72 it/sec) -training >> step=4301700, episode=718 reward=0.7717664 (526.93 it/sec) -training >> step=4301800, episode=718 reward=0.7599152 (500.04 it/sec) -training >> step=4301900, episode=718 reward=0.7886825 (493.11 it/sec) -training >> step=4302000, episode=718 reward=0.7617512 (473.19 it/sec) -training >> step=4302100, episode=718 reward=0.7698808 (476.94 it/sec) -training >> step=4302200, episode=718 reward=0.7700504 (506.67 it/sec) -training >> step=4302300, episode=718 reward=0.7690121 (497.39 it/sec) -training >> step=4302400, episode=718 reward=0.7701979 (468.43 it/sec) -training >> step=4302500, episode=718 reward=0.756329 (498.04 it/sec) -training >> step=4302600, episode=718 reward=0.7775122 (504.07 it/sec) -training >> step=4302700, episode=718 reward=0.783334 (502.65 it/sec) -training >> step=4302800, episode=718 reward=0.7804754 (536.10 it/sec) -training >> step=4302900, episode=718 reward=0.7791069 (481.61 it/sec) -training >> step=4303000, episode=718 reward=0.7577367 (466.65 it/sec) -training >> step=4303100, episode=718 reward=0.7673915 (499.84 it/sec) -training >> step=4303200, episode=718 reward=0.7831474 (483.21 it/sec) -training >> step=4303300, episode=718 reward=0.7856427 (467.89 it/sec) -training >> step=4303400, episode=718 reward=0.7735207 (459.39 it/sec) -training >> step=4303500, episode=718 reward=0.7707787 (504.36 it/sec) -training >> step=4303600, episode=718 reward=0.7717878 (510.45 it/sec) -training >> step=4303700, episode=718 reward=0.761191 (523.71 it/sec) -training >> step=4303800, episode=718 reward=0.788294 (538.16 it/sec) -training >> step=4303900, episode=718 reward=0.7945193 (443.72 it/sec) -training >> step=4304000, episode=718 reward=0.785477 (523.81 it/sec) -training >> step=4304100, episode=718 reward=0.7682225 (511.35 it/sec) -training >> step=4304200, episode=718 reward=0.7750712 (546.82 it/sec) -training >> step=4304300, episode=718 reward=0.7856954 (519.26 it/sec) -training >> step=4304400, episode=718 reward=0.7686088 (492.20 it/sec) -training >> step=4304500, episode=718 reward=0.787254 (505.25 it/sec) -training >> step=4304600, episode=718 reward=0.7652505 (522.65 it/sec) -training >> step=4304700, episode=718 reward=0.7746182 (502.66 it/sec) -training >> step=4304800, episode=718 reward=0.770784 (534.71 it/sec) -training >> step=4304900, episode=718 reward=0.7810634 (526.96 it/sec) -training >> step=4305000, episode=718 reward=0.7661535 (512.80 it/sec) -training >> step=4305100, episode=718 reward=0.7932368 (498.11 it/sec) -training >> step=4305200, episode=718 reward=0.7722687 (522.64 it/sec) -training >> step=4305300, episode=718 reward=0.7882854 (523.99 it/sec) -training >> step=4305400, episode=718 reward=0.7801035 (509.72 it/sec) -training >> step=4305500, episode=718 reward=0.7895503 (499.55 it/sec) -training >> step=4305600, episode=718 reward=0.7695104 (500.60 it/sec) -training >> step=4305700, episode=718 reward=0.7886912 (483.15 it/sec) -training >> step=4305800, episode=718 reward=0.7688647 (541.96 it/sec) -training >> step=4305900, episode=718 reward=0.7807493 (493.52 it/sec) -training >> step=4306000, episode=718 reward=0.785217 (471.13 it/sec) -training >> step=4306100, episode=718 reward=0.7914792 (506.72 it/sec) -training >> step=4306200, episode=718 reward=0.7888119 (419.62 it/sec) -training >> step=4306300, episode=718 reward=0.7747768 (400.37 it/sec) -training >> step=4306400, episode=718 reward=0.7670723 (410.99 it/sec) -training >> step=4306500, episode=718 reward=0.7692453 (478.62 it/sec) -training >> step=4306600, episode=718 reward=0.7799265 (535.45 it/sec) -training >> step=4306700, episode=718 reward=0.7849829 (539.78 it/sec) -training >> step=4306800, episode=718 reward=0.7846655 (505.61 it/sec) -training >> step=4306900, episode=718 reward=0.7818239 (451.80 it/sec) -training >> step=4307000, episode=718 reward=0.7697363 (503.01 it/sec) -training >> step=4307100, episode=718 reward=0.8002748 (529.10 it/sec) -training >> step=4307200, episode=718 reward=0.7742504 (493.78 it/sec) -training >> step=4307300, episode=719 reward=0.7591878 (89.80 it/sec) -training >> step=4307400, episode=719 reward=0.7743623 (534.18 it/sec) -training >> step=4307500, episode=719 reward=0.7686318 (455.98 it/sec) -training >> step=4307600, episode=719 reward=0.7400836 (505.01 it/sec) -training >> step=4307700, episode=719 reward=0.7673056 (557.58 it/sec) -training >> step=4307800, episode=719 reward=0.7887949 (495.18 it/sec) -training >> step=4307900, episode=719 reward=0.7844732 (528.44 it/sec) -training >> step=4308000, episode=719 reward=0.7683874 (531.80 it/sec) -training >> step=4308100, episode=719 reward=0.7709041 (500.44 it/sec) -training >> step=4308200, episode=719 reward=0.7624047 (491.08 it/sec) -training >> step=4308300, episode=719 reward=0.7722172 (496.86 it/sec) -training >> step=4308400, episode=719 reward=0.7836592 (498.11 it/sec) -training >> step=4308500, episode=719 reward=0.7662175 (510.00 it/sec) -training >> step=4308600, episode=719 reward=0.7991398 (505.23 it/sec) -training >> step=4308700, episode=719 reward=0.7739671 (524.66 it/sec) -training >> step=4308800, episode=719 reward=0.7867497 (526.41 it/sec) -training >> step=4308900, episode=719 reward=0.7928854 (496.86 it/sec) -training >> step=4309000, episode=719 reward=0.7763286 (513.85 it/sec) -training >> step=4309100, episode=719 reward=0.7769372 (526.89 it/sec) -training >> step=4309200, episode=719 reward=0.7725508 (526.02 it/sec) -training >> step=4309300, episode=719 reward=0.7558057 (536.57 it/sec) -training >> step=4309400, episode=719 reward=0.7718956 (511.83 it/sec) -training >> step=4309500, episode=719 reward=0.7593714 (493.43 it/sec) -training >> step=4309600, episode=719 reward=0.7699971 (498.59 it/sec) -training >> step=4309700, episode=719 reward=0.7769107 (454.43 it/sec) -training >> step=4309800, episode=719 reward=0.7983328 (520.18 it/sec) -training >> step=4309900, episode=719 reward=0.7696625 (477.95 it/sec) -training >> step=4310000, episode=719 reward=0.7813429 (523.75 it/sec) -training >> step=4310100, episode=719 reward=0.7889685 (511.49 it/sec) -training >> step=4310200, episode=719 reward=0.7625695 (445.59 it/sec) -training >> step=4310300, episode=719 reward=0.7712638 (409.41 it/sec) -training >> step=4310400, episode=719 reward=0.79024 (464.56 it/sec) -training >> step=4310500, episode=719 reward=0.7875412 (478.18 it/sec) -training >> step=4310600, episode=719 reward=0.7717902 (491.93 it/sec) -training >> step=4310700, episode=719 reward=0.7782601 (465.36 it/sec) -training >> step=4310800, episode=719 reward=0.7868497 (468.22 it/sec) -training >> step=4310900, episode=719 reward=0.7851633 (455.92 it/sec) -training >> step=4311000, episode=719 reward=0.7902893 (445.47 it/sec) -training >> step=4311100, episode=719 reward=0.7714067 (379.00 it/sec) -training >> step=4311200, episode=719 reward=0.7872936 (371.53 it/sec) -training >> step=4311300, episode=719 reward=0.7785429 (391.69 it/sec) -training >> step=4311400, episode=719 reward=0.7812859 (346.45 it/sec) -training >> step=4311500, episode=719 reward=0.7855468 (353.16 it/sec) -training >> step=4311600, episode=719 reward=0.8065173 (431.90 it/sec) -training >> step=4311700, episode=719 reward=0.7567147 (493.46 it/sec) -training >> step=4311800, episode=719 reward=0.7787024 (444.68 it/sec) -training >> step=4311900, episode=719 reward=0.7821618 (412.07 it/sec) -training >> step=4312000, episode=719 reward=0.7722768 (427.45 it/sec) -training >> step=4312100, episode=719 reward=0.7718059 (420.08 it/sec) -training >> step=4312200, episode=719 reward=0.7893477 (430.47 it/sec) -training >> step=4312300, episode=719 reward=0.7440861 (449.68 it/sec) -training >> step=4312400, episode=719 reward=0.7539468 (501.96 it/sec) -training >> step=4312500, episode=719 reward=0.7760118 (524.71 it/sec) -training >> step=4312600, episode=719 reward=0.7623832 (434.03 it/sec) -training >> step=4312700, episode=719 reward=0.7780575 (464.67 it/sec) -training >> step=4312800, episode=719 reward=0.7464448 (442.87 it/sec) -training >> step=4312900, episode=719 reward=0.7789497 (497.32 it/sec) -training >> step=4313000, episode=719 reward=0.7892075 (486.84 it/sec) -training >> step=4313100, episode=719 reward=0.7624865 (478.79 it/sec) -training >> step=4313200, episode=719 reward=0.7726682 (463.18 it/sec) -training >> step=4313300, episode=720 reward=0.7676231 (87.01 it/sec) -training >> step=4313400, episode=720 reward=0.7682117 (486.22 it/sec) -training >> step=4313500, episode=720 reward=0.7651073 (512.11 it/sec) -training >> step=4313600, episode=720 reward=0.7675756 (500.57 it/sec) -training >> step=4313700, episode=720 reward=0.7781898 (513.31 it/sec) -training >> step=4313800, episode=720 reward=0.7752966 (460.78 it/sec) -training >> step=4313900, episode=720 reward=0.7783542 (480.44 it/sec) -training >> step=4314000, episode=720 reward=0.7667579 (500.12 it/sec) -training >> step=4314100, episode=720 reward=0.787255 (551.72 it/sec) -training >> step=4314200, episode=720 reward=0.7761694 (481.78 it/sec) -training >> step=4314300, episode=720 reward=0.7793545 (478.48 it/sec) -training >> step=4314400, episode=720 reward=0.7714759 (499.44 it/sec) -training >> step=4314500, episode=720 reward=0.7799958 (505.82 it/sec) -training >> step=4314600, episode=720 reward=0.7928521 (480.69 it/sec) -training >> step=4314700, episode=720 reward=0.7746577 (496.00 it/sec) -training >> step=4314800, episode=720 reward=0.7799594 (403.45 it/sec) -training >> step=4314900, episode=720 reward=0.7904393 (402.99 it/sec) -training >> step=4315000, episode=720 reward=0.7540934 (452.22 it/sec) -training >> step=4315100, episode=720 reward=0.7885737 (497.76 it/sec) -training >> step=4315200, episode=720 reward=0.777509 (472.94 it/sec) -training >> step=4315300, episode=720 reward=0.7768057 (435.34 it/sec) -training >> step=4315400, episode=720 reward=0.7740698 (490.38 it/sec) -training >> step=4315500, episode=720 reward=0.778479 (468.89 it/sec) -training >> step=4315600, episode=720 reward=0.7805076 (478.64 it/sec) -training >> step=4315700, episode=720 reward=0.7900285 (528.59 it/sec) -training >> step=4315800, episode=720 reward=0.778374 (504.33 it/sec) -training >> step=4315900, episode=720 reward=0.7676883 (508.43 it/sec) -training >> step=4316000, episode=720 reward=0.800306 (503.85 it/sec) -training >> step=4316100, episode=720 reward=0.7967168 (478.04 it/sec) -training >> step=4316200, episode=720 reward=0.7805352 (494.30 it/sec) -training >> step=4316300, episode=720 reward=0.7883382 (514.95 it/sec) -training >> step=4316400, episode=720 reward=0.7608598 (500.37 it/sec) -training >> step=4316500, episode=720 reward=0.790005 (515.02 it/sec) -training >> step=4316600, episode=720 reward=0.7761253 (488.05 it/sec) -training >> step=4316700, episode=720 reward=0.7933738 (498.47 it/sec) -training >> step=4316800, episode=720 reward=0.795975 (482.36 it/sec) -training >> step=4316900, episode=720 reward=0.7684507 (535.64 it/sec) -training >> step=4317000, episode=720 reward=0.7761298 (479.17 it/sec) -training >> step=4317100, episode=720 reward=0.7750314 (486.78 it/sec) -training >> step=4317200, episode=720 reward=0.7960029 (452.22 it/sec) -training >> step=4317300, episode=720 reward=0.7780405 (474.37 it/sec) -training >> step=4317400, episode=720 reward=0.7625443 (513.55 it/sec) -training >> step=4317500, episode=720 reward=0.787164 (513.79 it/sec) -training >> step=4317600, episode=720 reward=0.752477 (561.32 it/sec) -training >> step=4317700, episode=720 reward=0.7781202 (465.77 it/sec) -training >> step=4317800, episode=720 reward=0.7604519 (483.27 it/sec) -training >> step=4317900, episode=720 reward=0.7951889 (465.63 it/sec) -training >> step=4318000, episode=720 reward=0.7779564 (462.36 it/sec) -training >> step=4318100, episode=720 reward=0.7628367 (474.19 it/sec) -training >> step=4318200, episode=720 reward=0.7713336 (478.73 it/sec) -training >> step=4318300, episode=720 reward=0.7729954 (490.10 it/sec) -training >> step=4318400, episode=720 reward=0.7625837 (489.60 it/sec) -training >> step=4318500, episode=720 reward=0.767732 (521.00 it/sec) -training >> step=4318600, episode=720 reward=0.7768216 (508.36 it/sec) -training >> step=4318700, episode=720 reward=0.756502 (488.25 it/sec) -training >> step=4318800, episode=720 reward=0.7868398 (489.79 it/sec) -training >> step=4318900, episode=720 reward=0.7751103 (544.70 it/sec) -training >> step=4319000, episode=720 reward=0.7598979 (521.57 it/sec) -training >> step=4319100, episode=720 reward=0.7825996 (489.17 it/sec) -training >> step=4319200, episode=720 reward=0.7613052 (507.67 it/sec) -training >> step=4319300, episode=721 reward=0.7746358 (70.32 it/sec) -training >> step=4319400, episode=721 reward=0.7546147 (479.41 it/sec) -training >> step=4319500, episode=721 reward=0.7568794 (460.83 it/sec) -training >> step=4319600, episode=721 reward=0.7748466 (523.72 it/sec) -training >> step=4319700, episode=721 reward=0.7748494 (471.34 it/sec) -training >> step=4319800, episode=721 reward=0.7757742 (470.13 it/sec) -training >> step=4319900, episode=721 reward=0.7675758 (525.71 it/sec) -training >> step=4320000, episode=721 reward=0.8011777 (482.09 it/sec) -training >> step=4320100, episode=721 reward=0.7684681 (493.58 it/sec) -training >> step=4320200, episode=721 reward=0.7767582 (475.75 it/sec) -training >> step=4320300, episode=721 reward=0.7682214 (409.77 it/sec) -training >> step=4320400, episode=721 reward=0.7846747 (509.61 it/sec) -training >> step=4320500, episode=721 reward=0.7769915 (480.36 it/sec) -training >> step=4320600, episode=721 reward=0.783225 (444.44 it/sec) -training >> step=4320700, episode=721 reward=0.7620772 (407.78 it/sec) -training >> step=4320800, episode=721 reward=0.7974784 (425.19 it/sec) -training >> step=4320900, episode=721 reward=0.7579302 (516.76 it/sec) -training >> step=4321000, episode=721 reward=0.7748743 (480.94 it/sec) -training >> step=4321100, episode=721 reward=0.7760606 (505.25 it/sec) -training >> step=4321200, episode=721 reward=0.7938538 (557.92 it/sec) -training >> step=4321300, episode=721 reward=0.7840535 (495.93 it/sec) -training >> step=4321400, episode=721 reward=0.7719271 (525.55 it/sec) -training >> step=4321500, episode=721 reward=0.7776358 (449.33 it/sec) -training >> step=4321600, episode=721 reward=0.7777318 (396.64 it/sec) -training >> step=4321700, episode=721 reward=0.7720851 (378.59 it/sec) -training >> step=4321800, episode=721 reward=0.7742465 (390.20 it/sec) -training >> step=4321900, episode=721 reward=0.7779264 (407.38 it/sec) -training >> step=4322000, episode=721 reward=0.7865126 (401.15 it/sec) -training >> step=4322100, episode=721 reward=0.7910623 (417.34 it/sec) -training >> step=4322200, episode=721 reward=0.7762184 (438.06 it/sec) -training >> step=4322300, episode=721 reward=0.7652984 (463.72 it/sec) -training >> step=4322400, episode=721 reward=0.7950594 (413.84 it/sec) -training >> step=4322500, episode=721 reward=0.7887521 (453.95 it/sec) -training >> step=4322600, episode=721 reward=0.7799156 (484.79 it/sec) -training >> step=4322700, episode=721 reward=0.7600437 (482.87 it/sec) -training >> step=4322800, episode=721 reward=0.7821287 (436.63 it/sec) -training >> step=4322900, episode=721 reward=0.7904876 (436.09 it/sec) -training >> step=4323000, episode=721 reward=0.761137 (448.43 it/sec) -training >> step=4323100, episode=721 reward=0.7904119 (519.57 it/sec) -training >> step=4323200, episode=721 reward=0.7920575 (446.42 it/sec) -training >> step=4323300, episode=721 reward=0.7847108 (422.55 it/sec) -training >> step=4323400, episode=721 reward=0.8055199 (477.42 it/sec) -training >> step=4323500, episode=721 reward=0.7704015 (434.73 it/sec) -training >> step=4323600, episode=721 reward=0.7776329 (469.59 it/sec) -training >> step=4323700, episode=721 reward=0.7690392 (469.56 it/sec) -training >> step=4323800, episode=721 reward=0.7780319 (439.15 it/sec) -training >> step=4323900, episode=721 reward=0.7841244 (458.77 it/sec) -training >> step=4324000, episode=721 reward=0.7607249 (407.66 it/sec) -training >> step=4324100, episode=721 reward=0.7787681 (477.96 it/sec) -training >> step=4324200, episode=721 reward=0.7588593 (473.66 it/sec) -training >> step=4324300, episode=721 reward=0.787061 (416.63 it/sec) -training >> step=4324400, episode=721 reward=0.7555784 (351.21 it/sec) -training >> step=4324500, episode=721 reward=0.7750754 (451.02 it/sec) -training >> step=4324600, episode=721 reward=0.7577398 (472.85 it/sec) -training >> step=4324700, episode=721 reward=0.7921123 (465.85 it/sec) -training >> step=4324800, episode=721 reward=0.765525 (453.92 it/sec) -training >> step=4324900, episode=721 reward=0.773219 (455.65 it/sec) -training >> step=4325000, episode=721 reward=0.7716826 (458.33 it/sec) -training >> step=4325100, episode=721 reward=0.7753811 (423.25 it/sec) -training >> step=4325200, episode=721 reward=0.7681355 (463.47 it/sec) -training >> step=4325300, episode=722 reward=0.7760885 (87.24 it/sec) -training >> step=4325400, episode=722 reward=0.7863129 (471.66 it/sec) -training >> step=4325500, episode=722 reward=0.7726317 (481.31 it/sec) -training >> step=4325600, episode=722 reward=0.7491046 (473.38 it/sec) -training >> step=4325700, episode=722 reward=0.7673125 (480.01 it/sec) -training >> step=4325800, episode=722 reward=0.7826895 (487.93 it/sec) -training >> step=4325900, episode=722 reward=0.7814427 (442.17 it/sec) -training >> step=4326000, episode=722 reward=0.7691141 (485.65 it/sec) -training >> step=4326100, episode=722 reward=0.7816708 (468.29 it/sec) -training >> step=4326200, episode=722 reward=0.7879921 (463.25 it/sec) -training >> step=4326300, episode=722 reward=0.7668056 (472.81 it/sec) -training >> step=4326400, episode=722 reward=0.7925339 (466.50 it/sec) -training >> step=4326500, episode=722 reward=0.7525505 (474.32 it/sec) -training >> step=4326600, episode=722 reward=0.7684585 (458.17 it/sec) -training >> step=4326700, episode=722 reward=0.7864499 (485.68 it/sec) -training >> step=4326800, episode=722 reward=0.7732383 (443.31 it/sec) -training >> step=4326900, episode=722 reward=0.7744716 (449.04 it/sec) -training >> step=4327000, episode=722 reward=0.770809 (479.43 it/sec) -training >> step=4327100, episode=722 reward=0.7938888 (516.17 it/sec) -training >> step=4327200, episode=722 reward=0.7835834 (465.82 it/sec) -training >> step=4327300, episode=722 reward=0.7655335 (488.02 it/sec) -training >> step=4327400, episode=722 reward=0.7681743 (454.34 it/sec) -training >> step=4327500, episode=722 reward=0.7829543 (473.75 it/sec) -training >> step=4327600, episode=722 reward=0.7746263 (439.53 it/sec) -training >> step=4327700, episode=722 reward=0.768917 (480.05 it/sec) -training >> step=4327800, episode=722 reward=0.7838506 (471.99 it/sec) -training >> step=4327900, episode=722 reward=0.7837012 (481.59 it/sec) -training >> step=4328000, episode=722 reward=0.7859613 (416.68 it/sec) -training >> step=4328100, episode=722 reward=0.7894214 (487.03 it/sec) -training >> step=4328200, episode=722 reward=0.7728592 (479.60 it/sec) -training >> step=4328300, episode=722 reward=0.7830815 (488.98 it/sec) -training >> step=4328400, episode=722 reward=0.7559176 (453.87 it/sec) -training >> step=4328500, episode=722 reward=0.7804381 (505.15 it/sec) -training >> step=4328600, episode=722 reward=0.7595022 (424.27 it/sec) -training >> step=4328700, episode=722 reward=0.7983073 (442.03 it/sec) -training >> step=4328800, episode=722 reward=0.7735804 (476.90 it/sec) -training >> step=4328900, episode=722 reward=0.7682038 (497.77 it/sec) -training >> step=4329000, episode=722 reward=0.7869413 (474.06 it/sec) -training >> step=4329100, episode=722 reward=0.7849662 (463.81 it/sec) -training >> step=4329200, episode=722 reward=0.7715498 (474.12 it/sec) -training >> step=4329300, episode=722 reward=0.7559578 (462.04 it/sec) -training >> step=4329400, episode=722 reward=0.7966722 (501.30 it/sec) -training >> step=4329500, episode=722 reward=0.787267 (482.00 it/sec) -training >> step=4329600, episode=722 reward=0.77851 (485.15 it/sec) -training >> step=4329700, episode=722 reward=0.774436 (463.01 it/sec) -training >> step=4329800, episode=722 reward=0.7877163 (443.14 it/sec) -training >> step=4329900, episode=722 reward=0.7725833 (441.73 it/sec) -training >> step=4330000, episode=722 reward=0.7833216 (495.07 it/sec) -training >> step=4330100, episode=722 reward=0.7797515 (489.08 it/sec) -training >> step=4330200, episode=722 reward=0.7689244 (483.12 it/sec) -training >> step=4330300, episode=722 reward=0.7658356 (522.44 it/sec) -training >> step=4330400, episode=722 reward=0.7878827 (505.52 it/sec) -training >> step=4330500, episode=722 reward=0.7888516 (528.42 it/sec) -training >> step=4330600, episode=722 reward=0.7793691 (509.86 it/sec) -training >> step=4330700, episode=722 reward=0.7530565 (508.95 it/sec) -training >> step=4330800, episode=722 reward=0.7696642 (513.32 it/sec) -training >> step=4330900, episode=722 reward=0.7724007 (511.07 it/sec) -training >> step=4331000, episode=722 reward=0.7602591 (496.93 it/sec) -training >> step=4331100, episode=722 reward=0.7452527 (499.70 it/sec) -training >> step=4331200, episode=722 reward=0.7776782 (449.04 it/sec) -training >> step=4331300, episode=723 reward=0.7638578 (44.90 it/sec) -training >> step=4331400, episode=723 reward=0.7491866 (400.25 it/sec) -training >> step=4331500, episode=723 reward=0.7758006 (396.90 it/sec) -training >> step=4331600, episode=723 reward=0.7763041 (462.60 it/sec) -training >> step=4331700, episode=723 reward=0.7867221 (458.37 it/sec) -training >> step=4331800, episode=723 reward=0.7860495 (460.17 it/sec) -training >> step=4331900, episode=723 reward=0.7615796 (496.81 it/sec) -training >> step=4332000, episode=723 reward=0.7804577 (475.99 it/sec) -training >> step=4332100, episode=723 reward=0.7702832 (484.18 it/sec) -training >> step=4332200, episode=723 reward=0.7888649 (494.72 it/sec) -training >> step=4332300, episode=723 reward=0.7918447 (483.65 it/sec) -training >> step=4332400, episode=723 reward=0.7875174 (489.28 it/sec) -training >> step=4332500, episode=723 reward=0.783656 (470.52 it/sec) -training >> step=4332600, episode=723 reward=0.7708206 (478.87 it/sec) -training >> step=4332700, episode=723 reward=0.7794688 (423.55 it/sec) -training >> step=4332800, episode=723 reward=0.7978242 (399.36 it/sec) -training >> step=4332900, episode=723 reward=0.7793185 (380.28 it/sec) -training >> step=4333000, episode=723 reward=0.7814606 (358.61 it/sec) -training >> step=4333100, episode=723 reward=0.766735 (426.94 it/sec) -training >> step=4333200, episode=723 reward=0.7789142 (449.30 it/sec) -training >> step=4333300, episode=723 reward=0.7768083 (490.63 it/sec) -training >> step=4333400, episode=723 reward=0.7696342 (441.08 it/sec) -training >> step=4333500, episode=723 reward=0.7716672 (469.25 it/sec) -training >> step=4333600, episode=723 reward=0.7756807 (441.00 it/sec) -training >> step=4333700, episode=723 reward=0.7672198 (492.49 it/sec) -training >> step=4333800, episode=723 reward=0.7527294 (474.73 it/sec) -training >> step=4333900, episode=723 reward=0.7614367 (487.65 it/sec) -training >> step=4334000, episode=723 reward=0.7681831 (494.22 it/sec) -training >> step=4334100, episode=723 reward=0.7657816 (527.44 it/sec) -training >> step=4334200, episode=723 reward=0.7862301 (439.70 it/sec) -training >> step=4334300, episode=723 reward=0.7801853 (487.71 it/sec) -training >> step=4334400, episode=723 reward=0.7836182 (471.13 it/sec) -training >> step=4334500, episode=723 reward=0.7815707 (516.99 it/sec) -training >> step=4334600, episode=723 reward=0.7738291 (465.76 it/sec) -training >> step=4334700, episode=723 reward=0.7679455 (493.26 it/sec) -training >> step=4334800, episode=723 reward=0.7661788 (482.69 it/sec) -training >> step=4334900, episode=723 reward=0.7680988 (446.96 it/sec) -training >> step=4335000, episode=723 reward=0.7802983 (421.94 it/sec) -training >> step=4335100, episode=723 reward=0.7829333 (478.31 it/sec) -training >> step=4335200, episode=723 reward=0.783473 (520.31 it/sec) -training >> step=4335300, episode=723 reward=0.7727778 (462.49 it/sec) -training >> step=4335400, episode=723 reward=0.7888833 (419.34 it/sec) -training >> step=4335500, episode=723 reward=0.7758319 (456.33 it/sec) -training >> step=4335600, episode=723 reward=0.7561372 (448.44 it/sec) -training >> step=4335700, episode=723 reward=0.7635974 (477.02 it/sec) -training >> step=4335800, episode=723 reward=0.7847344 (455.22 it/sec) -training >> step=4335900, episode=723 reward=0.7865524 (530.64 it/sec) -training >> step=4336000, episode=723 reward=0.7649794 (470.21 it/sec) -training >> step=4336100, episode=723 reward=0.789805 (485.57 it/sec) -training >> step=4336200, episode=723 reward=0.7777553 (477.94 it/sec) -training >> step=4336300, episode=723 reward=0.7826169 (508.35 it/sec) -training >> step=4336400, episode=723 reward=0.7786109 (439.04 it/sec) -training >> step=4336500, episode=723 reward=0.809434 (446.13 it/sec) -training >> step=4336600, episode=723 reward=0.7993923 (518.71 it/sec) -training >> step=4336700, episode=723 reward=0.7621498 (477.31 it/sec) -training >> step=4336800, episode=723 reward=0.770429 (459.49 it/sec) -training >> step=4336900, episode=723 reward=0.7784922 (495.15 it/sec) -training >> step=4337000, episode=723 reward=0.7834117 (492.11 it/sec) -training >> step=4337100, episode=723 reward=0.778061 (469.70 it/sec) -training >> step=4337200, episode=723 reward=0.7434728 (468.67 it/sec) -training >> step=4337300, episode=724 reward=0.7921537 (115.14 it/sec) -training >> step=4337400, episode=724 reward=0.7692004 (490.38 it/sec) -training >> step=4337500, episode=724 reward=0.7217216 (367.40 it/sec) -training >> step=4337600, episode=724 reward=0.7656881 (409.57 it/sec) -training >> step=4337700, episode=724 reward=0.7986249 (441.74 it/sec) -training >> step=4337800, episode=724 reward=0.7399242 (465.43 it/sec) -training >> step=4337900, episode=724 reward=0.7830396 (422.16 it/sec) -training >> step=4338000, episode=724 reward=0.7624564 (444.97 it/sec) -training >> step=4338100, episode=724 reward=0.7866586 (479.85 it/sec) -training >> step=4338200, episode=724 reward=0.770503 (470.85 it/sec) -training >> step=4338300, episode=724 reward=0.7711756 (489.73 it/sec) -training >> step=4338400, episode=724 reward=0.7684523 (468.05 it/sec) -training >> step=4338500, episode=724 reward=0.7802091 (482.27 it/sec) -training >> step=4338600, episode=724 reward=0.7752612 (470.41 it/sec) -training >> step=4338700, episode=724 reward=0.7750868 (484.70 it/sec) -training >> step=4338800, episode=724 reward=0.772417 (478.30 it/sec) -training >> step=4338900, episode=724 reward=0.7828903 (475.74 it/sec) -training >> step=4339000, episode=724 reward=0.7687088 (484.63 it/sec) -training >> step=4339100, episode=724 reward=0.7741006 (484.37 it/sec) -training >> step=4339200, episode=724 reward=0.7851312 (461.99 it/sec) -training >> step=4339300, episode=724 reward=0.766527 (500.87 it/sec) -training >> step=4339400, episode=724 reward=0.7807749 (505.24 it/sec) -training >> step=4339500, episode=724 reward=0.7855209 (497.05 it/sec) -training >> step=4339600, episode=724 reward=0.7853732 (537.67 it/sec) -training >> step=4339700, episode=724 reward=0.771631 (489.20 it/sec) -training >> step=4339800, episode=724 reward=0.7884418 (467.13 it/sec) -training >> step=4339900, episode=724 reward=0.7497599 (446.13 it/sec) -training >> step=4340000, episode=724 reward=0.778367 (465.17 it/sec) -training >> step=4340100, episode=724 reward=0.7751982 (474.53 it/sec) -training >> step=4340200, episode=724 reward=0.771336 (471.81 it/sec) -training >> step=4340300, episode=724 reward=0.7830819 (467.79 it/sec) -training >> step=4340400, episode=724 reward=0.7725461 (459.49 it/sec) -training >> step=4340500, episode=724 reward=0.7595642 (495.55 it/sec) -training >> step=4340600, episode=724 reward=0.7740759 (477.90 it/sec) -training >> step=4340700, episode=724 reward=0.780508 (482.20 it/sec) -training >> step=4340800, episode=724 reward=0.7800846 (442.45 it/sec) -training >> step=4340900, episode=724 reward=0.7774814 (443.49 it/sec) -training >> step=4341000, episode=724 reward=0.7864882 (489.35 it/sec) -training >> step=4341100, episode=724 reward=0.7662689 (435.42 it/sec) -training >> step=4341200, episode=724 reward=0.7849424 (359.14 it/sec) -training >> step=4341300, episode=724 reward=0.7751898 (382.84 it/sec) -training >> step=4341400, episode=724 reward=0.7728695 (388.33 it/sec) -training >> step=4341500, episode=724 reward=0.7943025 (405.78 it/sec) -training >> step=4341600, episode=724 reward=0.7934262 (438.68 it/sec) -training >> step=4341700, episode=724 reward=0.764391 (468.21 it/sec) -training >> step=4341800, episode=724 reward=0.767902 (526.17 it/sec) -training >> step=4341900, episode=724 reward=0.7582553 (487.85 it/sec) -training >> step=4342000, episode=724 reward=0.7733583 (497.19 it/sec) -training >> step=4342100, episode=724 reward=0.7537559 (534.85 it/sec) -training >> step=4342200, episode=724 reward=0.7721816 (487.04 it/sec) -training >> step=4342300, episode=724 reward=0.775174 (483.64 it/sec) -training >> step=4342400, episode=724 reward=0.7590637 (493.40 it/sec) -training >> step=4342500, episode=724 reward=0.7790796 (521.11 it/sec) -training >> step=4342600, episode=724 reward=0.7584882 (465.90 it/sec) -training >> step=4342700, episode=724 reward=0.7869062 (454.20 it/sec) -training >> step=4342800, episode=724 reward=0.7549843 (500.55 it/sec) -training >> step=4342900, episode=724 reward=0.7707852 (484.09 it/sec) -training >> step=4343000, episode=724 reward=0.7718503 (462.13 it/sec) -training >> step=4343100, episode=724 reward=0.7688571 (494.91 it/sec) -training >> step=4343200, episode=724 reward=0.7408655 (509.40 it/sec) -training >> step=4343300, episode=725 reward=0.7647946 (65.48 it/sec) -training >> step=4343400, episode=725 reward=0.7778012 (478.79 it/sec) -training >> step=4343500, episode=725 reward=0.7665078 (466.85 it/sec) -training >> step=4343600, episode=725 reward=0.762808 (526.28 it/sec) -training >> step=4343700, episode=725 reward=0.7746254 (505.49 it/sec) -training >> step=4343800, episode=725 reward=0.7663363 (493.51 it/sec) -training >> step=4343900, episode=725 reward=0.7809256 (462.50 it/sec) -training >> step=4344000, episode=725 reward=0.7644793 (493.28 it/sec) -training >> step=4344100, episode=725 reward=0.7669333 (508.66 it/sec) -training >> step=4344200, episode=725 reward=0.7444772 (510.21 it/sec) -training >> step=4344300, episode=725 reward=0.795523 (492.56 it/sec) -training >> step=4344400, episode=725 reward=0.7839864 (533.43 it/sec) -training >> step=4344500, episode=725 reward=0.7665668 (468.24 it/sec) -training >> step=4344600, episode=725 reward=0.771604 (406.80 it/sec) -training >> step=4344700, episode=725 reward=0.7746208 (440.08 it/sec) -training >> step=4344800, episode=725 reward=0.7759298 (412.64 it/sec) -training >> step=4344900, episode=725 reward=0.7644494 (364.62 it/sec) -training >> step=4345000, episode=725 reward=0.7971635 (373.21 it/sec) -training >> step=4345100, episode=725 reward=0.764861 (485.03 it/sec) -training >> step=4345200, episode=725 reward=0.790289 (417.37 it/sec) -training >> step=4345300, episode=725 reward=0.7596373 (490.86 it/sec) -training >> step=4345400, episode=725 reward=0.7815546 (445.08 it/sec) -training >> step=4345500, episode=725 reward=0.7845553 (496.72 it/sec) -training >> step=4345600, episode=725 reward=0.7835622 (443.68 it/sec) -training >> step=4345700, episode=725 reward=0.7963361 (402.00 it/sec) -training >> step=4345800, episode=725 reward=0.7722185 (506.09 it/sec) -training >> step=4345900, episode=725 reward=0.7794888 (437.21 it/sec) -training >> step=4346000, episode=725 reward=0.7840137 (419.23 it/sec) -training >> step=4346100, episode=725 reward=0.7636617 (472.20 it/sec) -training >> step=4346200, episode=725 reward=0.7790232 (464.89 it/sec) -training >> step=4346300, episode=725 reward=0.7970732 (434.90 it/sec) -training >> step=4346400, episode=725 reward=0.7866957 (454.05 it/sec) -training >> step=4346500, episode=725 reward=0.7869617 (390.81 it/sec) -training >> step=4346600, episode=725 reward=0.7536373 (418.95 it/sec) -training >> step=4346700, episode=725 reward=0.7885673 (406.29 it/sec) -training >> step=4346800, episode=725 reward=0.7645981 (453.65 it/sec) -training >> step=4346900, episode=725 reward=0.7939085 (439.25 it/sec) -training >> step=4347000, episode=725 reward=0.7834317 (387.81 it/sec) -training >> step=4347100, episode=725 reward=0.7838155 (422.72 it/sec) -training >> step=4347200, episode=725 reward=0.7824598 (388.74 it/sec) -training >> step=4347300, episode=725 reward=0.7617998 (487.25 it/sec) -training >> step=4347400, episode=725 reward=0.7869846 (399.84 it/sec) -training >> step=4347500, episode=725 reward=0.7840139 (442.54 it/sec) -training >> step=4347600, episode=725 reward=0.7702141 (444.30 it/sec) -training >> step=4347700, episode=725 reward=0.7880943 (481.88 it/sec) -training >> step=4347800, episode=725 reward=0.7953383 (508.29 it/sec) -training >> step=4347900, episode=725 reward=0.7938163 (482.58 it/sec) -training >> step=4348000, episode=725 reward=0.7540466 (480.37 it/sec) -training >> step=4348100, episode=725 reward=0.7759563 (465.08 it/sec) -training >> step=4348200, episode=725 reward=0.7870297 (464.84 it/sec) -training >> step=4348300, episode=725 reward=0.7781085 (398.59 it/sec) -training >> step=4348400, episode=725 reward=0.7777517 (501.04 it/sec) -training >> step=4348500, episode=725 reward=0.7705658 (477.40 it/sec) -training >> step=4348600, episode=725 reward=0.759009 (466.25 it/sec) -training >> step=4348700, episode=725 reward=0.7640553 (525.13 it/sec) -training >> step=4348800, episode=725 reward=0.7720353 (478.91 it/sec) -training >> step=4348900, episode=725 reward=0.7537647 (491.57 it/sec) -training >> step=4349000, episode=725 reward=0.7666682 (468.88 it/sec) -training >> step=4349100, episode=725 reward=0.7783596 (493.38 it/sec) -training >> step=4349200, episode=725 reward=0.7769176 (452.51 it/sec) -training >> step=4349300, episode=726 reward=0.785724 (98.29 it/sec) -training >> step=4349400, episode=726 reward=0.766096 (499.30 it/sec) -training >> step=4349500, episode=726 reward=0.7880663 (535.08 it/sec) -training >> step=4349600, episode=726 reward=0.7807954 (519.01 it/sec) -training >> step=4349700, episode=726 reward=0.7624431 (503.03 it/sec) -training >> step=4349800, episode=726 reward=0.7741755 (473.38 it/sec) -training >> step=4349900, episode=726 reward=0.7975078 (508.91 it/sec) -training >> step=4350000, episode=726 reward=0.7612599 (524.95 it/sec) -training >> step=4350100, episode=726 reward=0.7830853 (539.24 it/sec) -training >> step=4350200, episode=726 reward=0.7780221 (484.68 it/sec) -training >> step=4350300, episode=726 reward=0.7863166 (474.60 it/sec) -training >> step=4350400, episode=726 reward=0.7712852 (523.27 it/sec) -training >> step=4350500, episode=726 reward=0.788455 (415.57 it/sec) -training >> step=4350600, episode=726 reward=0.7804998 (494.53 it/sec) -training >> step=4350700, episode=726 reward=0.7700135 (531.38 it/sec) -training >> step=4350800, episode=726 reward=0.7909375 (503.12 it/sec) -training >> step=4350900, episode=726 reward=0.7826452 (505.33 it/sec) -training >> step=4351000, episode=726 reward=0.7764221 (526.47 it/sec) -training >> step=4351100, episode=726 reward=0.7752674 (538.40 it/sec) -training >> step=4351200, episode=726 reward=0.7780251 (533.14 it/sec) -training >> step=4351300, episode=726 reward=0.7539763 (501.47 it/sec) -training >> step=4351400, episode=726 reward=0.7615777 (530.76 it/sec) -training >> step=4351500, episode=726 reward=0.7698773 (529.04 it/sec) -training >> step=4351600, episode=726 reward=0.7792347 (486.84 it/sec) -training >> step=4351700, episode=726 reward=0.767861 (490.66 it/sec) -training >> step=4351800, episode=726 reward=0.7872745 (490.15 it/sec) -training >> step=4351900, episode=726 reward=0.7626848 (497.14 it/sec) -training >> step=4352000, episode=726 reward=0.7624403 (513.94 it/sec) -training >> step=4352100, episode=726 reward=0.7931691 (498.26 it/sec) -training >> step=4352200, episode=726 reward=0.7862071 (509.05 it/sec) -training >> step=4352300, episode=726 reward=0.7831394 (459.95 it/sec) -training >> step=4352400, episode=726 reward=0.7808106 (509.76 it/sec) -training >> step=4352500, episode=726 reward=0.7878963 (493.19 it/sec) -training >> step=4352600, episode=726 reward=0.7883813 (524.08 it/sec) -training >> step=4352700, episode=726 reward=0.7700582 (459.12 it/sec) -training >> step=4352800, episode=726 reward=0.7930307 (421.66 it/sec) -training >> step=4352900, episode=726 reward=0.7902121 (412.63 it/sec) -training >> step=4353000, episode=726 reward=0.7808577 (343.95 it/sec) -training >> step=4353100, episode=726 reward=0.7819576 (429.51 it/sec) -training >> step=4353200, episode=726 reward=0.7751946 (505.63 it/sec) -training >> step=4353300, episode=726 reward=0.7829548 (498.13 it/sec) -training >> step=4353400, episode=726 reward=0.7711582 (457.32 it/sec) -training >> step=4353500, episode=726 reward=0.7622683 (502.38 it/sec) -training >> step=4353600, episode=726 reward=0.76609 (538.96 it/sec) -training >> step=4353700, episode=726 reward=0.773019 (518.91 it/sec) -training >> step=4353800, episode=726 reward=0.7848822 (420.24 it/sec) -training >> step=4353900, episode=726 reward=0.780167 (502.27 it/sec) -training >> step=4354000, episode=726 reward=0.7683772 (442.87 it/sec) -training >> step=4354100, episode=726 reward=0.7614067 (532.52 it/sec) -training >> step=4354200, episode=726 reward=0.7776045 (514.24 it/sec) -training >> step=4354300, episode=726 reward=0.7751114 (526.06 it/sec) -training >> step=4354400, episode=726 reward=0.7737277 (522.15 it/sec) -training >> step=4354500, episode=726 reward=0.7769925 (498.82 it/sec) -training >> step=4354600, episode=726 reward=0.7927257 (556.15 it/sec) -training >> step=4354700, episode=726 reward=0.7715147 (516.25 it/sec) -training >> step=4354800, episode=726 reward=0.7477795 (507.44 it/sec) -training >> step=4354900, episode=726 reward=0.7852011 (483.77 it/sec) -training >> step=4355000, episode=726 reward=0.7670681 (532.99 it/sec) -training >> step=4355100, episode=726 reward=0.7501794 (497.91 it/sec) -training >> step=4355200, episode=726 reward=0.7933811 (522.46 it/sec) -training >> step=4355300, episode=727 reward=0.7762392 (107.28 it/sec) -training >> step=4355400, episode=727 reward=0.7707931 (362.73 it/sec) -training >> step=4355500, episode=727 reward=0.7851152 (423.83 it/sec) -training >> step=4355600, episode=727 reward=0.7736676 (459.64 it/sec) -training >> step=4355700, episode=727 reward=0.7747631 (495.48 it/sec) -training >> step=4355800, episode=727 reward=0.7715358 (517.28 it/sec) -training >> step=4355900, episode=727 reward=0.7684523 (535.02 it/sec) -training >> step=4356000, episode=727 reward=0.7757306 (492.73 it/sec) -training >> step=4356100, episode=727 reward=0.7864265 (537.98 it/sec) -training >> step=4356200, episode=727 reward=0.755889 (471.27 it/sec) -training >> step=4356300, episode=727 reward=0.7845017 (544.18 it/sec) -training >> step=4356400, episode=727 reward=0.7751348 (537.47 it/sec) -training >> step=4356500, episode=727 reward=0.7685589 (479.13 it/sec) -training >> step=4356600, episode=727 reward=0.783991 (489.74 it/sec) -training >> step=4356700, episode=727 reward=0.7760872 (468.39 it/sec) -training >> step=4356800, episode=727 reward=0.7645974 (505.34 it/sec) -training >> step=4356900, episode=727 reward=0.7720013 (474.76 it/sec) -training >> step=4357000, episode=727 reward=0.7848833 (499.39 it/sec) -training >> step=4357100, episode=727 reward=0.7766173 (483.33 it/sec) -training >> step=4357200, episode=727 reward=0.7759256 (509.00 it/sec) -training >> step=4357300, episode=727 reward=0.7814752 (537.00 it/sec) -training >> step=4357400, episode=727 reward=0.7767564 (529.54 it/sec) -training >> step=4357500, episode=727 reward=0.7708178 (486.79 it/sec) -training >> step=4357600, episode=727 reward=0.7883811 (507.29 it/sec) -training >> step=4357700, episode=727 reward=0.7633004 (500.25 it/sec) -training >> step=4357800, episode=727 reward=0.7740372 (536.65 it/sec) -training >> step=4357900, episode=727 reward=0.7838976 (455.30 it/sec) -training >> step=4358000, episode=727 reward=0.7761894 (380.77 it/sec) -training >> step=4358100, episode=727 reward=0.765844 (471.51 it/sec) -training >> step=4358200, episode=727 reward=0.782231 (444.98 it/sec) -training >> step=4358300, episode=727 reward=0.740483 (400.53 it/sec) -training >> step=4358400, episode=727 reward=0.7662822 (419.55 it/sec) -training >> step=4358500, episode=727 reward=0.7627735 (414.16 it/sec) -training >> step=4358600, episode=727 reward=0.751764 (387.28 it/sec) -training >> step=4358700, episode=727 reward=0.7803484 (430.04 it/sec) -training >> step=4358800, episode=727 reward=0.7730777 (483.94 it/sec) -training >> step=4358900, episode=727 reward=0.7701319 (456.76 it/sec) -training >> step=4359000, episode=727 reward=0.7754599 (446.16 it/sec) -training >> step=4359100, episode=727 reward=0.7834867 (436.02 it/sec) -training >> step=4359200, episode=727 reward=0.7896947 (493.98 it/sec) -training >> step=4359300, episode=727 reward=0.7915699 (441.19 it/sec) -training >> step=4359400, episode=727 reward=0.7790973 (440.83 it/sec) -training >> step=4359500, episode=727 reward=0.7757719 (438.22 it/sec) -training >> step=4359600, episode=727 reward=0.7902055 (489.92 it/sec) -training >> step=4359700, episode=727 reward=0.7688347 (459.68 it/sec) -training >> step=4359800, episode=727 reward=0.7758531 (460.69 it/sec) -training >> step=4359900, episode=727 reward=0.7687198 (504.08 it/sec) -training >> step=4360000, episode=727 reward=0.7675325 (429.85 it/sec) -training >> step=4360100, episode=727 reward=0.7736316 (451.05 it/sec) -training >> step=4360200, episode=727 reward=0.7802087 (419.41 it/sec) -training >> step=4360300, episode=727 reward=0.7778004 (473.40 it/sec) -training >> step=4360400, episode=727 reward=0.7686772 (418.96 it/sec) -training >> step=4360500, episode=727 reward=0.7783844 (450.19 it/sec) -training >> step=4360600, episode=727 reward=0.7755383 (450.42 it/sec) -training >> step=4360700, episode=727 reward=0.798542 (492.94 it/sec) -training >> step=4360800, episode=727 reward=0.7758704 (444.59 it/sec) -training >> step=4360900, episode=727 reward=0.7395722 (458.73 it/sec) -training >> step=4361000, episode=727 reward=0.7720281 (486.08 it/sec) -training >> step=4361100, episode=727 reward=0.7635209 (449.27 it/sec) -training >> step=4361200, episode=727 reward=0.7566484 (468.94 it/sec) -training >> step=4361300, episode=728 reward=0.7720751 (73.38 it/sec) -training >> step=4361400, episode=728 reward=0.7655879 (498.17 it/sec) -training >> step=4361500, episode=728 reward=0.7662067 (466.47 it/sec) -training >> step=4361600, episode=728 reward=0.7722911 (479.45 it/sec) -training >> step=4361700, episode=728 reward=0.7694656 (460.92 it/sec) -training >> step=4361800, episode=728 reward=0.7606185 (468.04 it/sec) -training >> step=4361900, episode=728 reward=0.7838049 (536.94 it/sec) -training >> step=4362000, episode=728 reward=0.7601262 (506.39 it/sec) -training >> step=4362100, episode=728 reward=0.7636593 (521.81 it/sec) -training >> step=4362200, episode=728 reward=0.7910005 (509.57 it/sec) -training >> step=4362300, episode=728 reward=0.7760793 (494.48 it/sec) -training >> step=4362400, episode=728 reward=0.7707502 (471.14 it/sec) -training >> step=4362500, episode=728 reward=0.7786863 (521.23 it/sec) -training >> step=4362600, episode=728 reward=0.783034 (486.31 it/sec) -training >> step=4362700, episode=728 reward=0.7852191 (500.91 it/sec) -training >> step=4362800, episode=728 reward=0.8092886 (530.50 it/sec) -training >> step=4362900, episode=728 reward=0.7893733 (489.73 it/sec) -training >> step=4363000, episode=728 reward=0.767111 (491.05 it/sec) -training >> step=4363100, episode=728 reward=0.7766361 (528.31 it/sec) -training >> step=4363200, episode=728 reward=0.8154029 (507.24 it/sec) -training >> step=4363300, episode=728 reward=0.7701967 (546.45 it/sec) -training >> step=4363400, episode=728 reward=0.7606264 (507.93 it/sec) -training >> step=4363500, episode=728 reward=0.7856764 (427.86 it/sec) -training >> step=4363600, episode=728 reward=0.7742119 (459.70 it/sec) -training >> step=4363700, episode=728 reward=0.7781065 (443.32 it/sec) -training >> step=4363800, episode=728 reward=0.7716196 (489.36 it/sec) -training >> step=4363900, episode=728 reward=0.7600303 (505.87 it/sec) -training >> step=4364000, episode=728 reward=0.7839367 (564.12 it/sec) -training >> step=4364100, episode=728 reward=0.7881368 (475.42 it/sec) -training >> step=4364200, episode=728 reward=0.7593092 (434.33 it/sec) -training >> step=4364300, episode=728 reward=0.7731181 (396.66 it/sec) -training >> step=4364400, episode=728 reward=0.7760677 (424.43 it/sec) -training >> step=4364500, episode=728 reward=0.768407 (356.56 it/sec) -training >> step=4364600, episode=728 reward=0.7825288 (385.21 it/sec) -training >> step=4364700, episode=728 reward=0.7657468 (410.07 it/sec) -training >> step=4364800, episode=728 reward=0.7647471 (449.01 it/sec) -training >> step=4364900, episode=728 reward=0.7863647 (394.95 it/sec) -training >> step=4365000, episode=728 reward=0.7892381 (432.46 it/sec) -training >> step=4365100, episode=728 reward=0.7810491 (450.46 it/sec) -training >> step=4365200, episode=728 reward=0.7618027 (436.46 it/sec) -training >> step=4365300, episode=728 reward=0.768283 (495.87 it/sec) -training >> step=4365400, episode=728 reward=0.7697021 (505.32 it/sec) -training >> step=4365500, episode=728 reward=0.7837033 (513.90 it/sec) -training >> step=4365600, episode=728 reward=0.7692047 (470.80 it/sec) -training >> step=4365700, episode=728 reward=0.7489855 (445.59 it/sec) -training >> step=4365800, episode=728 reward=0.7755501 (503.96 it/sec) -training >> step=4365900, episode=728 reward=0.7932267 (508.59 it/sec) -training >> step=4366000, episode=728 reward=0.7763619 (501.31 it/sec) -training >> step=4366100, episode=728 reward=0.7999298 (500.41 it/sec) -training >> step=4366200, episode=728 reward=0.7644642 (517.08 it/sec) -training >> step=4366300, episode=728 reward=0.7742859 (472.17 it/sec) -training >> step=4366400, episode=728 reward=0.7672711 (484.18 it/sec) -training >> step=4366500, episode=728 reward=0.7633219 (494.51 it/sec) -training >> step=4366600, episode=728 reward=0.764471 (488.75 it/sec) -training >> step=4366700, episode=728 reward=0.7844997 (474.78 it/sec) -training >> step=4366800, episode=728 reward=0.7833257 (487.72 it/sec) -training >> step=4366900, episode=728 reward=0.7709376 (428.83 it/sec) -training >> step=4367000, episode=728 reward=0.7814164 (439.76 it/sec) -training >> step=4367100, episode=728 reward=0.8030624 (335.00 it/sec) -training >> step=4367200, episode=728 reward=0.7776158 (415.61 it/sec) -training >> step=4367300, episode=729 reward=0.7881669 (93.91 it/sec) -training >> step=4367400, episode=729 reward=0.780223 (352.91 it/sec) -training >> step=4367500, episode=729 reward=0.7766128 (494.10 it/sec) -training >> step=4367600, episode=729 reward=0.7739692 (466.25 it/sec) -training >> step=4367700, episode=729 reward=0.7688331 (468.25 it/sec) -training >> step=4367800, episode=729 reward=0.7659652 (474.82 it/sec) -training >> step=4367900, episode=729 reward=0.7679919 (528.08 it/sec) -training >> step=4368000, episode=729 reward=0.773258 (470.00 it/sec) -training >> step=4368100, episode=729 reward=0.7902001 (455.30 it/sec) -training >> step=4368200, episode=729 reward=0.7883096 (475.88 it/sec) -training >> step=4368300, episode=729 reward=0.78576 (522.97 it/sec) -training >> step=4368400, episode=729 reward=0.771942 (512.53 it/sec) -training >> step=4368500, episode=729 reward=0.7724947 (489.07 it/sec) -training >> step=4368600, episode=729 reward=0.7904416 (519.02 it/sec) -training >> step=4368700, episode=729 reward=0.7477572 (490.02 it/sec) -training >> step=4368800, episode=729 reward=0.7710817 (491.29 it/sec) -training >> step=4368900, episode=729 reward=0.7987006 (377.31 it/sec) -training >> step=4369000, episode=729 reward=0.7617649 (444.71 it/sec) -training >> step=4369100, episode=729 reward=0.774914 (424.92 it/sec) -training >> step=4369200, episode=729 reward=0.7754916 (459.76 it/sec) -training >> step=4369300, episode=729 reward=0.7847998 (484.19 it/sec) -training >> step=4369400, episode=729 reward=0.7821454 (525.52 it/sec) -training >> step=4369500, episode=729 reward=0.7683131 (433.55 it/sec) -training >> step=4369600, episode=729 reward=0.7759749 (441.05 it/sec) -training >> step=4369700, episode=729 reward=0.7782289 (495.95 it/sec) -training >> step=4369800, episode=729 reward=0.7697834 (493.39 it/sec) -training >> step=4369900, episode=729 reward=0.7780951 (510.10 it/sec) -training >> step=4370000, episode=729 reward=0.7876717 (405.53 it/sec) -training >> step=4370100, episode=729 reward=0.7705911 (381.86 it/sec) -training >> step=4370200, episode=729 reward=0.772311 (352.93 it/sec) -training >> step=4370300, episode=729 reward=0.7546967 (354.61 it/sec) -training >> step=4370400, episode=729 reward=0.7849426 (360.82 it/sec) -training >> step=4370500, episode=729 reward=0.7883726 (391.66 it/sec) -training >> step=4370600, episode=729 reward=0.7918289 (389.79 it/sec) -training >> step=4370700, episode=729 reward=0.7717746 (455.41 it/sec) -training >> step=4370800, episode=729 reward=0.7890004 (462.02 it/sec) -training >> step=4370900, episode=729 reward=0.7771595 (449.59 it/sec) -training >> step=4371000, episode=729 reward=0.7793727 (472.07 it/sec) -training >> step=4371100, episode=729 reward=0.7829828 (466.97 it/sec) -training >> step=4371200, episode=729 reward=0.7632682 (504.42 it/sec) -training >> step=4371300, episode=729 reward=0.7783649 (441.20 it/sec) -training >> step=4371400, episode=729 reward=0.755237 (455.92 it/sec) -training >> step=4371500, episode=729 reward=0.7729591 (470.38 it/sec) -training >> step=4371600, episode=729 reward=0.7966002 (498.50 it/sec) -training >> step=4371700, episode=729 reward=0.7877028 (482.43 it/sec) -training >> step=4371800, episode=729 reward=0.7664096 (480.39 it/sec) -training >> step=4371900, episode=729 reward=0.7927096 (528.31 it/sec) -training >> step=4372000, episode=729 reward=0.763575 (487.75 it/sec) -training >> step=4372100, episode=729 reward=0.7647806 (495.02 it/sec) -training >> step=4372200, episode=729 reward=0.7633015 (489.62 it/sec) -training >> step=4372300, episode=729 reward=0.7785736 (476.11 it/sec) -training >> step=4372400, episode=729 reward=0.7801062 (492.76 it/sec) -training >> step=4372500, episode=729 reward=0.7603558 (498.88 it/sec) -training >> step=4372600, episode=729 reward=0.7644643 (491.29 it/sec) -training >> step=4372700, episode=729 reward=0.7735507 (495.66 it/sec) -training >> step=4372800, episode=729 reward=0.7688259 (461.18 it/sec) -training >> step=4372900, episode=729 reward=0.7803209 (496.91 it/sec) -training >> step=4373000, episode=729 reward=0.7736836 (538.59 it/sec) -training >> step=4373100, episode=729 reward=0.7589946 (455.43 it/sec) -training >> step=4373200, episode=729 reward=0.7777205 (497.43 it/sec) -training >> step=4373300, episode=730 reward=0.7845075 (87.71 it/sec) -training >> step=4373400, episode=730 reward=0.7552659 (497.57 it/sec) -training >> step=4373500, episode=730 reward=0.7593864 (513.41 it/sec) -training >> step=4373600, episode=730 reward=0.7533145 (428.90 it/sec) -training >> step=4373700, episode=730 reward=0.7863319 (498.12 it/sec) -training >> step=4373800, episode=730 reward=0.7608884 (448.11 it/sec) -training >> step=4373900, episode=730 reward=0.7702303 (481.62 it/sec) -training >> step=4374000, episode=730 reward=0.7903361 (508.49 it/sec) -training >> step=4374100, episode=730 reward=0.7953364 (513.34 it/sec) -training >> step=4374200, episode=730 reward=0.7875845 (539.24 it/sec) -training >> step=4374300, episode=730 reward=0.7809907 (467.56 it/sec) -training >> step=4374400, episode=730 reward=0.7882127 (465.99 it/sec) -training >> step=4374500, episode=730 reward=0.7770851 (494.48 it/sec) -training >> step=4374600, episode=730 reward=0.7774564 (509.45 it/sec) -training >> step=4374700, episode=730 reward=0.7785661 (518.23 it/sec) -training >> step=4374800, episode=730 reward=0.7504117 (462.98 it/sec) -training >> step=4374900, episode=730 reward=0.7875091 (479.70 it/sec) -training >> step=4375000, episode=730 reward=0.7829338 (489.16 it/sec) -training >> step=4375100, episode=730 reward=0.7707023 (483.05 it/sec) -training >> step=4375200, episode=730 reward=0.7791137 (494.80 it/sec) -training >> step=4375300, episode=730 reward=0.7760348 (489.17 it/sec) -training >> step=4375400, episode=730 reward=0.7665297 (522.78 it/sec) -training >> step=4375500, episode=730 reward=0.7820209 (494.45 it/sec) -training >> step=4375600, episode=730 reward=0.7717074 (536.62 it/sec) -training >> step=4375700, episode=730 reward=0.7905614 (519.39 it/sec) -training >> step=4375800, episode=730 reward=0.7993833 (414.27 it/sec) -training >> step=4375900, episode=730 reward=0.7744302 (423.15 it/sec) -training >> step=4376000, episode=730 reward=0.7909831 (512.31 it/sec) -training >> step=4376100, episode=730 reward=0.7853981 (513.82 it/sec) -training >> step=4376200, episode=730 reward=0.7675272 (489.46 it/sec) -training >> step=4376300, episode=730 reward=0.7728766 (461.03 it/sec) -training >> step=4376400, episode=730 reward=0.7825486 (529.19 it/sec) -training >> step=4376500, episode=730 reward=0.7815917 (436.07 it/sec) -training >> step=4376600, episode=730 reward=0.7700026 (435.81 it/sec) -training >> step=4376700, episode=730 reward=0.7824084 (506.20 it/sec) -training >> step=4376800, episode=730 reward=0.7831978 (468.46 it/sec) -training >> step=4376900, episode=730 reward=0.7855 (502.44 it/sec) -training >> step=4377000, episode=730 reward=0.7577815 (530.42 it/sec) -training >> step=4377100, episode=730 reward=0.8008693 (523.14 it/sec) -training >> step=4377200, episode=730 reward=0.7655113 (425.67 it/sec) -training >> step=4377300, episode=730 reward=0.7851147 (476.24 it/sec) -training >> step=4377400, episode=730 reward=0.7836205 (477.92 it/sec) -training >> step=4377500, episode=730 reward=0.7846301 (553.92 it/sec) -training >> step=4377600, episode=730 reward=0.7797769 (518.20 it/sec) -training >> step=4377700, episode=730 reward=0.7612886 (501.62 it/sec) -training >> step=4377800, episode=730 reward=0.7610658 (446.80 it/sec) -training >> step=4377900, episode=730 reward=0.7703432 (498.49 it/sec) -training >> step=4378000, episode=730 reward=0.7814062 (517.49 it/sec) -training >> step=4378100, episode=730 reward=0.7735673 (528.57 it/sec) -training >> step=4378200, episode=730 reward=0.7646279 (585.70 it/sec) -training >> step=4378300, episode=730 reward=0.7669876 (523.44 it/sec) -training >> step=4378400, episode=730 reward=0.7851351 (474.01 it/sec) -training >> step=4378500, episode=730 reward=0.782691 (535.19 it/sec) -training >> step=4378600, episode=730 reward=0.7861621 (526.15 it/sec) -training >> step=4378700, episode=730 reward=0.7665542 (524.88 it/sec) -training >> step=4378800, episode=730 reward=0.7592368 (495.35 it/sec) -training >> step=4378900, episode=730 reward=0.7623318 (465.78 it/sec) -training >> step=4379000, episode=730 reward=0.7483541 (464.78 it/sec) -training >> step=4379100, episode=730 reward=0.7603826 (524.81 it/sec) -training >> step=4379200, episode=730 reward=0.7549733 (531.87 it/sec) -training >> step=4379300, episode=731 reward=0.7873356 (74.21 it/sec) -training >> step=4379400, episode=731 reward=0.7617561 (435.62 it/sec) -training >> step=4379500, episode=731 reward=0.7755138 (465.31 it/sec) -training >> step=4379600, episode=731 reward=0.7847098 (502.22 it/sec) -training >> step=4379700, episode=731 reward=0.7611141 (513.28 it/sec) -training >> step=4379800, episode=731 reward=0.7805284 (495.70 it/sec) -training >> step=4379900, episode=731 reward=0.7787738 (530.51 it/sec) -training >> step=4380000, episode=731 reward=0.7853664 (484.73 it/sec) -training >> step=4380100, episode=731 reward=0.7790574 (509.17 it/sec) -training >> step=4380200, episode=731 reward=0.762373 (514.42 it/sec) -training >> step=4380300, episode=731 reward=0.789503 (464.85 it/sec) -training >> step=4380400, episode=731 reward=0.7675883 (478.07 it/sec) -training >> step=4380500, episode=731 reward=0.780831 (540.02 it/sec) -training >> step=4380600, episode=731 reward=0.7717819 (520.80 it/sec) -training >> step=4380700, episode=731 reward=0.7836871 (428.64 it/sec) -training >> step=4380800, episode=731 reward=0.7928945 (473.95 it/sec) -training >> step=4380900, episode=731 reward=0.7506248 (445.06 it/sec) -training >> step=4381000, episode=731 reward=0.7854227 (444.21 it/sec) -training >> step=4381100, episode=731 reward=0.773656 (481.34 it/sec) -training >> step=4381200, episode=731 reward=0.7733235 (545.81 it/sec) -training >> step=4381300, episode=731 reward=0.7698156 (443.59 it/sec) -training >> step=4381400, episode=731 reward=0.7828093 (403.04 it/sec) -training >> step=4381500, episode=731 reward=0.7933605 (504.01 it/sec) -training >> step=4381600, episode=731 reward=0.766973 (418.36 it/sec) -training >> step=4381700, episode=731 reward=0.7792199 (390.85 it/sec) -training >> step=4381800, episode=731 reward=0.7666919 (414.38 it/sec) -training >> step=4381900, episode=731 reward=0.7482635 (464.66 it/sec) -training >> step=4382000, episode=731 reward=0.788915 (457.22 it/sec) -training >> step=4382100, episode=731 reward=0.7655028 (515.35 it/sec) -training >> step=4382200, episode=731 reward=0.7711114 (468.61 it/sec) -training >> step=4382300, episode=731 reward=0.7738021 (467.07 it/sec) -training >> step=4382400, episode=731 reward=0.7725602 (456.84 it/sec) -training >> step=4382500, episode=731 reward=0.7844483 (420.34 it/sec) -training >> step=4382600, episode=731 reward=0.7843582 (474.33 it/sec) -training >> step=4382700, episode=731 reward=0.7594419 (413.33 it/sec) -training >> step=4382800, episode=731 reward=0.7726567 (477.98 it/sec) -training >> step=4382900, episode=731 reward=0.7656729 (520.39 it/sec) -training >> step=4383000, episode=731 reward=0.7827047 (537.81 it/sec) -training >> step=4383100, episode=731 reward=0.7799324 (464.26 it/sec) -training >> step=4383200, episode=731 reward=0.7693685 (467.95 it/sec) -training >> step=4383300, episode=731 reward=0.7753949 (480.50 it/sec) -training >> step=4383400, episode=731 reward=0.7618543 (445.54 it/sec) -training >> step=4383500, episode=731 reward=0.8039162 (380.68 it/sec) -training >> step=4383600, episode=731 reward=0.7679213 (442.18 it/sec) -training >> step=4383700, episode=731 reward=0.7752011 (361.02 it/sec) -training >> step=4383800, episode=731 reward=0.7661211 (344.79 it/sec) -training >> step=4383900, episode=731 reward=0.7494954 (461.20 it/sec) -training >> step=4384000, episode=731 reward=0.770265 (492.19 it/sec) -training >> step=4384100, episode=731 reward=0.7598549 (464.98 it/sec) -training >> step=4384200, episode=731 reward=0.7833137 (492.54 it/sec) -training >> step=4384300, episode=731 reward=0.7604519 (449.90 it/sec) -training >> step=4384400, episode=731 reward=0.7821796 (459.16 it/sec) -training >> step=4384500, episode=731 reward=0.7723156 (441.75 it/sec) -training >> step=4384600, episode=731 reward=0.7777902 (408.39 it/sec) -training >> step=4384700, episode=731 reward=0.7700558 (466.99 it/sec) -training >> step=4384800, episode=731 reward=0.7562497 (463.31 it/sec) -training >> step=4384900, episode=731 reward=0.7551675 (427.94 it/sec) -training >> step=4385000, episode=731 reward=0.7582945 (414.53 it/sec) -training >> step=4385100, episode=731 reward=0.7668205 (467.41 it/sec) -training >> step=4385200, episode=731 reward=0.7760069 (483.27 it/sec) -training >> step=4385300, episode=732 reward=0.7705246 (119.86 it/sec) -training >> step=4385400, episode=732 reward=0.7445055 (460.32 it/sec) -training >> step=4385500, episode=732 reward=0.7795249 (422.91 it/sec) -training >> step=4385600, episode=732 reward=0.7745346 (426.50 it/sec) -training >> step=4385700, episode=732 reward=0.7746033 (413.39 it/sec) -training >> step=4385800, episode=732 reward=0.7929257 (449.85 it/sec) -training >> step=4385900, episode=732 reward=0.790969 (502.83 it/sec) -training >> step=4386000, episode=732 reward=0.7782388 (486.29 it/sec) -training >> step=4386100, episode=732 reward=0.7867506 (465.72 it/sec) -training >> step=4386200, episode=732 reward=0.8007629 (458.32 it/sec) -training >> step=4386300, episode=732 reward=0.7777893 (480.86 it/sec) -training >> step=4386400, episode=732 reward=0.7708459 (462.78 it/sec) -training >> step=4386500, episode=732 reward=0.7593513 (450.50 it/sec) -training >> step=4386600, episode=732 reward=0.7848973 (392.50 it/sec) -training >> step=4386700, episode=732 reward=0.7622313 (381.21 it/sec) -training >> step=4386800, episode=732 reward=0.7697419 (375.66 it/sec) -training >> step=4386900, episode=732 reward=0.7780368 (447.44 it/sec) -training >> step=4387000, episode=732 reward=0.7634426 (486.82 it/sec) -training >> step=4387100, episode=732 reward=0.7899422 (430.85 it/sec) -training >> step=4387200, episode=732 reward=0.778953 (434.67 it/sec) -training >> step=4387300, episode=732 reward=0.7762538 (487.10 it/sec) -training >> step=4387400, episode=732 reward=0.7822793 (451.57 it/sec) -training >> step=4387500, episode=732 reward=0.7830809 (473.38 it/sec) -training >> step=4387600, episode=732 reward=0.7857242 (446.23 it/sec) -training >> step=4387700, episode=732 reward=0.7709423 (487.01 it/sec) -training >> step=4387800, episode=732 reward=0.7641922 (466.89 it/sec) -training >> step=4387900, episode=732 reward=0.7793483 (480.36 it/sec) -training >> step=4388000, episode=732 reward=0.7673264 (471.07 it/sec) -training >> step=4388100, episode=732 reward=0.799329 (470.93 it/sec) -training >> step=4388200, episode=732 reward=0.7449762 (494.24 it/sec) -training >> step=4388300, episode=732 reward=0.7952893 (453.40 it/sec) -training >> step=4388400, episode=732 reward=0.7709076 (443.81 it/sec) -training >> step=4388500, episode=732 reward=0.7722831 (456.73 it/sec) -training >> step=4388600, episode=732 reward=0.7706358 (459.39 it/sec) -training >> step=4388700, episode=732 reward=0.7723005 (490.61 it/sec) -training >> step=4388800, episode=732 reward=0.7775276 (501.35 it/sec) -training >> step=4388900, episode=732 reward=0.779767 (425.01 it/sec) -training >> step=4389000, episode=732 reward=0.7694584 (444.47 it/sec) -training >> step=4389100, episode=732 reward=0.7656255 (481.23 it/sec) -training >> step=4389200, episode=732 reward=0.7870074 (476.10 it/sec) -training >> step=4389300, episode=732 reward=0.7813624 (499.83 it/sec) -training >> step=4389400, episode=732 reward=0.7730948 (496.77 it/sec) -training >> step=4389500, episode=732 reward=0.7785237 (447.35 it/sec) -training >> step=4389600, episode=732 reward=0.7755278 (517.65 it/sec) -training >> step=4389700, episode=732 reward=0.7678785 (542.53 it/sec) -training >> step=4389800, episode=732 reward=0.7591512 (555.43 it/sec) -training >> step=4389900, episode=732 reward=0.7624575 (533.71 it/sec) -training >> step=4390000, episode=732 reward=0.7699612 (513.74 it/sec) -training >> step=4390100, episode=732 reward=0.7709983 (511.19 it/sec) -training >> step=4390200, episode=732 reward=0.7606578 (492.97 it/sec) -training >> step=4390300, episode=732 reward=0.7618738 (537.85 it/sec) -training >> step=4390400, episode=732 reward=0.7674739 (504.75 it/sec) -training >> step=4390500, episode=732 reward=0.7743471 (537.44 it/sec) -training >> step=4390600, episode=732 reward=0.7764606 (517.40 it/sec) -training >> step=4390700, episode=732 reward=0.7473056 (511.05 it/sec) -training >> step=4390800, episode=732 reward=0.7823328 (544.68 it/sec) -training >> step=4390900, episode=732 reward=0.7714264 (513.14 it/sec) -training >> step=4391000, episode=732 reward=0.7868586 (543.58 it/sec) -training >> step=4391100, episode=732 reward=0.7638874 (509.82 it/sec) -training >> step=4391200, episode=732 reward=0.7362436 (486.80 it/sec) -training >> step=4391300, episode=733 reward=0.7513704 (84.94 it/sec) -training >> step=4391400, episode=733 reward=0.7724007 (528.85 it/sec) -training >> step=4391500, episode=733 reward=0.7815269 (487.73 it/sec) -training >> step=4391600, episode=733 reward=0.7650873 (526.35 it/sec) -training >> step=4391700, episode=733 reward=0.7582424 (504.09 it/sec) -training >> step=4391800, episode=733 reward=0.7851626 (522.35 it/sec) -training >> step=4391900, episode=733 reward=0.7781423 (516.37 it/sec) -training >> step=4392000, episode=733 reward=0.779067 (483.39 it/sec) -training >> step=4392100, episode=733 reward=0.7859635 (402.14 it/sec) -training >> step=4392200, episode=733 reward=0.7956089 (479.16 it/sec) -training >> step=4392300, episode=733 reward=0.7820553 (467.98 it/sec) -training >> step=4392400, episode=733 reward=0.7950326 (432.50 it/sec) -training >> step=4392500, episode=733 reward=0.7551829 (529.57 it/sec) -training >> step=4392600, episode=733 reward=0.78751 (480.21 it/sec) -training >> step=4392700, episode=733 reward=0.7777722 (495.48 it/sec) -training >> step=4392800, episode=733 reward=0.7614881 (513.48 it/sec) -training >> step=4392900, episode=733 reward=0.782971 (532.60 it/sec) -training >> step=4393000, episode=733 reward=0.7581904 (482.53 it/sec) -training >> step=4393100, episode=733 reward=0.7704048 (491.99 it/sec) -training >> step=4393200, episode=733 reward=0.7796271 (471.69 it/sec) -training >> step=4393300, episode=733 reward=0.7796983 (510.88 it/sec) -training >> step=4393400, episode=733 reward=0.7627485 (540.02 it/sec) -training >> step=4393500, episode=733 reward=0.7811332 (447.37 it/sec) -training >> step=4393600, episode=733 reward=0.7734581 (481.03 it/sec) -training >> step=4393700, episode=733 reward=0.7753683 (459.69 it/sec) -training >> step=4393800, episode=733 reward=0.7775472 (464.67 it/sec) -training >> step=4393900, episode=733 reward=0.7740399 (512.23 it/sec) -training >> step=4394000, episode=733 reward=0.7693666 (484.31 it/sec) -training >> step=4394100, episode=733 reward=0.7814927 (450.04 it/sec) -training >> step=4394200, episode=733 reward=0.7738494 (530.17 it/sec) -training >> step=4394300, episode=733 reward=0.7648656 (539.50 it/sec) -training >> step=4394400, episode=733 reward=0.7778758 (510.76 it/sec) -training >> step=4394500, episode=733 reward=0.7710568 (481.08 it/sec) -training >> step=4394600, episode=733 reward=0.7607259 (439.59 it/sec) -training >> step=4394700, episode=733 reward=0.7728562 (486.08 it/sec) -training >> step=4394800, episode=733 reward=0.774687 (521.41 it/sec) -training >> step=4394900, episode=733 reward=0.7868944 (478.12 it/sec) -training >> step=4395000, episode=733 reward=0.7718888 (461.85 it/sec) -training >> step=4395100, episode=733 reward=0.7776803 (478.08 it/sec) -training >> step=4395200, episode=733 reward=0.7834625 (465.86 it/sec) -training >> step=4395300, episode=733 reward=0.7637347 (537.06 it/sec) -training >> step=4395400, episode=733 reward=0.7594509 (483.78 it/sec) -training >> step=4395500, episode=733 reward=0.7816887 (547.59 it/sec) -training >> step=4395600, episode=733 reward=0.7912639 (488.16 it/sec) -training >> step=4395700, episode=733 reward=0.7823437 (444.58 it/sec) -training >> step=4395800, episode=733 reward=0.7785349 (549.64 it/sec) -training >> step=4395900, episode=733 reward=0.771705 (525.13 it/sec) -training >> step=4396000, episode=733 reward=0.7737734 (478.18 it/sec) -training >> step=4396100, episode=733 reward=0.7996515 (508.51 it/sec) -training >> step=4396200, episode=733 reward=0.7466454 (525.10 it/sec) -training >> step=4396300, episode=733 reward=0.7694 (461.87 it/sec) -training >> step=4396400, episode=733 reward=0.7748645 (504.71 it/sec) -training >> step=4396500, episode=733 reward=0.7554473 (475.01 it/sec) -training >> step=4396600, episode=733 reward=0.7886496 (542.48 it/sec) -training >> step=4396700, episode=733 reward=0.7604523 (487.52 it/sec) -training >> step=4396800, episode=733 reward=0.7440733 (470.46 it/sec) -training >> step=4396900, episode=733 reward=0.7440368 (470.54 it/sec) -training >> step=4397000, episode=733 reward=0.7660122 (498.16 it/sec) -training >> step=4397100, episode=733 reward=0.7542626 (478.25 it/sec) -training >> step=4397200, episode=733 reward=0.7765356 (463.23 it/sec) -training >> step=4397300, episode=734 reward=0.7820395 (109.48 it/sec) -training >> step=4397400, episode=734 reward=0.7702931 (451.63 it/sec) -training >> step=4397500, episode=734 reward=0.7594458 (542.78 it/sec) -training >> step=4397600, episode=734 reward=0.7787763 (454.90 it/sec) -training >> step=4397700, episode=734 reward=0.7819837 (517.70 it/sec) -training >> step=4397800, episode=734 reward=0.7811085 (527.00 it/sec) -training >> step=4397900, episode=734 reward=0.7654363 (459.71 it/sec) -training >> step=4398000, episode=734 reward=0.7834724 (508.17 it/sec) -training >> step=4398100, episode=734 reward=0.7682855 (473.57 it/sec) -training >> step=4398200, episode=734 reward=0.7675055 (503.53 it/sec) -training >> step=4398300, episode=734 reward=0.7742854 (476.67 it/sec) -training >> step=4398400, episode=734 reward=0.7786974 (517.76 it/sec) -training >> step=4398500, episode=734 reward=0.7538189 (498.98 it/sec) -training >> step=4398600, episode=734 reward=0.7671294 (486.25 it/sec) -training >> step=4398700, episode=734 reward=0.7869971 (494.61 it/sec) -training >> step=4398800, episode=734 reward=0.7681701 (494.05 it/sec) -training >> step=4398900, episode=734 reward=0.7439409 (498.91 it/sec) -training >> step=4399000, episode=734 reward=0.7504375 (485.80 it/sec) -training >> step=4399100, episode=734 reward=0.7745982 (508.98 it/sec) -training >> step=4399200, episode=734 reward=0.769421 (451.25 it/sec) -training >> step=4399300, episode=734 reward=0.7613144 (496.28 it/sec) -training >> step=4399400, episode=734 reward=0.788542 (494.37 it/sec) -training >> step=4399500, episode=734 reward=0.782158 (535.60 it/sec) -training >> step=4399600, episode=734 reward=0.7753893 (465.11 it/sec) -training >> step=4399700, episode=734 reward=0.7796022 (535.01 it/sec) -training >> step=4399800, episode=734 reward=0.786089 (513.36 it/sec) -training >> step=4399900, episode=734 reward=0.7763953 (473.15 it/sec) -training >> step=4400000, episode=734 reward=0.7654886 (495.55 it/sec) -training >> step=4400100, episode=734 reward=0.7528782 (494.70 it/sec) -training >> step=4400200, episode=734 reward=0.7872135 (510.77 it/sec) -training >> step=4400300, episode=734 reward=0.7730409 (453.15 it/sec) -training >> step=4400400, episode=734 reward=0.7615353 (470.67 it/sec) -training >> step=4400500, episode=734 reward=0.7859892 (495.24 it/sec) -training >> step=4400600, episode=734 reward=0.7755207 (476.53 it/sec) -training >> step=4400700, episode=734 reward=0.7708876 (499.02 it/sec) -training >> step=4400800, episode=734 reward=0.7645996 (504.69 it/sec) -training >> step=4400900, episode=734 reward=0.7939873 (524.85 it/sec) -training >> step=4401000, episode=734 reward=0.7863212 (544.17 it/sec) -training >> step=4401100, episode=734 reward=0.791518 (452.79 it/sec) -training >> step=4401200, episode=734 reward=0.7770463 (479.18 it/sec) -training >> step=4401300, episode=734 reward=0.761723 (492.10 it/sec) -training >> step=4401400, episode=734 reward=0.761977 (507.32 it/sec) -training >> step=4401500, episode=734 reward=0.7638007 (553.62 it/sec) -training >> step=4401600, episode=734 reward=0.7700445 (555.70 it/sec) -training >> step=4401700, episode=734 reward=0.7498068 (453.06 it/sec) -training >> step=4401800, episode=734 reward=0.7805318 (475.34 it/sec) -training >> step=4401900, episode=734 reward=0.7392489 (487.14 it/sec) -training >> step=4402000, episode=734 reward=0.7630629 (493.17 it/sec) -training >> step=4402100, episode=734 reward=0.7859643 (519.10 it/sec) -training >> step=4402200, episode=734 reward=0.7436997 (470.87 it/sec) -training >> step=4402300, episode=734 reward=0.7719215 (536.46 it/sec) -training >> step=4402400, episode=734 reward=0.7555765 (515.60 it/sec) -training >> step=4402500, episode=734 reward=0.7529432 (490.63 it/sec) -training >> step=4402600, episode=734 reward=0.7512487 (541.42 it/sec) -training >> step=4402700, episode=734 reward=0.7672742 (501.90 it/sec) -training >> step=4402800, episode=734 reward=0.771912 (465.12 it/sec) -training >> step=4402900, episode=734 reward=0.7614246 (495.64 it/sec) -training >> step=4403000, episode=734 reward=0.7353506 (484.10 it/sec) -training >> step=4403100, episode=734 reward=0.7311575 (542.49 it/sec) -training >> step=4403200, episode=734 reward=0.7604881 (514.23 it/sec) -training >> step=4403300, episode=735 reward=0.7469147 (71.61 it/sec) -training >> step=4403400, episode=735 reward=0.7760117 (333.54 it/sec) -training >> step=4403500, episode=735 reward=0.7608886 (400.09 it/sec) -training >> step=4403600, episode=735 reward=0.7605376 (360.00 it/sec) -training >> step=4403700, episode=735 reward=0.7771919 (424.51 it/sec) -training >> step=4403800, episode=735 reward=0.7807731 (415.06 it/sec) -training >> step=4403900, episode=735 reward=0.7553726 (500.28 it/sec) -training >> step=4404000, episode=735 reward=0.7735494 (452.39 it/sec) -training >> step=4404100, episode=735 reward=0.7858641 (449.08 it/sec) -training >> step=4404200, episode=735 reward=0.771723 (471.43 it/sec) -training >> step=4404300, episode=735 reward=0.8000737 (461.92 it/sec) -training >> step=4404400, episode=735 reward=0.7657105 (457.83 it/sec) -training >> step=4404500, episode=735 reward=0.7729048 (439.37 it/sec) -training >> step=4404600, episode=735 reward=0.7694809 (460.74 it/sec) -training >> step=4404700, episode=735 reward=0.7758524 (429.41 it/sec) -training >> step=4404800, episode=735 reward=0.766448 (460.15 it/sec) -training >> step=4404900, episode=735 reward=0.7595791 (459.20 it/sec) -training >> step=4405000, episode=735 reward=0.7933314 (516.07 it/sec) -training >> step=4405100, episode=735 reward=0.7752684 (433.90 it/sec) -training >> step=4405200, episode=735 reward=0.7701467 (469.16 it/sec) -training >> step=4405300, episode=735 reward=0.7768572 (493.08 it/sec) -training >> step=4405400, episode=735 reward=0.7850064 (502.47 it/sec) -training >> step=4405500, episode=735 reward=0.7634205 (457.57 it/sec) -training >> step=4405600, episode=735 reward=0.7776503 (455.45 it/sec) -training >> step=4405700, episode=735 reward=0.7845731 (462.69 it/sec) -training >> step=4405800, episode=735 reward=0.7730739 (470.76 it/sec) -training >> step=4405900, episode=735 reward=0.7830146 (471.23 it/sec) -training >> step=4406000, episode=735 reward=0.7655169 (499.52 it/sec) -training >> step=4406100, episode=735 reward=0.7593186 (515.50 it/sec) -training >> step=4406200, episode=735 reward=0.7620993 (501.83 it/sec) -training >> step=4406300, episode=735 reward=0.7551379 (462.25 it/sec) -training >> step=4406400, episode=735 reward=0.7708759 (491.36 it/sec) -training >> step=4406500, episode=735 reward=0.7622373 (449.70 it/sec) -training >> step=4406600, episode=735 reward=0.7898692 (416.10 it/sec) -training >> step=4406700, episode=735 reward=0.7711479 (482.52 it/sec) -training >> step=4406800, episode=735 reward=0.7729921 (512.60 it/sec) -training >> step=4406900, episode=735 reward=0.7793896 (436.56 it/sec) -training >> step=4407000, episode=735 reward=0.7789965 (469.34 it/sec) -training >> step=4407100, episode=735 reward=0.7732462 (485.80 it/sec) -training >> step=4407200, episode=735 reward=0.7553878 (510.80 it/sec) -training >> step=4407300, episode=735 reward=0.7684154 (444.19 it/sec) -training >> step=4407400, episode=735 reward=0.7824637 (457.33 it/sec) -training >> step=4407500, episode=735 reward=0.7845146 (407.75 it/sec) -training >> step=4407600, episode=735 reward=0.7782568 (447.56 it/sec) -training >> step=4407700, episode=735 reward=0.7761078 (513.33 it/sec) -training >> step=4407800, episode=735 reward=0.7636698 (511.79 it/sec) -training >> step=4407900, episode=735 reward=0.7625425 (499.06 it/sec) -training >> step=4408000, episode=735 reward=0.7493715 (463.39 it/sec) -training >> step=4408100, episode=735 reward=0.7788671 (473.05 it/sec) -training >> step=4408200, episode=735 reward=0.7748403 (517.61 it/sec) -training >> step=4408300, episode=735 reward=0.7762547 (490.12 it/sec) -training >> step=4408400, episode=735 reward=0.750608 (485.78 it/sec) -training >> step=4408500, episode=735 reward=0.7700033 (499.74 it/sec) -training >> step=4408600, episode=735 reward=0.7647173 (511.80 it/sec) -training >> step=4408700, episode=735 reward=0.7615903 (507.43 it/sec) -training >> step=4408800, episode=735 reward=0.7537508 (497.30 it/sec) -training >> step=4408900, episode=735 reward=0.7676618 (520.96 it/sec) -training >> step=4409000, episode=735 reward=0.7559602 (535.75 it/sec) -training >> step=4409100, episode=735 reward=0.7786404 (516.69 it/sec) -training >> step=4409200, episode=735 reward=0.7703478 (511.05 it/sec) -training >> step=4409300, episode=736 reward=0.7624245 (92.04 it/sec) -training >> step=4409400, episode=736 reward=0.7521786 (472.04 it/sec) -training >> step=4409500, episode=736 reward=0.764419 (433.65 it/sec) -training >> step=4409600, episode=736 reward=0.7586657 (478.72 it/sec) -training >> step=4409700, episode=736 reward=0.7523081 (497.72 it/sec) -training >> step=4409800, episode=736 reward=0.7710422 (474.15 it/sec) -training >> step=4409900, episode=736 reward=0.7772869 (444.00 it/sec) -training >> step=4410000, episode=736 reward=0.7937247 (510.04 it/sec) -training >> step=4410100, episode=736 reward=0.7713436 (471.66 it/sec) -training >> step=4410200, episode=736 reward=0.7803466 (474.54 it/sec) -training >> step=4410300, episode=736 reward=0.7705376 (482.91 it/sec) -training >> step=4410400, episode=736 reward=0.776123 (466.78 it/sec) -training >> step=4410500, episode=736 reward=0.7822029 (452.84 it/sec) -training >> step=4410600, episode=736 reward=0.7755435 (458.18 it/sec) -training >> step=4410700, episode=736 reward=0.7884074 (438.61 it/sec) -training >> step=4410800, episode=736 reward=0.7760378 (470.07 it/sec) -training >> step=4410900, episode=736 reward=0.7723262 (477.16 it/sec) -training >> step=4411000, episode=736 reward=0.7901235 (487.86 it/sec) -training >> step=4411100, episode=736 reward=0.7606415 (479.92 it/sec) -training >> step=4411200, episode=736 reward=0.7674857 (454.80 it/sec) -training >> step=4411300, episode=736 reward=0.7663137 (457.31 it/sec) -training >> step=4411400, episode=736 reward=0.7770522 (476.03 it/sec) -training >> step=4411500, episode=736 reward=0.7699494 (486.69 it/sec) -training >> step=4411600, episode=736 reward=0.7632803 (449.79 it/sec) -training >> step=4411700, episode=736 reward=0.7826695 (478.18 it/sec) -training >> step=4411800, episode=736 reward=0.7901359 (470.66 it/sec) -training >> step=4411900, episode=736 reward=0.8073904 (453.53 it/sec) -training >> step=4412000, episode=736 reward=0.7814892 (457.88 it/sec) -training >> step=4412100, episode=736 reward=0.7843704 (454.18 it/sec) -training >> step=4412200, episode=736 reward=0.7742276 (476.47 it/sec) -training >> step=4412300, episode=736 reward=0.7387859 (477.04 it/sec) -training >> step=4412400, episode=736 reward=0.7670953 (445.03 it/sec) -training >> step=4412500, episode=736 reward=0.7511988 (493.63 it/sec) -training >> step=4412600, episode=736 reward=0.7703179 (432.58 it/sec) -training >> step=4412700, episode=736 reward=0.7973618 (476.76 it/sec) -training >> step=4412800, episode=736 reward=0.7974605 (483.75 it/sec) -training >> step=4412900, episode=736 reward=0.7901857 (503.46 it/sec) -training >> step=4413000, episode=736 reward=0.7642933 (496.63 it/sec) -training >> step=4413100, episode=736 reward=0.7469276 (500.79 it/sec) -training >> step=4413200, episode=736 reward=0.7752412 (531.69 it/sec) -training >> step=4413300, episode=736 reward=0.7928058 (475.06 it/sec) -training >> step=4413400, episode=736 reward=0.7743157 (448.08 it/sec) -training >> step=4413500, episode=736 reward=0.7697945 (488.21 it/sec) -training >> step=4413600, episode=736 reward=0.7816288 (511.21 it/sec) -training >> step=4413700, episode=736 reward=0.7895606 (478.50 it/sec) -training >> step=4413800, episode=736 reward=0.7755256 (476.53 it/sec) -training >> step=4413900, episode=736 reward=0.7887437 (518.46 it/sec) -training >> step=4414000, episode=736 reward=0.7539271 (477.56 it/sec) -training >> step=4414100, episode=736 reward=0.7903179 (502.91 it/sec) -training >> step=4414200, episode=736 reward=0.7599117 (509.45 it/sec) -training >> step=4414300, episode=736 reward=0.7644642 (500.77 it/sec) -training >> step=4414400, episode=736 reward=0.7721825 (495.23 it/sec) -training >> step=4414500, episode=736 reward=0.7698952 (470.68 it/sec) -training >> step=4414600, episode=736 reward=0.7624554 (519.64 it/sec) -training >> step=4414700, episode=736 reward=0.7598277 (497.67 it/sec) -training >> step=4414800, episode=736 reward=0.7717835 (500.16 it/sec) -training >> step=4414900, episode=736 reward=0.7422577 (520.91 it/sec) -training >> step=4415000, episode=736 reward=0.7759423 (513.25 it/sec) -training >> step=4415100, episode=736 reward=0.76162 (426.18 it/sec) -training >> step=4415200, episode=736 reward=0.7683817 (503.65 it/sec) -training >> step=4415300, episode=737 reward=0.7608194 (114.72 it/sec) -training >> step=4415400, episode=737 reward=0.7734156 (515.62 it/sec) -training >> step=4415500, episode=737 reward=0.7782644 (462.96 it/sec) -training >> step=4415600, episode=737 reward=0.7685655 (453.51 it/sec) -training >> step=4415700, episode=737 reward=0.7846037 (489.89 it/sec) -training >> step=4415800, episode=737 reward=0.7709641 (487.15 it/sec) -training >> step=4415900, episode=737 reward=0.769451 (444.73 it/sec) -training >> step=4416000, episode=737 reward=0.7798827 (471.24 it/sec) -training >> step=4416100, episode=737 reward=0.7609094 (518.04 it/sec) -training >> step=4416200, episode=737 reward=0.7967763 (437.91 it/sec) -training >> step=4416300, episode=737 reward=0.8029528 (477.56 it/sec) -training >> step=4416400, episode=737 reward=0.7754104 (496.53 it/sec) -training >> step=4416500, episode=737 reward=0.7762591 (499.30 it/sec) -training >> step=4416600, episode=737 reward=0.7921377 (496.47 it/sec) -training >> step=4416700, episode=737 reward=0.7721165 (465.18 it/sec) -training >> step=4416800, episode=737 reward=0.7835194 (507.95 it/sec) -training >> step=4416900, episode=737 reward=0.7833242 (496.72 it/sec) -training >> step=4417000, episode=737 reward=0.7917805 (486.70 it/sec) -training >> step=4417100, episode=737 reward=0.76156 (427.89 it/sec) -training >> step=4417200, episode=737 reward=0.7725273 (505.23 it/sec) -training >> step=4417300, episode=737 reward=0.7766712 (446.38 it/sec) -training >> step=4417400, episode=737 reward=0.7718117 (492.84 it/sec) -training >> step=4417500, episode=737 reward=0.7871797 (428.09 it/sec) -training >> step=4417600, episode=737 reward=0.7741557 (510.23 it/sec) -training >> step=4417700, episode=737 reward=0.7806014 (441.20 it/sec) -training >> step=4417800, episode=737 reward=0.7847011 (453.67 it/sec) -training >> step=4417900, episode=737 reward=0.7828227 (528.36 it/sec) -training >> step=4418000, episode=737 reward=0.7972991 (518.10 it/sec) -training >> step=4418100, episode=737 reward=0.8102734 (498.01 it/sec) -training >> step=4418200, episode=737 reward=0.7761742 (490.45 it/sec) -training >> step=4418300, episode=737 reward=0.7922397 (507.00 it/sec) -training >> step=4418400, episode=737 reward=0.7790714 (411.16 it/sec) -training >> step=4418500, episode=737 reward=0.7792052 (467.44 it/sec) -training >> step=4418600, episode=737 reward=0.7794341 (493.87 it/sec) -training >> step=4418700, episode=737 reward=0.775531 (520.77 it/sec) -training >> step=4418800, episode=737 reward=0.7795892 (489.57 it/sec) -training >> step=4418900, episode=737 reward=0.7690664 (464.99 it/sec) -training >> step=4419000, episode=737 reward=0.7810579 (516.66 it/sec) -training >> step=4419100, episode=737 reward=0.7745094 (494.85 it/sec) -training >> step=4419200, episode=737 reward=0.7736856 (476.11 it/sec) -training >> step=4419300, episode=737 reward=0.7850302 (472.69 it/sec) -training >> step=4419400, episode=737 reward=0.7842034 (519.20 it/sec) -training >> step=4419500, episode=737 reward=0.7694637 (484.65 it/sec) -training >> step=4419600, episode=737 reward=0.7753152 (504.61 it/sec) -training >> step=4419700, episode=737 reward=0.7627912 (502.42 it/sec) -training >> step=4419800, episode=737 reward=0.7747419 (486.02 it/sec) -training >> step=4419900, episode=737 reward=0.7701294 (485.09 it/sec) -training >> step=4420000, episode=737 reward=0.7528547 (497.17 it/sec) -training >> step=4420100, episode=737 reward=0.7562196 (491.37 it/sec) -training >> step=4420200, episode=737 reward=0.7931809 (504.77 it/sec) -training >> step=4420300, episode=737 reward=0.7592524 (467.23 it/sec) -training >> step=4420400, episode=737 reward=0.7414542 (447.93 it/sec) -training >> step=4420500, episode=737 reward=0.7709737 (465.91 it/sec) -training >> step=4420600, episode=737 reward=0.7727446 (422.23 it/sec) -training >> step=4420700, episode=737 reward=0.7605388 (486.82 it/sec) -training >> step=4420800, episode=737 reward=0.7442302 (524.81 it/sec) -training >> step=4420900, episode=737 reward=0.7550469 (495.36 it/sec) -training >> step=4421000, episode=737 reward=0.7541128 (483.95 it/sec) -training >> step=4421100, episode=737 reward=0.7626615 (440.34 it/sec) -training >> step=4421200, episode=737 reward=0.75653 (528.24 it/sec) -training >> step=4421300, episode=738 reward=0.770972 (110.11 it/sec) -training >> step=4421400, episode=738 reward=0.7932995 (462.03 it/sec) -training >> step=4421500, episode=738 reward=0.7697548 (475.67 it/sec) -training >> step=4421600, episode=738 reward=0.7559057 (458.28 it/sec) -training >> step=4421700, episode=738 reward=0.7626055 (500.42 it/sec) -training >> step=4421800, episode=738 reward=0.7682513 (471.05 it/sec) -training >> step=4421900, episode=738 reward=0.7701789 (451.09 it/sec) -training >> step=4422000, episode=738 reward=0.7897419 (499.22 it/sec) -training >> step=4422100, episode=738 reward=0.7801741 (487.79 it/sec) -training >> step=4422200, episode=738 reward=0.7942181 (485.49 it/sec) -training >> step=4422300, episode=738 reward=0.7712978 (482.03 it/sec) -training >> step=4422400, episode=738 reward=0.7747568 (479.73 it/sec) -training >> step=4422500, episode=738 reward=0.7802312 (453.54 it/sec) -training >> step=4422600, episode=738 reward=0.786016 (471.03 it/sec) -training >> step=4422700, episode=738 reward=0.7666323 (474.90 it/sec) -training >> step=4422800, episode=738 reward=0.7803638 (476.76 it/sec) -training >> step=4422900, episode=738 reward=0.8031706 (474.49 it/sec) -training >> step=4423000, episode=738 reward=0.7762443 (459.71 it/sec) -training >> step=4423100, episode=738 reward=0.7643901 (481.90 it/sec) -training >> step=4423200, episode=738 reward=0.7852357 (497.73 it/sec) -training >> step=4423300, episode=738 reward=0.7732687 (481.77 it/sec) -training >> step=4423400, episode=738 reward=0.7806318 (508.80 it/sec) -training >> step=4423500, episode=738 reward=0.76231 (447.72 it/sec) -training >> step=4423600, episode=738 reward=0.7882965 (504.77 it/sec) -training >> step=4423700, episode=738 reward=0.7556612 (470.70 it/sec) -training >> step=4423800, episode=738 reward=0.7863269 (526.20 it/sec) -training >> step=4423900, episode=738 reward=0.7794544 (487.59 it/sec) -training >> step=4424000, episode=738 reward=0.7783056 (457.22 it/sec) -training >> step=4424100, episode=738 reward=0.7856535 (529.06 it/sec) -training >> step=4424200, episode=738 reward=0.7959331 (474.74 it/sec) -training >> step=4424300, episode=738 reward=0.7714925 (491.67 it/sec) -training >> step=4424400, episode=738 reward=0.793703 (478.40 it/sec) -training >> step=4424500, episode=738 reward=0.7699197 (523.26 it/sec) -training >> step=4424600, episode=738 reward=0.7636409 (511.50 it/sec) -training >> step=4424700, episode=738 reward=0.7629434 (480.53 it/sec) -training >> step=4424800, episode=738 reward=0.7854317 (489.66 it/sec) -training >> step=4424900, episode=738 reward=0.7809713 (462.81 it/sec) -training >> step=4425000, episode=738 reward=0.7556166 (472.72 it/sec) -training >> step=4425100, episode=738 reward=0.7757452 (492.66 it/sec) -training >> step=4425200, episode=738 reward=0.7381399 (469.88 it/sec) -training >> step=4425300, episode=738 reward=0.782212 (460.69 it/sec) -training >> step=4425400, episode=738 reward=0.7594083 (479.90 it/sec) -training >> step=4425500, episode=738 reward=0.7765047 (483.81 it/sec) -training >> step=4425600, episode=738 reward=0.7855254 (480.04 it/sec) -training >> step=4425700, episode=738 reward=0.7808078 (485.17 it/sec) -training >> step=4425800, episode=738 reward=0.7791926 (490.27 it/sec) -training >> step=4425900, episode=738 reward=0.7749962 (482.00 it/sec) -training >> step=4426000, episode=738 reward=0.759425 (466.37 it/sec) -training >> step=4426100, episode=738 reward=0.7533922 (502.97 it/sec) -training >> step=4426200, episode=738 reward=0.772499 (498.26 it/sec) -training >> step=4426300, episode=738 reward=0.7701774 (523.73 it/sec) -training >> step=4426400, episode=738 reward=0.7815915 (476.79 it/sec) -training >> step=4426500, episode=738 reward=0.7783371 (502.46 it/sec) -training >> step=4426600, episode=738 reward=0.7445453 (455.00 it/sec) -training >> step=4426700, episode=738 reward=0.7550417 (516.51 it/sec) -training >> step=4426800, episode=738 reward=0.7617992 (499.23 it/sec) -training >> step=4426900, episode=738 reward=0.7661023 (483.97 it/sec) -training >> step=4427000, episode=738 reward=0.7780272 (502.05 it/sec) -training >> step=4427100, episode=738 reward=0.7686636 (502.34 it/sec) -training >> step=4427200, episode=738 reward=0.7647352 (459.92 it/sec) -training >> step=4427300, episode=739 reward=0.7682894 (93.78 it/sec) -training >> step=4427400, episode=739 reward=0.7848956 (441.15 it/sec) -training >> step=4427500, episode=739 reward=0.7788742 (425.07 it/sec) -training >> step=4427600, episode=739 reward=0.7794098 (478.47 it/sec) -training >> step=4427700, episode=739 reward=0.7783854 (489.87 it/sec) -training >> step=4427800, episode=739 reward=0.7622148 (479.31 it/sec) -training >> step=4427900, episode=739 reward=0.7820319 (487.60 it/sec) -training >> step=4428000, episode=739 reward=0.7752604 (528.94 it/sec) -training >> step=4428100, episode=739 reward=0.7583648 (515.98 it/sec) -training >> step=4428200, episode=739 reward=0.7852158 (513.74 it/sec) -training >> step=4428300, episode=739 reward=0.7589942 (499.77 it/sec) -training >> step=4428400, episode=739 reward=0.7734387 (527.86 it/sec) -training >> step=4428500, episode=739 reward=0.7609736 (502.12 it/sec) -training >> step=4428600, episode=739 reward=0.7895905 (543.78 it/sec) -training >> step=4428700, episode=739 reward=0.7860296 (507.65 it/sec) -training >> step=4428800, episode=739 reward=0.7839019 (509.04 it/sec) -training >> step=4428900, episode=739 reward=0.7569863 (523.46 it/sec) -training >> step=4429000, episode=739 reward=0.786046 (534.23 it/sec) -training >> step=4429100, episode=739 reward=0.763459 (494.41 it/sec) -training >> step=4429200, episode=739 reward=0.7662774 (532.02 it/sec) -training >> step=4429300, episode=739 reward=0.764739 (482.17 it/sec) -training >> step=4429400, episode=739 reward=0.7607426 (513.06 it/sec) -training >> step=4429500, episode=739 reward=0.8056085 (485.96 it/sec) -training >> step=4429600, episode=739 reward=0.7772263 (498.36 it/sec) -training >> step=4429700, episode=739 reward=0.7822394 (516.80 it/sec) -training >> step=4429800, episode=739 reward=0.7952914 (504.75 it/sec) -training >> step=4429900, episode=739 reward=0.7917168 (501.43 it/sec) -training >> step=4430000, episode=739 reward=0.7635316 (491.45 it/sec) -training >> step=4430100, episode=739 reward=0.774984 (481.65 it/sec) -training >> step=4430200, episode=739 reward=0.7678479 (513.76 it/sec) -training >> step=4430300, episode=739 reward=0.7793277 (503.81 it/sec) -training >> step=4430400, episode=739 reward=0.7588544 (464.10 it/sec) -training >> step=4430500, episode=739 reward=0.7881948 (505.01 it/sec) -training >> step=4430600, episode=739 reward=0.7793511 (490.28 it/sec) -training >> step=4430700, episode=739 reward=0.747248 (505.75 it/sec) -training >> step=4430800, episode=739 reward=0.7751363 (513.43 it/sec) -training >> step=4430900, episode=739 reward=0.7630672 (497.94 it/sec) -training >> step=4431000, episode=739 reward=0.7618753 (519.56 it/sec) -training >> step=4431100, episode=739 reward=0.752876 (467.51 it/sec) -training >> step=4431200, episode=739 reward=0.78123 (519.03 it/sec) -training >> step=4431300, episode=739 reward=0.7673926 (548.55 it/sec) -training >> step=4431400, episode=739 reward=0.7918125 (451.12 it/sec) -training >> step=4431500, episode=739 reward=0.7800253 (431.58 it/sec) -training >> step=4431600, episode=739 reward=0.7571604 (495.77 it/sec) -training >> step=4431700, episode=739 reward=0.7758417 (451.21 it/sec) -training >> step=4431800, episode=739 reward=0.7643809 (509.12 it/sec) -training >> step=4431900, episode=739 reward=0.760949 (502.75 it/sec) -training >> step=4432000, episode=739 reward=0.8058249 (449.05 it/sec) -training >> step=4432100, episode=739 reward=0.7893884 (522.32 it/sec) -training >> step=4432200, episode=739 reward=0.754846 (520.48 it/sec) -training >> step=4432300, episode=739 reward=0.7756218 (506.29 it/sec) -training >> step=4432400, episode=739 reward=0.7957877 (499.93 it/sec) -training >> step=4432500, episode=739 reward=0.7762415 (519.52 it/sec) -training >> step=4432600, episode=739 reward=0.76908 (502.11 it/sec) -training >> step=4432700, episode=739 reward=0.7541986 (532.69 it/sec) -training >> step=4432800, episode=739 reward=0.7476571 (511.24 it/sec) -training >> step=4432900, episode=739 reward=0.7788938 (492.01 it/sec) -training >> step=4433000, episode=739 reward=0.7464026 (491.73 it/sec) -training >> step=4433100, episode=739 reward=0.7706586 (524.93 it/sec) -training >> step=4433200, episode=739 reward=0.7771356 (486.88 it/sec) -training >> step=4433300, episode=740 reward=0.756433 (129.04 it/sec) -training >> step=4433400, episode=740 reward=0.7809106 (391.09 it/sec) -training >> step=4433500, episode=740 reward=0.7622225 (445.18 it/sec) -training >> step=4433600, episode=740 reward=0.7739626 (466.08 it/sec) -training >> step=4433700, episode=740 reward=0.7772853 (468.18 it/sec) -training >> step=4433800, episode=740 reward=0.7855334 (522.97 it/sec) -training >> step=4433900, episode=740 reward=0.7772086 (507.18 it/sec) -training >> step=4434000, episode=740 reward=0.7793206 (460.11 it/sec) -training >> step=4434100, episode=740 reward=0.7829595 (486.58 it/sec) -training >> step=4434200, episode=740 reward=0.8087471 (450.57 it/sec) -training >> step=4434300, episode=740 reward=0.7815531 (467.75 it/sec) -training >> step=4434400, episode=740 reward=0.7742602 (507.50 it/sec) -training >> step=4434500, episode=740 reward=0.7931784 (455.91 it/sec) -training >> step=4434600, episode=740 reward=0.7802793 (521.78 it/sec) -training >> step=4434700, episode=740 reward=0.7706605 (438.12 it/sec) -training >> step=4434800, episode=740 reward=0.8004625 (455.93 it/sec) -training >> step=4434900, episode=740 reward=0.7844086 (493.47 it/sec) -training >> step=4435000, episode=740 reward=0.7852582 (475.27 it/sec) -training >> step=4435100, episode=740 reward=0.790382 (458.02 it/sec) -training >> step=4435200, episode=740 reward=0.7805833 (418.23 it/sec) -training >> step=4435300, episode=740 reward=0.7551645 (472.92 it/sec) -training >> step=4435400, episode=740 reward=0.7691133 (466.56 it/sec) -training >> step=4435500, episode=740 reward=0.75892 (469.02 it/sec) -training >> step=4435600, episode=740 reward=0.7772051 (426.77 it/sec) -training >> step=4435700, episode=740 reward=0.7636259 (467.17 it/sec) -training >> step=4435800, episode=740 reward=0.7721182 (485.55 it/sec) -training >> step=4435900, episode=740 reward=0.7900507 (455.51 it/sec) -training >> step=4436000, episode=740 reward=0.7757326 (477.00 it/sec) -training >> step=4436100, episode=740 reward=0.7658499 (462.94 it/sec) -training >> step=4436200, episode=740 reward=0.7875924 (491.41 it/sec) -training >> step=4436300, episode=740 reward=0.773981 (409.64 it/sec) -training >> step=4436400, episode=740 reward=0.7745893 (497.08 it/sec) -training >> step=4436500, episode=740 reward=0.7772626 (488.12 it/sec) -training >> step=4436600, episode=740 reward=0.7703107 (468.14 it/sec) -training >> step=4436700, episode=740 reward=0.7681285 (486.56 it/sec) -training >> step=4436800, episode=740 reward=0.7760174 (449.96 it/sec) -training >> step=4436900, episode=740 reward=0.7789456 (461.43 it/sec) -training >> step=4437000, episode=740 reward=0.7685388 (473.29 it/sec) -training >> step=4437100, episode=740 reward=0.7817752 (461.65 it/sec) -training >> step=4437200, episode=740 reward=0.7874963 (474.60 it/sec) -training >> step=4437300, episode=740 reward=0.7793394 (482.66 it/sec) -training >> step=4437400, episode=740 reward=0.7626916 (504.45 it/sec) -training >> step=4437500, episode=740 reward=0.7581129 (479.47 it/sec) -training >> step=4437600, episode=740 reward=0.8130364 (497.42 it/sec) -training >> step=4437700, episode=740 reward=0.7719137 (447.93 it/sec) -training >> step=4437800, episode=740 reward=0.7853662 (457.00 it/sec) -training >> step=4437900, episode=740 reward=0.7649661 (496.90 it/sec) -training >> step=4438000, episode=740 reward=0.7739841 (429.91 it/sec) -training >> step=4438100, episode=740 reward=0.7894419 (404.98 it/sec) -training >> step=4438200, episode=740 reward=0.7888888 (448.71 it/sec) -training >> step=4438300, episode=740 reward=0.7865699 (486.64 it/sec) -training >> step=4438400, episode=740 reward=0.7738639 (498.99 it/sec) -training >> step=4438500, episode=740 reward=0.78472 (522.51 it/sec) -training >> step=4438600, episode=740 reward=0.7735232 (526.13 it/sec) -training >> step=4438700, episode=740 reward=0.7657371 (554.33 it/sec) -training >> step=4438800, episode=740 reward=0.7775899 (509.89 it/sec) -training >> step=4438900, episode=740 reward=0.7791785 (461.43 it/sec) -training >> step=4439000, episode=740 reward=0.80064 (488.89 it/sec) -training >> step=4439100, episode=740 reward=0.7744269 (539.81 it/sec) -training >> step=4439200, episode=740 reward=0.7664018 (530.01 it/sec) -training >> step=4439300, episode=741 reward=0.7595438 (127.38 it/sec) -training >> step=4439400, episode=741 reward=0.775022 (529.08 it/sec) -training >> step=4439500, episode=741 reward=0.7974824 (363.09 it/sec) -training >> step=4439600, episode=741 reward=0.7701507 (509.20 it/sec) -training >> step=4439700, episode=741 reward=0.7844465 (503.04 it/sec) -training >> step=4439800, episode=741 reward=0.7880403 (512.08 it/sec) -training >> step=4439900, episode=741 reward=0.7728795 (474.09 it/sec) -training >> step=4440000, episode=741 reward=0.7782952 (520.14 it/sec) -training >> step=4440100, episode=741 reward=0.7788119 (550.46 it/sec) -training >> step=4440200, episode=741 reward=0.7682462 (531.08 it/sec) -training >> step=4440300, episode=741 reward=0.7928705 (522.40 it/sec) -training >> step=4440400, episode=741 reward=0.7910923 (534.51 it/sec) -training >> step=4440500, episode=741 reward=0.7784896 (509.39 it/sec) -training >> step=4440600, episode=741 reward=0.7808999 (501.08 it/sec) -training >> step=4440700, episode=741 reward=0.7934268 (482.78 it/sec) -training >> step=4440800, episode=741 reward=0.7710403 (476.01 it/sec) -training >> step=4440900, episode=741 reward=0.7714823 (552.75 it/sec) -training >> step=4441000, episode=741 reward=0.7889735 (499.19 it/sec) -training >> step=4441100, episode=741 reward=0.7892585 (530.00 it/sec) -training >> step=4441200, episode=741 reward=0.7921296 (451.09 it/sec) -training >> step=4441300, episode=741 reward=0.7715757 (517.07 it/sec) -training >> step=4441400, episode=741 reward=0.7812566 (528.17 it/sec) -training >> step=4441500, episode=741 reward=0.7561485 (536.90 it/sec) -training >> step=4441600, episode=741 reward=0.8026931 (495.55 it/sec) -training >> step=4441700, episode=741 reward=0.7812805 (481.42 it/sec) -training >> step=4441800, episode=741 reward=0.7940736 (465.99 it/sec) -training >> step=4441900, episode=741 reward=0.7518578 (518.90 it/sec) -training >> step=4442000, episode=741 reward=0.7965063 (545.03 it/sec) -training >> step=4442100, episode=741 reward=0.7778389 (493.13 it/sec) -training >> step=4442200, episode=741 reward=0.7808006 (517.29 it/sec) -training >> step=4442300, episode=741 reward=0.7867938 (504.71 it/sec) -training >> step=4442400, episode=741 reward=0.7795258 (502.42 it/sec) -training >> step=4442500, episode=741 reward=0.7677901 (495.61 it/sec) -training >> step=4442600, episode=741 reward=0.7735261 (567.19 it/sec) -training >> step=4442700, episode=741 reward=0.774299 (489.09 it/sec) -training >> step=4442800, episode=741 reward=0.7635459 (487.86 it/sec) -training >> step=4442900, episode=741 reward=0.8007423 (486.09 it/sec) -training >> step=4443000, episode=741 reward=0.7406242 (514.49 it/sec) -training >> step=4443100, episode=741 reward=0.7954388 (503.23 it/sec) -training >> step=4443200, episode=741 reward=0.7797536 (539.98 it/sec) -training >> step=4443300, episode=741 reward=0.7796746 (506.18 it/sec) -training >> step=4443400, episode=741 reward=0.7716133 (512.99 it/sec) -training >> step=4443500, episode=741 reward=0.7729391 (494.63 it/sec) -training >> step=4443600, episode=741 reward=0.8023235 (461.92 it/sec) -training >> step=4443700, episode=741 reward=0.7786259 (526.16 it/sec) -training >> step=4443800, episode=741 reward=0.7744929 (471.86 it/sec) -training >> step=4443900, episode=741 reward=0.7620359 (471.85 it/sec) -training >> step=4444000, episode=741 reward=0.7532188 (452.54 it/sec) -training >> step=4444100, episode=741 reward=0.7619713 (516.66 it/sec) -training >> step=4444200, episode=741 reward=0.7762513 (434.49 it/sec) -training >> step=4444300, episode=741 reward=0.7552167 (463.17 it/sec) -training >> step=4444400, episode=741 reward=0.7818877 (493.24 it/sec) -training >> step=4444500, episode=741 reward=0.7870495 (472.33 it/sec) -training >> step=4444600, episode=741 reward=0.7803132 (489.55 it/sec) -training >> step=4444700, episode=741 reward=0.76321 (508.14 it/sec) -training >> step=4444800, episode=741 reward=0.7874639 (484.57 it/sec) -training >> step=4444900, episode=741 reward=0.7895072 (496.97 it/sec) -training >> step=4445000, episode=741 reward=0.7517817 (473.58 it/sec) -training >> step=4445100, episode=741 reward=0.7820776 (502.93 it/sec) -training >> step=4445200, episode=741 reward=0.7738088 (433.98 it/sec) -training >> step=4445300, episode=742 reward=0.7675071 (91.47 it/sec) -training >> step=4445400, episode=742 reward=0.77288 (437.60 it/sec) -training >> step=4445500, episode=742 reward=0.783039 (454.73 it/sec) -training >> step=4445600, episode=742 reward=0.7748495 (429.48 it/sec) -training >> step=4445700, episode=742 reward=0.7872128 (352.71 it/sec) -training >> step=4445800, episode=742 reward=0.7777915 (492.38 it/sec) -training >> step=4445900, episode=742 reward=0.7942098 (536.92 it/sec) -training >> step=4446000, episode=742 reward=0.7821477 (501.29 it/sec) -training >> step=4446100, episode=742 reward=0.7965516 (532.86 it/sec) -training >> step=4446200, episode=742 reward=0.8003826 (521.17 it/sec) -training >> step=4446300, episode=742 reward=0.7959458 (525.08 it/sec) -training >> step=4446400, episode=742 reward=0.7610486 (475.94 it/sec) -training >> step=4446500, episode=742 reward=0.788285 (511.78 it/sec) -training >> step=4446600, episode=742 reward=0.766265 (517.82 it/sec) -training >> step=4446700, episode=742 reward=0.7734519 (514.23 it/sec) -training >> step=4446800, episode=742 reward=0.7660288 (523.76 it/sec) -training >> step=4446900, episode=742 reward=0.756241 (507.99 it/sec) -training >> step=4447000, episode=742 reward=0.7709965 (500.05 it/sec) -training >> step=4447100, episode=742 reward=0.7866723 (505.91 it/sec) -training >> step=4447200, episode=742 reward=0.7596222 (515.54 it/sec) -training >> step=4447300, episode=742 reward=0.7873353 (551.31 it/sec) -training >> step=4447400, episode=742 reward=0.8058978 (494.17 it/sec) -training >> step=4447500, episode=742 reward=0.7853038 (500.61 it/sec) -training >> step=4447600, episode=742 reward=0.7766212 (539.46 it/sec) -training >> step=4447700, episode=742 reward=0.7917402 (524.45 it/sec) -training >> step=4447800, episode=742 reward=0.7953905 (529.84 it/sec) -training >> step=4447900, episode=742 reward=0.7799737 (506.24 it/sec) -training >> step=4448000, episode=742 reward=0.7763616 (510.42 it/sec) -training >> step=4448100, episode=742 reward=0.7795237 (504.44 it/sec) -training >> step=4448200, episode=742 reward=0.7806976 (518.39 it/sec) -training >> step=4448300, episode=742 reward=0.7833821 (526.91 it/sec) -training >> step=4448400, episode=742 reward=0.7781823 (497.49 it/sec) -training >> step=4448500, episode=742 reward=0.7812015 (491.94 it/sec) -training >> step=4448600, episode=742 reward=0.7838281 (477.95 it/sec) -training >> step=4448700, episode=742 reward=0.7553284 (497.49 it/sec) -training >> step=4448800, episode=742 reward=0.7654632 (458.70 it/sec) -training >> step=4448900, episode=742 reward=0.7726764 (475.63 it/sec) -training >> step=4449000, episode=742 reward=0.7921727 (475.10 it/sec) -training >> step=4449100, episode=742 reward=0.767152 (499.37 it/sec) -training >> step=4449200, episode=742 reward=0.7744114 (471.55 it/sec) -training >> step=4449300, episode=742 reward=0.8010484 (426.96 it/sec) -training >> step=4449400, episode=742 reward=0.7692236 (443.81 it/sec) -training >> step=4449500, episode=742 reward=0.7900661 (501.81 it/sec) -training >> step=4449600, episode=742 reward=0.7806384 (488.11 it/sec) -training >> step=4449700, episode=742 reward=0.7670956 (418.80 it/sec) -training >> step=4449800, episode=742 reward=0.7760411 (422.83 it/sec) -training >> step=4449900, episode=742 reward=0.7680809 (421.82 it/sec) -training >> step=4450000, episode=742 reward=0.7649837 (449.77 it/sec) -training >> step=4450100, episode=742 reward=0.7605664 (455.07 it/sec) -training >> step=4450200, episode=742 reward=0.7697424 (460.42 it/sec) -training >> step=4450300, episode=742 reward=0.7622045 (485.21 it/sec) -training >> step=4450400, episode=742 reward=0.7651594 (444.44 it/sec) -training >> step=4450500, episode=742 reward=0.7786701 (454.07 it/sec) -training >> step=4450600, episode=742 reward=0.7734863 (458.39 it/sec) -training >> step=4450700, episode=742 reward=0.7645597 (502.25 it/sec) -training >> step=4450800, episode=742 reward=0.7705015 (507.15 it/sec) -training >> step=4450900, episode=742 reward=0.7622871 (513.80 it/sec) -training >> step=4451000, episode=742 reward=0.7594284 (486.04 it/sec) -training >> step=4451100, episode=742 reward=0.7674532 (525.74 it/sec) -training >> step=4451200, episode=742 reward=0.7501547 (521.43 it/sec) -training >> step=4451300, episode=743 reward=0.7824548 (91.73 it/sec) -training >> step=4451400, episode=743 reward=0.7678982 (452.99 it/sec) -training >> step=4451500, episode=743 reward=0.7681963 (463.18 it/sec) -training >> step=4451600, episode=743 reward=0.7792814 (393.41 it/sec) -training >> step=4451700, episode=743 reward=0.7736769 (415.72 it/sec) -training >> step=4451800, episode=743 reward=0.7918104 (477.52 it/sec) -training >> step=4451900, episode=743 reward=0.796051 (346.00 it/sec) -training >> step=4452000, episode=743 reward=0.778881 (444.91 it/sec) -training >> step=4452100, episode=743 reward=0.7962945 (465.20 it/sec) -training >> step=4452200, episode=743 reward=0.7931111 (448.34 it/sec) -training >> step=4452300, episode=743 reward=0.788694 (438.89 it/sec) -training >> step=4452400, episode=743 reward=0.7686368 (514.71 it/sec) -training >> step=4452500, episode=743 reward=0.8161342 (497.09 it/sec) -training >> step=4452600, episode=743 reward=0.7863751 (513.90 it/sec) -training >> step=4452700, episode=743 reward=0.7820331 (483.82 it/sec) -training >> step=4452800, episode=743 reward=0.7840425 (517.83 it/sec) -training >> step=4452900, episode=743 reward=0.7760404 (472.03 it/sec) -training >> step=4453000, episode=743 reward=0.7698234 (437.66 it/sec) -training >> step=4453100, episode=743 reward=0.7859106 (450.62 it/sec) -training >> step=4453200, episode=743 reward=0.7765751 (472.02 it/sec) -training >> step=4453300, episode=743 reward=0.7617605 (505.42 it/sec) -training >> step=4453400, episode=743 reward=0.7506047 (488.57 it/sec) -training >> step=4453500, episode=743 reward=0.773463 (493.40 it/sec) -training >> step=4453600, episode=743 reward=0.7671856 (503.67 it/sec) -training >> step=4453700, episode=743 reward=0.7593151 (526.88 it/sec) -training >> step=4453800, episode=743 reward=0.790029 (441.05 it/sec) -training >> step=4453900, episode=743 reward=0.7823036 (482.28 it/sec) -training >> step=4454000, episode=743 reward=0.770419 (525.95 it/sec) -training >> step=4454100, episode=743 reward=0.7607256 (476.62 it/sec) -training >> step=4454200, episode=743 reward=0.7902052 (484.53 it/sec) -training >> step=4454300, episode=743 reward=0.7624213 (507.85 it/sec) -training >> step=4454400, episode=743 reward=0.7673034 (533.51 it/sec) -training >> step=4454500, episode=743 reward=0.7725431 (504.89 it/sec) -training >> step=4454600, episode=743 reward=0.780616 (495.21 it/sec) -training >> step=4454700, episode=743 reward=0.777844 (547.37 it/sec) -training >> step=4454800, episode=743 reward=0.7941119 (514.86 it/sec) -training >> step=4454900, episode=743 reward=0.7770472 (491.59 it/sec) -training >> step=4455000, episode=743 reward=0.7893069 (501.07 it/sec) -training >> step=4455100, episode=743 reward=0.7779657 (538.80 it/sec) -training >> step=4455200, episode=743 reward=0.7630494 (522.41 it/sec) -training >> step=4455300, episode=743 reward=0.777172 (529.53 it/sec) -training >> step=4455400, episode=743 reward=0.7568703 (508.82 it/sec) -training >> step=4455500, episode=743 reward=0.7804329 (539.03 it/sec) -training >> step=4455600, episode=743 reward=0.775353 (490.52 it/sec) -training >> step=4455700, episode=743 reward=0.793542 (521.67 it/sec) -training >> step=4455800, episode=743 reward=0.7894972 (543.15 it/sec) -training >> step=4455900, episode=743 reward=0.7720549 (527.07 it/sec) -training >> step=4456000, episode=743 reward=0.7816488 (456.37 it/sec) -training >> step=4456100, episode=743 reward=0.7738407 (487.96 it/sec) -training >> step=4456200, episode=743 reward=0.7774063 (529.36 it/sec) -training >> step=4456300, episode=743 reward=0.7863302 (538.35 it/sec) -training >> step=4456400, episode=743 reward=0.7812026 (485.48 it/sec) -training >> step=4456500, episode=743 reward=0.764903 (496.83 it/sec) -training >> step=4456600, episode=743 reward=0.7619766 (469.06 it/sec) -training >> step=4456700, episode=743 reward=0.7700307 (529.91 it/sec) -training >> step=4456800, episode=743 reward=0.766794 (501.30 it/sec) -training >> step=4456900, episode=743 reward=0.7765573 (540.05 it/sec) -training >> step=4457000, episode=743 reward=0.7739929 (494.42 it/sec) -training >> step=4457100, episode=743 reward=0.7631941 (507.20 it/sec) -training >> step=4457200, episode=743 reward=0.7691764 (518.39 it/sec) -training >> step=4457300, episode=744 reward=0.7878035 (141.33 it/sec) -training >> step=4457400, episode=744 reward=0.7984251 (505.37 it/sec) -training >> step=4457500, episode=744 reward=0.7891473 (510.28 it/sec) -training >> step=4457600, episode=744 reward=0.7406077 (508.71 it/sec) -training >> step=4457700, episode=744 reward=0.7828954 (517.45 it/sec) -training >> step=4457800, episode=744 reward=0.7991728 (454.85 it/sec) -training >> step=4457900, episode=744 reward=0.7645278 (445.13 it/sec) -training >> step=4458000, episode=744 reward=0.7733692 (364.18 it/sec) -training >> step=4458100, episode=744 reward=0.7826916 (528.29 it/sec) -training >> step=4458200, episode=744 reward=0.7768924 (505.70 it/sec) -training >> step=4458300, episode=744 reward=0.7819381 (516.75 it/sec) -training >> step=4458400, episode=744 reward=0.7651453 (499.38 it/sec) -training >> step=4458500, episode=744 reward=0.7946646 (492.86 it/sec) -training >> step=4458600, episode=744 reward=0.7765107 (454.07 it/sec) -training >> step=4458700, episode=744 reward=0.7783687 (498.27 it/sec) -training >> step=4458800, episode=744 reward=0.7753785 (484.34 it/sec) -training >> step=4458900, episode=744 reward=0.7791196 (408.39 it/sec) -training >> step=4459000, episode=744 reward=0.7759056 (448.00 it/sec) -training >> step=4459100, episode=744 reward=0.7896656 (434.82 it/sec) -training >> step=4459200, episode=744 reward=0.7792553 (496.15 it/sec) -training >> step=4459300, episode=744 reward=0.7607006 (514.74 it/sec) -training >> step=4459400, episode=744 reward=0.7675624 (491.20 it/sec) -training >> step=4459500, episode=744 reward=0.7765999 (479.09 it/sec) -training >> step=4459600, episode=744 reward=0.7724992 (493.21 it/sec) -training >> step=4459700, episode=744 reward=0.7697961 (415.22 it/sec) -training >> step=4459800, episode=744 reward=0.7835805 (463.56 it/sec) -training >> step=4459900, episode=744 reward=0.7619616 (512.16 it/sec) -training >> step=4460000, episode=744 reward=0.7996925 (508.42 it/sec) -training >> step=4460100, episode=744 reward=0.7799872 (486.99 it/sec) -training >> step=4460200, episode=744 reward=0.7847681 (516.54 it/sec) -training >> step=4460300, episode=744 reward=0.7661797 (439.62 it/sec) -training >> step=4460400, episode=744 reward=0.7654775 (452.11 it/sec) -training >> step=4460500, episode=744 reward=0.7792553 (515.95 it/sec) -training >> step=4460600, episode=744 reward=0.7864928 (482.44 it/sec) -training >> step=4460700, episode=744 reward=0.7901089 (521.38 it/sec) -training >> step=4460800, episode=744 reward=0.7771948 (492.75 it/sec) -training >> step=4460900, episode=744 reward=0.7836038 (481.84 it/sec) -training >> step=4461000, episode=744 reward=0.7646101 (521.32 it/sec) -training >> step=4461100, episode=744 reward=0.7711526 (487.93 it/sec) -training >> step=4461200, episode=744 reward=0.7950225 (449.87 it/sec) -training >> step=4461300, episode=744 reward=0.7711654 (426.10 it/sec) -training >> step=4461400, episode=744 reward=0.7846853 (444.02 it/sec) -training >> step=4461500, episode=744 reward=0.7637432 (483.23 it/sec) -training >> step=4461600, episode=744 reward=0.7801372 (536.63 it/sec) -training >> step=4461700, episode=744 reward=0.803478 (506.43 it/sec) -training >> step=4461800, episode=744 reward=0.7904756 (460.46 it/sec) -training >> step=4461900, episode=744 reward=0.7771747 (499.39 it/sec) -training >> step=4462000, episode=744 reward=0.7706068 (488.61 it/sec) -training >> step=4462100, episode=744 reward=0.7764004 (501.46 it/sec) -training >> step=4462200, episode=744 reward=0.7846633 (455.56 it/sec) -training >> step=4462300, episode=744 reward=0.7553269 (513.35 it/sec) -training >> step=4462400, episode=744 reward=0.7748328 (481.05 it/sec) -training >> step=4462500, episode=744 reward=0.7561498 (483.30 it/sec) -training >> step=4462600, episode=744 reward=0.7729194 (475.04 it/sec) -training >> step=4462700, episode=744 reward=0.8037432 (460.64 it/sec) -training >> step=4462800, episode=744 reward=0.7834729 (449.56 it/sec) -training >> step=4462900, episode=744 reward=0.759322 (501.83 it/sec) -training >> step=4463000, episode=744 reward=0.780827 (463.71 it/sec) -training >> step=4463100, episode=744 reward=0.7825257 (478.71 it/sec) -training >> step=4463200, episode=744 reward=0.7880591 (503.12 it/sec) -training >> step=4463300, episode=745 reward=0.7695535 (130.97 it/sec) -training >> step=4463400, episode=745 reward=0.7536176 (397.62 it/sec) -training >> step=4463500, episode=745 reward=0.78562 (433.51 it/sec) -training >> step=4463600, episode=745 reward=0.7477059 (394.56 it/sec) -training >> step=4463700, episode=745 reward=0.7723928 (434.94 it/sec) -training >> step=4463800, episode=745 reward=0.7653335 (470.15 it/sec) -training >> step=4463900, episode=745 reward=0.775655 (472.62 it/sec) -training >> step=4464000, episode=745 reward=0.7970197 (498.10 it/sec) -training >> step=4464100, episode=745 reward=0.7683472 (526.51 it/sec) -training >> step=4464200, episode=745 reward=0.7714599 (396.10 it/sec) -training >> step=4464300, episode=745 reward=0.8062134 (495.29 it/sec) -training >> step=4464400, episode=745 reward=0.7632419 (483.08 it/sec) -training >> step=4464500, episode=745 reward=0.7910488 (527.64 it/sec) -training >> step=4464600, episode=745 reward=0.7901716 (464.20 it/sec) -training >> step=4464700, episode=745 reward=0.7501525 (477.25 it/sec) -training >> step=4464800, episode=745 reward=0.7727391 (523.56 it/sec) -training >> step=4464900, episode=745 reward=0.7757828 (469.08 it/sec) -training >> step=4465000, episode=745 reward=0.7686879 (515.02 it/sec) -training >> step=4465100, episode=745 reward=0.7930766 (466.01 it/sec) -training >> step=4465200, episode=745 reward=0.7834695 (440.26 it/sec) -training >> step=4465300, episode=745 reward=0.7705015 (464.18 it/sec) -training >> step=4465400, episode=745 reward=0.7828822 (415.85 it/sec) -training >> step=4465500, episode=745 reward=0.7684828 (518.33 it/sec) -training >> step=4465600, episode=745 reward=0.7689548 (491.23 it/sec) -training >> step=4465700, episode=745 reward=0.7823271 (525.44 it/sec) -training >> step=4465800, episode=745 reward=0.7809302 (499.87 it/sec) -training >> step=4465900, episode=745 reward=0.771463 (467.68 it/sec) -training >> step=4466000, episode=745 reward=0.7695884 (476.90 it/sec) -training >> step=4466100, episode=745 reward=0.7730993 (458.34 it/sec) -training >> step=4466200, episode=745 reward=0.7739437 (499.54 it/sec) -training >> step=4466300, episode=745 reward=0.7511805 (501.04 it/sec) -training >> step=4466400, episode=745 reward=0.7792051 (484.15 it/sec) -training >> step=4466500, episode=745 reward=0.7901303 (482.94 it/sec) -training >> step=4466600, episode=745 reward=0.7690198 (489.82 it/sec) -training >> step=4466700, episode=745 reward=0.7637208 (523.29 it/sec) -training >> step=4466800, episode=745 reward=0.7667096 (494.02 it/sec) -training >> step=4466900, episode=745 reward=0.7791563 (504.58 it/sec) -training >> step=4467000, episode=745 reward=0.7684308 (459.38 it/sec) -training >> step=4467100, episode=745 reward=0.8056123 (458.03 it/sec) -training >> step=4467200, episode=745 reward=0.7620614 (420.17 it/sec) -training >> step=4467300, episode=745 reward=0.7931396 (476.22 it/sec) -training >> step=4467400, episode=745 reward=0.7813901 (451.42 it/sec) -training >> step=4467500, episode=745 reward=0.7774912 (514.18 it/sec) -training >> step=4467600, episode=745 reward=0.7802898 (484.45 it/sec) -training >> step=4467700, episode=745 reward=0.7632899 (552.16 it/sec) -training >> step=4467800, episode=745 reward=0.7698848 (467.25 it/sec) -training >> step=4467900, episode=745 reward=0.7929385 (482.24 it/sec) -training >> step=4468000, episode=745 reward=0.7776315 (501.41 it/sec) -training >> step=4468100, episode=745 reward=0.7832056 (544.26 it/sec) -training >> step=4468200, episode=745 reward=0.7665808 (509.89 it/sec) -training >> step=4468300, episode=745 reward=0.7814419 (499.79 it/sec) -training >> step=4468400, episode=745 reward=0.7679611 (497.47 it/sec) -training >> step=4468500, episode=745 reward=0.7754318 (530.99 it/sec) -training >> step=4468600, episode=745 reward=0.7814283 (556.18 it/sec) -training >> step=4468700, episode=745 reward=0.7811763 (493.32 it/sec) -training >> step=4468800, episode=745 reward=0.7771819 (493.29 it/sec) -training >> step=4468900, episode=745 reward=0.7537163 (524.08 it/sec) -training >> step=4469000, episode=745 reward=0.7659636 (452.02 it/sec) -training >> step=4469100, episode=745 reward=0.7806617 (477.16 it/sec) -training >> step=4469200, episode=745 reward=0.7837711 (512.34 it/sec) -training >> step=4469300, episode=746 reward=0.7796999 (114.35 it/sec) -training >> step=4469400, episode=746 reward=0.7788076 (465.24 it/sec) -training >> step=4469500, episode=746 reward=0.7525283 (481.75 it/sec) -training >> step=4469600, episode=746 reward=0.7784877 (502.38 it/sec) -training >> step=4469700, episode=746 reward=0.7725815 (516.89 it/sec) -training >> step=4469800, episode=746 reward=0.7900296 (528.14 it/sec) -training >> step=4469900, episode=746 reward=0.7863678 (468.94 it/sec) -training >> step=4470000, episode=746 reward=0.788723 (498.10 it/sec) -training >> step=4470100, episode=746 reward=0.7794929 (462.94 it/sec) -training >> step=4470200, episode=746 reward=0.773729 (500.91 it/sec) -training >> step=4470300, episode=746 reward=0.7711529 (369.06 it/sec) -training >> step=4470400, episode=746 reward=0.7873905 (519.54 it/sec) -training >> step=4470500, episode=746 reward=0.7675858 (521.66 it/sec) -training >> step=4470600, episode=746 reward=0.7709234 (508.84 it/sec) -training >> step=4470700, episode=746 reward=0.7872847 (496.38 it/sec) -training >> step=4470800, episode=746 reward=0.7717146 (430.02 it/sec) -training >> step=4470900, episode=746 reward=0.7939138 (527.00 it/sec) -training >> step=4471000, episode=746 reward=0.761619 (514.57 it/sec) -training >> step=4471100, episode=746 reward=0.754969 (490.03 it/sec) -training >> step=4471200, episode=746 reward=0.748446 (525.72 it/sec) -training >> step=4471300, episode=746 reward=0.762353 (451.41 it/sec) -training >> step=4471400, episode=746 reward=0.7613624 (526.26 it/sec) -training >> step=4471500, episode=746 reward=0.7624961 (501.60 it/sec) -training >> step=4471600, episode=746 reward=0.773288 (484.35 it/sec) -training >> step=4471700, episode=746 reward=0.7826256 (507.16 it/sec) -training >> step=4471800, episode=746 reward=0.774561 (463.60 it/sec) -training >> step=4471900, episode=746 reward=0.7915323 (486.05 it/sec) -training >> step=4472000, episode=746 reward=0.7574545 (493.27 it/sec) -training >> step=4472100, episode=746 reward=0.7888189 (491.03 it/sec) -training >> step=4472200, episode=746 reward=0.7738717 (487.76 it/sec) -training >> step=4472300, episode=746 reward=0.7573102 (499.30 it/sec) -training >> step=4472400, episode=746 reward=0.7801062 (492.01 it/sec) -training >> step=4472500, episode=746 reward=0.7748355 (444.51 it/sec) -training >> step=4472600, episode=746 reward=0.7654066 (494.64 it/sec) -training >> step=4472700, episode=746 reward=0.7536495 (458.66 it/sec) -training >> step=4472800, episode=746 reward=0.8027646 (460.95 it/sec) -training >> step=4472900, episode=746 reward=0.7817854 (478.04 it/sec) -training >> step=4473000, episode=746 reward=0.7760553 (495.94 it/sec) -training >> step=4473100, episode=746 reward=0.7895257 (496.15 it/sec) -training >> step=4473200, episode=746 reward=0.7784395 (485.40 it/sec) -training >> step=4473300, episode=746 reward=0.7821746 (445.51 it/sec) -training >> step=4473400, episode=746 reward=0.7996499 (464.59 it/sec) -training >> step=4473500, episode=746 reward=0.7778323 (474.07 it/sec) -training >> step=4473600, episode=746 reward=0.7620863 (461.89 it/sec) -training >> step=4473700, episode=746 reward=0.7874911 (488.27 it/sec) -training >> step=4473800, episode=746 reward=0.7616571 (489.15 it/sec) -training >> step=4473900, episode=746 reward=0.7772934 (513.89 it/sec) -training >> step=4474000, episode=746 reward=0.7654615 (468.74 it/sec) -training >> step=4474100, episode=746 reward=0.7484758 (452.84 it/sec) -training >> step=4474200, episode=746 reward=0.7816012 (483.59 it/sec) -training >> step=4474300, episode=746 reward=0.7803563 (476.89 it/sec) -training >> step=4474400, episode=746 reward=0.7684923 (489.99 it/sec) -training >> step=4474500, episode=746 reward=0.7936524 (459.72 it/sec) -training >> step=4474600, episode=746 reward=0.7749918 (439.53 it/sec) -training >> step=4474700, episode=746 reward=0.7765343 (470.11 it/sec) -training >> step=4474800, episode=746 reward=0.7916817 (449.04 it/sec) -training >> step=4474900, episode=746 reward=0.7582041 (497.28 it/sec) -training >> step=4475000, episode=746 reward=0.7757661 (466.63 it/sec) -training >> step=4475100, episode=746 reward=0.7722999 (453.84 it/sec) -training >> step=4475200, episode=746 reward=0.7770921 (465.87 it/sec) -training >> step=4475300, episode=747 reward=0.7629856 (96.96 it/sec) -training >> step=4475400, episode=747 reward=0.7660314 (442.52 it/sec) -training >> step=4475500, episode=747 reward=0.7655251 (422.61 it/sec) -training >> step=4475600, episode=747 reward=0.7727407 (479.85 it/sec) -training >> step=4475700, episode=747 reward=0.7750907 (470.74 it/sec) -training >> step=4475800, episode=747 reward=0.7922809 (481.76 it/sec) -training >> step=4475900, episode=747 reward=0.7766109 (496.13 it/sec) -training >> step=4476000, episode=747 reward=0.7665001 (466.99 it/sec) -training >> step=4476100, episode=747 reward=0.7570879 (421.32 it/sec) -training >> step=4476200, episode=747 reward=0.7727151 (468.65 it/sec) -training >> step=4476300, episode=747 reward=0.7929128 (471.11 it/sec) -training >> step=4476400, episode=747 reward=0.759227 (421.99 it/sec) -training >> step=4476500, episode=747 reward=0.7703491 (330.80 it/sec) -training >> step=4476600, episode=747 reward=0.7629079 (468.30 it/sec) -training >> step=4476700, episode=747 reward=0.7821612 (463.71 it/sec) -training >> step=4476800, episode=747 reward=0.7802915 (444.88 it/sec) -training >> step=4476900, episode=747 reward=0.765746 (430.19 it/sec) -training >> step=4477000, episode=747 reward=0.7844475 (452.00 it/sec) -training >> step=4477100, episode=747 reward=0.7867386 (456.93 it/sec) -training >> step=4477200, episode=747 reward=0.7751893 (493.18 it/sec) -training >> step=4477300, episode=747 reward=0.7719297 (465.88 it/sec) -training >> step=4477400, episode=747 reward=0.8038784 (472.62 it/sec) -training >> step=4477500, episode=747 reward=0.7821665 (474.04 it/sec) -training >> step=4477600, episode=747 reward=0.7896718 (442.67 it/sec) -training >> step=4477700, episode=747 reward=0.7569845 (411.92 it/sec) -training >> step=4477800, episode=747 reward=0.7799162 (478.40 it/sec) -training >> step=4477900, episode=747 reward=0.8012751 (407.81 it/sec) -training >> step=4478000, episode=747 reward=0.7742271 (469.59 it/sec) -training >> step=4478100, episode=747 reward=0.7841026 (470.98 it/sec) -training >> step=4478200, episode=747 reward=0.7666891 (473.06 it/sec) -training >> step=4478300, episode=747 reward=0.7983035 (427.41 it/sec) -training >> step=4478400, episode=747 reward=0.7655037 (477.50 it/sec) -training >> step=4478500, episode=747 reward=0.7525865 (476.93 it/sec) -training >> step=4478600, episode=747 reward=0.7885954 (518.52 it/sec) -training >> step=4478700, episode=747 reward=0.7763276 (533.54 it/sec) -training >> step=4478800, episode=747 reward=0.7716578 (494.25 it/sec) -training >> step=4478900, episode=747 reward=0.7842647 (500.20 it/sec) -training >> step=4479000, episode=747 reward=0.7646261 (531.27 it/sec) -training >> step=4479100, episode=747 reward=0.7716971 (530.74 it/sec) -training >> step=4479200, episode=747 reward=0.7858619 (501.89 it/sec) -training >> step=4479300, episode=747 reward=0.7839946 (496.55 it/sec) -training >> step=4479400, episode=747 reward=0.7690137 (527.57 it/sec) -training >> step=4479500, episode=747 reward=0.7790667 (528.78 it/sec) -training >> step=4479600, episode=747 reward=0.8003649 (499.22 it/sec) -training >> step=4479700, episode=747 reward=0.7683289 (527.11 it/sec) -training >> step=4479800, episode=747 reward=0.7760612 (506.12 it/sec) -training >> step=4479900, episode=747 reward=0.7969378 (495.00 it/sec) -training >> step=4480000, episode=747 reward=0.7859907 (482.08 it/sec) -training >> step=4480100, episode=747 reward=0.7947642 (514.75 it/sec) -training >> step=4480200, episode=747 reward=0.7773931 (560.46 it/sec) -training >> step=4480300, episode=747 reward=0.7870436 (518.99 it/sec) -training >> step=4480400, episode=747 reward=0.783684 (495.67 it/sec) -training >> step=4480500, episode=747 reward=0.7592785 (518.48 it/sec) -training >> step=4480600, episode=747 reward=0.7819258 (526.45 it/sec) -training >> step=4480700, episode=747 reward=0.7916323 (479.08 it/sec) -training >> step=4480800, episode=747 reward=0.7557266 (515.34 it/sec) -training >> step=4480900, episode=747 reward=0.7738389 (491.12 it/sec) -training >> step=4481000, episode=747 reward=0.7824731 (456.36 it/sec) -training >> step=4481100, episode=747 reward=0.7808925 (535.94 it/sec) -training >> step=4481200, episode=747 reward=0.7884723 (517.59 it/sec) -training >> step=4481300, episode=748 reward=0.7971618 (131.01 it/sec) -training >> step=4481400, episode=748 reward=0.7693581 (500.76 it/sec) -training >> step=4481500, episode=748 reward=0.7832704 (489.11 it/sec) -training >> step=4481600, episode=748 reward=0.7819028 (501.12 it/sec) -training >> step=4481700, episode=748 reward=0.770019 (513.79 it/sec) -training >> step=4481800, episode=748 reward=0.7563667 (492.51 it/sec) -training >> step=4481900, episode=748 reward=0.7862564 (520.29 it/sec) -training >> step=4482000, episode=748 reward=0.7783887 (529.58 it/sec) -training >> step=4482100, episode=748 reward=0.7746812 (508.69 it/sec) -training >> step=4482200, episode=748 reward=0.7814357 (475.43 it/sec) -training >> step=4482300, episode=748 reward=0.7776907 (502.38 it/sec) -training >> step=4482400, episode=748 reward=0.7721264 (544.04 it/sec) -training >> step=4482500, episode=748 reward=0.7795381 (482.95 it/sec) -training >> step=4482600, episode=748 reward=0.7511196 (440.57 it/sec) -training >> step=4482700, episode=748 reward=0.7720082 (348.26 it/sec) -training >> step=4482800, episode=748 reward=0.7714997 (465.04 it/sec) -training >> step=4482900, episode=748 reward=0.7725006 (466.82 it/sec) -training >> step=4483000, episode=748 reward=0.7894785 (499.76 it/sec) -training >> step=4483100, episode=748 reward=0.7916164 (510.26 it/sec) -training >> step=4483200, episode=748 reward=0.7604715 (505.40 it/sec) -training >> step=4483300, episode=748 reward=0.7920241 (491.06 it/sec) -training >> step=4483400, episode=748 reward=0.7862304 (468.60 it/sec) -training >> step=4483500, episode=748 reward=0.78071 (496.70 it/sec) -training >> step=4483600, episode=748 reward=0.7920077 (467.38 it/sec) -training >> step=4483700, episode=748 reward=0.7756856 (519.77 it/sec) -training >> step=4483800, episode=748 reward=0.7665437 (495.70 it/sec) -training >> step=4483900, episode=748 reward=0.7926819 (479.36 it/sec) -training >> step=4484000, episode=748 reward=0.7751551 (474.68 it/sec) -training >> step=4484100, episode=748 reward=0.7915499 (491.84 it/sec) -training >> step=4484200, episode=748 reward=0.7843922 (456.32 it/sec) -training >> step=4484300, episode=748 reward=0.7803548 (487.08 it/sec) -training >> step=4484400, episode=748 reward=0.7772654 (495.21 it/sec) -training >> step=4484500, episode=748 reward=0.7699246 (498.16 it/sec) -training >> step=4484600, episode=748 reward=0.7691666 (441.49 it/sec) -training >> step=4484700, episode=748 reward=0.7701637 (458.73 it/sec) -training >> step=4484800, episode=748 reward=0.7855713 (500.64 it/sec) -training >> step=4484900, episode=748 reward=0.7806016 (497.90 it/sec) -training >> step=4485000, episode=748 reward=0.7752049 (503.69 it/sec) -training >> step=4485100, episode=748 reward=0.7715049 (461.00 it/sec) -training >> step=4485200, episode=748 reward=0.7754785 (488.43 it/sec) -training >> step=4485300, episode=748 reward=0.7869834 (448.65 it/sec) -training >> step=4485400, episode=748 reward=0.7662035 (497.32 it/sec) -training >> step=4485500, episode=748 reward=0.7747991 (460.91 it/sec) -training >> step=4485600, episode=748 reward=0.7727484 (467.18 it/sec) -training >> step=4485700, episode=748 reward=0.7661076 (502.11 it/sec) -training >> step=4485800, episode=748 reward=0.7714192 (513.23 it/sec) -training >> step=4485900, episode=748 reward=0.7847854 (516.84 it/sec) -training >> step=4486000, episode=748 reward=0.7754065 (525.79 it/sec) -training >> step=4486100, episode=748 reward=0.7804618 (473.70 it/sec) -training >> step=4486200, episode=748 reward=0.7511604 (517.12 it/sec) -training >> step=4486300, episode=748 reward=0.7706463 (547.35 it/sec) -training >> step=4486400, episode=748 reward=0.7709371 (498.45 it/sec) -training >> step=4486500, episode=748 reward=0.7940953 (514.44 it/sec) -training >> step=4486600, episode=748 reward=0.7717133 (524.29 it/sec) -training >> step=4486700, episode=748 reward=0.7662626 (527.92 it/sec) -training >> step=4486800, episode=748 reward=0.7627618 (478.76 it/sec) -training >> step=4486900, episode=748 reward=0.7823131 (523.96 it/sec) -training >> step=4487000, episode=748 reward=0.7675304 (518.00 it/sec) -training >> step=4487100, episode=748 reward=0.7845144 (489.73 it/sec) -training >> step=4487200, episode=748 reward=0.7753483 (470.24 it/sec) -training >> step=4487300, episode=749 reward=0.7739871 (100.75 it/sec) -training >> step=4487400, episode=749 reward=0.7669222 (480.06 it/sec) -training >> step=4487500, episode=749 reward=0.7635777 (482.43 it/sec) -training >> step=4487600, episode=749 reward=0.7633838 (472.04 it/sec) -training >> step=4487700, episode=749 reward=0.7725469 (483.75 it/sec) -training >> step=4487800, episode=749 reward=0.7783766 (441.17 it/sec) -training >> step=4487900, episode=749 reward=0.7644423 (483.80 it/sec) -training >> step=4488000, episode=749 reward=0.7945107 (525.90 it/sec) -training >> step=4488100, episode=749 reward=0.7908686 (540.00 it/sec) -training >> step=4488200, episode=749 reward=0.7660425 (491.06 it/sec) -training >> step=4488300, episode=749 reward=0.7711661 (507.28 it/sec) -training >> step=4488400, episode=749 reward=0.768594 (528.80 it/sec) -training >> step=4488500, episode=749 reward=0.7717396 (478.40 it/sec) -training >> step=4488600, episode=749 reward=0.782024 (480.85 it/sec) -training >> step=4488700, episode=749 reward=0.7812402 (474.53 it/sec) -training >> step=4488800, episode=749 reward=0.7912148 (319.53 it/sec) -training >> step=4488900, episode=749 reward=0.7708623 (525.59 it/sec) -training >> step=4489000, episode=749 reward=0.753112 (482.75 it/sec) -training >> step=4489100, episode=749 reward=0.762056 (522.18 it/sec) -training >> step=4489200, episode=749 reward=0.7678154 (485.06 it/sec) -training >> step=4489300, episode=749 reward=0.768693 (450.57 it/sec) -training >> step=4489400, episode=749 reward=0.77595 (477.15 it/sec) -training >> step=4489500, episode=749 reward=0.7772732 (492.54 it/sec) -training >> step=4489600, episode=749 reward=0.7980449 (488.42 it/sec) -training >> step=4489700, episode=749 reward=0.7628035 (490.02 it/sec) -training >> step=4489800, episode=749 reward=0.7626311 (473.35 it/sec) -training >> step=4489900, episode=749 reward=0.7633828 (495.31 it/sec) -training >> step=4490000, episode=749 reward=0.7808875 (444.30 it/sec) -training >> step=4490100, episode=749 reward=0.7799158 (464.76 it/sec) -training >> step=4490200, episode=749 reward=0.7549474 (500.17 it/sec) -training >> step=4490300, episode=749 reward=0.7897364 (472.19 it/sec) -training >> step=4490400, episode=749 reward=0.7696118 (462.15 it/sec) -training >> step=4490500, episode=749 reward=0.7759804 (506.01 it/sec) -training >> step=4490600, episode=749 reward=0.7901925 (491.56 it/sec) -training >> step=4490700, episode=749 reward=0.7554262 (436.05 it/sec) -training >> step=4490800, episode=749 reward=0.7625554 (465.13 it/sec) -training >> step=4490900, episode=749 reward=0.7900811 (512.37 it/sec) -training >> step=4491000, episode=749 reward=0.7799986 (479.10 it/sec) -training >> step=4491100, episode=749 reward=0.7949522 (449.61 it/sec) -training >> step=4491200, episode=749 reward=0.7714651 (495.58 it/sec) -training >> step=4491300, episode=749 reward=0.7682337 (501.76 it/sec) -training >> step=4491400, episode=749 reward=0.7754292 (456.10 it/sec) -training >> step=4491500, episode=749 reward=0.7751272 (494.84 it/sec) -training >> step=4491600, episode=749 reward=0.782033 (508.98 it/sec) -training >> step=4491700, episode=749 reward=0.7913269 (510.95 it/sec) -training >> step=4491800, episode=749 reward=0.7851416 (506.67 it/sec) -training >> step=4491900, episode=749 reward=0.7865771 (486.55 it/sec) -training >> step=4492000, episode=749 reward=0.7726871 (518.44 it/sec) -training >> step=4492100, episode=749 reward=0.7742212 (522.32 it/sec) -training >> step=4492200, episode=749 reward=0.7644838 (506.61 it/sec) -training >> step=4492300, episode=749 reward=0.7514127 (544.37 it/sec) -training >> step=4492400, episode=749 reward=0.7579224 (506.78 it/sec) -training >> step=4492500, episode=749 reward=0.78418 (545.40 it/sec) -training >> step=4492600, episode=749 reward=0.7837167 (530.20 it/sec) -training >> step=4492700, episode=749 reward=0.78199 (530.32 it/sec) -training >> step=4492800, episode=749 reward=0.7596182 (482.66 it/sec) -training >> step=4492900, episode=749 reward=0.7694108 (486.79 it/sec) -training >> step=4493000, episode=749 reward=0.7935075 (530.73 it/sec) -training >> step=4493100, episode=749 reward=0.7553003 (511.79 it/sec) -training >> step=4493200, episode=749 reward=0.7483821 (532.13 it/sec) -training >> step=4493300, episode=750 reward=0.7709378 (115.64 it/sec) -training >> step=4493400, episode=750 reward=0.77956 (425.86 it/sec) -training >> step=4493500, episode=750 reward=0.7786753 (501.78 it/sec) -training >> step=4493600, episode=750 reward=0.7576269 (468.27 it/sec) -training >> step=4493700, episode=750 reward=0.7788891 (494.33 it/sec) -training >> step=4493800, episode=750 reward=0.7838901 (428.93 it/sec) -training >> step=4493900, episode=750 reward=0.7932603 (500.76 it/sec) -training >> step=4494000, episode=750 reward=0.7736507 (438.27 it/sec) -training >> step=4494100, episode=750 reward=0.7767082 (464.39 it/sec) -training >> step=4494200, episode=750 reward=0.7680251 (473.74 it/sec) -training >> step=4494300, episode=750 reward=0.7867798 (502.69 it/sec) -training >> step=4494400, episode=750 reward=0.7887788 (456.82 it/sec) -training >> step=4494500, episode=750 reward=0.7702312 (481.74 it/sec) -training >> step=4494600, episode=750 reward=0.7643285 (431.75 it/sec) -training >> step=4494700, episode=750 reward=0.7934119 (459.88 it/sec) -training >> step=4494800, episode=750 reward=0.7722385 (448.03 it/sec) -training >> step=4494900, episode=750 reward=0.7941192 (479.77 it/sec) -training >> step=4495000, episode=750 reward=0.7804306 (508.57 it/sec) -training >> step=4495100, episode=750 reward=0.7704904 (344.78 it/sec) -training >> step=4495200, episode=750 reward=0.7768953 (479.76 it/sec) -training >> step=4495300, episode=750 reward=0.7752808 (470.67 it/sec) -training >> step=4495400, episode=750 reward=0.7816901 (465.95 it/sec) -training >> step=4495500, episode=750 reward=0.7775568 (473.71 it/sec) -training >> step=4495600, episode=750 reward=0.7869973 (500.92 it/sec) -training >> step=4495700, episode=750 reward=0.7828948 (522.00 it/sec) -training >> step=4495800, episode=750 reward=0.7866228 (479.81 it/sec) -training >> step=4495900, episode=750 reward=0.7736374 (487.20 it/sec) -training >> step=4496000, episode=750 reward=0.7803993 (467.23 it/sec) -training >> step=4496100, episode=750 reward=0.7986779 (468.77 it/sec) -training >> step=4496200, episode=750 reward=0.7947536 (520.39 it/sec) -training >> step=4496300, episode=750 reward=0.7946117 (466.92 it/sec) -training >> step=4496400, episode=750 reward=0.7670166 (467.11 it/sec) -training >> step=4496500, episode=750 reward=0.7527659 (477.25 it/sec) -training >> step=4496600, episode=750 reward=0.7885139 (474.09 it/sec) -training >> step=4496700, episode=750 reward=0.7702966 (463.53 it/sec) -training >> step=4496800, episode=750 reward=0.7895246 (489.57 it/sec) -training >> step=4496900, episode=750 reward=0.7843139 (485.70 it/sec) -training >> step=4497000, episode=750 reward=0.7667383 (481.48 it/sec) -training >> step=4497100, episode=750 reward=0.7913325 (502.67 it/sec) -training >> step=4497200, episode=750 reward=0.7712796 (510.12 it/sec) -training >> step=4497300, episode=750 reward=0.763559 (452.55 it/sec) -training >> step=4497400, episode=750 reward=0.7975916 (504.41 it/sec) -training >> step=4497500, episode=750 reward=0.7701134 (511.18 it/sec) -training >> step=4497600, episode=750 reward=0.7808813 (508.30 it/sec) -training >> step=4497700, episode=750 reward=0.7807288 (512.91 it/sec) -training >> step=4497800, episode=750 reward=0.7829346 (517.57 it/sec) -training >> step=4497900, episode=750 reward=0.7689328 (507.21 it/sec) -training >> step=4498000, episode=750 reward=0.7720277 (485.28 it/sec) -training >> step=4498100, episode=750 reward=0.7776391 (442.95 it/sec) -training >> step=4498200, episode=750 reward=0.7953402 (559.39 it/sec) -training >> step=4498300, episode=750 reward=0.7705394 (503.33 it/sec) -training >> step=4498400, episode=750 reward=0.7791223 (481.81 it/sec) -training >> step=4498500, episode=750 reward=0.7582926 (497.48 it/sec) -training >> step=4498600, episode=750 reward=0.7859253 (494.83 it/sec) -training >> step=4498700, episode=750 reward=0.7893755 (522.90 it/sec) -training >> step=4498800, episode=750 reward=0.7615102 (532.44 it/sec) -training >> step=4498900, episode=750 reward=0.7784358 (536.90 it/sec) -training >> step=4499000, episode=750 reward=0.7739002 (505.28 it/sec) -training >> step=4499100, episode=750 reward=0.7676504 (483.14 it/sec) -training >> step=4499200, episode=750 reward=0.7718216 (522.62 it/sec) -training >> step=4499300, episode=751 reward=0.7615243 (110.77 it/sec) -training >> step=4499400, episode=751 reward=0.7535124 (503.08 it/sec) -training >> step=4499500, episode=751 reward=0.7329863 (515.61 it/sec) -training >> step=4499600, episode=751 reward=0.7840903 (529.01 it/sec) -training >> step=4499700, episode=751 reward=0.7718099 (492.69 it/sec) -training >> step=4499800, episode=751 reward=0.7676696 (508.72 it/sec) -training >> step=4499900, episode=751 reward=0.7987312 (526.15 it/sec) -training >> step=4500000, episode=751 reward=0.7756712 (530.61 it/sec) -training >> step=4500100, episode=751 reward=0.7857364 (506.40 it/sec) -training >> step=4500200, episode=751 reward=0.7749422 (513.93 it/sec) -training >> step=4500300, episode=751 reward=0.7595351 (484.95 it/sec) -training >> step=4500400, episode=751 reward=0.7908221 (562.61 it/sec) -training >> step=4500500, episode=751 reward=0.7916518 (511.33 it/sec) -training >> step=4500600, episode=751 reward=0.7518366 (544.35 it/sec) -training >> step=4500700, episode=751 reward=0.7889553 (527.19 it/sec) -training >> step=4500800, episode=751 reward=0.7941915 (478.49 it/sec) -training >> step=4500900, episode=751 reward=0.8041403 (538.14 it/sec) -training >> step=4501000, episode=751 reward=0.7712335 (526.13 it/sec) -training >> step=4501100, episode=751 reward=0.7770213 (531.84 it/sec) -training >> step=4501200, episode=751 reward=0.7928311 (476.95 it/sec) -training >> step=4501300, episode=751 reward=0.7803218 (389.47 it/sec) -training >> step=4501400, episode=751 reward=0.7839754 (556.47 it/sec) -training >> step=4501500, episode=751 reward=0.7570685 (517.08 it/sec) -training >> step=4501600, episode=751 reward=0.7896531 (516.39 it/sec) -training >> step=4501700, episode=751 reward=0.782136 (518.42 it/sec) -training >> step=4501800, episode=751 reward=0.7688875 (518.02 it/sec) -training >> step=4501900, episode=751 reward=0.7843034 (511.13 it/sec) -training >> step=4502000, episode=751 reward=0.7805766 (560.83 it/sec) -training >> step=4502100, episode=751 reward=0.7868987 (531.30 it/sec) -training >> step=4502200, episode=751 reward=0.7865095 (541.70 it/sec) -training >> step=4502300, episode=751 reward=0.7776138 (492.96 it/sec) -training >> step=4502400, episode=751 reward=0.7836893 (462.98 it/sec) -training >> step=4502500, episode=751 reward=0.7971205 (565.47 it/sec) -training >> step=4502600, episode=751 reward=0.7662622 (544.73 it/sec) -training >> step=4502700, episode=751 reward=0.7982349 (514.15 it/sec) -training >> step=4502800, episode=751 reward=0.769327 (505.75 it/sec) -training >> step=4502900, episode=751 reward=0.7756358 (484.45 it/sec) -training >> step=4503000, episode=751 reward=0.7725399 (520.35 it/sec) -training >> step=4503100, episode=751 reward=0.7736193 (552.45 it/sec) -training >> step=4503200, episode=751 reward=0.7884655 (503.88 it/sec) -training >> step=4503300, episode=751 reward=0.7757279 (509.19 it/sec) -training >> step=4503400, episode=751 reward=0.7816112 (450.04 it/sec) -training >> step=4503500, episode=751 reward=0.7594541 (522.56 it/sec) -training >> step=4503600, episode=751 reward=0.7672986 (531.20 it/sec) -training >> step=4503700, episode=751 reward=0.7830316 (512.01 it/sec) -training >> step=4503800, episode=751 reward=0.767344 (501.41 it/sec) -training >> step=4503900, episode=751 reward=0.7790518 (531.75 it/sec) -training >> step=4504000, episode=751 reward=0.7612257 (531.45 it/sec) -training >> step=4504100, episode=751 reward=0.7550139 (534.72 it/sec) -training >> step=4504200, episode=751 reward=0.759787 (500.78 it/sec) -training >> step=4504300, episode=751 reward=0.7809361 (520.03 it/sec) -training >> step=4504400, episode=751 reward=0.7769918 (497.14 it/sec) -training >> step=4504500, episode=751 reward=0.7791648 (504.47 it/sec) -training >> step=4504600, episode=751 reward=0.7553614 (535.62 it/sec) -training >> step=4504700, episode=751 reward=0.767576 (549.64 it/sec) -training >> step=4504800, episode=751 reward=0.7628026 (521.55 it/sec) -training >> step=4504900, episode=751 reward=0.7799065 (530.29 it/sec) -training >> step=4505000, episode=751 reward=0.7615815 (527.07 it/sec) -training >> step=4505100, episode=751 reward=0.7633029 (534.86 it/sec) -training >> step=4505200, episode=751 reward=0.7841542 (475.35 it/sec) -training >> step=4505300, episode=752 reward=0.7676672 (91.03 it/sec) -training >> step=4505400, episode=752 reward=0.7678384 (523.73 it/sec) -training >> step=4505500, episode=752 reward=0.773679 (479.98 it/sec) -training >> step=4505600, episode=752 reward=0.7646768 (462.57 it/sec) -training >> step=4505700, episode=752 reward=0.7763006 (497.33 it/sec) -training >> step=4505800, episode=752 reward=0.7824194 (526.66 it/sec) -training >> step=4505900, episode=752 reward=0.7761002 (472.74 it/sec) -training >> step=4506000, episode=752 reward=0.7463 (507.31 it/sec) -training >> step=4506100, episode=752 reward=0.7703281 (476.86 it/sec) -training >> step=4506200, episode=752 reward=0.7805079 (514.61 it/sec) -training >> step=4506300, episode=752 reward=0.7711684 (488.67 it/sec) -training >> step=4506400, episode=752 reward=0.7683913 (490.42 it/sec) -training >> step=4506500, episode=752 reward=0.7741925 (480.38 it/sec) -training >> step=4506600, episode=752 reward=0.7775671 (491.55 it/sec) -training >> step=4506700, episode=752 reward=0.764525 (510.44 it/sec) -training >> step=4506800, episode=752 reward=0.7879944 (495.97 it/sec) -training >> step=4506900, episode=752 reward=0.7699881 (498.29 it/sec) -training >> step=4507000, episode=752 reward=0.7655573 (484.31 it/sec) -training >> step=4507100, episode=752 reward=0.7807075 (467.62 it/sec) -training >> step=4507200, episode=752 reward=0.7564275 (527.79 it/sec) -training >> step=4507300, episode=752 reward=0.7902786 (510.66 it/sec) -training >> step=4507400, episode=752 reward=0.7829799 (342.63 it/sec) -training >> step=4507500, episode=752 reward=0.7812448 (502.95 it/sec) -training >> step=4507600, episode=752 reward=0.7823971 (472.35 it/sec) -training >> step=4507700, episode=752 reward=0.7858874 (490.12 it/sec) -training >> step=4507800, episode=752 reward=0.7713627 (492.54 it/sec) -training >> step=4507900, episode=752 reward=0.7687991 (500.22 it/sec) -training >> step=4508000, episode=752 reward=0.7674673 (493.40 it/sec) -training >> step=4508100, episode=752 reward=0.7785499 (467.97 it/sec) -training >> step=4508200, episode=752 reward=0.783767 (502.77 it/sec) -training >> step=4508300, episode=752 reward=0.7816452 (468.70 it/sec) -training >> step=4508400, episode=752 reward=0.8040755 (462.72 it/sec) -training >> step=4508500, episode=752 reward=0.7727616 (416.26 it/sec) -training >> step=4508600, episode=752 reward=0.7670346 (445.47 it/sec) -training >> step=4508700, episode=752 reward=0.7673379 (522.02 it/sec) -training >> step=4508800, episode=752 reward=0.7637773 (438.38 it/sec) -training >> step=4508900, episode=752 reward=0.776476 (443.35 it/sec) -training >> step=4509000, episode=752 reward=0.7856833 (483.01 it/sec) -training >> step=4509100, episode=752 reward=0.7790897 (462.62 it/sec) -training >> step=4509200, episode=752 reward=0.7903544 (444.19 it/sec) -training >> step=4509300, episode=752 reward=0.7792407 (437.22 it/sec) -training >> step=4509400, episode=752 reward=0.7688616 (513.30 it/sec) -training >> step=4509500, episode=752 reward=0.7939104 (454.56 it/sec) -training >> step=4509600, episode=752 reward=0.7924106 (462.62 it/sec) -training >> step=4509700, episode=752 reward=0.7980309 (462.37 it/sec) -training >> step=4509800, episode=752 reward=0.7573923 (429.74 it/sec) -training >> step=4509900, episode=752 reward=0.7912847 (478.35 it/sec) -training >> step=4510000, episode=752 reward=0.7712353 (449.70 it/sec) -training >> step=4510100, episode=752 reward=0.7670857 (434.32 it/sec) -training >> step=4510200, episode=752 reward=0.767814 (468.50 it/sec) -training >> step=4510300, episode=752 reward=0.7684704 (448.38 it/sec) -training >> step=4510400, episode=752 reward=0.7906818 (448.55 it/sec) -training >> step=4510500, episode=752 reward=0.7796105 (458.26 it/sec) -training >> step=4510600, episode=752 reward=0.7781085 (461.70 it/sec) -training >> step=4510700, episode=752 reward=0.7878682 (457.95 it/sec) -training >> step=4510800, episode=752 reward=0.7563421 (465.79 it/sec) -training >> step=4510900, episode=752 reward=0.7666978 (455.94 it/sec) -training >> step=4511000, episode=752 reward=0.7772608 (477.64 it/sec) -training >> step=4511100, episode=752 reward=0.7869316 (448.61 it/sec) -training >> step=4511200, episode=752 reward=0.7882852 (477.79 it/sec) -training >> step=4511300, episode=753 reward=0.7873584 (92.71 it/sec) -training >> step=4511400, episode=753 reward=0.7496567 (432.61 it/sec) -training >> step=4511500, episode=753 reward=0.763857 (417.97 it/sec) -training >> step=4511600, episode=753 reward=0.7690187 (473.95 it/sec) -training >> step=4511700, episode=753 reward=0.7578158 (425.39 it/sec) -training >> step=4511800, episode=753 reward=0.7837216 (446.64 it/sec) -training >> step=4511900, episode=753 reward=0.776795 (466.81 it/sec) -training >> step=4512000, episode=753 reward=0.7658576 (446.33 it/sec) -training >> step=4512100, episode=753 reward=0.7766283 (459.03 it/sec) -training >> step=4512200, episode=753 reward=0.7936994 (459.81 it/sec) -training >> step=4512300, episode=753 reward=0.7641758 (462.37 it/sec) -training >> step=4512400, episode=753 reward=0.7706968 (425.92 it/sec) -training >> step=4512500, episode=753 reward=0.7863916 (419.62 it/sec) -training >> step=4512600, episode=753 reward=0.7733672 (422.95 it/sec) -training >> step=4512700, episode=753 reward=0.7644795 (467.93 it/sec) -training >> step=4512800, episode=753 reward=0.778393 (471.94 it/sec) -training >> step=4512900, episode=753 reward=0.7749622 (429.44 it/sec) -training >> step=4513000, episode=753 reward=0.7723445 (401.45 it/sec) -training >> step=4513100, episode=753 reward=0.784796 (459.50 it/sec) -training >> step=4513200, episode=753 reward=0.8022128 (464.77 it/sec) -training >> step=4513300, episode=753 reward=0.7798793 (462.43 it/sec) -training >> step=4513400, episode=753 reward=0.7912796 (487.58 it/sec) -training >> step=4513500, episode=753 reward=0.7981709 (468.75 it/sec) -training >> step=4513600, episode=753 reward=0.786638 (336.73 it/sec) -training >> step=4513700, episode=753 reward=0.790543 (475.15 it/sec) -training >> step=4513800, episode=753 reward=0.7763115 (509.33 it/sec) -training >> step=4513900, episode=753 reward=0.7827071 (433.49 it/sec) -training >> step=4514000, episode=753 reward=0.7870603 (463.86 it/sec) -training >> step=4514100, episode=753 reward=0.773641 (527.96 it/sec) -training >> step=4514200, episode=753 reward=0.7795736 (463.74 it/sec) -training >> step=4514300, episode=753 reward=0.7818606 (463.44 it/sec) -training >> step=4514400, episode=753 reward=0.7537375 (485.56 it/sec) -training >> step=4514500, episode=753 reward=0.7812659 (476.84 it/sec) -training >> step=4514600, episode=753 reward=0.797785 (449.80 it/sec) -training >> step=4514700, episode=753 reward=0.7767551 (474.10 it/sec) -training >> step=4514800, episode=753 reward=0.7893727 (494.75 it/sec) -training >> step=4514900, episode=753 reward=0.7705194 (457.11 it/sec) -training >> step=4515000, episode=753 reward=0.7650887 (481.82 it/sec) -training >> step=4515100, episode=753 reward=0.7602785 (422.15 it/sec) -training >> step=4515200, episode=753 reward=0.7770705 (499.06 it/sec) -training >> step=4515300, episode=753 reward=0.780805 (464.86 it/sec) -training >> step=4515400, episode=753 reward=0.7700723 (492.01 it/sec) -training >> step=4515500, episode=753 reward=0.7897731 (449.35 it/sec) -training >> step=4515600, episode=753 reward=0.7825843 (454.99 it/sec) -training >> step=4515700, episode=753 reward=0.7826139 (466.05 it/sec) -training >> step=4515800, episode=753 reward=0.7566656 (480.66 it/sec) -training >> step=4515900, episode=753 reward=0.7790061 (473.02 it/sec) -training >> step=4516000, episode=753 reward=0.7868595 (447.45 it/sec) -training >> step=4516100, episode=753 reward=0.7786756 (461.25 it/sec) -training >> step=4516200, episode=753 reward=0.773227 (413.98 it/sec) -training >> step=4516300, episode=753 reward=0.7875407 (490.60 it/sec) -training >> step=4516400, episode=753 reward=0.7684258 (452.47 it/sec) -training >> step=4516500, episode=753 reward=0.7666161 (471.70 it/sec) -training >> step=4516600, episode=753 reward=0.750801 (470.36 it/sec) -training >> step=4516700, episode=753 reward=0.7719655 (489.67 it/sec) -training >> step=4516800, episode=753 reward=0.7940267 (477.49 it/sec) -training >> step=4516900, episode=753 reward=0.7769984 (435.69 it/sec) -training >> step=4517000, episode=753 reward=0.7627413 (509.93 it/sec) -training >> step=4517100, episode=753 reward=0.7832231 (446.99 it/sec) -training >> step=4517200, episode=753 reward=0.7869036 (499.00 it/sec) -training >> step=4517300, episode=754 reward=0.7684966 (131.21 it/sec) -training >> step=4517400, episode=754 reward=0.7726243 (456.42 it/sec) -training >> step=4517500, episode=754 reward=0.7620085 (404.09 it/sec) -training >> step=4517600, episode=754 reward=0.7761645 (434.83 it/sec) -training >> step=4517700, episode=754 reward=0.8099432 (446.60 it/sec) -training >> step=4517800, episode=754 reward=0.776979 (441.17 it/sec) -training >> step=4517900, episode=754 reward=0.782132 (450.60 it/sec) -training >> step=4518000, episode=754 reward=0.7579893 (444.80 it/sec) -training >> step=4518100, episode=754 reward=0.7841957 (469.08 it/sec) -training >> step=4518200, episode=754 reward=0.7781043 (476.04 it/sec) -training >> step=4518300, episode=754 reward=0.7531061 (425.79 it/sec) -training >> step=4518400, episode=754 reward=0.7554885 (477.64 it/sec) -training >> step=4518500, episode=754 reward=0.7835937 (468.30 it/sec) -training >> step=4518600, episode=754 reward=0.7820922 (450.75 it/sec) -training >> step=4518700, episode=754 reward=0.7908378 (465.43 it/sec) -training >> step=4518800, episode=754 reward=0.7565909 (443.21 it/sec) -training >> step=4518900, episode=754 reward=0.7861755 (432.09 it/sec) -training >> step=4519000, episode=754 reward=0.7879386 (482.31 it/sec) -training >> step=4519100, episode=754 reward=0.7717584 (410.32 it/sec) -training >> step=4519200, episode=754 reward=0.7771562 (439.19 it/sec) -training >> step=4519300, episode=754 reward=0.8005874 (462.95 it/sec) -training >> step=4519400, episode=754 reward=0.7650425 (454.53 it/sec) -training >> step=4519500, episode=754 reward=0.7605039 (467.11 it/sec) -training >> step=4519600, episode=754 reward=0.7906207 (469.79 it/sec) -training >> step=4519700, episode=754 reward=0.7864274 (318.55 it/sec) -training >> step=4519800, episode=754 reward=0.7832873 (439.89 it/sec) -training >> step=4519900, episode=754 reward=0.7740343 (481.50 it/sec) -training >> step=4520000, episode=754 reward=0.7852471 (452.54 it/sec) -training >> step=4520100, episode=754 reward=0.7849463 (458.39 it/sec) -training >> step=4520200, episode=754 reward=0.7696772 (397.10 it/sec) -training >> step=4520300, episode=754 reward=0.7715061 (487.77 it/sec) -training >> step=4520400, episode=754 reward=0.7825816 (461.85 it/sec) -training >> step=4520500, episode=754 reward=0.7854179 (443.65 it/sec) -training >> step=4520600, episode=754 reward=0.7936795 (477.71 it/sec) -training >> step=4520700, episode=754 reward=0.784068 (481.91 it/sec) -training >> step=4520800, episode=754 reward=0.791503 (428.87 it/sec) -training >> step=4520900, episode=754 reward=0.7721245 (467.52 it/sec) -training >> step=4521000, episode=754 reward=0.7766315 (506.80 it/sec) -training >> step=4521100, episode=754 reward=0.7675825 (490.27 it/sec) -training >> step=4521200, episode=754 reward=0.7987548 (471.63 it/sec) -training >> step=4521300, episode=754 reward=0.7724065 (447.46 it/sec) -training >> step=4521400, episode=754 reward=0.7719557 (515.11 it/sec) -training >> step=4521500, episode=754 reward=0.7850007 (443.12 it/sec) -training >> step=4521600, episode=754 reward=0.7899453 (496.09 it/sec) -training >> step=4521700, episode=754 reward=0.7736544 (457.09 it/sec) -training >> step=4521800, episode=754 reward=0.7677168 (538.85 it/sec) -training >> step=4521900, episode=754 reward=0.7574914 (470.97 it/sec) -training >> step=4522000, episode=754 reward=0.7624862 (478.14 it/sec) -training >> step=4522100, episode=754 reward=0.7871937 (444.29 it/sec) -training >> step=4522200, episode=754 reward=0.8053425 (491.83 it/sec) -training >> step=4522300, episode=754 reward=0.7741197 (486.06 it/sec) -training >> step=4522400, episode=754 reward=0.8064425 (500.74 it/sec) -training >> step=4522500, episode=754 reward=0.7717357 (497.41 it/sec) -training >> step=4522600, episode=754 reward=0.7747599 (444.79 it/sec) -training >> step=4522700, episode=754 reward=0.7797937 (473.31 it/sec) -training >> step=4522800, episode=754 reward=0.7965311 (454.99 it/sec) -training >> step=4522900, episode=754 reward=0.7825511 (512.01 it/sec) -training >> step=4523000, episode=754 reward=0.7925995 (483.27 it/sec) -training >> step=4523100, episode=754 reward=0.7758485 (469.66 it/sec) -training >> step=4523200, episode=754 reward=0.7632911 (495.01 it/sec) -training >> step=4523300, episode=755 reward=0.7507271 (119.04 it/sec) -training >> step=4523400, episode=755 reward=0.7899674 (484.29 it/sec) -training >> step=4523500, episode=755 reward=0.7626693 (485.70 it/sec) -training >> step=4523600, episode=755 reward=0.7789079 (507.91 it/sec) -training >> step=4523700, episode=755 reward=0.7595999 (466.69 it/sec) -training >> step=4523800, episode=755 reward=0.7800199 (437.07 it/sec) -training >> step=4523900, episode=755 reward=0.7723456 (497.27 it/sec) -training >> step=4524000, episode=755 reward=0.7913888 (474.11 it/sec) -training >> step=4524100, episode=755 reward=0.7900764 (482.02 it/sec) -training >> step=4524200, episode=755 reward=0.762849 (456.68 it/sec) -training >> step=4524300, episode=755 reward=0.7596302 (463.39 it/sec) -training >> step=4524400, episode=755 reward=0.7828326 (452.90 it/sec) -training >> step=4524500, episode=755 reward=0.7823822 (493.86 it/sec) -training >> step=4524600, episode=755 reward=0.7849995 (495.56 it/sec) -training >> step=4524700, episode=755 reward=0.7898588 (454.10 it/sec) -training >> step=4524800, episode=755 reward=0.7776461 (436.77 it/sec) -training >> step=4524900, episode=755 reward=0.7870472 (507.15 it/sec) -training >> step=4525000, episode=755 reward=0.7798962 (455.89 it/sec) -training >> step=4525100, episode=755 reward=0.8016235 (506.09 it/sec) -training >> step=4525200, episode=755 reward=0.7959059 (470.97 it/sec) -training >> step=4525300, episode=755 reward=0.7900881 (480.14 it/sec) -training >> step=4525400, episode=755 reward=0.7921992 (503.81 it/sec) -training >> step=4525500, episode=755 reward=0.7796842 (462.48 it/sec) -training >> step=4525600, episode=755 reward=0.7778957 (441.94 it/sec) -training >> step=4525700, episode=755 reward=0.7699075 (494.57 it/sec) -training >> step=4525800, episode=755 reward=0.7874211 (476.50 it/sec) -training >> step=4525900, episode=755 reward=0.7504186 (322.89 it/sec) -training >> step=4526000, episode=755 reward=0.7666413 (439.69 it/sec) -training >> step=4526100, episode=755 reward=0.7766614 (452.91 it/sec) -training >> step=4526200, episode=755 reward=0.7947733 (426.12 it/sec) -training >> step=4526300, episode=755 reward=0.7975794 (412.21 it/sec) -training >> step=4526400, episode=755 reward=0.7761608 (432.58 it/sec) -training >> step=4526500, episode=755 reward=0.7647769 (390.36 it/sec) -training >> step=4526600, episode=755 reward=0.7804267 (451.46 it/sec) -training >> step=4526700, episode=755 reward=0.7671135 (457.31 it/sec) -training >> step=4526800, episode=755 reward=0.7952326 (502.78 it/sec) -training >> step=4526900, episode=755 reward=0.7822477 (462.77 it/sec) -training >> step=4527000, episode=755 reward=0.7650199 (459.29 it/sec) -training >> step=4527100, episode=755 reward=0.7744549 (444.03 it/sec) -training >> step=4527200, episode=755 reward=0.7810147 (508.42 it/sec) -training >> step=4527300, episode=755 reward=0.791588 (459.94 it/sec) -training >> step=4527400, episode=755 reward=0.7619884 (469.20 it/sec) -training >> step=4527500, episode=755 reward=0.7677044 (446.34 it/sec) -training >> step=4527600, episode=755 reward=0.7633453 (529.37 it/sec) -training >> step=4527700, episode=755 reward=0.7833469 (492.63 it/sec) -training >> step=4527800, episode=755 reward=0.7676576 (489.48 it/sec) -training >> step=4527900, episode=755 reward=0.7795559 (500.06 it/sec) -training >> step=4528000, episode=755 reward=0.7827206 (478.61 it/sec) -training >> step=4528100, episode=755 reward=0.7665271 (442.14 it/sec) -training >> step=4528200, episode=755 reward=0.7742855 (490.95 it/sec) -training >> step=4528300, episode=755 reward=0.7825867 (497.79 it/sec) -training >> step=4528400, episode=755 reward=0.7685784 (449.62 it/sec) -training >> step=4528500, episode=755 reward=0.7685936 (457.32 it/sec) -training >> step=4528600, episode=755 reward=0.7886677 (434.82 it/sec) -training >> step=4528700, episode=755 reward=0.7720658 (487.72 it/sec) -training >> step=4528800, episode=755 reward=0.7639267 (506.75 it/sec) -training >> step=4528900, episode=755 reward=0.7747328 (514.47 it/sec) -training >> step=4529000, episode=755 reward=0.7893228 (476.98 it/sec) -training >> step=4529100, episode=755 reward=0.7803379 (483.09 it/sec) -training >> step=4529200, episode=755 reward=0.7601488 (460.71 it/sec) -training >> step=4529300, episode=756 reward=0.7655195 (132.85 it/sec) -training >> step=4529400, episode=756 reward=0.7769406 (514.51 it/sec) -training >> step=4529500, episode=756 reward=0.7657788 (471.28 it/sec) -training >> step=4529600, episode=756 reward=0.7648194 (463.25 it/sec) -training >> step=4529700, episode=756 reward=0.7906562 (453.18 it/sec) -training >> step=4529800, episode=756 reward=0.7789263 (501.08 it/sec) -training >> step=4529900, episode=756 reward=0.788391 (466.68 it/sec) -training >> step=4530000, episode=756 reward=0.7884596 (519.61 it/sec) -training >> step=4530100, episode=756 reward=0.7844065 (509.47 it/sec) -training >> step=4530200, episode=756 reward=0.7820774 (471.37 it/sec) -training >> step=4530300, episode=756 reward=0.7817199 (455.98 it/sec) -training >> step=4530400, episode=756 reward=0.7763953 (502.03 it/sec) -training >> step=4530500, episode=756 reward=0.7652857 (486.63 it/sec) -training >> step=4530600, episode=756 reward=0.7678287 (479.26 it/sec) -training >> step=4530700, episode=756 reward=0.7781432 (469.99 it/sec) -training >> step=4530800, episode=756 reward=0.776045 (451.80 it/sec) -training >> step=4530900, episode=756 reward=0.7781006 (470.68 it/sec) -training >> step=4531000, episode=756 reward=0.7958956 (486.66 it/sec) -training >> step=4531100, episode=756 reward=0.7839803 (474.85 it/sec) -training >> step=4531200, episode=756 reward=0.7707004 (439.98 it/sec) -training >> step=4531300, episode=756 reward=0.7988873 (446.24 it/sec) -training >> step=4531400, episode=756 reward=0.7887231 (444.09 it/sec) -training >> step=4531500, episode=756 reward=0.7735696 (511.02 it/sec) -training >> step=4531600, episode=756 reward=0.7705532 (473.30 it/sec) -training >> step=4531700, episode=756 reward=0.8072279 (450.61 it/sec) -training >> step=4531800, episode=756 reward=0.792894 (491.94 it/sec) -training >> step=4531900, episode=756 reward=0.7766761 (504.10 it/sec) -training >> step=4532000, episode=756 reward=0.7747833 (361.64 it/sec) -training >> step=4532100, episode=756 reward=0.7756271 (442.27 it/sec) -training >> step=4532200, episode=756 reward=0.776729 (469.54 it/sec) -training >> step=4532300, episode=756 reward=0.7800869 (484.61 it/sec) -training >> step=4532400, episode=756 reward=0.763082 (480.84 it/sec) -training >> step=4532500, episode=756 reward=0.7726467 (457.43 it/sec) -training >> step=4532600, episode=756 reward=0.7841623 (492.98 it/sec) -training >> step=4532700, episode=756 reward=0.7724611 (497.29 it/sec) -training >> step=4532800, episode=756 reward=0.7765895 (509.11 it/sec) -training >> step=4532900, episode=756 reward=0.7730157 (483.10 it/sec) -training >> step=4533000, episode=756 reward=0.785894 (497.27 it/sec) -training >> step=4533100, episode=756 reward=0.7772195 (440.08 it/sec) -training >> step=4533200, episode=756 reward=0.7623455 (458.73 it/sec) -training >> step=4533300, episode=756 reward=0.7682422 (473.07 it/sec) -training >> step=4533400, episode=756 reward=0.769012 (489.57 it/sec) -training >> step=4533500, episode=756 reward=0.7802273 (467.48 it/sec) -training >> step=4533600, episode=756 reward=0.7798741 (387.04 it/sec) -training >> step=4533700, episode=756 reward=0.7693977 (478.16 it/sec) -training >> step=4533800, episode=756 reward=0.7693866 (463.73 it/sec) -training >> step=4533900, episode=756 reward=0.7793149 (453.10 it/sec) -training >> step=4534000, episode=756 reward=0.758945 (468.26 it/sec) -training >> step=4534100, episode=756 reward=0.7897171 (457.03 it/sec) -training >> step=4534200, episode=756 reward=0.7553178 (441.98 it/sec) -training >> step=4534300, episode=756 reward=0.7651275 (433.50 it/sec) -training >> step=4534400, episode=756 reward=0.7817152 (436.13 it/sec) -training >> step=4534500, episode=756 reward=0.7649384 (447.89 it/sec) -training >> step=4534600, episode=756 reward=0.7615414 (509.95 it/sec) -training >> step=4534700, episode=756 reward=0.784871 (500.72 it/sec) -training >> step=4534800, episode=756 reward=0.7608763 (525.51 it/sec) -training >> step=4534900, episode=756 reward=0.7825868 (472.24 it/sec) -training >> step=4535000, episode=756 reward=0.7771568 (462.24 it/sec) -training >> step=4535100, episode=756 reward=0.7802346 (516.22 it/sec) -training >> step=4535200, episode=756 reward=0.7707189 (549.60 it/sec) -training >> step=4535300, episode=757 reward=0.7558448 (114.54 it/sec) -training >> step=4535400, episode=757 reward=0.7514429 (518.17 it/sec) -training >> step=4535500, episode=757 reward=0.7693763 (482.63 it/sec) -training >> step=4535600, episode=757 reward=0.7690277 (487.48 it/sec) -training >> step=4535700, episode=757 reward=0.7800824 (487.34 it/sec) -training >> step=4535800, episode=757 reward=0.7906596 (485.44 it/sec) -training >> step=4535900, episode=757 reward=0.7797264 (461.83 it/sec) -training >> step=4536000, episode=757 reward=0.7610657 (523.76 it/sec) -training >> step=4536100, episode=757 reward=0.7862272 (479.83 it/sec) -training >> step=4536200, episode=757 reward=0.7893156 (489.26 it/sec) -training >> step=4536300, episode=757 reward=0.7975626 (495.05 it/sec) -training >> step=4536400, episode=757 reward=0.7784202 (472.13 it/sec) -training >> step=4536500, episode=757 reward=0.774089 (466.13 it/sec) -training >> step=4536600, episode=757 reward=0.7776155 (490.27 it/sec) -training >> step=4536700, episode=757 reward=0.7589797 (455.08 it/sec) -training >> step=4536800, episode=757 reward=0.7787529 (495.93 it/sec) -training >> step=4536900, episode=757 reward=0.7685124 (507.54 it/sec) -training >> step=4537000, episode=757 reward=0.774062 (516.09 it/sec) -training >> step=4537100, episode=757 reward=0.7790684 (520.97 it/sec) -training >> step=4537200, episode=757 reward=0.7710931 (492.80 it/sec) -training >> step=4537300, episode=757 reward=0.7922521 (512.18 it/sec) -training >> step=4537400, episode=757 reward=0.793793 (528.17 it/sec) -training >> step=4537500, episode=757 reward=0.7844143 (543.28 it/sec) -training >> step=4537600, episode=757 reward=0.7919518 (498.79 it/sec) -training >> step=4537700, episode=757 reward=0.7740007 (555.81 it/sec) -training >> step=4537800, episode=757 reward=0.7832103 (498.00 it/sec) -training >> step=4537900, episode=757 reward=0.7615966 (487.21 it/sec) -training >> step=4538000, episode=757 reward=0.7762586 (570.61 it/sec) -training >> step=4538100, episode=757 reward=0.783258 (376.49 it/sec) -training >> step=4538200, episode=757 reward=0.7759514 (503.13 it/sec) -training >> step=4538300, episode=757 reward=0.7774839 (502.03 it/sec) -training >> step=4538400, episode=757 reward=0.7692999 (556.44 it/sec) -training >> step=4538500, episode=757 reward=0.7820846 (533.81 it/sec) -training >> step=4538600, episode=757 reward=0.7602605 (471.04 it/sec) -training >> step=4538700, episode=757 reward=0.7725172 (496.47 it/sec) -training >> step=4538800, episode=757 reward=0.8046476 (535.77 it/sec) -training >> step=4538900, episode=757 reward=0.7893072 (533.19 it/sec) -training >> step=4539000, episode=757 reward=0.7907691 (531.06 it/sec) -training >> step=4539100, episode=757 reward=0.777192 (548.27 it/sec) -training >> step=4539200, episode=757 reward=0.783523 (487.91 it/sec) -training >> step=4539300, episode=757 reward=0.7618163 (484.62 it/sec) -training >> step=4539400, episode=757 reward=0.7789857 (524.55 it/sec) -training >> step=4539500, episode=757 reward=0.7798339 (566.03 it/sec) -training >> step=4539600, episode=757 reward=0.7831813 (541.15 it/sec) -training >> step=4539700, episode=757 reward=0.7882416 (482.18 it/sec) -training >> step=4539800, episode=757 reward=0.772585 (509.05 it/sec) -training >> step=4539900, episode=757 reward=0.7758317 (442.05 it/sec) -training >> step=4540000, episode=757 reward=0.7811053 (504.91 it/sec) -training >> step=4540100, episode=757 reward=0.751997 (527.07 it/sec) -training >> step=4540200, episode=757 reward=0.7662208 (526.49 it/sec) -training >> step=4540300, episode=757 reward=0.791591 (459.45 it/sec) -training >> step=4540400, episode=757 reward=0.7739235 (477.52 it/sec) -training >> step=4540500, episode=757 reward=0.7725207 (545.45 it/sec) -training >> step=4540600, episode=757 reward=0.7531146 (526.35 it/sec) -training >> step=4540700, episode=757 reward=0.7665805 (465.56 it/sec) -training >> step=4540800, episode=757 reward=0.770951 (469.13 it/sec) -training >> step=4540900, episode=757 reward=0.7661722 (535.30 it/sec) -training >> step=4541000, episode=757 reward=0.7851747 (490.60 it/sec) -training >> step=4541100, episode=757 reward=0.7357602 (518.93 it/sec) -training >> step=4541200, episode=757 reward=0.7750774 (506.33 it/sec) -training >> step=4541300, episode=758 reward=0.7455878 (123.33 it/sec) -training >> step=4541400, episode=758 reward=0.786915 (469.99 it/sec) -training >> step=4541500, episode=758 reward=0.7487458 (459.30 it/sec) -training >> step=4541600, episode=758 reward=0.763887 (522.56 it/sec) -training >> step=4541700, episode=758 reward=0.7734269 (476.09 it/sec) -training >> step=4541800, episode=758 reward=0.7672174 (466.55 it/sec) -training >> step=4541900, episode=758 reward=0.8014971 (487.49 it/sec) -training >> step=4542000, episode=758 reward=0.7724255 (505.38 it/sec) -training >> step=4542100, episode=758 reward=0.7778293 (479.67 it/sec) -training >> step=4542200, episode=758 reward=0.768532 (482.56 it/sec) -training >> step=4542300, episode=758 reward=0.7665468 (520.78 it/sec) -training >> step=4542400, episode=758 reward=0.8014051 (499.57 it/sec) -training >> step=4542500, episode=758 reward=0.7739968 (496.64 it/sec) -training >> step=4542600, episode=758 reward=0.7752411 (472.03 it/sec) -training >> step=4542700, episode=758 reward=0.7820321 (531.15 it/sec) -training >> step=4542800, episode=758 reward=0.7705074 (478.18 it/sec) -training >> step=4542900, episode=758 reward=0.7896679 (488.58 it/sec) -training >> step=4543000, episode=758 reward=0.7623941 (500.17 it/sec) -training >> step=4543100, episode=758 reward=0.7591497 (462.30 it/sec) -training >> step=4543200, episode=758 reward=0.7738903 (483.24 it/sec) -training >> step=4543300, episode=758 reward=0.782946 (491.41 it/sec) -training >> step=4543400, episode=758 reward=0.7568845 (489.89 it/sec) -training >> step=4543500, episode=758 reward=0.7809231 (540.78 it/sec) -training >> step=4543600, episode=758 reward=0.7781498 (485.18 it/sec) -training >> step=4543700, episode=758 reward=0.7746041 (460.66 it/sec) -training >> step=4543800, episode=758 reward=0.7888164 (514.65 it/sec) -training >> step=4543900, episode=758 reward=0.7859034 (472.55 it/sec) -training >> step=4544000, episode=758 reward=0.7917654 (487.77 it/sec) -training >> step=4544100, episode=758 reward=0.7818996 (501.27 it/sec) -training >> step=4544200, episode=758 reward=0.759262 (348.95 it/sec) -training >> step=4544300, episode=758 reward=0.7779305 (465.04 it/sec) -training >> step=4544400, episode=758 reward=0.7714165 (467.80 it/sec) -training >> step=4544500, episode=758 reward=0.7860842 (479.47 it/sec) -training >> step=4544600, episode=758 reward=0.7839119 (511.48 it/sec) -training >> step=4544700, episode=758 reward=0.7780216 (521.47 it/sec) -training >> step=4544800, episode=758 reward=0.7906985 (457.30 it/sec) -training >> step=4544900, episode=758 reward=0.7760539 (455.27 it/sec) -training >> step=4545000, episode=758 reward=0.7836294 (430.53 it/sec) -training >> step=4545100, episode=758 reward=0.7630866 (490.26 it/sec) -training >> step=4545200, episode=758 reward=0.7727925 (524.16 it/sec) -training >> step=4545300, episode=758 reward=0.7729706 (515.21 it/sec) -training >> step=4545400, episode=758 reward=0.7686255 (484.69 it/sec) -training >> step=4545500, episode=758 reward=0.7686273 (454.91 it/sec) -training >> step=4545600, episode=758 reward=0.7869543 (490.81 it/sec) -training >> step=4545700, episode=758 reward=0.7683331 (510.95 it/sec) -training >> step=4545800, episode=758 reward=0.7718007 (532.88 it/sec) -training >> step=4545900, episode=758 reward=0.7742863 (470.77 it/sec) -training >> step=4546000, episode=758 reward=0.7567008 (489.74 it/sec) -training >> step=4546100, episode=758 reward=0.7662181 (440.53 it/sec) -training >> step=4546200, episode=758 reward=0.7673789 (475.77 it/sec) -training >> step=4546300, episode=758 reward=0.7679691 (477.45 it/sec) -training >> step=4546400, episode=758 reward=0.7772771 (500.51 it/sec) -training >> step=4546500, episode=758 reward=0.7584139 (493.16 it/sec) -training >> step=4546600, episode=758 reward=0.7828974 (475.01 it/sec) -training >> step=4546700, episode=758 reward=0.7807763 (522.21 it/sec) -training >> step=4546800, episode=758 reward=0.7752556 (513.73 it/sec) -training >> step=4546900, episode=758 reward=0.7833438 (452.71 it/sec) -training >> step=4547000, episode=758 reward=0.7699612 (477.92 it/sec) -training >> step=4547100, episode=758 reward=0.7769965 (493.83 it/sec) -training >> step=4547200, episode=758 reward=0.7721353 (478.43 it/sec) -training >> step=4547300, episode=759 reward=0.7874417 (106.57 it/sec) -training >> step=4547400, episode=759 reward=0.7651395 (490.57 it/sec) -training >> step=4547500, episode=759 reward=0.7470258 (450.78 it/sec) -training >> step=4547600, episode=759 reward=0.7732621 (465.07 it/sec) -training >> step=4547700, episode=759 reward=0.78321 (473.40 it/sec) -training >> step=4547800, episode=759 reward=0.7755174 (461.41 it/sec) -training >> step=4547900, episode=759 reward=0.7915331 (493.20 it/sec) -training >> step=4548000, episode=759 reward=0.7825125 (503.17 it/sec) -training >> step=4548100, episode=759 reward=0.7597035 (457.37 it/sec) -training >> step=4548200, episode=759 reward=0.7805501 (477.85 it/sec) -training >> step=4548300, episode=759 reward=0.7749885 (522.59 it/sec) -training >> step=4548400, episode=759 reward=0.7736775 (503.81 it/sec) -training >> step=4548500, episode=759 reward=0.763906 (432.73 it/sec) -training >> step=4548600, episode=759 reward=0.7886996 (466.99 it/sec) -training >> step=4548700, episode=759 reward=0.7647553 (496.68 it/sec) -training >> step=4548800, episode=759 reward=0.7855887 (497.15 it/sec) -training >> step=4548900, episode=759 reward=0.7712391 (505.24 it/sec) -training >> step=4549000, episode=759 reward=0.7809499 (422.30 it/sec) -training >> step=4549100, episode=759 reward=0.7940404 (442.62 it/sec) -training >> step=4549200, episode=759 reward=0.7939781 (463.04 it/sec) -training >> step=4549300, episode=759 reward=0.7752129 (493.42 it/sec) -training >> step=4549400, episode=759 reward=0.7658004 (482.75 it/sec) -training >> step=4549500, episode=759 reward=0.7748102 (419.64 it/sec) -training >> step=4549600, episode=759 reward=0.7715241 (467.62 it/sec) -training >> step=4549700, episode=759 reward=0.7696025 (471.22 it/sec) -training >> step=4549800, episode=759 reward=0.7673243 (476.34 it/sec) -training >> step=4549900, episode=759 reward=0.7749829 (430.35 it/sec) -training >> step=4550000, episode=759 reward=0.7733796 (484.61 it/sec) -training >> step=4550100, episode=759 reward=0.7704269 (486.48 it/sec) -training >> step=4550200, episode=759 reward=0.7694749 (480.35 it/sec) -training >> step=4550300, episode=759 reward=0.7836811 (471.09 it/sec) -training >> step=4550400, episode=759 reward=0.7606184 (387.97 it/sec) -training >> step=4550500, episode=759 reward=0.7745549 (516.58 it/sec) -training >> step=4550600, episode=759 reward=0.7853783 (527.29 it/sec) -training >> step=4550700, episode=759 reward=0.7803972 (520.47 it/sec) -training >> step=4550800, episode=759 reward=0.7923304 (525.04 it/sec) -training >> step=4550900, episode=759 reward=0.7627172 (512.25 it/sec) -training >> step=4551000, episode=759 reward=0.7731264 (474.31 it/sec) -training >> step=4551100, episode=759 reward=0.7726034 (509.58 it/sec) -training >> step=4551200, episode=759 reward=0.8044916 (552.81 it/sec) -training >> step=4551300, episode=759 reward=0.7713185 (542.93 it/sec) -training >> step=4551400, episode=759 reward=0.7684742 (486.07 it/sec) -training >> step=4551500, episode=759 reward=0.7810621 (528.38 it/sec) -training >> step=4551600, episode=759 reward=0.773239 (510.22 it/sec) -training >> step=4551700, episode=759 reward=0.7955582 (508.42 it/sec) -training >> step=4551800, episode=759 reward=0.7803834 (548.40 it/sec) -training >> step=4551900, episode=759 reward=0.780894 (489.28 it/sec) -training >> step=4552000, episode=759 reward=0.7960159 (497.99 it/sec) -training >> step=4552100, episode=759 reward=0.7881083 (520.71 it/sec) -training >> step=4552200, episode=759 reward=0.7876098 (495.86 it/sec) -training >> step=4552300, episode=759 reward=0.766619 (567.07 it/sec) -training >> step=4552400, episode=759 reward=0.7641979 (538.90 it/sec) -training >> step=4552500, episode=759 reward=0.7792295 (480.56 it/sec) -training >> step=4552600, episode=759 reward=0.7730407 (539.68 it/sec) -training >> step=4552700, episode=759 reward=0.7708009 (491.19 it/sec) -training >> step=4552800, episode=759 reward=0.7612739 (530.01 it/sec) -training >> step=4552900, episode=759 reward=0.7781343 (547.63 it/sec) -training >> step=4553000, episode=759 reward=0.7763742 (542.79 it/sec) -training >> step=4553100, episode=759 reward=0.7720577 (477.41 it/sec) -training >> step=4553200, episode=759 reward=0.7890751 (489.19 it/sec) -training >> step=4553300, episode=760 reward=0.7643516 (142.40 it/sec) -training >> step=4553400, episode=760 reward=0.759521 (516.48 it/sec) -training >> step=4553500, episode=760 reward=0.7592772 (491.81 it/sec) -training >> step=4553600, episode=760 reward=0.743099 (471.71 it/sec) -training >> step=4553700, episode=760 reward=0.7826015 (443.44 it/sec) -training >> step=4553800, episode=760 reward=0.7907299 (463.17 it/sec) -training >> step=4553900, episode=760 reward=0.7598075 (508.24 it/sec) -training >> step=4554000, episode=760 reward=0.7902969 (505.66 it/sec) -training >> step=4554100, episode=760 reward=0.7841111 (498.08 it/sec) -training >> step=4554200, episode=760 reward=0.7849368 (475.43 it/sec) -training >> step=4554300, episode=760 reward=0.7705173 (479.31 it/sec) -training >> step=4554400, episode=760 reward=0.7694718 (474.40 it/sec) -training >> step=4554500, episode=760 reward=0.7939538 (488.83 it/sec) -training >> step=4554600, episode=760 reward=0.7759702 (505.16 it/sec) -training >> step=4554700, episode=760 reward=0.7861444 (517.86 it/sec) -training >> step=4554800, episode=760 reward=0.7687736 (497.21 it/sec) -training >> step=4554900, episode=760 reward=0.7726218 (479.42 it/sec) -training >> step=4555000, episode=760 reward=0.7697333 (498.78 it/sec) -training >> step=4555100, episode=760 reward=0.7883688 (514.63 it/sec) -training >> step=4555200, episode=760 reward=0.7902476 (542.68 it/sec) -training >> step=4555300, episode=760 reward=0.7798156 (497.98 it/sec) -training >> step=4555400, episode=760 reward=0.7913274 (471.85 it/sec) -training >> step=4555500, episode=760 reward=0.7881725 (463.35 it/sec) -training >> step=4555600, episode=760 reward=0.7453819 (475.65 it/sec) -training >> step=4555700, episode=760 reward=0.7888695 (521.27 it/sec) -training >> step=4555800, episode=760 reward=0.773155 (529.75 it/sec) -training >> step=4555900, episode=760 reward=0.7907931 (512.76 it/sec) -training >> step=4556000, episode=760 reward=0.7763214 (523.12 it/sec) -training >> step=4556100, episode=760 reward=0.793486 (482.35 it/sec) -training >> step=4556200, episode=760 reward=0.7746558 (505.65 it/sec) -training >> step=4556300, episode=760 reward=0.7620211 (519.30 it/sec) -training >> step=4556400, episode=760 reward=0.7954895 (326.61 it/sec) -training >> step=4556500, episode=760 reward=0.7730759 (503.65 it/sec) -training >> step=4556600, episode=760 reward=0.7586749 (559.39 it/sec) -training >> step=4556700, episode=760 reward=0.7666787 (544.85 it/sec) -training >> step=4556800, episode=760 reward=0.7679842 (456.61 it/sec) -training >> step=4556900, episode=760 reward=0.7688181 (467.47 it/sec) -training >> step=4557000, episode=760 reward=0.7868991 (440.10 it/sec) -training >> step=4557100, episode=760 reward=0.8117321 (466.44 it/sec) -training >> step=4557200, episode=760 reward=0.8051009 (407.97 it/sec) -training >> step=4557300, episode=760 reward=0.7697452 (416.43 it/sec) -training >> step=4557400, episode=760 reward=0.7859132 (444.34 it/sec) -training >> step=4557500, episode=760 reward=0.786046 (505.98 it/sec) -training >> step=4557600, episode=760 reward=0.7597176 (502.49 it/sec) -training >> step=4557700, episode=760 reward=0.7983703 (503.55 it/sec) -training >> step=4557800, episode=760 reward=0.7865695 (479.97 it/sec) -training >> step=4557900, episode=760 reward=0.7700135 (420.01 it/sec) -training >> step=4558000, episode=760 reward=0.7637484 (490.92 it/sec) -training >> step=4558100, episode=760 reward=0.7754601 (568.68 it/sec) -training >> step=4558200, episode=760 reward=0.7889388 (515.67 it/sec) -training >> step=4558300, episode=760 reward=0.7521297 (484.95 it/sec) -training >> step=4558400, episode=760 reward=0.7812164 (463.82 it/sec) -training >> step=4558500, episode=760 reward=0.7563101 (500.94 it/sec) -training >> step=4558600, episode=760 reward=0.7795483 (454.78 it/sec) -training >> step=4558700, episode=760 reward=0.7759472 (532.16 it/sec) -training >> step=4558800, episode=760 reward=0.7886673 (491.60 it/sec) -training >> step=4558900, episode=760 reward=0.7831142 (428.37 it/sec) -training >> step=4559000, episode=760 reward=0.7803772 (375.33 it/sec) -training >> step=4559100, episode=760 reward=0.7692411 (408.93 it/sec) -training >> step=4559200, episode=760 reward=0.7877839 (424.02 it/sec) -training >> step=4559300, episode=761 reward=0.7880422 (107.97 it/sec) -training >> step=4559400, episode=761 reward=0.7623435 (315.04 it/sec) -training >> step=4559500, episode=761 reward=0.781297 (433.18 it/sec) -training >> step=4559600, episode=761 reward=0.7875343 (442.51 it/sec) -training >> step=4559700, episode=761 reward=0.7776688 (529.79 it/sec) -training >> step=4559800, episode=761 reward=0.7891926 (493.61 it/sec) -training >> step=4559900, episode=761 reward=0.7805008 (507.40 it/sec) -training >> step=4560000, episode=761 reward=0.7902167 (518.99 it/sec) -training >> step=4560100, episode=761 reward=0.7723988 (575.91 it/sec) -training >> step=4560200, episode=761 reward=0.7535046 (524.14 it/sec) -training >> step=4560300, episode=761 reward=0.7626928 (530.31 it/sec) -training >> step=4560400, episode=761 reward=0.7764432 (512.24 it/sec) -training >> step=4560500, episode=761 reward=0.7736596 (535.12 it/sec) -training >> step=4560600, episode=761 reward=0.7722877 (480.46 it/sec) -training >> step=4560700, episode=761 reward=0.7794671 (447.59 it/sec) -training >> step=4560800, episode=761 reward=0.8055915 (502.57 it/sec) -training >> step=4560900, episode=761 reward=0.784208 (474.75 it/sec) -training >> step=4561000, episode=761 reward=0.7660362 (439.42 it/sec) -training >> step=4561100, episode=761 reward=0.7628605 (446.46 it/sec) -training >> step=4561200, episode=761 reward=0.7839935 (452.57 it/sec) -training >> step=4561300, episode=761 reward=0.7800928 (444.20 it/sec) -training >> step=4561400, episode=761 reward=0.7767013 (502.35 it/sec) -training >> step=4561500, episode=761 reward=0.7820844 (465.61 it/sec) -training >> step=4561600, episode=761 reward=0.7774343 (525.72 it/sec) -training >> step=4561700, episode=761 reward=0.7701033 (465.37 it/sec) -training >> step=4561800, episode=761 reward=0.7624161 (484.34 it/sec) -training >> step=4561900, episode=761 reward=0.7845302 (484.28 it/sec) -training >> step=4562000, episode=761 reward=0.7705649 (435.27 it/sec) -training >> step=4562100, episode=761 reward=0.7797361 (433.93 it/sec) -training >> step=4562200, episode=761 reward=0.7683152 (492.63 it/sec) -training >> step=4562300, episode=761 reward=0.7848771 (482.73 it/sec) -training >> step=4562400, episode=761 reward=0.7901244 (354.01 it/sec) -training >> step=4562500, episode=761 reward=0.7899061 (507.55 it/sec) -training >> step=4562600, episode=761 reward=0.7947221 (492.90 it/sec) -training >> step=4562700, episode=761 reward=0.7717208 (486.99 it/sec) -training >> step=4562800, episode=761 reward=0.7807928 (462.14 it/sec) -training >> step=4562900, episode=761 reward=0.7783048 (440.48 it/sec) -training >> step=4563000, episode=761 reward=0.7897817 (519.28 it/sec) -training >> step=4563100, episode=761 reward=0.7890004 (509.11 it/sec) -training >> step=4563200, episode=761 reward=0.7699097 (521.82 it/sec) -training >> step=4563300, episode=761 reward=0.7595782 (436.35 it/sec) -training >> step=4563400, episode=761 reward=0.7804013 (434.46 it/sec) -training >> step=4563500, episode=761 reward=0.7860476 (457.06 it/sec) -training >> step=4563600, episode=761 reward=0.7784596 (442.47 it/sec) -training >> step=4563700, episode=761 reward=0.7769366 (468.59 it/sec) -training >> step=4563800, episode=761 reward=0.7537861 (503.22 it/sec) -training >> step=4563900, episode=761 reward=0.775773 (429.71 it/sec) -training >> step=4564000, episode=761 reward=0.7504589 (476.82 it/sec) -training >> step=4564100, episode=761 reward=0.7813819 (450.40 it/sec) -training >> step=4564200, episode=761 reward=0.7817285 (449.99 it/sec) -training >> step=4564300, episode=761 reward=0.7525951 (428.55 it/sec) -training >> step=4564400, episode=761 reward=0.7955635 (459.42 it/sec) -training >> step=4564500, episode=761 reward=0.7787132 (523.50 it/sec) -training >> step=4564600, episode=761 reward=0.7575758 (458.59 it/sec) -training >> step=4564700, episode=761 reward=0.7444322 (500.51 it/sec) -training >> step=4564800, episode=761 reward=0.7817037 (408.11 it/sec) -training >> step=4564900, episode=761 reward=0.7661952 (480.06 it/sec) -training >> step=4565000, episode=761 reward=0.756289 (489.99 it/sec) -training >> step=4565100, episode=761 reward=0.7783939 (507.14 it/sec) -training >> step=4565200, episode=761 reward=0.7642456 (499.98 it/sec) -training >> step=4565300, episode=762 reward=0.775847 (115.02 it/sec) -training >> step=4565400, episode=762 reward=0.7709851 (417.93 it/sec) -training >> step=4565500, episode=762 reward=0.7552972 (465.95 it/sec) -training >> step=4565600, episode=762 reward=0.7767866 (501.04 it/sec) -training >> step=4565700, episode=762 reward=0.7990613 (438.73 it/sec) -training >> step=4565800, episode=762 reward=0.7826027 (524.08 it/sec) -training >> step=4565900, episode=762 reward=0.7920828 (507.60 it/sec) -training >> step=4566000, episode=762 reward=0.7789258 (468.00 it/sec) -training >> step=4566100, episode=762 reward=0.7690623 (509.13 it/sec) -training >> step=4566200, episode=762 reward=0.7550293 (498.41 it/sec) -training >> step=4566300, episode=762 reward=0.7687873 (549.43 it/sec) -training >> step=4566400, episode=762 reward=0.7907265 (501.98 it/sec) -training >> step=4566500, episode=762 reward=0.7836753 (497.56 it/sec) -training >> step=4566600, episode=762 reward=0.7811298 (528.96 it/sec) -training >> step=4566700, episode=762 reward=0.7755916 (552.39 it/sec) -training >> step=4566800, episode=762 reward=0.7862294 (489.33 it/sec) -training >> step=4566900, episode=762 reward=0.7680341 (465.44 it/sec) -training >> step=4567000, episode=762 reward=0.7737793 (504.65 it/sec) -training >> step=4567100, episode=762 reward=0.7694668 (460.47 it/sec) -training >> step=4567200, episode=762 reward=0.7768061 (474.08 it/sec) -training >> step=4567300, episode=762 reward=0.7872134 (493.15 it/sec) -training >> step=4567400, episode=762 reward=0.7862199 (467.75 it/sec) -training >> step=4567500, episode=762 reward=0.7830143 (484.14 it/sec) -training >> step=4567600, episode=762 reward=0.7826658 (493.35 it/sec) -training >> step=4567700, episode=762 reward=0.7816491 (512.76 it/sec) -training >> step=4567800, episode=762 reward=0.777714 (472.50 it/sec) -training >> step=4567900, episode=762 reward=0.7866279 (505.93 it/sec) -training >> step=4568000, episode=762 reward=0.7993028 (455.28 it/sec) -training >> step=4568100, episode=762 reward=0.7956275 (526.34 it/sec) -training >> step=4568200, episode=762 reward=0.7789454 (495.40 it/sec) -training >> step=4568300, episode=762 reward=0.7896506 (431.34 it/sec) -training >> step=4568400, episode=762 reward=0.7808681 (477.14 it/sec) -training >> step=4568500, episode=762 reward=0.7616477 (498.00 it/sec) -training >> step=4568600, episode=762 reward=0.7812601 (502.90 it/sec) -training >> step=4568700, episode=762 reward=0.7698488 (436.67 it/sec) -training >> step=4568800, episode=762 reward=0.7718264 (420.33 it/sec) -training >> step=4568900, episode=762 reward=0.7813936 (357.07 it/sec) -training >> step=4569000, episode=762 reward=0.7811636 (480.52 it/sec) -training >> step=4569100, episode=762 reward=0.7564136 (468.85 it/sec) -training >> step=4569200, episode=762 reward=0.7795944 (509.02 it/sec) -training >> step=4569300, episode=762 reward=0.789802 (461.54 it/sec) -training >> step=4569400, episode=762 reward=0.7679186 (436.85 it/sec) -training >> step=4569500, episode=762 reward=0.7758548 (481.78 it/sec) -training >> step=4569600, episode=762 reward=0.7606571 (506.04 it/sec) -training >> step=4569700, episode=762 reward=0.7739288 (471.55 it/sec) -training >> step=4569800, episode=762 reward=0.7834168 (485.60 it/sec) -training >> step=4569900, episode=762 reward=0.7672052 (477.56 it/sec) -training >> step=4570000, episode=762 reward=0.7720153 (478.66 it/sec) -training >> step=4570100, episode=762 reward=0.7656857 (508.90 it/sec) -training >> step=4570200, episode=762 reward=0.7558869 (400.94 it/sec) -training >> step=4570300, episode=762 reward=0.7801606 (439.61 it/sec) -training >> step=4570400, episode=762 reward=0.7597769 (444.19 it/sec) -training >> step=4570500, episode=762 reward=0.7672446 (451.78 it/sec) -training >> step=4570600, episode=762 reward=0.761625 (450.24 it/sec) -training >> step=4570700, episode=762 reward=0.7641823 (453.77 it/sec) -training >> step=4570800, episode=762 reward=0.7500528 (495.00 it/sec) -training >> step=4570900, episode=762 reward=0.7709135 (449.24 it/sec) -training >> step=4571000, episode=762 reward=0.7844862 (497.92 it/sec) -training >> step=4571100, episode=762 reward=0.780015 (441.25 it/sec) -training >> step=4571200, episode=762 reward=0.7609342 (459.58 it/sec) -training >> step=4571300, episode=763 reward=0.7642421 (39.49 it/sec) -training >> step=4571400, episode=763 reward=0.764663 (416.24 it/sec) -training >> step=4571500, episode=763 reward=0.7628145 (393.58 it/sec) -training >> step=4571600, episode=763 reward=0.7887365 (441.68 it/sec) -training >> step=4571700, episode=763 reward=0.7702007 (401.06 it/sec) -training >> step=4571800, episode=763 reward=0.8002005 (408.33 it/sec) -training >> step=4571900, episode=763 reward=0.7856099 (479.23 it/sec) -training >> step=4572000, episode=763 reward=0.7849027 (460.89 it/sec) -training >> step=4572100, episode=763 reward=0.7751368 (451.81 it/sec) -training >> step=4572200, episode=763 reward=0.7780891 (481.36 it/sec) -training >> step=4572300, episode=763 reward=0.7710935 (524.24 it/sec) -training >> step=4572400, episode=763 reward=0.7724256 (531.45 it/sec) -training >> step=4572500, episode=763 reward=0.7782635 (440.69 it/sec) -training >> step=4572600, episode=763 reward=0.7869151 (480.02 it/sec) -training >> step=4572700, episode=763 reward=0.7767543 (482.02 it/sec) -training >> step=4572800, episode=763 reward=0.7722846 (450.51 it/sec) -training >> step=4572900, episode=763 reward=0.7892067 (516.03 it/sec) -training >> step=4573000, episode=763 reward=0.8041372 (474.22 it/sec) -training >> step=4573100, episode=763 reward=0.774481 (462.38 it/sec) -training >> step=4573200, episode=763 reward=0.7834759 (497.31 it/sec) -training >> step=4573300, episode=763 reward=0.7900866 (493.74 it/sec) -training >> step=4573400, episode=763 reward=0.7663023 (415.68 it/sec) -training >> step=4573500, episode=763 reward=0.7756745 (486.78 it/sec) -training >> step=4573600, episode=763 reward=0.789773 (485.08 it/sec) -training >> step=4573700, episode=763 reward=0.7997428 (496.65 it/sec) -training >> step=4573800, episode=763 reward=0.8015282 (508.15 it/sec) -training >> step=4573900, episode=763 reward=0.7666051 (500.05 it/sec) -training >> step=4574000, episode=763 reward=0.7740952 (507.11 it/sec) -training >> step=4574100, episode=763 reward=0.77877 (470.69 it/sec) -training >> step=4574200, episode=763 reward=0.7761447 (520.17 it/sec) -training >> step=4574300, episode=763 reward=0.7903236 (474.74 it/sec) -training >> step=4574400, episode=763 reward=0.7770095 (533.87 it/sec) -training >> step=4574500, episode=763 reward=0.7829565 (466.55 it/sec) -training >> step=4574600, episode=763 reward=0.7918962 (489.97 it/sec) -training >> step=4574700, episode=763 reward=0.7723048 (469.98 it/sec) -training >> step=4574800, episode=763 reward=0.7802015 (396.82 it/sec) -training >> step=4574900, episode=763 reward=0.7802225 (503.27 it/sec) -training >> step=4575000, episode=763 reward=0.7781469 (512.52 it/sec) -training >> step=4575100, episode=763 reward=0.7823706 (500.38 it/sec) -training >> step=4575200, episode=763 reward=0.8144295 (491.40 it/sec) -training >> step=4575300, episode=763 reward=0.7851729 (475.30 it/sec) -training >> step=4575400, episode=763 reward=0.7920517 (489.10 it/sec) -training >> step=4575500, episode=763 reward=0.7637153 (520.08 it/sec) -training >> step=4575600, episode=763 reward=0.7815507 (448.10 it/sec) -training >> step=4575700, episode=763 reward=0.7808889 (468.17 it/sec) -training >> step=4575800, episode=763 reward=0.7846882 (447.81 it/sec) -training >> step=4575900, episode=763 reward=0.7913836 (434.09 it/sec) -training >> step=4576000, episode=763 reward=0.7640227 (491.97 it/sec) -training >> step=4576100, episode=763 reward=0.7698749 (381.43 it/sec) -training >> step=4576200, episode=763 reward=0.7648518 (481.46 it/sec) -training >> step=4576300, episode=763 reward=0.7874719 (467.90 it/sec) -training >> step=4576400, episode=763 reward=0.7595261 (448.61 it/sec) -training >> step=4576500, episode=763 reward=0.7571123 (503.28 it/sec) -training >> step=4576600, episode=763 reward=0.7714784 (504.16 it/sec) -training >> step=4576700, episode=763 reward=0.7684575 (510.62 it/sec) -training >> step=4576800, episode=763 reward=0.7870588 (506.46 it/sec) -training >> step=4576900, episode=763 reward=0.7785115 (474.80 it/sec) -training >> step=4577000, episode=763 reward=0.7700433 (435.73 it/sec) -training >> step=4577100, episode=763 reward=0.7732098 (515.66 it/sec) -training >> step=4577200, episode=763 reward=0.7866404 (477.37 it/sec) -training >> step=4577300, episode=764 reward=0.7515894 (67.42 it/sec) -training >> step=4577400, episode=764 reward=0.7848375 (431.40 it/sec) -training >> step=4577500, episode=764 reward=0.7645035 (449.62 it/sec) -training >> step=4577600, episode=764 reward=0.7765892 (442.04 it/sec) -training >> step=4577700, episode=764 reward=0.7679951 (468.15 it/sec) -training >> step=4577800, episode=764 reward=0.76429 (505.20 it/sec) -training >> step=4577900, episode=764 reward=0.7771443 (514.91 it/sec) -training >> step=4578000, episode=764 reward=0.7819538 (471.51 it/sec) -training >> step=4578100, episode=764 reward=0.7700723 (503.14 it/sec) -training >> step=4578200, episode=764 reward=0.7786436 (460.90 it/sec) -training >> step=4578300, episode=764 reward=0.7781391 (465.32 it/sec) -training >> step=4578400, episode=764 reward=0.7671385 (506.04 it/sec) -training >> step=4578500, episode=764 reward=0.7652268 (514.77 it/sec) -training >> step=4578600, episode=764 reward=0.7873983 (511.17 it/sec) -training >> step=4578700, episode=764 reward=0.7824316 (504.35 it/sec) -training >> step=4578800, episode=764 reward=0.7669867 (479.63 it/sec) -training >> step=4578900, episode=764 reward=0.7895854 (493.89 it/sec) -training >> step=4579000, episode=764 reward=0.7823275 (492.94 it/sec) -training >> step=4579100, episode=764 reward=0.7893454 (482.96 it/sec) -training >> step=4579200, episode=764 reward=0.775156 (509.64 it/sec) -training >> step=4579300, episode=764 reward=0.7857636 (461.76 it/sec) -training >> step=4579400, episode=764 reward=0.7591658 (428.71 it/sec) -training >> step=4579500, episode=764 reward=0.7604321 (552.20 it/sec) -training >> step=4579600, episode=764 reward=0.7817459 (473.24 it/sec) -training >> step=4579700, episode=764 reward=0.7752154 (481.29 it/sec) -training >> step=4579800, episode=764 reward=0.7888042 (439.88 it/sec) -training >> step=4579900, episode=764 reward=0.7813802 (522.01 it/sec) -training >> step=4580000, episode=764 reward=0.771362 (492.95 it/sec) -training >> step=4580100, episode=764 reward=0.7824334 (531.94 it/sec) -training >> step=4580200, episode=764 reward=0.7707577 (518.86 it/sec) -training >> step=4580300, episode=764 reward=0.7702023 (527.16 it/sec) -training >> step=4580400, episode=764 reward=0.7735229 (439.14 it/sec) -training >> step=4580500, episode=764 reward=0.7876525 (502.95 it/sec) -training >> step=4580600, episode=764 reward=0.7795013 (483.41 it/sec) -training >> step=4580700, episode=764 reward=0.7659237 (486.35 it/sec) -training >> step=4580800, episode=764 reward=0.7985933 (458.72 it/sec) -training >> step=4580900, episode=764 reward=0.7794377 (364.12 it/sec) -training >> step=4581000, episode=764 reward=0.7965 (533.22 it/sec) -training >> step=4581100, episode=764 reward=0.7711262 (438.99 it/sec) -training >> step=4581200, episode=764 reward=0.7778646 (361.95 it/sec) -training >> step=4581300, episode=764 reward=0.7694941 (471.50 it/sec) -training >> step=4581400, episode=764 reward=0.7833545 (512.28 it/sec) -training >> step=4581500, episode=764 reward=0.7817675 (511.96 it/sec) -training >> step=4581600, episode=764 reward=0.7774125 (548.07 it/sec) -training >> step=4581700, episode=764 reward=0.7930879 (554.90 it/sec) -training >> step=4581800, episode=764 reward=0.7845677 (478.89 it/sec) -training >> step=4581900, episode=764 reward=0.7675484 (505.33 it/sec) -training >> step=4582000, episode=764 reward=0.7623672 (517.51 it/sec) -training >> step=4582100, episode=764 reward=0.7812524 (523.99 it/sec) -training >> step=4582200, episode=764 reward=0.7882012 (518.24 it/sec) -training >> step=4582300, episode=764 reward=0.785081 (496.93 it/sec) -training >> step=4582400, episode=764 reward=0.7748274 (546.59 it/sec) -training >> step=4582500, episode=764 reward=0.7725353 (527.56 it/sec) -training >> step=4582600, episode=764 reward=0.7937135 (553.52 it/sec) -training >> step=4582700, episode=764 reward=0.7875843 (506.36 it/sec) -training >> step=4582800, episode=764 reward=0.7567359 (544.22 it/sec) -training >> step=4582900, episode=764 reward=0.7721715 (513.23 it/sec) -training >> step=4583000, episode=764 reward=0.7719392 (500.56 it/sec) -training >> step=4583100, episode=764 reward=0.7748746 (534.33 it/sec) -training >> step=4583200, episode=764 reward=0.7791798 (514.99 it/sec) -training >> step=4583300, episode=765 reward=0.7893919 (96.57 it/sec) -training >> step=4583400, episode=765 reward=0.7745028 (523.74 it/sec) -training >> step=4583500, episode=765 reward=0.7729531 (508.86 it/sec) -training >> step=4583600, episode=765 reward=0.7762513 (529.10 it/sec) -training >> step=4583700, episode=765 reward=0.7768556 (525.81 it/sec) -training >> step=4583800, episode=765 reward=0.7757533 (522.44 it/sec) -training >> step=4583900, episode=765 reward=0.797598 (545.61 it/sec) -training >> step=4584000, episode=765 reward=0.7685892 (450.32 it/sec) -training >> step=4584100, episode=765 reward=0.781437 (509.83 it/sec) -training >> step=4584200, episode=765 reward=0.7975528 (532.98 it/sec) -training >> step=4584300, episode=765 reward=0.777347 (434.82 it/sec) -training >> step=4584400, episode=765 reward=0.7729945 (476.67 it/sec) -training >> step=4584500, episode=765 reward=0.7795815 (517.56 it/sec) -training >> step=4584600, episode=765 reward=0.8009221 (480.76 it/sec) -training >> step=4584700, episode=765 reward=0.7830579 (460.46 it/sec) -training >> step=4584800, episode=765 reward=0.7781909 (501.60 it/sec) -training >> step=4584900, episode=765 reward=0.7819389 (525.30 it/sec) -training >> step=4585000, episode=765 reward=0.7642659 (418.83 it/sec) -training >> step=4585100, episode=765 reward=0.767126 (498.48 it/sec) -training >> step=4585200, episode=765 reward=0.7712314 (520.94 it/sec) -training >> step=4585300, episode=765 reward=0.7846646 (486.42 it/sec) -training >> step=4585400, episode=765 reward=0.7636977 (515.35 it/sec) -training >> step=4585500, episode=765 reward=0.7638843 (508.09 it/sec) -training >> step=4585600, episode=765 reward=0.7854478 (505.70 it/sec) -training >> step=4585700, episode=765 reward=0.7622033 (516.80 it/sec) -training >> step=4585800, episode=765 reward=0.7715614 (485.16 it/sec) -training >> step=4585900, episode=765 reward=0.7786506 (549.07 it/sec) -training >> step=4586000, episode=765 reward=0.7920305 (528.16 it/sec) -training >> step=4586100, episode=765 reward=0.7926098 (452.84 it/sec) -training >> step=4586200, episode=765 reward=0.7758379 (489.95 it/sec) -training >> step=4586300, episode=765 reward=0.7652365 (498.63 it/sec) -training >> step=4586400, episode=765 reward=0.7829627 (463.59 it/sec) -training >> step=4586500, episode=765 reward=0.7722182 (479.99 it/sec) -training >> step=4586600, episode=765 reward=0.7761905 (482.48 it/sec) -training >> step=4586700, episode=765 reward=0.7800503 (566.74 it/sec) -training >> step=4586800, episode=765 reward=0.7776386 (499.58 it/sec) -training >> step=4586900, episode=765 reward=0.7696971 (487.59 it/sec) -training >> step=4587000, episode=765 reward=0.7832075 (510.97 it/sec) -training >> step=4587100, episode=765 reward=0.766683 (351.08 it/sec) -training >> step=4587200, episode=765 reward=0.7851531 (523.44 it/sec) -training >> step=4587300, episode=765 reward=0.7785636 (492.28 it/sec) -training >> step=4587400, episode=765 reward=0.7896376 (523.68 it/sec) -training >> step=4587500, episode=765 reward=0.7723509 (517.90 it/sec) -training >> step=4587600, episode=765 reward=0.7810965 (470.69 it/sec) -training >> step=4587700, episode=765 reward=0.7702194 (510.65 it/sec) -training >> step=4587800, episode=765 reward=0.7949544 (447.85 it/sec) -training >> step=4587900, episode=765 reward=0.7745607 (496.76 it/sec) -training >> step=4588000, episode=765 reward=0.776665 (478.22 it/sec) -training >> step=4588100, episode=765 reward=0.7645292 (503.76 it/sec) -training >> step=4588200, episode=765 reward=0.7664438 (364.85 it/sec) -training >> step=4588300, episode=765 reward=0.7770155 (446.66 it/sec) -training >> step=4588400, episode=765 reward=0.7847973 (455.09 it/sec) -training >> step=4588500, episode=765 reward=0.7670142 (444.49 it/sec) -training >> step=4588600, episode=765 reward=0.773249 (480.75 it/sec) -training >> step=4588700, episode=765 reward=0.7965904 (480.33 it/sec) -training >> step=4588800, episode=765 reward=0.7794967 (500.79 it/sec) -training >> step=4588900, episode=765 reward=0.7791331 (459.24 it/sec) -training >> step=4589000, episode=765 reward=0.7633955 (446.59 it/sec) -training >> step=4589100, episode=765 reward=0.7885068 (509.52 it/sec) -training >> step=4589200, episode=765 reward=0.7833053 (489.57 it/sec) -training >> step=4589300, episode=766 reward=0.7705603 (67.35 it/sec) -training >> step=4589400, episode=766 reward=0.7651055 (471.44 it/sec) -training >> step=4589500, episode=766 reward=0.7547215 (498.07 it/sec) -training >> step=4589600, episode=766 reward=0.7764642 (504.14 it/sec) -training >> step=4589700, episode=766 reward=0.7701532 (478.75 it/sec) -training >> step=4589800, episode=766 reward=0.7642868 (468.50 it/sec) -training >> step=4589900, episode=766 reward=0.7899871 (504.48 it/sec) -training >> step=4590000, episode=766 reward=0.7747806 (531.96 it/sec) -training >> step=4590100, episode=766 reward=0.7697411 (521.28 it/sec) -training >> step=4590200, episode=766 reward=0.7758062 (507.85 it/sec) -training >> step=4590300, episode=766 reward=0.776086 (533.28 it/sec) -training >> step=4590400, episode=766 reward=0.7664611 (516.63 it/sec) -training >> step=4590500, episode=766 reward=0.7578159 (481.76 it/sec) -training >> step=4590600, episode=766 reward=0.7823198 (513.87 it/sec) -training >> step=4590700, episode=766 reward=0.7793686 (501.42 it/sec) -training >> step=4590800, episode=766 reward=0.7611085 (502.89 it/sec) -training >> step=4590900, episode=766 reward=0.7851359 (481.76 it/sec) -training >> step=4591000, episode=766 reward=0.7614842 (497.54 it/sec) -training >> step=4591100, episode=766 reward=0.7870597 (500.78 it/sec) -training >> step=4591200, episode=766 reward=0.7766244 (525.51 it/sec) -training >> step=4591300, episode=766 reward=0.7528211 (494.75 it/sec) -training >> step=4591400, episode=766 reward=0.7699398 (491.67 it/sec) -training >> step=4591500, episode=766 reward=0.7693478 (432.97 it/sec) -training >> step=4591600, episode=766 reward=0.7781 (505.18 it/sec) -training >> step=4591700, episode=766 reward=0.7581687 (479.74 it/sec) -training >> step=4591800, episode=766 reward=0.7575691 (446.74 it/sec) -training >> step=4591900, episode=766 reward=0.7814002 (507.43 it/sec) -training >> step=4592000, episode=766 reward=0.7616769 (495.89 it/sec) -training >> step=4592100, episode=766 reward=0.7712459 (506.91 it/sec) -training >> step=4592200, episode=766 reward=0.7837352 (515.36 it/sec) -training >> step=4592300, episode=766 reward=0.7831615 (476.59 it/sec) -training >> step=4592400, episode=766 reward=0.7835831 (486.25 it/sec) -training >> step=4592500, episode=766 reward=0.7729004 (470.30 it/sec) -training >> step=4592600, episode=766 reward=0.7773803 (469.20 it/sec) -training >> step=4592700, episode=766 reward=0.7910121 (477.94 it/sec) -training >> step=4592800, episode=766 reward=0.7587193 (436.50 it/sec) -training >> step=4592900, episode=766 reward=0.7784086 (472.38 it/sec) -training >> step=4593000, episode=766 reward=0.7726025 (503.88 it/sec) -training >> step=4593100, episode=766 reward=0.7774633 (503.71 it/sec) -training >> step=4593200, episode=766 reward=0.7761738 (473.24 it/sec) -training >> step=4593300, episode=766 reward=0.7638957 (352.87 it/sec) -training >> step=4593400, episode=766 reward=0.7843455 (462.44 it/sec) -training >> step=4593500, episode=766 reward=0.7892697 (480.92 it/sec) -training >> step=4593600, episode=766 reward=0.7808188 (519.72 it/sec) -training >> step=4593700, episode=766 reward=0.7706381 (499.84 it/sec) -training >> step=4593800, episode=766 reward=0.7626487 (470.16 it/sec) -training >> step=4593900, episode=766 reward=0.7729894 (413.27 it/sec) -training >> step=4594000, episode=766 reward=0.7914222 (494.27 it/sec) -training >> step=4594100, episode=766 reward=0.7537305 (491.47 it/sec) -training >> step=4594200, episode=766 reward=0.7735676 (446.24 it/sec) -training >> step=4594300, episode=766 reward=0.7580183 (440.84 it/sec) -training >> step=4594400, episode=766 reward=0.7662873 (473.20 it/sec) -training >> step=4594500, episode=766 reward=0.762479 (486.20 it/sec) -training >> step=4594600, episode=766 reward=0.7677264 (456.19 it/sec) -training >> step=4594700, episode=766 reward=0.7776666 (514.47 it/sec) -training >> step=4594800, episode=766 reward=0.7827479 (471.36 it/sec) -training >> step=4594900, episode=766 reward=0.7729205 (481.88 it/sec) -training >> step=4595000, episode=766 reward=0.7949024 (503.66 it/sec) -training >> step=4595100, episode=766 reward=0.7784672 (476.34 it/sec) -training >> step=4595200, episode=766 reward=0.7691752 (465.19 it/sec) -training >> step=4595300, episode=767 reward=0.7625246 (83.99 it/sec) -training >> step=4595400, episode=767 reward=0.7815905 (500.60 it/sec) -training >> step=4595500, episode=767 reward=0.7621151 (482.78 it/sec) -training >> step=4595600, episode=767 reward=0.7868245 (464.83 it/sec) -training >> step=4595700, episode=767 reward=0.7779487 (442.94 it/sec) -training >> step=4595800, episode=767 reward=0.7899728 (452.03 it/sec) -training >> step=4595900, episode=767 reward=0.793278 (495.81 it/sec) -training >> step=4596000, episode=767 reward=0.7956463 (478.61 it/sec) -training >> step=4596100, episode=767 reward=0.7682297 (492.82 it/sec) -training >> step=4596200, episode=767 reward=0.7907068 (473.93 it/sec) -training >> step=4596300, episode=767 reward=0.7939123 (450.47 it/sec) -training >> step=4596400, episode=767 reward=0.7757857 (465.29 it/sec) -training >> step=4596500, episode=767 reward=0.792982 (506.97 it/sec) -training >> step=4596600, episode=767 reward=0.7662122 (496.44 it/sec) -training >> step=4596700, episode=767 reward=0.7504991 (467.21 it/sec) -training >> step=4596800, episode=767 reward=0.7721099 (468.35 it/sec) -training >> step=4596900, episode=767 reward=0.803256 (497.85 it/sec) -training >> step=4597000, episode=767 reward=0.7588045 (484.36 it/sec) -training >> step=4597100, episode=767 reward=0.7831756 (496.60 it/sec) -training >> step=4597200, episode=767 reward=0.7632033 (438.45 it/sec) -training >> step=4597300, episode=767 reward=0.7723244 (465.52 it/sec) -training >> step=4597400, episode=767 reward=0.7785786 (437.18 it/sec) -training >> step=4597500, episode=767 reward=0.7711814 (515.28 it/sec) -training >> step=4597600, episode=767 reward=0.7630915 (491.92 it/sec) -training >> step=4597700, episode=767 reward=0.7508248 (473.63 it/sec) -training >> step=4597800, episode=767 reward=0.7547972 (489.55 it/sec) -training >> step=4597900, episode=767 reward=0.7729993 (476.81 it/sec) -training >> step=4598000, episode=767 reward=0.7906763 (506.74 it/sec) -training >> step=4598100, episode=767 reward=0.7955589 (461.21 it/sec) -training >> step=4598200, episode=767 reward=0.7843006 (480.87 it/sec) -training >> step=4598300, episode=767 reward=0.7712415 (504.38 it/sec) -training >> step=4598400, episode=767 reward=0.7632435 (450.14 it/sec) -training >> step=4598500, episode=767 reward=0.7773306 (452.70 it/sec) -training >> step=4598600, episode=767 reward=0.7729911 (482.48 it/sec) -training >> step=4598700, episode=767 reward=0.8015302 (461.27 it/sec) -training >> step=4598800, episode=767 reward=0.7672625 (473.89 it/sec) -training >> step=4598900, episode=767 reward=0.7779459 (478.59 it/sec) -training >> step=4599000, episode=767 reward=0.7527948 (526.78 it/sec) -training >> step=4599100, episode=767 reward=0.7642771 (506.07 it/sec) -training >> step=4599200, episode=767 reward=0.7770157 (420.91 it/sec) -training >> step=4599300, episode=767 reward=0.7605214 (456.27 it/sec) -training >> step=4599400, episode=767 reward=0.7759649 (456.80 it/sec) -training >> step=4599500, episode=767 reward=0.7656967 (359.82 it/sec) -training >> step=4599600, episode=767 reward=0.7912595 (500.42 it/sec) -training >> step=4599700, episode=767 reward=0.7648513 (485.50 it/sec) -training >> step=4599800, episode=767 reward=0.7772714 (462.50 it/sec) -training >> step=4599900, episode=767 reward=0.7615951 (498.04 it/sec) -training >> step=4600000, episode=767 reward=0.757484 (485.99 it/sec) -training >> step=4600100, episode=767 reward=0.7784058 (485.79 it/sec) -training >> step=4600200, episode=767 reward=0.7835422 (455.32 it/sec) -training >> step=4600300, episode=767 reward=0.7821551 (467.08 it/sec) -training >> step=4600400, episode=767 reward=0.7766274 (502.04 it/sec) -training >> step=4600500, episode=767 reward=0.7745335 (508.97 it/sec) -training >> step=4600600, episode=767 reward=0.7839556 (484.22 it/sec) -training >> step=4600700, episode=767 reward=0.764223 (448.38 it/sec) -training >> step=4600800, episode=767 reward=0.7796431 (495.50 it/sec) -training >> step=4600900, episode=767 reward=0.7931214 (503.14 it/sec) -training >> step=4601000, episode=767 reward=0.7651799 (490.03 it/sec) -training >> step=4601100, episode=767 reward=0.7897243 (519.03 it/sec) -training >> step=4601200, episode=767 reward=0.7625874 (455.86 it/sec) -training >> step=4601300, episode=768 reward=0.8015385 (86.89 it/sec) -training >> step=4601400, episode=768 reward=0.7635106 (534.52 it/sec) -training >> step=4601500, episode=768 reward=0.7797408 (516.23 it/sec) -training >> step=4601600, episode=768 reward=0.7740503 (473.26 it/sec) -training >> step=4601700, episode=768 reward=0.7684541 (503.32 it/sec) -training >> step=4601800, episode=768 reward=0.7694555 (508.77 it/sec) -training >> step=4601900, episode=768 reward=0.7699147 (520.46 it/sec) -training >> step=4602000, episode=768 reward=0.779698 (521.09 it/sec) -training >> step=4602100, episode=768 reward=0.7956017 (498.59 it/sec) -training >> step=4602200, episode=768 reward=0.7964639 (483.85 it/sec) -training >> step=4602300, episode=768 reward=0.7763032 (469.40 it/sec) -training >> step=4602400, episode=768 reward=0.772929 (527.74 it/sec) -training >> step=4602500, episode=768 reward=0.7777885 (513.24 it/sec) -training >> step=4602600, episode=768 reward=0.7780123 (489.86 it/sec) -training >> step=4602700, episode=768 reward=0.7800179 (467.58 it/sec) -training >> step=4602800, episode=768 reward=0.8011183 (454.86 it/sec) -training >> step=4602900, episode=768 reward=0.7976211 (524.24 it/sec) -training >> step=4603000, episode=768 reward=0.7710507 (506.51 it/sec) -training >> step=4603100, episode=768 reward=0.7842997 (475.36 it/sec) -training >> step=4603200, episode=768 reward=0.7756695 (483.04 it/sec) -training >> step=4603300, episode=768 reward=0.7631872 (463.21 it/sec) -training >> step=4603400, episode=768 reward=0.7667217 (514.66 it/sec) -training >> step=4603500, episode=768 reward=0.7733058 (518.86 it/sec) -training >> step=4603600, episode=768 reward=0.7783049 (494.65 it/sec) -training >> step=4603700, episode=768 reward=0.791173 (487.66 it/sec) -training >> step=4603800, episode=768 reward=0.7784311 (464.52 it/sec) -training >> step=4603900, episode=768 reward=0.7826917 (520.84 it/sec) -training >> step=4604000, episode=768 reward=0.7860285 (506.18 it/sec) -training >> step=4604100, episode=768 reward=0.7894581 (476.80 it/sec) -training >> step=4604200, episode=768 reward=0.7744796 (500.63 it/sec) -training >> step=4604300, episode=768 reward=0.7493377 (488.66 it/sec) -training >> step=4604400, episode=768 reward=0.7874581 (504.20 it/sec) -training >> step=4604500, episode=768 reward=0.7868733 (495.42 it/sec) -training >> step=4604600, episode=768 reward=0.7825103 (480.92 it/sec) -training >> step=4604700, episode=768 reward=0.773691 (499.07 it/sec) -training >> step=4604800, episode=768 reward=0.7810497 (485.63 it/sec) -training >> step=4604900, episode=768 reward=0.7745875 (510.60 it/sec) -training >> step=4605000, episode=768 reward=0.7881423 (504.22 it/sec) -training >> step=4605100, episode=768 reward=0.7857499 (493.91 it/sec) -training >> step=4605200, episode=768 reward=0.7818745 (477.35 it/sec) -training >> step=4605300, episode=768 reward=0.768514 (510.37 it/sec) -training >> step=4605400, episode=768 reward=0.7626693 (504.57 it/sec) -training >> step=4605500, episode=768 reward=0.7529945 (469.94 it/sec) -training >> step=4605600, episode=768 reward=0.7894561 (512.91 it/sec) -training >> step=4605700, episode=768 reward=0.7743248 (417.19 it/sec) -training >> step=4605800, episode=768 reward=0.7703378 (497.06 it/sec) -training >> step=4605900, episode=768 reward=0.7786916 (489.15 it/sec) -training >> step=4606000, episode=768 reward=0.7781277 (494.58 it/sec) -training >> step=4606100, episode=768 reward=0.7888559 (483.21 it/sec) -training >> step=4606200, episode=768 reward=0.7772123 (495.42 it/sec) -training >> step=4606300, episode=768 reward=0.801326 (508.84 it/sec) -training >> step=4606400, episode=768 reward=0.7643303 (496.37 it/sec) -training >> step=4606500, episode=768 reward=0.7670037 (527.66 it/sec) -training >> step=4606600, episode=768 reward=0.7472563 (492.46 it/sec) -training >> step=4606700, episode=768 reward=0.780956 (503.45 it/sec) -training >> step=4606800, episode=768 reward=0.7602156 (514.59 it/sec) -training >> step=4606900, episode=768 reward=0.7726903 (486.03 it/sec) -training >> step=4607000, episode=768 reward=0.7792004 (508.03 it/sec) -training >> step=4607100, episode=768 reward=0.7743551 (493.08 it/sec) -training >> step=4607200, episode=768 reward=0.7901081 (503.60 it/sec) -training >> step=4607300, episode=769 reward=0.760463 (78.65 it/sec) -training >> step=4607400, episode=769 reward=0.7779321 (446.86 it/sec) -training >> step=4607500, episode=769 reward=0.764985 (475.37 it/sec) -training >> step=4607600, episode=769 reward=0.7701789 (490.46 it/sec) -training >> step=4607700, episode=769 reward=0.7696148 (495.74 it/sec) -training >> step=4607800, episode=769 reward=0.7824262 (503.31 it/sec) -training >> step=4607900, episode=769 reward=0.7827423 (465.35 it/sec) -training >> step=4608000, episode=769 reward=0.7809953 (502.58 it/sec) -training >> step=4608100, episode=769 reward=0.7618331 (514.20 it/sec) -training >> step=4608200, episode=769 reward=0.747832 (481.34 it/sec) -training >> step=4608300, episode=769 reward=0.7892086 (517.09 it/sec) -training >> step=4608400, episode=769 reward=0.7773386 (486.64 it/sec) -training >> step=4608500, episode=769 reward=0.7847431 (512.33 it/sec) -training >> step=4608600, episode=769 reward=0.7879445 (489.88 it/sec) -training >> step=4608700, episode=769 reward=0.7704934 (496.13 it/sec) -training >> step=4608800, episode=769 reward=0.777958 (459.89 it/sec) -training >> step=4608900, episode=769 reward=0.7726257 (500.64 it/sec) -training >> step=4609000, episode=769 reward=0.782014 (512.12 it/sec) -training >> step=4609100, episode=769 reward=0.7871128 (518.52 it/sec) -training >> step=4609200, episode=769 reward=0.7788931 (502.87 it/sec) -training >> step=4609300, episode=769 reward=0.7884102 (523.30 it/sec) -training >> step=4609400, episode=769 reward=0.7846471 (496.07 it/sec) -training >> step=4609500, episode=769 reward=0.7813743 (507.96 it/sec) -training >> step=4609600, episode=769 reward=0.7790366 (508.05 it/sec) -training >> step=4609700, episode=769 reward=0.7875164 (504.63 it/sec) -training >> step=4609800, episode=769 reward=0.7384423 (505.38 it/sec) -training >> step=4609900, episode=769 reward=0.7578683 (467.10 it/sec) -training >> step=4610000, episode=769 reward=0.767076 (532.89 it/sec) -training >> step=4610100, episode=769 reward=0.7885205 (497.90 it/sec) -training >> step=4610200, episode=769 reward=0.7699445 (470.34 it/sec) -training >> step=4610300, episode=769 reward=0.788714 (513.15 it/sec) -training >> step=4610400, episode=769 reward=0.7802498 (453.64 it/sec) -training >> step=4610500, episode=769 reward=0.7696628 (473.59 it/sec) -training >> step=4610600, episode=769 reward=0.7684867 (474.25 it/sec) -training >> step=4610700, episode=769 reward=0.7762637 (475.14 it/sec) -training >> step=4610800, episode=769 reward=0.7600883 (466.57 it/sec) -training >> step=4610900, episode=769 reward=0.767446 (499.38 it/sec) -training >> step=4611000, episode=769 reward=0.7684533 (493.54 it/sec) -training >> step=4611100, episode=769 reward=0.7654389 (521.78 it/sec) -training >> step=4611200, episode=769 reward=0.7786069 (488.81 it/sec) -training >> step=4611300, episode=769 reward=0.7754099 (450.97 it/sec) -training >> step=4611400, episode=769 reward=0.774456 (463.86 it/sec) -training >> step=4611500, episode=769 reward=0.7834017 (483.19 it/sec) -training >> step=4611600, episode=769 reward=0.7892272 (488.39 it/sec) -training >> step=4611700, episode=769 reward=0.7843843 (404.73 it/sec) -training >> step=4611800, episode=769 reward=0.7667155 (491.42 it/sec) -training >> step=4611900, episode=769 reward=0.7849265 (480.71 it/sec) -training >> step=4612000, episode=769 reward=0.7566073 (505.16 it/sec) -training >> step=4612100, episode=769 reward=0.7630066 (464.77 it/sec) -training >> step=4612200, episode=769 reward=0.7828923 (467.31 it/sec) -training >> step=4612300, episode=769 reward=0.7621164 (482.81 it/sec) -training >> step=4612400, episode=769 reward=0.7830319 (470.13 it/sec) -training >> step=4612500, episode=769 reward=0.7692577 (497.96 it/sec) -training >> step=4612600, episode=769 reward=0.7622842 (497.26 it/sec) -training >> step=4612700, episode=769 reward=0.753323 (483.18 it/sec) -training >> step=4612800, episode=769 reward=0.7868157 (496.88 it/sec) -training >> step=4612900, episode=769 reward=0.7774374 (455.53 it/sec) -training >> step=4613000, episode=769 reward=0.7639633 (489.20 it/sec) -training >> step=4613100, episode=769 reward=0.7798575 (497.74 it/sec) -training >> step=4613200, episode=769 reward=0.7604373 (523.96 it/sec) -training >> step=4613300, episode=770 reward=0.7797458 (111.74 it/sec) -training >> step=4613400, episode=770 reward=0.7728562 (488.58 it/sec) -training >> step=4613500, episode=770 reward=0.7872119 (497.89 it/sec) -training >> step=4613600, episode=770 reward=0.7814098 (488.90 it/sec) -training >> step=4613700, episode=770 reward=0.7825305 (500.87 it/sec) -training >> step=4613800, episode=770 reward=0.7767268 (517.42 it/sec) -training >> step=4613900, episode=770 reward=0.7817444 (501.92 it/sec) -training >> step=4614000, episode=770 reward=0.7864272 (544.33 it/sec) -training >> step=4614100, episode=770 reward=0.7686256 (492.20 it/sec) -training >> step=4614200, episode=770 reward=0.7794237 (513.06 it/sec) -training >> step=4614300, episode=770 reward=0.7612652 (512.99 it/sec) -training >> step=4614400, episode=770 reward=0.787089 (510.27 it/sec) -training >> step=4614500, episode=770 reward=0.7397118 (505.09 it/sec) -training >> step=4614600, episode=770 reward=0.7734411 (500.17 it/sec) -training >> step=4614700, episode=770 reward=0.7868968 (522.72 it/sec) -training >> step=4614800, episode=770 reward=0.7807868 (516.55 it/sec) -training >> step=4614900, episode=770 reward=0.7784196 (498.13 it/sec) -training >> step=4615000, episode=770 reward=0.8027691 (509.43 it/sec) -training >> step=4615100, episode=770 reward=0.7985863 (536.43 it/sec) -training >> step=4615200, episode=770 reward=0.7764412 (502.17 it/sec) -training >> step=4615300, episode=770 reward=0.7722663 (507.46 it/sec) -training >> step=4615400, episode=770 reward=0.7810599 (489.34 it/sec) -training >> step=4615500, episode=770 reward=0.7953408 (495.00 it/sec) -training >> step=4615600, episode=770 reward=0.7822368 (527.57 it/sec) -training >> step=4615700, episode=770 reward=0.8042409 (513.51 it/sec) -training >> step=4615800, episode=770 reward=0.7739921 (524.97 it/sec) -training >> step=4615900, episode=770 reward=0.7646154 (524.77 it/sec) -training >> step=4616000, episode=770 reward=0.775189 (466.80 it/sec) -training >> step=4616100, episode=770 reward=0.7796115 (529.91 it/sec) -training >> step=4616200, episode=770 reward=0.7777247 (513.96 it/sec) -training >> step=4616300, episode=770 reward=0.7974977 (525.08 it/sec) -training >> step=4616400, episode=770 reward=0.7770783 (518.76 it/sec) -training >> step=4616500, episode=770 reward=0.7867411 (530.00 it/sec) -training >> step=4616600, episode=770 reward=0.7900624 (508.39 it/sec) -training >> step=4616700, episode=770 reward=0.7879871 (479.30 it/sec) -training >> step=4616800, episode=770 reward=0.78359 (491.01 it/sec) -training >> step=4616900, episode=770 reward=0.775076 (518.92 it/sec) -training >> step=4617000, episode=770 reward=0.7899033 (496.39 it/sec) -training >> step=4617100, episode=770 reward=0.7723812 (491.04 it/sec) -training >> step=4617200, episode=770 reward=0.7727451 (474.85 it/sec) -training >> step=4617300, episode=770 reward=0.761035 (534.33 it/sec) -training >> step=4617400, episode=770 reward=0.7822207 (503.04 it/sec) -training >> step=4617500, episode=770 reward=0.7886487 (505.71 it/sec) -training >> step=4617600, episode=770 reward=0.7812947 (532.39 it/sec) -training >> step=4617700, episode=770 reward=0.7723641 (455.98 it/sec) -training >> step=4617800, episode=770 reward=0.7906628 (500.41 it/sec) -training >> step=4617900, episode=770 reward=0.760189 (437.28 it/sec) -training >> step=4618000, episode=770 reward=0.7776067 (522.30 it/sec) -training >> step=4618100, episode=770 reward=0.7700715 (512.54 it/sec) -training >> step=4618200, episode=770 reward=0.7645074 (470.01 it/sec) -training >> step=4618300, episode=770 reward=0.7756796 (532.93 it/sec) -training >> step=4618400, episode=770 reward=0.7802049 (519.81 it/sec) -training >> step=4618500, episode=770 reward=0.7593839 (493.62 it/sec) -training >> step=4618600, episode=770 reward=0.7764872 (523.32 it/sec) -training >> step=4618700, episode=770 reward=0.759616 (513.73 it/sec) -training >> step=4618800, episode=770 reward=0.787728 (512.04 it/sec) -training >> step=4618900, episode=770 reward=0.7741475 (515.82 it/sec) -training >> step=4619000, episode=770 reward=0.7872567 (558.35 it/sec) -training >> step=4619100, episode=770 reward=0.7673582 (479.99 it/sec) -training >> step=4619200, episode=770 reward=0.7674524 (493.92 it/sec) -training >> step=4619300, episode=771 reward=0.7719326 (89.15 it/sec) -training >> step=4619400, episode=771 reward=0.765469 (483.59 it/sec) -training >> step=4619500, episode=771 reward=0.7826424 (504.05 it/sec) -training >> step=4619600, episode=771 reward=0.7773935 (473.86 it/sec) -training >> step=4619700, episode=771 reward=0.7755362 (509.35 it/sec) -training >> step=4619800, episode=771 reward=0.763535 (467.23 it/sec) -training >> step=4619900, episode=771 reward=0.7882288 (524.14 it/sec) -training >> step=4620000, episode=771 reward=0.7645472 (533.71 it/sec) -training >> step=4620100, episode=771 reward=0.7703688 (495.19 it/sec) -training >> step=4620200, episode=771 reward=0.7972171 (524.02 it/sec) -training >> step=4620300, episode=771 reward=0.7856258 (496.06 it/sec) -training >> step=4620400, episode=771 reward=0.7780099 (501.32 it/sec) -training >> step=4620500, episode=771 reward=0.7967474 (514.59 it/sec) -training >> step=4620600, episode=771 reward=0.782923 (504.53 it/sec) -training >> step=4620700, episode=771 reward=0.7829576 (505.10 it/sec) -training >> step=4620800, episode=771 reward=0.8079544 (511.31 it/sec) -training >> step=4620900, episode=771 reward=0.7565048 (503.18 it/sec) -training >> step=4621000, episode=771 reward=0.7836662 (505.69 it/sec) -training >> step=4621100, episode=771 reward=0.783332 (559.34 it/sec) -training >> step=4621200, episode=771 reward=0.7545033 (496.15 it/sec) -training >> step=4621300, episode=771 reward=0.7718012 (506.38 it/sec) -training >> step=4621400, episode=771 reward=0.7817038 (480.85 it/sec) -training >> step=4621500, episode=771 reward=0.7822402 (480.58 it/sec) -training >> step=4621600, episode=771 reward=0.7678459 (480.74 it/sec) -training >> step=4621700, episode=771 reward=0.7564353 (522.33 it/sec) -training >> step=4621800, episode=771 reward=0.7774548 (502.40 it/sec) -training >> step=4621900, episode=771 reward=0.7648276 (463.38 it/sec) -training >> step=4622000, episode=771 reward=0.7672932 (492.66 it/sec) -training >> step=4622100, episode=771 reward=0.7819955 (531.02 it/sec) -training >> step=4622200, episode=771 reward=0.7965114 (537.59 it/sec) -training >> step=4622300, episode=771 reward=0.7767108 (494.41 it/sec) -training >> step=4622400, episode=771 reward=0.7680839 (460.99 it/sec) -training >> step=4622500, episode=771 reward=0.7879501 (547.26 it/sec) -training >> step=4622600, episode=771 reward=0.7945029 (497.24 it/sec) -training >> step=4622700, episode=771 reward=0.7647806 (520.33 it/sec) -training >> step=4622800, episode=771 reward=0.7721288 (493.92 it/sec) -training >> step=4622900, episode=771 reward=0.7888272 (511.39 it/sec) -training >> step=4623000, episode=771 reward=0.7774417 (472.55 it/sec) -training >> step=4623100, episode=771 reward=0.7925747 (479.14 it/sec) -training >> step=4623200, episode=771 reward=0.7843458 (519.76 it/sec) -training >> step=4623300, episode=771 reward=0.7810041 (514.68 it/sec) -training >> step=4623400, episode=771 reward=0.7625145 (477.64 it/sec) -training >> step=4623500, episode=771 reward=0.7541955 (497.28 it/sec) -training >> step=4623600, episode=771 reward=0.7898709 (517.41 it/sec) -training >> step=4623700, episode=771 reward=0.7748647 (496.74 it/sec) -training >> step=4623800, episode=771 reward=0.7656708 (473.69 it/sec) -training >> step=4623900, episode=771 reward=0.7894574 (512.57 it/sec) -training >> step=4624000, episode=771 reward=0.7712371 (409.29 it/sec) -training >> step=4624100, episode=771 reward=0.780907 (514.60 it/sec) -training >> step=4624200, episode=771 reward=0.7700188 (473.41 it/sec) -training >> step=4624300, episode=771 reward=0.7771652 (508.15 it/sec) -training >> step=4624400, episode=771 reward=0.782676 (438.63 it/sec) -training >> step=4624500, episode=771 reward=0.7903199 (485.19 it/sec) -training >> step=4624600, episode=771 reward=0.7693391 (523.35 it/sec) -training >> step=4624700, episode=771 reward=0.778694 (538.59 it/sec) -training >> step=4624800, episode=771 reward=0.7915608 (487.52 it/sec) -training >> step=4624900, episode=771 reward=0.8007248 (474.10 it/sec) -training >> step=4625000, episode=771 reward=0.7859979 (491.05 it/sec) -training >> step=4625100, episode=771 reward=0.7866622 (512.94 it/sec) -training >> step=4625200, episode=771 reward=0.7657562 (511.90 it/sec) -training >> step=4625300, episode=772 reward=0.7844582 (98.96 it/sec) -training >> step=4625400, episode=772 reward=0.7671898 (375.47 it/sec) -training >> step=4625500, episode=772 reward=0.78918 (468.02 it/sec) -training >> step=4625600, episode=772 reward=0.7571198 (461.01 it/sec) -training >> step=4625700, episode=772 reward=0.789258 (529.23 it/sec) -training >> step=4625800, episode=772 reward=0.7683433 (494.22 it/sec) -training >> step=4625900, episode=772 reward=0.7804512 (469.92 it/sec) -training >> step=4626000, episode=772 reward=0.7796062 (501.10 it/sec) -training >> step=4626100, episode=772 reward=0.7810204 (470.42 it/sec) -training >> step=4626200, episode=772 reward=0.7767545 (489.15 it/sec) -training >> step=4626300, episode=772 reward=0.7705422 (522.06 it/sec) -training >> step=4626400, episode=772 reward=0.7757362 (501.93 it/sec) -training >> step=4626500, episode=772 reward=0.7854324 (496.03 it/sec) -training >> step=4626600, episode=772 reward=0.7938502 (510.69 it/sec) -training >> step=4626700, episode=772 reward=0.7791256 (507.26 it/sec) -training >> step=4626800, episode=772 reward=0.7724602 (484.08 it/sec) -training >> step=4626900, episode=772 reward=0.8004469 (497.21 it/sec) -training >> step=4627000, episode=772 reward=0.7806983 (480.29 it/sec) -training >> step=4627100, episode=772 reward=0.7811403 (500.39 it/sec) -training >> step=4627200, episode=772 reward=0.7589588 (500.18 it/sec) -training >> step=4627300, episode=772 reward=0.7691123 (477.10 it/sec) -training >> step=4627400, episode=772 reward=0.7693644 (524.40 it/sec) -training >> step=4627500, episode=772 reward=0.7731886 (516.65 it/sec) -training >> step=4627600, episode=772 reward=0.7726343 (484.79 it/sec) -training >> step=4627700, episode=772 reward=0.7656323 (487.48 it/sec) -training >> step=4627800, episode=772 reward=0.7813262 (514.03 it/sec) -training >> step=4627900, episode=772 reward=0.7922089 (480.28 it/sec) -training >> step=4628000, episode=772 reward=0.7506729 (488.01 it/sec) -training >> step=4628100, episode=772 reward=0.760577 (486.24 it/sec) -training >> step=4628200, episode=772 reward=0.7874357 (542.86 it/sec) -training >> step=4628300, episode=772 reward=0.7508278 (502.30 it/sec) -training >> step=4628400, episode=772 reward=0.7881811 (498.69 it/sec) -training >> step=4628500, episode=772 reward=0.7822236 (445.97 it/sec) -training >> step=4628600, episode=772 reward=0.794259 (449.22 it/sec) -training >> step=4628700, episode=772 reward=0.7780646 (536.70 it/sec) -training >> step=4628800, episode=772 reward=0.7628356 (499.82 it/sec) -training >> step=4628900, episode=772 reward=0.7735485 (541.72 it/sec) -training >> step=4629000, episode=772 reward=0.8046299 (491.98 it/sec) -training >> step=4629100, episode=772 reward=0.7790033 (471.50 it/sec) -training >> step=4629200, episode=772 reward=0.7788913 (527.75 it/sec) -training >> step=4629300, episode=772 reward=0.7638516 (501.81 it/sec) -training >> step=4629400, episode=772 reward=0.7902606 (537.72 it/sec) -training >> step=4629500, episode=772 reward=0.781642 (514.32 it/sec) -training >> step=4629600, episode=772 reward=0.7878255 (555.66 it/sec) -training >> step=4629700, episode=772 reward=0.7703551 (559.32 it/sec) -training >> step=4629800, episode=772 reward=0.787729 (528.66 it/sec) -training >> step=4629900, episode=772 reward=0.7629588 (567.19 it/sec) -training >> step=4630000, episode=772 reward=0.7773817 (540.23 it/sec) -training >> step=4630100, episode=772 reward=0.7877533 (534.45 it/sec) -training >> step=4630200, episode=772 reward=0.7853295 (405.55 it/sec) -training >> step=4630300, episode=772 reward=0.7684327 (506.49 it/sec) -training >> step=4630400, episode=772 reward=0.7890503 (542.21 it/sec) -training >> step=4630500, episode=772 reward=0.7992477 (507.79 it/sec) -training >> step=4630600, episode=772 reward=0.7911652 (526.76 it/sec) -training >> step=4630700, episode=772 reward=0.7758315 (492.86 it/sec) -training >> step=4630800, episode=772 reward=0.771306 (534.65 it/sec) -training >> step=4630900, episode=772 reward=0.7733662 (554.43 it/sec) -training >> step=4631000, episode=772 reward=0.7685557 (491.95 it/sec) -training >> step=4631100, episode=772 reward=0.7916702 (504.41 it/sec) -training >> step=4631200, episode=772 reward=0.7767894 (501.27 it/sec) -training >> step=4631300, episode=773 reward=0.7690483 (101.07 it/sec) -training >> step=4631400, episode=773 reward=0.7927555 (503.47 it/sec) -training >> step=4631500, episode=773 reward=0.7987101 (557.37 it/sec) -training >> step=4631600, episode=773 reward=0.7546258 (518.59 it/sec) -training >> step=4631700, episode=773 reward=0.7575724 (517.21 it/sec) -training >> step=4631800, episode=773 reward=0.7909368 (540.95 it/sec) -training >> step=4631900, episode=773 reward=0.7474748 (536.09 it/sec) -training >> step=4632000, episode=773 reward=0.7710792 (552.73 it/sec) -training >> step=4632100, episode=773 reward=0.7650102 (580.12 it/sec) -training >> step=4632200, episode=773 reward=0.7806734 (525.94 it/sec) -training >> step=4632300, episode=773 reward=0.7638744 (503.03 it/sec) -training >> step=4632400, episode=773 reward=0.7811608 (535.87 it/sec) -training >> step=4632500, episode=773 reward=0.7748652 (523.99 it/sec) -training >> step=4632600, episode=773 reward=0.7818893 (508.75 it/sec) -training >> step=4632700, episode=773 reward=0.7855001 (537.52 it/sec) -training >> step=4632800, episode=773 reward=0.7738084 (562.18 it/sec) -training >> step=4632900, episode=773 reward=0.7707567 (491.95 it/sec) -training >> step=4633000, episode=773 reward=0.7890126 (500.43 it/sec) -training >> step=4633100, episode=773 reward=0.7866791 (546.25 it/sec) -training >> step=4633200, episode=773 reward=0.7843152 (550.65 it/sec) -training >> step=4633300, episode=773 reward=0.7877887 (457.39 it/sec) -training >> step=4633400, episode=773 reward=0.7775098 (543.89 it/sec) -training >> step=4633500, episode=773 reward=0.7835944 (506.93 it/sec) -training >> step=4633600, episode=773 reward=0.7847841 (483.71 it/sec) -training >> step=4633700, episode=773 reward=0.7654133 (520.08 it/sec) -training >> step=4633800, episode=773 reward=0.7602111 (549.87 it/sec) -training >> step=4633900, episode=773 reward=0.7821732 (487.54 it/sec) -training >> step=4634000, episode=773 reward=0.7656981 (504.48 it/sec) -training >> step=4634100, episode=773 reward=0.7819328 (562.69 it/sec) -training >> step=4634200, episode=773 reward=0.7696952 (547.11 it/sec) -training >> step=4634300, episode=773 reward=0.7934579 (540.14 it/sec) -training >> step=4634400, episode=773 reward=0.7869219 (445.91 it/sec) -training >> step=4634500, episode=773 reward=0.7700506 (572.09 it/sec) -training >> step=4634600, episode=773 reward=0.7802233 (529.79 it/sec) -training >> step=4634700, episode=773 reward=0.7758346 (552.43 it/sec) -training >> step=4634800, episode=773 reward=0.7627777 (552.73 it/sec) -training >> step=4634900, episode=773 reward=0.7924344 (531.45 it/sec) -training >> step=4635000, episode=773 reward=0.779794 (422.50 it/sec) -training >> step=4635100, episode=773 reward=0.7826136 (503.94 it/sec) -training >> step=4635200, episode=773 reward=0.7962177 (543.72 it/sec) -training >> step=4635300, episode=773 reward=0.7740856 (552.30 it/sec) -training >> step=4635400, episode=773 reward=0.7688357 (529.12 it/sec) -training >> step=4635500, episode=773 reward=0.7706777 (503.59 it/sec) -training >> step=4635600, episode=773 reward=0.7598609 (530.13 it/sec) -training >> step=4635700, episode=773 reward=0.7772921 (551.78 it/sec) -training >> step=4635800, episode=773 reward=0.7639515 (533.58 it/sec) -training >> step=4635900, episode=773 reward=0.7774869 (503.91 it/sec) -training >> step=4636000, episode=773 reward=0.7807974 (533.43 it/sec) -training >> step=4636100, episode=773 reward=0.7720731 (530.06 it/sec) -training >> step=4636200, episode=773 reward=0.779268 (521.65 it/sec) -training >> step=4636300, episode=773 reward=0.7707852 (554.35 it/sec) -training >> step=4636400, episode=773 reward=0.7805209 (384.01 it/sec) -training >> step=4636500, episode=773 reward=0.7642647 (529.43 it/sec) -training >> step=4636600, episode=773 reward=0.7821454 (524.30 it/sec) -training >> step=4636700, episode=773 reward=0.7896937 (486.25 it/sec) -training >> step=4636800, episode=773 reward=0.7420114 (544.13 it/sec) -training >> step=4636900, episode=773 reward=0.7497561 (537.60 it/sec) -training >> step=4637000, episode=773 reward=0.7710505 (543.71 it/sec) -training >> step=4637100, episode=773 reward=0.7789747 (481.97 it/sec) -training >> step=4637200, episode=773 reward=0.7895682 (497.27 it/sec) -training >> step=4637300, episode=774 reward=0.7760557 (140.09 it/sec) -training >> step=4637400, episode=774 reward=0.7648635 (520.69 it/sec) -training >> step=4637500, episode=774 reward=0.7607719 (490.83 it/sec) -training >> step=4637600, episode=774 reward=0.7874117 (523.38 it/sec) -training >> step=4637700, episode=774 reward=0.7959116 (527.40 it/sec) -training >> step=4637800, episode=774 reward=0.7895921 (520.18 it/sec) -training >> step=4637900, episode=774 reward=0.7868001 (499.49 it/sec) -training >> step=4638000, episode=774 reward=0.782373 (489.96 it/sec) -training >> step=4638100, episode=774 reward=0.7648716 (520.92 it/sec) -training >> step=4638200, episode=774 reward=0.7927886 (552.67 it/sec) -training >> step=4638300, episode=774 reward=0.776426 (491.99 it/sec) -training >> step=4638400, episode=774 reward=0.7598734 (497.77 it/sec) -training >> step=4638500, episode=774 reward=0.7627047 (536.25 it/sec) -training >> step=4638600, episode=774 reward=0.7827606 (504.29 it/sec) -training >> step=4638700, episode=774 reward=0.780768 (548.42 it/sec) -training >> step=4638800, episode=774 reward=0.7590614 (511.38 it/sec) -training >> step=4638900, episode=774 reward=0.7820948 (507.45 it/sec) -training >> step=4639000, episode=774 reward=0.772488 (480.16 it/sec) -training >> step=4639100, episode=774 reward=0.7665351 (466.53 it/sec) -training >> step=4639200, episode=774 reward=0.7716589 (478.33 it/sec) -training >> step=4639300, episode=774 reward=0.754749 (493.67 it/sec) -training >> step=4639400, episode=774 reward=0.7627085 (492.17 it/sec) -training >> step=4639500, episode=774 reward=0.7811981 (500.05 it/sec) -training >> step=4639600, episode=774 reward=0.7865824 (456.69 it/sec) -training >> step=4639700, episode=774 reward=0.7708774 (502.03 it/sec) -training >> step=4639800, episode=774 reward=0.7824989 (484.30 it/sec) -training >> step=4639900, episode=774 reward=0.7859193 (513.68 it/sec) -training >> step=4640000, episode=774 reward=0.7744029 (486.19 it/sec) -training >> step=4640100, episode=774 reward=0.7615603 (462.29 it/sec) -training >> step=4640200, episode=774 reward=0.7742326 (440.93 it/sec) -training >> step=4640300, episode=774 reward=0.7881103 (477.74 it/sec) -training >> step=4640400, episode=774 reward=0.769124 (456.69 it/sec) -training >> step=4640500, episode=774 reward=0.778483 (411.42 it/sec) -training >> step=4640600, episode=774 reward=0.7876434 (454.17 it/sec) -training >> step=4640700, episode=774 reward=0.7808905 (466.49 it/sec) -training >> step=4640800, episode=774 reward=0.7744433 (465.07 it/sec) -training >> step=4640900, episode=774 reward=0.7710823 (465.79 it/sec) -training >> step=4641000, episode=774 reward=0.7912145 (471.39 it/sec) -training >> step=4641100, episode=774 reward=0.7601332 (472.83 it/sec) -training >> step=4641200, episode=774 reward=0.7607156 (501.77 it/sec) -training >> step=4641300, episode=774 reward=0.782109 (501.63 it/sec) -training >> step=4641400, episode=774 reward=0.7893289 (504.79 it/sec) -training >> step=4641500, episode=774 reward=0.7820999 (484.89 it/sec) -training >> step=4641600, episode=774 reward=0.7743435 (495.12 it/sec) -training >> step=4641700, episode=774 reward=0.7673976 (491.23 it/sec) -training >> step=4641800, episode=774 reward=0.7955043 (494.93 it/sec) -training >> step=4641900, episode=774 reward=0.786751 (444.66 it/sec) -training >> step=4642000, episode=774 reward=0.7868782 (505.88 it/sec) -training >> step=4642100, episode=774 reward=0.7544738 (436.23 it/sec) -training >> step=4642200, episode=774 reward=0.7720678 (494.84 it/sec) -training >> step=4642300, episode=774 reward=0.7789299 (460.68 it/sec) -training >> step=4642400, episode=774 reward=0.7772229 (505.26 it/sec) -training >> step=4642500, episode=774 reward=0.7692986 (463.91 it/sec) -training >> step=4642600, episode=774 reward=0.7821372 (334.02 it/sec) -training >> step=4642700, episode=774 reward=0.7545484 (479.28 it/sec) -training >> step=4642800, episode=774 reward=0.7640138 (490.48 it/sec) -training >> step=4642900, episode=774 reward=0.7805554 (459.87 it/sec) -training >> step=4643000, episode=774 reward=0.775335 (458.64 it/sec) -training >> step=4643100, episode=774 reward=0.7912485 (486.16 it/sec) -training >> step=4643200, episode=774 reward=0.7904035 (492.49 it/sec) -training >> step=4643300, episode=775 reward=0.7512888 (111.81 it/sec) -training >> step=4643400, episode=775 reward=0.7887993 (487.24 it/sec) -training >> step=4643500, episode=775 reward=0.7706202 (496.00 it/sec) -training >> step=4643600, episode=775 reward=0.7600925 (483.57 it/sec) -training >> step=4643700, episode=775 reward=0.7652968 (465.22 it/sec) -training >> step=4643800, episode=775 reward=0.7649621 (506.93 it/sec) -training >> step=4643900, episode=775 reward=0.8026811 (513.29 it/sec) -training >> step=4644000, episode=775 reward=0.7662976 (490.23 it/sec) -training >> step=4644100, episode=775 reward=0.7645785 (471.37 it/sec) -training >> step=4644200, episode=775 reward=0.7858728 (478.72 it/sec) -training >> step=4644300, episode=775 reward=0.7961952 (441.15 it/sec) -training >> step=4644400, episode=775 reward=0.7724934 (486.27 it/sec) -training >> step=4644500, episode=775 reward=0.7599831 (441.50 it/sec) -training >> step=4644600, episode=775 reward=0.775614 (453.40 it/sec) -training >> step=4644700, episode=775 reward=0.7820601 (494.00 it/sec) -training >> step=4644800, episode=775 reward=0.7780194 (472.01 it/sec) -training >> step=4644900, episode=775 reward=0.76424 (497.85 it/sec) -training >> step=4645000, episode=775 reward=0.7857776 (449.01 it/sec) -training >> step=4645100, episode=775 reward=0.7779227 (405.33 it/sec) -training >> step=4645200, episode=775 reward=0.7925927 (496.61 it/sec) -training >> step=4645300, episode=775 reward=0.7819474 (463.83 it/sec) -training >> step=4645400, episode=775 reward=0.7589252 (508.88 it/sec) -training >> step=4645500, episode=775 reward=0.7915901 (459.10 it/sec) -training >> step=4645600, episode=775 reward=0.778327 (493.69 it/sec) -training >> step=4645700, episode=775 reward=0.7924448 (529.80 it/sec) -training >> step=4645800, episode=775 reward=0.7677473 (467.20 it/sec) -training >> step=4645900, episode=775 reward=0.8021522 (430.25 it/sec) -training >> step=4646000, episode=775 reward=0.7655993 (470.58 it/sec) -training >> step=4646100, episode=775 reward=0.7691875 (442.02 it/sec) -training >> step=4646200, episode=775 reward=0.7842161 (419.72 it/sec) -training >> step=4646300, episode=775 reward=0.7526449 (467.76 it/sec) -training >> step=4646400, episode=775 reward=0.7698448 (492.43 it/sec) -training >> step=4646500, episode=775 reward=0.7632114 (468.90 it/sec) -training >> step=4646600, episode=775 reward=0.7832006 (461.32 it/sec) -training >> step=4646700, episode=775 reward=0.7859648 (492.21 it/sec) -training >> step=4646800, episode=775 reward=0.7935458 (474.12 it/sec) -training >> step=4646900, episode=775 reward=0.7760467 (457.05 it/sec) -training >> step=4647000, episode=775 reward=0.7744 (473.95 it/sec) -training >> step=4647100, episode=775 reward=0.7698797 (467.60 it/sec) -training >> step=4647200, episode=775 reward=0.793367 (487.17 it/sec) -training >> step=4647300, episode=775 reward=0.7841836 (465.65 it/sec) -training >> step=4647400, episode=775 reward=0.7672946 (458.80 it/sec) -training >> step=4647500, episode=775 reward=0.7773691 (490.97 it/sec) -training >> step=4647600, episode=775 reward=0.7647971 (508.32 it/sec) -training >> step=4647700, episode=775 reward=0.7701061 (473.70 it/sec) -training >> step=4647800, episode=775 reward=0.7612309 (476.51 it/sec) -training >> step=4647900, episode=775 reward=0.7752666 (531.51 it/sec) -training >> step=4648000, episode=775 reward=0.7648744 (445.52 it/sec) -training >> step=4648100, episode=775 reward=0.7735867 (500.49 it/sec) -training >> step=4648200, episode=775 reward=0.7698542 (440.07 it/sec) -training >> step=4648300, episode=775 reward=0.7852727 (492.11 it/sec) -training >> step=4648400, episode=775 reward=0.7787756 (410.39 it/sec) -training >> step=4648500, episode=775 reward=0.7815813 (408.60 it/sec) -training >> step=4648600, episode=775 reward=0.7780908 (465.23 it/sec) -training >> step=4648700, episode=775 reward=0.787322 (454.79 it/sec) -training >> step=4648800, episode=775 reward=0.7726562 (350.27 it/sec) -training >> step=4648900, episode=775 reward=0.7732009 (454.49 it/sec) -training >> step=4649000, episode=775 reward=0.7690116 (544.83 it/sec) -training >> step=4649100, episode=775 reward=0.771101 (432.75 it/sec) -training >> step=4649200, episode=775 reward=0.7899738 (474.31 it/sec) -training >> step=4649300, episode=776 reward=0.7772515 (121.95 it/sec) -training >> step=4649400, episode=776 reward=0.7725945 (484.14 it/sec) -training >> step=4649500, episode=776 reward=0.7747502 (431.32 it/sec) -training >> step=4649600, episode=776 reward=0.7738162 (432.34 it/sec) -training >> step=4649700, episode=776 reward=0.7884424 (485.11 it/sec) -training >> step=4649800, episode=776 reward=0.8029649 (454.38 it/sec) -training >> step=4649900, episode=776 reward=0.7931646 (449.42 it/sec) -training >> step=4650000, episode=776 reward=0.7747718 (475.73 it/sec) -training >> step=4650100, episode=776 reward=0.7962806 (446.54 it/sec) -training >> step=4650200, episode=776 reward=0.7788517 (447.21 it/sec) -training >> step=4650300, episode=776 reward=0.7706192 (428.82 it/sec) -training >> step=4650400, episode=776 reward=0.7749728 (455.73 it/sec) -training >> step=4650500, episode=776 reward=0.7893662 (445.77 it/sec) -training >> step=4650600, episode=776 reward=0.7659077 (488.37 it/sec) -training >> step=4650700, episode=776 reward=0.7837741 (485.81 it/sec) -training >> step=4650800, episode=776 reward=0.7668573 (468.87 it/sec) -training >> step=4650900, episode=776 reward=0.7572708 (438.23 it/sec) -training >> step=4651000, episode=776 reward=0.7639393 (446.62 it/sec) -training >> step=4651100, episode=776 reward=0.8002401 (453.51 it/sec) -training >> step=4651200, episode=776 reward=0.7718948 (498.54 it/sec) -training >> step=4651300, episode=776 reward=0.7966371 (483.95 it/sec) -training >> step=4651400, episode=776 reward=0.803065 (437.18 it/sec) -training >> step=4651500, episode=776 reward=0.7721397 (440.80 it/sec) -training >> step=4651600, episode=776 reward=0.776254 (495.64 it/sec) -training >> step=4651700, episode=776 reward=0.7846022 (498.18 it/sec) -training >> step=4651800, episode=776 reward=0.789399 (438.64 it/sec) -training >> step=4651900, episode=776 reward=0.7857131 (460.71 it/sec) -training >> step=4652000, episode=776 reward=0.7899364 (475.89 it/sec) -training >> step=4652100, episode=776 reward=0.7859954 (435.33 it/sec) -training >> step=4652200, episode=776 reward=0.7799639 (478.68 it/sec) -training >> step=4652300, episode=776 reward=0.7764356 (454.73 it/sec) -training >> step=4652400, episode=776 reward=0.7598856 (433.00 it/sec) -training >> step=4652500, episode=776 reward=0.7908652 (455.90 it/sec) -training >> step=4652600, episode=776 reward=0.7897826 (508.56 it/sec) -training >> step=4652700, episode=776 reward=0.789459 (449.27 it/sec) -training >> step=4652800, episode=776 reward=0.7777334 (490.81 it/sec) -training >> step=4652900, episode=776 reward=0.7651203 (524.23 it/sec) -training >> step=4653000, episode=776 reward=0.7922592 (532.07 it/sec) -training >> step=4653100, episode=776 reward=0.7781671 (548.57 it/sec) -training >> step=4653200, episode=776 reward=0.7957201 (520.68 it/sec) -training >> step=4653300, episode=776 reward=0.7627208 (505.06 it/sec) -training >> step=4653400, episode=776 reward=0.7590055 (523.04 it/sec) -training >> step=4653500, episode=776 reward=0.7652368 (519.16 it/sec) -training >> step=4653600, episode=776 reward=0.7795364 (512.20 it/sec) -training >> step=4653700, episode=776 reward=0.7739668 (535.09 it/sec) -training >> step=4653800, episode=776 reward=0.7804181 (515.61 it/sec) -training >> step=4653900, episode=776 reward=0.7783951 (462.38 it/sec) -training >> step=4654000, episode=776 reward=0.7844246 (539.51 it/sec) -training >> step=4654100, episode=776 reward=0.7726809 (522.61 it/sec) -training >> step=4654200, episode=776 reward=0.7706565 (507.37 it/sec) -training >> step=4654300, episode=776 reward=0.7680384 (509.62 it/sec) -training >> step=4654400, episode=776 reward=0.7637191 (545.49 it/sec) -training >> step=4654500, episode=776 reward=0.7578574 (475.82 it/sec) -training >> step=4654600, episode=776 reward=0.7745796 (501.79 it/sec) -training >> step=4654700, episode=776 reward=0.7894311 (544.36 it/sec) -training >> step=4654800, episode=776 reward=0.7759119 (541.71 it/sec) -training >> step=4654900, episode=776 reward=0.7918456 (518.05 it/sec) -training >> step=4655000, episode=776 reward=0.7637569 (384.95 it/sec) -training >> step=4655100, episode=776 reward=0.7760849 (526.24 it/sec) -training >> step=4655200, episode=776 reward=0.7860315 (539.16 it/sec) -training >> step=4655300, episode=777 reward=0.7774435 (124.05 it/sec) -training >> step=4655400, episode=777 reward=0.7776007 (503.24 it/sec) -training >> step=4655500, episode=777 reward=0.7875499 (548.08 it/sec) -training >> step=4655600, episode=777 reward=0.7737523 (518.12 it/sec) -training >> step=4655700, episode=777 reward=0.7706723 (496.66 it/sec) -training >> step=4655800, episode=777 reward=0.761777 (522.14 it/sec) -training >> step=4655900, episode=777 reward=0.7745413 (554.77 it/sec) -training >> step=4656000, episode=777 reward=0.7839629 (500.41 it/sec) -training >> step=4656100, episode=777 reward=0.7491731 (471.66 it/sec) -training >> step=4656200, episode=777 reward=0.8142236 (463.23 it/sec) -training >> step=4656300, episode=777 reward=0.7732364 (426.29 it/sec) -training >> step=4656400, episode=777 reward=0.7923781 (469.16 it/sec) -training >> step=4656500, episode=777 reward=0.7696684 (444.60 it/sec) -training >> step=4656600, episode=777 reward=0.77691 (470.66 it/sec) -training >> step=4656700, episode=777 reward=0.7943767 (435.63 it/sec) -training >> step=4656800, episode=777 reward=0.7966043 (376.87 it/sec) -training >> step=4656900, episode=777 reward=0.7606888 (557.70 it/sec) -training >> step=4657000, episode=777 reward=0.7742304 (461.11 it/sec) -training >> step=4657100, episode=777 reward=0.7638856 (485.17 it/sec) -training >> step=4657200, episode=777 reward=0.7893487 (438.12 it/sec) -training >> step=4657300, episode=777 reward=0.7678043 (494.01 it/sec) -training >> step=4657400, episode=777 reward=0.7797489 (482.54 it/sec) -training >> step=4657500, episode=777 reward=0.7764657 (481.12 it/sec) -training >> step=4657600, episode=777 reward=0.7880462 (501.22 it/sec) -training >> step=4657700, episode=777 reward=0.7777231 (507.24 it/sec) -training >> step=4657800, episode=777 reward=0.7739835 (508.40 it/sec) -training >> step=4657900, episode=777 reward=0.7705811 (520.56 it/sec) -training >> step=4658000, episode=777 reward=0.7790098 (537.27 it/sec) -training >> step=4658100, episode=777 reward=0.7716357 (493.67 it/sec) -training >> step=4658200, episode=777 reward=0.7738432 (479.80 it/sec) -training >> step=4658300, episode=777 reward=0.7961271 (531.17 it/sec) -training >> step=4658400, episode=777 reward=0.7844483 (521.23 it/sec) -training >> step=4658500, episode=777 reward=0.7876245 (483.43 it/sec) -training >> step=4658600, episode=777 reward=0.7828553 (495.41 it/sec) -training >> step=4658700, episode=777 reward=0.7656214 (534.79 it/sec) -training >> step=4658800, episode=777 reward=0.7901719 (511.82 it/sec) -training >> step=4658900, episode=777 reward=0.7745963 (508.24 it/sec) -training >> step=4659000, episode=777 reward=0.7844459 (518.83 it/sec) -training >> step=4659100, episode=777 reward=0.7675461 (522.17 it/sec) -training >> step=4659200, episode=777 reward=0.7830819 (507.91 it/sec) -training >> step=4659300, episode=777 reward=0.7993106 (514.03 it/sec) -training >> step=4659400, episode=777 reward=0.7933611 (553.38 it/sec) -training >> step=4659500, episode=777 reward=0.7857264 (516.14 it/sec) -training >> step=4659600, episode=777 reward=0.76792 (449.04 it/sec) -training >> step=4659700, episode=777 reward=0.7834587 (507.15 it/sec) -training >> step=4659800, episode=777 reward=0.7565177 (505.15 it/sec) -training >> step=4659900, episode=777 reward=0.7727196 (519.79 it/sec) -training >> step=4660000, episode=777 reward=0.7811345 (511.60 it/sec) -training >> step=4660100, episode=777 reward=0.7720366 (460.41 it/sec) -training >> step=4660200, episode=777 reward=0.7583892 (516.39 it/sec) -training >> step=4660300, episode=777 reward=0.7575545 (472.77 it/sec) -training >> step=4660400, episode=777 reward=0.7684861 (434.72 it/sec) -training >> step=4660500, episode=777 reward=0.7465547 (481.90 it/sec) -training >> step=4660600, episode=777 reward=0.7917659 (464.39 it/sec) -training >> step=4660700, episode=777 reward=0.7790025 (489.44 it/sec) -training >> step=4660800, episode=777 reward=0.7770325 (498.79 it/sec) -training >> step=4660900, episode=777 reward=0.7777944 (535.70 it/sec) -training >> step=4661000, episode=777 reward=0.7817904 (492.85 it/sec) -training >> step=4661100, episode=777 reward=0.7905357 (368.61 it/sec) -training >> step=4661200, episode=777 reward=0.7695394 (533.89 it/sec) -training >> step=4661300, episode=778 reward=0.7754647 (122.51 it/sec) -training >> step=4661400, episode=778 reward=0.7670851 (478.67 it/sec) -training >> step=4661500, episode=778 reward=0.7803001 (502.60 it/sec) -training >> step=4661600, episode=778 reward=0.7849958 (514.48 it/sec) -training >> step=4661700, episode=778 reward=0.7688066 (473.97 it/sec) -training >> step=4661800, episode=778 reward=0.7674403 (456.21 it/sec) -training >> step=4661900, episode=778 reward=0.7695217 (451.49 it/sec) -training >> step=4662000, episode=778 reward=0.7821967 (455.22 it/sec) -training >> step=4662100, episode=778 reward=0.7790866 (414.17 it/sec) -training >> step=4662200, episode=778 reward=0.7965626 (380.62 it/sec) -training >> step=4662300, episode=778 reward=0.784775 (472.50 it/sec) -training >> step=4662400, episode=778 reward=0.7837825 (498.54 it/sec) -training >> step=4662500, episode=778 reward=0.7828446 (505.07 it/sec) -training >> step=4662600, episode=778 reward=0.7931325 (541.91 it/sec) -training >> step=4662700, episode=778 reward=0.7794092 (520.08 it/sec) -training >> step=4662800, episode=778 reward=0.7930771 (475.64 it/sec) -training >> step=4662900, episode=778 reward=0.7925919 (476.03 it/sec) -training >> step=4663000, episode=778 reward=0.7920986 (517.65 it/sec) -training >> step=4663100, episode=778 reward=0.764885 (508.32 it/sec) -training >> step=4663200, episode=778 reward=0.7706459 (450.16 it/sec) -training >> step=4663300, episode=778 reward=0.7799378 (485.63 it/sec) -training >> step=4663400, episode=778 reward=0.778692 (484.73 it/sec) -training >> step=4663500, episode=778 reward=0.7820125 (493.14 it/sec) -training >> step=4663600, episode=778 reward=0.7979142 (527.19 it/sec) -training >> step=4663700, episode=778 reward=0.7780091 (544.70 it/sec) -training >> step=4663800, episode=778 reward=0.7864283 (533.07 it/sec) -training >> step=4663900, episode=778 reward=0.7915382 (531.42 it/sec) -training >> step=4664000, episode=778 reward=0.7763486 (489.40 it/sec) -training >> step=4664100, episode=778 reward=0.7777451 (547.83 it/sec) -training >> step=4664200, episode=778 reward=0.7702259 (525.68 it/sec) -training >> step=4664300, episode=778 reward=0.7808418 (521.60 it/sec) -training >> step=4664400, episode=778 reward=0.773271 (506.28 it/sec) -training >> step=4664500, episode=778 reward=0.7774227 (490.87 it/sec) -training >> step=4664600, episode=778 reward=0.7896231 (492.71 it/sec) -training >> step=4664700, episode=778 reward=0.7886114 (517.99 it/sec) -training >> step=4664800, episode=778 reward=0.7582762 (516.93 it/sec) -training >> step=4664900, episode=778 reward=0.7629235 (563.98 it/sec) -training >> step=4665000, episode=778 reward=0.7817537 (516.22 it/sec) -training >> step=4665100, episode=778 reward=0.763647 (499.39 it/sec) -training >> step=4665200, episode=778 reward=0.7922418 (536.64 it/sec) -training >> step=4665300, episode=778 reward=0.7811742 (498.90 it/sec) -training >> step=4665400, episode=778 reward=0.8029917 (499.10 it/sec) -training >> step=4665500, episode=778 reward=0.7808905 (542.50 it/sec) -training >> step=4665600, episode=778 reward=0.774155 (538.03 it/sec) -training >> step=4665700, episode=778 reward=0.7793083 (487.23 it/sec) -training >> step=4665800, episode=778 reward=0.7871136 (523.05 it/sec) -training >> step=4665900, episode=778 reward=0.7764246 (511.28 it/sec) -training >> step=4666000, episode=778 reward=0.7754485 (422.52 it/sec) -training >> step=4666100, episode=778 reward=0.782424 (428.02 it/sec) -training >> step=4666200, episode=778 reward=0.7711402 (452.80 it/sec) -training >> step=4666300, episode=778 reward=0.7644246 (540.47 it/sec) -training >> step=4666400, episode=778 reward=0.8106905 (505.41 it/sec) -training >> step=4666500, episode=778 reward=0.7725216 (518.17 it/sec) -training >> step=4666600, episode=778 reward=0.776165 (470.62 it/sec) -training >> step=4666700, episode=778 reward=0.7866682 (484.72 it/sec) -training >> step=4666800, episode=778 reward=0.7821699 (458.38 it/sec) -training >> step=4666900, episode=778 reward=0.7739351 (492.62 it/sec) -training >> step=4667000, episode=778 reward=0.7695701 (480.14 it/sec) -training >> step=4667100, episode=778 reward=0.7557672 (461.84 it/sec) -training >> step=4667200, episode=778 reward=0.7838137 (527.20 it/sec) -training >> step=4667300, episode=779 reward=0.7706127 (117.86 it/sec) -training >> step=4667400, episode=779 reward=0.7744593 (513.08 it/sec) -training >> step=4667500, episode=779 reward=0.7375491 (535.38 it/sec) -training >> step=4667600, episode=779 reward=0.7749844 (504.14 it/sec) -training >> step=4667700, episode=779 reward=0.7780729 (562.15 it/sec) -training >> step=4667800, episode=779 reward=0.7473432 (503.79 it/sec) -training >> step=4667900, episode=779 reward=0.7706528 (527.15 it/sec) -training >> step=4668000, episode=779 reward=0.7500702 (527.19 it/sec) -training >> step=4668100, episode=779 reward=0.7932245 (479.36 it/sec) -training >> step=4668200, episode=779 reward=0.767391 (446.63 it/sec) -training >> step=4668300, episode=779 reward=0.7892336 (447.76 it/sec) -training >> step=4668400, episode=779 reward=0.7773868 (455.93 it/sec) -training >> step=4668500, episode=779 reward=0.7680557 (451.45 it/sec) -training >> step=4668600, episode=779 reward=0.7617716 (468.47 it/sec) -training >> step=4668700, episode=779 reward=0.7959525 (445.23 it/sec) -training >> step=4668800, episode=779 reward=0.7668864 (372.59 it/sec) -training >> step=4668900, episode=779 reward=0.7841469 (461.93 it/sec) -training >> step=4669000, episode=779 reward=0.7709609 (493.12 it/sec) -training >> step=4669100, episode=779 reward=0.7924958 (481.31 it/sec) -training >> step=4669200, episode=779 reward=0.7622761 (513.95 it/sec) -training >> step=4669300, episode=779 reward=0.7783713 (524.38 it/sec) -training >> step=4669400, episode=779 reward=0.7953908 (479.31 it/sec) -training >> step=4669500, episode=779 reward=0.7970504 (502.50 it/sec) -training >> step=4669600, episode=779 reward=0.777748 (468.28 it/sec) -training >> step=4669700, episode=779 reward=0.7956069 (479.40 it/sec) -training >> step=4669800, episode=779 reward=0.7715554 (476.40 it/sec) -training >> step=4669900, episode=779 reward=0.7726647 (501.23 it/sec) -training >> step=4670000, episode=779 reward=0.7760254 (523.76 it/sec) -training >> step=4670100, episode=779 reward=0.7983335 (480.73 it/sec) -training >> step=4670200, episode=779 reward=0.7802694 (523.64 it/sec) -training >> step=4670300, episode=779 reward=0.7832078 (441.52 it/sec) -training >> step=4670400, episode=779 reward=0.781895 (487.66 it/sec) -training >> step=4670500, episode=779 reward=0.7710416 (490.99 it/sec) -training >> step=4670600, episode=779 reward=0.768414 (504.91 it/sec) -training >> step=4670700, episode=779 reward=0.7661032 (501.67 it/sec) -training >> step=4670800, episode=779 reward=0.7816924 (531.60 it/sec) -training >> step=4670900, episode=779 reward=0.7824516 (521.47 it/sec) -training >> step=4671000, episode=779 reward=0.7907993 (479.81 it/sec) -training >> step=4671100, episode=779 reward=0.7550718 (501.10 it/sec) -training >> step=4671200, episode=779 reward=0.7859093 (481.26 it/sec) -training >> step=4671300, episode=779 reward=0.7789132 (484.55 it/sec) -training >> step=4671400, episode=779 reward=0.7557449 (524.77 it/sec) -training >> step=4671500, episode=779 reward=0.7887347 (412.70 it/sec) -training >> step=4671600, episode=779 reward=0.7705507 (436.86 it/sec) -training >> step=4671700, episode=779 reward=0.7829553 (449.54 it/sec) -training >> step=4671800, episode=779 reward=0.7783075 (458.80 it/sec) -training >> step=4671900, episode=779 reward=0.7795796 (471.98 it/sec) -training >> step=4672000, episode=779 reward=0.7856535 (440.99 it/sec) -training >> step=4672100, episode=779 reward=0.7803732 (454.02 it/sec) -training >> step=4672200, episode=779 reward=0.76346 (450.70 it/sec) -training >> step=4672300, episode=779 reward=0.7723898 (487.62 it/sec) -training >> step=4672400, episode=779 reward=0.7792852 (479.02 it/sec) -training >> step=4672500, episode=779 reward=0.764747 (542.75 it/sec) -training >> step=4672600, episode=779 reward=0.7526794 (501.77 it/sec) -training >> step=4672700, episode=779 reward=0.7744458 (461.44 it/sec) -training >> step=4672800, episode=779 reward=0.7834064 (470.71 it/sec) -training >> step=4672900, episode=779 reward=0.7848954 (448.17 it/sec) -training >> step=4673000, episode=779 reward=0.7867105 (426.74 it/sec) -training >> step=4673100, episode=779 reward=0.795917 (482.76 it/sec) -training >> step=4673200, episode=779 reward=0.7801918 (385.69 it/sec) -training >> step=4673300, episode=780 reward=0.7710227 (95.90 it/sec) -training >> step=4673400, episode=780 reward=0.7478241 (476.71 it/sec) -training >> step=4673500, episode=780 reward=0.7803507 (500.72 it/sec) -training >> step=4673600, episode=780 reward=0.7820551 (525.12 it/sec) -training >> step=4673700, episode=780 reward=0.7868726 (511.91 it/sec) -training >> step=4673800, episode=780 reward=0.7890869 (517.46 it/sec) -training >> step=4673900, episode=780 reward=0.7724158 (542.31 it/sec) -training >> step=4674000, episode=780 reward=0.7922633 (531.69 it/sec) -training >> step=4674100, episode=780 reward=0.7546405 (520.25 it/sec) -training >> step=4674200, episode=780 reward=0.7811033 (498.58 it/sec) -training >> step=4674300, episode=780 reward=0.7741063 (493.48 it/sec) -training >> step=4674400, episode=780 reward=0.7635524 (558.38 it/sec) -training >> step=4674500, episode=780 reward=0.7964956 (515.71 it/sec) -training >> step=4674600, episode=780 reward=0.788276 (534.10 it/sec) -training >> step=4674700, episode=780 reward=0.7801392 (524.99 it/sec) -training >> step=4674800, episode=780 reward=0.7769092 (509.24 it/sec) -training >> step=4674900, episode=780 reward=0.7840026 (466.91 it/sec) -training >> step=4675000, episode=780 reward=0.7790774 (468.15 it/sec) -training >> step=4675100, episode=780 reward=0.7869642 (444.84 it/sec) -training >> step=4675200, episode=780 reward=0.7811183 (474.65 it/sec) -training >> step=4675300, episode=780 reward=0.7831848 (479.50 it/sec) -training >> step=4675400, episode=780 reward=0.794771 (543.66 it/sec) -training >> step=4675500, episode=780 reward=0.763998 (503.78 it/sec) -training >> step=4675600, episode=780 reward=0.7840753 (462.92 it/sec) -training >> step=4675700, episode=780 reward=0.7687419 (495.28 it/sec) -training >> step=4675800, episode=780 reward=0.7643448 (445.62 it/sec) -training >> step=4675900, episode=780 reward=0.7569218 (429.22 it/sec) -training >> step=4676000, episode=780 reward=0.7761458 (448.68 it/sec) -training >> step=4676100, episode=780 reward=0.7861992 (416.67 it/sec) -training >> step=4676200, episode=780 reward=0.7839006 (381.36 it/sec) -training >> step=4676300, episode=780 reward=0.7951723 (434.94 it/sec) -training >> step=4676400, episode=780 reward=0.7525973 (510.77 it/sec) -training >> step=4676500, episode=780 reward=0.7599294 (491.22 it/sec) -training >> step=4676600, episode=780 reward=0.7788538 (469.27 it/sec) -training >> step=4676700, episode=780 reward=0.7857378 (481.14 it/sec) -training >> step=4676800, episode=780 reward=0.7662557 (504.99 it/sec) -training >> step=4676900, episode=780 reward=0.7998108 (513.66 it/sec) -training >> step=4677000, episode=780 reward=0.7610955 (478.85 it/sec) -training >> step=4677100, episode=780 reward=0.8030111 (528.75 it/sec) -training >> step=4677200, episode=780 reward=0.7597494 (501.43 it/sec) -training >> step=4677300, episode=780 reward=0.7845077 (526.01 it/sec) -training >> step=4677400, episode=780 reward=0.7629176 (506.22 it/sec) -training >> step=4677500, episode=780 reward=0.7927225 (501.50 it/sec) -training >> step=4677600, episode=780 reward=0.7832265 (467.13 it/sec) -training >> step=4677700, episode=780 reward=0.7837011 (499.85 it/sec) -training >> step=4677800, episode=780 reward=0.7778825 (513.92 it/sec) -training >> step=4677900, episode=780 reward=0.7832062 (563.18 it/sec) -training >> step=4678000, episode=780 reward=0.782241 (532.06 it/sec) -training >> step=4678100, episode=780 reward=0.7950809 (510.29 it/sec) -training >> step=4678200, episode=780 reward=0.7744924 (522.38 it/sec) -training >> step=4678300, episode=780 reward=0.7998469 (487.05 it/sec) -training >> step=4678400, episode=780 reward=0.7898982 (517.77 it/sec) -training >> step=4678500, episode=780 reward=0.7819631 (525.07 it/sec) -training >> step=4678600, episode=780 reward=0.7796408 (523.53 it/sec) -training >> step=4678700, episode=780 reward=0.7686483 (496.28 it/sec) -training >> step=4678800, episode=780 reward=0.7888657 (519.39 it/sec) -training >> step=4678900, episode=780 reward=0.7893417 (524.69 it/sec) -training >> step=4679000, episode=780 reward=0.7833693 (538.11 it/sec) -training >> step=4679100, episode=780 reward=0.7628871 (493.73 it/sec) -training >> step=4679200, episode=780 reward=0.7784213 (520.30 it/sec) -training >> step=4679300, episode=781 reward=0.7788786 (103.88 it/sec) -training >> step=4679400, episode=781 reward=0.7556205 (459.73 it/sec) -training >> step=4679500, episode=781 reward=0.7559276 (502.87 it/sec) -training >> step=4679600, episode=781 reward=0.7834223 (521.63 it/sec) -training >> step=4679700, episode=781 reward=0.7694851 (454.06 it/sec) -training >> step=4679800, episode=781 reward=0.7743295 (462.77 it/sec) -training >> step=4679900, episode=781 reward=0.7794598 (406.83 it/sec) -training >> step=4680000, episode=781 reward=0.7848409 (440.76 it/sec) -training >> step=4680100, episode=781 reward=0.7734876 (462.22 it/sec) -training >> step=4680200, episode=781 reward=0.7599834 (400.40 it/sec) -training >> step=4680300, episode=781 reward=0.7423409 (482.44 it/sec) -training >> step=4680400, episode=781 reward=0.7785521 (404.82 it/sec) -training >> step=4680500, episode=781 reward=0.7767428 (423.05 it/sec) -training >> step=4680600, episode=781 reward=0.7738301 (513.74 it/sec) -training >> step=4680700, episode=781 reward=0.7948039 (490.10 it/sec) -training >> step=4680800, episode=781 reward=0.783216 (483.79 it/sec) -training >> step=4680900, episode=781 reward=0.7621277 (515.13 it/sec) -training >> step=4681000, episode=781 reward=0.7714073 (523.02 it/sec) -training >> step=4681100, episode=781 reward=0.7732199 (511.83 it/sec) -training >> step=4681200, episode=781 reward=0.7662739 (529.47 it/sec) -training >> step=4681300, episode=781 reward=0.7726417 (527.86 it/sec) -training >> step=4681400, episode=781 reward=0.7977226 (529.04 it/sec) -training >> step=4681500, episode=781 reward=0.7896551 (516.55 it/sec) -training >> step=4681600, episode=781 reward=0.7646645 (519.49 it/sec) -training >> step=4681700, episode=781 reward=0.7880211 (514.27 it/sec) -training >> step=4681800, episode=781 reward=0.7818741 (500.31 it/sec) -training >> step=4681900, episode=781 reward=0.7649002 (533.30 it/sec) -training >> step=4682000, episode=781 reward=0.7865308 (450.97 it/sec) -training >> step=4682100, episode=781 reward=0.7833311 (499.52 it/sec) -training >> step=4682200, episode=781 reward=0.7743682 (504.77 it/sec) -training >> step=4682300, episode=781 reward=0.7703655 (525.86 it/sec) -training >> step=4682400, episode=781 reward=0.7651334 (513.55 it/sec) -training >> step=4682500, episode=781 reward=0.7899376 (520.33 it/sec) -training >> step=4682600, episode=781 reward=0.7694581 (542.70 it/sec) -training >> step=4682700, episode=781 reward=0.7603847 (504.43 it/sec) -training >> step=4682800, episode=781 reward=0.7908447 (521.35 it/sec) -training >> step=4682900, episode=781 reward=0.7631891 (538.70 it/sec) -training >> step=4683000, episode=781 reward=0.7635922 (509.38 it/sec) -training >> step=4683100, episode=781 reward=0.7786629 (525.70 it/sec) -training >> step=4683200, episode=781 reward=0.771533 (473.44 it/sec) -training >> step=4683300, episode=781 reward=0.7810443 (487.12 it/sec) -training >> step=4683400, episode=781 reward=0.7671459 (450.71 it/sec) -training >> step=4683500, episode=781 reward=0.7769073 (498.55 it/sec) -training >> step=4683600, episode=781 reward=0.7802522 (543.76 it/sec) -training >> step=4683700, episode=781 reward=0.7637883 (517.41 it/sec) -training >> step=4683800, episode=781 reward=0.778713 (529.23 it/sec) -training >> step=4683900, episode=781 reward=0.7784394 (529.35 it/sec) -training >> step=4684000, episode=781 reward=0.774333 (518.65 it/sec) -training >> step=4684100, episode=781 reward=0.7503332 (509.47 it/sec) -training >> step=4684200, episode=781 reward=0.7975412 (524.12 it/sec) -training >> step=4684300, episode=781 reward=0.7742153 (546.88 it/sec) -training >> step=4684400, episode=781 reward=0.8021792 (529.62 it/sec) -training >> step=4684500, episode=781 reward=0.7725018 (506.69 it/sec) -training >> step=4684600, episode=781 reward=0.7865367 (502.85 it/sec) -training >> step=4684700, episode=781 reward=0.7871234 (525.83 it/sec) -training >> step=4684800, episode=781 reward=0.7741113 (522.20 it/sec) -training >> step=4684900, episode=781 reward=0.7825419 (469.81 it/sec) -training >> step=4685000, episode=781 reward=0.7664697 (487.67 it/sec) -training >> step=4685100, episode=781 reward=0.7793881 (493.31 it/sec) -training >> step=4685200, episode=781 reward=0.7752228 (508.00 it/sec) -training >> step=4685300, episode=782 reward=0.7552339 (101.08 it/sec) -training >> step=4685400, episode=782 reward=0.7806464 (516.35 it/sec) -training >> step=4685500, episode=782 reward=0.7587306 (496.54 it/sec) -training >> step=4685600, episode=782 reward=0.775566 (507.86 it/sec) -training >> step=4685700, episode=782 reward=0.7553211 (529.00 it/sec) -training >> step=4685800, episode=782 reward=0.7890811 (498.67 it/sec) -training >> step=4685900, episode=782 reward=0.7641379 (519.58 it/sec) -training >> step=4686000, episode=782 reward=0.7822084 (524.40 it/sec) -training >> step=4686100, episode=782 reward=0.7918431 (540.44 it/sec) -training >> step=4686200, episode=782 reward=0.776757 (530.29 it/sec) -training >> step=4686300, episode=782 reward=0.7824349 (502.95 it/sec) -training >> step=4686400, episode=782 reward=0.7846662 (549.88 it/sec) -training >> step=4686500, episode=782 reward=0.7710205 (520.06 it/sec) -training >> step=4686600, episode=782 reward=0.7927276 (520.44 it/sec) -training >> step=4686700, episode=782 reward=0.7618049 (465.42 it/sec) -training >> step=4686800, episode=782 reward=0.7755405 (482.81 it/sec) -training >> step=4686900, episode=782 reward=0.7745518 (492.03 it/sec) -training >> step=4687000, episode=782 reward=0.7752558 (489.50 it/sec) -training >> step=4687100, episode=782 reward=0.759949 (478.44 it/sec) -training >> step=4687200, episode=782 reward=0.7642692 (456.67 it/sec) -training >> step=4687300, episode=782 reward=0.7876585 (453.46 it/sec) -training >> step=4687400, episode=782 reward=0.8058412 (471.24 it/sec) -training >> step=4687500, episode=782 reward=0.7729675 (481.00 it/sec) -training >> step=4687600, episode=782 reward=0.7653312 (439.78 it/sec) -training >> step=4687700, episode=782 reward=0.7714098 (428.64 it/sec) -training >> step=4687800, episode=782 reward=0.7709475 (438.75 it/sec) -training >> step=4687900, episode=782 reward=0.7753009 (537.77 it/sec) -training >> step=4688000, episode=782 reward=0.7899655 (512.85 it/sec) -training >> step=4688100, episode=782 reward=0.7782631 (500.73 it/sec) -training >> step=4688200, episode=782 reward=0.772874 (532.62 it/sec) -training >> step=4688300, episode=782 reward=0.7963027 (518.84 it/sec) -training >> step=4688400, episode=782 reward=0.8008237 (526.02 it/sec) -training >> step=4688500, episode=782 reward=0.7813089 (450.20 it/sec) -training >> step=4688600, episode=782 reward=0.7661338 (526.17 it/sec) -training >> step=4688700, episode=782 reward=0.777608 (503.29 it/sec) -training >> step=4688800, episode=782 reward=0.7777323 (525.46 it/sec) -training >> step=4688900, episode=782 reward=0.7857289 (470.81 it/sec) -training >> step=4689000, episode=782 reward=0.7907308 (527.37 it/sec) -training >> step=4689100, episode=782 reward=0.8041456 (511.27 it/sec) -training >> step=4689200, episode=782 reward=0.7735986 (489.94 it/sec) -training >> step=4689300, episode=782 reward=0.7817508 (521.51 it/sec) -training >> step=4689400, episode=782 reward=0.7681121 (524.61 it/sec) -training >> step=4689500, episode=782 reward=0.7578422 (499.09 it/sec) -training >> step=4689600, episode=782 reward=0.7862338 (482.26 it/sec) -training >> step=4689700, episode=782 reward=0.7682704 (498.46 it/sec) -training >> step=4689800, episode=782 reward=0.7703725 (468.30 it/sec) -training >> step=4689900, episode=782 reward=0.7695018 (431.50 it/sec) -training >> step=4690000, episode=782 reward=0.7709027 (462.30 it/sec) -training >> step=4690100, episode=782 reward=0.7684509 (475.02 it/sec) -training >> step=4690200, episode=782 reward=0.7753442 (481.94 it/sec) -training >> step=4690300, episode=782 reward=0.7872044 (464.31 it/sec) -training >> step=4690400, episode=782 reward=0.7968105 (474.27 it/sec) -training >> step=4690500, episode=782 reward=0.7972689 (471.00 it/sec) -training >> step=4690600, episode=782 reward=0.7637265 (513.02 it/sec) -training >> step=4690700, episode=782 reward=0.7585037 (516.84 it/sec) -training >> step=4690800, episode=782 reward=0.7810794 (520.51 it/sec) -training >> step=4690900, episode=782 reward=0.7749434 (499.40 it/sec) -training >> step=4691000, episode=782 reward=0.7723137 (504.72 it/sec) -training >> step=4691100, episode=782 reward=0.7781072 (484.77 it/sec) -training >> step=4691200, episode=782 reward=0.7850496 (508.75 it/sec) -training >> step=4691300, episode=783 reward=0.7656919 (95.76 it/sec) -training >> step=4691400, episode=783 reward=0.7673537 (487.45 it/sec) -training >> step=4691500, episode=783 reward=0.7615651 (497.31 it/sec) -training >> step=4691600, episode=783 reward=0.7643898 (531.24 it/sec) -training >> step=4691700, episode=783 reward=0.8022554 (489.32 it/sec) -training >> step=4691800, episode=783 reward=0.761097 (517.17 it/sec) -training >> step=4691900, episode=783 reward=0.7819281 (482.98 it/sec) -training >> step=4692000, episode=783 reward=0.7680918 (458.70 it/sec) -training >> step=4692100, episode=783 reward=0.7917815 (494.05 it/sec) -training >> step=4692200, episode=783 reward=0.7905365 (490.25 it/sec) -training >> step=4692300, episode=783 reward=0.7617059 (494.88 it/sec) -training >> step=4692400, episode=783 reward=0.7919352 (456.26 it/sec) -training >> step=4692500, episode=783 reward=0.7703435 (490.10 it/sec) -training >> step=4692600, episode=783 reward=0.7899536 (502.48 it/sec) -training >> step=4692700, episode=783 reward=0.7952394 (522.25 it/sec) -training >> step=4692800, episode=783 reward=0.7736997 (480.80 it/sec) -training >> step=4692900, episode=783 reward=0.7983487 (497.01 it/sec) -training >> step=4693000, episode=783 reward=0.7943385 (496.53 it/sec) -training >> step=4693100, episode=783 reward=0.763531 (498.13 it/sec) -training >> step=4693200, episode=783 reward=0.7836939 (510.17 it/sec) -training >> step=4693300, episode=783 reward=0.7659612 (511.47 it/sec) -training >> step=4693400, episode=783 reward=0.7937688 (527.23 it/sec) -training >> step=4693500, episode=783 reward=0.7692372 (459.17 it/sec) -training >> step=4693600, episode=783 reward=0.810524 (491.04 it/sec) -training >> step=4693700, episode=783 reward=0.7686607 (504.30 it/sec) -training >> step=4693800, episode=783 reward=0.7664341 (511.88 it/sec) -training >> step=4693900, episode=783 reward=0.7786343 (493.94 it/sec) -training >> step=4694000, episode=783 reward=0.7643217 (496.99 it/sec) -training >> step=4694100, episode=783 reward=0.7777277 (538.18 it/sec) -training >> step=4694200, episode=783 reward=0.7717136 (479.88 it/sec) -training >> step=4694300, episode=783 reward=0.7861158 (511.07 it/sec) -training >> step=4694400, episode=783 reward=0.7658608 (477.83 it/sec) -training >> step=4694500, episode=783 reward=0.7649982 (496.75 it/sec) -training >> step=4694600, episode=783 reward=0.7821813 (473.54 it/sec) -training >> step=4694700, episode=783 reward=0.7639933 (421.43 it/sec) -training >> step=4694800, episode=783 reward=0.7715725 (525.69 it/sec) -training >> step=4694900, episode=783 reward=0.7799847 (490.97 it/sec) -training >> step=4695000, episode=783 reward=0.7731327 (490.86 it/sec) -training >> step=4695100, episode=783 reward=0.8007854 (488.42 it/sec) -training >> step=4695200, episode=783 reward=0.7735916 (506.00 it/sec) -training >> step=4695300, episode=783 reward=0.7648241 (483.21 it/sec) -training >> step=4695400, episode=783 reward=0.7860976 (509.10 it/sec) -training >> step=4695500, episode=783 reward=0.765641 (505.55 it/sec) -training >> step=4695600, episode=783 reward=0.7886792 (540.48 it/sec) -training >> step=4695700, episode=783 reward=0.7917474 (500.96 it/sec) -training >> step=4695800, episode=783 reward=0.7887106 (517.74 it/sec) -training >> step=4695900, episode=783 reward=0.7944132 (537.10 it/sec) -training >> step=4696000, episode=783 reward=0.7612929 (477.07 it/sec) -training >> step=4696100, episode=783 reward=0.7764442 (522.09 it/sec) -training >> step=4696200, episode=783 reward=0.7822922 (470.86 it/sec) -training >> step=4696300, episode=783 reward=0.7774358 (518.37 it/sec) -training >> step=4696400, episode=783 reward=0.7831354 (508.57 it/sec) -training >> step=4696500, episode=783 reward=0.7771797 (474.06 it/sec) -training >> step=4696600, episode=783 reward=0.7669782 (482.40 it/sec) -training >> step=4696700, episode=783 reward=0.7900998 (515.52 it/sec) -training >> step=4696800, episode=783 reward=0.7870757 (521.99 it/sec) -training >> step=4696900, episode=783 reward=0.7746629 (508.07 it/sec) -training >> step=4697000, episode=783 reward=0.7958415 (496.17 it/sec) -training >> step=4697100, episode=783 reward=0.7619986 (492.65 it/sec) -training >> step=4697200, episode=783 reward=0.7449126 (507.11 it/sec) -training >> step=4697300, episode=784 reward=0.7778283 (106.12 it/sec) -training >> step=4697400, episode=784 reward=0.7617192 (367.32 it/sec) -training >> step=4697500, episode=784 reward=0.7626172 (468.70 it/sec) -training >> step=4697600, episode=784 reward=0.7577702 (502.65 it/sec) -training >> step=4697700, episode=784 reward=0.7839339 (450.63 it/sec) -training >> step=4697800, episode=784 reward=0.7903893 (469.79 it/sec) -training >> step=4697900, episode=784 reward=0.7752458 (491.24 it/sec) -training >> step=4698000, episode=784 reward=0.7855377 (489.28 it/sec) -training >> step=4698100, episode=784 reward=0.7663383 (535.90 it/sec) -training >> step=4698200, episode=784 reward=0.7911273 (458.14 it/sec) -training >> step=4698300, episode=784 reward=0.7920557 (453.99 it/sec) -training >> step=4698400, episode=784 reward=0.7759487 (482.76 it/sec) -training >> step=4698500, episode=784 reward=0.7877595 (466.16 it/sec) -training >> step=4698600, episode=784 reward=0.7701566 (511.37 it/sec) -training >> step=4698700, episode=784 reward=0.7933822 (509.12 it/sec) -training >> step=4698800, episode=784 reward=0.7724836 (499.79 it/sec) -training >> step=4698900, episode=784 reward=0.7888815 (434.12 it/sec) -training >> step=4699000, episode=784 reward=0.7518818 (483.05 it/sec) -training >> step=4699100, episode=784 reward=0.7945119 (437.02 it/sec) -training >> step=4699200, episode=784 reward=0.781468 (448.35 it/sec) -training >> step=4699300, episode=784 reward=0.774775 (462.74 it/sec) -training >> step=4699400, episode=784 reward=0.7906216 (488.68 it/sec) -training >> step=4699500, episode=784 reward=0.792184 (448.66 it/sec) -training >> step=4699600, episode=784 reward=0.7794474 (448.05 it/sec) -training >> step=4699700, episode=784 reward=0.7732347 (485.58 it/sec) -training >> step=4699800, episode=784 reward=0.7652646 (472.06 it/sec) -training >> step=4699900, episode=784 reward=0.7912283 (473.86 it/sec) -training >> step=4700000, episode=784 reward=0.7725505 (457.05 it/sec) -training >> step=4700100, episode=784 reward=0.7863275 (430.54 it/sec) -training >> step=4700200, episode=784 reward=0.7895409 (442.65 it/sec) -training >> step=4700300, episode=784 reward=0.78151 (448.25 it/sec) -training >> step=4700400, episode=784 reward=0.7802728 (483.46 it/sec) -training >> step=4700500, episode=784 reward=0.7730622 (496.63 it/sec) -training >> step=4700600, episode=784 reward=0.7864546 (420.83 it/sec) -training >> step=4700700, episode=784 reward=0.8021478 (426.46 it/sec) -training >> step=4700800, episode=784 reward=0.7889509 (512.70 it/sec) -training >> step=4700900, episode=784 reward=0.779318 (509.29 it/sec) -training >> step=4701000, episode=784 reward=0.768613 (485.72 it/sec) -training >> step=4701100, episode=784 reward=0.7890016 (452.93 it/sec) -training >> step=4701200, episode=784 reward=0.7925972 (414.66 it/sec) -training >> step=4701300, episode=784 reward=0.7939861 (431.19 it/sec) -training >> step=4701400, episode=784 reward=0.7788563 (466.77 it/sec) -training >> step=4701500, episode=784 reward=0.7663911 (395.28 it/sec) -training >> step=4701600, episode=784 reward=0.7752565 (393.94 it/sec) -training >> step=4701700, episode=784 reward=0.7670336 (457.17 it/sec) -training >> step=4701800, episode=784 reward=0.786423 (444.90 it/sec) -training >> step=4701900, episode=784 reward=0.7909263 (494.96 it/sec) -training >> step=4702000, episode=784 reward=0.7767427 (456.98 it/sec) -training >> step=4702100, episode=784 reward=0.7916791 (482.16 it/sec) -training >> step=4702200, episode=784 reward=0.788531 (491.38 it/sec) -training >> step=4702300, episode=784 reward=0.7595467 (488.83 it/sec) -training >> step=4702400, episode=784 reward=0.7865103 (435.45 it/sec) -training >> step=4702500, episode=784 reward=0.7657121 (488.09 it/sec) -training >> step=4702600, episode=784 reward=0.7821037 (462.66 it/sec) -training >> step=4702700, episode=784 reward=0.7718539 (495.40 it/sec) -training >> step=4702800, episode=784 reward=0.7714649 (476.04 it/sec) -training >> step=4702900, episode=784 reward=0.7737238 (450.16 it/sec) -training >> step=4703000, episode=784 reward=0.7702601 (482.66 it/sec) -training >> step=4703100, episode=784 reward=0.7927815 (480.76 it/sec) -training >> step=4703200, episode=784 reward=0.7606087 (420.11 it/sec) -training >> step=4703300, episode=785 reward=0.7718535 (103.29 it/sec) -training >> step=4703400, episode=785 reward=0.7668164 (537.53 it/sec) -training >> step=4703500, episode=785 reward=0.7445214 (528.07 it/sec) -training >> step=4703600, episode=785 reward=0.7730224 (510.17 it/sec) -training >> step=4703700, episode=785 reward=0.7670587 (497.98 it/sec) -training >> step=4703800, episode=785 reward=0.7816138 (510.48 it/sec) -training >> step=4703900, episode=785 reward=0.7717699 (515.03 it/sec) -training >> step=4704000, episode=785 reward=0.7536358 (544.36 it/sec) -training >> step=4704100, episode=785 reward=0.7817047 (538.19 it/sec) -training >> step=4704200, episode=785 reward=0.7932587 (515.10 it/sec) -training >> step=4704300, episode=785 reward=0.7698336 (520.05 it/sec) -training >> step=4704400, episode=785 reward=0.8072278 (486.32 it/sec) -training >> step=4704500, episode=785 reward=0.7755812 (516.65 it/sec) -training >> step=4704600, episode=785 reward=0.7708558 (469.26 it/sec) -training >> step=4704700, episode=785 reward=0.7701715 (488.04 it/sec) -training >> step=4704800, episode=785 reward=0.7859374 (464.79 it/sec) -training >> step=4704900, episode=785 reward=0.8061556 (516.48 it/sec) -training >> step=4705000, episode=785 reward=0.7950764 (476.46 it/sec) -training >> step=4705100, episode=785 reward=0.7788923 (434.78 it/sec) -training >> step=4705200, episode=785 reward=0.7688466 (517.44 it/sec) -training >> step=4705300, episode=785 reward=0.7755397 (489.64 it/sec) -training >> step=4705400, episode=785 reward=0.7651298 (578.92 it/sec) -training >> step=4705500, episode=785 reward=0.7481492 (541.92 it/sec) -training >> step=4705600, episode=785 reward=0.7589766 (485.42 it/sec) -training >> step=4705700, episode=785 reward=0.7993098 (451.64 it/sec) -training >> step=4705800, episode=785 reward=0.7848238 (515.17 it/sec) -training >> step=4705900, episode=785 reward=0.7882816 (436.56 it/sec) -training >> step=4706000, episode=785 reward=0.7795902 (497.16 it/sec) -training >> step=4706100, episode=785 reward=0.7725673 (515.72 it/sec) -training >> step=4706200, episode=785 reward=0.8005353 (503.18 it/sec) -training >> step=4706300, episode=785 reward=0.7705792 (475.21 it/sec) -training >> step=4706400, episode=785 reward=0.778061 (472.30 it/sec) -training >> step=4706500, episode=785 reward=0.7958546 (521.27 it/sec) -training >> step=4706600, episode=785 reward=0.7778057 (407.05 it/sec) -training >> step=4706700, episode=785 reward=0.7565135 (458.87 it/sec) -training >> step=4706800, episode=785 reward=0.7885417 (479.92 it/sec) -training >> step=4706900, episode=785 reward=0.775865 (525.65 it/sec) -training >> step=4707000, episode=785 reward=0.7948433 (476.86 it/sec) -training >> step=4707100, episode=785 reward=0.7899463 (481.47 it/sec) -training >> step=4707200, episode=785 reward=0.7799857 (492.84 it/sec) -training >> step=4707300, episode=785 reward=0.7766476 (538.03 it/sec) -training >> step=4707400, episode=785 reward=0.746892 (459.31 it/sec) -training >> step=4707500, episode=785 reward=0.7987947 (467.55 it/sec) -training >> step=4707600, episode=785 reward=0.7799821 (541.53 it/sec) -training >> step=4707700, episode=785 reward=0.7782621 (515.21 it/sec) -training >> step=4707800, episode=785 reward=0.792465 (478.18 it/sec) -training >> step=4707900, episode=785 reward=0.8022825 (472.55 it/sec) -training >> step=4708000, episode=785 reward=0.7778928 (515.08 it/sec) -training >> step=4708100, episode=785 reward=0.7797993 (439.99 it/sec) -training >> step=4708200, episode=785 reward=0.7885324 (481.64 it/sec) -training >> step=4708300, episode=785 reward=0.7618032 (519.70 it/sec) -training >> step=4708400, episode=785 reward=0.786038 (523.78 it/sec) -training >> step=4708500, episode=785 reward=0.7731624 (508.24 it/sec) -training >> step=4708600, episode=785 reward=0.771934 (496.63 it/sec) -training >> step=4708700, episode=785 reward=0.7654136 (482.56 it/sec) -training >> step=4708800, episode=785 reward=0.7756612 (491.59 it/sec) -training >> step=4708900, episode=785 reward=0.7682579 (497.80 it/sec) -training >> step=4709000, episode=785 reward=0.772891 (499.81 it/sec) -training >> step=4709100, episode=785 reward=0.7856287 (529.51 it/sec) -training >> step=4709200, episode=785 reward=0.7565677 (503.04 it/sec) -training >> step=4709300, episode=786 reward=0.7983467 (112.20 it/sec) -training >> step=4709400, episode=786 reward=0.7923237 (480.94 it/sec) -training >> step=4709500, episode=786 reward=0.7876426 (529.46 it/sec) -training >> step=4709600, episode=786 reward=0.7566016 (500.92 it/sec) -training >> step=4709700, episode=786 reward=0.7789184 (443.33 it/sec) -training >> step=4709800, episode=786 reward=0.7924935 (487.59 it/sec) -training >> step=4709900, episode=786 reward=0.801002 (473.61 it/sec) -training >> step=4710000, episode=786 reward=0.7692365 (519.02 it/sec) -training >> step=4710100, episode=786 reward=0.7626027 (536.00 it/sec) -training >> step=4710200, episode=786 reward=0.7806204 (451.82 it/sec) -training >> step=4710300, episode=786 reward=0.7786642 (418.04 it/sec) -training >> step=4710400, episode=786 reward=0.7860097 (441.29 it/sec) -training >> step=4710500, episode=786 reward=0.7787232 (489.48 it/sec) -training >> step=4710600, episode=786 reward=0.7873382 (431.96 it/sec) -training >> step=4710700, episode=786 reward=0.8101127 (487.74 it/sec) -training >> step=4710800, episode=786 reward=0.7716446 (443.82 it/sec) -training >> step=4710900, episode=786 reward=0.7804223 (517.25 it/sec) -training >> step=4711000, episode=786 reward=0.7952062 (488.36 it/sec) -training >> step=4711100, episode=786 reward=0.7755945 (521.96 it/sec) -training >> step=4711200, episode=786 reward=0.786539 (543.94 it/sec) -training >> step=4711300, episode=786 reward=0.7867103 (491.95 it/sec) -training >> step=4711400, episode=786 reward=0.7899042 (413.63 it/sec) -training >> step=4711500, episode=786 reward=0.773324 (440.46 it/sec) -training >> step=4711600, episode=786 reward=0.7828001 (493.91 it/sec) -training >> step=4711700, episode=786 reward=0.7964887 (303.14 it/sec) -training >> step=4711800, episode=786 reward=0.7872031 (415.82 it/sec) -training >> step=4711900, episode=786 reward=0.7759542 (465.57 it/sec) -training >> step=4712000, episode=786 reward=0.7888559 (484.39 it/sec) -training >> step=4712100, episode=786 reward=0.7659141 (462.35 it/sec) -training >> step=4712200, episode=786 reward=0.7755271 (477.75 it/sec) -training >> step=4712300, episode=786 reward=0.7778956 (377.24 it/sec) -training >> step=4712400, episode=786 reward=0.7627426 (482.15 it/sec) -training >> step=4712500, episode=786 reward=0.7723528 (425.58 it/sec) -training >> step=4712600, episode=786 reward=0.7656994 (377.79 it/sec) -training >> step=4712700, episode=786 reward=0.7789831 (435.83 it/sec) -training >> step=4712800, episode=786 reward=0.769684 (465.07 it/sec) -training >> step=4712900, episode=786 reward=0.7710941 (507.54 it/sec) -training >> step=4713000, episode=786 reward=0.7809841 (452.08 it/sec) -training >> step=4713100, episode=786 reward=0.7938243 (386.75 it/sec) -training >> step=4713200, episode=786 reward=0.7708948 (380.27 it/sec) -training >> step=4713300, episode=786 reward=0.7622757 (404.41 it/sec) -training >> step=4713400, episode=786 reward=0.7957249 (398.02 it/sec) -training >> step=4713500, episode=786 reward=0.7763509 (396.23 it/sec) -training >> step=4713600, episode=786 reward=0.7817559 (436.09 it/sec) -training >> step=4713700, episode=786 reward=0.7769759 (474.02 it/sec) -training >> step=4713800, episode=786 reward=0.7749838 (470.84 it/sec) -training >> step=4713900, episode=786 reward=0.7718621 (469.88 it/sec) -training >> step=4714000, episode=786 reward=0.7672952 (455.21 it/sec) -training >> step=4714100, episode=786 reward=0.7595515 (513.30 it/sec) -training >> step=4714200, episode=786 reward=0.7995162 (459.30 it/sec) -training >> step=4714300, episode=786 reward=0.803946 (456.36 it/sec) -training >> step=4714400, episode=786 reward=0.7842849 (453.34 it/sec) -training >> step=4714500, episode=786 reward=0.7842081 (474.12 it/sec) -training >> step=4714600, episode=786 reward=0.7748699 (379.04 it/sec) -training >> step=4714700, episode=786 reward=0.7593753 (472.55 it/sec) -training >> step=4714800, episode=786 reward=0.7753701 (471.83 it/sec) -training >> step=4714900, episode=786 reward=0.7614955 (521.25 it/sec) -training >> step=4715000, episode=786 reward=0.7724341 (513.43 it/sec) -training >> step=4715100, episode=786 reward=0.7818923 (509.73 it/sec) -training >> step=4715200, episode=786 reward=0.7794061 (544.54 it/sec) -training >> step=4715300, episode=787 reward=0.7780979 (72.76 it/sec) -training >> step=4715400, episode=787 reward=0.7634457 (459.23 it/sec) -training >> step=4715500, episode=787 reward=0.7599326 (448.59 it/sec) -training >> step=4715600, episode=787 reward=0.7727345 (474.66 it/sec) -training >> step=4715700, episode=787 reward=0.7517589 (459.47 it/sec) -training >> step=4715800, episode=787 reward=0.7939655 (462.84 it/sec) -training >> step=4715900, episode=787 reward=0.7949356 (425.20 it/sec) -training >> step=4716000, episode=787 reward=0.7811449 (472.83 it/sec) -training >> step=4716100, episode=787 reward=0.786754 (468.19 it/sec) -training >> step=4716200, episode=787 reward=0.7927397 (508.18 it/sec) -training >> step=4716300, episode=787 reward=0.7607974 (466.42 it/sec) -training >> step=4716400, episode=787 reward=0.7696282 (476.72 it/sec) -training >> step=4716500, episode=787 reward=0.793483 (483.62 it/sec) -training >> step=4716600, episode=787 reward=0.7932478 (464.93 it/sec) -training >> step=4716700, episode=787 reward=0.7743134 (424.35 it/sec) -training >> step=4716800, episode=787 reward=0.7676188 (439.01 it/sec) -training >> step=4716900, episode=787 reward=0.7923954 (478.40 it/sec) -training >> step=4717000, episode=787 reward=0.7737409 (500.09 it/sec) -training >> step=4717100, episode=787 reward=0.7830791 (478.74 it/sec) -training >> step=4717200, episode=787 reward=0.7936102 (501.70 it/sec) -training >> step=4717300, episode=787 reward=0.7505716 (463.27 it/sec) -training >> step=4717400, episode=787 reward=0.784763 (511.60 it/sec) -training >> step=4717500, episode=787 reward=0.7586536 (454.75 it/sec) -training >> step=4717600, episode=787 reward=0.7835853 (535.58 it/sec) -training >> step=4717700, episode=787 reward=0.7877463 (418.64 it/sec) -training >> step=4717800, episode=787 reward=0.7840934 (476.00 it/sec) -training >> step=4717900, episode=787 reward=0.7436336 (500.72 it/sec) -training >> step=4718000, episode=787 reward=0.7696941 (492.71 it/sec) -training >> step=4718100, episode=787 reward=0.7842495 (478.48 it/sec) -training >> step=4718200, episode=787 reward=0.7888143 (469.04 it/sec) -training >> step=4718300, episode=787 reward=0.7653199 (518.85 it/sec) -training >> step=4718400, episode=787 reward=0.7833893 (409.52 it/sec) -training >> step=4718500, episode=787 reward=0.7952164 (473.98 it/sec) -training >> step=4718600, episode=787 reward=0.7761037 (495.33 it/sec) -training >> step=4718700, episode=787 reward=0.7720045 (516.00 it/sec) -training >> step=4718800, episode=787 reward=0.7743859 (464.34 it/sec) -training >> step=4718900, episode=787 reward=0.7845418 (456.25 it/sec) -training >> step=4719000, episode=787 reward=0.7729803 (467.54 it/sec) -training >> step=4719100, episode=787 reward=0.794919 (453.23 it/sec) -training >> step=4719200, episode=787 reward=0.774129 (465.26 it/sec) -training >> step=4719300, episode=787 reward=0.7706382 (497.19 it/sec) -training >> step=4719400, episode=787 reward=0.7864128 (429.78 it/sec) -training >> step=4719500, episode=787 reward=0.7851241 (373.38 it/sec) -training >> step=4719600, episode=787 reward=0.7866001 (439.99 it/sec) -training >> step=4719700, episode=787 reward=0.8026736 (459.74 it/sec) -training >> step=4719800, episode=787 reward=0.7829546 (483.65 it/sec) -training >> step=4719900, episode=787 reward=0.7744913 (482.56 it/sec) -training >> step=4720000, episode=787 reward=0.768492 (515.35 it/sec) -training >> step=4720100, episode=787 reward=0.7996091 (443.54 it/sec) -training >> step=4720200, episode=787 reward=0.7843722 (468.15 it/sec) -training >> step=4720300, episode=787 reward=0.7901561 (473.84 it/sec) -training >> step=4720400, episode=787 reward=0.7960665 (512.01 it/sec) -training >> step=4720500, episode=787 reward=0.783613 (472.34 it/sec) -training >> step=4720600, episode=787 reward=0.7952135 (472.58 it/sec) -training >> step=4720700, episode=787 reward=0.7994516 (462.36 it/sec) -training >> step=4720800, episode=787 reward=0.7594018 (473.54 it/sec) -training >> step=4720900, episode=787 reward=0.765892 (497.66 it/sec) -training >> step=4721000, episode=787 reward=0.7727535 (504.17 it/sec) -training >> step=4721100, episode=787 reward=0.7811316 (522.87 it/sec) -training >> step=4721200, episode=787 reward=0.7842249 (471.76 it/sec) -training >> step=4721300, episode=788 reward=0.7710204 (89.29 it/sec) -training >> step=4721400, episode=788 reward=0.7761948 (489.11 it/sec) -training >> step=4721500, episode=788 reward=0.7681565 (512.22 it/sec) -training >> step=4721600, episode=788 reward=0.7814776 (506.13 it/sec) -training >> step=4721700, episode=788 reward=0.7719193 (495.76 it/sec) -training >> step=4721800, episode=788 reward=0.7791555 (522.08 it/sec) -training >> step=4721900, episode=788 reward=0.799897 (482.37 it/sec) -training >> step=4722000, episode=788 reward=0.7764626 (520.67 it/sec) -training >> step=4722100, episode=788 reward=0.7738918 (544.20 it/sec) -training >> step=4722200, episode=788 reward=0.7777928 (483.90 it/sec) -training >> step=4722300, episode=788 reward=0.7757787 (505.75 it/sec) -training >> step=4722400, episode=788 reward=0.7708688 (523.82 it/sec) -training >> step=4722500, episode=788 reward=0.7805063 (493.25 it/sec) -training >> step=4722600, episode=788 reward=0.7751092 (509.95 it/sec) -training >> step=4722700, episode=788 reward=0.7772728 (515.68 it/sec) -training >> step=4722800, episode=788 reward=0.7742854 (505.47 it/sec) -training >> step=4722900, episode=788 reward=0.7712272 (494.97 it/sec) -training >> step=4723000, episode=788 reward=0.7690941 (492.62 it/sec) -training >> step=4723100, episode=788 reward=0.7763041 (531.04 it/sec) -training >> step=4723200, episode=788 reward=0.7851448 (479.98 it/sec) -training >> step=4723300, episode=788 reward=0.7849159 (465.28 it/sec) -training >> step=4723400, episode=788 reward=0.7877522 (495.66 it/sec) -training >> step=4723500, episode=788 reward=0.7810789 (532.34 it/sec) -training >> step=4723600, episode=788 reward=0.7880074 (479.32 it/sec) -training >> step=4723700, episode=788 reward=0.7723698 (512.65 it/sec) -training >> step=4723800, episode=788 reward=0.7878389 (494.51 it/sec) -training >> step=4723900, episode=788 reward=0.8028046 (532.44 it/sec) -training >> step=4724000, episode=788 reward=0.7900864 (510.52 it/sec) -training >> step=4724100, episode=788 reward=0.7807353 (480.14 it/sec) -training >> step=4724200, episode=788 reward=0.7834183 (509.35 it/sec) -training >> step=4724300, episode=788 reward=0.7725043 (493.34 it/sec) -training >> step=4724400, episode=788 reward=0.8021058 (525.98 it/sec) -training >> step=4724500, episode=788 reward=0.7834563 (467.04 it/sec) -training >> step=4724600, episode=788 reward=0.772567 (485.20 it/sec) -training >> step=4724700, episode=788 reward=0.7919366 (483.80 it/sec) -training >> step=4724800, episode=788 reward=0.7761551 (490.63 it/sec) -training >> step=4724900, episode=788 reward=0.7794626 (529.75 it/sec) -training >> step=4725000, episode=788 reward=0.7691885 (516.52 it/sec) -training >> step=4725100, episode=788 reward=0.7662522 (524.86 it/sec) -training >> step=4725200, episode=788 reward=0.7844129 (528.81 it/sec) -training >> step=4725300, episode=788 reward=0.7886407 (527.00 it/sec) -training >> step=4725400, episode=788 reward=0.7887082 (471.07 it/sec) -training >> step=4725500, episode=788 reward=0.7839096 (510.01 it/sec) -training >> step=4725600, episode=788 reward=0.7608169 (485.26 it/sec) -training >> step=4725700, episode=788 reward=0.7860704 (546.49 it/sec) -training >> step=4725800, episode=788 reward=0.7932632 (453.10 it/sec) -training >> step=4725900, episode=788 reward=0.7916084 (511.93 it/sec) -training >> step=4726000, episode=788 reward=0.7800962 (529.30 it/sec) -training >> step=4726100, episode=788 reward=0.771791 (495.10 it/sec) -training >> step=4726200, episode=788 reward=0.7635161 (495.57 it/sec) -training >> step=4726300, episode=788 reward=0.7729349 (525.83 it/sec) -training >> step=4726400, episode=788 reward=0.768254 (539.09 it/sec) -training >> step=4726500, episode=788 reward=0.775122 (520.81 it/sec) -training >> step=4726600, episode=788 reward=0.768234 (531.79 it/sec) -training >> step=4726700, episode=788 reward=0.7664416 (498.59 it/sec) -training >> step=4726800, episode=788 reward=0.7691785 (517.69 it/sec) -training >> step=4726900, episode=788 reward=0.8039685 (491.24 it/sec) -training >> step=4727000, episode=788 reward=0.7775519 (480.44 it/sec) -training >> step=4727100, episode=788 reward=0.7759393 (543.28 it/sec) -training >> step=4727200, episode=788 reward=0.7782292 (517.69 it/sec) -training >> step=4727300, episode=789 reward=0.7630455 (114.41 it/sec) -training >> step=4727400, episode=789 reward=0.772032 (483.50 it/sec) -training >> step=4727500, episode=789 reward=0.7360208 (506.53 it/sec) -training >> step=4727600, episode=789 reward=0.7476112 (394.41 it/sec) -training >> step=4727700, episode=789 reward=0.773393 (496.95 it/sec) -training >> step=4727800, episode=789 reward=0.7871637 (487.78 it/sec) -training >> step=4727900, episode=789 reward=0.7721618 (466.56 it/sec) -training >> step=4728000, episode=789 reward=0.7792922 (500.83 it/sec) -training >> step=4728100, episode=789 reward=0.7707628 (505.46 it/sec) -training >> step=4728200, episode=789 reward=0.7875742 (529.61 it/sec) -training >> step=4728300, episode=789 reward=0.7871972 (533.66 it/sec) -training >> step=4728400, episode=789 reward=0.7845846 (496.71 it/sec) -training >> step=4728500, episode=789 reward=0.7759516 (536.51 it/sec) -training >> step=4728600, episode=789 reward=0.7583953 (518.80 it/sec) -training >> step=4728700, episode=789 reward=0.766984 (406.50 it/sec) -training >> step=4728800, episode=789 reward=0.7608863 (403.85 it/sec) -training >> step=4728900, episode=789 reward=0.7720156 (480.80 it/sec) -training >> step=4729000, episode=789 reward=0.7911951 (514.56 it/sec) -training >> step=4729100, episode=789 reward=0.7679046 (487.26 it/sec) -training >> step=4729200, episode=789 reward=0.7622548 (497.26 it/sec) -training >> step=4729300, episode=789 reward=0.7805023 (492.93 it/sec) -training >> step=4729400, episode=789 reward=0.7916356 (467.74 it/sec) -training >> step=4729500, episode=789 reward=0.7808989 (506.44 it/sec) -training >> step=4729600, episode=789 reward=0.7789723 (486.44 it/sec) -training >> step=4729700, episode=789 reward=0.7897045 (476.82 it/sec) -training >> step=4729800, episode=789 reward=0.7911524 (497.85 it/sec) -training >> step=4729900, episode=789 reward=0.7866584 (459.01 it/sec) -training >> step=4730000, episode=789 reward=0.7837705 (512.12 it/sec) -training >> step=4730100, episode=789 reward=0.7813127 (506.27 it/sec) -training >> step=4730200, episode=789 reward=0.7933167 (508.46 it/sec) -training >> step=4730300, episode=789 reward=0.8070925 (522.07 it/sec) -training >> step=4730400, episode=789 reward=0.7669784 (506.59 it/sec) -training >> step=4730500, episode=789 reward=0.7711339 (505.91 it/sec) -training >> step=4730600, episode=789 reward=0.7680277 (491.69 it/sec) -training >> step=4730700, episode=789 reward=0.7805948 (536.17 it/sec) -training >> step=4730800, episode=789 reward=0.7785592 (527.40 it/sec) -training >> step=4730900, episode=789 reward=0.7968295 (503.20 it/sec) -training >> step=4731000, episode=789 reward=0.7766247 (475.87 it/sec) -training >> step=4731100, episode=789 reward=0.8005633 (500.12 it/sec) -training >> step=4731200, episode=789 reward=0.77419 (508.33 it/sec) -training >> step=4731300, episode=789 reward=0.787906 (540.44 it/sec) -training >> step=4731400, episode=789 reward=0.7806727 (497.93 it/sec) -training >> step=4731500, episode=789 reward=0.7736253 (495.57 it/sec) -training >> step=4731600, episode=789 reward=0.7712705 (489.80 it/sec) -training >> step=4731700, episode=789 reward=0.8031955 (505.13 it/sec) -training >> step=4731800, episode=789 reward=0.7899768 (537.41 it/sec) -training >> step=4731900, episode=789 reward=0.7695453 (512.03 it/sec) -training >> step=4732000, episode=789 reward=0.7710057 (520.45 it/sec) -training >> step=4732100, episode=789 reward=0.7901393 (547.84 it/sec) -training >> step=4732200, episode=789 reward=0.7874684 (476.28 it/sec) -training >> step=4732300, episode=789 reward=0.781325 (501.79 it/sec) -training >> step=4732400, episode=789 reward=0.7711696 (496.38 it/sec) -training >> step=4732500, episode=789 reward=0.7845529 (467.40 it/sec) -training >> step=4732600, episode=789 reward=0.7810125 (493.97 it/sec) -training >> step=4732700, episode=789 reward=0.7728052 (459.62 it/sec) -training >> step=4732800, episode=789 reward=0.7915455 (492.72 it/sec) -training >> step=4732900, episode=789 reward=0.7766191 (453.01 it/sec) -training >> step=4733000, episode=789 reward=0.7698735 (459.30 it/sec) -training >> step=4733100, episode=789 reward=0.777585 (481.82 it/sec) -training >> step=4733200, episode=789 reward=0.7911258 (499.31 it/sec) -training >> step=4733300, episode=790 reward=0.7811903 (102.87 it/sec) -training >> step=4733400, episode=790 reward=0.7955649 (491.72 it/sec) -training >> step=4733500, episode=790 reward=0.7582049 (447.42 it/sec) -training >> step=4733600, episode=790 reward=0.7614339 (402.34 it/sec) -training >> step=4733700, episode=790 reward=0.7829528 (477.98 it/sec) -training >> step=4733800, episode=790 reward=0.7749228 (497.03 it/sec) -training >> step=4733900, episode=790 reward=0.7873592 (551.49 it/sec) -training >> step=4734000, episode=790 reward=0.7681209 (484.07 it/sec) -training >> step=4734100, episode=790 reward=0.7644994 (490.19 it/sec) -training >> step=4734200, episode=790 reward=0.77215 (503.58 it/sec) -training >> step=4734300, episode=790 reward=0.8017247 (515.28 it/sec) -training >> step=4734400, episode=790 reward=0.7737523 (527.44 it/sec) -training >> step=4734500, episode=790 reward=0.777036 (451.17 it/sec) -training >> step=4734600, episode=790 reward=0.7662893 (503.43 it/sec) -training >> step=4734700, episode=790 reward=0.781476 (564.50 it/sec) -training >> step=4734800, episode=790 reward=0.741773 (558.20 it/sec) -training >> step=4734900, episode=790 reward=0.7644958 (535.18 it/sec) -training >> step=4735000, episode=790 reward=0.7847149 (528.91 it/sec) -training >> step=4735100, episode=790 reward=0.7815109 (510.22 it/sec) -training >> step=4735200, episode=790 reward=0.7762839 (527.95 it/sec) -training >> step=4735300, episode=790 reward=0.7725424 (503.72 it/sec) -training >> step=4735400, episode=790 reward=0.7856175 (492.83 it/sec) -training >> step=4735500, episode=790 reward=0.765135 (531.29 it/sec) -training >> step=4735600, episode=790 reward=0.7654951 (483.18 it/sec) -training >> step=4735700, episode=790 reward=0.7906942 (572.27 it/sec) -training >> step=4735800, episode=790 reward=0.7727681 (554.34 it/sec) -training >> step=4735900, episode=790 reward=0.7549953 (548.01 it/sec) -training >> step=4736000, episode=790 reward=0.7826328 (541.79 it/sec) -training >> step=4736100, episode=790 reward=0.7605127 (574.51 it/sec) -training >> step=4736200, episode=790 reward=0.7730527 (502.75 it/sec) -training >> step=4736300, episode=790 reward=0.7739921 (528.96 it/sec) -training >> step=4736400, episode=790 reward=0.7757069 (566.97 it/sec) -training >> step=4736500, episode=790 reward=0.7749931 (513.39 it/sec) -training >> step=4736600, episode=790 reward=0.782172 (483.74 it/sec) -training >> step=4736700, episode=790 reward=0.778152 (529.66 it/sec) -training >> step=4736800, episode=790 reward=0.7970358 (536.84 it/sec) -training >> step=4736900, episode=790 reward=0.774654 (538.17 it/sec) -training >> step=4737000, episode=790 reward=0.7886934 (534.45 it/sec) -training >> step=4737100, episode=790 reward=0.766969 (540.60 it/sec) -training >> step=4737200, episode=790 reward=0.7666905 (530.96 it/sec) -training >> step=4737300, episode=790 reward=0.7834177 (514.65 it/sec) -training >> step=4737400, episode=790 reward=0.7694315 (539.76 it/sec) -training >> step=4737500, episode=790 reward=0.7832471 (522.92 it/sec) -training >> step=4737600, episode=790 reward=0.7743318 (534.32 it/sec) -training >> step=4737700, episode=790 reward=0.7894524 (556.24 it/sec) -training >> step=4737800, episode=790 reward=0.7658319 (504.22 it/sec) -training >> step=4737900, episode=790 reward=0.7856455 (572.34 it/sec) -training >> step=4738000, episode=790 reward=0.7932169 (542.76 it/sec) -training >> step=4738100, episode=790 reward=0.776615 (515.58 it/sec) -training >> step=4738200, episode=790 reward=0.7937544 (555.33 it/sec) -training >> step=4738300, episode=790 reward=0.7849439 (520.19 it/sec) -training >> step=4738400, episode=790 reward=0.7786202 (505.81 it/sec) -training >> step=4738500, episode=790 reward=0.7760327 (526.38 it/sec) -training >> step=4738600, episode=790 reward=0.7757057 (555.18 it/sec) -training >> step=4738700, episode=790 reward=0.7885913 (478.50 it/sec) -training >> step=4738800, episode=790 reward=0.7838072 (531.25 it/sec) -training >> step=4738900, episode=790 reward=0.7694497 (490.30 it/sec) -training >> step=4739000, episode=790 reward=0.7785113 (554.98 it/sec) -training >> step=4739100, episode=790 reward=0.7646703 (520.48 it/sec) -training >> step=4739200, episode=790 reward=0.7781733 (504.18 it/sec) -training >> step=4739300, episode=791 reward=0.7723342 (126.72 it/sec) -training >> step=4739400, episode=791 reward=0.7576268 (539.53 it/sec) -training >> step=4739500, episode=791 reward=0.777104 (500.40 it/sec) -training >> step=4739600, episode=791 reward=0.7785699 (464.55 it/sec) -training >> step=4739700, episode=791 reward=0.772974 (535.16 it/sec) -training >> step=4739800, episode=791 reward=0.7632147 (551.33 it/sec) -training >> step=4739900, episode=791 reward=0.7837315 (505.28 it/sec) -training >> step=4740000, episode=791 reward=0.770833 (567.33 it/sec) -training >> step=4740100, episode=791 reward=0.7884887 (489.11 it/sec) -training >> step=4740200, episode=791 reward=0.7716779 (529.97 it/sec) -training >> step=4740300, episode=791 reward=0.7737083 (493.82 it/sec) -training >> step=4740400, episode=791 reward=0.8008214 (528.12 it/sec) -training >> step=4740500, episode=791 reward=0.7847258 (467.00 it/sec) -training >> step=4740600, episode=791 reward=0.7766534 (533.61 it/sec) -training >> step=4740700, episode=791 reward=0.7643698 (421.10 it/sec) -training >> step=4740800, episode=791 reward=0.7875803 (534.82 it/sec) -training >> step=4740900, episode=791 reward=0.7600986 (522.82 it/sec) -training >> step=4741000, episode=791 reward=0.7771283 (500.17 it/sec) -training >> step=4741100, episode=791 reward=0.7761506 (537.86 it/sec) -training >> step=4741200, episode=791 reward=0.790669 (509.83 it/sec) -training >> step=4741300, episode=791 reward=0.7780491 (508.25 it/sec) -training >> step=4741400, episode=791 reward=0.7641622 (569.27 it/sec) -training >> step=4741500, episode=791 reward=0.7764611 (519.21 it/sec) -training >> step=4741600, episode=791 reward=0.7944787 (495.22 it/sec) -training >> step=4741700, episode=791 reward=0.7954184 (528.08 it/sec) -training >> step=4741800, episode=791 reward=0.7728711 (498.06 it/sec) -training >> step=4741900, episode=791 reward=0.7865365 (578.48 it/sec) -training >> step=4742000, episode=791 reward=0.8041817 (551.51 it/sec) -training >> step=4742100, episode=791 reward=0.8009251 (532.05 it/sec) -training >> step=4742200, episode=791 reward=0.7654343 (484.27 it/sec) -training >> step=4742300, episode=791 reward=0.7835317 (503.75 it/sec) -training >> step=4742400, episode=791 reward=0.7794483 (506.31 it/sec) -training >> step=4742500, episode=791 reward=0.7714128 (539.21 it/sec) -training >> step=4742600, episode=791 reward=0.7783701 (559.60 it/sec) -training >> step=4742700, episode=791 reward=0.7850238 (560.41 it/sec) -training >> step=4742800, episode=791 reward=0.7894382 (499.83 it/sec) -training >> step=4742900, episode=791 reward=0.7688559 (473.98 it/sec) -training >> step=4743000, episode=791 reward=0.7957814 (560.29 it/sec) -training >> step=4743100, episode=791 reward=0.7601137 (532.48 it/sec) -training >> step=4743200, episode=791 reward=0.7834229 (532.15 it/sec) -training >> step=4743300, episode=791 reward=0.793758 (571.23 it/sec) -training >> step=4743400, episode=791 reward=0.7852511 (498.34 it/sec) -training >> step=4743500, episode=791 reward=0.7688789 (530.76 it/sec) -training >> step=4743600, episode=791 reward=0.7775138 (530.78 it/sec) -training >> step=4743700, episode=791 reward=0.7710685 (554.00 it/sec) -training >> step=4743800, episode=791 reward=0.7879888 (516.29 it/sec) -training >> step=4743900, episode=791 reward=0.7766252 (468.08 it/sec) -training >> step=4744000, episode=791 reward=0.7758245 (565.39 it/sec) -training >> step=4744100, episode=791 reward=0.7822204 (535.81 it/sec) -training >> step=4744200, episode=791 reward=0.7693597 (524.70 it/sec) -training >> step=4744300, episode=791 reward=0.7782077 (461.39 it/sec) -training >> step=4744400, episode=791 reward=0.7767074 (506.63 it/sec) -training >> step=4744500, episode=791 reward=0.7886673 (491.62 it/sec) -training >> step=4744600, episode=791 reward=0.7732252 (469.13 it/sec) -training >> step=4744700, episode=791 reward=0.7791557 (531.08 it/sec) -training >> step=4744800, episode=791 reward=0.7722502 (499.81 it/sec) -training >> step=4744900, episode=791 reward=0.7661998 (537.50 it/sec) -training >> step=4745000, episode=791 reward=0.7708818 (512.82 it/sec) -training >> step=4745100, episode=791 reward=0.7767611 (512.69 it/sec) -training >> step=4745200, episode=791 reward=0.7802916 (437.10 it/sec) -training >> step=4745300, episode=792 reward=0.7929745 (102.27 it/sec) -training >> step=4745400, episode=792 reward=0.7565541 (499.43 it/sec) -training >> step=4745500, episode=792 reward=0.7741004 (494.04 it/sec) -training >> step=4745600, episode=792 reward=0.7863755 (467.72 it/sec) -training >> step=4745700, episode=792 reward=0.7855486 (482.96 it/sec) -training >> step=4745800, episode=792 reward=0.7657515 (477.23 it/sec) -training >> step=4745900, episode=792 reward=0.7636283 (485.96 it/sec) -training >> step=4746000, episode=792 reward=0.7917961 (481.22 it/sec) -training >> step=4746100, episode=792 reward=0.760771 (456.39 it/sec) -training >> step=4746200, episode=792 reward=0.7587872 (462.74 it/sec) -training >> step=4746300, episode=792 reward=0.775448 (443.00 it/sec) -training >> step=4746400, episode=792 reward=0.7629371 (504.32 it/sec) -training >> step=4746500, episode=792 reward=0.7850521 (480.61 it/sec) -training >> step=4746600, episode=792 reward=0.7633768 (482.85 it/sec) -training >> step=4746700, episode=792 reward=0.790262 (496.77 it/sec) -training >> step=4746800, episode=792 reward=0.7723912 (501.07 it/sec) -training >> step=4746900, episode=792 reward=0.7711656 (463.01 it/sec) -training >> step=4747000, episode=792 reward=0.7738799 (527.24 it/sec) -training >> step=4747100, episode=792 reward=0.7671979 (483.84 it/sec) -training >> step=4747200, episode=792 reward=0.79322 (521.73 it/sec) -training >> step=4747300, episode=792 reward=0.7986923 (505.12 it/sec) -training >> step=4747400, episode=792 reward=0.7725201 (457.23 it/sec) -training >> step=4747500, episode=792 reward=0.7829891 (527.12 it/sec) -training >> step=4747600, episode=792 reward=0.7806826 (488.23 it/sec) -training >> step=4747700, episode=792 reward=0.766783 (508.12 it/sec) -training >> step=4747800, episode=792 reward=0.7693709 (455.08 it/sec) -training >> step=4747900, episode=792 reward=0.7980406 (506.86 it/sec) -training >> step=4748000, episode=792 reward=0.7893038 (452.37 it/sec) -training >> step=4748100, episode=792 reward=0.8114508 (477.26 it/sec) -training >> step=4748200, episode=792 reward=0.7885381 (524.19 it/sec) -training >> step=4748300, episode=792 reward=0.7775083 (453.80 it/sec) -training >> step=4748400, episode=792 reward=0.7694438 (501.23 it/sec) -training >> step=4748500, episode=792 reward=0.788997 (483.09 it/sec) -training >> step=4748600, episode=792 reward=0.7860587 (508.45 it/sec) -training >> step=4748700, episode=792 reward=0.7944989 (472.27 it/sec) -training >> step=4748800, episode=792 reward=0.7711186 (506.86 it/sec) -training >> step=4748900, episode=792 reward=0.7817337 (484.03 it/sec) -training >> step=4749000, episode=792 reward=0.794027 (521.47 it/sec) -training >> step=4749100, episode=792 reward=0.7808395 (466.38 it/sec) -training >> step=4749200, episode=792 reward=0.7746925 (493.90 it/sec) -training >> step=4749300, episode=792 reward=0.7823235 (495.00 it/sec) -training >> step=4749400, episode=792 reward=0.7916399 (487.42 it/sec) -training >> step=4749500, episode=792 reward=0.7911638 (520.61 it/sec) -training >> step=4749600, episode=792 reward=0.7660546 (480.65 it/sec) -training >> step=4749700, episode=792 reward=0.7720693 (517.53 it/sec) -training >> step=4749800, episode=792 reward=0.7785764 (496.14 it/sec) -training >> step=4749900, episode=792 reward=0.7661625 (451.03 it/sec) -training >> step=4750000, episode=792 reward=0.7983813 (484.88 it/sec) -training >> step=4750100, episode=792 reward=0.7805585 (478.23 it/sec) -training >> step=4750200, episode=792 reward=0.7639364 (495.35 it/sec) -training >> step=4750300, episode=792 reward=0.7605247 (485.32 it/sec) -training >> step=4750400, episode=792 reward=0.7889076 (518.38 it/sec) -training >> step=4750500, episode=792 reward=0.7811347 (479.58 it/sec) -training >> step=4750600, episode=792 reward=0.7729746 (452.67 it/sec) -training >> step=4750700, episode=792 reward=0.790949 (458.36 it/sec) -training >> step=4750800, episode=792 reward=0.7824777 (529.52 it/sec) -training >> step=4750900, episode=792 reward=0.7802998 (476.32 it/sec) -training >> step=4751000, episode=792 reward=0.7965409 (439.46 it/sec) -training >> step=4751100, episode=792 reward=0.7826816 (482.20 it/sec) -training >> step=4751200, episode=792 reward=0.7742962 (482.77 it/sec) -training >> step=4751300, episode=793 reward=0.758177 (66.56 it/sec) -training >> step=4751400, episode=793 reward=0.7836883 (528.52 it/sec) -training >> step=4751500, episode=793 reward=0.7627821 (472.54 it/sec) -training >> step=4751600, episode=793 reward=0.7816613 (494.98 it/sec) -training >> step=4751700, episode=793 reward=0.7619845 (450.78 it/sec) -training >> step=4751800, episode=793 reward=0.7609042 (485.07 it/sec) -training >> step=4751900, episode=793 reward=0.7720286 (476.55 it/sec) -training >> step=4752000, episode=793 reward=0.7624745 (481.00 it/sec) -training >> step=4752100, episode=793 reward=0.7928441 (482.70 it/sec) -training >> step=4752200, episode=793 reward=0.7845231 (496.31 it/sec) -training >> step=4752300, episode=793 reward=0.7684531 (473.31 it/sec) -training >> step=4752400, episode=793 reward=0.7724816 (438.75 it/sec) -training >> step=4752500, episode=793 reward=0.7734827 (499.42 it/sec) -training >> step=4752600, episode=793 reward=0.7586912 (533.75 it/sec) -training >> step=4752700, episode=793 reward=0.7735583 (465.50 it/sec) -training >> step=4752800, episode=793 reward=0.7826257 (456.56 it/sec) -training >> step=4752900, episode=793 reward=0.7814038 (480.73 it/sec) -training >> step=4753000, episode=793 reward=0.7773663 (474.86 it/sec) -training >> step=4753100, episode=793 reward=0.7777519 (521.03 it/sec) -training >> step=4753200, episode=793 reward=0.765615 (483.92 it/sec) -training >> step=4753300, episode=793 reward=0.8064456 (482.00 it/sec) -training >> step=4753400, episode=793 reward=0.7804012 (470.96 it/sec) -training >> step=4753500, episode=793 reward=0.7882797 (420.72 it/sec) -training >> step=4753600, episode=793 reward=0.8012388 (495.31 it/sec) -training >> step=4753700, episode=793 reward=0.7704501 (463.01 it/sec) -training >> step=4753800, episode=793 reward=0.7802607 (460.39 it/sec) -training >> step=4753900, episode=793 reward=0.7710842 (452.79 it/sec) -training >> step=4754000, episode=793 reward=0.7768269 (474.55 it/sec) -training >> step=4754100, episode=793 reward=0.7975069 (471.96 it/sec) -training >> step=4754200, episode=793 reward=0.7692809 (473.89 it/sec) -training >> step=4754300, episode=793 reward=0.8080745 (501.97 it/sec) -training >> step=4754400, episode=793 reward=0.7830529 (505.60 it/sec) -training >> step=4754500, episode=793 reward=0.77585 (462.25 it/sec) -training >> step=4754600, episode=793 reward=0.7822044 (517.45 it/sec) -training >> step=4754700, episode=793 reward=0.7942193 (489.33 it/sec) -training >> step=4754800, episode=793 reward=0.7704643 (473.64 it/sec) -training >> step=4754900, episode=793 reward=0.760153 (495.66 it/sec) -training >> step=4755000, episode=793 reward=0.7764724 (464.92 it/sec) -training >> step=4755100, episode=793 reward=0.7794021 (513.51 it/sec) -training >> step=4755200, episode=793 reward=0.7832549 (461.71 it/sec) -training >> step=4755300, episode=793 reward=0.7708941 (507.47 it/sec) -training >> step=4755400, episode=793 reward=0.779158 (536.29 it/sec) -training >> step=4755500, episode=793 reward=0.7776068 (491.91 it/sec) -training >> step=4755600, episode=793 reward=0.7910233 (470.20 it/sec) -training >> step=4755700, episode=793 reward=0.7789847 (476.94 it/sec) -training >> step=4755800, episode=793 reward=0.7719409 (510.67 it/sec) -training >> step=4755900, episode=793 reward=0.7861958 (475.99 it/sec) -training >> step=4756000, episode=793 reward=0.7893639 (487.47 it/sec) -training >> step=4756100, episode=793 reward=0.7613347 (481.95 it/sec) -training >> step=4756200, episode=793 reward=0.7698445 (430.11 it/sec) -training >> step=4756300, episode=793 reward=0.7792025 (493.24 it/sec) -training >> step=4756400, episode=793 reward=0.775154 (497.56 it/sec) -training >> step=4756500, episode=793 reward=0.7753146 (490.69 it/sec) -training >> step=4756600, episode=793 reward=0.7551538 (505.44 it/sec) -training >> step=4756700, episode=793 reward=0.7823225 (492.34 it/sec) -training >> step=4756800, episode=793 reward=0.7901038 (478.77 it/sec) -training >> step=4756900, episode=793 reward=0.7784566 (469.51 it/sec) -training >> step=4757000, episode=793 reward=0.7757829 (472.02 it/sec) -training >> step=4757100, episode=793 reward=0.7754626 (507.10 it/sec) -training >> step=4757200, episode=793 reward=0.7909718 (471.70 it/sec) -training >> step=4757300, episode=794 reward=0.7655646 (53.55 it/sec) -training >> step=4757400, episode=794 reward=0.752459 (487.84 it/sec) -training >> step=4757500, episode=794 reward=0.7573848 (516.26 it/sec) -training >> step=4757600, episode=794 reward=0.7592632 (495.50 it/sec) -training >> step=4757700, episode=794 reward=0.7892129 (484.48 it/sec) -training >> step=4757800, episode=794 reward=0.7932745 (471.67 it/sec) -training >> step=4757900, episode=794 reward=0.7983829 (472.78 it/sec) -training >> step=4758000, episode=794 reward=0.7812533 (492.98 it/sec) -training >> step=4758100, episode=794 reward=0.7823467 (495.51 it/sec) -training >> step=4758200, episode=794 reward=0.7662688 (491.31 it/sec) -training >> step=4758300, episode=794 reward=0.7652872 (522.14 it/sec) -training >> step=4758400, episode=794 reward=0.778358 (493.20 it/sec) -training >> step=4758500, episode=794 reward=0.782656 (479.99 it/sec) -training >> step=4758600, episode=794 reward=0.7675769 (479.06 it/sec) -training >> step=4758700, episode=794 reward=0.7706131 (490.64 it/sec) -training >> step=4758800, episode=794 reward=0.7838099 (499.02 it/sec) -training >> step=4758900, episode=794 reward=0.7562418 (479.43 it/sec) -training >> step=4759000, episode=794 reward=0.7627412 (426.64 it/sec) -training >> step=4759100, episode=794 reward=0.7688846 (478.13 it/sec) -training >> step=4759200, episode=794 reward=0.7610464 (438.50 it/sec) -training >> step=4759300, episode=794 reward=0.7529027 (474.84 it/sec) -training >> step=4759400, episode=794 reward=0.8012865 (484.76 it/sec) -training >> step=4759500, episode=794 reward=0.7774069 (483.32 it/sec) -training >> step=4759600, episode=794 reward=0.7740162 (444.55 it/sec) -training >> step=4759700, episode=794 reward=0.7832846 (503.45 it/sec) -training >> step=4759800, episode=794 reward=0.7650113 (472.93 it/sec) -training >> step=4759900, episode=794 reward=0.7954994 (484.02 it/sec) -training >> step=4760000, episode=794 reward=0.7895468 (463.88 it/sec) -training >> step=4760100, episode=794 reward=0.7887985 (459.98 it/sec) -training >> step=4760200, episode=794 reward=0.7883582 (518.54 it/sec) -training >> step=4760300, episode=794 reward=0.7413249 (500.66 it/sec) -training >> step=4760400, episode=794 reward=0.7890526 (466.52 it/sec) -training >> step=4760500, episode=794 reward=0.7836301 (489.70 it/sec) -training >> step=4760600, episode=794 reward=0.7698257 (508.29 it/sec) -training >> step=4760700, episode=794 reward=0.8129444 (453.43 it/sec) -training >> step=4760800, episode=794 reward=0.777405 (494.05 it/sec) -training >> step=4760900, episode=794 reward=0.7833694 (441.56 it/sec) -training >> step=4761000, episode=794 reward=0.7918975 (440.06 it/sec) -training >> step=4761100, episode=794 reward=0.7861941 (421.31 it/sec) -training >> step=4761200, episode=794 reward=0.8064491 (517.11 it/sec) -training >> step=4761300, episode=794 reward=0.7970258 (454.15 it/sec) -training >> step=4761400, episode=794 reward=0.7860374 (491.46 it/sec) -training >> step=4761500, episode=794 reward=0.7707896 (490.62 it/sec) -training >> step=4761600, episode=794 reward=0.7540486 (467.89 it/sec) -training >> step=4761700, episode=794 reward=0.7932522 (496.25 it/sec) -training >> step=4761800, episode=794 reward=0.7533053 (478.68 it/sec) -training >> step=4761900, episode=794 reward=0.7858092 (510.88 it/sec) -training >> step=4762000, episode=794 reward=0.7787784 (509.73 it/sec) -training >> step=4762100, episode=794 reward=0.7851182 (467.80 it/sec) -training >> step=4762200, episode=794 reward=0.7864344 (432.76 it/sec) -training >> step=4762300, episode=794 reward=0.7838442 (453.43 it/sec) -training >> step=4762400, episode=794 reward=0.8128854 (467.32 it/sec) -training >> step=4762500, episode=794 reward=0.7890202 (487.19 it/sec) -training >> step=4762600, episode=794 reward=0.7984163 (480.44 it/sec) -training >> step=4762700, episode=794 reward=0.7651998 (504.95 it/sec) -training >> step=4762800, episode=794 reward=0.7797647 (461.47 it/sec) -training >> step=4762900, episode=794 reward=0.7750666 (478.47 it/sec) -training >> step=4763000, episode=794 reward=0.7797614 (466.22 it/sec) -training >> step=4763100, episode=794 reward=0.7812043 (469.13 it/sec) -training >> step=4763200, episode=794 reward=0.7790508 (490.35 it/sec) -training >> step=4763300, episode=795 reward=0.7679169 (54.07 it/sec) -training >> step=4763400, episode=795 reward=0.7747474 (483.13 it/sec) -training >> step=4763500, episode=795 reward=0.7755986 (477.77 it/sec) -training >> step=4763600, episode=795 reward=0.7555663 (474.70 it/sec) -training >> step=4763700, episode=795 reward=0.7783709 (404.95 it/sec) -training >> step=4763800, episode=795 reward=0.7673224 (412.33 it/sec) -training >> step=4763900, episode=795 reward=0.7782581 (458.20 it/sec) -training >> step=4764000, episode=795 reward=0.7835119 (433.57 it/sec) -training >> step=4764100, episode=795 reward=0.7723754 (484.82 it/sec) -training >> step=4764200, episode=795 reward=0.7883716 (487.78 it/sec) -training >> step=4764300, episode=795 reward=0.8150796 (477.07 it/sec) -training >> step=4764400, episode=795 reward=0.77634 (479.94 it/sec) -training >> step=4764500, episode=795 reward=0.8008322 (440.83 it/sec) -training >> step=4764600, episode=795 reward=0.7904812 (488.94 it/sec) -training >> step=4764700, episode=795 reward=0.763139 (365.90 it/sec) -training >> step=4764800, episode=795 reward=0.7826787 (443.08 it/sec) -training >> step=4764900, episode=795 reward=0.7838786 (442.36 it/sec) -training >> step=4765000, episode=795 reward=0.7811688 (396.21 it/sec) -training >> step=4765100, episode=795 reward=0.7796326 (432.58 it/sec) -training >> step=4765200, episode=795 reward=0.7917004 (461.32 it/sec) -training >> step=4765300, episode=795 reward=0.7669838 (426.59 it/sec) -training >> step=4765400, episode=795 reward=0.7821362 (412.16 it/sec) -training >> step=4765500, episode=795 reward=0.7739972 (342.88 it/sec) -training >> step=4765600, episode=795 reward=0.7826759 (429.13 it/sec) -training >> step=4765700, episode=795 reward=0.7889701 (427.25 it/sec) -training >> step=4765800, episode=795 reward=0.7769396 (385.56 it/sec) -training >> step=4765900, episode=795 reward=0.7777501 (473.78 it/sec) -training >> step=4766000, episode=795 reward=0.7719283 (481.93 it/sec) -training >> step=4766100, episode=795 reward=0.7999219 (461.09 it/sec) -training >> step=4766200, episode=795 reward=0.7911234 (393.85 it/sec) -training >> step=4766300, episode=795 reward=0.7777168 (404.98 it/sec) -training >> step=4766400, episode=795 reward=0.7746646 (433.68 it/sec) -training >> step=4766500, episode=795 reward=0.7723312 (492.81 it/sec) -training >> step=4766600, episode=795 reward=0.790581 (520.69 it/sec) -training >> step=4766700, episode=795 reward=0.7774193 (417.86 it/sec) -training >> step=4766800, episode=795 reward=0.7886112 (414.09 it/sec) -training >> step=4766900, episode=795 reward=0.7657348 (435.71 it/sec) -training >> step=4767000, episode=795 reward=0.7689902 (481.11 it/sec) -training >> step=4767100, episode=795 reward=0.7879296 (490.17 it/sec) -training >> step=4767200, episode=795 reward=0.7853492 (427.88 it/sec) -training >> step=4767300, episode=795 reward=0.7775105 (381.65 it/sec) -training >> step=4767400, episode=795 reward=0.7630923 (443.36 it/sec) -training >> step=4767500, episode=795 reward=0.7763159 (470.84 it/sec) -training >> step=4767600, episode=795 reward=0.7873412 (473.42 it/sec) -training >> step=4767700, episode=795 reward=0.7885441 (450.36 it/sec) -training >> step=4767800, episode=795 reward=0.7910497 (525.19 it/sec) -training >> step=4767900, episode=795 reward=0.750895 (448.82 it/sec) -training >> step=4768000, episode=795 reward=0.7696978 (492.50 it/sec) -training >> step=4768100, episode=795 reward=0.7826317 (494.84 it/sec) -training >> step=4768200, episode=795 reward=0.777873 (488.47 it/sec) -training >> step=4768300, episode=795 reward=0.7602878 (472.27 it/sec) -training >> step=4768400, episode=795 reward=0.7946531 (428.31 it/sec) -training >> step=4768500, episode=795 reward=0.7755079 (492.07 it/sec) -training >> step=4768600, episode=795 reward=0.7699131 (453.34 it/sec) -training >> step=4768700, episode=795 reward=0.7787768 (472.29 it/sec) -training >> step=4768800, episode=795 reward=0.7473165 (448.94 it/sec) -training >> step=4768900, episode=795 reward=0.7845938 (489.47 it/sec) -training >> step=4769000, episode=795 reward=0.7627651 (485.71 it/sec) -training >> step=4769100, episode=795 reward=0.7848058 (469.47 it/sec) -training >> step=4769200, episode=795 reward=0.7868882 (446.64 it/sec) -training >> step=4769300, episode=796 reward=0.764901 (86.10 it/sec) -training >> step=4769400, episode=796 reward=0.7855967 (438.51 it/sec) -training >> step=4769500, episode=796 reward=0.7590175 (469.70 it/sec) -training >> step=4769600, episode=796 reward=0.7514225 (479.20 it/sec) -training >> step=4769700, episode=796 reward=0.766634 (488.99 it/sec) -training >> step=4769800, episode=796 reward=0.7613583 (459.31 it/sec) -training >> step=4769900, episode=796 reward=0.783495 (486.88 it/sec) -training >> step=4770000, episode=796 reward=0.7830995 (460.82 it/sec) -training >> step=4770100, episode=796 reward=0.7846014 (424.75 it/sec) -training >> step=4770200, episode=796 reward=0.7683427 (494.62 it/sec) -training >> step=4770300, episode=796 reward=0.7721414 (412.64 it/sec) -training >> step=4770400, episode=796 reward=0.7685898 (468.45 it/sec) -training >> step=4770500, episode=796 reward=0.775851 (436.62 it/sec) -training >> step=4770600, episode=796 reward=0.7948028 (505.03 it/sec) -training >> step=4770700, episode=796 reward=0.7881363 (472.94 it/sec) -training >> step=4770800, episode=796 reward=0.7845619 (490.05 it/sec) -training >> step=4770900, episode=796 reward=0.7618725 (508.89 it/sec) -training >> step=4771000, episode=796 reward=0.7932775 (492.69 it/sec) -training >> step=4771100, episode=796 reward=0.7830341 (465.92 it/sec) -training >> step=4771200, episode=796 reward=0.797357 (487.65 it/sec) -training >> step=4771300, episode=796 reward=0.771727 (556.20 it/sec) -training >> step=4771400, episode=796 reward=0.7790963 (480.86 it/sec) -training >> step=4771500, episode=796 reward=0.7760351 (489.43 it/sec) -training >> step=4771600, episode=796 reward=0.7776729 (464.34 it/sec) -training >> step=4771700, episode=796 reward=0.7881249 (450.41 it/sec) -training >> step=4771800, episode=796 reward=0.7782828 (470.51 it/sec) -training >> step=4771900, episode=796 reward=0.7795939 (499.71 it/sec) -training >> step=4772000, episode=796 reward=0.7503521 (529.94 it/sec) -training >> step=4772100, episode=796 reward=0.787078 (468.02 it/sec) -training >> step=4772200, episode=796 reward=0.7859029 (473.77 it/sec) -training >> step=4772300, episode=796 reward=0.7840282 (473.72 it/sec) -training >> step=4772400, episode=796 reward=0.7858858 (516.33 it/sec) -training >> step=4772500, episode=796 reward=0.770034 (462.89 it/sec) -training >> step=4772600, episode=796 reward=0.7787078 (499.24 it/sec) -training >> step=4772700, episode=796 reward=0.7686996 (478.36 it/sec) -training >> step=4772800, episode=796 reward=0.7623009 (473.43 it/sec) -training >> step=4772900, episode=796 reward=0.7738318 (508.86 it/sec) -training >> step=4773000, episode=796 reward=0.7930784 (456.23 it/sec) -training >> step=4773100, episode=796 reward=0.77231 (534.90 it/sec) -training >> step=4773200, episode=796 reward=0.7790037 (497.87 it/sec) -training >> step=4773300, episode=796 reward=0.7727436 (510.64 it/sec) -training >> step=4773400, episode=796 reward=0.7608779 (500.29 it/sec) -training >> step=4773500, episode=796 reward=0.7887445 (502.39 it/sec) -training >> step=4773600, episode=796 reward=0.772916 (508.09 it/sec) -training >> step=4773700, episode=796 reward=0.7889681 (488.18 it/sec) -training >> step=4773800, episode=796 reward=0.7748447 (512.67 it/sec) -training >> step=4773900, episode=796 reward=0.7904213 (534.87 it/sec) -training >> step=4774000, episode=796 reward=0.7952082 (510.75 it/sec) -training >> step=4774100, episode=796 reward=0.7772715 (484.35 it/sec) -training >> step=4774200, episode=796 reward=0.8080535 (514.74 it/sec) -training >> step=4774300, episode=796 reward=0.7968363 (505.05 it/sec) -training >> step=4774400, episode=796 reward=0.7799085 (531.90 it/sec) -training >> step=4774500, episode=796 reward=0.7676558 (490.96 it/sec) -training >> step=4774600, episode=796 reward=0.7802445 (480.78 it/sec) -training >> step=4774700, episode=796 reward=0.7628495 (496.87 it/sec) -training >> step=4774800, episode=796 reward=0.8058641 (497.52 it/sec) -training >> step=4774900, episode=796 reward=0.789739 (503.80 it/sec) -training >> step=4775000, episode=796 reward=0.7493274 (506.39 it/sec) -training >> step=4775100, episode=796 reward=0.7813373 (506.33 it/sec) -training >> step=4775200, episode=796 reward=0.7921358 (506.48 it/sec) -training >> step=4775300, episode=797 reward=0.8028268 (113.12 it/sec) -training >> step=4775400, episode=797 reward=0.7887158 (512.56 it/sec) -training >> step=4775500, episode=797 reward=0.7795054 (466.90 it/sec) -training >> step=4775600, episode=797 reward=0.769841 (522.67 it/sec) -training >> step=4775700, episode=797 reward=0.788601 (491.43 it/sec) -training >> step=4775800, episode=797 reward=0.7811694 (440.73 it/sec) -training >> step=4775900, episode=797 reward=0.7820958 (489.58 it/sec) -training >> step=4776000, episode=797 reward=0.7735754 (473.24 it/sec) -training >> step=4776100, episode=797 reward=0.7694095 (506.89 it/sec) -training >> step=4776200, episode=797 reward=0.7727751 (494.30 it/sec) -training >> step=4776300, episode=797 reward=0.787591 (484.44 it/sec) -training >> step=4776400, episode=797 reward=0.7623509 (514.05 it/sec) -training >> step=4776500, episode=797 reward=0.8000408 (469.54 it/sec) -training >> step=4776600, episode=797 reward=0.761521 (483.96 it/sec) -training >> step=4776700, episode=797 reward=0.7668529 (448.59 it/sec) -training >> step=4776800, episode=797 reward=0.7676898 (442.52 it/sec) -training >> step=4776900, episode=797 reward=0.7796973 (416.83 it/sec) -training >> step=4777000, episode=797 reward=0.7654208 (454.59 it/sec) -training >> step=4777100, episode=797 reward=0.795221 (481.39 it/sec) -training >> step=4777200, episode=797 reward=0.7892413 (481.45 it/sec) -training >> step=4777300, episode=797 reward=0.7836296 (400.73 it/sec) -training >> step=4777400, episode=797 reward=0.7618012 (456.02 it/sec) -training >> step=4777500, episode=797 reward=0.7640003 (466.96 it/sec) -training >> step=4777600, episode=797 reward=0.7681431 (461.01 it/sec) -training >> step=4777700, episode=797 reward=0.7714895 (483.45 it/sec) -training >> step=4777800, episode=797 reward=0.784067 (461.31 it/sec) -training >> step=4777900, episode=797 reward=0.7862765 (389.66 it/sec) -training >> step=4778000, episode=797 reward=0.76482 (405.60 it/sec) -training >> step=4778100, episode=797 reward=0.7637967 (448.44 it/sec) -training >> step=4778200, episode=797 reward=0.7797019 (478.66 it/sec) -training >> step=4778300, episode=797 reward=0.7897698 (407.81 it/sec) -training >> step=4778400, episode=797 reward=0.779679 (457.22 it/sec) -training >> step=4778500, episode=797 reward=0.7878087 (471.19 it/sec) -training >> step=4778600, episode=797 reward=0.7841925 (445.43 it/sec) -training >> step=4778700, episode=797 reward=0.780853 (468.23 it/sec) -training >> step=4778800, episode=797 reward=0.7736614 (467.08 it/sec) -training >> step=4778900, episode=797 reward=0.7690058 (485.88 it/sec) -training >> step=4779000, episode=797 reward=0.7786359 (489.58 it/sec) -training >> step=4779100, episode=797 reward=0.776673 (469.95 it/sec) -training >> step=4779200, episode=797 reward=0.7699348 (497.65 it/sec) -training >> step=4779300, episode=797 reward=0.7823577 (490.65 it/sec) -training >> step=4779400, episode=797 reward=0.7822913 (496.52 it/sec) -training >> step=4779500, episode=797 reward=0.7637148 (467.07 it/sec) -training >> step=4779600, episode=797 reward=0.7629203 (478.69 it/sec) -training >> step=4779700, episode=797 reward=0.7838891 (466.94 it/sec) -training >> step=4779800, episode=797 reward=0.7858246 (488.59 it/sec) -training >> step=4779900, episode=797 reward=0.7744673 (512.84 it/sec) -training >> step=4780000, episode=797 reward=0.7874587 (507.95 it/sec) -training >> step=4780100, episode=797 reward=0.7725292 (447.79 it/sec) -training >> step=4780200, episode=797 reward=0.7688005 (483.31 it/sec) -training >> step=4780300, episode=797 reward=0.798258 (490.46 it/sec) -training >> step=4780400, episode=797 reward=0.7769378 (481.38 it/sec) -training >> step=4780500, episode=797 reward=0.7850819 (466.60 it/sec) -training >> step=4780600, episode=797 reward=0.7789261 (485.00 it/sec) -training >> step=4780700, episode=797 reward=0.7693156 (480.11 it/sec) -training >> step=4780800, episode=797 reward=0.7641413 (480.52 it/sec) -training >> step=4780900, episode=797 reward=0.7629795 (492.33 it/sec) -training >> step=4781000, episode=797 reward=0.7751849 (434.59 it/sec) -training >> step=4781100, episode=797 reward=0.7965762 (499.22 it/sec) -training >> step=4781200, episode=797 reward=0.7730015 (462.42 it/sec) -training >> step=4781300, episode=798 reward=0.7748398 (99.22 it/sec) -training >> step=4781400, episode=798 reward=0.7619301 (477.42 it/sec) -training >> step=4781500, episode=798 reward=0.7616066 (474.94 it/sec) -training >> step=4781600, episode=798 reward=0.784789 (461.88 it/sec) -training >> step=4781700, episode=798 reward=0.7698594 (472.06 it/sec) -training >> step=4781800, episode=798 reward=0.781652 (421.93 it/sec) -training >> step=4781900, episode=798 reward=0.7804621 (459.17 it/sec) -training >> step=4782000, episode=798 reward=0.7894214 (515.12 it/sec) -training >> step=4782100, episode=798 reward=0.7930402 (463.15 it/sec) -training >> step=4782200, episode=798 reward=0.7946531 (469.87 it/sec) -training >> step=4782300, episode=798 reward=0.7461469 (516.76 it/sec) -training >> step=4782400, episode=798 reward=0.7629038 (472.42 it/sec) -training >> step=4782500, episode=798 reward=0.779239 (479.20 it/sec) -training >> step=4782600, episode=798 reward=0.795074 (489.47 it/sec) -training >> step=4782700, episode=798 reward=0.7624815 (517.28 it/sec) -training >> step=4782800, episode=798 reward=0.7698131 (458.02 it/sec) -training >> step=4782900, episode=798 reward=0.7955596 (483.96 it/sec) -training >> step=4783000, episode=798 reward=0.7688884 (493.36 it/sec) -training >> step=4783100, episode=798 reward=0.7886668 (475.33 it/sec) -training >> step=4783200, episode=798 reward=0.77661 (470.54 it/sec) -training >> step=4783300, episode=798 reward=0.7787159 (468.61 it/sec) -training >> step=4783400, episode=798 reward=0.8007659 (462.90 it/sec) -training >> step=4783500, episode=798 reward=0.7557009 (441.79 it/sec) -training >> step=4783600, episode=798 reward=0.8077831 (460.56 it/sec) -training >> step=4783700, episode=798 reward=0.7976866 (470.62 it/sec) -training >> step=4783800, episode=798 reward=0.7875636 (488.32 it/sec) -training >> step=4783900, episode=798 reward=0.7730091 (544.92 it/sec) -training >> step=4784000, episode=798 reward=0.7747488 (514.82 it/sec) -training >> step=4784100, episode=798 reward=0.7825811 (480.85 it/sec) -training >> step=4784200, episode=798 reward=0.7975566 (476.63 it/sec) -training >> step=4784300, episode=798 reward=0.7806037 (410.62 it/sec) -training >> step=4784400, episode=798 reward=0.782954 (452.56 it/sec) -training >> step=4784500, episode=798 reward=0.7906941 (496.18 it/sec) -training >> step=4784600, episode=798 reward=0.7921475 (483.18 it/sec) -training >> step=4784700, episode=798 reward=0.7741593 (472.18 it/sec) -training >> step=4784800, episode=798 reward=0.7837562 (514.60 it/sec) -training >> step=4784900, episode=798 reward=0.7488074 (516.24 it/sec) -training >> step=4785000, episode=798 reward=0.791426 (538.92 it/sec) -training >> step=4785100, episode=798 reward=0.7823464 (515.76 it/sec) -training >> step=4785200, episode=798 reward=0.7552531 (504.32 it/sec) -training >> step=4785300, episode=798 reward=0.7677218 (527.77 it/sec) -training >> step=4785400, episode=798 reward=0.7656209 (495.93 it/sec) -training >> step=4785500, episode=798 reward=0.7722293 (481.08 it/sec) -training >> step=4785600, episode=798 reward=0.7882014 (546.64 it/sec) -training >> step=4785700, episode=798 reward=0.7533308 (542.14 it/sec) -training >> step=4785800, episode=798 reward=0.7809985 (532.83 it/sec) -training >> step=4785900, episode=798 reward=0.7636308 (527.80 it/sec) -training >> step=4786000, episode=798 reward=0.7767689 (522.09 it/sec) -training >> step=4786100, episode=798 reward=0.7746119 (507.04 it/sec) -training >> step=4786200, episode=798 reward=0.77832 (512.73 it/sec) -training >> step=4786300, episode=798 reward=0.7846051 (544.64 it/sec) -training >> step=4786400, episode=798 reward=0.7803075 (507.51 it/sec) -training >> step=4786500, episode=798 reward=0.7870919 (541.00 it/sec) -training >> step=4786600, episode=798 reward=0.7667599 (503.59 it/sec) -training >> step=4786700, episode=798 reward=0.771696 (497.98 it/sec) -training >> step=4786800, episode=798 reward=0.7758205 (536.88 it/sec) -training >> step=4786900, episode=798 reward=0.7776906 (545.70 it/sec) -training >> step=4787000, episode=798 reward=0.7696528 (559.05 it/sec) -training >> step=4787100, episode=798 reward=0.78482 (562.49 it/sec) -training >> step=4787200, episode=798 reward=0.7857342 (510.27 it/sec) -training >> step=4787300, episode=799 reward=0.7634048 (87.03 it/sec) -training >> step=4787400, episode=799 reward=0.771637 (444.45 it/sec) -training >> step=4787500, episode=799 reward=0.7645562 (409.60 it/sec) -training >> step=4787600, episode=799 reward=0.7762073 (430.88 it/sec) -training >> step=4787700, episode=799 reward=0.7785262 (445.17 it/sec) -training >> step=4787800, episode=799 reward=0.7685383 (496.60 it/sec) -training >> step=4787900, episode=799 reward=0.7621234 (497.74 it/sec) -training >> step=4788000, episode=799 reward=0.7690432 (526.81 it/sec) -training >> step=4788100, episode=799 reward=0.7868993 (583.45 it/sec) -training >> step=4788200, episode=799 reward=0.7594154 (510.35 it/sec) -training >> step=4788300, episode=799 reward=0.7739045 (457.15 it/sec) -training >> step=4788400, episode=799 reward=0.7749269 (455.31 it/sec) -training >> step=4788500, episode=799 reward=0.7901827 (442.28 it/sec) -training >> step=4788600, episode=799 reward=0.7694023 (413.39 it/sec) -training >> step=4788700, episode=799 reward=0.7718369 (475.91 it/sec) -training >> step=4788800, episode=799 reward=0.8053993 (473.50 it/sec) -training >> step=4788900, episode=799 reward=0.7788804 (411.43 it/sec) -training >> step=4789000, episode=799 reward=0.7450805 (433.87 it/sec) -training >> step=4789100, episode=799 reward=0.786889 (445.69 it/sec) -training >> step=4789200, episode=799 reward=0.7762551 (439.35 it/sec) -training >> step=4789300, episode=799 reward=0.7828786 (475.56 it/sec) -training >> step=4789400, episode=799 reward=0.7649624 (491.86 it/sec) -training >> step=4789500, episode=799 reward=0.7893404 (532.17 it/sec) -training >> step=4789600, episode=799 reward=0.7732329 (474.01 it/sec) -training >> step=4789700, episode=799 reward=0.7673273 (480.42 it/sec) -training >> step=4789800, episode=799 reward=0.7782125 (510.86 it/sec) -training >> step=4789900, episode=799 reward=0.7769874 (530.70 it/sec) -training >> step=4790000, episode=799 reward=0.7564695 (535.65 it/sec) -training >> step=4790100, episode=799 reward=0.7958516 (515.17 it/sec) -training >> step=4790200, episode=799 reward=0.7863674 (518.86 it/sec) -training >> step=4790300, episode=799 reward=0.7773338 (459.42 it/sec) -training >> step=4790400, episode=799 reward=0.7715429 (481.59 it/sec) -training >> step=4790500, episode=799 reward=0.7572936 (462.41 it/sec) -training >> step=4790600, episode=799 reward=0.7828751 (471.56 it/sec) -training >> step=4790700, episode=799 reward=0.7937416 (452.78 it/sec) -training >> step=4790800, episode=799 reward=0.7925049 (473.50 it/sec) -training >> step=4790900, episode=799 reward=0.8001942 (455.92 it/sec) -training >> step=4791000, episode=799 reward=0.787403 (429.56 it/sec) -training >> step=4791100, episode=799 reward=0.7737218 (394.19 it/sec) -training >> step=4791200, episode=799 reward=0.7854971 (416.12 it/sec) -training >> step=4791300, episode=799 reward=0.7704571 (484.94 it/sec) -training >> step=4791400, episode=799 reward=0.7717979 (460.05 it/sec) -training >> step=4791500, episode=799 reward=0.7748147 (461.99 it/sec) -training >> step=4791600, episode=799 reward=0.7828985 (476.56 it/sec) -training >> step=4791700, episode=799 reward=0.7876757 (493.78 it/sec) -training >> step=4791800, episode=799 reward=0.7520884 (465.71 it/sec) -training >> step=4791900, episode=799 reward=0.7757112 (506.96 it/sec) -training >> step=4792000, episode=799 reward=0.7732468 (473.89 it/sec) -training >> step=4792100, episode=799 reward=0.7835545 (468.48 it/sec) -training >> step=4792200, episode=799 reward=0.7888395 (477.73 it/sec) -training >> step=4792300, episode=799 reward=0.7848712 (470.99 it/sec) -training >> step=4792400, episode=799 reward=0.7761164 (510.08 it/sec) -training >> step=4792500, episode=799 reward=0.7825601 (511.43 it/sec) -training >> step=4792600, episode=799 reward=0.7562076 (449.93 it/sec) -training >> step=4792700, episode=799 reward=0.7472969 (510.25 it/sec) -training >> step=4792800, episode=799 reward=0.7745186 (511.07 it/sec) -training >> step=4792900, episode=799 reward=0.7840931 (454.22 it/sec) -training >> step=4793000, episode=799 reward=0.7647398 (444.82 it/sec) -training >> step=4793100, episode=799 reward=0.7764143 (509.55 it/sec) -training >> step=4793200, episode=799 reward=0.7755082 (491.41 it/sec) -training >> step=4793300, episode=800 reward=0.7781248 (117.48 it/sec) -training >> step=4793400, episode=800 reward=0.7575313 (492.45 it/sec) -training >> step=4793500, episode=800 reward=0.7722476 (510.52 it/sec) -training >> step=4793600, episode=800 reward=0.7475126 (508.04 it/sec) -training >> step=4793700, episode=800 reward=0.781628 (506.83 it/sec) -training >> step=4793800, episode=800 reward=0.7619307 (456.02 it/sec) -training >> step=4793900, episode=800 reward=0.7786363 (486.01 it/sec) -training >> step=4794000, episode=800 reward=0.7749818 (488.03 it/sec) -training >> step=4794100, episode=800 reward=0.7715038 (469.80 it/sec) -training >> step=4794200, episode=800 reward=0.7997535 (519.40 it/sec) -training >> step=4794300, episode=800 reward=0.7651538 (489.92 it/sec) -training >> step=4794400, episode=800 reward=0.7728038 (489.77 it/sec) -training >> step=4794500, episode=800 reward=0.779389 (519.28 it/sec) -training >> step=4794600, episode=800 reward=0.7784576 (465.03 it/sec) -training >> step=4794700, episode=800 reward=0.7718276 (497.90 it/sec) -training >> step=4794800, episode=800 reward=0.7688534 (453.19 it/sec) -training >> step=4794900, episode=800 reward=0.7936648 (529.21 it/sec) -training >> step=4795000, episode=800 reward=0.7955361 (501.30 it/sec) -training >> step=4795100, episode=800 reward=0.7798984 (487.93 it/sec) -training >> step=4795200, episode=800 reward=0.774035 (512.12 it/sec) -training >> step=4795300, episode=800 reward=0.7781177 (472.38 it/sec) -training >> step=4795400, episode=800 reward=0.7529519 (474.47 it/sec) -training >> step=4795500, episode=800 reward=0.773648 (448.03 it/sec) -training >> step=4795600, episode=800 reward=0.7847702 (516.11 it/sec) -training >> step=4795700, episode=800 reward=0.7780233 (470.63 it/sec) -training >> step=4795800, episode=800 reward=0.7738624 (457.27 it/sec) -training >> step=4795900, episode=800 reward=0.8071285 (450.22 it/sec) -training >> step=4796000, episode=800 reward=0.7630001 (491.94 it/sec) -training >> step=4796100, episode=800 reward=0.7631954 (502.54 it/sec) -training >> step=4796200, episode=800 reward=0.7868318 (476.88 it/sec) -training >> step=4796300, episode=800 reward=0.7861145 (481.23 it/sec) -training >> step=4796400, episode=800 reward=0.773226 (460.91 it/sec) -training >> step=4796500, episode=800 reward=0.7632671 (479.43 it/sec) -training >> step=4796600, episode=800 reward=0.7721013 (501.59 it/sec) -training >> step=4796700, episode=800 reward=0.7943916 (527.69 it/sec) -training >> step=4796800, episode=800 reward=0.7668567 (478.50 it/sec) -training >> step=4796900, episode=800 reward=0.7905184 (461.64 it/sec) -training >> step=4797000, episode=800 reward=0.7670773 (443.33 it/sec) -training >> step=4797100, episode=800 reward=0.7877352 (486.01 it/sec) -training >> step=4797200, episode=800 reward=0.7654923 (447.79 it/sec) -training >> step=4797300, episode=800 reward=0.7745644 (439.71 it/sec) -training >> step=4797400, episode=800 reward=0.7779703 (495.91 it/sec) -training >> step=4797500, episode=800 reward=0.7846388 (460.97 it/sec) -training >> step=4797600, episode=800 reward=0.7805362 (462.55 it/sec) -training >> step=4797700, episode=800 reward=0.7613359 (489.41 it/sec) -training >> step=4797800, episode=800 reward=0.76452 (517.60 it/sec) -training >> step=4797900, episode=800 reward=0.7720412 (478.12 it/sec) -training >> step=4798000, episode=800 reward=0.7740234 (460.99 it/sec) -training >> step=4798100, episode=800 reward=0.7733135 (417.05 it/sec) -training >> step=4798200, episode=800 reward=0.7738688 (391.79 it/sec) -training >> step=4798300, episode=800 reward=0.7799228 (425.37 it/sec) -training >> step=4798400, episode=800 reward=0.786987 (379.25 it/sec) -training >> step=4798500, episode=800 reward=0.7686521 (464.60 it/sec) -training >> step=4798600, episode=800 reward=0.7807251 (473.59 it/sec) -training >> step=4798700, episode=800 reward=0.7881529 (467.19 it/sec) -training >> step=4798800, episode=800 reward=0.7895147 (464.28 it/sec) -training >> step=4798900, episode=800 reward=0.7732315 (427.18 it/sec) -training >> step=4799000, episode=800 reward=0.7856256 (460.33 it/sec) -training >> step=4799100, episode=800 reward=0.7518008 (376.09 it/sec) -training >> step=4799200, episode=800 reward=0.7683121 (454.81 it/sec) -training >> step=4799300, episode=801 reward=0.7836123 (83.98 it/sec) -training >> step=4799400, episode=801 reward=0.7584763 (453.13 it/sec) -training >> step=4799500, episode=801 reward=0.7747405 (421.92 it/sec) -training >> step=4799600, episode=801 reward=0.7681153 (492.26 it/sec) -training >> step=4799700, episode=801 reward=0.7507038 (416.11 it/sec) -training >> step=4799800, episode=801 reward=0.7632547 (398.45 it/sec) -training >> step=4799900, episode=801 reward=0.7747495 (409.66 it/sec) -training >> step=4800000, episode=801 reward=0.7645233 (407.47 it/sec) -training >> step=4800100, episode=801 reward=0.7864812 (437.92 it/sec) -training >> step=4800200, episode=801 reward=0.777622 (408.70 it/sec) -training >> step=4800300, episode=801 reward=0.7692506 (462.53 it/sec) -training >> step=4800400, episode=801 reward=0.7787982 (505.89 it/sec) -training >> step=4800500, episode=801 reward=0.7631658 (446.99 it/sec) -training >> step=4800600, episode=801 reward=0.7891104 (487.76 it/sec) -training >> step=4800700, episode=801 reward=0.8009089 (511.43 it/sec) -training >> step=4800800, episode=801 reward=0.80382 (449.57 it/sec) -training >> step=4800900, episode=801 reward=0.7804818 (480.88 it/sec) -training >> step=4801000, episode=801 reward=0.7864147 (508.58 it/sec) -training >> step=4801100, episode=801 reward=0.77607 (485.78 it/sec) -training >> step=4801200, episode=801 reward=0.7847263 (468.95 it/sec) -training >> step=4801300, episode=801 reward=0.7801013 (508.19 it/sec) -training >> step=4801400, episode=801 reward=0.7878356 (437.35 it/sec) -training >> step=4801500, episode=801 reward=0.7775814 (434.91 it/sec) -training >> step=4801600, episode=801 reward=0.7863783 (442.58 it/sec) -training >> step=4801700, episode=801 reward=0.7658107 (507.11 it/sec) -training >> step=4801800, episode=801 reward=0.7939097 (481.13 it/sec) -training >> step=4801900, episode=801 reward=0.7752807 (472.76 it/sec) -training >> step=4802000, episode=801 reward=0.7716781 (448.51 it/sec) -training >> step=4802100, episode=801 reward=0.7753299 (474.87 it/sec) -training >> step=4802200, episode=801 reward=0.7731231 (499.66 it/sec) -training >> step=4802300, episode=801 reward=0.7860587 (491.42 it/sec) -training >> step=4802400, episode=801 reward=0.7915319 (453.28 it/sec) -training >> step=4802500, episode=801 reward=0.7703336 (469.19 it/sec) -training >> step=4802600, episode=801 reward=0.7631928 (493.30 it/sec) -training >> step=4802700, episode=801 reward=0.794017 (507.91 it/sec) -training >> step=4802800, episode=801 reward=0.7713635 (530.19 it/sec) -training >> step=4802900, episode=801 reward=0.7765258 (449.02 it/sec) -training >> step=4803000, episode=801 reward=0.7727478 (437.17 it/sec) -training >> step=4803100, episode=801 reward=0.7919539 (515.32 it/sec) -training >> step=4803200, episode=801 reward=0.77585 (503.27 it/sec) -training >> step=4803300, episode=801 reward=0.7650145 (505.73 it/sec) -training >> step=4803400, episode=801 reward=0.786981 (489.90 it/sec) -training >> step=4803500, episode=801 reward=0.7948888 (473.66 it/sec) -training >> step=4803600, episode=801 reward=0.7874438 (465.55 it/sec) -training >> step=4803700, episode=801 reward=0.7574121 (489.01 it/sec) -training >> step=4803800, episode=801 reward=0.7642902 (492.10 it/sec) -training >> step=4803900, episode=801 reward=0.7956027 (506.23 it/sec) -training >> step=4804000, episode=801 reward=0.7492386 (467.92 it/sec) -training >> step=4804100, episode=801 reward=0.7496379 (473.28 it/sec) -training >> step=4804200, episode=801 reward=0.7800217 (506.02 it/sec) -training >> step=4804300, episode=801 reward=0.7746609 (496.80 it/sec) -training >> step=4804400, episode=801 reward=0.7951916 (433.12 it/sec) -training >> step=4804500, episode=801 reward=0.7567057 (483.68 it/sec) -training >> step=4804600, episode=801 reward=0.771228 (517.14 it/sec) -training >> step=4804700, episode=801 reward=0.7832014 (487.57 it/sec) -training >> step=4804800, episode=801 reward=0.7893573 (510.80 it/sec) -training >> step=4804900, episode=801 reward=0.7740902 (457.75 it/sec) -training >> step=4805000, episode=801 reward=0.7767697 (499.02 it/sec) -training >> step=4805100, episode=801 reward=0.7842665 (505.41 it/sec) -training >> step=4805200, episode=801 reward=0.7724095 (524.41 it/sec) -training >> step=4805300, episode=802 reward=0.7650028 (110.18 it/sec) -training >> step=4805400, episode=802 reward=0.761334 (446.11 it/sec) -training >> step=4805500, episode=802 reward=0.7485011 (454.39 it/sec) -training >> step=4805600, episode=802 reward=0.7717896 (495.72 it/sec) -training >> step=4805700, episode=802 reward=0.7687176 (521.35 it/sec) -training >> step=4805800, episode=802 reward=0.7873321 (494.49 it/sec) -training >> step=4805900, episode=802 reward=0.7883568 (471.70 it/sec) -training >> step=4806000, episode=802 reward=0.7818681 (530.76 it/sec) -training >> step=4806100, episode=802 reward=0.7781276 (487.36 it/sec) -training >> step=4806200, episode=802 reward=0.7688301 (474.67 it/sec) -training >> step=4806300, episode=802 reward=0.7677293 (483.87 it/sec) -training >> step=4806400, episode=802 reward=0.8014132 (514.45 it/sec) -training >> step=4806500, episode=802 reward=0.7727802 (487.72 it/sec) -training >> step=4806600, episode=802 reward=0.7770713 (473.10 it/sec) -training >> step=4806700, episode=802 reward=0.7854053 (493.80 it/sec) -training >> step=4806800, episode=802 reward=0.7669471 (479.47 it/sec) -training >> step=4806900, episode=802 reward=0.7810175 (489.55 it/sec) -training >> step=4807000, episode=802 reward=0.783798 (501.27 it/sec) -training >> step=4807100, episode=802 reward=0.7880859 (443.93 it/sec) -training >> step=4807200, episode=802 reward=0.7966141 (435.37 it/sec) -training >> step=4807300, episode=802 reward=0.7909475 (460.82 it/sec) -training >> step=4807400, episode=802 reward=0.7903296 (449.17 it/sec) -training >> step=4807500, episode=802 reward=0.7763134 (467.95 it/sec) -training >> step=4807600, episode=802 reward=0.7677475 (467.49 it/sec) -training >> step=4807700, episode=802 reward=0.777859 (469.21 it/sec) -training >> step=4807800, episode=802 reward=0.7906185 (479.52 it/sec) -training >> step=4807900, episode=802 reward=0.7729036 (465.68 it/sec) -training >> step=4808000, episode=802 reward=0.7805883 (379.11 it/sec) -training >> step=4808100, episode=802 reward=0.7785642 (442.24 it/sec) -training >> step=4808200, episode=802 reward=0.7770844 (476.74 it/sec) -training >> step=4808300, episode=802 reward=0.7871389 (430.40 it/sec) -training >> step=4808400, episode=802 reward=0.7739779 (453.93 it/sec) -training >> step=4808500, episode=802 reward=0.7903817 (493.95 it/sec) -training >> step=4808600, episode=802 reward=0.7888469 (480.46 it/sec) -training >> step=4808700, episode=802 reward=0.7905164 (466.71 it/sec) -training >> step=4808800, episode=802 reward=0.7749889 (471.17 it/sec) -training >> step=4808900, episode=802 reward=0.7873329 (423.96 it/sec) -training >> step=4809000, episode=802 reward=0.8029952 (479.05 it/sec) -training >> step=4809100, episode=802 reward=0.7793735 (443.62 it/sec) -training >> step=4809200, episode=802 reward=0.7718555 (471.62 it/sec) -training >> step=4809300, episode=802 reward=0.7745864 (428.04 it/sec) -training >> step=4809400, episode=802 reward=0.7789508 (391.26 it/sec) -training >> step=4809500, episode=802 reward=0.7711234 (436.48 it/sec) -training >> step=4809600, episode=802 reward=0.7742918 (418.48 it/sec) -training >> step=4809700, episode=802 reward=0.7658771 (461.71 it/sec) -training >> step=4809800, episode=802 reward=0.7941514 (503.84 it/sec) -training >> step=4809900, episode=802 reward=0.7656891 (473.52 it/sec) -training >> step=4810000, episode=802 reward=0.7753392 (469.69 it/sec) -training >> step=4810100, episode=802 reward=0.7786135 (470.26 it/sec) -training >> step=4810200, episode=802 reward=0.7763186 (474.41 it/sec) -training >> step=4810300, episode=802 reward=0.7631596 (465.29 it/sec) -training >> step=4810400, episode=802 reward=0.7730063 (462.26 it/sec) -training >> step=4810500, episode=802 reward=0.7643088 (437.26 it/sec) -training >> step=4810600, episode=802 reward=0.7886461 (468.20 it/sec) -training >> step=4810700, episode=802 reward=0.7611582 (540.88 it/sec) -training >> step=4810800, episode=802 reward=0.7822396 (511.69 it/sec) -training >> step=4810900, episode=802 reward=0.7726958 (453.13 it/sec) -training >> step=4811000, episode=802 reward=0.7806475 (494.20 it/sec) -training >> step=4811100, episode=802 reward=0.7696294 (517.58 it/sec) -training >> step=4811200, episode=802 reward=0.7770808 (515.09 it/sec) -training >> step=4811300, episode=803 reward=0.777667 (84.25 it/sec) -training >> step=4811400, episode=803 reward=0.7863619 (414.39 it/sec) -training >> step=4811500, episode=803 reward=0.7665632 (343.52 it/sec) -training >> step=4811600, episode=803 reward=0.761853 (434.58 it/sec) -training >> step=4811700, episode=803 reward=0.7700869 (486.00 it/sec) -training >> step=4811800, episode=803 reward=0.7832785 (454.03 it/sec) -training >> step=4811900, episode=803 reward=0.7893952 (494.41 it/sec) -training >> step=4812000, episode=803 reward=0.8133786 (524.96 it/sec) -training >> step=4812100, episode=803 reward=0.8042498 (517.96 it/sec) -training >> step=4812200, episode=803 reward=0.79773 (514.13 it/sec) -training >> step=4812300, episode=803 reward=0.7682203 (490.11 it/sec) -training >> step=4812400, episode=803 reward=0.7788324 (539.35 it/sec) -training >> step=4812500, episode=803 reward=0.7568743 (484.04 it/sec) -training >> step=4812600, episode=803 reward=0.7703596 (471.21 it/sec) -training >> step=4812700, episode=803 reward=0.7884977 (499.39 it/sec) -training >> step=4812800, episode=803 reward=0.7807109 (438.39 it/sec) -training >> step=4812900, episode=803 reward=0.7716771 (470.45 it/sec) -training >> step=4813000, episode=803 reward=0.7920358 (441.29 it/sec) -training >> step=4813100, episode=803 reward=0.7586582 (397.62 it/sec) -training >> step=4813200, episode=803 reward=0.7821563 (414.15 it/sec) -training >> step=4813300, episode=803 reward=0.7794604 (378.84 it/sec) -training >> step=4813400, episode=803 reward=0.7974864 (445.70 it/sec) -training >> step=4813500, episode=803 reward=0.7651528 (413.86 it/sec) -training >> step=4813600, episode=803 reward=0.7627566 (463.08 it/sec) -training >> step=4813700, episode=803 reward=0.7807264 (455.67 it/sec) -training >> step=4813800, episode=803 reward=0.7781776 (483.55 it/sec) -training >> step=4813900, episode=803 reward=0.8039404 (476.45 it/sec) -training >> step=4814000, episode=803 reward=0.7805977 (500.49 it/sec) -training >> step=4814100, episode=803 reward=0.7727506 (515.16 it/sec) -training >> step=4814200, episode=803 reward=0.7650221 (515.31 it/sec) -training >> step=4814300, episode=803 reward=0.7881713 (467.05 it/sec) -training >> step=4814400, episode=803 reward=0.8053904 (453.06 it/sec) -training >> step=4814500, episode=803 reward=0.7776402 (466.13 it/sec) -training >> step=4814600, episode=803 reward=0.766229 (517.70 it/sec) -training >> step=4814700, episode=803 reward=0.7844445 (521.30 it/sec) -training >> step=4814800, episode=803 reward=0.7855477 (520.95 it/sec) -training >> step=4814900, episode=803 reward=0.7906902 (494.85 it/sec) -training >> step=4815000, episode=803 reward=0.7678449 (488.02 it/sec) -training >> step=4815100, episode=803 reward=0.772848 (500.45 it/sec) -training >> step=4815200, episode=803 reward=0.7854828 (506.50 it/sec) -training >> step=4815300, episode=803 reward=0.7857345 (525.77 it/sec) -training >> step=4815400, episode=803 reward=0.7881185 (537.09 it/sec) -training >> step=4815500, episode=803 reward=0.7626415 (541.18 it/sec) -training >> step=4815600, episode=803 reward=0.7657186 (490.36 it/sec) -training >> step=4815700, episode=803 reward=0.7870164 (440.07 it/sec) -training >> step=4815800, episode=803 reward=0.7788323 (476.08 it/sec) -training >> step=4815900, episode=803 reward=0.77112 (477.74 it/sec) -training >> step=4816000, episode=803 reward=0.7892295 (467.41 it/sec) -training >> step=4816100, episode=803 reward=0.7945932 (491.81 it/sec) -training >> step=4816200, episode=803 reward=0.791531 (450.43 it/sec) -training >> step=4816300, episode=803 reward=0.7886125 (458.07 it/sec) -training >> step=4816400, episode=803 reward=0.7879444 (420.24 it/sec) -training >> step=4816500, episode=803 reward=0.7678571 (437.17 it/sec) -training >> step=4816600, episode=803 reward=0.7967446 (446.06 it/sec) -training >> step=4816700, episode=803 reward=0.7783428 (453.96 it/sec) -training >> step=4816800, episode=803 reward=0.780616 (447.94 it/sec) -training >> step=4816900, episode=803 reward=0.7715775 (417.40 it/sec) -training >> step=4817000, episode=803 reward=0.7849757 (441.05 it/sec) -training >> step=4817100, episode=803 reward=0.7711104 (440.98 it/sec) -training >> step=4817200, episode=803 reward=0.7634091 (447.16 it/sec) -training >> step=4817300, episode=804 reward=0.7749858 (91.57 it/sec) -training >> step=4817400, episode=804 reward=0.7781199 (419.59 it/sec) -training >> step=4817500, episode=804 reward=0.7693804 (446.21 it/sec) -training >> step=4817600, episode=804 reward=0.7746008 (457.71 it/sec) -training >> step=4817700, episode=804 reward=0.7772821 (456.17 it/sec) -training >> step=4817800, episode=804 reward=0.7660758 (535.25 it/sec) -training >> step=4817900, episode=804 reward=0.7551121 (470.62 it/sec) -training >> step=4818000, episode=804 reward=0.8055121 (507.89 it/sec) -training >> step=4818100, episode=804 reward=0.7622723 (495.21 it/sec) -training >> step=4818200, episode=804 reward=0.768769 (445.12 it/sec) -training >> step=4818300, episode=804 reward=0.7810416 (484.47 it/sec) -training >> step=4818400, episode=804 reward=0.7827704 (476.68 it/sec) -training >> step=4818500, episode=804 reward=0.7698048 (516.24 it/sec) -training >> step=4818600, episode=804 reward=0.7794831 (517.39 it/sec) -training >> step=4818700, episode=804 reward=0.763378 (450.76 it/sec) -training >> step=4818800, episode=804 reward=0.7732899 (524.09 it/sec) -training >> step=4818900, episode=804 reward=0.7922745 (438.10 it/sec) -training >> step=4819000, episode=804 reward=0.7806775 (483.41 it/sec) -training >> step=4819100, episode=804 reward=0.7718011 (498.17 it/sec) -training >> step=4819200, episode=804 reward=0.7821263 (513.28 it/sec) -training >> step=4819300, episode=804 reward=0.7750376 (496.88 it/sec) -training >> step=4819400, episode=804 reward=0.8035019 (480.40 it/sec) -training >> step=4819500, episode=804 reward=0.7758037 (519.19 it/sec) -training >> step=4819600, episode=804 reward=0.7772205 (500.26 it/sec) -training >> step=4819700, episode=804 reward=0.8005614 (498.32 it/sec) -training >> step=4819800, episode=804 reward=0.7543646 (503.21 it/sec) -training >> step=4819900, episode=804 reward=0.7711009 (505.59 it/sec) -training >> step=4820000, episode=804 reward=0.7901073 (493.92 it/sec) -training >> step=4820100, episode=804 reward=0.7898945 (520.02 it/sec) -training >> step=4820200, episode=804 reward=0.7842861 (529.47 it/sec) -training >> step=4820300, episode=804 reward=0.7885653 (508.12 it/sec) -training >> step=4820400, episode=804 reward=0.7794738 (491.80 it/sec) -training >> step=4820500, episode=804 reward=0.7825054 (523.73 it/sec) -training >> step=4820600, episode=804 reward=0.762317 (469.60 it/sec) -training >> step=4820700, episode=804 reward=0.7893036 (465.07 it/sec) -training >> step=4820800, episode=804 reward=0.7627092 (488.09 it/sec) -training >> step=4820900, episode=804 reward=0.7815216 (506.22 it/sec) -training >> step=4821000, episode=804 reward=0.7837945 (495.19 it/sec) -training >> step=4821100, episode=804 reward=0.7841368 (499.15 it/sec) -training >> step=4821200, episode=804 reward=0.7831172 (443.29 it/sec) -training >> step=4821300, episode=804 reward=0.7820421 (532.09 it/sec) -training >> step=4821400, episode=804 reward=0.7918827 (466.93 it/sec) -training >> step=4821500, episode=804 reward=0.7685364 (473.45 it/sec) -training >> step=4821600, episode=804 reward=0.7634256 (489.86 it/sec) -training >> step=4821700, episode=804 reward=0.7829794 (394.62 it/sec) -training >> step=4821800, episode=804 reward=0.7909882 (466.08 it/sec) -training >> step=4821900, episode=804 reward=0.7611594 (452.42 it/sec) -training >> step=4822000, episode=804 reward=0.7795954 (443.98 it/sec) -training >> step=4822100, episode=804 reward=0.7946581 (447.34 it/sec) -training >> step=4822200, episode=804 reward=0.782481 (415.33 it/sec) -training >> step=4822300, episode=804 reward=0.7584889 (449.18 it/sec) -training >> step=4822400, episode=804 reward=0.7740458 (459.22 it/sec) -training >> step=4822500, episode=804 reward=0.8025804 (459.92 it/sec) -training >> step=4822600, episode=804 reward=0.7981961 (464.00 it/sec) -training >> step=4822700, episode=804 reward=0.7717934 (495.48 it/sec) -training >> step=4822800, episode=804 reward=0.7470253 (503.92 it/sec) -training >> step=4822900, episode=804 reward=0.7780575 (433.87 it/sec) -training >> step=4823000, episode=804 reward=0.7663452 (494.45 it/sec) -training >> step=4823100, episode=804 reward=0.7859558 (515.84 it/sec) -training >> step=4823200, episode=804 reward=0.7758984 (502.26 it/sec) -training >> step=4823300, episode=805 reward=0.7807764 (75.71 it/sec) -training >> step=4823400, episode=805 reward=0.7836218 (520.51 it/sec) -training >> step=4823500, episode=805 reward=0.7654004 (488.35 it/sec) -training >> step=4823600, episode=805 reward=0.7578082 (496.96 it/sec) -training >> step=4823700, episode=805 reward=0.7787427 (531.72 it/sec) -training >> step=4823800, episode=805 reward=0.7934703 (536.10 it/sec) -training >> step=4823900, episode=805 reward=0.7699305 (509.48 it/sec) -training >> step=4824000, episode=805 reward=0.7977943 (556.45 it/sec) -training >> step=4824100, episode=805 reward=0.7857124 (491.51 it/sec) -training >> step=4824200, episode=805 reward=0.7847766 (512.19 it/sec) -training >> step=4824300, episode=805 reward=0.7961337 (540.75 it/sec) -training >> step=4824400, episode=805 reward=0.7802638 (505.56 it/sec) -training >> step=4824500, episode=805 reward=0.7922921 (529.86 it/sec) -training >> step=4824600, episode=805 reward=0.779114 (545.52 it/sec) -training >> step=4824700, episode=805 reward=0.7848914 (510.51 it/sec) -training >> step=4824800, episode=805 reward=0.7694248 (522.38 it/sec) -training >> step=4824900, episode=805 reward=0.7773646 (522.99 it/sec) -training >> step=4825000, episode=805 reward=0.7711671 (530.34 it/sec) -training >> step=4825100, episode=805 reward=0.7906529 (525.21 it/sec) -training >> step=4825200, episode=805 reward=0.7708386 (467.53 it/sec) -training >> step=4825300, episode=805 reward=0.790593 (514.26 it/sec) -training >> step=4825400, episode=805 reward=0.7750867 (498.63 it/sec) -training >> step=4825500, episode=805 reward=0.7754849 (510.11 it/sec) -training >> step=4825600, episode=805 reward=0.7815058 (539.80 it/sec) -training >> step=4825700, episode=805 reward=0.7679 (531.35 it/sec) -training >> step=4825800, episode=805 reward=0.7778224 (534.22 it/sec) -training >> step=4825900, episode=805 reward=0.8034689 (472.74 it/sec) -training >> step=4826000, episode=805 reward=0.7828435 (505.60 it/sec) -training >> step=4826100, episode=805 reward=0.7564062 (491.55 it/sec) -training >> step=4826200, episode=805 reward=0.7863927 (462.61 it/sec) -training >> step=4826300, episode=805 reward=0.7781596 (509.38 it/sec) -training >> step=4826400, episode=805 reward=0.8025073 (537.67 it/sec) -training >> step=4826500, episode=805 reward=0.7885624 (527.31 it/sec) -training >> step=4826600, episode=805 reward=0.7913019 (470.69 it/sec) -training >> step=4826700, episode=805 reward=0.776939 (465.66 it/sec) -training >> step=4826800, episode=805 reward=0.7789831 (515.76 it/sec) -training >> step=4826900, episode=805 reward=0.7755204 (530.34 it/sec) -training >> step=4827000, episode=805 reward=0.7817686 (519.70 it/sec) -training >> step=4827100, episode=805 reward=0.7791166 (557.35 it/sec) -training >> step=4827200, episode=805 reward=0.783158 (514.21 it/sec) -training >> step=4827300, episode=805 reward=0.7771109 (495.01 it/sec) -training >> step=4827400, episode=805 reward=0.7603117 (518.19 it/sec) -training >> step=4827500, episode=805 reward=0.7811394 (519.71 it/sec) -training >> step=4827600, episode=805 reward=0.7807896 (536.55 it/sec) -training >> step=4827700, episode=805 reward=0.7659521 (522.07 it/sec) -training >> step=4827800, episode=805 reward=0.7989369 (518.49 it/sec) -training >> step=4827900, episode=805 reward=0.7891548 (521.92 it/sec) -training >> step=4828000, episode=805 reward=0.7569865 (519.71 it/sec) -training >> step=4828100, episode=805 reward=0.7927046 (510.64 it/sec) -training >> step=4828200, episode=805 reward=0.796214 (578.95 it/sec) -training >> step=4828300, episode=805 reward=0.7939212 (486.66 it/sec) -training >> step=4828400, episode=805 reward=0.7697136 (528.97 it/sec) -training >> step=4828500, episode=805 reward=0.7782108 (528.42 it/sec) -training >> step=4828600, episode=805 reward=0.7867531 (529.25 it/sec) -training >> step=4828700, episode=805 reward=0.777883 (508.79 it/sec) -training >> step=4828800, episode=805 reward=0.7830998 (496.08 it/sec) -training >> step=4828900, episode=805 reward=0.7717302 (526.42 it/sec) -training >> step=4829000, episode=805 reward=0.7712294 (543.55 it/sec) -training >> step=4829100, episode=805 reward=0.7736359 (503.57 it/sec) -training >> step=4829200, episode=805 reward=0.7612657 (529.67 it/sec) -training >> step=4829300, episode=806 reward=0.7862188 (141.34 it/sec) -training >> step=4829400, episode=806 reward=0.7813588 (469.39 it/sec) -training >> step=4829500, episode=806 reward=0.7675862 (532.92 it/sec) -training >> step=4829600, episode=806 reward=0.780042 (511.50 it/sec) -training >> step=4829700, episode=806 reward=0.7520069 (534.62 it/sec) -training >> step=4829800, episode=806 reward=0.7502329 (515.48 it/sec) -training >> step=4829900, episode=806 reward=0.796917 (449.54 it/sec) -training >> step=4830000, episode=806 reward=0.7642584 (490.10 it/sec) -training >> step=4830100, episode=806 reward=0.7616826 (505.81 it/sec) -training >> step=4830200, episode=806 reward=0.7699254 (508.53 it/sec) -training >> step=4830300, episode=806 reward=0.7781726 (524.72 it/sec) -training >> step=4830400, episode=806 reward=0.7902013 (541.40 it/sec) -training >> step=4830500, episode=806 reward=0.7813637 (496.89 it/sec) -training >> step=4830600, episode=806 reward=0.7703865 (514.95 it/sec) -training >> step=4830700, episode=806 reward=0.7687894 (512.87 it/sec) -training >> step=4830800, episode=806 reward=0.7989807 (526.76 it/sec) -training >> step=4830900, episode=806 reward=0.7635057 (524.03 it/sec) -training >> step=4831000, episode=806 reward=0.7550485 (464.80 it/sec) -training >> step=4831100, episode=806 reward=0.7897224 (516.05 it/sec) -training >> step=4831200, episode=806 reward=0.784091 (507.35 it/sec) -training >> step=4831300, episode=806 reward=0.781068 (519.55 it/sec) -training >> step=4831400, episode=806 reward=0.7837084 (471.33 it/sec) -training >> step=4831500, episode=806 reward=0.789732 (452.72 it/sec) -training >> step=4831600, episode=806 reward=0.7774563 (438.81 it/sec) -training >> step=4831700, episode=806 reward=0.7687514 (472.15 it/sec) -training >> step=4831800, episode=806 reward=0.7538906 (519.24 it/sec) -training >> step=4831900, episode=806 reward=0.7776955 (401.98 it/sec) -training >> step=4832000, episode=806 reward=0.7739454 (472.12 it/sec) -training >> step=4832100, episode=806 reward=0.785718 (485.43 it/sec) -training >> step=4832200, episode=806 reward=0.7685614 (521.63 it/sec) -training >> step=4832300, episode=806 reward=0.7747893 (465.73 it/sec) -training >> step=4832400, episode=806 reward=0.7896554 (430.59 it/sec) -training >> step=4832500, episode=806 reward=0.7919043 (528.86 it/sec) -training >> step=4832600, episode=806 reward=0.77612 (519.64 it/sec) -training >> step=4832700, episode=806 reward=0.7799931 (493.12 it/sec) -training >> step=4832800, episode=806 reward=0.7648849 (517.19 it/sec) -training >> step=4832900, episode=806 reward=0.786776 (489.99 it/sec) -training >> step=4833000, episode=806 reward=0.7736101 (429.41 it/sec) -training >> step=4833100, episode=806 reward=0.759928 (445.67 it/sec) -training >> step=4833200, episode=806 reward=0.7722524 (451.39 it/sec) -training >> step=4833300, episode=806 reward=0.7824017 (501.67 it/sec) -training >> step=4833400, episode=806 reward=0.7829722 (502.32 it/sec) -training >> step=4833500, episode=806 reward=0.798819 (513.52 it/sec) -training >> step=4833600, episode=806 reward=0.7807988 (529.77 it/sec) -training >> step=4833700, episode=806 reward=0.7806503 (522.35 it/sec) -training >> step=4833800, episode=806 reward=0.7802359 (532.05 it/sec) -training >> step=4833900, episode=806 reward=0.7625345 (529.98 it/sec) -training >> step=4834000, episode=806 reward=0.7626864 (530.57 it/sec) -training >> step=4834100, episode=806 reward=0.7739369 (526.54 it/sec) -training >> step=4834200, episode=806 reward=0.769354 (535.71 it/sec) -training >> step=4834300, episode=806 reward=0.7834423 (517.38 it/sec) -training >> step=4834400, episode=806 reward=0.7652375 (445.45 it/sec) -training >> step=4834500, episode=806 reward=0.7682763 (322.55 it/sec) -training >> step=4834600, episode=806 reward=0.7715021 (396.90 it/sec) -training >> step=4834700, episode=806 reward=0.7781076 (457.97 it/sec) -training >> step=4834800, episode=806 reward=0.7770993 (492.41 it/sec) -training >> step=4834900, episode=806 reward=0.7778594 (433.46 it/sec) -training >> step=4835000, episode=806 reward=0.7792475 (463.71 it/sec) -training >> step=4835100, episode=806 reward=0.7968106 (525.07 it/sec) -training >> step=4835200, episode=806 reward=0.7699287 (475.35 it/sec) -training >> step=4835300, episode=807 reward=0.7751843 (98.65 it/sec) -training >> step=4835400, episode=807 reward=0.7709671 (470.97 it/sec) -training >> step=4835500, episode=807 reward=0.7836751 (470.69 it/sec) -training >> step=4835600, episode=807 reward=0.765913 (480.45 it/sec) -training >> step=4835700, episode=807 reward=0.7614121 (499.41 it/sec) -training >> step=4835800, episode=807 reward=0.7738202 (477.09 it/sec) -training >> step=4835900, episode=807 reward=0.7782746 (504.52 it/sec) -training >> step=4836000, episode=807 reward=0.7626845 (446.94 it/sec) -training >> step=4836100, episode=807 reward=0.7650317 (485.42 it/sec) -training >> step=4836200, episode=807 reward=0.7882208 (499.67 it/sec) -training >> step=4836300, episode=807 reward=0.7964453 (487.97 it/sec) -training >> step=4836400, episode=807 reward=0.8075343 (476.29 it/sec) -training >> step=4836500, episode=807 reward=0.7749422 (453.99 it/sec) -training >> step=4836600, episode=807 reward=0.7837665 (494.60 it/sec) -training >> step=4836700, episode=807 reward=0.7768908 (492.96 it/sec) -training >> step=4836800, episode=807 reward=0.7727116 (445.28 it/sec) -training >> step=4836900, episode=807 reward=0.7816815 (464.44 it/sec) -training >> step=4837000, episode=807 reward=0.7614236 (514.02 it/sec) -training >> step=4837100, episode=807 reward=0.7473604 (478.88 it/sec) -training >> step=4837200, episode=807 reward=0.784473 (502.84 it/sec) -training >> step=4837300, episode=807 reward=0.7956468 (482.56 it/sec) -training >> step=4837400, episode=807 reward=0.7777095 (481.44 it/sec) -training >> step=4837500, episode=807 reward=0.7701483 (471.78 it/sec) -training >> step=4837600, episode=807 reward=0.7736972 (494.21 it/sec) -training >> step=4837700, episode=807 reward=0.770491 (453.36 it/sec) -training >> step=4837800, episode=807 reward=0.7850345 (509.90 it/sec) -training >> step=4837900, episode=807 reward=0.7801975 (437.22 it/sec) -training >> step=4838000, episode=807 reward=0.7821899 (465.58 it/sec) -training >> step=4838100, episode=807 reward=0.7755684 (443.00 it/sec) -training >> step=4838200, episode=807 reward=0.7790291 (449.00 it/sec) -training >> step=4838300, episode=807 reward=0.80308 (503.78 it/sec) -training >> step=4838400, episode=807 reward=0.7908768 (450.35 it/sec) -training >> step=4838500, episode=807 reward=0.7901285 (505.35 it/sec) -training >> step=4838600, episode=807 reward=0.7775406 (508.51 it/sec) -training >> step=4838700, episode=807 reward=0.7818902 (499.23 it/sec) -training >> step=4838800, episode=807 reward=0.7965106 (496.10 it/sec) -training >> step=4838900, episode=807 reward=0.7631405 (509.47 it/sec) -training >> step=4839000, episode=807 reward=0.7975511 (481.50 it/sec) -training >> step=4839100, episode=807 reward=0.7640655 (497.11 it/sec) -training >> step=4839200, episode=807 reward=0.7757947 (475.35 it/sec) -training >> step=4839300, episode=807 reward=0.7796028 (526.81 it/sec) -training >> step=4839400, episode=807 reward=0.7563294 (476.28 it/sec) -training >> step=4839500, episode=807 reward=0.7777403 (498.82 it/sec) -training >> step=4839600, episode=807 reward=0.8014554 (513.08 it/sec) -training >> step=4839700, episode=807 reward=0.7734776 (476.87 it/sec) -training >> step=4839800, episode=807 reward=0.79557 (520.38 it/sec) -training >> step=4839900, episode=807 reward=0.8051331 (470.30 it/sec) -training >> step=4840000, episode=807 reward=0.772672 (517.00 it/sec) -training >> step=4840100, episode=807 reward=0.8082616 (501.52 it/sec) -training >> step=4840200, episode=807 reward=0.7819028 (485.51 it/sec) -training >> step=4840300, episode=807 reward=0.7741075 (509.72 it/sec) -training >> step=4840400, episode=807 reward=0.7776893 (471.44 it/sec) -training >> step=4840500, episode=807 reward=0.7699383 (497.74 it/sec) -training >> step=4840600, episode=807 reward=0.766744 (488.09 it/sec) -training >> step=4840700, episode=807 reward=0.7827233 (489.81 it/sec) -training >> step=4840800, episode=807 reward=0.7822642 (467.93 it/sec) -training >> step=4840900, episode=807 reward=0.785848 (430.50 it/sec) -training >> step=4841000, episode=807 reward=0.7937594 (439.85 it/sec) -training >> step=4841100, episode=807 reward=0.764378 (471.92 it/sec) -training >> step=4841200, episode=807 reward=0.7666975 (451.69 it/sec) -training >> step=4841300, episode=808 reward=0.7544161 (89.04 it/sec) -training >> step=4841400, episode=808 reward=0.7577751 (451.71 it/sec) -training >> step=4841500, episode=808 reward=0.7544864 (422.62 it/sec) -training >> step=4841600, episode=808 reward=0.7726869 (461.19 it/sec) -training >> step=4841700, episode=808 reward=0.7723173 (460.94 it/sec) -training >> step=4841800, episode=808 reward=0.7839865 (502.48 it/sec) -training >> step=4841900, episode=808 reward=0.7828243 (447.50 it/sec) -training >> step=4842000, episode=808 reward=0.7867697 (460.97 it/sec) -training >> step=4842100, episode=808 reward=0.7984124 (478.12 it/sec) -training >> step=4842200, episode=808 reward=0.7708826 (407.21 it/sec) -training >> step=4842300, episode=808 reward=0.7882814 (483.14 it/sec) -training >> step=4842400, episode=808 reward=0.7860342 (442.92 it/sec) -training >> step=4842500, episode=808 reward=0.7836626 (444.72 it/sec) -training >> step=4842600, episode=808 reward=0.7769958 (461.24 it/sec) -training >> step=4842700, episode=808 reward=0.7547331 (489.20 it/sec) -training >> step=4842800, episode=808 reward=0.7960047 (484.62 it/sec) -training >> step=4842900, episode=808 reward=0.7760471 (479.69 it/sec) -training >> step=4843000, episode=808 reward=0.7965379 (503.64 it/sec) -training >> step=4843100, episode=808 reward=0.7722512 (465.58 it/sec) -training >> step=4843200, episode=808 reward=0.7881468 (503.51 it/sec) -training >> step=4843300, episode=808 reward=0.785154 (497.01 it/sec) -training >> step=4843400, episode=808 reward=0.7796035 (516.54 it/sec) -training >> step=4843500, episode=808 reward=0.775089 (531.10 it/sec) -training >> step=4843600, episode=808 reward=0.778524 (508.43 it/sec) -training >> step=4843700, episode=808 reward=0.78427 (475.90 it/sec) -training >> step=4843800, episode=808 reward=0.7802173 (560.19 it/sec) -training >> step=4843900, episode=808 reward=0.7647861 (488.96 it/sec) -training >> step=4844000, episode=808 reward=0.7842767 (506.07 it/sec) -training >> step=4844100, episode=808 reward=0.7907405 (512.24 it/sec) -training >> step=4844200, episode=808 reward=0.7983868 (512.51 it/sec) -training >> step=4844300, episode=808 reward=0.7879738 (470.22 it/sec) -training >> step=4844400, episode=808 reward=0.79057 (482.09 it/sec) -training >> step=4844500, episode=808 reward=0.8024809 (517.88 it/sec) -training >> step=4844600, episode=808 reward=0.7878208 (527.69 it/sec) -training >> step=4844700, episode=808 reward=0.7722566 (500.26 it/sec) -training >> step=4844800, episode=808 reward=0.7659557 (433.92 it/sec) -training >> step=4844900, episode=808 reward=0.7754587 (463.07 it/sec) -training >> step=4845000, episode=808 reward=0.7710575 (456.97 it/sec) -training >> step=4845100, episode=808 reward=0.7828828 (479.38 it/sec) -training >> step=4845200, episode=808 reward=0.8080356 (406.33 it/sec) -training >> step=4845300, episode=808 reward=0.7632779 (466.09 it/sec) -training >> step=4845400, episode=808 reward=0.7694834 (421.60 it/sec) -training >> step=4845500, episode=808 reward=0.7850807 (456.94 it/sec) -training >> step=4845600, episode=808 reward=0.7753499 (457.01 it/sec) -training >> step=4845700, episode=808 reward=0.797258 (556.09 it/sec) -training >> step=4845800, episode=808 reward=0.7739503 (523.37 it/sec) -training >> step=4845900, episode=808 reward=0.7740372 (494.45 it/sec) -training >> step=4846000, episode=808 reward=0.7748768 (472.14 it/sec) -training >> step=4846100, episode=808 reward=0.8006519 (448.51 it/sec) -training >> step=4846200, episode=808 reward=0.7820828 (494.89 it/sec) -training >> step=4846300, episode=808 reward=0.7787019 (425.19 it/sec) -training >> step=4846400, episode=808 reward=0.7852562 (496.87 it/sec) -training >> step=4846500, episode=808 reward=0.7856551 (498.50 it/sec) -training >> step=4846600, episode=808 reward=0.7745165 (493.69 it/sec) -training >> step=4846700, episode=808 reward=0.7811206 (506.87 it/sec) -training >> step=4846800, episode=808 reward=0.7861677 (539.23 it/sec) -training >> step=4846900, episode=808 reward=0.7666761 (473.41 it/sec) -training >> step=4847000, episode=808 reward=0.783639 (516.23 it/sec) -training >> step=4847100, episode=808 reward=0.7835753 (518.66 it/sec) -training >> step=4847200, episode=808 reward=0.7854792 (488.12 it/sec) -training >> step=4847300, episode=809 reward=0.7714605 (93.07 it/sec) -training >> step=4847400, episode=809 reward=0.7632896 (487.96 it/sec) -training >> step=4847500, episode=809 reward=0.7563765 (532.65 it/sec) -training >> step=4847600, episode=809 reward=0.7653961 (476.89 it/sec) -training >> step=4847700, episode=809 reward=0.791692 (386.33 it/sec) -training >> step=4847800, episode=809 reward=0.7839043 (411.79 it/sec) -training >> step=4847900, episode=809 reward=0.7676525 (464.92 it/sec) -training >> step=4848000, episode=809 reward=0.776902 (493.84 it/sec) -training >> step=4848100, episode=809 reward=0.7900007 (508.10 it/sec) -training >> step=4848200, episode=809 reward=0.789347 (529.15 it/sec) -training >> step=4848300, episode=809 reward=0.7677807 (518.62 it/sec) -training >> step=4848400, episode=809 reward=0.7681044 (528.24 it/sec) -training >> step=4848500, episode=809 reward=0.7946828 (494.47 it/sec) -training >> step=4848600, episode=809 reward=0.7711133 (556.88 it/sec) -training >> step=4848700, episode=809 reward=0.7887373 (496.90 it/sec) -training >> step=4848800, episode=809 reward=0.7791417 (477.43 it/sec) -training >> step=4848900, episode=809 reward=0.7794436 (476.99 it/sec) -training >> step=4849000, episode=809 reward=0.7955503 (519.88 it/sec) -training >> step=4849100, episode=809 reward=0.7691458 (481.21 it/sec) -training >> step=4849200, episode=809 reward=0.7842947 (486.78 it/sec) -training >> step=4849300, episode=809 reward=0.7765191 (498.14 it/sec) -training >> step=4849400, episode=809 reward=0.7879914 (521.29 it/sec) -training >> step=4849500, episode=809 reward=0.7867359 (478.00 it/sec) -training >> step=4849600, episode=809 reward=0.7744206 (481.25 it/sec) -training >> step=4849700, episode=809 reward=0.7576205 (472.67 it/sec) -training >> step=4849800, episode=809 reward=0.786761 (459.57 it/sec) -training >> step=4849900, episode=809 reward=0.7927448 (472.65 it/sec) -training >> step=4850000, episode=809 reward=0.7875595 (480.49 it/sec) -training >> step=4850100, episode=809 reward=0.7763871 (521.30 it/sec) -training >> step=4850200, episode=809 reward=0.7717704 (501.27 it/sec) -training >> step=4850300, episode=809 reward=0.7743312 (514.04 it/sec) -training >> step=4850400, episode=809 reward=0.7602172 (505.03 it/sec) -training >> step=4850500, episode=809 reward=0.7804119 (488.85 it/sec) -training >> step=4850600, episode=809 reward=0.77591 (480.55 it/sec) -training >> step=4850700, episode=809 reward=0.7705185 (497.29 it/sec) -training >> step=4850800, episode=809 reward=0.7963411 (490.09 it/sec) -training >> step=4850900, episode=809 reward=0.7580198 (521.53 it/sec) -training >> step=4851000, episode=809 reward=0.7608372 (510.51 it/sec) -training >> step=4851100, episode=809 reward=0.7667004 (507.40 it/sec) -training >> step=4851200, episode=809 reward=0.7738268 (512.41 it/sec) -training >> step=4851300, episode=809 reward=0.7899456 (468.30 it/sec) -training >> step=4851400, episode=809 reward=0.7927347 (490.39 it/sec) -training >> step=4851500, episode=809 reward=0.7630484 (458.33 it/sec) -training >> step=4851600, episode=809 reward=0.7707008 (474.60 it/sec) -training >> step=4851700, episode=809 reward=0.7845728 (482.24 it/sec) -training >> step=4851800, episode=809 reward=0.7905086 (536.67 it/sec) -training >> step=4851900, episode=809 reward=0.7678434 (437.60 it/sec) -training >> step=4852000, episode=809 reward=0.7663766 (507.68 it/sec) -training >> step=4852100, episode=809 reward=0.777483 (492.90 it/sec) -training >> step=4852200, episode=809 reward=0.7899636 (517.29 it/sec) -training >> step=4852300, episode=809 reward=0.7724667 (463.78 it/sec) -training >> step=4852400, episode=809 reward=0.7899854 (459.25 it/sec) -training >> step=4852500, episode=809 reward=0.8039517 (494.62 it/sec) -training >> step=4852600, episode=809 reward=0.7726752 (496.69 it/sec) -training >> step=4852700, episode=809 reward=0.7885907 (440.15 it/sec) -training >> step=4852800, episode=809 reward=0.803623 (499.75 it/sec) -training >> step=4852900, episode=809 reward=0.7813621 (499.14 it/sec) -training >> step=4853000, episode=809 reward=0.7941251 (493.93 it/sec) -training >> step=4853100, episode=809 reward=0.7740738 (472.33 it/sec) -training >> step=4853200, episode=809 reward=0.773359 (435.35 it/sec) -training >> step=4853300, episode=810 reward=0.7716855 (84.44 it/sec) -training >> step=4853400, episode=810 reward=0.7653643 (484.67 it/sec) -training >> step=4853500, episode=810 reward=0.7827842 (467.97 it/sec) -training >> step=4853600, episode=810 reward=0.7466924 (519.07 it/sec) -training >> step=4853700, episode=810 reward=0.7682143 (490.50 it/sec) -training >> step=4853800, episode=810 reward=0.7752183 (470.22 it/sec) -training >> step=4853900, episode=810 reward=0.7681997 (518.26 it/sec) -training >> step=4854000, episode=810 reward=0.7804195 (503.40 it/sec) -training >> step=4854100, episode=810 reward=0.7801695 (497.14 it/sec) -training >> step=4854200, episode=810 reward=0.7659579 (487.40 it/sec) -training >> step=4854300, episode=810 reward=0.7660652 (450.01 it/sec) -training >> step=4854400, episode=810 reward=0.7965863 (435.23 it/sec) -training >> step=4854500, episode=810 reward=0.7671966 (487.12 it/sec) -training >> step=4854600, episode=810 reward=0.7985319 (499.71 it/sec) -training >> step=4854700, episode=810 reward=0.7925605 (495.20 it/sec) -training >> step=4854800, episode=810 reward=0.7852087 (483.81 it/sec) -training >> step=4854900, episode=810 reward=0.7674911 (515.83 it/sec) -training >> step=4855000, episode=810 reward=0.7716384 (502.92 it/sec) -training >> step=4855100, episode=810 reward=0.7779803 (534.80 it/sec) -training >> step=4855200, episode=810 reward=0.7692103 (506.84 it/sec) -training >> step=4855300, episode=810 reward=0.7857091 (411.84 it/sec) -training >> step=4855400, episode=810 reward=0.7874765 (494.01 it/sec) -training >> step=4855500, episode=810 reward=0.7770544 (481.29 it/sec) -training >> step=4855600, episode=810 reward=0.7710927 (493.69 it/sec) -training >> step=4855700, episode=810 reward=0.7859774 (480.00 it/sec) -training >> step=4855800, episode=810 reward=0.7775339 (504.88 it/sec) -training >> step=4855900, episode=810 reward=0.7723105 (484.04 it/sec) -training >> step=4856000, episode=810 reward=0.7738251 (490.60 it/sec) -training >> step=4856100, episode=810 reward=0.7954558 (495.01 it/sec) -training >> step=4856200, episode=810 reward=0.7732611 (485.14 it/sec) -training >> step=4856300, episode=810 reward=0.7817839 (509.86 it/sec) -training >> step=4856400, episode=810 reward=0.7919487 (552.20 it/sec) -training >> step=4856500, episode=810 reward=0.7673115 (481.33 it/sec) -training >> step=4856600, episode=810 reward=0.7689472 (496.00 it/sec) -training >> step=4856700, episode=810 reward=0.7858422 (511.79 it/sec) -training >> step=4856800, episode=810 reward=0.7761771 (468.12 it/sec) -training >> step=4856900, episode=810 reward=0.7887293 (431.29 it/sec) -training >> step=4857000, episode=810 reward=0.7912967 (495.62 it/sec) -training >> step=4857100, episode=810 reward=0.7869074 (532.84 it/sec) -training >> step=4857200, episode=810 reward=0.7866607 (511.39 it/sec) -training >> step=4857300, episode=810 reward=0.7935392 (486.15 it/sec) -training >> step=4857400, episode=810 reward=0.751751 (451.15 it/sec) -training >> step=4857500, episode=810 reward=0.7566957 (469.23 it/sec) -training >> step=4857600, episode=810 reward=0.7884407 (493.22 it/sec) -training >> step=4857700, episode=810 reward=0.7696419 (473.73 it/sec) -training >> step=4857800, episode=810 reward=0.7858536 (480.98 it/sec) -training >> step=4857900, episode=810 reward=0.7809971 (491.00 it/sec) -training >> step=4858000, episode=810 reward=0.7678757 (417.89 it/sec) -training >> step=4858100, episode=810 reward=0.7815755 (469.17 it/sec) -training >> step=4858200, episode=810 reward=0.7766501 (521.57 it/sec) -training >> step=4858300, episode=810 reward=0.7833631 (463.79 it/sec) -training >> step=4858400, episode=810 reward=0.7829325 (509.87 it/sec) -training >> step=4858500, episode=810 reward=0.7768063 (490.96 it/sec) -training >> step=4858600, episode=810 reward=0.7849588 (475.42 it/sec) -training >> step=4858700, episode=810 reward=0.7874668 (486.63 it/sec) -training >> step=4858800, episode=810 reward=0.7745326 (506.81 it/sec) -training >> step=4858900, episode=810 reward=0.7664895 (510.20 it/sec) -training >> step=4859000, episode=810 reward=0.7662365 (470.74 it/sec) -training >> step=4859100, episode=810 reward=0.7700299 (413.05 it/sec) -training >> step=4859200, episode=810 reward=0.7802044 (467.76 it/sec) -training >> step=4859300, episode=811 reward=0.778638 (70.70 it/sec) -training >> step=4859400, episode=811 reward=0.7559239 (354.83 it/sec) -training >> step=4859500, episode=811 reward=0.7680297 (527.11 it/sec) -training >> step=4859600, episode=811 reward=0.7588922 (435.42 it/sec) -training >> step=4859700, episode=811 reward=0.7709458 (488.08 it/sec) -training >> step=4859800, episode=811 reward=0.7805392 (473.75 it/sec) -training >> step=4859900, episode=811 reward=0.7927427 (486.27 it/sec) -training >> step=4860000, episode=811 reward=0.7654339 (457.08 it/sec) -training >> step=4860100, episode=811 reward=0.7625655 (512.27 it/sec) -training >> step=4860200, episode=811 reward=0.7658443 (448.25 it/sec) -training >> step=4860300, episode=811 reward=0.7996552 (508.10 it/sec) -training >> step=4860400, episode=811 reward=0.7933525 (496.35 it/sec) -training >> step=4860500, episode=811 reward=0.7840433 (506.72 it/sec) -training >> step=4860600, episode=811 reward=0.7748676 (513.36 it/sec) -training >> step=4860700, episode=811 reward=0.8045769 (507.27 it/sec) -training >> step=4860800, episode=811 reward=0.7955796 (478.64 it/sec) -training >> step=4860900, episode=811 reward=0.77269 (493.80 it/sec) -training >> step=4861000, episode=811 reward=0.7718521 (556.22 it/sec) -training >> step=4861100, episode=811 reward=0.7580599 (485.50 it/sec) -training >> step=4861200, episode=811 reward=0.7903467 (463.27 it/sec) -training >> step=4861300, episode=811 reward=0.7755367 (488.30 it/sec) -training >> step=4861400, episode=811 reward=0.7760261 (464.24 it/sec) -training >> step=4861500, episode=811 reward=0.787612 (495.37 it/sec) -training >> step=4861600, episode=811 reward=0.7935031 (477.85 it/sec) -training >> step=4861700, episode=811 reward=0.7792872 (509.19 it/sec) -training >> step=4861800, episode=811 reward=0.7907092 (486.35 it/sec) -training >> step=4861900, episode=811 reward=0.7852368 (494.96 it/sec) -training >> step=4862000, episode=811 reward=0.7903336 (484.04 it/sec) -training >> step=4862100, episode=811 reward=0.7703655 (534.65 it/sec) -training >> step=4862200, episode=811 reward=0.7839302 (456.84 it/sec) -training >> step=4862300, episode=811 reward=0.7788528 (510.03 it/sec) -training >> step=4862400, episode=811 reward=0.7619558 (497.65 it/sec) -training >> step=4862500, episode=811 reward=0.7762238 (483.94 it/sec) -training >> step=4862600, episode=811 reward=0.8026012 (512.91 it/sec) -training >> step=4862700, episode=811 reward=0.7892615 (425.60 it/sec) -training >> step=4862800, episode=811 reward=0.7955753 (515.28 it/sec) -training >> step=4862900, episode=811 reward=0.7805956 (465.90 it/sec) -training >> step=4863000, episode=811 reward=0.7799909 (447.58 it/sec) -training >> step=4863100, episode=811 reward=0.7680633 (469.84 it/sec) -training >> step=4863200, episode=811 reward=0.7954294 (501.65 it/sec) -training >> step=4863300, episode=811 reward=0.7914219 (471.13 it/sec) -training >> step=4863400, episode=811 reward=0.7723403 (474.41 it/sec) -training >> step=4863500, episode=811 reward=0.7784507 (498.29 it/sec) -training >> step=4863600, episode=811 reward=0.7784529 (519.99 it/sec) -training >> step=4863700, episode=811 reward=0.771732 (483.14 it/sec) -training >> step=4863800, episode=811 reward=0.7687049 (493.22 it/sec) -training >> step=4863900, episode=811 reward=0.7854695 (505.24 it/sec) -training >> step=4864000, episode=811 reward=0.778772 (499.09 it/sec) -training >> step=4864100, episode=811 reward=0.7908205 (463.69 it/sec) -training >> step=4864200, episode=811 reward=0.7867554 (443.64 it/sec) -training >> step=4864300, episode=811 reward=0.7912452 (495.13 it/sec) -training >> step=4864400, episode=811 reward=0.7960779 (456.12 it/sec) -training >> step=4864500, episode=811 reward=0.7938307 (477.92 it/sec) -training >> step=4864600, episode=811 reward=0.7676803 (442.79 it/sec) -training >> step=4864700, episode=811 reward=0.7724423 (453.37 it/sec) -training >> step=4864800, episode=811 reward=0.7591525 (473.99 it/sec) -training >> step=4864900, episode=811 reward=0.7874224 (473.87 it/sec) -training >> step=4865000, episode=811 reward=0.7802569 (512.50 it/sec) -training >> step=4865100, episode=811 reward=0.7794027 (481.73 it/sec) -training >> step=4865200, episode=811 reward=0.7561527 (445.22 it/sec) -training >> step=4865300, episode=812 reward=0.7647606 (71.23 it/sec) -training >> step=4865400, episode=812 reward=0.7733924 (370.66 it/sec) -training >> step=4865500, episode=812 reward=0.7566744 (483.50 it/sec) -training >> step=4865600, episode=812 reward=0.7720522 (499.97 it/sec) -training >> step=4865700, episode=812 reward=0.7803063 (499.48 it/sec) -training >> step=4865800, episode=812 reward=0.7692392 (471.23 it/sec) -training >> step=4865900, episode=812 reward=0.7637225 (522.60 it/sec) -training >> step=4866000, episode=812 reward=0.7900512 (490.00 it/sec) -training >> step=4866100, episode=812 reward=0.7710174 (463.84 it/sec) -training >> step=4866200, episode=812 reward=0.7651978 (457.50 it/sec) -training >> step=4866300, episode=812 reward=0.786366 (438.19 it/sec) -training >> step=4866400, episode=812 reward=0.7826715 (511.81 it/sec) -training >> step=4866500, episode=812 reward=0.7884094 (498.92 it/sec) -training >> step=4866600, episode=812 reward=0.7790375 (460.66 it/sec) -training >> step=4866700, episode=812 reward=0.7867091 (495.36 it/sec) -training >> step=4866800, episode=812 reward=0.7800484 (504.79 it/sec) -training >> step=4866900, episode=812 reward=0.7729322 (484.82 it/sec) -training >> step=4867000, episode=812 reward=0.7683121 (440.83 it/sec) -training >> step=4867100, episode=812 reward=0.788354 (492.46 it/sec) -training >> step=4867200, episode=812 reward=0.7916011 (494.01 it/sec) -training >> step=4867300, episode=812 reward=0.7713369 (499.74 it/sec) -training >> step=4867400, episode=812 reward=0.7824351 (511.82 it/sec) -training >> step=4867500, episode=812 reward=0.7928209 (446.65 it/sec) -training >> step=4867600, episode=812 reward=0.7694399 (464.80 it/sec) -training >> step=4867700, episode=812 reward=0.7975981 (489.04 it/sec) -training >> step=4867800, episode=812 reward=0.7629017 (510.90 it/sec) -training >> step=4867900, episode=812 reward=0.7762236 (516.92 it/sec) -training >> step=4868000, episode=812 reward=0.7697755 (490.49 it/sec) -training >> step=4868100, episode=812 reward=0.7918926 (505.49 it/sec) -training >> step=4868200, episode=812 reward=0.7761441 (497.41 it/sec) -training >> step=4868300, episode=812 reward=0.7776823 (511.10 it/sec) -training >> step=4868400, episode=812 reward=0.7803822 (521.41 it/sec) -training >> step=4868500, episode=812 reward=0.7754704 (486.15 it/sec) -training >> step=4868600, episode=812 reward=0.7929631 (480.18 it/sec) -training >> step=4868700, episode=812 reward=0.785678 (473.86 it/sec) -training >> step=4868800, episode=812 reward=0.776198 (507.92 it/sec) -training >> step=4868900, episode=812 reward=0.7920787 (473.99 it/sec) -training >> step=4869000, episode=812 reward=0.7737354 (498.31 it/sec) -training >> step=4869100, episode=812 reward=0.7687621 (452.13 it/sec) -training >> step=4869200, episode=812 reward=0.7813194 (485.34 it/sec) -training >> step=4869300, episode=812 reward=0.789476 (485.39 it/sec) -training >> step=4869400, episode=812 reward=0.7683845 (514.46 it/sec) -training >> step=4869500, episode=812 reward=0.7605017 (476.81 it/sec) -training >> step=4869600, episode=812 reward=0.7876168 (472.64 it/sec) -training >> step=4869700, episode=812 reward=0.7673197 (466.04 it/sec) -training >> step=4869800, episode=812 reward=0.7849479 (504.11 it/sec) -training >> step=4869900, episode=812 reward=0.7537114 (449.09 it/sec) -training >> step=4870000, episode=812 reward=0.7798088 (489.28 it/sec) -training >> step=4870100, episode=812 reward=0.7733361 (444.90 it/sec) -training >> step=4870200, episode=812 reward=0.7752977 (449.88 it/sec) -training >> step=4870300, episode=812 reward=0.7733474 (491.14 it/sec) -training >> step=4870400, episode=812 reward=0.7826964 (462.18 it/sec) -training >> step=4870500, episode=812 reward=0.795231 (487.18 it/sec) -training >> step=4870600, episode=812 reward=0.7367377 (483.69 it/sec) -training >> step=4870700, episode=812 reward=0.7726094 (390.77 it/sec) -training >> step=4870800, episode=812 reward=0.7891572 (441.52 it/sec) -training >> step=4870900, episode=812 reward=0.7595037 (467.91 it/sec) -training >> step=4871000, episode=812 reward=0.7677686 (454.84 it/sec) -training >> step=4871100, episode=812 reward=0.7812436 (466.35 it/sec) -training >> step=4871200, episode=812 reward=0.7842355 (472.83 it/sec) -training >> step=4871300, episode=813 reward=0.7927896 (60.04 it/sec) -training >> step=4871400, episode=813 reward=0.7558274 (436.84 it/sec) -training >> step=4871500, episode=813 reward=0.7548381 (462.04 it/sec) -training >> step=4871600, episode=813 reward=0.7528038 (327.70 it/sec) -training >> step=4871700, episode=813 reward=0.7697623 (480.48 it/sec) -training >> step=4871800, episode=813 reward=0.7779534 (501.75 it/sec) -training >> step=4871900, episode=813 reward=0.7928576 (482.26 it/sec) -training >> step=4872000, episode=813 reward=0.7910767 (455.55 it/sec) -training >> step=4872100, episode=813 reward=0.78563 (472.58 it/sec) -training >> step=4872200, episode=813 reward=0.7726644 (481.04 it/sec) -training >> step=4872300, episode=813 reward=0.7881364 (467.24 it/sec) -training >> step=4872400, episode=813 reward=0.7906098 (474.13 it/sec) -training >> step=4872500, episode=813 reward=0.748894 (490.43 it/sec) -training >> step=4872600, episode=813 reward=0.7916642 (433.48 it/sec) -training >> step=4872700, episode=813 reward=0.7534765 (433.18 it/sec) -training >> step=4872800, episode=813 reward=0.7901273 (461.76 it/sec) -training >> step=4872900, episode=813 reward=0.7648374 (521.23 it/sec) -training >> step=4873000, episode=813 reward=0.7895116 (457.63 it/sec) -training >> step=4873100, episode=813 reward=0.7828271 (475.86 it/sec) -training >> step=4873200, episode=813 reward=0.7911589 (486.96 it/sec) -training >> step=4873300, episode=813 reward=0.7903394 (460.80 it/sec) -training >> step=4873400, episode=813 reward=0.7825424 (469.15 it/sec) -training >> step=4873500, episode=813 reward=0.7803466 (450.51 it/sec) -training >> step=4873600, episode=813 reward=0.7911642 (475.93 it/sec) -training >> step=4873700, episode=813 reward=0.7584463 (488.38 it/sec) -training >> step=4873800, episode=813 reward=0.7886763 (498.04 it/sec) -training >> step=4873900, episode=813 reward=0.7791476 (469.19 it/sec) -training >> step=4874000, episode=813 reward=0.7773558 (531.26 it/sec) -training >> step=4874100, episode=813 reward=0.7910269 (482.85 it/sec) -training >> step=4874200, episode=813 reward=0.7724704 (471.39 it/sec) -training >> step=4874300, episode=813 reward=0.7818877 (473.74 it/sec) -training >> step=4874400, episode=813 reward=0.7460988 (509.67 it/sec) -training >> step=4874500, episode=813 reward=0.7697525 (502.27 it/sec) -training >> step=4874600, episode=813 reward=0.7782046 (423.51 it/sec) -training >> step=4874700, episode=813 reward=0.7923618 (518.39 it/sec) -training >> step=4874800, episode=813 reward=0.7724816 (476.10 it/sec) -training >> step=4874900, episode=813 reward=0.7694739 (498.65 it/sec) -training >> step=4875000, episode=813 reward=0.7705874 (519.35 it/sec) -training >> step=4875100, episode=813 reward=0.7684486 (507.06 it/sec) -training >> step=4875200, episode=813 reward=0.7808465 (496.69 it/sec) -training >> step=4875300, episode=813 reward=0.7772514 (499.90 it/sec) -training >> step=4875400, episode=813 reward=0.7637319 (509.72 it/sec) -training >> step=4875500, episode=813 reward=0.7752174 (504.45 it/sec) -training >> step=4875600, episode=813 reward=0.7851247 (516.24 it/sec) -training >> step=4875700, episode=813 reward=0.7555935 (449.42 it/sec) -training >> step=4875800, episode=813 reward=0.7841769 (538.35 it/sec) -training >> step=4875900, episode=813 reward=0.7518326 (497.30 it/sec) -training >> step=4876000, episode=813 reward=0.7866979 (521.83 it/sec) -training >> step=4876100, episode=813 reward=0.783212 (475.42 it/sec) -training >> step=4876200, episode=813 reward=0.758611 (491.49 it/sec) -training >> step=4876300, episode=813 reward=0.7751669 (494.78 it/sec) -training >> step=4876400, episode=813 reward=0.7890065 (463.55 it/sec) -training >> step=4876500, episode=813 reward=0.7625509 (509.92 it/sec) -training >> step=4876600, episode=813 reward=0.736245 (481.88 it/sec) -training >> step=4876700, episode=813 reward=0.7708867 (493.40 it/sec) -training >> step=4876800, episode=813 reward=0.7735274 (488.72 it/sec) -training >> step=4876900, episode=813 reward=0.7549163 (531.71 it/sec) -training >> step=4877000, episode=813 reward=0.7797389 (507.19 it/sec) -training >> step=4877100, episode=813 reward=0.7921935 (496.20 it/sec) -training >> step=4877200, episode=813 reward=0.773249 (498.52 it/sec) -training >> step=4877300, episode=814 reward=0.8001465 (75.88 it/sec) -training >> step=4877400, episode=814 reward=0.7655176 (491.34 it/sec) -training >> step=4877500, episode=814 reward=0.7815546 (476.67 it/sec) -training >> step=4877600, episode=814 reward=0.7758042 (492.63 it/sec) -training >> step=4877700, episode=814 reward=0.7936815 (444.44 it/sec) -training >> step=4877800, episode=814 reward=0.7859643 (382.87 it/sec) -training >> step=4877900, episode=814 reward=0.7987722 (476.30 it/sec) -training >> step=4878000, episode=814 reward=0.7771929 (513.79 it/sec) -training >> step=4878100, episode=814 reward=0.7827372 (508.78 it/sec) -training >> step=4878200, episode=814 reward=0.7650536 (486.78 it/sec) -training >> step=4878300, episode=814 reward=0.79864 (507.02 it/sec) -training >> step=4878400, episode=814 reward=0.7670429 (492.20 it/sec) -training >> step=4878500, episode=814 reward=0.7890105 (521.90 it/sec) -training >> step=4878600, episode=814 reward=0.7914875 (522.34 it/sec) -training >> step=4878700, episode=814 reward=0.7828709 (491.92 it/sec) -training >> step=4878800, episode=814 reward=0.7981609 (523.32 it/sec) -training >> step=4878900, episode=814 reward=0.7838479 (522.88 it/sec) -training >> step=4879000, episode=814 reward=0.7742863 (490.24 it/sec) -training >> step=4879100, episode=814 reward=0.7702441 (499.54 it/sec) -training >> step=4879200, episode=814 reward=0.7672362 (505.52 it/sec) -training >> step=4879300, episode=814 reward=0.77852 (496.33 it/sec) -training >> step=4879400, episode=814 reward=0.794256 (500.61 it/sec) -training >> step=4879500, episode=814 reward=0.7627777 (511.26 it/sec) -training >> step=4879600, episode=814 reward=0.7852586 (508.48 it/sec) -training >> step=4879700, episode=814 reward=0.7885182 (467.90 it/sec) -training >> step=4879800, episode=814 reward=0.7868411 (468.83 it/sec) -training >> step=4879900, episode=814 reward=0.7991635 (548.89 it/sec) -training >> step=4880000, episode=814 reward=0.7845867 (506.07 it/sec) -training >> step=4880100, episode=814 reward=0.7874332 (501.59 it/sec) -training >> step=4880200, episode=814 reward=0.7650422 (507.09 it/sec) -training >> step=4880300, episode=814 reward=0.8043092 (472.15 it/sec) -training >> step=4880400, episode=814 reward=0.7946752 (519.56 it/sec) -training >> step=4880500, episode=814 reward=0.7620763 (481.15 it/sec) -training >> step=4880600, episode=814 reward=0.7693849 (511.22 it/sec) -training >> step=4880700, episode=814 reward=0.7659662 (499.87 it/sec) -training >> step=4880800, episode=814 reward=0.7653186 (506.63 it/sec) -training >> step=4880900, episode=814 reward=0.7838799 (463.14 it/sec) -training >> step=4881000, episode=814 reward=0.7847546 (489.62 it/sec) -training >> step=4881100, episode=814 reward=0.7857064 (496.01 it/sec) -training >> step=4881200, episode=814 reward=0.7557675 (497.75 it/sec) -training >> step=4881300, episode=814 reward=0.7710526 (514.99 it/sec) -training >> step=4881400, episode=814 reward=0.7600103 (548.54 it/sec) -training >> step=4881500, episode=814 reward=0.7976369 (491.33 it/sec) -training >> step=4881600, episode=814 reward=0.7660838 (489.70 it/sec) -training >> step=4881700, episode=814 reward=0.7963123 (530.92 it/sec) -training >> step=4881800, episode=814 reward=0.7713509 (473.35 it/sec) -training >> step=4881900, episode=814 reward=0.77511 (505.70 it/sec) -training >> step=4882000, episode=814 reward=0.754187 (509.48 it/sec) -training >> step=4882100, episode=814 reward=0.7870849 (474.36 it/sec) -training >> step=4882200, episode=814 reward=0.756 (481.19 it/sec) -training >> step=4882300, episode=814 reward=0.7778476 (451.48 it/sec) -training >> step=4882400, episode=814 reward=0.7620118 (504.95 it/sec) -training >> step=4882500, episode=814 reward=0.8019637 (516.39 it/sec) -training >> step=4882600, episode=814 reward=0.778458 (474.07 it/sec) -training >> step=4882700, episode=814 reward=0.7646143 (484.93 it/sec) -training >> step=4882800, episode=814 reward=0.7862424 (487.72 it/sec) -training >> step=4882900, episode=814 reward=0.7854366 (513.38 it/sec) -training >> step=4883000, episode=814 reward=0.7650802 (523.65 it/sec) -training >> step=4883100, episode=814 reward=0.7681782 (488.49 it/sec) -training >> step=4883200, episode=814 reward=0.7532589 (544.17 it/sec) -training >> step=4883300, episode=815 reward=0.776888 (91.40 it/sec) -training >> step=4883400, episode=815 reward=0.7694924 (497.23 it/sec) -training >> step=4883500, episode=815 reward=0.7739074 (517.39 it/sec) -training >> step=4883600, episode=815 reward=0.7981135 (488.27 it/sec) -training >> step=4883700, episode=815 reward=0.7924237 (514.11 it/sec) -training >> step=4883800, episode=815 reward=0.7744515 (487.78 it/sec) -training >> step=4883900, episode=815 reward=0.7772272 (468.35 it/sec) -training >> step=4884000, episode=815 reward=0.7932839 (334.43 it/sec) -training >> step=4884100, episode=815 reward=0.7813652 (479.25 it/sec) -training >> step=4884200, episode=815 reward=0.7836486 (502.86 it/sec) -training >> step=4884300, episode=815 reward=0.7804902 (471.34 it/sec) -training >> step=4884400, episode=815 reward=0.7713177 (498.58 it/sec) -training >> step=4884500, episode=815 reward=0.7725381 (512.23 it/sec) -training >> step=4884600, episode=815 reward=0.7822651 (458.70 it/sec) -training >> step=4884700, episode=815 reward=0.7821951 (514.80 it/sec) -training >> step=4884800, episode=815 reward=0.7973897 (483.55 it/sec) -training >> step=4884900, episode=815 reward=0.7659245 (529.83 it/sec) -training >> step=4885000, episode=815 reward=0.792858 (421.26 it/sec) -training >> step=4885100, episode=815 reward=0.7929317 (510.72 it/sec) -training >> step=4885200, episode=815 reward=0.7724616 (492.90 it/sec) -training >> step=4885300, episode=815 reward=0.7744744 (438.05 it/sec) -training >> step=4885400, episode=815 reward=0.7661632 (468.92 it/sec) -training >> step=4885500, episode=815 reward=0.7817081 (529.00 it/sec) -training >> step=4885600, episode=815 reward=0.7794294 (511.96 it/sec) -training >> step=4885700, episode=815 reward=0.7840045 (482.97 it/sec) -training >> step=4885800, episode=815 reward=0.797415 (485.25 it/sec) -training >> step=4885900, episode=815 reward=0.7727843 (501.77 it/sec) -training >> step=4886000, episode=815 reward=0.7965464 (530.88 it/sec) -training >> step=4886100, episode=815 reward=0.7686125 (501.34 it/sec) -training >> step=4886200, episode=815 reward=0.7890623 (544.86 it/sec) -training >> step=4886300, episode=815 reward=0.7778533 (516.22 it/sec) -training >> step=4886400, episode=815 reward=0.7569597 (481.35 it/sec) -training >> step=4886500, episode=815 reward=0.7678966 (495.86 it/sec) -training >> step=4886600, episode=815 reward=0.7379491 (488.82 it/sec) -training >> step=4886700, episode=815 reward=0.7774165 (484.47 it/sec) -training >> step=4886800, episode=815 reward=0.7838934 (499.25 it/sec) -training >> step=4886900, episode=815 reward=0.8002796 (493.83 it/sec) -training >> step=4887000, episode=815 reward=0.7543621 (433.54 it/sec) -training >> step=4887100, episode=815 reward=0.7895666 (456.04 it/sec) -training >> step=4887200, episode=815 reward=0.7653214 (488.17 it/sec) -training >> step=4887300, episode=815 reward=0.7677884 (519.78 it/sec) -training >> step=4887400, episode=815 reward=0.7772317 (469.31 it/sec) -training >> step=4887500, episode=815 reward=0.7733466 (445.13 it/sec) -training >> step=4887600, episode=815 reward=0.7818134 (517.59 it/sec) -training >> step=4887700, episode=815 reward=0.7790778 (466.57 it/sec) -training >> step=4887800, episode=815 reward=0.7637023 (459.11 it/sec) -training >> step=4887900, episode=815 reward=0.7887647 (488.43 it/sec) -training >> step=4888000, episode=815 reward=0.8032535 (535.44 it/sec) -training >> step=4888100, episode=815 reward=0.7751604 (438.26 it/sec) -training >> step=4888200, episode=815 reward=0.7738003 (434.57 it/sec) -training >> step=4888300, episode=815 reward=0.7736298 (487.45 it/sec) -training >> step=4888400, episode=815 reward=0.7846863 (473.96 it/sec) -training >> step=4888500, episode=815 reward=0.7914891 (483.08 it/sec) -training >> step=4888600, episode=815 reward=0.7786912 (479.04 it/sec) -training >> step=4888700, episode=815 reward=0.7819001 (426.33 it/sec) -training >> step=4888800, episode=815 reward=0.757239 (349.87 it/sec) -training >> step=4888900, episode=815 reward=0.7858964 (380.21 it/sec) -training >> step=4889000, episode=815 reward=0.8029414 (414.73 it/sec) -training >> step=4889100, episode=815 reward=0.778336 (469.98 it/sec) -training >> step=4889200, episode=815 reward=0.7681392 (451.72 it/sec) -training >> step=4889300, episode=816 reward=0.7391521 (89.81 it/sec) -training >> step=4889400, episode=816 reward=0.7702161 (477.10 it/sec) -training >> step=4889500, episode=816 reward=0.7742916 (514.72 it/sec) -training >> step=4889600, episode=816 reward=0.7796252 (523.79 it/sec) -training >> step=4889700, episode=816 reward=0.7510924 (489.40 it/sec) -training >> step=4889800, episode=816 reward=0.7621264 (436.15 it/sec) -training >> step=4889900, episode=816 reward=0.7698213 (528.78 it/sec) -training >> step=4890000, episode=816 reward=0.7898729 (513.38 it/sec) -training >> step=4890100, episode=816 reward=0.7979689 (489.03 it/sec) -training >> step=4890200, episode=816 reward=0.7854499 (341.92 it/sec) -training >> step=4890300, episode=816 reward=0.7936883 (453.22 it/sec) -training >> step=4890400, episode=816 reward=0.7721395 (473.49 it/sec) -training >> step=4890500, episode=816 reward=0.7715981 (406.80 it/sec) -training >> step=4890600, episode=816 reward=0.7771053 (492.94 it/sec) -training >> step=4890700, episode=816 reward=0.7750207 (445.67 it/sec) -training >> step=4890800, episode=816 reward=0.7791883 (458.69 it/sec) -training >> step=4890900, episode=816 reward=0.7953194 (483.89 it/sec) -training >> step=4891000, episode=816 reward=0.7654977 (486.86 it/sec) -training >> step=4891100, episode=816 reward=0.7706215 (482.91 it/sec) -training >> step=4891200, episode=816 reward=0.7703111 (448.91 it/sec) -training >> step=4891300, episode=816 reward=0.7570157 (491.12 it/sec) -training >> step=4891400, episode=816 reward=0.7726566 (469.48 it/sec) -training >> step=4891500, episode=816 reward=0.7736645 (453.02 it/sec) -training >> step=4891600, episode=816 reward=0.7850564 (479.05 it/sec) -training >> step=4891700, episode=816 reward=0.7547216 (419.05 it/sec) -training >> step=4891800, episode=816 reward=0.7861136 (430.73 it/sec) -training >> step=4891900, episode=816 reward=0.7772104 (458.40 it/sec) -training >> step=4892000, episode=816 reward=0.7837659 (452.32 it/sec) -training >> step=4892100, episode=816 reward=0.8130795 (357.16 it/sec) -training >> step=4892200, episode=816 reward=0.8011013 (392.87 it/sec) -training >> step=4892300, episode=816 reward=0.7777884 (440.62 it/sec) -training >> step=4892400, episode=816 reward=0.7844002 (428.99 it/sec) -training >> step=4892500, episode=816 reward=0.7816806 (472.29 it/sec) -training >> step=4892600, episode=816 reward=0.7685161 (459.00 it/sec) -training >> step=4892700, episode=816 reward=0.7858855 (448.12 it/sec) -training >> step=4892800, episode=816 reward=0.7862484 (487.41 it/sec) -training >> step=4892900, episode=816 reward=0.7888169 (450.85 it/sec) -training >> step=4893000, episode=816 reward=0.8025529 (457.82 it/sec) -training >> step=4893100, episode=816 reward=0.7844877 (483.73 it/sec) -training >> step=4893200, episode=816 reward=0.7798522 (367.41 it/sec) -training >> step=4893300, episode=816 reward=0.7928967 (421.81 it/sec) -training >> step=4893400, episode=816 reward=0.769454 (496.01 it/sec) -training >> step=4893500, episode=816 reward=0.7693269 (501.41 it/sec) -training >> step=4893600, episode=816 reward=0.7859769 (469.36 it/sec) -training >> step=4893700, episode=816 reward=0.7696457 (480.01 it/sec) -training >> step=4893800, episode=816 reward=0.7680679 (492.10 it/sec) -training >> step=4893900, episode=816 reward=0.7770223 (488.30 it/sec) -training >> step=4894000, episode=816 reward=0.7497093 (475.38 it/sec) -training >> step=4894100, episode=816 reward=0.7736349 (483.53 it/sec) -training >> step=4894200, episode=816 reward=0.7618404 (464.65 it/sec) -training >> step=4894300, episode=816 reward=0.7660167 (423.82 it/sec) -training >> step=4894400, episode=816 reward=0.7905063 (500.97 it/sec) -training >> step=4894500, episode=816 reward=0.7718532 (463.95 it/sec) -training >> step=4894600, episode=816 reward=0.7707259 (487.62 it/sec) -training >> step=4894700, episode=816 reward=0.7748849 (448.69 it/sec) -training >> step=4894800, episode=816 reward=0.7847155 (436.43 it/sec) -training >> step=4894900, episode=816 reward=0.7716875 (447.87 it/sec) -training >> step=4895000, episode=816 reward=0.7766725 (459.47 it/sec) -training >> step=4895100, episode=816 reward=0.7841676 (505.32 it/sec) -training >> step=4895200, episode=816 reward=0.770206 (466.19 it/sec) -training >> step=4895300, episode=817 reward=0.7725008 (65.21 it/sec) -training >> step=4895400, episode=817 reward=0.7764882 (463.00 it/sec) -training >> step=4895500, episode=817 reward=0.7913974 (490.39 it/sec) -training >> step=4895600, episode=817 reward=0.7580263 (488.52 it/sec) -training >> step=4895700, episode=817 reward=0.7986025 (499.72 it/sec) -training >> step=4895800, episode=817 reward=0.7730702 (481.35 it/sec) -training >> step=4895900, episode=817 reward=0.7825263 (489.53 it/sec) -training >> step=4896000, episode=817 reward=0.7704666 (502.67 it/sec) -training >> step=4896100, episode=817 reward=0.776732 (491.18 it/sec) -training >> step=4896200, episode=817 reward=0.7842485 (479.87 it/sec) -training >> step=4896300, episode=817 reward=0.7931146 (331.61 it/sec) -training >> step=4896400, episode=817 reward=0.7547705 (482.44 it/sec) -training >> step=4896500, episode=817 reward=0.785396 (462.86 it/sec) -training >> step=4896600, episode=817 reward=0.7919391 (475.74 it/sec) -training >> step=4896700, episode=817 reward=0.7763467 (502.56 it/sec) -training >> step=4896800, episode=817 reward=0.7768637 (405.92 it/sec) -training >> step=4896900, episode=817 reward=0.7577092 (514.74 it/sec) -training >> step=4897000, episode=817 reward=0.7820007 (471.06 it/sec) -training >> step=4897100, episode=817 reward=0.7655091 (496.24 it/sec) -training >> step=4897200, episode=817 reward=0.7864267 (467.20 it/sec) -training >> step=4897300, episode=817 reward=0.7585196 (477.65 it/sec) -training >> step=4897400, episode=817 reward=0.777263 (522.65 it/sec) -training >> step=4897500, episode=817 reward=0.7684718 (441.27 it/sec) -training >> step=4897600, episode=817 reward=0.7558021 (482.37 it/sec) -training >> step=4897700, episode=817 reward=0.7744976 (470.26 it/sec) -training >> step=4897800, episode=817 reward=0.7820625 (497.52 it/sec) -training >> step=4897900, episode=817 reward=0.7933842 (495.03 it/sec) -training >> step=4898000, episode=817 reward=0.7837641 (450.66 it/sec) -training >> step=4898100, episode=817 reward=0.7794008 (474.51 it/sec) -training >> step=4898200, episode=817 reward=0.7980439 (467.68 it/sec) -training >> step=4898300, episode=817 reward=0.784826 (469.40 it/sec) -training >> step=4898400, episode=817 reward=0.7881728 (491.17 it/sec) -training >> step=4898500, episode=817 reward=0.8096505 (498.06 it/sec) -training >> step=4898600, episode=817 reward=0.7949502 (487.91 it/sec) -training >> step=4898700, episode=817 reward=0.7746091 (460.37 it/sec) -training >> step=4898800, episode=817 reward=0.8050143 (509.63 it/sec) -training >> step=4898900, episode=817 reward=0.7903701 (465.04 it/sec) -training >> step=4899000, episode=817 reward=0.7888165 (481.67 it/sec) -training >> step=4899100, episode=817 reward=0.7744878 (485.68 it/sec) -training >> step=4899200, episode=817 reward=0.8019382 (503.52 it/sec) -training >> step=4899300, episode=817 reward=0.7748145 (476.76 it/sec) -training >> step=4899400, episode=817 reward=0.7665501 (504.08 it/sec) -training >> step=4899500, episode=817 reward=0.7774549 (458.52 it/sec) -training >> step=4899600, episode=817 reward=0.7742902 (500.96 it/sec) -training >> step=4899700, episode=817 reward=0.7657541 (458.55 it/sec) -training >> step=4899800, episode=817 reward=0.7733867 (463.07 it/sec) -training >> step=4899900, episode=817 reward=0.7903267 (526.48 it/sec) -training >> step=4900000, episode=817 reward=0.7776565 (468.65 it/sec) -training >> step=4900100, episode=817 reward=0.7832893 (487.60 it/sec) -training >> step=4900200, episode=817 reward=0.7755486 (479.54 it/sec) -training >> step=4900300, episode=817 reward=0.7740502 (498.69 it/sec) -training >> step=4900400, episode=817 reward=0.7777978 (503.73 it/sec) -training >> step=4900500, episode=817 reward=0.8075699 (452.09 it/sec) -training >> step=4900600, episode=817 reward=0.7720459 (506.73 it/sec) -training >> step=4900700, episode=817 reward=0.7947825 (485.13 it/sec) -training >> step=4900800, episode=817 reward=0.7918077 (491.90 it/sec) -training >> step=4900900, episode=817 reward=0.7795615 (506.10 it/sec) -training >> step=4901000, episode=817 reward=0.7678005 (445.74 it/sec) -training >> step=4901100, episode=817 reward=0.7822492 (462.35 it/sec) -training >> step=4901200, episode=817 reward=0.7819124 (481.94 it/sec) -training >> step=4901300, episode=818 reward=0.7669243 (38.52 it/sec) -training >> step=4901400, episode=818 reward=0.7798547 (385.12 it/sec) -training >> step=4901500, episode=818 reward=0.7591406 (367.18 it/sec) -training >> step=4901600, episode=818 reward=0.7723684 (412.07 it/sec) -training >> step=4901700, episode=818 reward=0.7775572 (460.81 it/sec) -training >> step=4901800, episode=818 reward=0.7800046 (512.44 it/sec) -training >> step=4901900, episode=818 reward=0.772524 (502.44 it/sec) -training >> step=4902000, episode=818 reward=0.7845263 (430.43 it/sec) -training >> step=4902100, episode=818 reward=0.7502803 (520.37 it/sec) -training >> step=4902200, episode=818 reward=0.7659048 (522.67 it/sec) -training >> step=4902300, episode=818 reward=0.8141165 (508.15 it/sec) -training >> step=4902400, episode=818 reward=0.8021891 (544.73 it/sec) -training >> step=4902500, episode=818 reward=0.7632489 (511.05 it/sec) -training >> step=4902600, episode=818 reward=0.7722772 (385.85 it/sec) -training >> step=4902700, episode=818 reward=0.7946029 (501.63 it/sec) -training >> step=4902800, episode=818 reward=0.7852401 (477.41 it/sec) -training >> step=4902900, episode=818 reward=0.7998447 (523.37 it/sec) -training >> step=4903000, episode=818 reward=0.7896189 (505.69 it/sec) -training >> step=4903100, episode=818 reward=0.777056 (534.01 it/sec) -training >> step=4903200, episode=818 reward=0.7670356 (539.42 it/sec) -training >> step=4903300, episode=818 reward=0.7771129 (539.08 it/sec) -training >> step=4903400, episode=818 reward=0.7614446 (522.12 it/sec) -training >> step=4903500, episode=818 reward=0.7623643 (454.02 it/sec) -training >> step=4903600, episode=818 reward=0.7814746 (522.04 it/sec) -training >> step=4903700, episode=818 reward=0.7738122 (529.18 it/sec) -training >> step=4903800, episode=818 reward=0.7994562 (479.58 it/sec) -training >> step=4903900, episode=818 reward=0.7803674 (518.83 it/sec) -training >> step=4904000, episode=818 reward=0.7841236 (539.17 it/sec) -training >> step=4904100, episode=818 reward=0.7710601 (513.65 it/sec) -training >> step=4904200, episode=818 reward=0.7948476 (518.76 it/sec) -training >> step=4904300, episode=818 reward=0.8137054 (521.75 it/sec) -training >> step=4904400, episode=818 reward=0.7947111 (532.54 it/sec) -training >> step=4904500, episode=818 reward=0.7986239 (507.91 it/sec) -training >> step=4904600, episode=818 reward=0.777441 (482.09 it/sec) -training >> step=4904700, episode=818 reward=0.7744499 (467.97 it/sec) -training >> step=4904800, episode=818 reward=0.7888172 (451.47 it/sec) -training >> step=4904900, episode=818 reward=0.7756745 (525.26 it/sec) -training >> step=4905000, episode=818 reward=0.7720507 (491.33 it/sec) -training >> step=4905100, episode=818 reward=0.787297 (553.16 it/sec) -training >> step=4905200, episode=818 reward=0.7594285 (469.05 it/sec) -training >> step=4905300, episode=818 reward=0.7773088 (477.77 it/sec) -training >> step=4905400, episode=818 reward=0.7723646 (525.24 it/sec) -training >> step=4905500, episode=818 reward=0.787416 (506.02 it/sec) -training >> step=4905600, episode=818 reward=0.7754117 (504.03 it/sec) -training >> step=4905700, episode=818 reward=0.7733548 (468.22 it/sec) -training >> step=4905800, episode=818 reward=0.8001035 (514.31 it/sec) -training >> step=4905900, episode=818 reward=0.764013 (505.94 it/sec) -training >> step=4906000, episode=818 reward=0.7737684 (537.79 it/sec) -training >> step=4906100, episode=818 reward=0.7753748 (534.64 it/sec) -training >> step=4906200, episode=818 reward=0.762798 (494.14 it/sec) -training >> step=4906300, episode=818 reward=0.791146 (518.49 it/sec) -training >> step=4906400, episode=818 reward=0.7772778 (515.41 it/sec) -training >> step=4906500, episode=818 reward=0.7635817 (564.16 it/sec) -training >> step=4906600, episode=818 reward=0.790628 (502.24 it/sec) -training >> step=4906700, episode=818 reward=0.7789278 (517.69 it/sec) -training >> step=4906800, episode=818 reward=0.7741891 (552.83 it/sec) -training >> step=4906900, episode=818 reward=0.7868342 (545.41 it/sec) -training >> step=4907000, episode=818 reward=0.778827 (495.14 it/sec) -training >> step=4907100, episode=818 reward=0.776372 (441.20 it/sec) -training >> step=4907200, episode=818 reward=0.7833804 (469.24 it/sec) -training >> step=4907300, episode=819 reward=0.7780925 (119.09 it/sec) -training >> step=4907400, episode=819 reward=0.7789861 (485.95 it/sec) -training >> step=4907500, episode=819 reward=0.7647885 (550.45 it/sec) -training >> step=4907600, episode=819 reward=0.7606395 (548.54 it/sec) -training >> step=4907700, episode=819 reward=0.7799892 (515.59 it/sec) -training >> step=4907800, episode=819 reward=0.7850086 (533.69 it/sec) -training >> step=4907900, episode=819 reward=0.7851316 (538.90 it/sec) -training >> step=4908000, episode=819 reward=0.8071346 (569.15 it/sec) -training >> step=4908100, episode=819 reward=0.7879377 (497.46 it/sec) -training >> step=4908200, episode=819 reward=0.7644163 (479.69 it/sec) -training >> step=4908300, episode=819 reward=0.7831626 (550.98 it/sec) -training >> step=4908400, episode=819 reward=0.7725552 (540.33 it/sec) -training >> step=4908500, episode=819 reward=0.771989 (540.54 it/sec) -training >> step=4908600, episode=819 reward=0.7868763 (563.29 it/sec) -training >> step=4908700, episode=819 reward=0.7861977 (544.75 it/sec) -training >> step=4908800, episode=819 reward=0.8014613 (492.07 it/sec) -training >> step=4908900, episode=819 reward=0.7707488 (497.01 it/sec) -training >> step=4909000, episode=819 reward=0.7859748 (412.60 it/sec) -training >> step=4909100, episode=819 reward=0.7780363 (531.82 it/sec) -training >> step=4909200, episode=819 reward=0.7753242 (522.85 it/sec) -training >> step=4909300, episode=819 reward=0.7787241 (514.37 it/sec) -training >> step=4909400, episode=819 reward=0.7892998 (522.75 it/sec) -training >> step=4909500, episode=819 reward=0.7940149 (484.68 it/sec) -training >> step=4909600, episode=819 reward=0.7772811 (507.66 it/sec) -training >> step=4909700, episode=819 reward=0.7885406 (538.72 it/sec) -training >> step=4909800, episode=819 reward=0.7987375 (490.45 it/sec) -training >> step=4909900, episode=819 reward=0.7761367 (537.09 it/sec) -training >> step=4910000, episode=819 reward=0.7909246 (485.83 it/sec) -training >> step=4910100, episode=819 reward=0.7733978 (540.29 it/sec) -training >> step=4910200, episode=819 reward=0.779992 (548.39 it/sec) -training >> step=4910300, episode=819 reward=0.7734968 (475.38 it/sec) -training >> step=4910400, episode=819 reward=0.7767533 (528.63 it/sec) -training >> step=4910500, episode=819 reward=0.7708443 (485.36 it/sec) -training >> step=4910600, episode=819 reward=0.7844681 (468.95 it/sec) -training >> step=4910700, episode=819 reward=0.7853617 (500.47 it/sec) -training >> step=4910800, episode=819 reward=0.7805187 (488.65 it/sec) -training >> step=4910900, episode=819 reward=0.7751595 (533.31 it/sec) -training >> step=4911000, episode=819 reward=0.7686009 (514.64 it/sec) -training >> step=4911100, episode=819 reward=0.76873 (467.57 it/sec) -training >> step=4911200, episode=819 reward=0.774017 (542.78 it/sec) -training >> step=4911300, episode=819 reward=0.7775664 (490.25 it/sec) -training >> step=4911400, episode=819 reward=0.7669789 (470.74 it/sec) -training >> step=4911500, episode=819 reward=0.7964132 (499.01 it/sec) -training >> step=4911600, episode=819 reward=0.7823884 (523.90 it/sec) -training >> step=4911700, episode=819 reward=0.7931588 (475.15 it/sec) -training >> step=4911800, episode=819 reward=0.7647197 (472.94 it/sec) -training >> step=4911900, episode=819 reward=0.7646767 (491.93 it/sec) -training >> step=4912000, episode=819 reward=0.7985484 (469.10 it/sec) -training >> step=4912100, episode=819 reward=0.7945713 (462.61 it/sec) -training >> step=4912200, episode=819 reward=0.7642214 (450.49 it/sec) -training >> step=4912300, episode=819 reward=0.7840092 (469.98 it/sec) -training >> step=4912400, episode=819 reward=0.7615698 (421.77 it/sec) -training >> step=4912500, episode=819 reward=0.7688257 (449.60 it/sec) -training >> step=4912600, episode=819 reward=0.7890084 (469.38 it/sec) -training >> step=4912700, episode=819 reward=0.7770286 (501.63 it/sec) -training >> step=4912800, episode=819 reward=0.7730503 (435.44 it/sec) -training >> step=4912900, episode=819 reward=0.7659088 (465.81 it/sec) -training >> step=4913000, episode=819 reward=0.7544038 (424.75 it/sec) -training >> step=4913100, episode=819 reward=0.7934651 (406.47 it/sec) -training >> step=4913200, episode=819 reward=0.7777114 (473.69 it/sec) -training >> step=4913300, episode=820 reward=0.7931281 (74.88 it/sec) -training >> step=4913400, episode=820 reward=0.7609914 (463.84 it/sec) -training >> step=4913500, episode=820 reward=0.7849159 (500.14 it/sec) -training >> step=4913600, episode=820 reward=0.7527833 (527.99 it/sec) -training >> step=4913700, episode=820 reward=0.8004558 (492.36 it/sec) -training >> step=4913800, episode=820 reward=0.7663801 (502.57 it/sec) -training >> step=4913900, episode=820 reward=0.7811943 (490.37 it/sec) -training >> step=4914000, episode=820 reward=0.7993934 (509.58 it/sec) -training >> step=4914100, episode=820 reward=0.7804253 (476.83 it/sec) -training >> step=4914200, episode=820 reward=0.7779671 (503.43 it/sec) -training >> step=4914300, episode=820 reward=0.7786238 (505.82 it/sec) -training >> step=4914400, episode=820 reward=0.7708513 (499.39 it/sec) -training >> step=4914500, episode=820 reward=0.7698799 (513.13 it/sec) -training >> step=4914600, episode=820 reward=0.7721909 (547.24 it/sec) -training >> step=4914700, episode=820 reward=0.7607525 (511.88 it/sec) -training >> step=4914800, episode=820 reward=0.7761046 (483.52 it/sec) -training >> step=4914900, episode=820 reward=0.78058 (495.51 it/sec) -training >> step=4915000, episode=820 reward=0.7711356 (514.77 it/sec) -training >> step=4915100, episode=820 reward=0.7323637 (353.22 it/sec) -training >> step=4915200, episode=820 reward=0.8101439 (513.12 it/sec) -training >> step=4915300, episode=820 reward=0.7713525 (568.11 it/sec) -training >> step=4915400, episode=820 reward=0.7591766 (473.42 it/sec) -training >> step=4915500, episode=820 reward=0.7868542 (480.77 it/sec) -training >> step=4915600, episode=820 reward=0.7652683 (509.83 it/sec) -training >> step=4915700, episode=820 reward=0.7794171 (510.85 it/sec) -training >> step=4915800, episode=820 reward=0.7729129 (524.05 it/sec) -training >> step=4915900, episode=820 reward=0.7932659 (469.50 it/sec) -training >> step=4916000, episode=820 reward=0.7677391 (515.14 it/sec) -training >> step=4916100, episode=820 reward=0.799098 (489.16 it/sec) -training >> step=4916200, episode=820 reward=0.7800444 (484.19 it/sec) -training >> step=4916300, episode=820 reward=0.7904562 (520.16 it/sec) -training >> step=4916400, episode=820 reward=0.7879499 (505.85 it/sec) -training >> step=4916500, episode=820 reward=0.7884225 (381.64 it/sec) -training >> step=4916600, episode=820 reward=0.7810808 (529.97 it/sec) -training >> step=4916700, episode=820 reward=0.7931838 (515.08 it/sec) -training >> step=4916800, episode=820 reward=0.7929748 (542.19 it/sec) -training >> step=4916900, episode=820 reward=0.7676482 (480.52 it/sec) -training >> step=4917000, episode=820 reward=0.788676 (539.42 it/sec) -training >> step=4917100, episode=820 reward=0.8048262 (540.35 it/sec) -training >> step=4917200, episode=820 reward=0.7703909 (525.68 it/sec) -training >> step=4917300, episode=820 reward=0.7853277 (533.98 it/sec) -training >> step=4917400, episode=820 reward=0.7819832 (536.63 it/sec) -training >> step=4917500, episode=820 reward=0.7887053 (432.44 it/sec) -training >> step=4917600, episode=820 reward=0.7842447 (463.47 it/sec) -training >> step=4917700, episode=820 reward=0.792402 (463.59 it/sec) -training >> step=4917800, episode=820 reward=0.7634028 (481.99 it/sec) -training >> step=4917900, episode=820 reward=0.7649035 (497.97 it/sec) -training >> step=4918000, episode=820 reward=0.7662414 (379.06 it/sec) -training >> step=4918100, episode=820 reward=0.7893321 (443.41 it/sec) -training >> step=4918200, episode=820 reward=0.7969326 (462.40 it/sec) -training >> step=4918300, episode=820 reward=0.7745264 (457.40 it/sec) -training >> step=4918400, episode=820 reward=0.7673364 (468.29 it/sec) -training >> step=4918500, episode=820 reward=0.7620307 (483.36 it/sec) -training >> step=4918600, episode=820 reward=0.7649407 (462.55 it/sec) -training >> step=4918700, episode=820 reward=0.7892928 (418.03 it/sec) -training >> step=4918800, episode=820 reward=0.7886598 (484.61 it/sec) -training >> step=4918900, episode=820 reward=0.7936835 (414.19 it/sec) -training >> step=4919000, episode=820 reward=0.7738376 (412.32 it/sec) -training >> step=4919100, episode=820 reward=0.7747812 (462.68 it/sec) -training >> step=4919200, episode=820 reward=0.7671747 (465.40 it/sec) -training >> step=4919300, episode=821 reward=0.7748796 (122.58 it/sec) -training >> step=4919400, episode=821 reward=0.7705107 (374.67 it/sec) -training >> step=4919500, episode=821 reward=0.7711814 (353.57 it/sec) -training >> step=4919600, episode=821 reward=0.7465218 (452.37 it/sec) -training >> step=4919700, episode=821 reward=0.7742506 (439.54 it/sec) -training >> step=4919800, episode=821 reward=0.7834948 (422.30 it/sec) -training >> step=4919900, episode=821 reward=0.7928247 (482.77 it/sec) -training >> step=4920000, episode=821 reward=0.77053 (388.74 it/sec) -training >> step=4920100, episode=821 reward=0.7479482 (485.13 it/sec) -training >> step=4920200, episode=821 reward=0.7688476 (517.59 it/sec) -training >> step=4920300, episode=821 reward=0.7799003 (457.55 it/sec) -training >> step=4920400, episode=821 reward=0.7589562 (448.91 it/sec) -training >> step=4920500, episode=821 reward=0.7730539 (428.28 it/sec) -training >> step=4920600, episode=821 reward=0.7994774 (468.96 it/sec) -training >> step=4920700, episode=821 reward=0.7912112 (528.48 it/sec) -training >> step=4920800, episode=821 reward=0.7905802 (459.96 it/sec) -training >> step=4920900, episode=821 reward=0.7944821 (440.94 it/sec) -training >> step=4921000, episode=821 reward=0.7844723 (496.17 it/sec) -training >> step=4921100, episode=821 reward=0.7801303 (485.07 it/sec) -training >> step=4921200, episode=821 reward=0.7767881 (351.07 it/sec) -training >> step=4921300, episode=821 reward=0.7823253 (456.10 it/sec) -training >> step=4921400, episode=821 reward=0.7551258 (501.70 it/sec) -training >> step=4921500, episode=821 reward=0.7858784 (428.46 it/sec) -training >> step=4921600, episode=821 reward=0.7710979 (389.89 it/sec) -training >> step=4921700, episode=821 reward=0.7637243 (385.28 it/sec) -training >> step=4921800, episode=821 reward=0.8034967 (497.17 it/sec) -training >> step=4921900, episode=821 reward=0.7926192 (433.77 it/sec) -training >> step=4922000, episode=821 reward=0.7735175 (424.32 it/sec) -training >> step=4922100, episode=821 reward=0.7710713 (432.64 it/sec) -training >> step=4922200, episode=821 reward=0.7731906 (488.60 it/sec) -training >> step=4922300, episode=821 reward=0.792621 (465.84 it/sec) -training >> step=4922400, episode=821 reward=0.7721208 (492.31 it/sec) -training >> step=4922500, episode=821 reward=0.7782078 (479.75 it/sec) -training >> step=4922600, episode=821 reward=0.7809175 (377.83 it/sec) -training >> step=4922700, episode=821 reward=0.7613522 (401.01 it/sec) -training >> step=4922800, episode=821 reward=0.787008 (453.58 it/sec) -training >> step=4922900, episode=821 reward=0.7611681 (434.47 it/sec) -training >> step=4923000, episode=821 reward=0.7773089 (429.33 it/sec) -training >> step=4923100, episode=821 reward=0.7878634 (438.06 it/sec) -training >> step=4923200, episode=821 reward=0.7709641 (421.76 it/sec) -training >> step=4923300, episode=821 reward=0.7886569 (432.27 it/sec) -training >> step=4923400, episode=821 reward=0.8019907 (452.74 it/sec) -training >> step=4923500, episode=821 reward=0.7489142 (440.73 it/sec) -training >> step=4923600, episode=821 reward=0.7744768 (450.17 it/sec) -training >> step=4923700, episode=821 reward=0.7568696 (453.76 it/sec) -training >> step=4923800, episode=821 reward=0.7773019 (452.98 it/sec) -training >> step=4923900, episode=821 reward=0.7925973 (488.46 it/sec) -training >> step=4924000, episode=821 reward=0.7795391 (343.55 it/sec) -training >> step=4924100, episode=821 reward=0.7795503 (343.78 it/sec) -training >> step=4924200, episode=821 reward=0.7678099 (385.45 it/sec) -training >> step=4924300, episode=821 reward=0.7891784 (437.99 it/sec) -training >> step=4924400, episode=821 reward=0.7682766 (396.18 it/sec) -training >> step=4924500, episode=821 reward=0.7987196 (423.91 it/sec) -training >> step=4924600, episode=821 reward=0.7658512 (433.64 it/sec) -training >> step=4924700, episode=821 reward=0.7644644 (458.47 it/sec) -training >> step=4924800, episode=821 reward=0.7676166 (441.24 it/sec) -training >> step=4924900, episode=821 reward=0.7749361 (451.53 it/sec) -training >> step=4925000, episode=821 reward=0.7739515 (452.68 it/sec) -training >> step=4925100, episode=821 reward=0.7595392 (477.97 it/sec) -training >> step=4925200, episode=821 reward=0.7859393 (485.64 it/sec) -training >> step=4925300, episode=822 reward=0.7870628 (89.91 it/sec) -training >> step=4925400, episode=822 reward=0.7488968 (493.17 it/sec) -training >> step=4925500, episode=822 reward=0.7548642 (456.13 it/sec) -training >> step=4925600, episode=822 reward=0.7710419 (464.49 it/sec) -training >> step=4925700, episode=822 reward=0.8034484 (517.24 it/sec) -training >> step=4925800, episode=822 reward=0.7908833 (451.52 it/sec) -training >> step=4925900, episode=822 reward=0.7836048 (454.84 it/sec) -training >> step=4926000, episode=822 reward=0.8022385 (454.38 it/sec) -training >> step=4926100, episode=822 reward=0.7839798 (474.68 it/sec) -training >> step=4926200, episode=822 reward=0.7921169 (447.23 it/sec) -training >> step=4926300, episode=822 reward=0.7851513 (478.92 it/sec) -training >> step=4926400, episode=822 reward=0.7905426 (490.03 it/sec) -training >> step=4926500, episode=822 reward=0.7839764 (480.51 it/sec) -training >> step=4926600, episode=822 reward=0.7743511 (518.81 it/sec) -training >> step=4926700, episode=822 reward=0.7886804 (534.67 it/sec) -training >> step=4926800, episode=822 reward=0.7787066 (465.74 it/sec) -training >> step=4926900, episode=822 reward=0.7836471 (488.48 it/sec) -training >> step=4927000, episode=822 reward=0.7843435 (442.54 it/sec) -training >> step=4927100, episode=822 reward=0.779155 (459.92 it/sec) -training >> step=4927200, episode=822 reward=0.7928202 (435.98 it/sec) -training >> step=4927300, episode=822 reward=0.7722128 (384.74 it/sec) -training >> step=4927400, episode=822 reward=0.785632 (417.91 it/sec) -training >> step=4927500, episode=822 reward=0.7865217 (290.58 it/sec) -training >> step=4927600, episode=822 reward=0.7661428 (412.20 it/sec) -training >> step=4927700, episode=822 reward=0.7829897 (398.56 it/sec) -training >> step=4927800, episode=822 reward=0.7784707 (372.12 it/sec) -training >> step=4927900, episode=822 reward=0.7944474 (427.63 it/sec) -training >> step=4928000, episode=822 reward=0.7837983 (395.90 it/sec) -training >> step=4928100, episode=822 reward=0.7691124 (443.67 it/sec) -training >> step=4928200, episode=822 reward=0.7656212 (389.10 it/sec) -training >> step=4928300, episode=822 reward=0.7651986 (447.61 it/sec) -training >> step=4928400, episode=822 reward=0.7697113 (440.25 it/sec) -training >> step=4928500, episode=822 reward=0.7819482 (467.55 it/sec) -training >> step=4928600, episode=822 reward=0.7746751 (427.38 it/sec) -training >> step=4928700, episode=822 reward=0.7898609 (407.99 it/sec) -training >> step=4928800, episode=822 reward=0.7775741 (455.09 it/sec) -training >> step=4928900, episode=822 reward=0.7642269 (420.55 it/sec) -training >> step=4929000, episode=822 reward=0.7893723 (421.33 it/sec) -training >> step=4929100, episode=822 reward=0.7731804 (440.37 it/sec) -training >> step=4929200, episode=822 reward=0.7845138 (467.74 it/sec) -training >> step=4929300, episode=822 reward=0.7574226 (428.68 it/sec) -training >> step=4929400, episode=822 reward=0.7900855 (419.38 it/sec) -training >> step=4929500, episode=822 reward=0.7963281 (413.04 it/sec) -training >> step=4929600, episode=822 reward=0.7981083 (449.85 it/sec) -training >> step=4929700, episode=822 reward=0.7834154 (457.44 it/sec) -training >> step=4929800, episode=822 reward=0.7864998 (427.02 it/sec) -training >> step=4929900, episode=822 reward=0.787405 (443.42 it/sec) -training >> step=4930000, episode=822 reward=0.7784635 (450.47 it/sec) -training >> step=4930100, episode=822 reward=0.7774441 (448.39 it/sec) -training >> step=4930200, episode=822 reward=0.7609806 (420.26 it/sec) -training >> step=4930300, episode=822 reward=0.7531049 (443.80 it/sec) -training >> step=4930400, episode=822 reward=0.7841874 (429.68 it/sec) -training >> step=4930500, episode=822 reward=0.794153 (462.45 it/sec) -training >> step=4930600, episode=822 reward=0.7728473 (422.91 it/sec) -training >> step=4930700, episode=822 reward=0.7686406 (431.91 it/sec) -training >> step=4930800, episode=822 reward=0.7855411 (423.22 it/sec) -training >> step=4930900, episode=822 reward=0.7962859 (406.49 it/sec) -training >> step=4931000, episode=822 reward=0.7796058 (428.16 it/sec) -training >> step=4931100, episode=822 reward=0.7815112 (400.39 it/sec) -training >> step=4931200, episode=822 reward=0.7978919 (463.78 it/sec) -training >> step=4931300, episode=823 reward=0.7725003 (90.74 it/sec) -training >> step=4931400, episode=823 reward=0.7803211 (424.21 it/sec) -training >> step=4931500, episode=823 reward=0.7663873 (492.31 it/sec) -training >> step=4931600, episode=823 reward=0.7760095 (474.40 it/sec) -training >> step=4931700, episode=823 reward=0.772141 (469.86 it/sec) -training >> step=4931800, episode=823 reward=0.7797229 (486.94 it/sec) -training >> step=4931900, episode=823 reward=0.789346 (483.34 it/sec) -training >> step=4932000, episode=823 reward=0.7868671 (536.60 it/sec) -training >> step=4932100, episode=823 reward=0.7756532 (507.06 it/sec) -training >> step=4932200, episode=823 reward=0.7741163 (481.42 it/sec) -training >> step=4932300, episode=823 reward=0.7960122 (496.60 it/sec) -training >> step=4932400, episode=823 reward=0.7826349 (462.89 it/sec) -training >> step=4932500, episode=823 reward=0.7738298 (489.96 it/sec) -training >> step=4932600, episode=823 reward=0.7731027 (526.17 it/sec) -training >> step=4932700, episode=823 reward=0.8037941 (529.22 it/sec) -training >> step=4932800, episode=823 reward=0.7906107 (436.83 it/sec) -training >> step=4932900, episode=823 reward=0.7738371 (486.17 it/sec) -training >> step=4933000, episode=823 reward=0.8061917 (469.72 it/sec) -training >> step=4933100, episode=823 reward=0.7714984 (468.38 it/sec) -training >> step=4933200, episode=823 reward=0.7818684 (486.34 it/sec) -training >> step=4933300, episode=823 reward=0.7899636 (485.95 it/sec) -training >> step=4933400, episode=823 reward=0.7718433 (480.05 it/sec) -training >> step=4933500, episode=823 reward=0.7792108 (502.89 it/sec) -training >> step=4933600, episode=823 reward=0.7767926 (523.71 it/sec) -training >> step=4933700, episode=823 reward=0.764091 (473.35 it/sec) -training >> step=4933800, episode=823 reward=0.7698151 (386.47 it/sec) -training >> step=4933900, episode=823 reward=0.7825013 (485.00 it/sec) -training >> step=4934000, episode=823 reward=0.7819604 (512.94 it/sec) -training >> step=4934100, episode=823 reward=0.7885911 (519.07 it/sec) -training >> step=4934200, episode=823 reward=0.7535095 (420.10 it/sec) -training >> step=4934300, episode=823 reward=0.7821181 (490.69 it/sec) -training >> step=4934400, episode=823 reward=0.7804371 (481.12 it/sec) -training >> step=4934500, episode=823 reward=0.7869801 (524.96 it/sec) -training >> step=4934600, episode=823 reward=0.7775947 (513.91 it/sec) -training >> step=4934700, episode=823 reward=0.8012448 (525.31 it/sec) -training >> step=4934800, episode=823 reward=0.798259 (467.64 it/sec) -training >> step=4934900, episode=823 reward=0.7890854 (498.39 it/sec) -training >> step=4935000, episode=823 reward=0.7889137 (534.04 it/sec) -training >> step=4935100, episode=823 reward=0.7822828 (520.73 it/sec) -training >> step=4935200, episode=823 reward=0.7852561 (496.91 it/sec) -training >> step=4935300, episode=823 reward=0.7781346 (479.41 it/sec) -training >> step=4935400, episode=823 reward=0.782167 (516.41 it/sec) -training >> step=4935500, episode=823 reward=0.7645407 (480.81 it/sec) -training >> step=4935600, episode=823 reward=0.7696719 (542.48 it/sec) -training >> step=4935700, episode=823 reward=0.7805585 (515.39 it/sec) -training >> step=4935800, episode=823 reward=0.7881062 (493.16 it/sec) -training >> step=4935900, episode=823 reward=0.7834688 (496.15 it/sec) -training >> step=4936000, episode=823 reward=0.7882736 (518.72 it/sec) -training >> step=4936100, episode=823 reward=0.779143 (504.68 it/sec) -training >> step=4936200, episode=823 reward=0.7560146 (501.57 it/sec) -training >> step=4936300, episode=823 reward=0.775902 (496.76 it/sec) -training >> step=4936400, episode=823 reward=0.7779053 (485.01 it/sec) -training >> step=4936500, episode=823 reward=0.7911768 (548.80 it/sec) -training >> step=4936600, episode=823 reward=0.7731803 (510.65 it/sec) -training >> step=4936700, episode=823 reward=0.7689328 (492.83 it/sec) -training >> step=4936800, episode=823 reward=0.7925143 (504.64 it/sec) -training >> step=4936900, episode=823 reward=0.7741041 (514.84 it/sec) -training >> step=4937000, episode=823 reward=0.7792424 (504.09 it/sec) -training >> step=4937100, episode=823 reward=0.7574267 (512.60 it/sec) -training >> step=4937200, episode=823 reward=0.7958347 (513.01 it/sec) -training >> step=4937300, episode=824 reward=0.7740128 (122.33 it/sec) -training >> step=4937400, episode=824 reward=0.7976016 (534.94 it/sec) -training >> step=4937500, episode=824 reward=0.77943 (465.79 it/sec) -training >> step=4937600, episode=824 reward=0.7671936 (501.24 it/sec) -training >> step=4937700, episode=824 reward=0.7741667 (504.18 it/sec) -training >> step=4937800, episode=824 reward=0.8010337 (450.36 it/sec) -training >> step=4937900, episode=824 reward=0.7751357 (440.49 it/sec) -training >> step=4938000, episode=824 reward=0.8037691 (490.59 it/sec) -training >> step=4938100, episode=824 reward=0.7785622 (481.18 it/sec) -training >> step=4938200, episode=824 reward=0.7752517 (471.50 it/sec) -training >> step=4938300, episode=824 reward=0.7619147 (498.77 it/sec) -training >> step=4938400, episode=824 reward=0.7709189 (492.27 it/sec) -training >> step=4938500, episode=824 reward=0.7737389 (491.00 it/sec) -training >> step=4938600, episode=824 reward=0.7735301 (453.13 it/sec) -training >> step=4938700, episode=824 reward=0.7876808 (494.96 it/sec) -training >> step=4938800, episode=824 reward=0.768911 (503.51 it/sec) -training >> step=4938900, episode=824 reward=0.7649825 (483.14 it/sec) -training >> step=4939000, episode=824 reward=0.7826834 (515.18 it/sec) -training >> step=4939100, episode=824 reward=0.7635409 (473.94 it/sec) -training >> step=4939200, episode=824 reward=0.7926192 (485.63 it/sec) -training >> step=4939300, episode=824 reward=0.7882248 (407.56 it/sec) -training >> step=4939400, episode=824 reward=0.7717842 (465.43 it/sec) -training >> step=4939500, episode=824 reward=0.7818505 (404.29 it/sec) -training >> step=4939600, episode=824 reward=0.7735548 (412.05 it/sec) -training >> step=4939700, episode=824 reward=0.7919325 (419.93 it/sec) -training >> step=4939800, episode=824 reward=0.8086901 (513.51 it/sec) -training >> step=4939900, episode=824 reward=0.7849931 (317.84 it/sec) -training >> step=4940000, episode=824 reward=0.7669756 (506.55 it/sec) -training >> step=4940100, episode=824 reward=0.7771951 (466.22 it/sec) -training >> step=4940200, episode=824 reward=0.7647844 (457.69 it/sec) -training >> step=4940300, episode=824 reward=0.7850521 (464.17 it/sec) -training >> step=4940400, episode=824 reward=0.7971597 (467.52 it/sec) -training >> step=4940500, episode=824 reward=0.7828864 (489.51 it/sec) -training >> step=4940600, episode=824 reward=0.7884715 (505.50 it/sec) -training >> step=4940700, episode=824 reward=0.7992358 (492.08 it/sec) -training >> step=4940800, episode=824 reward=0.7783687 (509.57 it/sec) -training >> step=4940900, episode=824 reward=0.7658882 (495.36 it/sec) -training >> step=4941000, episode=824 reward=0.7771411 (491.17 it/sec) -training >> step=4941100, episode=824 reward=0.7787084 (494.05 it/sec) -training >> step=4941200, episode=824 reward=0.7894999 (536.31 it/sec) -training >> step=4941300, episode=824 reward=0.7927402 (466.67 it/sec) -training >> step=4941400, episode=824 reward=0.7527933 (498.61 it/sec) -training >> step=4941500, episode=824 reward=0.7688801 (490.71 it/sec) -training >> step=4941600, episode=824 reward=0.7869436 (485.21 it/sec) -training >> step=4941700, episode=824 reward=0.7728425 (496.11 it/sec) -training >> step=4941800, episode=824 reward=0.7828558 (440.01 it/sec) -training >> step=4941900, episode=824 reward=0.7949824 (512.15 it/sec) -training >> step=4942000, episode=824 reward=0.765124 (440.48 it/sec) -training >> step=4942100, episode=824 reward=0.773719 (479.38 it/sec) -training >> step=4942200, episode=824 reward=0.7659536 (497.67 it/sec) -training >> step=4942300, episode=824 reward=0.7844514 (535.50 it/sec) -training >> step=4942400, episode=824 reward=0.7828447 (498.99 it/sec) -training >> step=4942500, episode=824 reward=0.7846349 (480.72 it/sec) -training >> step=4942600, episode=824 reward=0.7698434 (486.51 it/sec) -training >> step=4942700, episode=824 reward=0.7883309 (503.03 it/sec) -training >> step=4942800, episode=824 reward=0.7808449 (449.84 it/sec) -training >> step=4942900, episode=824 reward=0.7688665 (461.63 it/sec) -training >> step=4943000, episode=824 reward=0.7740937 (510.88 it/sec) -training >> step=4943100, episode=824 reward=0.7724046 (522.91 it/sec) -training >> step=4943200, episode=824 reward=0.7664146 (531.06 it/sec) -training >> step=4943300, episode=825 reward=0.7825324 (110.15 it/sec) -training >> step=4943400, episode=825 reward=0.7815468 (561.25 it/sec) -training >> step=4943500, episode=825 reward=0.7855958 (517.49 it/sec) -training >> step=4943600, episode=825 reward=0.7520074 (513.32 it/sec) -training >> step=4943700, episode=825 reward=0.7808715 (390.00 it/sec) -training >> step=4943800, episode=825 reward=0.7953167 (473.18 it/sec) -training >> step=4943900, episode=825 reward=0.7735474 (524.07 it/sec) -training >> step=4944000, episode=825 reward=0.7741958 (438.71 it/sec) -training >> step=4944100, episode=825 reward=0.7885402 (477.43 it/sec) -training >> step=4944200, episode=825 reward=0.7967073 (417.87 it/sec) -training >> step=4944300, episode=825 reward=0.7862886 (443.98 it/sec) -training >> step=4944400, episode=825 reward=0.7856041 (514.06 it/sec) -training >> step=4944500, episode=825 reward=0.7723626 (508.79 it/sec) -training >> step=4944600, episode=825 reward=0.7904763 (434.80 it/sec) -training >> step=4944700, episode=825 reward=0.7895139 (502.68 it/sec) -training >> step=4944800, episode=825 reward=0.7845536 (506.91 it/sec) -training >> step=4944900, episode=825 reward=0.8062137 (491.70 it/sec) -training >> step=4945000, episode=825 reward=0.774893 (500.34 it/sec) -training >> step=4945100, episode=825 reward=0.787898 (477.36 it/sec) -training >> step=4945200, episode=825 reward=0.78204 (504.95 it/sec) -training >> step=4945300, episode=825 reward=0.7636194 (520.88 it/sec) -training >> step=4945400, episode=825 reward=0.7778736 (494.47 it/sec) -training >> step=4945500, episode=825 reward=0.7847089 (484.68 it/sec) -training >> step=4945600, episode=825 reward=0.7838799 (439.40 it/sec) -training >> step=4945700, episode=825 reward=0.7820777 (485.60 it/sec) -training >> step=4945800, episode=825 reward=0.7782327 (447.94 it/sec) -training >> step=4945900, episode=825 reward=0.7927079 (471.38 it/sec) -training >> step=4946000, episode=825 reward=0.7811094 (469.60 it/sec) -training >> step=4946100, episode=825 reward=0.7954285 (460.72 it/sec) -training >> step=4946200, episode=825 reward=0.7882356 (328.92 it/sec) -training >> step=4946300, episode=825 reward=0.7833717 (498.85 it/sec) -training >> step=4946400, episode=825 reward=0.7764062 (468.98 it/sec) -training >> step=4946500, episode=825 reward=0.7480351 (473.25 it/sec) -training >> step=4946600, episode=825 reward=0.7733275 (448.69 it/sec) -training >> step=4946700, episode=825 reward=0.7693498 (501.61 it/sec) -training >> step=4946800, episode=825 reward=0.7941866 (503.48 it/sec) -training >> step=4946900, episode=825 reward=0.8069957 (506.86 it/sec) -training >> step=4947000, episode=825 reward=0.7949943 (479.65 it/sec) -training >> step=4947100, episode=825 reward=0.7857246 (455.73 it/sec) -training >> step=4947200, episode=825 reward=0.7456549 (501.33 it/sec) -training >> step=4947300, episode=825 reward=0.7834319 (548.28 it/sec) -training >> step=4947400, episode=825 reward=0.7724438 (491.79 it/sec) -training >> step=4947500, episode=825 reward=0.7803478 (500.41 it/sec) -training >> step=4947600, episode=825 reward=0.7741815 (510.52 it/sec) -training >> step=4947700, episode=825 reward=0.7782314 (521.68 it/sec) -training >> step=4947800, episode=825 reward=0.7928598 (470.69 it/sec) -training >> step=4947900, episode=825 reward=0.7769803 (495.24 it/sec) -training >> step=4948000, episode=825 reward=0.7821651 (530.96 it/sec) -training >> step=4948100, episode=825 reward=0.7708046 (454.43 it/sec) -training >> step=4948200, episode=825 reward=0.7779744 (488.97 it/sec) -training >> step=4948300, episode=825 reward=0.7401966 (566.59 it/sec) -training >> step=4948400, episode=825 reward=0.7771634 (501.08 it/sec) -training >> step=4948500, episode=825 reward=0.7769341 (510.64 it/sec) -training >> step=4948600, episode=825 reward=0.7947065 (476.40 it/sec) -training >> step=4948700, episode=825 reward=0.8071139 (528.30 it/sec) -training >> step=4948800, episode=825 reward=0.7671574 (535.46 it/sec) -training >> step=4948900, episode=825 reward=0.7687563 (523.48 it/sec) -training >> step=4949000, episode=825 reward=0.7492806 (544.90 it/sec) -training >> step=4949100, episode=825 reward=0.7626824 (506.44 it/sec) -training >> step=4949200, episode=825 reward=0.7798368 (487.64 it/sec) -training >> step=4949300, episode=826 reward=0.7766683 (117.91 it/sec) -training >> step=4949400, episode=826 reward=0.7771311 (530.43 it/sec) -training >> step=4949500, episode=826 reward=0.7653524 (538.83 it/sec) -training >> step=4949600, episode=826 reward=0.7690523 (488.93 it/sec) -training >> step=4949700, episode=826 reward=0.7751318 (518.30 it/sec) -training >> step=4949800, episode=826 reward=0.77723 (444.62 it/sec) -training >> step=4949900, episode=826 reward=0.7687744 (494.70 it/sec) -training >> step=4950000, episode=826 reward=0.7888867 (509.47 it/sec) -training >> step=4950100, episode=826 reward=0.7720093 (537.13 it/sec) -training >> step=4950200, episode=826 reward=0.7721052 (491.47 it/sec) -training >> step=4950300, episode=826 reward=0.7848948 (511.44 it/sec) -training >> step=4950400, episode=826 reward=0.7910337 (462.52 it/sec) -training >> step=4950500, episode=826 reward=0.7943168 (489.53 it/sec) -training >> step=4950600, episode=826 reward=0.7733024 (505.72 it/sec) -training >> step=4950700, episode=826 reward=0.801268 (494.87 it/sec) -training >> step=4950800, episode=826 reward=0.7642981 (535.81 it/sec) -training >> step=4950900, episode=826 reward=0.7686467 (487.33 it/sec) -training >> step=4951000, episode=826 reward=0.7649466 (529.38 it/sec) -training >> step=4951100, episode=826 reward=0.7711025 (504.79 it/sec) -training >> step=4951200, episode=826 reward=0.7688105 (524.28 it/sec) -training >> step=4951300, episode=826 reward=0.7556455 (525.64 it/sec) -training >> step=4951400, episode=826 reward=0.777979 (457.68 it/sec) -training >> step=4951500, episode=826 reward=0.7917823 (512.80 it/sec) -training >> step=4951600, episode=826 reward=0.7819161 (513.93 it/sec) -training >> step=4951700, episode=826 reward=0.7920616 (492.10 it/sec) -training >> step=4951800, episode=826 reward=0.7897303 (450.51 it/sec) -training >> step=4951900, episode=826 reward=0.7749004 (484.94 it/sec) -training >> step=4952000, episode=826 reward=0.7673002 (470.10 it/sec) -training >> step=4952100, episode=826 reward=0.8065557 (494.05 it/sec) -training >> step=4952200, episode=826 reward=0.7835841 (502.68 it/sec) -training >> step=4952300, episode=826 reward=0.7981804 (318.52 it/sec) -training >> step=4952400, episode=826 reward=0.775317 (482.48 it/sec) -training >> step=4952500, episode=826 reward=0.7962396 (499.55 it/sec) -training >> step=4952600, episode=826 reward=0.7765368 (503.07 it/sec) -training >> step=4952700, episode=826 reward=0.7735402 (499.27 it/sec) -training >> step=4952800, episode=826 reward=0.7872263 (483.50 it/sec) -training >> step=4952900, episode=826 reward=0.7644444 (470.71 it/sec) -training >> step=4953000, episode=826 reward=0.7608663 (510.68 it/sec) -training >> step=4953100, episode=826 reward=0.7809432 (494.21 it/sec) -training >> step=4953200, episode=826 reward=0.7867632 (512.63 it/sec) -training >> step=4953300, episode=826 reward=0.7544574 (534.62 it/sec) -training >> step=4953400, episode=826 reward=0.7866559 (473.01 it/sec) -training >> step=4953500, episode=826 reward=0.7857562 (547.88 it/sec) -training >> step=4953600, episode=826 reward=0.7924476 (498.46 it/sec) -training >> step=4953700, episode=826 reward=0.7896042 (473.54 it/sec) -training >> step=4953800, episode=826 reward=0.785768 (527.47 it/sec) -training >> step=4953900, episode=826 reward=0.7732652 (448.69 it/sec) -training >> step=4954000, episode=826 reward=0.7754713 (548.05 it/sec) -training >> step=4954100, episode=826 reward=0.787383 (500.99 it/sec) -training >> step=4954200, episode=826 reward=0.7790917 (505.58 it/sec) -training >> step=4954300, episode=826 reward=0.7765138 (515.49 it/sec) -training >> step=4954400, episode=826 reward=0.786591 (479.64 it/sec) -training >> step=4954500, episode=826 reward=0.7698073 (502.80 it/sec) -training >> step=4954600, episode=826 reward=0.7946098 (545.42 it/sec) -training >> step=4954700, episode=826 reward=0.7829805 (499.03 it/sec) -training >> step=4954800, episode=826 reward=0.7827943 (530.00 it/sec) -training >> step=4954900, episode=826 reward=0.7980518 (471.13 it/sec) -training >> step=4955000, episode=826 reward=0.7854699 (501.43 it/sec) -training >> step=4955100, episode=826 reward=0.7759592 (478.43 it/sec) -training >> step=4955200, episode=826 reward=0.760117 (506.44 it/sec) -training >> step=4955300, episode=827 reward=0.7751693 (131.04 it/sec) -training >> step=4955400, episode=827 reward=0.7770433 (525.91 it/sec) -training >> step=4955500, episode=827 reward=0.7400534 (525.72 it/sec) -training >> step=4955600, episode=827 reward=0.7728701 (420.93 it/sec) -training >> step=4955700, episode=827 reward=0.7655216 (479.79 it/sec) -training >> step=4955800, episode=827 reward=0.7798476 (527.39 it/sec) -training >> step=4955900, episode=827 reward=0.7821706 (516.97 it/sec) -training >> step=4956000, episode=827 reward=0.7664669 (494.37 it/sec) -training >> step=4956100, episode=827 reward=0.7775034 (519.45 it/sec) -training >> step=4956200, episode=827 reward=0.7781137 (456.79 it/sec) -training >> step=4956300, episode=827 reward=0.7860488 (451.90 it/sec) -training >> step=4956400, episode=827 reward=0.7874951 (412.75 it/sec) -training >> step=4956500, episode=827 reward=0.7795104 (470.61 it/sec) -training >> step=4956600, episode=827 reward=0.7943339 (485.88 it/sec) -training >> step=4956700, episode=827 reward=0.7760812 (476.56 it/sec) -training >> step=4956800, episode=827 reward=0.7947473 (411.56 it/sec) -training >> step=4956900, episode=827 reward=0.7875851 (483.62 it/sec) -training >> step=4957000, episode=827 reward=0.769264 (491.16 it/sec) -training >> step=4957100, episode=827 reward=0.7752143 (468.64 it/sec) -training >> step=4957200, episode=827 reward=0.7754229 (492.25 it/sec) -training >> step=4957300, episode=827 reward=0.7946964 (521.27 it/sec) -training >> step=4957400, episode=827 reward=0.7592479 (474.79 it/sec) -training >> step=4957500, episode=827 reward=0.7807261 (471.78 it/sec) -training >> step=4957600, episode=827 reward=0.7801901 (444.44 it/sec) -training >> step=4957700, episode=827 reward=0.7972295 (473.07 it/sec) -training >> step=4957800, episode=827 reward=0.7752351 (493.53 it/sec) -training >> step=4957900, episode=827 reward=0.757269 (468.95 it/sec) -training >> step=4958000, episode=827 reward=0.778937 (524.54 it/sec) -training >> step=4958100, episode=827 reward=0.7931551 (482.89 it/sec) -training >> step=4958200, episode=827 reward=0.7958426 (505.65 it/sec) -training >> step=4958300, episode=827 reward=0.7815764 (518.80 it/sec) -training >> step=4958400, episode=827 reward=0.7843115 (404.47 it/sec) -training >> step=4958500, episode=827 reward=0.785641 (507.52 it/sec) -training >> step=4958600, episode=827 reward=0.7704829 (473.33 it/sec) -training >> step=4958700, episode=827 reward=0.7726359 (511.05 it/sec) -training >> step=4958800, episode=827 reward=0.7823507 (525.99 it/sec) -training >> step=4958900, episode=827 reward=0.7756962 (490.76 it/sec) -training >> step=4959000, episode=827 reward=0.7826114 (504.34 it/sec) -training >> step=4959100, episode=827 reward=0.7708858 (514.84 it/sec) -training >> step=4959200, episode=827 reward=0.7912636 (451.06 it/sec) -training >> step=4959300, episode=827 reward=0.7795287 (511.19 it/sec) -training >> step=4959400, episode=827 reward=0.7776767 (493.26 it/sec) -training >> step=4959500, episode=827 reward=0.7873439 (491.90 it/sec) -training >> step=4959600, episode=827 reward=0.7743633 (486.90 it/sec) -training >> step=4959700, episode=827 reward=0.7717594 (491.93 it/sec) -training >> step=4959800, episode=827 reward=0.7758383 (521.58 it/sec) -training >> step=4959900, episode=827 reward=0.7869446 (478.64 it/sec) -training >> step=4960000, episode=827 reward=0.7858389 (514.29 it/sec) -training >> step=4960100, episode=827 reward=0.7788824 (490.18 it/sec) -training >> step=4960200, episode=827 reward=0.784396 (519.44 it/sec) -training >> step=4960300, episode=827 reward=0.7797115 (493.26 it/sec) -training >> step=4960400, episode=827 reward=0.7863571 (489.33 it/sec) -training >> step=4960500, episode=827 reward=0.7745945 (480.82 it/sec) -training >> step=4960600, episode=827 reward=0.7867599 (459.44 it/sec) -training >> step=4960700, episode=827 reward=0.7744063 (472.38 it/sec) -training >> step=4960800, episode=827 reward=0.7889629 (480.48 it/sec) -training >> step=4960900, episode=827 reward=0.7536669 (484.02 it/sec) -training >> step=4961000, episode=827 reward=0.7776352 (474.33 it/sec) -training >> step=4961100, episode=827 reward=0.8110606 (494.70 it/sec) -training >> step=4961200, episode=827 reward=0.7763806 (468.37 it/sec) -training >> step=4961300, episode=828 reward=0.7869838 (124.62 it/sec) -training >> step=4961400, episode=828 reward=0.7744922 (497.08 it/sec) -training >> step=4961500, episode=828 reward=0.768787 (500.25 it/sec) -training >> step=4961600, episode=828 reward=0.7808171 (469.62 it/sec) -training >> step=4961700, episode=828 reward=0.7860495 (474.89 it/sec) -training >> step=4961800, episode=828 reward=0.7743474 (490.09 it/sec) -training >> step=4961900, episode=828 reward=0.7830968 (477.03 it/sec) -training >> step=4962000, episode=828 reward=0.7807917 (529.00 it/sec) -training >> step=4962100, episode=828 reward=0.7885628 (491.08 it/sec) -training >> step=4962200, episode=828 reward=0.7956425 (450.61 it/sec) -training >> step=4962300, episode=828 reward=0.7765845 (488.90 it/sec) -training >> step=4962400, episode=828 reward=0.7926652 (472.09 it/sec) -training >> step=4962500, episode=828 reward=0.780592 (510.67 it/sec) -training >> step=4962600, episode=828 reward=0.7876316 (485.02 it/sec) -training >> step=4962700, episode=828 reward=0.7798687 (492.52 it/sec) -training >> step=4962800, episode=828 reward=0.7796965 (424.27 it/sec) -training >> step=4962900, episode=828 reward=0.7746744 (489.74 it/sec) -training >> step=4963000, episode=828 reward=0.7836084 (481.39 it/sec) -training >> step=4963100, episode=828 reward=0.7861412 (457.92 it/sec) -training >> step=4963200, episode=828 reward=0.7929063 (429.91 it/sec) -training >> step=4963300, episode=828 reward=0.7747158 (436.33 it/sec) -training >> step=4963400, episode=828 reward=0.8018254 (479.23 it/sec) -training >> step=4963500, episode=828 reward=0.7937557 (470.01 it/sec) -training >> step=4963600, episode=828 reward=0.7726531 (421.43 it/sec) -training >> step=4963700, episode=828 reward=0.8007273 (379.37 it/sec) -training >> step=4963800, episode=828 reward=0.7793646 (449.62 it/sec) -training >> step=4963900, episode=828 reward=0.778088 (469.52 it/sec) -training >> step=4964000, episode=828 reward=0.7808248 (435.32 it/sec) -training >> step=4964100, episode=828 reward=0.7817226 (489.67 it/sec) -training >> step=4964200, episode=828 reward=0.7904314 (458.29 it/sec) -training >> step=4964300, episode=828 reward=0.7821612 (432.25 it/sec) -training >> step=4964400, episode=828 reward=0.7880865 (447.55 it/sec) -training >> step=4964500, episode=828 reward=0.7957758 (370.66 it/sec) -training >> step=4964600, episode=828 reward=0.7809947 (463.73 it/sec) -training >> step=4964700, episode=828 reward=0.7886421 (477.02 it/sec) -training >> step=4964800, episode=828 reward=0.7715256 (496.67 it/sec) -training >> step=4964900, episode=828 reward=0.7807169 (530.63 it/sec) -training >> step=4965000, episode=828 reward=0.7881347 (498.72 it/sec) -training >> step=4965100, episode=828 reward=0.7774606 (499.79 it/sec) -training >> step=4965200, episode=828 reward=0.762485 (429.66 it/sec) -training >> step=4965300, episode=828 reward=0.7788296 (480.84 it/sec) -training >> step=4965400, episode=828 reward=0.7877123 (488.86 it/sec) -training >> step=4965500, episode=828 reward=0.7824314 (488.48 it/sec) -training >> step=4965600, episode=828 reward=0.7666703 (514.01 it/sec) -training >> step=4965700, episode=828 reward=0.7842347 (417.19 it/sec) -training >> step=4965800, episode=828 reward=0.771721 (488.17 it/sec) -training >> step=4965900, episode=828 reward=0.7592576 (496.23 it/sec) -training >> step=4966000, episode=828 reward=0.7645265 (459.76 it/sec) -training >> step=4966100, episode=828 reward=0.7754405 (480.76 it/sec) -training >> step=4966200, episode=828 reward=0.7965716 (482.93 it/sec) -training >> step=4966300, episode=828 reward=0.7865794 (517.72 it/sec) -training >> step=4966400, episode=828 reward=0.7839264 (494.71 it/sec) -training >> step=4966500, episode=828 reward=0.775611 (485.12 it/sec) -training >> step=4966600, episode=828 reward=0.7999508 (454.87 it/sec) -training >> step=4966700, episode=828 reward=0.785762 (478.42 it/sec) -training >> step=4966800, episode=828 reward=0.7655683 (468.56 it/sec) -training >> step=4966900, episode=828 reward=0.789795 (448.65 it/sec) -training >> step=4967000, episode=828 reward=0.7800413 (488.89 it/sec) -training >> step=4967100, episode=828 reward=0.7760777 (411.52 it/sec) -training >> step=4967200, episode=828 reward=0.7733794 (430.58 it/sec) -training >> step=4967300, episode=829 reward=0.7602081 (93.21 it/sec) -training >> step=4967400, episode=829 reward=0.7785478 (398.30 it/sec) -training >> step=4967500, episode=829 reward=0.7757687 (463.65 it/sec) -training >> step=4967600, episode=829 reward=0.7798412 (419.68 it/sec) -training >> step=4967700, episode=829 reward=0.7632664 (450.79 it/sec) -training >> step=4967800, episode=829 reward=0.7879849 (451.46 it/sec) -training >> step=4967900, episode=829 reward=0.78657 (448.47 it/sec) -training >> step=4968000, episode=829 reward=0.7829525 (466.45 it/sec) -training >> step=4968100, episode=829 reward=0.793407 (453.74 it/sec) -training >> step=4968200, episode=829 reward=0.802699 (455.51 it/sec) -training >> step=4968300, episode=829 reward=0.7625492 (475.03 it/sec) -training >> step=4968400, episode=829 reward=0.7878668 (455.18 it/sec) -training >> step=4968500, episode=829 reward=0.7805214 (534.09 it/sec) -training >> step=4968600, episode=829 reward=0.7910938 (523.04 it/sec) -training >> step=4968700, episode=829 reward=0.7759862 (549.49 it/sec) -training >> step=4968800, episode=829 reward=0.7663024 (484.85 it/sec) -training >> step=4968900, episode=829 reward=0.7671321 (465.58 it/sec) -training >> step=4969000, episode=829 reward=0.790682 (520.27 it/sec) -training >> step=4969100, episode=829 reward=0.7993557 (547.33 it/sec) -training >> step=4969200, episode=829 reward=0.8010392 (497.38 it/sec) -training >> step=4969300, episode=829 reward=0.7629117 (506.91 it/sec) -training >> step=4969400, episode=829 reward=0.8039381 (462.20 it/sec) -training >> step=4969500, episode=829 reward=0.7737592 (472.18 it/sec) -training >> step=4969600, episode=829 reward=0.7922904 (484.52 it/sec) -training >> step=4969700, episode=829 reward=0.7698219 (524.16 it/sec) -training >> step=4969800, episode=829 reward=0.787984 (557.80 it/sec) -training >> step=4969900, episode=829 reward=0.7756287 (541.56 it/sec) -training >> step=4970000, episode=829 reward=0.7745633 (476.93 it/sec) -training >> step=4970100, episode=829 reward=0.7968391 (512.33 it/sec) -training >> step=4970200, episode=829 reward=0.7708808 (518.64 it/sec) -training >> step=4970300, episode=829 reward=0.7884832 (531.45 it/sec) -training >> step=4970400, episode=829 reward=0.7778252 (515.82 it/sec) -training >> step=4970500, episode=829 reward=0.7834664 (522.73 it/sec) -training >> step=4970600, episode=829 reward=0.776327 (519.32 it/sec) -training >> step=4970700, episode=829 reward=0.783398 (476.68 it/sec) -training >> step=4970800, episode=829 reward=0.7874379 (406.03 it/sec) -training >> step=4970900, episode=829 reward=0.782902 (485.09 it/sec) -training >> step=4971000, episode=829 reward=0.7889539 (462.09 it/sec) -training >> step=4971100, episode=829 reward=0.8015217 (507.94 it/sec) -training >> step=4971200, episode=829 reward=0.7907573 (468.77 it/sec) -training >> step=4971300, episode=829 reward=0.7804965 (392.90 it/sec) -training >> step=4971400, episode=829 reward=0.7709371 (399.25 it/sec) -training >> step=4971500, episode=829 reward=0.7946445 (493.94 it/sec) -training >> step=4971600, episode=829 reward=0.7876081 (476.76 it/sec) -training >> step=4971700, episode=829 reward=0.7800364 (424.71 it/sec) -training >> step=4971800, episode=829 reward=0.7866153 (486.22 it/sec) -training >> step=4971900, episode=829 reward=0.7764551 (470.90 it/sec) -training >> step=4972000, episode=829 reward=0.7548589 (473.58 it/sec) -training >> step=4972100, episode=829 reward=0.7803397 (452.43 it/sec) -training >> step=4972200, episode=829 reward=0.7754602 (488.59 it/sec) -training >> step=4972300, episode=829 reward=0.7859007 (497.19 it/sec) -training >> step=4972400, episode=829 reward=0.8030069 (473.66 it/sec) -training >> step=4972500, episode=829 reward=0.7877218 (485.75 it/sec) -training >> step=4972600, episode=829 reward=0.7841748 (452.82 it/sec) -training >> step=4972700, episode=829 reward=0.7696272 (469.22 it/sec) -training >> step=4972800, episode=829 reward=0.7579054 (471.50 it/sec) -training >> step=4972900, episode=829 reward=0.7813972 (448.46 it/sec) -training >> step=4973000, episode=829 reward=0.7678744 (466.55 it/sec) -training >> step=4973100, episode=829 reward=0.7816259 (441.66 it/sec) -training >> step=4973200, episode=829 reward=0.7883638 (483.12 it/sec) -training >> step=4973300, episode=830 reward=0.7794138 (60.41 it/sec) -training >> step=4973400, episode=830 reward=0.7619752 (474.37 it/sec) -training >> step=4973500, episode=830 reward=0.757836 (508.85 it/sec) -training >> step=4973600, episode=830 reward=0.7786656 (506.54 it/sec) -training >> step=4973700, episode=830 reward=0.7739848 (512.17 it/sec) -training >> step=4973800, episode=830 reward=0.780962 (455.06 it/sec) -training >> step=4973900, episode=830 reward=0.7836055 (389.09 it/sec) -training >> step=4974000, episode=830 reward=0.7865019 (365.97 it/sec) -training >> step=4974100, episode=830 reward=0.7747179 (337.28 it/sec) -training >> step=4974200, episode=830 reward=0.7816452 (332.21 it/sec) -training >> step=4974300, episode=830 reward=0.7801437 (391.99 it/sec) -training >> step=4974400, episode=830 reward=0.7961267 (463.28 it/sec) -training >> step=4974500, episode=830 reward=0.7750712 (454.43 it/sec) -training >> step=4974600, episode=830 reward=0.7834493 (455.03 it/sec) -training >> step=4974700, episode=830 reward=0.7777885 (469.00 it/sec) -training >> step=4974800, episode=830 reward=0.7790146 (498.96 it/sec) -training >> step=4974900, episode=830 reward=0.7810698 (438.53 it/sec) -training >> step=4975000, episode=830 reward=0.7697598 (448.23 it/sec) -training >> step=4975100, episode=830 reward=0.7822869 (488.61 it/sec) -training >> step=4975200, episode=830 reward=0.7953597 (481.28 it/sec) -training >> step=4975300, episode=830 reward=0.769824 (449.71 it/sec) -training >> step=4975400, episode=830 reward=0.7652975 (471.21 it/sec) -training >> step=4975500, episode=830 reward=0.7697173 (479.23 it/sec) -training >> step=4975600, episode=830 reward=0.7737935 (470.40 it/sec) -training >> step=4975700, episode=830 reward=0.7746176 (359.82 it/sec) -training >> step=4975800, episode=830 reward=0.7876507 (452.37 it/sec) -training >> step=4975900, episode=830 reward=0.7659495 (567.57 it/sec) -training >> step=4976000, episode=830 reward=0.7952532 (503.55 it/sec) -training >> step=4976100, episode=830 reward=0.7872347 (410.35 it/sec) -training >> step=4976200, episode=830 reward=0.7911649 (416.57 it/sec) -training >> step=4976300, episode=830 reward=0.7729661 (360.14 it/sec) -training >> step=4976400, episode=830 reward=0.7957528 (385.10 it/sec) -training >> step=4976500, episode=830 reward=0.7724118 (398.82 it/sec) -training >> step=4976600, episode=830 reward=0.7878907 (298.03 it/sec) -training >> step=4976700, episode=830 reward=0.793972 (435.26 it/sec) -training >> step=4976800, episode=830 reward=0.7915403 (437.66 it/sec) -training >> step=4976900, episode=830 reward=0.7772802 (439.52 it/sec) -training >> step=4977000, episode=830 reward=0.7692509 (446.13 it/sec) -training >> step=4977100, episode=830 reward=0.7793601 (427.63 it/sec) -training >> step=4977200, episode=830 reward=0.7704705 (375.16 it/sec) -training >> step=4977300, episode=830 reward=0.7756924 (383.51 it/sec) -training >> step=4977400, episode=830 reward=0.7732142 (474.44 it/sec) -training >> step=4977500, episode=830 reward=0.782145 (488.52 it/sec) -training >> step=4977600, episode=830 reward=0.8019599 (513.95 it/sec) -training >> step=4977700, episode=830 reward=0.7703642 (538.38 it/sec) -training >> step=4977800, episode=830 reward=0.7589238 (502.11 it/sec) -training >> step=4977900, episode=830 reward=0.7823586 (490.62 it/sec) -training >> step=4978000, episode=830 reward=0.7864375 (558.34 it/sec) -training >> step=4978100, episode=830 reward=0.7832288 (493.40 it/sec) -training >> step=4978200, episode=830 reward=0.7752151 (520.00 it/sec) -training >> step=4978300, episode=830 reward=0.7856323 (490.24 it/sec) -training >> step=4978400, episode=830 reward=0.7734489 (451.41 it/sec) -training >> step=4978500, episode=830 reward=0.8074161 (433.80 it/sec) -training >> step=4978600, episode=830 reward=0.788859 (483.79 it/sec) -training >> step=4978700, episode=830 reward=0.7692428 (492.46 it/sec) -training >> step=4978800, episode=830 reward=0.7714795 (530.98 it/sec) -training >> step=4978900, episode=830 reward=0.7786875 (498.96 it/sec) -training >> step=4979000, episode=830 reward=0.7735178 (526.17 it/sec) -training >> step=4979100, episode=830 reward=0.789914 (519.57 it/sec) -training >> step=4979200, episode=830 reward=0.7814502 (514.43 it/sec) -training >> step=4979300, episode=831 reward=0.7542375 (117.13 it/sec) -training >> step=4979400, episode=831 reward=0.7942445 (501.91 it/sec) -training >> step=4979500, episode=831 reward=0.7837074 (512.68 it/sec) -training >> step=4979600, episode=831 reward=0.7559879 (493.99 it/sec) -training >> step=4979700, episode=831 reward=0.7689241 (502.47 it/sec) -training >> step=4979800, episode=831 reward=0.7889339 (536.86 it/sec) -training >> step=4979900, episode=831 reward=0.7712047 (454.86 it/sec) -training >> step=4980000, episode=831 reward=0.7948093 (532.41 it/sec) -training >> step=4980100, episode=831 reward=0.7797226 (496.14 it/sec) -training >> step=4980200, episode=831 reward=0.7779827 (512.79 it/sec) -training >> step=4980300, episode=831 reward=0.7768524 (531.40 it/sec) -training >> step=4980400, episode=831 reward=0.7833956 (519.40 it/sec) -training >> step=4980500, episode=831 reward=0.7764251 (498.44 it/sec) -training >> step=4980600, episode=831 reward=0.7799928 (508.96 it/sec) -training >> step=4980700, episode=831 reward=0.7838988 (521.49 it/sec) -training >> step=4980800, episode=831 reward=0.7759935 (493.83 it/sec) -training >> step=4980900, episode=831 reward=0.7903762 (446.63 it/sec) -training >> step=4981000, episode=831 reward=0.7773706 (462.89 it/sec) -training >> step=4981100, episode=831 reward=0.7838712 (462.89 it/sec) -training >> step=4981200, episode=831 reward=0.7798595 (499.34 it/sec) -training >> step=4981300, episode=831 reward=0.7922544 (542.60 it/sec) -training >> step=4981400, episode=831 reward=0.785063 (478.15 it/sec) -training >> step=4981500, episode=831 reward=0.7881675 (521.08 it/sec) -training >> step=4981600, episode=831 reward=0.7911766 (517.10 it/sec) -training >> step=4981700, episode=831 reward=0.7780527 (549.76 it/sec) -training >> step=4981800, episode=831 reward=0.7829553 (514.13 it/sec) -training >> step=4981900, episode=831 reward=0.7862875 (519.50 it/sec) -training >> step=4982000, episode=831 reward=0.7548061 (517.86 it/sec) -training >> step=4982100, episode=831 reward=0.7753525 (514.72 it/sec) -training >> step=4982200, episode=831 reward=0.7779625 (517.72 it/sec) -training >> step=4982300, episode=831 reward=0.7626828 (535.52 it/sec) -training >> step=4982400, episode=831 reward=0.7985794 (506.43 it/sec) -training >> step=4982500, episode=831 reward=0.7901901 (462.83 it/sec) -training >> step=4982600, episode=831 reward=0.7887429 (522.51 it/sec) -training >> step=4982700, episode=831 reward=0.7740635 (392.10 it/sec) -training >> step=4982800, episode=831 reward=0.7824436 (558.53 it/sec) -training >> step=4982900, episode=831 reward=0.7602353 (488.83 it/sec) -training >> step=4983000, episode=831 reward=0.7563393 (467.81 it/sec) -training >> step=4983100, episode=831 reward=0.7759837 (524.99 it/sec) -training >> step=4983200, episode=831 reward=0.7851062 (471.49 it/sec) -training >> step=4983300, episode=831 reward=0.7847962 (495.78 it/sec) -training >> step=4983400, episode=831 reward=0.7697557 (481.04 it/sec) -training >> step=4983500, episode=831 reward=0.8147038 (488.90 it/sec) -training >> step=4983600, episode=831 reward=0.7822663 (483.18 it/sec) -training >> step=4983700, episode=831 reward=0.785374 (454.67 it/sec) -training >> step=4983800, episode=831 reward=0.7949606 (498.97 it/sec) -training >> step=4983900, episode=831 reward=0.7893095 (492.55 it/sec) -training >> step=4984000, episode=831 reward=0.7753968 (491.44 it/sec) -training >> step=4984100, episode=831 reward=0.7836416 (530.81 it/sec) -training >> step=4984200, episode=831 reward=0.7896943 (536.18 it/sec) -training >> step=4984300, episode=831 reward=0.7810764 (525.36 it/sec) -training >> step=4984400, episode=831 reward=0.7901744 (515.75 it/sec) -training >> step=4984500, episode=831 reward=0.7690261 (485.85 it/sec) -training >> step=4984600, episode=831 reward=0.7849705 (538.48 it/sec) -training >> step=4984700, episode=831 reward=0.7755744 (521.06 it/sec) -training >> step=4984800, episode=831 reward=0.777222 (538.86 it/sec) -training >> step=4984900, episode=831 reward=0.7946328 (547.74 it/sec) -training >> step=4985000, episode=831 reward=0.7932785 (489.16 it/sec) -training >> step=4985100, episode=831 reward=0.7913694 (529.40 it/sec) -training >> step=4985200, episode=831 reward=0.7845251 (519.32 it/sec) -training >> step=4985300, episode=832 reward=0.7772064 (112.33 it/sec) -training >> step=4985400, episode=832 reward=0.7878765 (496.68 it/sec) -training >> step=4985500, episode=832 reward=0.7669901 (496.46 it/sec) -training >> step=4985600, episode=832 reward=0.771363 (480.01 it/sec) -training >> step=4985700, episode=832 reward=0.7762721 (452.18 it/sec) -training >> step=4985800, episode=832 reward=0.7781361 (513.05 it/sec) -training >> step=4985900, episode=832 reward=0.7721371 (460.24 it/sec) -training >> step=4986000, episode=832 reward=0.7689307 (478.40 it/sec) -training >> step=4986100, episode=832 reward=0.7746465 (462.36 it/sec) -training >> step=4986200, episode=832 reward=0.7701381 (413.59 it/sec) -training >> step=4986300, episode=832 reward=0.7810666 (458.31 it/sec) -training >> step=4986400, episode=832 reward=0.7900106 (463.00 it/sec) -training >> step=4986500, episode=832 reward=0.7804834 (438.86 it/sec) -training >> step=4986600, episode=832 reward=0.772988 (477.15 it/sec) -training >> step=4986700, episode=832 reward=0.790706 (480.45 it/sec) -training >> step=4986800, episode=832 reward=0.7843719 (462.69 it/sec) -training >> step=4986900, episode=832 reward=0.794781 (457.23 it/sec) -training >> step=4987000, episode=832 reward=0.7735826 (459.19 it/sec) -training >> step=4987100, episode=832 reward=0.7643242 (472.64 it/sec) -training >> step=4987200, episode=832 reward=0.7762367 (449.18 it/sec) -training >> step=4987300, episode=832 reward=0.7870023 (427.40 it/sec) -training >> step=4987400, episode=832 reward=0.7894432 (472.53 it/sec) -training >> step=4987500, episode=832 reward=0.7780524 (455.88 it/sec) -training >> step=4987600, episode=832 reward=0.7684033 (458.60 it/sec) -training >> step=4987700, episode=832 reward=0.7511947 (334.50 it/sec) -training >> step=4987800, episode=832 reward=0.7696769 (423.16 it/sec) -training >> step=4987900, episode=832 reward=0.7771465 (427.13 it/sec) -training >> step=4988000, episode=832 reward=0.768666 (450.60 it/sec) -training >> step=4988100, episode=832 reward=0.7595221 (431.69 it/sec) -training >> step=4988200, episode=832 reward=0.7704513 (498.08 it/sec) -training >> step=4988300, episode=832 reward=0.772781 (409.02 it/sec) -training >> step=4988400, episode=832 reward=0.7704102 (428.66 it/sec) -training >> step=4988500, episode=832 reward=0.769685 (380.07 it/sec) -training >> step=4988600, episode=832 reward=0.7889962 (356.87 it/sec) -training >> step=4988700, episode=832 reward=0.7721436 (405.02 it/sec) -training >> step=4988800, episode=832 reward=0.7679548 (267.37 it/sec) -training >> step=4988900, episode=832 reward=0.7958603 (329.32 it/sec) -training >> step=4989000, episode=832 reward=0.7749422 (307.76 it/sec) -training >> step=4989100, episode=832 reward=0.7786245 (328.12 it/sec) -training >> step=4989200, episode=832 reward=0.7775701 (350.10 it/sec) -training >> step=4989300, episode=832 reward=0.7863113 (364.72 it/sec) -training >> step=4989400, episode=832 reward=0.7964766 (379.58 it/sec) -training >> step=4989500, episode=832 reward=0.7904443 (428.60 it/sec) -training >> step=4989600, episode=832 reward=0.772252 (436.28 it/sec) -training >> step=4989700, episode=832 reward=0.7910374 (440.80 it/sec) -training >> step=4989800, episode=832 reward=0.7794448 (453.58 it/sec) -training >> step=4989900, episode=832 reward=0.8043569 (478.06 it/sec) -training >> step=4990000, episode=832 reward=0.7829807 (449.53 it/sec) -training >> step=4990100, episode=832 reward=0.7924778 (371.49 it/sec) -training >> step=4990200, episode=832 reward=0.7619917 (426.82 it/sec) -training >> step=4990300, episode=832 reward=0.785 (461.39 it/sec) -training >> step=4990400, episode=832 reward=0.796613 (473.30 it/sec) -training >> step=4990500, episode=832 reward=0.768247 (449.73 it/sec) -training >> step=4990600, episode=832 reward=0.7599213 (454.14 it/sec) -training >> step=4990700, episode=832 reward=0.7748171 (484.66 it/sec) -training >> step=4990800, episode=832 reward=0.774469 (448.54 it/sec) -training >> step=4990900, episode=832 reward=0.7685958 (414.63 it/sec) -training >> step=4991000, episode=832 reward=0.7693123 (403.21 it/sec) -training >> step=4991100, episode=832 reward=0.7591189 (426.70 it/sec) -training >> step=4991200, episode=832 reward=0.7755368 (392.87 it/sec) -training >> step=4991300, episode=833 reward=0.7670795 (76.87 it/sec) -training >> step=4991400, episode=833 reward=0.7860314 (440.03 it/sec) -training >> step=4991500, episode=833 reward=0.7794253 (346.23 it/sec) -training >> step=4991600, episode=833 reward=0.7626947 (398.25 it/sec) -training >> step=4991700, episode=833 reward=0.7785854 (441.98 it/sec) -training >> step=4991800, episode=833 reward=0.7890713 (441.65 it/sec) -training >> step=4991900, episode=833 reward=0.766328 (458.15 it/sec) -training >> step=4992000, episode=833 reward=0.7912537 (470.52 it/sec) -training >> step=4992100, episode=833 reward=0.7749065 (472.46 it/sec) -training >> step=4992200, episode=833 reward=0.7879017 (463.73 it/sec) -training >> step=4992300, episode=833 reward=0.8068483 (470.04 it/sec) -training >> step=4992400, episode=833 reward=0.7747097 (466.86 it/sec) -training >> step=4992500, episode=833 reward=0.771608 (480.65 it/sec) -training >> step=4992600, episode=833 reward=0.7575217 (489.78 it/sec) -training >> step=4992700, episode=833 reward=0.7767451 (475.67 it/sec) -training >> step=4992800, episode=833 reward=0.7772617 (441.01 it/sec) -training >> step=4992900, episode=833 reward=0.7687066 (466.20 it/sec) -training >> step=4993000, episode=833 reward=0.7685052 (520.01 it/sec) -training >> step=4993100, episode=833 reward=0.7718208 (444.04 it/sec) -training >> step=4993200, episode=833 reward=0.7717734 (456.22 it/sec) -training >> step=4993300, episode=833 reward=0.7920192 (504.78 it/sec) -training >> step=4993400, episode=833 reward=0.7688587 (457.61 it/sec) -training >> step=4993500, episode=833 reward=0.7715957 (445.57 it/sec) -training >> step=4993600, episode=833 reward=0.7848143 (470.76 it/sec) -training >> step=4993700, episode=833 reward=0.7790922 (472.83 it/sec) -training >> step=4993800, episode=833 reward=0.7685274 (461.24 it/sec) -training >> step=4993900, episode=833 reward=0.7942566 (476.73 it/sec) -training >> step=4994000, episode=833 reward=0.7810682 (495.68 it/sec) -training >> step=4994100, episode=833 reward=0.7773092 (482.72 it/sec) -training >> step=4994200, episode=833 reward=0.785321 (457.34 it/sec) -training >> step=4994300, episode=833 reward=0.800276 (445.00 it/sec) -training >> step=4994400, episode=833 reward=0.7845002 (393.69 it/sec) -training >> step=4994500, episode=833 reward=0.7880546 (426.09 it/sec) -training >> step=4994600, episode=833 reward=0.7734332 (456.17 it/sec) -training >> step=4994700, episode=833 reward=0.7701837 (479.65 it/sec) -training >> step=4994800, episode=833 reward=0.7497826 (430.05 it/sec) -training >> step=4994900, episode=833 reward=0.7752746 (483.15 it/sec) -training >> step=4995000, episode=833 reward=0.7754363 (442.78 it/sec) -training >> step=4995100, episode=833 reward=0.7857181 (327.83 it/sec) -training >> step=4995200, episode=833 reward=0.783201 (446.65 it/sec) -training >> step=4995300, episode=833 reward=0.7792099 (443.32 it/sec) -training >> step=4995400, episode=833 reward=0.7765939 (436.23 it/sec) -training >> step=4995500, episode=833 reward=0.7936645 (458.63 it/sec) -training >> step=4995600, episode=833 reward=0.7947665 (452.81 it/sec) -training >> step=4995700, episode=833 reward=0.8017427 (477.24 it/sec) -training >> step=4995800, episode=833 reward=0.7689466 (479.12 it/sec) -training >> step=4995900, episode=833 reward=0.787 (433.70 it/sec) -training >> step=4996000, episode=833 reward=0.763795 (379.56 it/sec) -training >> step=4996100, episode=833 reward=0.8025736 (423.10 it/sec) -training >> step=4996200, episode=833 reward=0.7866257 (457.77 it/sec) -training >> step=4996300, episode=833 reward=0.7857585 (472.70 it/sec) -training >> step=4996400, episode=833 reward=0.7765633 (455.23 it/sec) -training >> step=4996500, episode=833 reward=0.7633503 (485.23 it/sec) -training >> step=4996600, episode=833 reward=0.7819877 (423.06 it/sec) -training >> step=4996700, episode=833 reward=0.7972301 (486.78 it/sec) -training >> step=4996800, episode=833 reward=0.786926 (507.09 it/sec) -training >> step=4996900, episode=833 reward=0.7685715 (484.69 it/sec) -training >> step=4997000, episode=833 reward=0.7844452 (447.85 it/sec) -training >> step=4997100, episode=833 reward=0.7901053 (453.36 it/sec) -training >> step=4997200, episode=833 reward=0.8019936 (457.69 it/sec) -training >> step=4997300, episode=834 reward=0.7734869 (88.11 it/sec) -training >> step=4997400, episode=834 reward=0.7818712 (489.51 it/sec) -training >> step=4997500, episode=834 reward=0.7740468 (440.82 it/sec) -training >> step=4997600, episode=834 reward=0.7730269 (403.20 it/sec) -training >> step=4997700, episode=834 reward=0.768842 (398.09 it/sec) -training >> step=4997800, episode=834 reward=0.7629761 (448.01 it/sec) -training >> step=4997900, episode=834 reward=0.7719565 (448.60 it/sec) -training >> step=4998000, episode=834 reward=0.7821821 (460.45 it/sec) -training >> step=4998100, episode=834 reward=0.7962173 (444.94 it/sec) -training >> step=4998200, episode=834 reward=0.7823662 (475.96 it/sec) -training >> step=4998300, episode=834 reward=0.7793766 (446.79 it/sec) -training >> step=4998400, episode=834 reward=0.7891788 (417.67 it/sec) -training >> step=4998500, episode=834 reward=0.7809319 (485.99 it/sec) -training >> step=4998600, episode=834 reward=0.7770738 (468.33 it/sec) -training >> step=4998700, episode=834 reward=0.7776259 (478.14 it/sec) -training >> step=4998800, episode=834 reward=0.7788864 (438.87 it/sec) -training >> step=4998900, episode=834 reward=0.7825738 (437.03 it/sec) -training >> step=4999000, episode=834 reward=0.8041013 (458.76 it/sec) -training >> step=4999100, episode=834 reward=0.7802637 (467.93 it/sec) -training >> step=4999200, episode=834 reward=0.7849134 (494.67 it/sec) -training >> step=4999300, episode=834 reward=0.7776069 (505.27 it/sec) -training >> step=4999400, episode=834 reward=0.7981235 (480.91 it/sec) -training >> step=4999500, episode=834 reward=0.7930281 (449.24 it/sec) -training >> step=4999600, episode=834 reward=0.795451 (479.87 it/sec) -training >> step=4999700, episode=834 reward=0.7448552 (464.46 it/sec) -training >> step=4999800, episode=834 reward=0.7652066 (489.41 it/sec) -training >> step=4999900, episode=834 reward=0.7687836 (440.06 it/sec) -training >> step=5000000, episode=834 reward=0.7795207 (506.32 it/sec) -training >> step=5000100, episode=834 reward=0.7891386 (411.67 it/sec) -training >> step=5000200, episode=834 reward=0.7718693 (451.30 it/sec) -training >> step=5000300, episode=834 reward=0.7689257 (437.33 it/sec) -training >> step=5000400, episode=834 reward=0.7817391 (441.16 it/sec) -training >> step=5000500, episode=834 reward=0.7677275 (401.24 it/sec) -training >> step=5000600, episode=834 reward=0.7720948 (429.45 it/sec) -training >> step=5000700, episode=834 reward=0.7693583 (483.98 it/sec) -training >> step=5000800, episode=834 reward=0.7857304 (454.49 it/sec) -training >> step=5000900, episode=834 reward=0.7794439 (430.54 it/sec) -training >> step=5001000, episode=834 reward=0.7748165 (441.57 it/sec) -training >> step=5001100, episode=834 reward=0.7870879 (477.90 it/sec) -training >> step=5001200, episode=834 reward=0.7876301 (443.33 it/sec) -training >> step=5001300, episode=834 reward=0.7956752 (343.40 it/sec) -training >> step=5001400, episode=834 reward=0.7809173 (492.26 it/sec) -training >> step=5001500, episode=834 reward=0.781193 (448.61 it/sec) -training >> step=5001600, episode=834 reward=0.7660204 (431.60 it/sec) -training >> step=5001700, episode=834 reward=0.7781061 (452.54 it/sec) -training >> step=5001800, episode=834 reward=0.7835016 (468.10 it/sec) -training >> step=5001900, episode=834 reward=0.7921452 (451.56 it/sec) -training >> step=5002000, episode=834 reward=0.7701283 (452.95 it/sec) -training >> step=5002100, episode=834 reward=0.7917956 (468.32 it/sec) -training >> step=5002200, episode=834 reward=0.773879 (446.84 it/sec) -training >> step=5002300, episode=834 reward=0.802083 (416.87 it/sec) -training >> step=5002400, episode=834 reward=0.7793538 (445.74 it/sec) -training >> step=5002500, episode=834 reward=0.7794034 (458.29 it/sec) -training >> step=5002600, episode=834 reward=0.7807052 (450.74 it/sec) -training >> step=5002700, episode=834 reward=0.7696245 (430.04 it/sec) -training >> step=5002800, episode=834 reward=0.7891028 (378.32 it/sec) -training >> step=5002900, episode=834 reward=0.7595568 (447.37 it/sec) -training >> step=5003000, episode=834 reward=0.7859157 (345.72 it/sec) -training >> step=5003100, episode=834 reward=0.7988496 (327.30 it/sec) -training >> step=5003200, episode=834 reward=0.7830552 (409.41 it/sec) -training >> step=5003300, episode=835 reward=0.7729646 (70.99 it/sec) -training >> step=5003400, episode=835 reward=0.7900371 (533.15 it/sec) -training >> step=5003500, episode=835 reward=0.7764116 (524.59 it/sec) -training >> step=5003600, episode=835 reward=0.7731746 (503.27 it/sec) -training >> step=5003700, episode=835 reward=0.78179 (502.01 it/sec) -training >> step=5003800, episode=835 reward=0.7904852 (534.97 it/sec) -training >> step=5003900, episode=835 reward=0.7955024 (513.47 it/sec) -training >> step=5004000, episode=835 reward=0.7754927 (502.85 it/sec) -training >> step=5004100, episode=835 reward=0.7822941 (497.21 it/sec) -training >> step=5004200, episode=835 reward=0.7836474 (524.38 it/sec) -training >> step=5004300, episode=835 reward=0.7909757 (449.01 it/sec) -training >> step=5004400, episode=835 reward=0.7664877 (490.20 it/sec) -training >> step=5004500, episode=835 reward=0.7906856 (507.49 it/sec) -training >> step=5004600, episode=835 reward=0.7833772 (517.82 it/sec) -training >> step=5004700, episode=835 reward=0.7495482 (521.79 it/sec) -training >> step=5004800, episode=835 reward=0.7869966 (519.76 it/sec) -training >> step=5004900, episode=835 reward=0.7861281 (446.08 it/sec) -training >> step=5005000, episode=835 reward=0.8083437 (505.15 it/sec) -training >> step=5005100, episode=835 reward=0.7652402 (519.81 it/sec) -training >> step=5005200, episode=835 reward=0.772141 (532.03 it/sec) -training >> step=5005300, episode=835 reward=0.7794635 (493.41 it/sec) -training >> step=5005400, episode=835 reward=0.8014873 (456.87 it/sec) -training >> step=5005500, episode=835 reward=0.7816721 (515.72 it/sec) -training >> step=5005600, episode=835 reward=0.7686667 (475.67 it/sec) -training >> step=5005700, episode=835 reward=0.7782003 (529.75 it/sec) -training >> step=5005800, episode=835 reward=0.7920614 (502.03 it/sec) -training >> step=5005900, episode=835 reward=0.7870342 (497.52 it/sec) -training >> step=5006000, episode=835 reward=0.785693 (505.83 it/sec) -training >> step=5006100, episode=835 reward=0.7744246 (504.94 it/sec) -training >> step=5006200, episode=835 reward=0.750528 (531.90 it/sec) -training >> step=5006300, episode=835 reward=0.7488025 (470.09 it/sec) -training >> step=5006400, episode=835 reward=0.7737077 (480.76 it/sec) -training >> step=5006500, episode=835 reward=0.7630609 (480.22 it/sec) -training >> step=5006600, episode=835 reward=0.7806227 (521.70 it/sec) -training >> step=5006700, episode=835 reward=0.7824121 (484.87 it/sec) -training >> step=5006800, episode=835 reward=0.7705038 (507.87 it/sec) -training >> step=5006900, episode=835 reward=0.7775646 (486.07 it/sec) -training >> step=5007000, episode=835 reward=0.7805092 (475.04 it/sec) -training >> step=5007100, episode=835 reward=0.7778152 (467.91 it/sec) -training >> step=5007200, episode=835 reward=0.7690527 (476.36 it/sec) -training >> step=5007300, episode=835 reward=0.7863435 (468.84 it/sec) -training >> step=5007400, episode=835 reward=0.7640942 (351.44 it/sec) -training >> step=5007500, episode=835 reward=0.7835413 (497.53 it/sec) -training >> step=5007600, episode=835 reward=0.7842401 (476.19 it/sec) -training >> step=5007700, episode=835 reward=0.7807495 (443.35 it/sec) -training >> step=5007800, episode=835 reward=0.781745 (411.76 it/sec) -training >> step=5007900, episode=835 reward=0.791649 (434.09 it/sec) -training >> step=5008000, episode=835 reward=0.795409 (450.74 it/sec) -training >> step=5008100, episode=835 reward=0.7823821 (483.72 it/sec) -training >> step=5008200, episode=835 reward=0.7821658 (503.95 it/sec) -training >> step=5008300, episode=835 reward=0.7918245 (494.82 it/sec) -training >> step=5008400, episode=835 reward=0.7671295 (458.51 it/sec) -training >> step=5008500, episode=835 reward=0.7728536 (487.90 it/sec) -training >> step=5008600, episode=835 reward=0.7785603 (493.01 it/sec) -training >> step=5008700, episode=835 reward=0.7509795 (542.46 it/sec) -training >> step=5008800, episode=835 reward=0.7483993 (499.58 it/sec) -training >> step=5008900, episode=835 reward=0.7762933 (410.56 it/sec) -training >> step=5009000, episode=835 reward=0.7827281 (421.11 it/sec) -training >> step=5009100, episode=835 reward=0.7665704 (473.09 it/sec) -training >> step=5009200, episode=835 reward=0.7784144 (438.26 it/sec) -training >> step=5009300, episode=836 reward=0.774228 (84.24 it/sec) -training >> step=5009400, episode=836 reward=0.7700341 (431.21 it/sec) -training >> step=5009500, episode=836 reward=0.782132 (437.32 it/sec) -training >> step=5009600, episode=836 reward=0.7734693 (475.17 it/sec) -training >> step=5009700, episode=836 reward=0.7751709 (508.74 it/sec) -training >> step=5009800, episode=836 reward=0.7608913 (491.68 it/sec) -training >> step=5009900, episode=836 reward=0.7820778 (500.12 it/sec) -training >> step=5010000, episode=836 reward=0.8027217 (527.40 it/sec) -training >> step=5010100, episode=836 reward=0.7566363 (535.41 it/sec) -training >> step=5010200, episode=836 reward=0.7767861 (531.82 it/sec) -training >> step=5010300, episode=836 reward=0.7767388 (500.79 it/sec) -training >> step=5010400, episode=836 reward=0.7942562 (529.10 it/sec) -training >> step=5010500, episode=836 reward=0.7955171 (500.10 it/sec) -training >> step=5010600, episode=836 reward=0.7799612 (507.71 it/sec) -training >> step=5010700, episode=836 reward=0.7967913 (499.08 it/sec) -training >> step=5010800, episode=836 reward=0.7830071 (470.22 it/sec) -training >> step=5010900, episode=836 reward=0.778138 (481.96 it/sec) -training >> step=5011000, episode=836 reward=0.772472 (515.96 it/sec) -training >> step=5011100, episode=836 reward=0.7646927 (560.11 it/sec) -training >> step=5011200, episode=836 reward=0.7916118 (483.47 it/sec) -training >> step=5011300, episode=836 reward=0.7700893 (484.17 it/sec) -training >> step=5011400, episode=836 reward=0.7592487 (539.09 it/sec) -training >> step=5011500, episode=836 reward=0.7816355 (477.59 it/sec) -training >> step=5011600, episode=836 reward=0.7709144 (460.11 it/sec) -training >> step=5011700, episode=836 reward=0.7940656 (504.00 it/sec) -training >> step=5011800, episode=836 reward=0.7710027 (488.55 it/sec) -training >> step=5011900, episode=836 reward=0.7838396 (477.09 it/sec) -training >> step=5012000, episode=836 reward=0.795776 (418.00 it/sec) -training >> step=5012100, episode=836 reward=0.7691675 (546.88 it/sec) -training >> step=5012200, episode=836 reward=0.7742087 (450.93 it/sec) -training >> step=5012300, episode=836 reward=0.7646574 (505.91 it/sec) -training >> step=5012400, episode=836 reward=0.7836364 (530.15 it/sec) -training >> step=5012500, episode=836 reward=0.7961442 (487.30 it/sec) -training >> step=5012600, episode=836 reward=0.7958194 (509.40 it/sec) -training >> step=5012700, episode=836 reward=0.7646275 (448.30 it/sec) -training >> step=5012800, episode=836 reward=0.7788635 (487.01 it/sec) -training >> step=5012900, episode=836 reward=0.7747328 (453.14 it/sec) -training >> step=5013000, episode=836 reward=0.7794213 (476.79 it/sec) -training >> step=5013100, episode=836 reward=0.7867697 (460.91 it/sec) -training >> step=5013200, episode=836 reward=0.7817435 (496.66 it/sec) -training >> step=5013300, episode=836 reward=0.7833924 (451.21 it/sec) -training >> step=5013400, episode=836 reward=0.787254 (431.49 it/sec) -training >> step=5013500, episode=836 reward=0.7767676 (471.46 it/sec) -training >> step=5013600, episode=836 reward=0.7819296 (471.77 it/sec) -training >> step=5013700, episode=836 reward=0.8026355 (329.97 it/sec) -training >> step=5013800, episode=836 reward=0.7870257 (443.65 it/sec) -training >> step=5013900, episode=836 reward=0.7861605 (464.08 it/sec) -training >> step=5014000, episode=836 reward=0.8156375 (469.01 it/sec) -training >> step=5014100, episode=836 reward=0.7928049 (479.45 it/sec) -training >> step=5014200, episode=836 reward=0.7855759 (459.07 it/sec) -training >> step=5014300, episode=836 reward=0.7689868 (482.69 it/sec) -training >> step=5014400, episode=836 reward=0.7799502 (415.10 it/sec) -training >> step=5014500, episode=836 reward=0.7873663 (422.74 it/sec) -training >> step=5014600, episode=836 reward=0.7725292 (352.70 it/sec) -training >> step=5014700, episode=836 reward=0.77725 (432.86 it/sec) -training >> step=5014800, episode=836 reward=0.7616944 (422.15 it/sec) -training >> step=5014900, episode=836 reward=0.7564479 (474.96 it/sec) -training >> step=5015000, episode=836 reward=0.7566931 (539.96 it/sec) -training >> step=5015100, episode=836 reward=0.7842984 (493.67 it/sec) -training >> step=5015200, episode=836 reward=0.7816741 (428.96 it/sec) -training >> step=5015300, episode=837 reward=0.7867569 (64.44 it/sec) -training >> step=5015400, episode=837 reward=0.7573503 (513.71 it/sec) -training >> step=5015500, episode=837 reward=0.7736475 (477.01 it/sec) -training >> step=5015600, episode=837 reward=0.7837805 (536.43 it/sec) -training >> step=5015700, episode=837 reward=0.7799107 (446.58 it/sec) -training >> step=5015800, episode=837 reward=0.7725157 (486.92 it/sec) -training >> step=5015900, episode=837 reward=0.7954822 (493.69 it/sec) -training >> step=5016000, episode=837 reward=0.7726609 (510.82 it/sec) -training >> step=5016100, episode=837 reward=0.7979384 (529.96 it/sec) -training >> step=5016200, episode=837 reward=0.7911592 (450.58 it/sec) -training >> step=5016300, episode=837 reward=0.7753004 (456.56 it/sec) -training >> step=5016400, episode=837 reward=0.7689116 (440.64 it/sec) -training >> step=5016500, episode=837 reward=0.7890584 (421.65 it/sec) -training >> step=5016600, episode=837 reward=0.7821972 (409.75 it/sec) -training >> step=5016700, episode=837 reward=0.7504249 (515.11 it/sec) -training >> step=5016800, episode=837 reward=0.7869167 (465.27 it/sec) -training >> step=5016900, episode=837 reward=0.7849424 (443.96 it/sec) -training >> step=5017000, episode=837 reward=0.7854356 (456.47 it/sec) -training >> step=5017100, episode=837 reward=0.7714352 (525.73 it/sec) -training >> step=5017200, episode=837 reward=0.7800657 (465.77 it/sec) -training >> step=5017300, episode=837 reward=0.7865949 (448.58 it/sec) -training >> step=5017400, episode=837 reward=0.7806333 (469.73 it/sec) -training >> step=5017500, episode=837 reward=0.7948667 (497.50 it/sec) -training >> step=5017600, episode=837 reward=0.7816203 (471.78 it/sec) -training >> step=5017700, episode=837 reward=0.7728801 (505.46 it/sec) -training >> step=5017800, episode=837 reward=0.7816495 (513.72 it/sec) -training >> step=5017900, episode=837 reward=0.7914187 (471.79 it/sec) -training >> step=5018000, episode=837 reward=0.7875473 (482.65 it/sec) -training >> step=5018100, episode=837 reward=0.7614262 (455.43 it/sec) -training >> step=5018200, episode=837 reward=0.8000511 (500.17 it/sec) -training >> step=5018300, episode=837 reward=0.78564 (520.52 it/sec) -training >> step=5018400, episode=837 reward=0.7729912 (512.59 it/sec) -training >> step=5018500, episode=837 reward=0.7731258 (495.87 it/sec) -training >> step=5018600, episode=837 reward=0.774301 (400.99 it/sec) -training >> step=5018700, episode=837 reward=0.7679099 (400.69 it/sec) -training >> step=5018800, episode=837 reward=0.77106 (438.10 it/sec) -training >> step=5018900, episode=837 reward=0.7808579 (382.44 it/sec) -training >> step=5019000, episode=837 reward=0.7827685 (362.86 it/sec) -training >> step=5019100, episode=837 reward=0.7522485 (482.85 it/sec) -training >> step=5019200, episode=837 reward=0.7770476 (504.68 it/sec) -training >> step=5019300, episode=837 reward=0.7718635 (494.88 it/sec) -training >> step=5019400, episode=837 reward=0.8030497 (512.34 it/sec) -training >> step=5019500, episode=837 reward=0.7800911 (465.40 it/sec) -training >> step=5019600, episode=837 reward=0.7916468 (488.65 it/sec) -training >> step=5019700, episode=837 reward=0.7923063 (525.71 it/sec) -training >> step=5019800, episode=837 reward=0.7915332 (437.15 it/sec) -training >> step=5019900, episode=837 reward=0.7863057 (453.53 it/sec) -training >> step=5020000, episode=837 reward=0.7787551 (346.00 it/sec) -training >> step=5020100, episode=837 reward=0.7659106 (478.84 it/sec) -training >> step=5020200, episode=837 reward=0.759015 (480.28 it/sec) -training >> step=5020300, episode=837 reward=0.7921135 (512.82 it/sec) -training >> step=5020400, episode=837 reward=0.7757323 (514.14 it/sec) -training >> step=5020500, episode=837 reward=0.7665162 (459.29 it/sec) -training >> step=5020600, episode=837 reward=0.7807417 (384.79 it/sec) -training >> step=5020700, episode=837 reward=0.768038 (439.85 it/sec) -training >> step=5020800, episode=837 reward=0.7715932 (444.72 it/sec) -training >> step=5020900, episode=837 reward=0.7923957 (390.81 it/sec) -training >> step=5021000, episode=837 reward=0.7828389 (394.89 it/sec) -training >> step=5021100, episode=837 reward=0.7831174 (473.30 it/sec) -training >> step=5021200, episode=837 reward=0.7811629 (473.35 it/sec) -training >> step=5021300, episode=838 reward=0.7776617 (52.46 it/sec) -training >> step=5021400, episode=838 reward=0.7857671 (393.93 it/sec) -training >> step=5021500, episode=838 reward=0.7781832 (354.19 it/sec) -training >> step=5021600, episode=838 reward=0.7744269 (390.02 it/sec) -training >> step=5021700, episode=838 reward=0.7724473 (430.43 it/sec) -training >> step=5021800, episode=838 reward=0.7867725 (420.38 it/sec) -training >> step=5021900, episode=838 reward=0.7790826 (450.57 it/sec) -training >> step=5022000, episode=838 reward=0.7724332 (455.62 it/sec) -training >> step=5022100, episode=838 reward=0.7889801 (454.85 it/sec) -training >> step=5022200, episode=838 reward=0.7799969 (450.88 it/sec) -training >> step=5022300, episode=838 reward=0.7721082 (467.70 it/sec) -training >> step=5022400, episode=838 reward=0.806375 (415.38 it/sec) -training >> step=5022500, episode=838 reward=0.8062392 (438.65 it/sec) -training >> step=5022600, episode=838 reward=0.7693644 (430.18 it/sec) -training >> step=5022700, episode=838 reward=0.7741456 (411.34 it/sec) -training >> step=5022800, episode=838 reward=0.7886952 (420.78 it/sec) -training >> step=5022900, episode=838 reward=0.7858468 (393.66 it/sec) -training >> step=5023000, episode=838 reward=0.7842152 (434.68 it/sec) -training >> step=5023100, episode=838 reward=0.7744222 (460.12 it/sec) -training >> step=5023200, episode=838 reward=0.7687104 (469.05 it/sec) -training >> step=5023300, episode=838 reward=0.7642428 (455.59 it/sec) -training >> step=5023400, episode=838 reward=0.781468 (464.32 it/sec) -training >> step=5023500, episode=838 reward=0.78969 (438.25 it/sec) -training >> step=5023600, episode=838 reward=0.7816777 (406.39 it/sec) -training >> step=5023700, episode=838 reward=0.7747546 (462.70 it/sec) -training >> step=5023800, episode=838 reward=0.7739653 (449.77 it/sec) -training >> step=5023900, episode=838 reward=0.8046516 (462.74 it/sec) -training >> step=5024000, episode=838 reward=0.7782723 (452.04 it/sec) -training >> step=5024100, episode=838 reward=0.7910528 (491.95 it/sec) -training >> step=5024200, episode=838 reward=0.7666618 (482.58 it/sec) -training >> step=5024300, episode=838 reward=0.7938685 (477.57 it/sec) -training >> step=5024400, episode=838 reward=0.7743651 (498.66 it/sec) -training >> step=5024500, episode=838 reward=0.7939505 (506.22 it/sec) -training >> step=5024600, episode=838 reward=0.7573887 (456.86 it/sec) -training >> step=5024700, episode=838 reward=0.768095 (482.30 it/sec) -training >> step=5024800, episode=838 reward=0.7717375 (442.48 it/sec) -training >> step=5024900, episode=838 reward=0.7750052 (394.68 it/sec) -training >> step=5025000, episode=838 reward=0.7752994 (443.03 it/sec) -training >> step=5025100, episode=838 reward=0.7855634 (458.37 it/sec) -training >> step=5025200, episode=838 reward=0.7465428 (455.61 it/sec) -training >> step=5025300, episode=838 reward=0.7647618 (446.26 it/sec) -training >> step=5025400, episode=838 reward=0.7616157 (432.42 it/sec) -training >> step=5025500, episode=838 reward=0.7699956 (430.76 it/sec) -training >> step=5025600, episode=838 reward=0.7885365 (462.99 it/sec) -training >> step=5025700, episode=838 reward=0.7856003 (448.81 it/sec) -training >> step=5025800, episode=838 reward=0.7915571 (449.79 it/sec) -training >> step=5025900, episode=838 reward=0.7724519 (466.44 it/sec) -training >> step=5026000, episode=838 reward=0.7455572 (461.55 it/sec) -training >> step=5026100, episode=838 reward=0.7775812 (458.50 it/sec) -training >> step=5026200, episode=838 reward=0.7632703 (444.57 it/sec) -training >> step=5026300, episode=838 reward=0.7843871 (327.51 it/sec) -training >> step=5026400, episode=838 reward=0.7818936 (472.78 it/sec) -training >> step=5026500, episode=838 reward=0.7772637 (481.12 it/sec) -training >> step=5026600, episode=838 reward=0.7578588 (440.92 it/sec) -training >> step=5026700, episode=838 reward=0.7651097 (473.74 it/sec) -training >> step=5026800, episode=838 reward=0.7751304 (458.00 it/sec) -training >> step=5026900, episode=838 reward=0.7663283 (477.96 it/sec) -training >> step=5027000, episode=838 reward=0.7793463 (492.42 it/sec) -training >> step=5027100, episode=838 reward=0.7890668 (363.07 it/sec) -training >> step=5027200, episode=838 reward=0.7744785 (399.32 it/sec) -training >> step=5027300, episode=839 reward=0.7822399 (57.16 it/sec) -training >> step=5027400, episode=839 reward=0.7562537 (432.55 it/sec) -training >> step=5027500, episode=839 reward=0.7841905 (432.86 it/sec) -training >> step=5027600, episode=839 reward=0.7811514 (449.11 it/sec) -training >> step=5027700, episode=839 reward=0.7869845 (464.20 it/sec) -training >> step=5027800, episode=839 reward=0.7706719 (462.34 it/sec) -training >> step=5027900, episode=839 reward=0.8055987 (458.80 it/sec) -training >> step=5028000, episode=839 reward=0.7837481 (406.40 it/sec) -training >> step=5028100, episode=839 reward=0.8045727 (452.77 it/sec) -training >> step=5028200, episode=839 reward=0.7886955 (514.38 it/sec) -training >> step=5028300, episode=839 reward=0.7893306 (435.62 it/sec) -training >> step=5028400, episode=839 reward=0.7796192 (462.42 it/sec) -training >> step=5028500, episode=839 reward=0.7743117 (463.69 it/sec) -training >> step=5028600, episode=839 reward=0.7746347 (487.68 it/sec) -training >> step=5028700, episode=839 reward=0.782782 (478.59 it/sec) -training >> step=5028800, episode=839 reward=0.7702566 (456.25 it/sec) -training >> step=5028900, episode=839 reward=0.7785856 (469.61 it/sec) -training >> step=5029000, episode=839 reward=0.7807682 (411.67 it/sec) -training >> step=5029100, episode=839 reward=0.7847046 (467.99 it/sec) -training >> step=5029200, episode=839 reward=0.7815124 (415.32 it/sec) -training >> step=5029300, episode=839 reward=0.7757693 (470.08 it/sec) -training >> step=5029400, episode=839 reward=0.7754633 (401.81 it/sec) -training >> step=5029500, episode=839 reward=0.7956617 (432.60 it/sec) -training >> step=5029600, episode=839 reward=0.759437 (448.41 it/sec) -training >> step=5029700, episode=839 reward=0.7945852 (437.90 it/sec) -training >> step=5029800, episode=839 reward=0.7808822 (468.80 it/sec) -training >> step=5029900, episode=839 reward=0.7806083 (459.62 it/sec) -training >> step=5030000, episode=839 reward=0.7661517 (473.31 it/sec) -training >> step=5030100, episode=839 reward=0.7783357 (465.45 it/sec) -training >> step=5030200, episode=839 reward=0.7883915 (429.08 it/sec) -training >> step=5030300, episode=839 reward=0.7802942 (495.25 it/sec) -training >> step=5030400, episode=839 reward=0.7798881 (486.50 it/sec) -training >> step=5030500, episode=839 reward=0.7956401 (492.73 it/sec) -training >> step=5030600, episode=839 reward=0.792719 (473.90 it/sec) -training >> step=5030700, episode=839 reward=0.7666378 (418.09 it/sec) -training >> step=5030800, episode=839 reward=0.7630407 (415.67 it/sec) -training >> step=5030900, episode=839 reward=0.7882214 (465.11 it/sec) -training >> step=5031000, episode=839 reward=0.7874027 (480.02 it/sec) -training >> step=5031100, episode=839 reward=0.7860239 (466.70 it/sec) -training >> step=5031200, episode=839 reward=0.7761888 (459.69 it/sec) -training >> step=5031300, episode=839 reward=0.771183 (487.24 it/sec) -training >> step=5031400, episode=839 reward=0.7764766 (456.06 it/sec) -training >> step=5031500, episode=839 reward=0.7797156 (463.58 it/sec) -training >> step=5031600, episode=839 reward=0.7719619 (481.85 it/sec) -training >> step=5031700, episode=839 reward=0.7649049 (495.27 it/sec) -training >> step=5031800, episode=839 reward=0.792998 (509.09 it/sec) -training >> step=5031900, episode=839 reward=0.7598777 (478.32 it/sec) -training >> step=5032000, episode=839 reward=0.7748582 (430.69 it/sec) -training >> step=5032100, episode=839 reward=0.8011117 (486.03 it/sec) -training >> step=5032200, episode=839 reward=0.7576022 (458.59 it/sec) -training >> step=5032300, episode=839 reward=0.7474276 (462.73 it/sec) -training >> step=5032400, episode=839 reward=0.7844269 (328.85 it/sec) -training >> step=5032500, episode=839 reward=0.7664986 (487.15 it/sec) -training >> step=5032600, episode=839 reward=0.7971701 (466.97 it/sec) -training >> step=5032700, episode=839 reward=0.7834375 (490.03 it/sec) -training >> step=5032800, episode=839 reward=0.7818829 (499.72 it/sec) -training >> step=5032900, episode=839 reward=0.7870219 (484.69 it/sec) -training >> step=5033000, episode=839 reward=0.7829645 (442.87 it/sec) -training >> step=5033100, episode=839 reward=0.7911137 (501.51 it/sec) -training >> step=5033200, episode=839 reward=0.7769963 (483.57 it/sec) -training >> step=5033300, episode=840 reward=0.7816461 (58.56 it/sec) -training >> step=5033400, episode=840 reward=0.7629541 (407.56 it/sec) -training >> step=5033500, episode=840 reward=0.7431943 (467.19 it/sec) -training >> step=5033600, episode=840 reward=0.7656691 (414.49 it/sec) -training >> step=5033700, episode=840 reward=0.7690158 (435.41 it/sec) -training >> step=5033800, episode=840 reward=0.7812404 (439.70 it/sec) -training >> step=5033900, episode=840 reward=0.7827762 (445.16 it/sec) -training >> step=5034000, episode=840 reward=0.7855628 (413.88 it/sec) -training >> step=5034100, episode=840 reward=0.7816368 (476.57 it/sec) -training >> step=5034200, episode=840 reward=0.7819373 (464.88 it/sec) -training >> step=5034300, episode=840 reward=0.7954455 (465.74 it/sec) -training >> step=5034400, episode=840 reward=0.7831461 (432.14 it/sec) -training >> step=5034500, episode=840 reward=0.7721075 (423.49 it/sec) -training >> step=5034600, episode=840 reward=0.7826409 (439.88 it/sec) -training >> step=5034700, episode=840 reward=0.7730294 (488.33 it/sec) -training >> step=5034800, episode=840 reward=0.7792428 (467.26 it/sec) -training >> step=5034900, episode=840 reward=0.7834935 (495.15 it/sec) -training >> step=5035000, episode=840 reward=0.7870363 (429.78 it/sec) -training >> step=5035100, episode=840 reward=0.7823982 (482.61 it/sec) -training >> step=5035200, episode=840 reward=0.7801857 (514.62 it/sec) -training >> step=5035300, episode=840 reward=0.7818001 (509.10 it/sec) -training >> step=5035400, episode=840 reward=0.748634 (454.14 it/sec) -training >> step=5035500, episode=840 reward=0.7827782 (422.08 it/sec) -training >> step=5035600, episode=840 reward=0.7905044 (421.18 it/sec) -training >> step=5035700, episode=840 reward=0.7803469 (470.30 it/sec) -training >> step=5035800, episode=840 reward=0.7773729 (436.45 it/sec) -training >> step=5035900, episode=840 reward=0.7666951 (421.24 it/sec) -training >> step=5036000, episode=840 reward=0.7815454 (432.25 it/sec) -training >> step=5036100, episode=840 reward=0.7807497 (489.20 it/sec) -training >> step=5036200, episode=840 reward=0.7705951 (492.47 it/sec) -training >> step=5036300, episode=840 reward=0.7672324 (497.30 it/sec) -training >> step=5036400, episode=840 reward=0.7922655 (482.70 it/sec) -training >> step=5036500, episode=840 reward=0.7577958 (408.31 it/sec) -training >> step=5036600, episode=840 reward=0.7710322 (400.61 it/sec) -training >> step=5036700, episode=840 reward=0.7788132 (414.37 it/sec) -training >> step=5036800, episode=840 reward=0.7685157 (433.27 it/sec) -training >> step=5036900, episode=840 reward=0.8031312 (457.13 it/sec) -training >> step=5037000, episode=840 reward=0.7811583 (465.83 it/sec) -training >> step=5037100, episode=840 reward=0.7600268 (474.90 it/sec) -training >> step=5037200, episode=840 reward=0.7872962 (523.38 it/sec) -training >> step=5037300, episode=840 reward=0.7736227 (439.60 it/sec) -training >> step=5037400, episode=840 reward=0.7616697 (382.20 it/sec) -training >> step=5037500, episode=840 reward=0.760314 (405.37 it/sec) -training >> step=5037600, episode=840 reward=0.7967774 (452.85 it/sec) -training >> step=5037700, episode=840 reward=0.7792478 (467.65 it/sec) -training >> step=5037800, episode=840 reward=0.7789462 (421.50 it/sec) -training >> step=5037900, episode=840 reward=0.7749646 (429.51 it/sec) -training >> step=5038000, episode=840 reward=0.7666196 (437.72 it/sec) -training >> step=5038100, episode=840 reward=0.7834795 (438.28 it/sec) -training >> step=5038200, episode=840 reward=0.7820054 (425.98 it/sec) -training >> step=5038300, episode=840 reward=0.7769237 (465.05 it/sec) -training >> step=5038400, episode=840 reward=0.7900998 (480.40 it/sec) -training >> step=5038500, episode=840 reward=0.7603147 (434.25 it/sec) -training >> step=5038600, episode=840 reward=0.7766302 (342.21 it/sec) -training >> step=5038700, episode=840 reward=0.7607359 (461.48 it/sec) -training >> step=5038800, episode=840 reward=0.7855862 (483.74 it/sec) -training >> step=5038900, episode=840 reward=0.7912591 (416.53 it/sec) -training >> step=5039000, episode=840 reward=0.7595966 (429.36 it/sec) -training >> step=5039100, episode=840 reward=0.7950801 (397.02 it/sec) -training >> step=5039200, episode=840 reward=0.7714639 (414.20 it/sec) -training >> step=5039300, episode=841 reward=0.7701248 (104.70 it/sec) -training >> step=5039400, episode=841 reward=0.7818366 (486.29 it/sec) -training >> step=5039500, episode=841 reward=0.7873405 (494.20 it/sec) -training >> step=5039600, episode=841 reward=0.7692954 (479.64 it/sec) -training >> step=5039700, episode=841 reward=0.7973979 (525.88 it/sec) -training >> step=5039800, episode=841 reward=0.7696806 (481.43 it/sec) -training >> step=5039900, episode=841 reward=0.774304 (505.75 it/sec) -training >> step=5040000, episode=841 reward=0.7705637 (488.42 it/sec) -training >> step=5040100, episode=841 reward=0.7836763 (461.47 it/sec) -training >> step=5040200, episode=841 reward=0.76964 (512.30 it/sec) -training >> step=5040300, episode=841 reward=0.78703 (485.41 it/sec) -training >> step=5040400, episode=841 reward=0.7901412 (489.43 it/sec) -training >> step=5040500, episode=841 reward=0.795401 (430.61 it/sec) -training >> step=5040600, episode=841 reward=0.7941445 (476.47 it/sec) -training >> step=5040700, episode=841 reward=0.7938731 (531.74 it/sec) -training >> step=5040800, episode=841 reward=0.7657632 (447.81 it/sec) -training >> step=5040900, episode=841 reward=0.7545114 (500.21 it/sec) -training >> step=5041000, episode=841 reward=0.7797198 (475.34 it/sec) -training >> step=5041100, episode=841 reward=0.7715051 (519.57 it/sec) -training >> step=5041200, episode=841 reward=0.7662007 (494.56 it/sec) -training >> step=5041300, episode=841 reward=0.7967268 (489.52 it/sec) -training >> step=5041400, episode=841 reward=0.7773235 (560.38 it/sec) -training >> step=5041500, episode=841 reward=0.7807923 (474.18 it/sec) -training >> step=5041600, episode=841 reward=0.7826279 (466.58 it/sec) -training >> step=5041700, episode=841 reward=0.7771356 (515.04 it/sec) -training >> step=5041800, episode=841 reward=0.7906246 (455.20 it/sec) -training >> step=5041900, episode=841 reward=0.7999287 (489.86 it/sec) -training >> step=5042000, episode=841 reward=0.7839103 (484.76 it/sec) -training >> step=5042100, episode=841 reward=0.7594017 (509.52 it/sec) -training >> step=5042200, episode=841 reward=0.7840404 (437.06 it/sec) -training >> step=5042300, episode=841 reward=0.7916543 (449.45 it/sec) -training >> step=5042400, episode=841 reward=0.7607622 (529.38 it/sec) -training >> step=5042500, episode=841 reward=0.7817306 (516.20 it/sec) -training >> step=5042600, episode=841 reward=0.7492198 (478.75 it/sec) -training >> step=5042700, episode=841 reward=0.7745562 (508.97 it/sec) -training >> step=5042800, episode=841 reward=0.793727 (463.02 it/sec) -training >> step=5042900, episode=841 reward=0.7787085 (523.17 it/sec) -training >> step=5043000, episode=841 reward=0.7789699 (503.79 it/sec) -training >> step=5043100, episode=841 reward=0.7852371 (469.61 it/sec) -training >> step=5043200, episode=841 reward=0.7819415 (459.13 it/sec) -training >> step=5043300, episode=841 reward=0.7924443 (462.24 it/sec) -training >> step=5043400, episode=841 reward=0.8000042 (495.29 it/sec) -training >> step=5043500, episode=841 reward=0.7712024 (481.80 it/sec) -training >> step=5043600, episode=841 reward=0.7926083 (469.25 it/sec) -training >> step=5043700, episode=841 reward=0.7531069 (500.50 it/sec) -training >> step=5043800, episode=841 reward=0.7742578 (460.53 it/sec) -training >> step=5043900, episode=841 reward=0.7587056 (523.54 it/sec) -training >> step=5044000, episode=841 reward=0.7661256 (486.28 it/sec) -training >> step=5044100, episode=841 reward=0.773017 (457.82 it/sec) -training >> step=5044200, episode=841 reward=0.7683561 (467.32 it/sec) -training >> step=5044300, episode=841 reward=0.7706544 (496.12 it/sec) -training >> step=5044400, episode=841 reward=0.7666765 (494.30 it/sec) -training >> step=5044500, episode=841 reward=0.7832117 (504.05 it/sec) -training >> step=5044600, episode=841 reward=0.7997888 (524.41 it/sec) -training >> step=5044700, episode=841 reward=0.7845383 (335.67 it/sec) -training >> step=5044800, episode=841 reward=0.7605324 (470.86 it/sec) -training >> step=5044900, episode=841 reward=0.7717481 (451.05 it/sec) -training >> step=5045000, episode=841 reward=0.796635 (492.96 it/sec) -training >> step=5045100, episode=841 reward=0.7839158 (481.75 it/sec) -training >> step=5045200, episode=841 reward=0.7762284 (430.90 it/sec) -training >> step=5045300, episode=842 reward=0.7863134 (115.21 it/sec) -training >> step=5045400, episode=842 reward=0.7655873 (424.32 it/sec) -training >> step=5045500, episode=842 reward=0.7648739 (453.64 it/sec) -training >> step=5045600, episode=842 reward=0.7812761 (468.12 it/sec) -training >> step=5045700, episode=842 reward=0.800958 (478.66 it/sec) -training >> step=5045800, episode=842 reward=0.785683 (461.36 it/sec) -training >> step=5045900, episode=842 reward=0.7765177 (454.36 it/sec) -training >> step=5046000, episode=842 reward=0.7848178 (511.62 it/sec) -training >> step=5046100, episode=842 reward=0.7927175 (474.76 it/sec) -training >> step=5046200, episode=842 reward=0.7757032 (509.51 it/sec) -training >> step=5046300, episode=842 reward=0.7654468 (496.05 it/sec) -training >> step=5046400, episode=842 reward=0.788779 (511.92 it/sec) -training >> step=5046500, episode=842 reward=0.7666928 (485.14 it/sec) -training >> step=5046600, episode=842 reward=0.7808635 (501.07 it/sec) -training >> step=5046700, episode=842 reward=0.792231 (451.24 it/sec) -training >> step=5046800, episode=842 reward=0.7974934 (453.76 it/sec) -training >> step=5046900, episode=842 reward=0.779436 (468.18 it/sec) -training >> step=5047000, episode=842 reward=0.7785034 (477.64 it/sec) -training >> step=5047100, episode=842 reward=0.7788772 (522.81 it/sec) -training >> step=5047200, episode=842 reward=0.7832478 (487.58 it/sec) -training >> step=5047300, episode=842 reward=0.7964684 (459.45 it/sec) -training >> step=5047400, episode=842 reward=0.7702113 (457.50 it/sec) -training >> step=5047500, episode=842 reward=0.7675595 (492.89 it/sec) -training >> step=5047600, episode=842 reward=0.7742733 (467.44 it/sec) -training >> step=5047700, episode=842 reward=0.7681273 (485.11 it/sec) -training >> step=5047800, episode=842 reward=0.7715529 (405.58 it/sec) -training >> step=5047900, episode=842 reward=0.7933169 (482.95 it/sec) -training >> step=5048000, episode=842 reward=0.7839025 (405.19 it/sec) -training >> step=5048100, episode=842 reward=0.7883871 (497.66 it/sec) -training >> step=5048200, episode=842 reward=0.75978 (442.63 it/sec) -training >> step=5048300, episode=842 reward=0.7814885 (447.79 it/sec) -training >> step=5048400, episode=842 reward=0.7802309 (494.58 it/sec) -training >> step=5048500, episode=842 reward=0.7852399 (473.35 it/sec) -training >> step=5048600, episode=842 reward=0.7643502 (527.26 it/sec) -training >> step=5048700, episode=842 reward=0.7969843 (462.93 it/sec) -training >> step=5048800, episode=842 reward=0.7797436 (450.06 it/sec) -training >> step=5048900, episode=842 reward=0.78877 (511.78 it/sec) -training >> step=5049000, episode=842 reward=0.7777808 (421.04 it/sec) -training >> step=5049100, episode=842 reward=0.7867902 (457.73 it/sec) -training >> step=5049200, episode=842 reward=0.7860016 (402.26 it/sec) -training >> step=5049300, episode=842 reward=0.7847493 (448.24 it/sec) -training >> step=5049400, episode=842 reward=0.7771413 (488.00 it/sec) -training >> step=5049500, episode=842 reward=0.7827975 (444.94 it/sec) -training >> step=5049600, episode=842 reward=0.7757266 (476.41 it/sec) -training >> step=5049700, episode=842 reward=0.7537091 (477.95 it/sec) -training >> step=5049800, episode=842 reward=0.8002434 (478.14 it/sec) -training >> step=5049900, episode=842 reward=0.7782672 (473.20 it/sec) -training >> step=5050000, episode=842 reward=0.7988598 (458.75 it/sec) -training >> step=5050100, episode=842 reward=0.7641261 (479.41 it/sec) -training >> step=5050200, episode=842 reward=0.7931871 (456.62 it/sec) -training >> step=5050300, episode=842 reward=0.7639609 (482.25 it/sec) -training >> step=5050400, episode=842 reward=0.7893521 (503.84 it/sec) -training >> step=5050500, episode=842 reward=0.7533315 (394.96 it/sec) -training >> step=5050600, episode=842 reward=0.7816887 (387.73 it/sec) -training >> step=5050700, episode=842 reward=0.7586142 (337.82 it/sec) -training >> step=5050800, episode=842 reward=0.7722532 (453.47 it/sec) -training >> step=5050900, episode=842 reward=0.7764505 (470.79 it/sec) -training >> step=5051000, episode=842 reward=0.7723545 (478.63 it/sec) -training >> step=5051100, episode=842 reward=0.784052 (478.52 it/sec) -training >> step=5051200, episode=842 reward=0.7698669 (448.77 it/sec) -training >> step=5051300, episode=843 reward=0.7805113 (85.05 it/sec) -training >> step=5051400, episode=843 reward=0.7616147 (482.48 it/sec) -training >> step=5051500, episode=843 reward=0.7654019 (432.43 it/sec) -training >> step=5051600, episode=843 reward=0.7781112 (442.46 it/sec) -training >> step=5051700, episode=843 reward=0.7754696 (460.24 it/sec) -training >> step=5051800, episode=843 reward=0.7785918 (433.26 it/sec) -training >> step=5051900, episode=843 reward=0.7827687 (473.47 it/sec) -training >> step=5052000, episode=843 reward=0.7843402 (428.59 it/sec) -training >> step=5052100, episode=843 reward=0.7955934 (426.15 it/sec) -training >> step=5052200, episode=843 reward=0.777126 (440.11 it/sec) -training >> step=5052300, episode=843 reward=0.7842785 (437.59 it/sec) -training >> step=5052400, episode=843 reward=0.7778236 (464.77 it/sec) -training >> step=5052500, episode=843 reward=0.7776839 (459.17 it/sec) -training >> step=5052600, episode=843 reward=0.771938 (454.14 it/sec) -training >> step=5052700, episode=843 reward=0.7839476 (449.05 it/sec) -training >> step=5052800, episode=843 reward=0.778855 (493.20 it/sec) -training >> step=5052900, episode=843 reward=0.7691965 (455.96 it/sec) -training >> step=5053000, episode=843 reward=0.7436638 (430.31 it/sec) -training >> step=5053100, episode=843 reward=0.7674726 (465.16 it/sec) -training >> step=5053200, episode=843 reward=0.7696279 (453.19 it/sec) -training >> step=5053300, episode=843 reward=0.7655736 (427.23 it/sec) -training >> step=5053400, episode=843 reward=0.7892811 (442.89 it/sec) -training >> step=5053500, episode=843 reward=0.7653475 (481.11 it/sec) -training >> step=5053600, episode=843 reward=0.7737678 (461.12 it/sec) -training >> step=5053700, episode=843 reward=0.7535094 (445.58 it/sec) -training >> step=5053800, episode=843 reward=0.800094 (425.32 it/sec) -training >> step=5053900, episode=843 reward=0.7793177 (440.15 it/sec) -training >> step=5054000, episode=843 reward=0.7792307 (458.75 it/sec) -training >> step=5054100, episode=843 reward=0.782148 (458.28 it/sec) -training >> step=5054200, episode=843 reward=0.7550489 (454.81 it/sec) -training >> step=5054300, episode=843 reward=0.7694944 (444.47 it/sec) -training >> step=5054400, episode=843 reward=0.7867065 (447.10 it/sec) -training >> step=5054500, episode=843 reward=0.7704067 (438.80 it/sec) -training >> step=5054600, episode=843 reward=0.7901745 (467.67 it/sec) -training >> step=5054700, episode=843 reward=0.7748263 (456.85 it/sec) -training >> step=5054800, episode=843 reward=0.7746447 (455.73 it/sec) -training >> step=5054900, episode=843 reward=0.7780411 (434.50 it/sec) -training >> step=5055000, episode=843 reward=0.7736248 (472.34 it/sec) -training >> step=5055100, episode=843 reward=0.7904148 (446.29 it/sec) -training >> step=5055200, episode=843 reward=0.7690354 (425.79 it/sec) -training >> step=5055300, episode=843 reward=0.7835352 (432.26 it/sec) -training >> step=5055400, episode=843 reward=0.7766245 (428.33 it/sec) -training >> step=5055500, episode=843 reward=0.7787623 (385.11 it/sec) -training >> step=5055600, episode=843 reward=0.7886497 (453.79 it/sec) -training >> step=5055700, episode=843 reward=0.7551152 (460.23 it/sec) -training >> step=5055800, episode=843 reward=0.7839279 (459.69 it/sec) -training >> step=5055900, episode=843 reward=0.7681659 (452.59 it/sec) -training >> step=5056000, episode=843 reward=0.7517848 (410.21 it/sec) -training >> step=5056100, episode=843 reward=0.7924023 (515.58 it/sec) -training >> step=5056200, episode=843 reward=0.7752751 (489.16 it/sec) -training >> step=5056300, episode=843 reward=0.7859861 (471.61 it/sec) -training >> step=5056400, episode=843 reward=0.7869171 (457.58 it/sec) -training >> step=5056500, episode=843 reward=0.761349 (507.08 it/sec) -training >> step=5056600, episode=843 reward=0.7724708 (485.69 it/sec) -training >> step=5056700, episode=843 reward=0.772216 (459.50 it/sec) -training >> step=5056800, episode=843 reward=0.7638657 (514.44 it/sec) -training >> step=5056900, episode=843 reward=0.768456 (463.37 it/sec) -training >> step=5057000, episode=843 reward=0.7799519 (371.68 it/sec) -training >> step=5057100, episode=843 reward=0.7809202 (488.70 it/sec) -training >> step=5057200, episode=843 reward=0.7617542 (516.81 it/sec) -training >> step=5057300, episode=844 reward=0.7933308 (89.41 it/sec) -training >> step=5057400, episode=844 reward=0.7666398 (468.93 it/sec) -training >> step=5057500, episode=844 reward=0.7893071 (527.43 it/sec) -training >> step=5057600, episode=844 reward=0.7680225 (470.30 it/sec) -training >> step=5057700, episode=844 reward=0.7798396 (460.09 it/sec) -training >> step=5057800, episode=844 reward=0.76999 (482.16 it/sec) -training >> step=5057900, episode=844 reward=0.7769912 (504.66 it/sec) -training >> step=5058000, episode=844 reward=0.7730056 (509.18 it/sec) -training >> step=5058100, episode=844 reward=0.7854396 (498.76 it/sec) -training >> step=5058200, episode=844 reward=0.7837771 (509.46 it/sec) -training >> step=5058300, episode=844 reward=0.7637701 (484.48 it/sec) -training >> step=5058400, episode=844 reward=0.7704719 (489.94 it/sec) -training >> step=5058500, episode=844 reward=0.7648149 (514.83 it/sec) -training >> step=5058600, episode=844 reward=0.7516344 (483.57 it/sec) -training >> step=5058700, episode=844 reward=0.7965491 (512.02 it/sec) -training >> step=5058800, episode=844 reward=0.7720631 (474.66 it/sec) -training >> step=5058900, episode=844 reward=0.779963 (468.88 it/sec) -training >> step=5059000, episode=844 reward=0.7767811 (486.32 it/sec) -training >> step=5059100, episode=844 reward=0.7837192 (489.35 it/sec) -training >> step=5059200, episode=844 reward=0.7746103 (515.89 it/sec) -training >> step=5059300, episode=844 reward=0.7686082 (484.19 it/sec) -training >> step=5059400, episode=844 reward=0.80366 (472.13 it/sec) -training >> step=5059500, episode=844 reward=0.7653282 (491.67 it/sec) -training >> step=5059600, episode=844 reward=0.775869 (487.00 it/sec) -training >> step=5059700, episode=844 reward=0.778401 (452.74 it/sec) -training >> step=5059800, episode=844 reward=0.7853338 (465.41 it/sec) -training >> step=5059900, episode=844 reward=0.7795568 (506.37 it/sec) -training >> step=5060000, episode=844 reward=0.7906158 (485.74 it/sec) -training >> step=5060100, episode=844 reward=0.7978261 (468.73 it/sec) -training >> step=5060200, episode=844 reward=0.7959831 (454.21 it/sec) -training >> step=5060300, episode=844 reward=0.794789 (524.14 it/sec) -training >> step=5060400, episode=844 reward=0.793955 (468.18 it/sec) -training >> step=5060500, episode=844 reward=0.7678891 (522.15 it/sec) -training >> step=5060600, episode=844 reward=0.7732973 (436.43 it/sec) -training >> step=5060700, episode=844 reward=0.7766126 (466.36 it/sec) -training >> step=5060800, episode=844 reward=0.7982111 (472.37 it/sec) -training >> step=5060900, episode=844 reward=0.7885982 (488.07 it/sec) -training >> step=5061000, episode=844 reward=0.7812912 (496.94 it/sec) -training >> step=5061100, episode=844 reward=0.7854857 (466.99 it/sec) -training >> step=5061200, episode=844 reward=0.7865837 (436.32 it/sec) -training >> step=5061300, episode=844 reward=0.7895424 (528.07 it/sec) -training >> step=5061400, episode=844 reward=0.7716548 (488.01 it/sec) -training >> step=5061500, episode=844 reward=0.7618185 (482.77 it/sec) -training >> step=5061600, episode=844 reward=0.7679825 (473.82 it/sec) -training >> step=5061700, episode=844 reward=0.7912307 (524.53 it/sec) -training >> step=5061800, episode=844 reward=0.7816291 (494.28 it/sec) -training >> step=5061900, episode=844 reward=0.7715226 (505.47 it/sec) -training >> step=5062000, episode=844 reward=0.7798394 (493.21 it/sec) -training >> step=5062100, episode=844 reward=0.7746917 (510.68 it/sec) -training >> step=5062200, episode=844 reward=0.7891209 (485.47 it/sec) -training >> step=5062300, episode=844 reward=0.7855269 (479.44 it/sec) -training >> step=5062400, episode=844 reward=0.7796491 (457.56 it/sec) -training >> step=5062500, episode=844 reward=0.7808161 (511.06 it/sec) -training >> step=5062600, episode=844 reward=0.7718259 (482.97 it/sec) -training >> step=5062700, episode=844 reward=0.7713158 (456.08 it/sec) -training >> step=5062800, episode=844 reward=0.7845412 (557.83 it/sec) -training >> step=5062900, episode=844 reward=0.7907275 (489.36 it/sec) -training >> step=5063000, episode=844 reward=0.8006063 (496.50 it/sec) -training >> step=5063100, episode=844 reward=0.7815286 (524.71 it/sec) -training >> step=5063200, episode=844 reward=0.7882508 (539.53 it/sec) -training >> step=5063300, episode=845 reward=0.7929264 (50.42 it/sec) -training >> step=5063400, episode=845 reward=0.7631616 (485.58 it/sec) -training >> step=5063500, episode=845 reward=0.7903032 (425.80 it/sec) -training >> step=5063600, episode=845 reward=0.7737333 (427.77 it/sec) -training >> step=5063700, episode=845 reward=0.7882926 (459.92 it/sec) -training >> step=5063800, episode=845 reward=0.7837894 (438.86 it/sec) -training >> step=5063900, episode=845 reward=0.7923524 (460.53 it/sec) -training >> step=5064000, episode=845 reward=0.7879568 (473.62 it/sec) -training >> step=5064100, episode=845 reward=0.7702653 (499.52 it/sec) -training >> step=5064200, episode=845 reward=0.7791988 (439.81 it/sec) -training >> step=5064300, episode=845 reward=0.7721722 (449.65 it/sec) -training >> step=5064400, episode=845 reward=0.7867792 (477.86 it/sec) -training >> step=5064500, episode=845 reward=0.7665874 (435.25 it/sec) -training >> step=5064600, episode=845 reward=0.803844 (468.17 it/sec) -training >> step=5064700, episode=845 reward=0.7817469 (437.10 it/sec) -training >> step=5064800, episode=845 reward=0.7644523 (442.67 it/sec) -training >> step=5064900, episode=845 reward=0.7794693 (448.86 it/sec) -training >> step=5065000, episode=845 reward=0.7729241 (424.65 it/sec) -training >> step=5065100, episode=845 reward=0.7735366 (432.55 it/sec) -training >> step=5065200, episode=845 reward=0.7738406 (460.50 it/sec) -training >> step=5065300, episode=845 reward=0.7943516 (460.83 it/sec) -training >> step=5065400, episode=845 reward=0.7878779 (435.79 it/sec) -training >> step=5065500, episode=845 reward=0.7721646 (448.22 it/sec) -training >> step=5065600, episode=845 reward=0.7947488 (455.99 it/sec) -training >> step=5065700, episode=845 reward=0.7834213 (450.76 it/sec) -training >> step=5065800, episode=845 reward=0.7564683 (434.80 it/sec) -training >> step=5065900, episode=845 reward=0.7891735 (459.58 it/sec) -training >> step=5066000, episode=845 reward=0.7620914 (443.51 it/sec) -training >> step=5066100, episode=845 reward=0.7725388 (471.77 it/sec) -training >> step=5066200, episode=845 reward=0.7795912 (485.61 it/sec) -training >> step=5066300, episode=845 reward=0.7914841 (451.25 it/sec) -training >> step=5066400, episode=845 reward=0.7796174 (397.91 it/sec) -training >> step=5066500, episode=845 reward=0.8029284 (453.02 it/sec) -training >> step=5066600, episode=845 reward=0.7697449 (453.80 it/sec) -training >> step=5066700, episode=845 reward=0.7780355 (451.57 it/sec) -training >> step=5066800, episode=845 reward=0.7898983 (490.05 it/sec) -training >> step=5066900, episode=845 reward=0.7757931 (525.86 it/sec) -training >> step=5067000, episode=845 reward=0.7693322 (476.82 it/sec) -training >> step=5067100, episode=845 reward=0.7773443 (464.27 it/sec) -training >> step=5067200, episode=845 reward=0.7879917 (487.05 it/sec) -training >> step=5067300, episode=845 reward=0.7793242 (524.13 it/sec) -training >> step=5067400, episode=845 reward=0.7829782 (540.57 it/sec) -training >> step=5067500, episode=845 reward=0.7949833 (509.08 it/sec) -training >> step=5067600, episode=845 reward=0.7881687 (534.34 it/sec) -training >> step=5067700, episode=845 reward=0.8025239 (531.94 it/sec) -training >> step=5067800, episode=845 reward=0.762714 (511.18 it/sec) -training >> step=5067900, episode=845 reward=0.77722 (532.54 it/sec) -training >> step=5068000, episode=845 reward=0.7823408 (465.32 it/sec) -training >> step=5068100, episode=845 reward=0.7949234 (462.01 it/sec) -training >> step=5068200, episode=845 reward=0.7824288 (474.87 it/sec) -training >> step=5068300, episode=845 reward=0.7841163 (492.28 it/sec) -training >> step=5068400, episode=845 reward=0.7986058 (461.47 it/sec) -training >> step=5068500, episode=845 reward=0.7792225 (473.99 it/sec) -training >> step=5068600, episode=845 reward=0.7777343 (520.28 it/sec) -training >> step=5068700, episode=845 reward=0.7715369 (536.40 it/sec) -training >> step=5068800, episode=845 reward=0.7687547 (492.74 it/sec) -training >> step=5068900, episode=845 reward=0.7883098 (506.14 it/sec) -training >> step=5069000, episode=845 reward=0.788956 (523.86 it/sec) -training >> step=5069100, episode=845 reward=0.758482 (504.43 it/sec) -training >> step=5069200, episode=845 reward=0.7727807 (531.94 it/sec) -training >> step=5069300, episode=846 reward=0.7663067 (109.15 it/sec) -training >> step=5069400, episode=846 reward=0.7840388 (460.92 it/sec) -training >> step=5069500, episode=846 reward=0.777976 (503.01 it/sec) -training >> step=5069600, episode=846 reward=0.765895 (498.98 it/sec) -training >> step=5069700, episode=846 reward=0.7717617 (504.50 it/sec) -training >> step=5069800, episode=846 reward=0.7629408 (484.59 it/sec) -training >> step=5069900, episode=846 reward=0.7993236 (509.54 it/sec) -training >> step=5070000, episode=846 reward=0.7768843 (518.89 it/sec) -training >> step=5070100, episode=846 reward=0.7840521 (510.67 it/sec) -training >> step=5070200, episode=846 reward=0.7701203 (519.05 it/sec) -training >> step=5070300, episode=846 reward=0.7814892 (459.36 it/sec) -training >> step=5070400, episode=846 reward=0.7951486 (505.23 it/sec) -training >> step=5070500, episode=846 reward=0.7467958 (517.04 it/sec) -training >> step=5070600, episode=846 reward=0.7864958 (500.88 it/sec) -training >> step=5070700, episode=846 reward=0.7758245 (506.39 it/sec) -training >> step=5070800, episode=846 reward=0.7888673 (491.70 it/sec) -training >> step=5070900, episode=846 reward=0.7823388 (509.41 it/sec) -training >> step=5071000, episode=846 reward=0.7910005 (511.04 it/sec) -training >> step=5071100, episode=846 reward=0.7889529 (484.71 it/sec) -training >> step=5071200, episode=846 reward=0.7939429 (527.55 it/sec) -training >> step=5071300, episode=846 reward=0.7825735 (487.81 it/sec) -training >> step=5071400, episode=846 reward=0.7877975 (492.77 it/sec) -training >> step=5071500, episode=846 reward=0.7745386 (521.87 it/sec) -training >> step=5071600, episode=846 reward=0.7779524 (518.50 it/sec) -training >> step=5071700, episode=846 reward=0.7897775 (506.09 it/sec) -training >> step=5071800, episode=846 reward=0.7989127 (480.14 it/sec) -training >> step=5071900, episode=846 reward=0.7786018 (499.96 it/sec) -training >> step=5072000, episode=846 reward=0.7755633 (506.69 it/sec) -training >> step=5072100, episode=846 reward=0.7622745 (450.70 it/sec) -training >> step=5072200, episode=846 reward=0.7773014 (509.54 it/sec) -training >> step=5072300, episode=846 reward=0.7926111 (520.07 it/sec) -training >> step=5072400, episode=846 reward=0.7552283 (494.55 it/sec) -training >> step=5072500, episode=846 reward=0.7887136 (489.45 it/sec) -training >> step=5072600, episode=846 reward=0.7878534 (530.52 it/sec) -training >> step=5072700, episode=846 reward=0.7864234 (496.66 it/sec) -training >> step=5072800, episode=846 reward=0.7843075 (470.92 it/sec) -training >> step=5072900, episode=846 reward=0.7548137 (488.63 it/sec) -training >> step=5073000, episode=846 reward=0.7844042 (495.94 it/sec) -training >> step=5073100, episode=846 reward=0.7743171 (474.33 it/sec) -training >> step=5073200, episode=846 reward=0.7756681 (467.53 it/sec) -training >> step=5073300, episode=846 reward=0.7632945 (501.19 it/sec) -training >> step=5073400, episode=846 reward=0.7844962 (500.32 it/sec) -training >> step=5073500, episode=846 reward=0.8078426 (504.67 it/sec) -training >> step=5073600, episode=846 reward=0.7829514 (511.61 it/sec) -training >> step=5073700, episode=846 reward=0.778773 (518.86 it/sec) -training >> step=5073800, episode=846 reward=0.7744747 (491.97 it/sec) -training >> step=5073900, episode=846 reward=0.7638033 (432.17 it/sec) -training >> step=5074000, episode=846 reward=0.7840407 (498.61 it/sec) -training >> step=5074100, episode=846 reward=0.7728707 (547.18 it/sec) -training >> step=5074200, episode=846 reward=0.79771 (533.31 it/sec) -training >> step=5074300, episode=846 reward=0.7810557 (506.25 it/sec) -training >> step=5074400, episode=846 reward=0.792574 (482.65 it/sec) -training >> step=5074500, episode=846 reward=0.7803127 (464.31 it/sec) -training >> step=5074600, episode=846 reward=0.7876749 (509.28 it/sec) -training >> step=5074700, episode=846 reward=0.7838541 (459.57 it/sec) -training >> step=5074800, episode=846 reward=0.7753835 (551.12 it/sec) -training >> step=5074900, episode=846 reward=0.7829404 (467.61 it/sec) -training >> step=5075000, episode=846 reward=0.7940752 (434.88 it/sec) -training >> step=5075100, episode=846 reward=0.7776874 (522.51 it/sec) -training >> step=5075200, episode=846 reward=0.7785265 (528.81 it/sec) -training >> step=5075300, episode=847 reward=0.7807896 (94.38 it/sec) -training >> step=5075400, episode=847 reward=0.7844589 (259.84 it/sec) -training >> step=5075500, episode=847 reward=0.7819656 (522.53 it/sec) -training >> step=5075600, episode=847 reward=0.7674073 (463.29 it/sec) -training >> step=5075700, episode=847 reward=0.7709655 (522.45 it/sec) -training >> step=5075800, episode=847 reward=0.7681383 (485.82 it/sec) -training >> step=5075900, episode=847 reward=0.7885121 (458.43 it/sec) -training >> step=5076000, episode=847 reward=0.7745616 (492.42 it/sec) -training >> step=5076100, episode=847 reward=0.7654635 (532.10 it/sec) -training >> step=5076200, episode=847 reward=0.7762471 (518.00 it/sec) -training >> step=5076300, episode=847 reward=0.7680106 (513.91 it/sec) -training >> step=5076400, episode=847 reward=0.781137 (485.05 it/sec) -training >> step=5076500, episode=847 reward=0.7911217 (528.02 it/sec) -training >> step=5076600, episode=847 reward=0.7846383 (525.63 it/sec) -training >> step=5076700, episode=847 reward=0.7879739 (529.16 it/sec) -training >> step=5076800, episode=847 reward=0.762069 (491.82 it/sec) -training >> step=5076900, episode=847 reward=0.7848172 (482.67 it/sec) -training >> step=5077000, episode=847 reward=0.7639494 (499.07 it/sec) -training >> step=5077100, episode=847 reward=0.7704793 (486.55 it/sec) -training >> step=5077200, episode=847 reward=0.7562066 (514.96 it/sec) -training >> step=5077300, episode=847 reward=0.7770783 (474.03 it/sec) -training >> step=5077400, episode=847 reward=0.7775656 (539.54 it/sec) -training >> step=5077500, episode=847 reward=0.7938819 (477.16 it/sec) -training >> step=5077600, episode=847 reward=0.7817304 (427.78 it/sec) -training >> step=5077700, episode=847 reward=0.7929826 (476.05 it/sec) -training >> step=5077800, episode=847 reward=0.7835761 (546.10 it/sec) -training >> step=5077900, episode=847 reward=0.8159934 (500.12 it/sec) -training >> step=5078000, episode=847 reward=0.7714891 (478.21 it/sec) -training >> step=5078100, episode=847 reward=0.7609401 (528.08 it/sec) -training >> step=5078200, episode=847 reward=0.7780908 (500.08 it/sec) -training >> step=5078300, episode=847 reward=0.7834517 (518.89 it/sec) -training >> step=5078400, episode=847 reward=0.7873976 (491.27 it/sec) -training >> step=5078500, episode=847 reward=0.7942294 (509.87 it/sec) -training >> step=5078600, episode=847 reward=0.7905918 (510.61 it/sec) -training >> step=5078700, episode=847 reward=0.7809459 (517.58 it/sec) -training >> step=5078800, episode=847 reward=0.7871832 (485.27 it/sec) -training >> step=5078900, episode=847 reward=0.7897125 (467.04 it/sec) -training >> step=5079000, episode=847 reward=0.7934501 (497.95 it/sec) -training >> step=5079100, episode=847 reward=0.7887213 (505.80 it/sec) -training >> step=5079200, episode=847 reward=0.7896208 (528.58 it/sec) -training >> step=5079300, episode=847 reward=0.7826824 (467.16 it/sec) -training >> step=5079400, episode=847 reward=0.787111 (465.43 it/sec) -training >> step=5079500, episode=847 reward=0.7809418 (476.68 it/sec) -training >> step=5079600, episode=847 reward=0.7658502 (497.49 it/sec) -training >> step=5079700, episode=847 reward=0.7684138 (523.29 it/sec) -training >> step=5079800, episode=847 reward=0.787564 (454.41 it/sec) -training >> step=5079900, episode=847 reward=0.7742324 (555.22 it/sec) -training >> step=5080000, episode=847 reward=0.7771333 (469.26 it/sec) -training >> step=5080100, episode=847 reward=0.7740577 (507.56 it/sec) -training >> step=5080200, episode=847 reward=0.7860597 (521.39 it/sec) -training >> step=5080300, episode=847 reward=0.7711443 (523.10 it/sec) -training >> step=5080400, episode=847 reward=0.7684948 (501.27 it/sec) -training >> step=5080500, episode=847 reward=0.7817306 (478.94 it/sec) -training >> step=5080600, episode=847 reward=0.7680598 (509.88 it/sec) -training >> step=5080700, episode=847 reward=0.777447 (525.72 it/sec) -training >> step=5080800, episode=847 reward=0.7941976 (496.07 it/sec) -training >> step=5080900, episode=847 reward=0.7834376 (528.00 it/sec) -training >> step=5081000, episode=847 reward=0.7816124 (515.75 it/sec) -training >> step=5081100, episode=847 reward=0.781396 (515.17 it/sec) -training >> step=5081200, episode=847 reward=0.7593551 (454.15 it/sec) -training >> step=5081300, episode=848 reward=0.7595806 (74.31 it/sec) -training >> step=5081400, episode=848 reward=0.7772292 (488.19 it/sec) -training >> step=5081500, episode=848 reward=0.7768053 (509.49 it/sec) -training >> step=5081600, episode=848 reward=0.7731483 (451.93 it/sec) -training >> step=5081700, episode=848 reward=0.7792668 (515.78 it/sec) -training >> step=5081800, episode=848 reward=0.7869983 (478.63 it/sec) -training >> step=5081900, episode=848 reward=0.7595152 (512.03 it/sec) -training >> step=5082000, episode=848 reward=0.7749325 (471.80 it/sec) -training >> step=5082100, episode=848 reward=0.8046265 (449.44 it/sec) -training >> step=5082200, episode=848 reward=0.7946049 (478.19 it/sec) -training >> step=5082300, episode=848 reward=0.765111 (500.22 it/sec) -training >> step=5082400, episode=848 reward=0.785633 (497.86 it/sec) -training >> step=5082500, episode=848 reward=0.7898234 (521.03 it/sec) -training >> step=5082600, episode=848 reward=0.7905059 (486.29 it/sec) -training >> step=5082700, episode=848 reward=0.7938576 (525.86 it/sec) -training >> step=5082800, episode=848 reward=0.7750347 (494.24 it/sec) -training >> step=5082900, episode=848 reward=0.7731428 (547.28 it/sec) -training >> step=5083000, episode=848 reward=0.777452 (447.51 it/sec) -training >> step=5083100, episode=848 reward=0.7828013 (507.51 it/sec) -training >> step=5083200, episode=848 reward=0.7670699 (525.79 it/sec) -training >> step=5083300, episode=848 reward=0.7851485 (493.03 it/sec) -training >> step=5083400, episode=848 reward=0.7967243 (428.86 it/sec) -training >> step=5083500, episode=848 reward=0.7936124 (481.91 it/sec) -training >> step=5083600, episode=848 reward=0.7988459 (515.39 it/sec) -training >> step=5083700, episode=848 reward=0.8126903 (514.41 it/sec) -training >> step=5083800, episode=848 reward=0.786272 (508.74 it/sec) -training >> step=5083900, episode=848 reward=0.7933292 (557.70 it/sec) -training >> step=5084000, episode=848 reward=0.7709551 (449.90 it/sec) -training >> step=5084100, episode=848 reward=0.7714534 (508.56 it/sec) -training >> step=5084200, episode=848 reward=0.7736248 (510.08 it/sec) -training >> step=5084300, episode=848 reward=0.7857872 (494.28 it/sec) -training >> step=5084400, episode=848 reward=0.7818694 (534.45 it/sec) -training >> step=5084500, episode=848 reward=0.7878614 (492.08 it/sec) -training >> step=5084600, episode=848 reward=0.7720249 (504.31 it/sec) -training >> step=5084700, episode=848 reward=0.7869762 (522.64 it/sec) -training >> step=5084800, episode=848 reward=0.7868996 (511.09 it/sec) -training >> step=5084900, episode=848 reward=0.7831376 (528.89 it/sec) -training >> step=5085000, episode=848 reward=0.7980648 (518.96 it/sec) -training >> step=5085100, episode=848 reward=0.7622864 (487.33 it/sec) -training >> step=5085200, episode=848 reward=0.7768726 (509.01 it/sec) -training >> step=5085300, episode=848 reward=0.7948315 (544.57 it/sec) -training >> step=5085400, episode=848 reward=0.7669408 (522.12 it/sec) -training >> step=5085500, episode=848 reward=0.7834349 (488.54 it/sec) -training >> step=5085600, episode=848 reward=0.7847471 (476.53 it/sec) -training >> step=5085700, episode=848 reward=0.7837819 (521.16 it/sec) -training >> step=5085800, episode=848 reward=0.7728034 (518.87 it/sec) -training >> step=5085900, episode=848 reward=0.7723228 (488.46 it/sec) -training >> step=5086000, episode=848 reward=0.7707284 (526.45 it/sec) -training >> step=5086100, episode=848 reward=0.7550809 (505.67 it/sec) -training >> step=5086200, episode=848 reward=0.774412 (507.29 it/sec) -training >> step=5086300, episode=848 reward=0.7851057 (518.70 it/sec) -training >> step=5086400, episode=848 reward=0.7734317 (486.93 it/sec) -training >> step=5086500, episode=848 reward=0.7950032 (529.66 it/sec) -training >> step=5086600, episode=848 reward=0.7610425 (482.53 it/sec) -training >> step=5086700, episode=848 reward=0.7738599 (487.96 it/sec) -training >> step=5086800, episode=848 reward=0.7779827 (529.59 it/sec) -training >> step=5086900, episode=848 reward=0.7664686 (504.91 it/sec) -training >> step=5087000, episode=848 reward=0.7711657 (486.38 it/sec) -training >> step=5087100, episode=848 reward=0.7795578 (459.16 it/sec) -training >> step=5087200, episode=848 reward=0.8103144 (508.17 it/sec) -training >> step=5087300, episode=849 reward=0.7865371 (125.47 it/sec) -training >> step=5087400, episode=849 reward=0.7574899 (508.56 it/sec) -training >> step=5087500, episode=849 reward=0.7646061 (554.36 it/sec) -training >> step=5087600, episode=849 reward=0.7908401 (494.69 it/sec) -training >> step=5087700, episode=849 reward=0.7462385 (516.90 it/sec) -training >> step=5087800, episode=849 reward=0.7723501 (495.82 it/sec) -training >> step=5087900, episode=849 reward=0.7694573 (515.16 it/sec) -training >> step=5088000, episode=849 reward=0.7811749 (461.11 it/sec) -training >> step=5088100, episode=849 reward=0.7864949 (513.47 it/sec) -training >> step=5088200, episode=849 reward=0.7885813 (531.07 it/sec) -training >> step=5088300, episode=849 reward=0.7886626 (482.74 it/sec) -training >> step=5088400, episode=849 reward=0.7711926 (455.61 it/sec) -training >> step=5088500, episode=849 reward=0.7618979 (489.24 it/sec) -training >> step=5088600, episode=849 reward=0.7658911 (523.45 it/sec) -training >> step=5088700, episode=849 reward=0.7866398 (541.90 it/sec) -training >> step=5088800, episode=849 reward=0.7715036 (537.03 it/sec) -training >> step=5088900, episode=849 reward=0.793121 (522.63 it/sec) -training >> step=5089000, episode=849 reward=0.7775626 (528.74 it/sec) -training >> step=5089100, episode=849 reward=0.7868459 (528.67 it/sec) -training >> step=5089200, episode=849 reward=0.7610691 (503.78 it/sec) -training >> step=5089300, episode=849 reward=0.7840757 (560.72 it/sec) -training >> step=5089400, episode=849 reward=0.7827557 (458.46 it/sec) -training >> step=5089500, episode=849 reward=0.7735432 (432.03 it/sec) -training >> step=5089600, episode=849 reward=0.7833688 (527.70 it/sec) -training >> step=5089700, episode=849 reward=0.7819517 (481.46 it/sec) -training >> step=5089800, episode=849 reward=0.7813817 (512.60 it/sec) -training >> step=5089900, episode=849 reward=0.7918155 (490.29 it/sec) -training >> step=5090000, episode=849 reward=0.7822743 (530.45 it/sec) -training >> step=5090100, episode=849 reward=0.7815888 (515.80 it/sec) -training >> step=5090200, episode=849 reward=0.7696841 (461.06 it/sec) -training >> step=5090300, episode=849 reward=0.77996 (522.93 it/sec) -training >> step=5090400, episode=849 reward=0.7837138 (519.87 it/sec) -training >> step=5090500, episode=849 reward=0.7875745 (503.02 it/sec) -training >> step=5090600, episode=849 reward=0.7690862 (503.86 it/sec) -training >> step=5090700, episode=849 reward=0.7944032 (506.50 it/sec) -training >> step=5090800, episode=849 reward=0.7754536 (529.12 it/sec) -training >> step=5090900, episode=849 reward=0.7653602 (481.66 it/sec) -training >> step=5091000, episode=849 reward=0.7886363 (505.39 it/sec) -training >> step=5091100, episode=849 reward=0.7741669 (513.14 it/sec) -training >> step=5091200, episode=849 reward=0.7935544 (499.33 it/sec) -training >> step=5091300, episode=849 reward=0.764723 (494.94 it/sec) -training >> step=5091400, episode=849 reward=0.7904296 (551.51 it/sec) -training >> step=5091500, episode=849 reward=0.7658716 (502.69 it/sec) -training >> step=5091600, episode=849 reward=0.782051 (508.95 it/sec) -training >> step=5091700, episode=849 reward=0.7818391 (510.27 it/sec) -training >> step=5091800, episode=849 reward=0.784152 (527.98 it/sec) -training >> step=5091900, episode=849 reward=0.7657168 (402.98 it/sec) -training >> step=5092000, episode=849 reward=0.7813176 (478.74 it/sec) -training >> step=5092100, episode=849 reward=0.7692412 (506.64 it/sec) -training >> step=5092200, episode=849 reward=0.7844539 (502.13 it/sec) -training >> step=5092300, episode=849 reward=0.7661191 (518.16 it/sec) -training >> step=5092400, episode=849 reward=0.7867872 (503.43 it/sec) -training >> step=5092500, episode=849 reward=0.7704107 (504.53 it/sec) -training >> step=5092600, episode=849 reward=0.779003 (491.23 it/sec) -training >> step=5092700, episode=849 reward=0.800244 (518.55 it/sec) -training >> step=5092800, episode=849 reward=0.7959976 (514.21 it/sec) -training >> step=5092900, episode=849 reward=0.7809001 (558.06 it/sec) -training >> step=5093000, episode=849 reward=0.798439 (498.99 it/sec) -training >> step=5093100, episode=849 reward=0.7670174 (499.64 it/sec) -training >> step=5093200, episode=849 reward=0.7767566 (515.61 it/sec) -training >> step=5093300, episode=850 reward=0.7729788 (114.81 it/sec) -training >> step=5093400, episode=850 reward=0.7698563 (465.48 it/sec) -training >> step=5093500, episode=850 reward=0.7815994 (502.97 it/sec) -training >> step=5093600, episode=850 reward=0.7794449 (542.49 it/sec) -training >> step=5093700, episode=850 reward=0.7836376 (464.78 it/sec) -training >> step=5093800, episode=850 reward=0.7711004 (516.90 it/sec) -training >> step=5093900, episode=850 reward=0.7965623 (501.15 it/sec) -training >> step=5094000, episode=850 reward=0.7797628 (524.95 it/sec) -training >> step=5094100, episode=850 reward=0.7994553 (499.59 it/sec) -training >> step=5094200, episode=850 reward=0.7893655 (501.76 it/sec) -training >> step=5094300, episode=850 reward=0.7655133 (489.78 it/sec) -training >> step=5094400, episode=850 reward=0.7917344 (517.92 it/sec) -training >> step=5094500, episode=850 reward=0.7522793 (469.17 it/sec) -training >> step=5094600, episode=850 reward=0.7687585 (538.93 it/sec) -training >> step=5094700, episode=850 reward=0.7840996 (521.83 it/sec) -training >> step=5094800, episode=850 reward=0.7869809 (520.39 it/sec) -training >> step=5094900, episode=850 reward=0.7933461 (436.83 it/sec) -training >> step=5095000, episode=850 reward=0.7750515 (508.25 it/sec) -training >> step=5095100, episode=850 reward=0.7852886 (528.85 it/sec) -training >> step=5095200, episode=850 reward=0.785166 (509.92 it/sec) -training >> step=5095300, episode=850 reward=0.7978153 (483.14 it/sec) -training >> step=5095400, episode=850 reward=0.7630584 (530.30 it/sec) -training >> step=5095500, episode=850 reward=0.7622681 (455.05 it/sec) -training >> step=5095600, episode=850 reward=0.7809128 (491.97 it/sec) -training >> step=5095700, episode=850 reward=0.7917721 (551.30 it/sec) -training >> step=5095800, episode=850 reward=0.7456092 (483.07 it/sec) -training >> step=5095900, episode=850 reward=0.7762036 (502.49 it/sec) -training >> step=5096000, episode=850 reward=0.7773554 (512.04 it/sec) -training >> step=5096100, episode=850 reward=0.7868417 (545.86 it/sec) -training >> step=5096200, episode=850 reward=0.7615703 (502.39 it/sec) -training >> step=5096300, episode=850 reward=0.7783949 (479.17 it/sec) -training >> step=5096400, episode=850 reward=0.7858822 (495.95 it/sec) -training >> step=5096500, episode=850 reward=0.7880064 (557.84 it/sec) -training >> step=5096600, episode=850 reward=0.7928126 (478.57 it/sec) -training >> step=5096700, episode=850 reward=0.7529736 (496.98 it/sec) -training >> step=5096800, episode=850 reward=0.7743074 (505.89 it/sec) -training >> step=5096900, episode=850 reward=0.7919796 (478.36 it/sec) -training >> step=5097000, episode=850 reward=0.7885899 (522.02 it/sec) -training >> step=5097100, episode=850 reward=0.8037627 (483.43 it/sec) -training >> step=5097200, episode=850 reward=0.80131 (548.48 it/sec) -training >> step=5097300, episode=850 reward=0.8005419 (485.60 it/sec) -training >> step=5097400, episode=850 reward=0.77947 (486.87 it/sec) -training >> step=5097500, episode=850 reward=0.7767454 (542.97 it/sec) -training >> step=5097600, episode=850 reward=0.7928178 (490.75 it/sec) -training >> step=5097700, episode=850 reward=0.768415 (514.05 it/sec) -training >> step=5097800, episode=850 reward=0.7967857 (512.76 it/sec) -training >> step=5097900, episode=850 reward=0.7757143 (514.84 it/sec) -training >> step=5098000, episode=850 reward=0.7877319 (517.84 it/sec) -training >> step=5098100, episode=850 reward=0.7728954 (491.14 it/sec) -training >> step=5098200, episode=850 reward=0.77397 (498.55 it/sec) -training >> step=5098300, episode=850 reward=0.7763327 (557.99 it/sec) -training >> step=5098400, episode=850 reward=0.7939103 (452.75 it/sec) -training >> step=5098500, episode=850 reward=0.7698152 (535.30 it/sec) -training >> step=5098600, episode=850 reward=0.7874641 (553.64 it/sec) -training >> step=5098700, episode=850 reward=0.7782389 (523.60 it/sec) -training >> step=5098800, episode=850 reward=0.7778857 (489.26 it/sec) -training >> step=5098900, episode=850 reward=0.7907553 (485.74 it/sec) -training >> step=5099000, episode=850 reward=0.7858455 (514.22 it/sec) -training >> step=5099100, episode=850 reward=0.7845894 (505.12 it/sec) -training >> step=5099200, episode=850 reward=0.7647291 (412.10 it/sec) -training >> step=5099300, episode=851 reward=0.8002073 (122.56 it/sec) -training >> step=5099400, episode=851 reward=0.7894342 (455.14 it/sec) -training >> step=5099500, episode=851 reward=0.7685632 (492.42 it/sec) -training >> step=5099600, episode=851 reward=0.7866861 (450.79 it/sec) -training >> step=5099700, episode=851 reward=0.7860449 (480.63 it/sec) -training >> step=5099800, episode=851 reward=0.7962452 (441.35 it/sec) -training >> step=5099900, episode=851 reward=0.7824447 (486.27 it/sec) -training >> step=5100000, episode=851 reward=0.784537 (506.73 it/sec) -training >> step=5100100, episode=851 reward=0.791316 (463.52 it/sec) -training >> step=5100200, episode=851 reward=0.7579435 (473.75 it/sec) -training >> step=5100300, episode=851 reward=0.7949471 (504.83 it/sec) -training >> step=5100400, episode=851 reward=0.7643124 (510.82 it/sec) -training >> step=5100500, episode=851 reward=0.7676819 (482.52 it/sec) -training >> step=5100600, episode=851 reward=0.7707174 (439.40 it/sec) -training >> step=5100700, episode=851 reward=0.7994928 (492.98 it/sec) -training >> step=5100800, episode=851 reward=0.8024911 (471.24 it/sec) -training >> step=5100900, episode=851 reward=0.7701172 (440.97 it/sec) -training >> step=5101000, episode=851 reward=0.7740608 (482.84 it/sec) -training >> step=5101100, episode=851 reward=0.7905219 (443.69 it/sec) -training >> step=5101200, episode=851 reward=0.7848386 (433.44 it/sec) -training >> step=5101300, episode=851 reward=0.7621458 (446.89 it/sec) -training >> step=5101400, episode=851 reward=0.7801247 (508.70 it/sec) -training >> step=5101500, episode=851 reward=0.7576315 (444.01 it/sec) -training >> step=5101600, episode=851 reward=0.753147 (452.64 it/sec) -training >> step=5101700, episode=851 reward=0.7721816 (499.02 it/sec) -training >> step=5101800, episode=851 reward=0.7628397 (468.85 it/sec) -training >> step=5101900, episode=851 reward=0.7620649 (508.18 it/sec) -training >> step=5102000, episode=851 reward=0.7952913 (455.02 it/sec) -training >> step=5102100, episode=851 reward=0.7731817 (519.09 it/sec) -training >> step=5102200, episode=851 reward=0.7639732 (456.98 it/sec) -training >> step=5102300, episode=851 reward=0.7714 (444.12 it/sec) -training >> step=5102400, episode=851 reward=0.7701123 (455.68 it/sec) -training >> step=5102500, episode=851 reward=0.7945216 (506.16 it/sec) -training >> step=5102600, episode=851 reward=0.7835786 (397.64 it/sec) -training >> step=5102700, episode=851 reward=0.7781228 (483.64 it/sec) -training >> step=5102800, episode=851 reward=0.7850342 (480.67 it/sec) -training >> step=5102900, episode=851 reward=0.7784848 (516.83 it/sec) -training >> step=5103000, episode=851 reward=0.7669348 (422.95 it/sec) -training >> step=5103100, episode=851 reward=0.7880449 (494.40 it/sec) -training >> step=5103200, episode=851 reward=0.7966974 (470.86 it/sec) -training >> step=5103300, episode=851 reward=0.7746547 (402.85 it/sec) -training >> step=5103400, episode=851 reward=0.7747589 (438.64 it/sec) -training >> step=5103500, episode=851 reward=0.7726756 (447.68 it/sec) -training >> step=5103600, episode=851 reward=0.8015595 (539.55 it/sec) -training >> step=5103700, episode=851 reward=0.7693381 (461.61 it/sec) -training >> step=5103800, episode=851 reward=0.7860488 (453.12 it/sec) -training >> step=5103900, episode=851 reward=0.7688158 (468.91 it/sec) -training >> step=5104000, episode=851 reward=0.7549508 (455.17 it/sec) -training >> step=5104100, episode=851 reward=0.794627 (466.92 it/sec) -training >> step=5104200, episode=851 reward=0.7857975 (486.95 it/sec) -training >> step=5104300, episode=851 reward=0.7777677 (519.02 it/sec) -training >> step=5104400, episode=851 reward=0.7920809 (437.08 it/sec) -training >> step=5104500, episode=851 reward=0.7787367 (474.38 it/sec) -training >> step=5104600, episode=851 reward=0.775699 (492.51 it/sec) -training >> step=5104700, episode=851 reward=0.7978967 (481.34 it/sec) -training >> step=5104800, episode=851 reward=0.7745273 (493.24 it/sec) -training >> step=5104900, episode=851 reward=0.7841489 (419.31 it/sec) -training >> step=5105000, episode=851 reward=0.7966337 (488.24 it/sec) -training >> step=5105100, episode=851 reward=0.7752525 (450.34 it/sec) -training >> step=5105200, episode=851 reward=0.7962735 (451.42 it/sec) -training >> step=5105300, episode=852 reward=0.7689248 (95.28 it/sec) -training >> step=5105400, episode=852 reward=0.7726938 (464.36 it/sec) -training >> step=5105500, episode=852 reward=0.7680569 (453.28 it/sec) -training >> step=5105600, episode=852 reward=0.7869626 (418.32 it/sec) -training >> step=5105700, episode=852 reward=0.7779142 (470.72 it/sec) -training >> step=5105800, episode=852 reward=0.79191 (441.45 it/sec) -training >> step=5105900, episode=852 reward=0.7747535 (447.53 it/sec) -training >> step=5106000, episode=852 reward=0.7756501 (511.39 it/sec) -training >> step=5106100, episode=852 reward=0.7573602 (492.72 it/sec) -training >> step=5106200, episode=852 reward=0.7807095 (471.26 it/sec) -training >> step=5106300, episode=852 reward=0.7705664 (482.07 it/sec) -training >> step=5106400, episode=852 reward=0.7812123 (431.14 it/sec) -training >> step=5106500, episode=852 reward=0.7910578 (493.82 it/sec) -training >> step=5106600, episode=852 reward=0.7688169 (493.90 it/sec) -training >> step=5106700, episode=852 reward=0.7459679 (517.71 it/sec) -training >> step=5106800, episode=852 reward=0.788056 (506.30 it/sec) -training >> step=5106900, episode=852 reward=0.7829556 (459.23 it/sec) -training >> step=5107000, episode=852 reward=0.7721007 (464.44 it/sec) -training >> step=5107100, episode=852 reward=0.7787302 (489.19 it/sec) -training >> step=5107200, episode=852 reward=0.7595696 (481.38 it/sec) -training >> step=5107300, episode=852 reward=0.7830872 (485.53 it/sec) -training >> step=5107400, episode=852 reward=0.7525654 (480.10 it/sec) -training >> step=5107500, episode=852 reward=0.7815276 (491.97 it/sec) -training >> step=5107600, episode=852 reward=0.7842764 (495.51 it/sec) -training >> step=5107700, episode=852 reward=0.797671 (512.88 it/sec) -training >> step=5107800, episode=852 reward=0.7846431 (532.47 it/sec) -training >> step=5107900, episode=852 reward=0.7763726 (501.10 it/sec) -training >> step=5108000, episode=852 reward=0.7874865 (443.56 it/sec) -training >> step=5108100, episode=852 reward=0.8001909 (550.27 it/sec) -training >> step=5108200, episode=852 reward=0.758684 (488.72 it/sec) -training >> step=5108300, episode=852 reward=0.7882488 (505.06 it/sec) -training >> step=5108400, episode=852 reward=0.760472 (529.09 it/sec) -training >> step=5108500, episode=852 reward=0.7730725 (497.96 it/sec) -training >> step=5108600, episode=852 reward=0.7726635 (479.79 it/sec) -training >> step=5108700, episode=852 reward=0.7743655 (510.36 it/sec) -training >> step=5108800, episode=852 reward=0.7735354 (539.39 it/sec) -training >> step=5108900, episode=852 reward=0.7734413 (483.78 it/sec) -training >> step=5109000, episode=852 reward=0.791549 (492.24 it/sec) -training >> step=5109100, episode=852 reward=0.7771456 (525.01 it/sec) -training >> step=5109200, episode=852 reward=0.7859825 (517.26 it/sec) -training >> step=5109300, episode=852 reward=0.7801453 (521.75 it/sec) -training >> step=5109400, episode=852 reward=0.7766817 (553.47 it/sec) -training >> step=5109500, episode=852 reward=0.7786054 (488.57 it/sec) -training >> step=5109600, episode=852 reward=0.7847362 (507.17 it/sec) -training >> step=5109700, episode=852 reward=0.8114439 (537.65 it/sec) -training >> step=5109800, episode=852 reward=0.7823237 (495.55 it/sec) -training >> step=5109900, episode=852 reward=0.7811033 (532.69 it/sec) -training >> step=5110000, episode=852 reward=0.784946 (460.54 it/sec) -training >> step=5110100, episode=852 reward=0.7747629 (531.95 it/sec) -training >> step=5110200, episode=852 reward=0.7749296 (481.62 it/sec) -training >> step=5110300, episode=852 reward=0.7900587 (475.12 it/sec) -training >> step=5110400, episode=852 reward=0.7877057 (503.37 it/sec) -training >> step=5110500, episode=852 reward=0.7773163 (462.95 it/sec) -training >> step=5110600, episode=852 reward=0.7759888 (516.30 it/sec) -training >> step=5110700, episode=852 reward=0.7854915 (520.12 it/sec) -training >> step=5110800, episode=852 reward=0.7795846 (508.76 it/sec) -training >> step=5110900, episode=852 reward=0.7826835 (538.57 it/sec) -training >> step=5111000, episode=852 reward=0.7850637 (501.87 it/sec) -training >> step=5111100, episode=852 reward=0.7826448 (495.07 it/sec) -training >> step=5111200, episode=852 reward=0.7638296 (544.57 it/sec) -training >> step=5111300, episode=853 reward=0.7831832 (95.99 it/sec) -training >> step=5111400, episode=853 reward=0.7560704 (496.16 it/sec) -training >> step=5111500, episode=853 reward=0.788379 (528.23 it/sec) -training >> step=5111600, episode=853 reward=0.7796889 (503.78 it/sec) -training >> step=5111700, episode=853 reward=0.7865493 (489.44 it/sec) -training >> step=5111800, episode=853 reward=0.7644266 (523.05 it/sec) -training >> step=5111900, episode=853 reward=0.7815378 (510.27 it/sec) -training >> step=5112000, episode=853 reward=0.7837675 (454.56 it/sec) -training >> step=5112100, episode=853 reward=0.7990239 (563.52 it/sec) -training >> step=5112200, episode=853 reward=0.7750723 (474.38 it/sec) -training >> step=5112300, episode=853 reward=0.7567781 (509.08 it/sec) -training >> step=5112400, episode=853 reward=0.7805887 (492.96 it/sec) -training >> step=5112500, episode=853 reward=0.8004339 (518.69 it/sec) -training >> step=5112600, episode=853 reward=0.7754859 (523.07 it/sec) -training >> step=5112700, episode=853 reward=0.7927596 (500.23 it/sec) -training >> step=5112800, episode=853 reward=0.7777879 (519.91 it/sec) -training >> step=5112900, episode=853 reward=0.7761498 (470.84 it/sec) -training >> step=5113000, episode=853 reward=0.7804997 (529.48 it/sec) -training >> step=5113100, episode=853 reward=0.782121 (508.39 it/sec) -training >> step=5113200, episode=853 reward=0.779966 (473.43 it/sec) -training >> step=5113300, episode=853 reward=0.7871453 (538.12 it/sec) -training >> step=5113400, episode=853 reward=0.789129 (491.33 it/sec) -training >> step=5113500, episode=853 reward=0.7881864 (510.00 it/sec) -training >> step=5113600, episode=853 reward=0.7761124 (552.51 it/sec) -training >> step=5113700, episode=853 reward=0.7851193 (506.03 it/sec) -training >> step=5113800, episode=853 reward=0.7794337 (428.37 it/sec) -training >> step=5113900, episode=853 reward=0.781606 (545.81 it/sec) -training >> step=5114000, episode=853 reward=0.7911317 (520.90 it/sec) -training >> step=5114100, episode=853 reward=0.7935317 (499.04 it/sec) -training >> step=5114200, episode=853 reward=0.7897854 (523.50 it/sec) -training >> step=5114300, episode=853 reward=0.7614496 (516.03 it/sec) -training >> step=5114400, episode=853 reward=0.7828314 (512.41 it/sec) -training >> step=5114500, episode=853 reward=0.7771715 (506.58 it/sec) -training >> step=5114600, episode=853 reward=0.7801718 (451.09 it/sec) -training >> step=5114700, episode=853 reward=0.7688082 (494.07 it/sec) -training >> step=5114800, episode=853 reward=0.7441404 (453.53 it/sec) -training >> step=5114900, episode=853 reward=0.7855749 (471.35 it/sec) -training >> step=5115000, episode=853 reward=0.7983804 (530.69 it/sec) -training >> step=5115100, episode=853 reward=0.7628264 (473.42 it/sec) -training >> step=5115200, episode=853 reward=0.7765103 (475.35 it/sec) -training >> step=5115300, episode=853 reward=0.764443 (466.49 it/sec) -training >> step=5115400, episode=853 reward=0.7757901 (512.35 it/sec) -training >> step=5115500, episode=853 reward=0.7806236 (464.38 it/sec) -training >> step=5115600, episode=853 reward=0.7718488 (508.47 it/sec) -training >> step=5115700, episode=853 reward=0.7773964 (447.28 it/sec) -training >> step=5115800, episode=853 reward=0.7675452 (442.84 it/sec) -training >> step=5115900, episode=853 reward=0.7830883 (461.68 it/sec) -training >> step=5116000, episode=853 reward=0.7884245 (452.82 it/sec) -training >> step=5116100, episode=853 reward=0.7573657 (516.53 it/sec) -training >> step=5116200, episode=853 reward=0.7503452 (452.11 it/sec) -training >> step=5116300, episode=853 reward=0.7847148 (492.26 it/sec) -training >> step=5116400, episode=853 reward=0.7713161 (472.84 it/sec) -training >> step=5116500, episode=853 reward=0.7794657 (485.22 it/sec) -training >> step=5116600, episode=853 reward=0.7999108 (467.09 it/sec) -training >> step=5116700, episode=853 reward=0.7677479 (457.93 it/sec) -training >> step=5116800, episode=853 reward=0.7877655 (479.41 it/sec) -training >> step=5116900, episode=853 reward=0.7727169 (502.13 it/sec) -training >> step=5117000, episode=853 reward=0.773131 (455.78 it/sec) -training >> step=5117100, episode=853 reward=0.7834892 (471.33 it/sec) -training >> step=5117200, episode=853 reward=0.7787244 (435.07 it/sec) -training >> step=5117300, episode=854 reward=0.772262 (85.35 it/sec) -training >> step=5117400, episode=854 reward=0.8011554 (506.81 it/sec) -training >> step=5117500, episode=854 reward=0.7740695 (499.27 it/sec) -training >> step=5117600, episode=854 reward=0.7808878 (460.84 it/sec) -training >> step=5117700, episode=854 reward=0.7680142 (476.76 it/sec) -training >> step=5117800, episode=854 reward=0.7938582 (431.04 it/sec) -training >> step=5117900, episode=854 reward=0.7804875 (488.75 it/sec) -training >> step=5118000, episode=854 reward=0.7792864 (470.12 it/sec) -training >> step=5118100, episode=854 reward=0.7666761 (482.04 it/sec) -training >> step=5118200, episode=854 reward=0.7823118 (473.54 it/sec) -training >> step=5118300, episode=854 reward=0.7657729 (485.20 it/sec) -training >> step=5118400, episode=854 reward=0.79272 (439.16 it/sec) -training >> step=5118500, episode=854 reward=0.8035064 (480.53 it/sec) -training >> step=5118600, episode=854 reward=0.787585 (481.97 it/sec) -training >> step=5118700, episode=854 reward=0.7861689 (460.66 it/sec) -training >> step=5118800, episode=854 reward=0.7806224 (485.97 it/sec) -training >> step=5118900, episode=854 reward=0.8057616 (508.54 it/sec) -training >> step=5119000, episode=854 reward=0.8049539 (504.45 it/sec) -training >> step=5119100, episode=854 reward=0.7810513 (487.11 it/sec) -training >> step=5119200, episode=854 reward=0.7873374 (481.55 it/sec) -training >> step=5119300, episode=854 reward=0.7693507 (491.82 it/sec) -training >> step=5119400, episode=854 reward=0.7972459 (454.33 it/sec) -training >> step=5119500, episode=854 reward=0.7987354 (481.55 it/sec) -training >> step=5119600, episode=854 reward=0.7730177 (541.83 it/sec) -training >> step=5119700, episode=854 reward=0.766273 (389.26 it/sec) -training >> step=5119800, episode=854 reward=0.7923999 (519.37 it/sec) -training >> step=5119900, episode=854 reward=0.794072 (438.73 it/sec) -training >> step=5120000, episode=854 reward=0.7905999 (525.23 it/sec) -training >> step=5120100, episode=854 reward=0.7745848 (498.43 it/sec) -training >> step=5120200, episode=854 reward=0.7946785 (454.61 it/sec) -training >> step=5120300, episode=854 reward=0.8025187 (490.91 it/sec) -training >> step=5120400, episode=854 reward=0.7751133 (488.37 it/sec) -training >> step=5120500, episode=854 reward=0.7738723 (511.96 it/sec) -training >> step=5120600, episode=854 reward=0.773941 (494.33 it/sec) -training >> step=5120700, episode=854 reward=0.7622428 (500.88 it/sec) -training >> step=5120800, episode=854 reward=0.7728593 (504.76 it/sec) -training >> step=5120900, episode=854 reward=0.7944956 (465.14 it/sec) -training >> step=5121000, episode=854 reward=0.7912397 (512.92 it/sec) -training >> step=5121100, episode=854 reward=0.7795041 (475.63 it/sec) -training >> step=5121200, episode=854 reward=0.7833817 (463.43 it/sec) -training >> step=5121300, episode=854 reward=0.7890372 (465.38 it/sec) -training >> step=5121400, episode=854 reward=0.7779031 (506.35 it/sec) -training >> step=5121500, episode=854 reward=0.7855887 (444.01 it/sec) -training >> step=5121600, episode=854 reward=0.7685412 (481.78 it/sec) -training >> step=5121700, episode=854 reward=0.7842 (470.50 it/sec) -training >> step=5121800, episode=854 reward=0.7804931 (496.12 it/sec) -training >> step=5121900, episode=854 reward=0.7867438 (480.07 it/sec) -training >> step=5122000, episode=854 reward=0.75258 (450.13 it/sec) -training >> step=5122100, episode=854 reward=0.7661034 (454.38 it/sec) -training >> step=5122200, episode=854 reward=0.7856249 (439.81 it/sec) -training >> step=5122300, episode=854 reward=0.7869526 (505.10 it/sec) -training >> step=5122400, episode=854 reward=0.7916882 (490.99 it/sec) -training >> step=5122500, episode=854 reward=0.7701126 (441.84 it/sec) -training >> step=5122600, episode=854 reward=0.7592176 (474.53 it/sec) -training >> step=5122700, episode=854 reward=0.8037397 (455.06 it/sec) -training >> step=5122800, episode=854 reward=0.7985359 (517.53 it/sec) -training >> step=5122900, episode=854 reward=0.7873577 (464.65 it/sec) -training >> step=5123000, episode=854 reward=0.7841154 (462.35 it/sec) -training >> step=5123100, episode=854 reward=0.7754045 (444.39 it/sec) -training >> step=5123200, episode=854 reward=0.7911853 (505.96 it/sec) -training >> step=5123300, episode=855 reward=0.7623163 (89.97 it/sec) -training >> step=5123400, episode=855 reward=0.7793377 (464.53 it/sec) -training >> step=5123500, episode=855 reward=0.7602959 (487.38 it/sec) -training >> step=5123600, episode=855 reward=0.7755153 (509.34 it/sec) -training >> step=5123700, episode=855 reward=0.7806893 (490.31 it/sec) -training >> step=5123800, episode=855 reward=0.7835646 (404.45 it/sec) -training >> step=5123900, episode=855 reward=0.7905494 (478.10 it/sec) -training >> step=5124000, episode=855 reward=0.7719054 (478.60 it/sec) -training >> step=5124100, episode=855 reward=0.7847851 (496.59 it/sec) -training >> step=5124200, episode=855 reward=0.7975574 (468.86 it/sec) -training >> step=5124300, episode=855 reward=0.7801185 (480.87 it/sec) -training >> step=5124400, episode=855 reward=0.7598785 (490.27 it/sec) -training >> step=5124500, episode=855 reward=0.7846759 (477.74 it/sec) -training >> step=5124600, episode=855 reward=0.8040642 (476.06 it/sec) -training >> step=5124700, episode=855 reward=0.7980983 (464.70 it/sec) -training >> step=5124800, episode=855 reward=0.8039733 (497.23 it/sec) -training >> step=5124900, episode=855 reward=0.8009365 (479.50 it/sec) -training >> step=5125000, episode=855 reward=0.7647277 (464.01 it/sec) -training >> step=5125100, episode=855 reward=0.7851905 (478.62 it/sec) -training >> step=5125200, episode=855 reward=0.7823649 (452.29 it/sec) -training >> step=5125300, episode=855 reward=0.7744141 (512.97 it/sec) -training >> step=5125400, episode=855 reward=0.7925801 (482.08 it/sec) -training >> step=5125500, episode=855 reward=0.7821552 (498.85 it/sec) -training >> step=5125600, episode=855 reward=0.7768632 (476.55 it/sec) -training >> step=5125700, episode=855 reward=0.7939314 (490.40 it/sec) -training >> step=5125800, episode=855 reward=0.7722155 (462.09 it/sec) -training >> step=5125900, episode=855 reward=0.7777894 (471.59 it/sec) -training >> step=5126000, episode=855 reward=0.7727389 (509.49 it/sec) -training >> step=5126100, episode=855 reward=0.7886995 (501.63 it/sec) -training >> step=5126200, episode=855 reward=0.7795661 (474.72 it/sec) -training >> step=5126300, episode=855 reward=0.7873273 (465.52 it/sec) -training >> step=5126400, episode=855 reward=0.7684182 (479.23 it/sec) -training >> step=5126500, episode=855 reward=0.7679172 (476.32 it/sec) -training >> step=5126600, episode=855 reward=0.7899637 (471.66 it/sec) -training >> step=5126700, episode=855 reward=0.77548 (448.13 it/sec) -training >> step=5126800, episode=855 reward=0.7824621 (471.91 it/sec) -training >> step=5126900, episode=855 reward=0.7514007 (458.19 it/sec) -training >> step=5127000, episode=855 reward=0.7788822 (393.54 it/sec) -training >> step=5127100, episode=855 reward=0.7981046 (467.17 it/sec) -training >> step=5127200, episode=855 reward=0.796903 (472.33 it/sec) -training >> step=5127300, episode=855 reward=0.7765126 (460.88 it/sec) -training >> step=5127400, episode=855 reward=0.7788851 (500.93 it/sec) -training >> step=5127500, episode=855 reward=0.7641773 (477.62 it/sec) -training >> step=5127600, episode=855 reward=0.7720467 (464.76 it/sec) -training >> step=5127700, episode=855 reward=0.7709857 (458.40 it/sec) -training >> step=5127800, episode=855 reward=0.7721583 (493.76 it/sec) -training >> step=5127900, episode=855 reward=0.7830743 (477.19 it/sec) -training >> step=5128000, episode=855 reward=0.762809 (447.33 it/sec) -training >> step=5128100, episode=855 reward=0.7984704 (453.55 it/sec) -training >> step=5128200, episode=855 reward=0.7635662 (533.22 it/sec) -training >> step=5128300, episode=855 reward=0.7797129 (440.19 it/sec) -training >> step=5128400, episode=855 reward=0.7804566 (474.70 it/sec) -training >> step=5128500, episode=855 reward=0.776262 (429.79 it/sec) -training >> step=5128600, episode=855 reward=0.7526718 (452.57 it/sec) -training >> step=5128700, episode=855 reward=0.7907729 (486.47 it/sec) -training >> step=5128800, episode=855 reward=0.7570656 (481.36 it/sec) -training >> step=5128900, episode=855 reward=0.7975634 (496.53 it/sec) -training >> step=5129000, episode=855 reward=0.7733338 (454.92 it/sec) -training >> step=5129100, episode=855 reward=0.7796534 (459.38 it/sec) -training >> step=5129200, episode=855 reward=0.7779608 (497.92 it/sec) -training >> step=5129300, episode=856 reward=0.7749135 (89.64 it/sec) -training >> step=5129400, episode=856 reward=0.7767394 (473.67 it/sec) -training >> step=5129500, episode=856 reward=0.8074241 (459.48 it/sec) -training >> step=5129600, episode=856 reward=0.7805349 (443.64 it/sec) -training >> step=5129700, episode=856 reward=0.7783272 (461.92 it/sec) -training >> step=5129800, episode=856 reward=0.7924082 (465.03 it/sec) -training >> step=5129900, episode=856 reward=0.7956826 (455.90 it/sec) -training >> step=5130000, episode=856 reward=0.7785791 (476.87 it/sec) -training >> step=5130100, episode=856 reward=0.7723627 (474.10 it/sec) -training >> step=5130200, episode=856 reward=0.7779472 (494.83 it/sec) -training >> step=5130300, episode=856 reward=0.7801746 (511.65 it/sec) -training >> step=5130400, episode=856 reward=0.7580621 (500.02 it/sec) -training >> step=5130500, episode=856 reward=0.7879427 (495.91 it/sec) -training >> step=5130600, episode=856 reward=0.7914369 (488.14 it/sec) -training >> step=5130700, episode=856 reward=0.8025328 (470.12 it/sec) -training >> step=5130800, episode=856 reward=0.7856584 (476.60 it/sec) -training >> step=5130900, episode=856 reward=0.7735327 (517.21 it/sec) -training >> step=5131000, episode=856 reward=0.768424 (507.86 it/sec) -training >> step=5131100, episode=856 reward=0.8131607 (493.68 it/sec) -training >> step=5131200, episode=856 reward=0.7944472 (439.21 it/sec) -training >> step=5131300, episode=856 reward=0.7697195 (500.39 it/sec) -training >> step=5131400, episode=856 reward=0.7760701 (523.54 it/sec) -training >> step=5131500, episode=856 reward=0.7815264 (474.18 it/sec) -training >> step=5131600, episode=856 reward=0.7796499 (473.29 it/sec) -training >> step=5131700, episode=856 reward=0.7878687 (490.98 it/sec) -training >> step=5131800, episode=856 reward=0.7777849 (499.37 it/sec) -training >> step=5131900, episode=856 reward=0.7739193 (499.75 it/sec) -training >> step=5132000, episode=856 reward=0.7669583 (503.40 it/sec) -training >> step=5132100, episode=856 reward=0.7906367 (524.17 it/sec) -training >> step=5132200, episode=856 reward=0.7876666 (514.53 it/sec) -training >> step=5132300, episode=856 reward=0.7785715 (500.00 it/sec) -training >> step=5132400, episode=856 reward=0.7824126 (514.14 it/sec) -training >> step=5132500, episode=856 reward=0.7825121 (463.83 it/sec) -training >> step=5132600, episode=856 reward=0.789634 (492.53 it/sec) -training >> step=5132700, episode=856 reward=0.7942691 (429.68 it/sec) -training >> step=5132800, episode=856 reward=0.7755067 (488.79 it/sec) -training >> step=5132900, episode=856 reward=0.7894892 (503.15 it/sec) -training >> step=5133000, episode=856 reward=0.7943538 (478.95 it/sec) -training >> step=5133100, episode=856 reward=0.7852673 (512.81 it/sec) -training >> step=5133200, episode=856 reward=0.7607177 (512.74 it/sec) -training >> step=5133300, episode=856 reward=0.7645436 (503.56 it/sec) -training >> step=5133400, episode=856 reward=0.7793753 (495.29 it/sec) -training >> step=5133500, episode=856 reward=0.7742322 (505.98 it/sec) -training >> step=5133600, episode=856 reward=0.7905304 (473.39 it/sec) -training >> step=5133700, episode=856 reward=0.750544 (486.15 it/sec) -training >> step=5133800, episode=856 reward=0.7929171 (511.75 it/sec) -training >> step=5133900, episode=856 reward=0.7779539 (516.84 it/sec) -training >> step=5134000, episode=856 reward=0.7751337 (524.42 it/sec) -training >> step=5134100, episode=856 reward=0.7686326 (455.78 it/sec) -training >> step=5134200, episode=856 reward=0.7808688 (490.84 it/sec) -training >> step=5134300, episode=856 reward=0.7532885 (486.86 it/sec) -training >> step=5134400, episode=856 reward=0.7728834 (496.71 it/sec) -training >> step=5134500, episode=856 reward=0.7876666 (493.54 it/sec) -training >> step=5134600, episode=856 reward=0.7715462 (496.20 it/sec) -training >> step=5134700, episode=856 reward=0.7720731 (450.29 it/sec) -training >> step=5134800, episode=856 reward=0.7709148 (437.06 it/sec) -training >> step=5134900, episode=856 reward=0.7773457 (528.86 it/sec) -training >> step=5135000, episode=856 reward=0.7624677 (460.85 it/sec) -training >> step=5135100, episode=856 reward=0.7676982 (459.60 it/sec) -training >> step=5135200, episode=856 reward=0.7815353 (466.54 it/sec) -training >> step=5135300, episode=857 reward=0.7885274 (67.30 it/sec) -training >> step=5135400, episode=857 reward=0.7891236 (450.10 it/sec) -training >> step=5135500, episode=857 reward=0.7715082 (464.21 it/sec) -training >> step=5135600, episode=857 reward=0.7693641 (479.46 it/sec) -training >> step=5135700, episode=857 reward=0.7778443 (494.13 it/sec) -training >> step=5135800, episode=857 reward=0.7881464 (482.87 it/sec) -training >> step=5135900, episode=857 reward=0.7971518 (491.84 it/sec) -training >> step=5136000, episode=857 reward=0.7877547 (476.30 it/sec) -training >> step=5136100, episode=857 reward=0.77141 (465.09 it/sec) -training >> step=5136200, episode=857 reward=0.7928054 (516.08 it/sec) -training >> step=5136300, episode=857 reward=0.7879304 (476.34 it/sec) -training >> step=5136400, episode=857 reward=0.7671981 (456.42 it/sec) -training >> step=5136500, episode=857 reward=0.7959254 (434.13 it/sec) -training >> step=5136600, episode=857 reward=0.7993963 (506.72 it/sec) -training >> step=5136700, episode=857 reward=0.7718518 (484.47 it/sec) -training >> step=5136800, episode=857 reward=0.7611904 (484.32 it/sec) -training >> step=5136900, episode=857 reward=0.7687324 (477.16 it/sec) -training >> step=5137000, episode=857 reward=0.7959599 (449.81 it/sec) -training >> step=5137100, episode=857 reward=0.7859222 (478.40 it/sec) -training >> step=5137200, episode=857 reward=0.7872503 (495.92 it/sec) -training >> step=5137300, episode=857 reward=0.7954377 (479.08 it/sec) -training >> step=5137400, episode=857 reward=0.763954 (479.39 it/sec) -training >> step=5137500, episode=857 reward=0.7842618 (504.99 it/sec) -training >> step=5137600, episode=857 reward=0.7656538 (514.36 it/sec) -training >> step=5137700, episode=857 reward=0.7760273 (495.06 it/sec) -training >> step=5137800, episode=857 reward=0.7924199 (504.10 it/sec) -training >> step=5137900, episode=857 reward=0.775686 (496.31 it/sec) -training >> step=5138000, episode=857 reward=0.7745478 (482.62 it/sec) -training >> step=5138100, episode=857 reward=0.7932596 (527.41 it/sec) -training >> step=5138200, episode=857 reward=0.766305 (507.96 it/sec) -training >> step=5138300, episode=857 reward=0.7975153 (483.40 it/sec) -training >> step=5138400, episode=857 reward=0.7767966 (450.13 it/sec) -training >> step=5138500, episode=857 reward=0.786732 (462.83 it/sec) -training >> step=5138600, episode=857 reward=0.7789412 (511.67 it/sec) -training >> step=5138700, episode=857 reward=0.7880245 (495.08 it/sec) -training >> step=5138800, episode=857 reward=0.7838934 (480.28 it/sec) -training >> step=5138900, episode=857 reward=0.7655646 (497.20 it/sec) -training >> step=5139000, episode=857 reward=0.7599156 (511.03 it/sec) -training >> step=5139100, episode=857 reward=0.77543 (493.04 it/sec) -training >> step=5139200, episode=857 reward=0.7777246 (493.07 it/sec) -training >> step=5139300, episode=857 reward=0.7838905 (449.23 it/sec) -training >> step=5139400, episode=857 reward=0.8009197 (494.87 it/sec) -training >> step=5139500, episode=857 reward=0.7559822 (494.85 it/sec) -training >> step=5139600, episode=857 reward=0.7765231 (535.17 it/sec) -training >> step=5139700, episode=857 reward=0.7689933 (540.90 it/sec) -training >> step=5139800, episode=857 reward=0.7828068 (508.47 it/sec) -training >> step=5139900, episode=857 reward=0.7859803 (537.95 it/sec) -training >> step=5140000, episode=857 reward=0.8023826 (456.51 it/sec) -training >> step=5140100, episode=857 reward=0.773123 (546.75 it/sec) -training >> step=5140200, episode=857 reward=0.7779125 (519.97 it/sec) -training >> step=5140300, episode=857 reward=0.7770723 (519.90 it/sec) -training >> step=5140400, episode=857 reward=0.7682473 (554.67 it/sec) -training >> step=5140500, episode=857 reward=0.7546444 (458.87 it/sec) -training >> step=5140600, episode=857 reward=0.797699 (477.97 it/sec) -training >> step=5140700, episode=857 reward=0.7792663 (535.72 it/sec) -training >> step=5140800, episode=857 reward=0.7557421 (532.92 it/sec) -training >> step=5140900, episode=857 reward=0.7618689 (495.26 it/sec) -training >> step=5141000, episode=857 reward=0.7862548 (498.26 it/sec) -training >> step=5141100, episode=857 reward=0.7684551 (492.21 it/sec) -training >> step=5141200, episode=857 reward=0.7871494 (528.50 it/sec) -training >> step=5141300, episode=858 reward=0.7781336 (113.40 it/sec) -training >> step=5141400, episode=858 reward=0.7871546 (518.65 it/sec) -training >> step=5141500, episode=858 reward=0.7899827 (528.98 it/sec) -training >> step=5141600, episode=858 reward=0.7831562 (515.27 it/sec) -training >> step=5141700, episode=858 reward=0.7845153 (500.25 it/sec) -training >> step=5141800, episode=858 reward=0.7819617 (479.24 it/sec) -training >> step=5141900, episode=858 reward=0.798013 (484.77 it/sec) -training >> step=5142000, episode=858 reward=0.7663863 (494.09 it/sec) -training >> step=5142100, episode=858 reward=0.8033597 (513.65 it/sec) -training >> step=5142200, episode=858 reward=0.7885207 (499.39 it/sec) -training >> step=5142300, episode=858 reward=0.8003705 (472.20 it/sec) -training >> step=5142400, episode=858 reward=0.7946482 (508.73 it/sec) -training >> step=5142500, episode=858 reward=0.8097156 (490.70 it/sec) -training >> step=5142600, episode=858 reward=0.7588228 (523.70 it/sec) -training >> step=5142700, episode=858 reward=0.7788433 (446.56 it/sec) -training >> step=5142800, episode=858 reward=0.7824728 (481.27 it/sec) -training >> step=5142900, episode=858 reward=0.7773871 (566.35 it/sec) -training >> step=5143000, episode=858 reward=0.7884934 (513.00 it/sec) -training >> step=5143100, episode=858 reward=0.749339 (515.79 it/sec) -training >> step=5143200, episode=858 reward=0.7671732 (506.18 it/sec) -training >> step=5143300, episode=858 reward=0.7875907 (474.04 it/sec) -training >> step=5143400, episode=858 reward=0.7700785 (527.22 it/sec) -training >> step=5143500, episode=858 reward=0.7679085 (486.91 it/sec) -training >> step=5143600, episode=858 reward=0.7825059 (506.72 it/sec) -training >> step=5143700, episode=858 reward=0.7804591 (545.20 it/sec) -training >> step=5143800, episode=858 reward=0.7991163 (466.57 it/sec) -training >> step=5143900, episode=858 reward=0.767945 (532.39 it/sec) -training >> step=5144000, episode=858 reward=0.7898581 (436.97 it/sec) -training >> step=5144100, episode=858 reward=0.7861828 (527.52 it/sec) -training >> step=5144200, episode=858 reward=0.7662579 (503.98 it/sec) -training >> step=5144300, episode=858 reward=0.7808457 (484.39 it/sec) -training >> step=5144400, episode=858 reward=0.7640164 (516.20 it/sec) -training >> step=5144500, episode=858 reward=0.7858323 (472.71 it/sec) -training >> step=5144600, episode=858 reward=0.7772837 (447.10 it/sec) -training >> step=5144700, episode=858 reward=0.7813942 (568.80 it/sec) -training >> step=5144800, episode=858 reward=0.7882537 (456.57 it/sec) -training >> step=5144900, episode=858 reward=0.7798808 (491.94 it/sec) -training >> step=5145000, episode=858 reward=0.7715207 (478.01 it/sec) -training >> step=5145100, episode=858 reward=0.7693681 (502.70 it/sec) -training >> step=5145200, episode=858 reward=0.7747341 (484.49 it/sec) -training >> step=5145300, episode=858 reward=0.78303 (411.93 it/sec) -training >> step=5145400, episode=858 reward=0.8039119 (506.58 it/sec) -training >> step=5145500, episode=858 reward=0.7726952 (525.32 it/sec) -training >> step=5145600, episode=858 reward=0.7692362 (488.56 it/sec) -training >> step=5145700, episode=858 reward=0.7720848 (472.54 it/sec) -training >> step=5145800, episode=858 reward=0.7901224 (441.31 it/sec) -training >> step=5145900, episode=858 reward=0.7696632 (442.17 it/sec) -training >> step=5146000, episode=858 reward=0.7848794 (498.22 it/sec) -training >> step=5146100, episode=858 reward=0.7887803 (506.45 it/sec) -training >> step=5146200, episode=858 reward=0.7534881 (516.39 it/sec) -training >> step=5146300, episode=858 reward=0.7690145 (436.22 it/sec) -training >> step=5146400, episode=858 reward=0.765938 (487.59 it/sec) -training >> step=5146500, episode=858 reward=0.7637198 (460.19 it/sec) -training >> step=5146600, episode=858 reward=0.7652348 (517.89 it/sec) -training >> step=5146700, episode=858 reward=0.773403 (472.18 it/sec) -training >> step=5146800, episode=858 reward=0.7668672 (410.15 it/sec) -training >> step=5146900, episode=858 reward=0.7800952 (453.11 it/sec) -training >> step=5147000, episode=858 reward=0.7690899 (461.54 it/sec) -training >> step=5147100, episode=858 reward=0.8051786 (500.87 it/sec) -training >> step=5147200, episode=858 reward=0.7485816 (459.31 it/sec) -training >> step=5147300, episode=859 reward=0.7921991 (88.83 it/sec) -training >> step=5147400, episode=859 reward=0.7742204 (452.74 it/sec) -training >> step=5147500, episode=859 reward=0.7854968 (486.28 it/sec) -training >> step=5147600, episode=859 reward=0.7872784 (482.24 it/sec) -training >> step=5147700, episode=859 reward=0.7855179 (482.45 it/sec) -training >> step=5147800, episode=859 reward=0.7536445 (467.76 it/sec) -training >> step=5147900, episode=859 reward=0.7646864 (525.17 it/sec) -training >> step=5148000, episode=859 reward=0.7785927 (477.11 it/sec) -training >> step=5148100, episode=859 reward=0.7883069 (485.94 it/sec) -training >> step=5148200, episode=859 reward=0.7789606 (526.56 it/sec) -training >> step=5148300, episode=859 reward=0.7989045 (463.81 it/sec) -training >> step=5148400, episode=859 reward=0.7735542 (482.61 it/sec) -training >> step=5148500, episode=859 reward=0.7866449 (481.01 it/sec) -training >> step=5148600, episode=859 reward=0.7884557 (525.77 it/sec) -training >> step=5148700, episode=859 reward=0.7855464 (490.75 it/sec) -training >> step=5148800, episode=859 reward=0.7613184 (453.85 it/sec) -training >> step=5148900, episode=859 reward=0.7874109 (524.99 it/sec) -training >> step=5149000, episode=859 reward=0.7810603 (502.67 it/sec) -training >> step=5149100, episode=859 reward=0.7624464 (494.09 it/sec) -training >> step=5149200, episode=859 reward=0.7768199 (475.77 it/sec) -training >> step=5149300, episode=859 reward=0.7920941 (473.09 it/sec) -training >> step=5149400, episode=859 reward=0.7706437 (475.18 it/sec) -training >> step=5149500, episode=859 reward=0.7674473 (500.74 it/sec) -training >> step=5149600, episode=859 reward=0.7683914 (504.67 it/sec) -training >> step=5149700, episode=859 reward=0.806908 (509.72 it/sec) -training >> step=5149800, episode=859 reward=0.7774931 (480.61 it/sec) -training >> step=5149900, episode=859 reward=0.778353 (511.56 it/sec) -training >> step=5150000, episode=859 reward=0.775079 (519.00 it/sec) -training >> step=5150100, episode=859 reward=0.7753038 (515.37 it/sec) -training >> step=5150200, episode=859 reward=0.7748981 (537.69 it/sec) -training >> step=5150300, episode=859 reward=0.7873048 (503.98 it/sec) -training >> step=5150400, episode=859 reward=0.7892085 (527.70 it/sec) -training >> step=5150500, episode=859 reward=0.7916002 (515.00 it/sec) -training >> step=5150600, episode=859 reward=0.7667274 (512.45 it/sec) -training >> step=5150700, episode=859 reward=0.7526935 (516.67 it/sec) -training >> step=5150800, episode=859 reward=0.7951962 (524.65 it/sec) -training >> step=5150900, episode=859 reward=0.7869037 (471.16 it/sec) -training >> step=5151000, episode=859 reward=0.7752943 (537.39 it/sec) -training >> step=5151100, episode=859 reward=0.7863415 (530.24 it/sec) -training >> step=5151200, episode=859 reward=0.7767921 (499.51 it/sec) -training >> step=5151300, episode=859 reward=0.7848346 (492.40 it/sec) -training >> step=5151400, episode=859 reward=0.7809129 (451.93 it/sec) -training >> step=5151500, episode=859 reward=0.791186 (552.88 it/sec) -training >> step=5151600, episode=859 reward=0.8058096 (514.52 it/sec) -training >> step=5151700, episode=859 reward=0.7618862 (518.02 it/sec) -training >> step=5151800, episode=859 reward=0.7748577 (535.66 it/sec) -training >> step=5151900, episode=859 reward=0.785604 (494.54 it/sec) -training >> step=5152000, episode=859 reward=0.7889302 (474.69 it/sec) -training >> step=5152100, episode=859 reward=0.7634914 (497.22 it/sec) -training >> step=5152200, episode=859 reward=0.7938586 (551.88 it/sec) -training >> step=5152300, episode=859 reward=0.7861266 (497.01 it/sec) -training >> step=5152400, episode=859 reward=0.7805977 (483.22 it/sec) -training >> step=5152500, episode=859 reward=0.7720256 (502.89 it/sec) -training >> step=5152600, episode=859 reward=0.7648887 (547.99 it/sec) -training >> step=5152700, episode=859 reward=0.773725 (501.52 it/sec) -training >> step=5152800, episode=859 reward=0.7719637 (520.45 it/sec) -training >> step=5152900, episode=859 reward=0.7657722 (527.80 it/sec) -training >> step=5153000, episode=859 reward=0.7773351 (483.23 it/sec) -training >> step=5153100, episode=859 reward=0.7800621 (501.28 it/sec) -training >> step=5153200, episode=859 reward=0.7781965 (469.75 it/sec) -training >> step=5153300, episode=860 reward=0.7821177 (109.28 it/sec) -training >> step=5153400, episode=860 reward=0.7720376 (351.24 it/sec) -training >> step=5153500, episode=860 reward=0.7845071 (450.39 it/sec) -training >> step=5153600, episode=860 reward=0.7554571 (454.08 it/sec) -training >> step=5153700, episode=860 reward=0.7766409 (469.08 it/sec) -training >> step=5153800, episode=860 reward=0.7881751 (458.91 it/sec) -training >> step=5153900, episode=860 reward=0.7849173 (428.83 it/sec) -training >> step=5154000, episode=860 reward=0.7829378 (473.60 it/sec) -training >> step=5154100, episode=860 reward=0.7912761 (469.38 it/sec) -training >> step=5154200, episode=860 reward=0.7791347 (431.03 it/sec) -training >> step=5154300, episode=860 reward=0.786734 (493.53 it/sec) -training >> step=5154400, episode=860 reward=0.7502619 (453.58 it/sec) -training >> step=5154500, episode=860 reward=0.7974702 (442.55 it/sec) -training >> step=5154600, episode=860 reward=0.7559382 (464.75 it/sec) -training >> step=5154700, episode=860 reward=0.7824069 (490.88 it/sec) -training >> step=5154800, episode=860 reward=0.7775015 (409.68 it/sec) -training >> step=5154900, episode=860 reward=0.7769369 (393.58 it/sec) -training >> step=5155000, episode=860 reward=0.7715564 (427.38 it/sec) -training >> step=5155100, episode=860 reward=0.7586999 (491.72 it/sec) -training >> step=5155200, episode=860 reward=0.7796648 (466.37 it/sec) -training >> step=5155300, episode=860 reward=0.7972461 (461.11 it/sec) -training >> step=5155400, episode=860 reward=0.7920405 (480.08 it/sec) -training >> step=5155500, episode=860 reward=0.7835538 (410.81 it/sec) -training >> step=5155600, episode=860 reward=0.78419 (422.63 it/sec) -training >> step=5155700, episode=860 reward=0.8042622 (460.27 it/sec) -training >> step=5155800, episode=860 reward=0.7895962 (420.30 it/sec) -training >> step=5155900, episode=860 reward=0.773392 (451.69 it/sec) -training >> step=5156000, episode=860 reward=0.7994079 (423.32 it/sec) -training >> step=5156100, episode=860 reward=0.7738109 (473.66 it/sec) -training >> step=5156200, episode=860 reward=0.7862555 (482.15 it/sec) -training >> step=5156300, episode=860 reward=0.7815289 (478.75 it/sec) -training >> step=5156400, episode=860 reward=0.7804454 (445.05 it/sec) -training >> step=5156500, episode=860 reward=0.781732 (480.45 it/sec) -training >> step=5156600, episode=860 reward=0.7837639 (434.86 it/sec) -training >> step=5156700, episode=860 reward=0.7922237 (455.34 it/sec) -training >> step=5156800, episode=860 reward=0.7676998 (484.25 it/sec) -training >> step=5156900, episode=860 reward=0.7859047 (436.13 it/sec) -training >> step=5157000, episode=860 reward=0.7600514 (464.38 it/sec) -training >> step=5157100, episode=860 reward=0.7762095 (450.09 it/sec) -training >> step=5157200, episode=860 reward=0.7738214 (480.41 it/sec) -training >> step=5157300, episode=860 reward=0.7810069 (472.29 it/sec) -training >> step=5157400, episode=860 reward=0.7675946 (456.14 it/sec) -training >> step=5157500, episode=860 reward=0.7793683 (419.80 it/sec) -training >> step=5157600, episode=860 reward=0.7706298 (470.79 it/sec) -training >> step=5157700, episode=860 reward=0.7841147 (472.57 it/sec) -training >> step=5157800, episode=860 reward=0.7697201 (424.13 it/sec) -training >> step=5157900, episode=860 reward=0.7849859 (492.68 it/sec) -training >> step=5158000, episode=860 reward=0.7769132 (471.35 it/sec) -training >> step=5158100, episode=860 reward=0.7832331 (444.31 it/sec) -training >> step=5158200, episode=860 reward=0.7771881 (443.07 it/sec) -training >> step=5158300, episode=860 reward=0.7817925 (486.92 it/sec) -training >> step=5158400, episode=860 reward=0.7735093 (436.88 it/sec) -training >> step=5158500, episode=860 reward=0.7762275 (480.17 it/sec) -training >> step=5158600, episode=860 reward=0.7880172 (472.09 it/sec) -training >> step=5158700, episode=860 reward=0.759439 (486.26 it/sec) -training >> step=5158800, episode=860 reward=0.7825392 (458.66 it/sec) -training >> step=5158900, episode=860 reward=0.7679518 (450.80 it/sec) -training >> step=5159000, episode=860 reward=0.7644867 (495.22 it/sec) -training >> step=5159100, episode=860 reward=0.7788516 (450.94 it/sec) -training >> step=5159200, episode=860 reward=0.7907357 (450.28 it/sec) -training >> step=5159300, episode=861 reward=0.7767575 (88.49 it/sec) -training >> step=5159400, episode=861 reward=0.7948809 (466.59 it/sec) -training >> step=5159500, episode=861 reward=0.7787079 (444.01 it/sec) -training >> step=5159600, episode=861 reward=0.7828657 (477.78 it/sec) -training >> step=5159700, episode=861 reward=0.7950811 (494.03 it/sec) -training >> step=5159800, episode=861 reward=0.7962469 (468.85 it/sec) -training >> step=5159900, episode=861 reward=0.7657763 (451.88 it/sec) -training >> step=5160000, episode=861 reward=0.7929685 (475.45 it/sec) -training >> step=5160100, episode=861 reward=0.7711299 (450.75 it/sec) -training >> step=5160200, episode=861 reward=0.7708308 (475.13 it/sec) -training >> step=5160300, episode=861 reward=0.7919866 (451.71 it/sec) -training >> step=5160400, episode=861 reward=0.7826685 (427.78 it/sec) -training >> step=5160500, episode=861 reward=0.7884378 (444.49 it/sec) -training >> step=5160600, episode=861 reward=0.791266 (451.25 it/sec) -training >> step=5160700, episode=861 reward=0.7894472 (489.02 it/sec) -training >> step=5160800, episode=861 reward=0.7678384 (498.75 it/sec) -training >> step=5160900, episode=861 reward=0.7852135 (472.00 it/sec) -training >> step=5161000, episode=861 reward=0.7831704 (441.16 it/sec) -training >> step=5161100, episode=861 reward=0.7671994 (477.04 it/sec) -training >> step=5161200, episode=861 reward=0.7947078 (462.92 it/sec) -training >> step=5161300, episode=861 reward=0.7714688 (454.22 it/sec) -training >> step=5161400, episode=861 reward=0.7841476 (495.21 it/sec) -training >> step=5161500, episode=861 reward=0.7744036 (499.15 it/sec) -training >> step=5161600, episode=861 reward=0.7741853 (433.95 it/sec) -training >> step=5161700, episode=861 reward=0.7876898 (431.28 it/sec) -training >> step=5161800, episode=861 reward=0.7860289 (442.67 it/sec) -training >> step=5161900, episode=861 reward=0.772617 (474.06 it/sec) -training >> step=5162000, episode=861 reward=0.7839624 (465.58 it/sec) -training >> step=5162100, episode=861 reward=0.7759203 (411.96 it/sec) -training >> step=5162200, episode=861 reward=0.7871698 (499.18 it/sec) -training >> step=5162300, episode=861 reward=0.7674809 (385.97 it/sec) -training >> step=5162400, episode=861 reward=0.7971691 (473.44 it/sec) -training >> step=5162500, episode=861 reward=0.7867759 (424.59 it/sec) -training >> step=5162600, episode=861 reward=0.7934991 (408.30 it/sec) -training >> step=5162700, episode=861 reward=0.778543 (473.89 it/sec) -training >> step=5162800, episode=861 reward=0.7767872 (467.49 it/sec) -training >> step=5162900, episode=861 reward=0.7588465 (491.19 it/sec) -training >> step=5163000, episode=861 reward=0.7574509 (471.66 it/sec) -training >> step=5163100, episode=861 reward=0.7768431 (440.80 it/sec) -training >> step=5163200, episode=861 reward=0.7722655 (483.32 it/sec) -training >> step=5163300, episode=861 reward=0.7757267 (444.62 it/sec) -training >> step=5163400, episode=861 reward=0.7585468 (440.92 it/sec) -training >> step=5163500, episode=861 reward=0.769935 (461.68 it/sec) -training >> step=5163600, episode=861 reward=0.7712896 (428.67 it/sec) -training >> step=5163700, episode=861 reward=0.7942813 (449.80 it/sec) -training >> step=5163800, episode=861 reward=0.7610561 (457.60 it/sec) -training >> step=5163900, episode=861 reward=0.753208 (440.89 it/sec) -training >> step=5164000, episode=861 reward=0.7931637 (468.05 it/sec) -training >> step=5164100, episode=861 reward=0.7917467 (505.66 it/sec) -training >> step=5164200, episode=861 reward=0.7808309 (456.81 it/sec) -training >> step=5164300, episode=861 reward=0.7712603 (452.76 it/sec) -training >> step=5164400, episode=861 reward=0.7468889 (435.58 it/sec) -training >> step=5164500, episode=861 reward=0.7874913 (425.40 it/sec) -training >> step=5164600, episode=861 reward=0.7394229 (462.58 it/sec) -training >> step=5164700, episode=861 reward=0.7861949 (499.82 it/sec) -training >> step=5164800, episode=861 reward=0.7658275 (463.48 it/sec) -training >> step=5164900, episode=861 reward=0.7813614 (455.48 it/sec) -training >> step=5165000, episode=861 reward=0.7693079 (463.54 it/sec) -training >> step=5165100, episode=861 reward=0.7829295 (456.82 it/sec) -training >> step=5165200, episode=861 reward=0.7748235 (478.07 it/sec) -training >> step=5165300, episode=862 reward=0.7917752 (68.93 it/sec) -training >> step=5165400, episode=862 reward=0.7588915 (454.07 it/sec) -training >> step=5165500, episode=862 reward=0.8021614 (411.40 it/sec) -training >> step=5165600, episode=862 reward=0.7810818 (452.20 it/sec) -training >> step=5165700, episode=862 reward=0.7626449 (470.33 it/sec) -training >> step=5165800, episode=862 reward=0.7704591 (466.84 it/sec) -training >> step=5165900, episode=862 reward=0.780805 (447.68 it/sec) -training >> step=5166000, episode=862 reward=0.801317 (431.72 it/sec) -training >> step=5166100, episode=862 reward=0.7935389 (444.95 it/sec) -training >> step=5166200, episode=862 reward=0.7884638 (475.17 it/sec) -training >> step=5166300, episode=862 reward=0.7949857 (408.78 it/sec) -training >> step=5166400, episode=862 reward=0.7904199 (463.01 it/sec) -training >> step=5166500, episode=862 reward=0.7695041 (491.69 it/sec) -training >> step=5166600, episode=862 reward=0.7918321 (503.12 it/sec) -training >> step=5166700, episode=862 reward=0.7886661 (448.54 it/sec) -training >> step=5166800, episode=862 reward=0.7623286 (489.42 it/sec) -training >> step=5166900, episode=862 reward=0.7597435 (502.59 it/sec) -training >> step=5167000, episode=862 reward=0.8013164 (488.62 it/sec) -training >> step=5167100, episode=862 reward=0.801445 (476.49 it/sec) -training >> step=5167200, episode=862 reward=0.7759225 (491.97 it/sec) -training >> step=5167300, episode=862 reward=0.7545318 (455.73 it/sec) -training >> step=5167400, episode=862 reward=0.7730177 (435.02 it/sec) -training >> step=5167500, episode=862 reward=0.7784364 (453.08 it/sec) -training >> step=5167600, episode=862 reward=0.7766431 (471.52 it/sec) -training >> step=5167700, episode=862 reward=0.7731816 (458.54 it/sec) -training >> step=5167800, episode=862 reward=0.792897 (475.22 it/sec) -training >> step=5167900, episode=862 reward=0.7785763 (440.55 it/sec) -training >> step=5168000, episode=862 reward=0.7631852 (491.95 it/sec) -training >> step=5168100, episode=862 reward=0.8014531 (422.55 it/sec) -training >> step=5168200, episode=862 reward=0.7946439 (405.91 it/sec) -training >> step=5168300, episode=862 reward=0.7811626 (440.50 it/sec) -training >> step=5168400, episode=862 reward=0.7677069 (490.43 it/sec) -training >> step=5168500, episode=862 reward=0.7859122 (450.59 it/sec) -training >> step=5168600, episode=862 reward=0.771333 (416.55 it/sec) -training >> step=5168700, episode=862 reward=0.7825351 (434.27 it/sec) -training >> step=5168800, episode=862 reward=0.7903218 (467.94 it/sec) -training >> step=5168900, episode=862 reward=0.7733229 (432.16 it/sec) -training >> step=5169000, episode=862 reward=0.7836732 (446.67 it/sec) -training >> step=5169100, episode=862 reward=0.7919506 (484.18 it/sec) -training >> step=5169200, episode=862 reward=0.7846018 (435.81 it/sec) -training >> step=5169300, episode=862 reward=0.7653824 (451.48 it/sec) -training >> step=5169400, episode=862 reward=0.8015717 (441.72 it/sec) -training >> step=5169500, episode=862 reward=0.7930366 (458.48 it/sec) -training >> step=5169600, episode=862 reward=0.7645711 (431.91 it/sec) -training >> step=5169700, episode=862 reward=0.7641188 (457.12 it/sec) -training >> step=5169800, episode=862 reward=0.7800215 (484.99 it/sec) -training >> step=5169900, episode=862 reward=0.7646086 (462.13 it/sec) -training >> step=5170000, episode=862 reward=0.7874009 (461.03 it/sec) -training >> step=5170100, episode=862 reward=0.756807 (455.27 it/sec) -training >> step=5170200, episode=862 reward=0.7745303 (481.99 it/sec) -training >> step=5170300, episode=862 reward=0.7770656 (432.43 it/sec) -training >> step=5170400, episode=862 reward=0.7724549 (454.93 it/sec) -training >> step=5170500, episode=862 reward=0.7829161 (472.04 it/sec) -training >> step=5170600, episode=862 reward=0.7667692 (448.54 it/sec) -training >> step=5170700, episode=862 reward=0.7920165 (462.26 it/sec) -training >> step=5170800, episode=862 reward=0.766547 (463.37 it/sec) -training >> step=5170900, episode=862 reward=0.7776738 (462.97 it/sec) -training >> step=5171000, episode=862 reward=0.7688791 (461.42 it/sec) -training >> step=5171100, episode=862 reward=0.7724103 (481.29 it/sec) -training >> step=5171200, episode=862 reward=0.7506139 (406.20 it/sec) -training >> step=5171300, episode=863 reward=0.7691973 (86.41 it/sec) -training >> step=5171400, episode=863 reward=0.783343 (390.77 it/sec) -training >> step=5171500, episode=863 reward=0.7447692 (419.89 it/sec) -training >> step=5171600, episode=863 reward=0.7920549 (453.73 it/sec) -training >> step=5171700, episode=863 reward=0.7685511 (457.63 it/sec) -training >> step=5171800, episode=863 reward=0.7613609 (453.00 it/sec) -training >> step=5171900, episode=863 reward=0.7662929 (447.90 it/sec) -training >> step=5172000, episode=863 reward=0.7832941 (454.13 it/sec) -training >> step=5172100, episode=863 reward=0.7834413 (454.02 it/sec) -training >> step=5172200, episode=863 reward=0.8059833 (452.00 it/sec) -training >> step=5172300, episode=863 reward=0.7845463 (428.00 it/sec) -training >> step=5172400, episode=863 reward=0.8171635 (443.28 it/sec) -training >> step=5172500, episode=863 reward=0.799633 (422.72 it/sec) -training >> step=5172600, episode=863 reward=0.7864477 (405.47 it/sec) -training >> step=5172700, episode=863 reward=0.7619014 (405.08 it/sec) -training >> step=5172800, episode=863 reward=0.785454 (460.30 it/sec) -training >> step=5172900, episode=863 reward=0.7793669 (458.19 it/sec) -training >> step=5173000, episode=863 reward=0.7967793 (449.12 it/sec) -training >> step=5173100, episode=863 reward=0.7713919 (477.09 it/sec) -training >> step=5173200, episode=863 reward=0.8022222 (432.75 it/sec) -training >> step=5173300, episode=863 reward=0.7870208 (422.53 it/sec) -training >> step=5173400, episode=863 reward=0.795812 (428.30 it/sec) -training >> step=5173500, episode=863 reward=0.7702571 (418.19 it/sec) -training >> step=5173600, episode=863 reward=0.7940388 (409.76 it/sec) -training >> step=5173700, episode=863 reward=0.7991337 (390.75 it/sec) -training >> step=5173800, episode=863 reward=0.7964832 (445.91 it/sec) -training >> step=5173900, episode=863 reward=0.7974558 (437.62 it/sec) -training >> step=5174000, episode=863 reward=0.784798 (445.31 it/sec) -training >> step=5174100, episode=863 reward=0.7765453 (449.45 it/sec) -training >> step=5174200, episode=863 reward=0.7742953 (462.36 it/sec) -training >> step=5174300, episode=863 reward=0.7847871 (444.09 it/sec) -training >> step=5174400, episode=863 reward=0.7985103 (457.36 it/sec) -training >> step=5174500, episode=863 reward=0.7737511 (432.22 it/sec) -training >> step=5174600, episode=863 reward=0.771842 (463.63 it/sec) -training >> step=5174700, episode=863 reward=0.7804017 (470.25 it/sec) -training >> step=5174800, episode=863 reward=0.7845565 (491.09 it/sec) -training >> step=5174900, episode=863 reward=0.7763198 (498.79 it/sec) -training >> step=5175000, episode=863 reward=0.7866828 (386.76 it/sec) -training >> step=5175100, episode=863 reward=0.7879273 (478.58 it/sec) -training >> step=5175200, episode=863 reward=0.7766237 (495.89 it/sec) -training >> step=5175300, episode=863 reward=0.7884162 (446.47 it/sec) -training >> step=5175400, episode=863 reward=0.7718995 (455.56 it/sec) -training >> step=5175500, episode=863 reward=0.7825944 (471.96 it/sec) -training >> step=5175600, episode=863 reward=0.7714606 (461.60 it/sec) -training >> step=5175700, episode=863 reward=0.7666742 (438.17 it/sec) -training >> step=5175800, episode=863 reward=0.7927087 (396.83 it/sec) -training >> step=5175900, episode=863 reward=0.7635386 (441.67 it/sec) -training >> step=5176000, episode=863 reward=0.7723343 (479.29 it/sec) -training >> step=5176100, episode=863 reward=0.7544382 (453.69 it/sec) -training >> step=5176200, episode=863 reward=0.7687761 (437.26 it/sec) -training >> step=5176300, episode=863 reward=0.7575321 (448.96 it/sec) -training >> step=5176400, episode=863 reward=0.7740831 (450.05 it/sec) -training >> step=5176500, episode=863 reward=0.7534938 (455.89 it/sec) -training >> step=5176600, episode=863 reward=0.7744246 (474.44 it/sec) -training >> step=5176700, episode=863 reward=0.7696881 (421.72 it/sec) -training >> step=5176800, episode=863 reward=0.7871857 (443.57 it/sec) -training >> step=5176900, episode=863 reward=0.7890626 (461.94 it/sec) -training >> step=5177000, episode=863 reward=0.7812098 (478.27 it/sec) -training >> step=5177100, episode=863 reward=0.7864465 (412.02 it/sec) -training >> step=5177200, episode=863 reward=0.791716 (458.12 it/sec) -training >> step=5177300, episode=864 reward=0.7566801 (74.11 it/sec) -training >> step=5177400, episode=864 reward=0.7598504 (436.31 it/sec) -training >> step=5177500, episode=864 reward=0.7952184 (499.67 it/sec) -training >> step=5177600, episode=864 reward=0.7708893 (508.35 it/sec) -training >> step=5177700, episode=864 reward=0.7785219 (468.58 it/sec) -training >> step=5177800, episode=864 reward=0.7940766 (499.59 it/sec) -training >> step=5177900, episode=864 reward=0.7871588 (524.12 it/sec) -training >> step=5178000, episode=864 reward=0.7971088 (490.16 it/sec) -training >> step=5178100, episode=864 reward=0.7960473 (534.44 it/sec) -training >> step=5178200, episode=864 reward=0.759257 (518.84 it/sec) -training >> step=5178300, episode=864 reward=0.780241 (494.62 it/sec) -training >> step=5178400, episode=864 reward=0.7839358 (508.67 it/sec) -training >> step=5178500, episode=864 reward=0.780645 (539.91 it/sec) -training >> step=5178600, episode=864 reward=0.761824 (501.11 it/sec) -training >> step=5178700, episode=864 reward=0.7837005 (498.92 it/sec) -training >> step=5178800, episode=864 reward=0.8055641 (500.35 it/sec) -training >> step=5178900, episode=864 reward=0.7935771 (510.55 it/sec) -training >> step=5179000, episode=864 reward=0.7775433 (449.53 it/sec) -training >> step=5179100, episode=864 reward=0.7698433 (503.08 it/sec) -training >> step=5179200, episode=864 reward=0.7614022 (539.21 it/sec) -training >> step=5179300, episode=864 reward=0.8011371 (503.58 it/sec) -training >> step=5179400, episode=864 reward=0.7896436 (501.58 it/sec) -training >> step=5179500, episode=864 reward=0.7726026 (522.38 it/sec) -training >> step=5179600, episode=864 reward=0.7757668 (525.66 it/sec) -training >> step=5179700, episode=864 reward=0.7866696 (491.94 it/sec) -training >> step=5179800, episode=864 reward=0.7717498 (496.46 it/sec) -training >> step=5179900, episode=864 reward=0.7610881 (502.88 it/sec) -training >> step=5180000, episode=864 reward=0.792609 (525.98 it/sec) -training >> step=5180100, episode=864 reward=0.7690313 (509.37 it/sec) -training >> step=5180200, episode=864 reward=0.7611579 (419.18 it/sec) -training >> step=5180300, episode=864 reward=0.7683002 (550.47 it/sec) -training >> step=5180400, episode=864 reward=0.7821496 (488.15 it/sec) -training >> step=5180500, episode=864 reward=0.7876949 (508.03 it/sec) -training >> step=5180600, episode=864 reward=0.7731941 (501.56 it/sec) -training >> step=5180700, episode=864 reward=0.7690502 (548.88 it/sec) -training >> step=5180800, episode=864 reward=0.7822898 (488.61 it/sec) -training >> step=5180900, episode=864 reward=0.7915656 (516.02 it/sec) -training >> step=5181000, episode=864 reward=0.7925068 (485.59 it/sec) -training >> step=5181100, episode=864 reward=0.7704921 (529.63 it/sec) -training >> step=5181200, episode=864 reward=0.7880527 (500.15 it/sec) -training >> step=5181300, episode=864 reward=0.7436242 (494.84 it/sec) -training >> step=5181400, episode=864 reward=0.7729739 (524.67 it/sec) -training >> step=5181500, episode=864 reward=0.782388 (531.59 it/sec) -training >> step=5181600, episode=864 reward=0.7677373 (487.82 it/sec) -training >> step=5181700, episode=864 reward=0.7867907 (476.83 it/sec) -training >> step=5181800, episode=864 reward=0.7696303 (541.06 it/sec) -training >> step=5181900, episode=864 reward=0.7555836 (468.81 it/sec) -training >> step=5182000, episode=864 reward=0.7732303 (486.47 it/sec) -training >> step=5182100, episode=864 reward=0.8022037 (514.30 it/sec) -training >> step=5182200, episode=864 reward=0.7665894 (499.39 it/sec) -training >> step=5182300, episode=864 reward=0.7539916 (496.58 it/sec) -training >> step=5182400, episode=864 reward=0.7580698 (474.33 it/sec) -training >> step=5182500, episode=864 reward=0.7738022 (497.87 it/sec) -training >> step=5182600, episode=864 reward=0.7921272 (512.17 it/sec) -training >> step=5182700, episode=864 reward=0.7846702 (473.84 it/sec) -training >> step=5182800, episode=864 reward=0.7972512 (510.97 it/sec) -training >> step=5182900, episode=864 reward=0.7850178 (470.82 it/sec) -training >> step=5183000, episode=864 reward=0.76798 (454.69 it/sec) -training >> step=5183100, episode=864 reward=0.7728577 (474.32 it/sec) -training >> step=5183200, episode=864 reward=0.7675707 (428.33 it/sec) -training >> step=5183300, episode=865 reward=0.7673696 (87.37 it/sec) -training >> step=5183400, episode=865 reward=0.7697603 (458.56 it/sec) -training >> step=5183500, episode=865 reward=0.7796575 (488.60 it/sec) -training >> step=5183600, episode=865 reward=0.7933657 (479.32 it/sec) -training >> step=5183700, episode=865 reward=0.777624 (475.99 it/sec) -training >> step=5183800, episode=865 reward=0.7899336 (501.64 it/sec) -training >> step=5183900, episode=865 reward=0.7896445 (485.18 it/sec) -training >> step=5184000, episode=865 reward=0.7878227 (484.27 it/sec) -training >> step=5184100, episode=865 reward=0.7819799 (500.56 it/sec) -training >> step=5184200, episode=865 reward=0.764621 (473.99 it/sec) -training >> step=5184300, episode=865 reward=0.7806143 (444.61 it/sec) -training >> step=5184400, episode=865 reward=0.7574419 (471.36 it/sec) -training >> step=5184500, episode=865 reward=0.7994557 (460.88 it/sec) -training >> step=5184600, episode=865 reward=0.7820352 (478.92 it/sec) -training >> step=5184700, episode=865 reward=0.7848367 (460.71 it/sec) -training >> step=5184800, episode=865 reward=0.7818979 (438.92 it/sec) -training >> step=5184900, episode=865 reward=0.7665966 (430.69 it/sec) -training >> step=5185000, episode=865 reward=0.767752 (447.58 it/sec) -training >> step=5185100, episode=865 reward=0.770323 (458.76 it/sec) -training >> step=5185200, episode=865 reward=0.7739181 (465.69 it/sec) -training >> step=5185300, episode=865 reward=0.7596526 (437.47 it/sec) -training >> step=5185400, episode=865 reward=0.7803532 (449.48 it/sec) -training >> step=5185500, episode=865 reward=0.7809042 (503.69 it/sec) -training >> step=5185600, episode=865 reward=0.7951944 (482.74 it/sec) -training >> step=5185700, episode=865 reward=0.7831386 (478.63 it/sec) -training >> step=5185800, episode=865 reward=0.7588745 (439.97 it/sec) -training >> step=5185900, episode=865 reward=0.7751089 (455.70 it/sec) -training >> step=5186000, episode=865 reward=0.7870546 (444.02 it/sec) -training >> step=5186100, episode=865 reward=0.7780301 (463.98 it/sec) -training >> step=5186200, episode=865 reward=0.7997813 (507.93 it/sec) -training >> step=5186300, episode=865 reward=0.7821622 (498.41 it/sec) -training >> step=5186400, episode=865 reward=0.7854775 (484.55 it/sec) -training >> step=5186500, episode=865 reward=0.7774985 (475.29 it/sec) -training >> step=5186600, episode=865 reward=0.780148 (468.56 it/sec) -training >> step=5186700, episode=865 reward=0.7700844 (500.98 it/sec) -training >> step=5186800, episode=865 reward=0.8071465 (478.07 it/sec) -training >> step=5186900, episode=865 reward=0.7634853 (467.81 it/sec) -training >> step=5187000, episode=865 reward=0.7757472 (498.85 it/sec) -training >> step=5187100, episode=865 reward=0.7563542 (419.02 it/sec) -training >> step=5187200, episode=865 reward=0.7745702 (477.24 it/sec) -training >> step=5187300, episode=865 reward=0.773622 (466.30 it/sec) -training >> step=5187400, episode=865 reward=0.7736622 (397.18 it/sec) -training >> step=5187500, episode=865 reward=0.8040391 (450.51 it/sec) -training >> step=5187600, episode=865 reward=0.7659158 (433.80 it/sec) -training >> step=5187700, episode=865 reward=0.7715372 (473.74 it/sec) -training >> step=5187800, episode=865 reward=0.7808125 (447.39 it/sec) -training >> step=5187900, episode=865 reward=0.7586226 (485.62 it/sec) -training >> step=5188000, episode=865 reward=0.7897781 (402.84 it/sec) -training >> step=5188100, episode=865 reward=0.7559279 (501.43 it/sec) -training >> step=5188200, episode=865 reward=0.7790231 (443.74 it/sec) -training >> step=5188300, episode=865 reward=0.7846382 (442.99 it/sec) -training >> step=5188400, episode=865 reward=0.795523 (481.20 it/sec) -training >> step=5188500, episode=865 reward=0.7655442 (445.34 it/sec) -training >> step=5188600, episode=865 reward=0.7621297 (474.85 it/sec) -training >> step=5188700, episode=865 reward=0.7586359 (458.81 it/sec) -training >> step=5188800, episode=865 reward=0.7756301 (522.64 it/sec) -training >> step=5188900, episode=865 reward=0.774223 (492.26 it/sec) -training >> step=5189000, episode=865 reward=0.7978768 (502.03 it/sec) -training >> step=5189100, episode=865 reward=0.7627783 (509.86 it/sec) -training >> step=5189200, episode=865 reward=0.7679213 (481.47 it/sec) -training >> step=5189300, episode=866 reward=0.7963241 (85.26 it/sec) -training >> step=5189400, episode=866 reward=0.752169 (437.18 it/sec) -training >> step=5189500, episode=866 reward=0.776737 (463.32 it/sec) -training >> step=5189600, episode=866 reward=0.7531629 (465.23 it/sec) -training >> step=5189700, episode=866 reward=0.7818101 (449.32 it/sec) -training >> step=5189800, episode=866 reward=0.7741297 (463.28 it/sec) -training >> step=5189900, episode=866 reward=0.7905309 (436.18 it/sec) -training >> step=5190000, episode=866 reward=0.781229 (439.36 it/sec) -training >> step=5190100, episode=866 reward=0.7911435 (491.28 it/sec) -training >> step=5190200, episode=866 reward=0.7795281 (432.64 it/sec) -training >> step=5190300, episode=866 reward=0.791217 (458.08 it/sec) -training >> step=5190400, episode=866 reward=0.7580161 (442.89 it/sec) -training >> step=5190500, episode=866 reward=0.7577614 (483.71 it/sec) -training >> step=5190600, episode=866 reward=0.7866768 (443.05 it/sec) -training >> step=5190700, episode=866 reward=0.7833768 (473.79 it/sec) -training >> step=5190800, episode=866 reward=0.7998933 (497.64 it/sec) -training >> step=5190900, episode=866 reward=0.7928876 (471.05 it/sec) -training >> step=5191000, episode=866 reward=0.7826104 (460.76 it/sec) -training >> step=5191100, episode=866 reward=0.7906638 (461.22 it/sec) -training >> step=5191200, episode=866 reward=0.7778222 (495.93 it/sec) -training >> step=5191300, episode=866 reward=0.7683867 (461.90 it/sec) -training >> step=5191400, episode=866 reward=0.7765947 (481.24 it/sec) -training >> step=5191500, episode=866 reward=0.777106 (473.09 it/sec) -training >> step=5191600, episode=866 reward=0.767846 (475.57 it/sec) -training >> step=5191700, episode=866 reward=0.7808031 (471.30 it/sec) -training >> step=5191800, episode=866 reward=0.8004621 (467.09 it/sec) -training >> step=5191900, episode=866 reward=0.7879817 (498.77 it/sec) -training >> step=5192000, episode=866 reward=0.7935404 (468.32 it/sec) -training >> step=5192100, episode=866 reward=0.7704191 (450.23 it/sec) -training >> step=5192200, episode=866 reward=0.7683291 (473.89 it/sec) -training >> step=5192300, episode=866 reward=0.783005 (490.06 it/sec) -training >> step=5192400, episode=866 reward=0.7771918 (471.14 it/sec) -training >> step=5192500, episode=866 reward=0.7723678 (406.24 it/sec) -training >> step=5192600, episode=866 reward=0.7553648 (433.67 it/sec) -training >> step=5192700, episode=866 reward=0.7748601 (499.76 it/sec) -training >> step=5192800, episode=866 reward=0.7713114 (448.88 it/sec) -training >> step=5192900, episode=866 reward=0.7832814 (440.00 it/sec) -training >> step=5193000, episode=866 reward=0.7717856 (449.77 it/sec) -training >> step=5193100, episode=866 reward=0.787325 (493.63 it/sec) -training >> step=5193200, episode=866 reward=0.7808104 (492.50 it/sec) -training >> step=5193300, episode=866 reward=0.7912792 (435.01 it/sec) -training >> step=5193400, episode=866 reward=0.7909139 (501.66 it/sec) -training >> step=5193500, episode=866 reward=0.7783678 (463.97 it/sec) -training >> step=5193600, episode=866 reward=0.7885944 (468.73 it/sec) -training >> step=5193700, episode=866 reward=0.7854093 (503.03 it/sec) -training >> step=5193800, episode=866 reward=0.7891858 (483.18 it/sec) -training >> step=5193900, episode=866 reward=0.7683526 (473.18 it/sec) -training >> step=5194000, episode=866 reward=0.7851678 (453.38 it/sec) -training >> step=5194100, episode=866 reward=0.7846895 (498.88 it/sec) -training >> step=5194200, episode=866 reward=0.7843034 (485.47 it/sec) -training >> step=5194300, episode=866 reward=0.8059732 (484.60 it/sec) -training >> step=5194400, episode=866 reward=0.7821087 (467.32 it/sec) -training >> step=5194500, episode=866 reward=0.7850038 (473.24 it/sec) -training >> step=5194600, episode=866 reward=0.7928037 (485.36 it/sec) -training >> step=5194700, episode=866 reward=0.7564991 (518.51 it/sec) -training >> step=5194800, episode=866 reward=0.7644203 (457.59 it/sec) -training >> step=5194900, episode=866 reward=0.7881791 (481.57 it/sec) -training >> step=5195000, episode=866 reward=0.7769002 (454.92 it/sec) -training >> step=5195100, episode=866 reward=0.7803365 (471.24 it/sec) -training >> step=5195200, episode=866 reward=0.7836733 (501.17 it/sec) -training >> step=5195300, episode=867 reward=0.7627398 (66.91 it/sec) -training >> step=5195400, episode=867 reward=0.7857322 (469.38 it/sec) -training >> step=5195500, episode=867 reward=0.7823238 (471.02 it/sec) -training >> step=5195600, episode=867 reward=0.7686825 (447.59 it/sec) -training >> step=5195700, episode=867 reward=0.7789765 (469.27 it/sec) -training >> step=5195800, episode=867 reward=0.7750028 (495.56 it/sec) -training >> step=5195900, episode=867 reward=0.7698897 (486.36 it/sec) -training >> step=5196000, episode=867 reward=0.7692915 (469.13 it/sec) -training >> step=5196100, episode=867 reward=0.7601265 (489.67 it/sec) -training >> step=5196200, episode=867 reward=0.7702038 (501.88 it/sec) -training >> step=5196300, episode=867 reward=0.7893052 (499.28 it/sec) -training >> step=5196400, episode=867 reward=0.7675062 (471.50 it/sec) -training >> step=5196500, episode=867 reward=0.7672439 (480.63 it/sec) -training >> step=5196600, episode=867 reward=0.7853492 (467.57 it/sec) -training >> step=5196700, episode=867 reward=0.768624 (465.53 it/sec) -training >> step=5196800, episode=867 reward=0.7926239 (432.25 it/sec) -training >> step=5196900, episode=867 reward=0.7940541 (473.60 it/sec) -training >> step=5197000, episode=867 reward=0.7896931 (428.69 it/sec) -training >> step=5197100, episode=867 reward=0.790635 (445.48 it/sec) -training >> step=5197200, episode=867 reward=0.7777053 (462.31 it/sec) -training >> step=5197300, episode=867 reward=0.7896571 (483.28 it/sec) -training >> step=5197400, episode=867 reward=0.786191 (484.44 it/sec) -training >> step=5197500, episode=867 reward=0.7601287 (491.60 it/sec) -training >> step=5197600, episode=867 reward=0.7746641 (467.85 it/sec) -training >> step=5197700, episode=867 reward=0.7660218 (480.81 it/sec) -training >> step=5197800, episode=867 reward=0.7889988 (458.78 it/sec) -training >> step=5197900, episode=867 reward=0.7786517 (511.45 it/sec) -training >> step=5198000, episode=867 reward=0.7899054 (503.00 it/sec) -training >> step=5198100, episode=867 reward=0.7718472 (473.70 it/sec) -training >> step=5198200, episode=867 reward=0.7802037 (493.53 it/sec) -training >> step=5198300, episode=867 reward=0.7565927 (423.10 it/sec) -training >> step=5198400, episode=867 reward=0.7860177 (487.16 it/sec) -training >> step=5198500, episode=867 reward=0.7850285 (475.84 it/sec) -training >> step=5198600, episode=867 reward=0.7796289 (516.34 it/sec) -training >> step=5198700, episode=867 reward=0.7846712 (492.12 it/sec) -training >> step=5198800, episode=867 reward=0.7871796 (414.26 it/sec) -training >> step=5198900, episode=867 reward=0.7981269 (452.10 it/sec) -training >> step=5199000, episode=867 reward=0.7833172 (455.23 it/sec) -training >> step=5199100, episode=867 reward=0.7881109 (459.27 it/sec) -training >> step=5199200, episode=867 reward=0.7644873 (461.58 it/sec) -training >> step=5199300, episode=867 reward=0.7883283 (488.04 it/sec) -training >> step=5199400, episode=867 reward=0.7613258 (465.03 it/sec) -training >> step=5199500, episode=867 reward=0.789468 (453.97 it/sec) -training >> step=5199600, episode=867 reward=0.7751981 (459.29 it/sec) -training >> step=5199700, episode=867 reward=0.7944429 (509.04 it/sec) -training >> step=5199800, episode=867 reward=0.7661296 (475.62 it/sec) -training >> step=5199900, episode=867 reward=0.7563277 (473.61 it/sec) -training >> step=5200000, episode=867 reward=0.7647364 (502.77 it/sec) -training >> step=5200100, episode=867 reward=0.7667186 (494.21 it/sec) -training >> step=5200200, episode=867 reward=0.7657415 (505.38 it/sec) -training >> step=5200300, episode=867 reward=0.7489869 (508.81 it/sec) -training >> step=5200400, episode=867 reward=0.7817287 (546.12 it/sec) -training >> step=5200500, episode=867 reward=0.7915096 (499.04 it/sec) -training >> step=5200600, episode=867 reward=0.7815243 (517.44 it/sec) -training >> step=5200700, episode=867 reward=0.7950163 (512.60 it/sec) -training >> step=5200800, episode=867 reward=0.7704961 (543.55 it/sec) -training >> step=5200900, episode=867 reward=0.7714074 (549.29 it/sec) -training >> step=5201000, episode=867 reward=0.7829106 (515.19 it/sec) -training >> step=5201100, episode=867 reward=0.7698815 (533.36 it/sec) -training >> step=5201200, episode=867 reward=0.757787 (503.03 it/sec) -training >> step=5201300, episode=868 reward=0.7568568 (63.01 it/sec) -training >> step=5201400, episode=868 reward=0.7631165 (502.91 it/sec) -training >> step=5201500, episode=868 reward=0.7631721 (495.71 it/sec) -training >> step=5201600, episode=868 reward=0.7715166 (448.41 it/sec) -training >> step=5201700, episode=868 reward=0.7928001 (512.73 it/sec) -training >> step=5201800, episode=868 reward=0.7842375 (482.34 it/sec) -training >> step=5201900, episode=868 reward=0.7720204 (494.13 it/sec) -training >> step=5202000, episode=868 reward=0.7736528 (500.05 it/sec) -training >> step=5202100, episode=868 reward=0.7878351 (499.87 it/sec) -training >> step=5202200, episode=868 reward=0.7769818 (472.17 it/sec) -training >> step=5202300, episode=868 reward=0.8020061 (505.39 it/sec) -training >> step=5202400, episode=868 reward=0.771665 (504.56 it/sec) -training >> step=5202500, episode=868 reward=0.7725581 (472.24 it/sec) -training >> step=5202600, episode=868 reward=0.7800651 (502.25 it/sec) -training >> step=5202700, episode=868 reward=0.7848272 (497.58 it/sec) -training >> step=5202800, episode=868 reward=0.7988306 (478.96 it/sec) -training >> step=5202900, episode=868 reward=0.7631283 (472.19 it/sec) -training >> step=5203000, episode=868 reward=0.7889415 (460.14 it/sec) -training >> step=5203100, episode=868 reward=0.770543 (525.49 it/sec) -training >> step=5203200, episode=868 reward=0.750965 (480.68 it/sec) -training >> step=5203300, episode=868 reward=0.7872729 (477.30 it/sec) -training >> step=5203400, episode=868 reward=0.7766404 (491.43 it/sec) -training >> step=5203500, episode=868 reward=0.7740103 (445.85 it/sec) -training >> step=5203600, episode=868 reward=0.7863822 (486.27 it/sec) -training >> step=5203700, episode=868 reward=0.787124 (444.87 it/sec) -training >> step=5203800, episode=868 reward=0.7903571 (511.50 it/sec) -training >> step=5203900, episode=868 reward=0.7714316 (453.40 it/sec) -training >> step=5204000, episode=868 reward=0.7800869 (442.08 it/sec) -training >> step=5204100, episode=868 reward=0.7935573 (475.25 it/sec) -training >> step=5204200, episode=868 reward=0.7784632 (466.73 it/sec) -training >> step=5204300, episode=868 reward=0.7860539 (460.28 it/sec) -training >> step=5204400, episode=868 reward=0.7868782 (466.40 it/sec) -training >> step=5204500, episode=868 reward=0.7718033 (497.25 it/sec) -training >> step=5204600, episode=868 reward=0.771926 (479.40 it/sec) -training >> step=5204700, episode=868 reward=0.778896 (455.56 it/sec) -training >> step=5204800, episode=868 reward=0.7874297 (483.42 it/sec) -training >> step=5204900, episode=868 reward=0.7708066 (469.18 it/sec) -training >> step=5205000, episode=868 reward=0.7842293 (455.76 it/sec) -training >> step=5205100, episode=868 reward=0.8061416 (468.37 it/sec) -training >> step=5205200, episode=868 reward=0.7575803 (489.99 it/sec) -training >> step=5205300, episode=868 reward=0.7753487 (494.43 it/sec) -training >> step=5205400, episode=868 reward=0.7734867 (441.23 it/sec) -training >> step=5205500, episode=868 reward=0.7840523 (460.01 it/sec) -training >> step=5205600, episode=868 reward=0.7683468 (506.49 it/sec) -training >> step=5205700, episode=868 reward=0.7779698 (473.12 it/sec) -training >> step=5205800, episode=868 reward=0.7801884 (447.77 it/sec) -training >> step=5205900, episode=868 reward=0.7830593 (432.28 it/sec) -training >> step=5206000, episode=868 reward=0.7864176 (451.13 it/sec) -training >> step=5206100, episode=868 reward=0.7820983 (474.73 it/sec) -training >> step=5206200, episode=868 reward=0.7853835 (490.67 it/sec) -training >> step=5206300, episode=868 reward=0.780471 (514.07 it/sec) -training >> step=5206400, episode=868 reward=0.7786093 (402.69 it/sec) -training >> step=5206500, episode=868 reward=0.7709909 (486.68 it/sec) -training >> step=5206600, episode=868 reward=0.7809923 (489.91 it/sec) -training >> step=5206700, episode=868 reward=0.7775044 (467.54 it/sec) -training >> step=5206800, episode=868 reward=0.7835332 (485.74 it/sec) -training >> step=5206900, episode=868 reward=0.7939305 (433.10 it/sec) -training >> step=5207000, episode=868 reward=0.7632307 (501.51 it/sec) -training >> step=5207100, episode=868 reward=0.7792033 (496.81 it/sec) -training >> step=5207200, episode=868 reward=0.7487786 (449.14 it/sec) -training >> step=5207300, episode=869 reward=0.7526172 (158.57 it/sec) -training >> step=5207400, episode=869 reward=0.7792066 (481.69 it/sec) -training >> step=5207500, episode=869 reward=0.7646519 (458.74 it/sec) -training >> step=5207600, episode=869 reward=0.7771804 (429.44 it/sec) -training >> step=5207700, episode=869 reward=0.7802062 (468.09 it/sec) -training >> step=5207800, episode=869 reward=0.7817286 (453.40 it/sec) -training >> step=5207900, episode=869 reward=0.7761683 (405.07 it/sec) -training >> step=5208000, episode=869 reward=0.7770628 (379.29 it/sec) -training >> step=5208100, episode=869 reward=0.7736143 (411.48 it/sec) -training >> step=5208200, episode=869 reward=0.7921286 (311.19 it/sec) -training >> step=5208300, episode=869 reward=0.769214 (419.77 it/sec) -training >> step=5208400, episode=869 reward=0.760928 (408.52 it/sec) -training >> step=5208500, episode=869 reward=0.7841934 (376.82 it/sec) -training >> step=5208600, episode=869 reward=0.7847752 (431.74 it/sec) -training >> step=5208700, episode=869 reward=0.7929245 (458.73 it/sec) -training >> step=5208800, episode=869 reward=0.7771588 (429.77 it/sec) -training >> step=5208900, episode=869 reward=0.7733655 (463.76 it/sec) -training >> step=5209000, episode=869 reward=0.7783946 (434.16 it/sec) -training >> step=5209100, episode=869 reward=0.7725719 (404.68 it/sec) -training >> step=5209200, episode=869 reward=0.7859448 (459.56 it/sec) -training >> step=5209300, episode=869 reward=0.7965567 (462.59 it/sec) -training >> step=5209400, episode=869 reward=0.7854753 (433.91 it/sec) -training >> step=5209500, episode=869 reward=0.7778366 (424.75 it/sec) -training >> step=5209600, episode=869 reward=0.7941114 (438.15 it/sec) -training >> step=5209700, episode=869 reward=0.7868612 (441.91 it/sec) -training >> step=5209800, episode=869 reward=0.7831194 (390.25 it/sec) -training >> step=5209900, episode=869 reward=0.7680619 (428.13 it/sec) -training >> step=5210000, episode=869 reward=0.781533 (437.10 it/sec) -training >> step=5210100, episode=869 reward=0.7887511 (428.84 it/sec) -training >> step=5210200, episode=869 reward=0.7886025 (430.08 it/sec) -training >> step=5210300, episode=869 reward=0.7810768 (497.68 it/sec) -training >> step=5210400, episode=869 reward=0.7744345 (439.71 it/sec) -training >> step=5210500, episode=869 reward=0.7690103 (428.98 it/sec) -training >> step=5210600, episode=869 reward=0.7744971 (438.05 it/sec) -training >> step=5210700, episode=869 reward=0.7854504 (433.63 it/sec) -training >> step=5210800, episode=869 reward=0.7750421 (455.80 it/sec) -training >> step=5210900, episode=869 reward=0.7928286 (454.73 it/sec) -training >> step=5211000, episode=869 reward=0.7911518 (442.88 it/sec) -training >> step=5211100, episode=869 reward=0.7855121 (430.53 it/sec) -training >> step=5211200, episode=869 reward=0.7641207 (436.21 it/sec) -training >> step=5211300, episode=869 reward=0.7885699 (455.43 it/sec) -training >> step=5211400, episode=869 reward=0.7703182 (483.22 it/sec) -training >> step=5211500, episode=869 reward=0.7594598 (457.05 it/sec) -training >> step=5211600, episode=869 reward=0.7788482 (434.96 it/sec) -training >> step=5211700, episode=869 reward=0.7708704 (489.02 it/sec) -training >> step=5211800, episode=869 reward=0.7769283 (464.27 it/sec) -training >> step=5211900, episode=869 reward=0.7793097 (485.43 it/sec) -training >> step=5212000, episode=869 reward=0.7881684 (474.15 it/sec) -training >> step=5212100, episode=869 reward=0.7723829 (489.15 it/sec) -training >> step=5212200, episode=869 reward=0.7779373 (405.35 it/sec) -training >> step=5212300, episode=869 reward=0.764772 (445.86 it/sec) -training >> step=5212400, episode=869 reward=0.7760951 (477.48 it/sec) -training >> step=5212500, episode=869 reward=0.7848386 (485.99 it/sec) -training >> step=5212600, episode=869 reward=0.7851611 (430.80 it/sec) -training >> step=5212700, episode=869 reward=0.7703865 (456.44 it/sec) -training >> step=5212800, episode=869 reward=0.786858 (472.26 it/sec) -training >> step=5212900, episode=869 reward=0.7720847 (487.64 it/sec) -training >> step=5213000, episode=869 reward=0.7921513 (447.53 it/sec) -training >> step=5213100, episode=869 reward=0.7585943 (451.30 it/sec) -training >> step=5213200, episode=869 reward=0.7753401 (428.96 it/sec) -training >> step=5213300, episode=870 reward=0.772908 (66.23 it/sec) -training >> step=5213400, episode=870 reward=0.7892756 (436.71 it/sec) -training >> step=5213500, episode=870 reward=0.7755412 (481.33 it/sec) -training >> step=5213600, episode=870 reward=0.7884021 (441.39 it/sec) -training >> step=5213700, episode=870 reward=0.7713505 (449.09 it/sec) -training >> step=5213800, episode=870 reward=0.7828448 (447.69 it/sec) -training >> step=5213900, episode=870 reward=0.7911851 (455.14 it/sec) -training >> step=5214000, episode=870 reward=0.7930933 (462.85 it/sec) -training >> step=5214100, episode=870 reward=0.7880142 (492.92 it/sec) -training >> step=5214200, episode=870 reward=0.7890491 (461.43 it/sec) -training >> step=5214300, episode=870 reward=0.7986608 (467.52 it/sec) -training >> step=5214400, episode=870 reward=0.7645392 (496.91 it/sec) -training >> step=5214500, episode=870 reward=0.7893062 (438.40 it/sec) -training >> step=5214600, episode=870 reward=0.7795667 (465.82 it/sec) -training >> step=5214700, episode=870 reward=0.7845742 (491.54 it/sec) -training >> step=5214800, episode=870 reward=0.7959018 (454.63 it/sec) -training >> step=5214900, episode=870 reward=0.7805508 (467.41 it/sec) -training >> step=5215000, episode=870 reward=0.7827734 (480.25 it/sec) -training >> step=5215100, episode=870 reward=0.7765152 (423.33 it/sec) -training >> step=5215200, episode=870 reward=0.7657539 (456.17 it/sec) -training >> step=5215300, episode=870 reward=0.7867272 (445.43 it/sec) -training >> step=5215400, episode=870 reward=0.7840614 (507.80 it/sec) -training >> step=5215500, episode=870 reward=0.7954203 (475.47 it/sec) -training >> step=5215600, episode=870 reward=0.7889377 (466.92 it/sec) -training >> step=5215700, episode=870 reward=0.7689483 (476.34 it/sec) -training >> step=5215800, episode=870 reward=0.7837511 (469.19 it/sec) -training >> step=5215900, episode=870 reward=0.7642431 (464.58 it/sec) -training >> step=5216000, episode=870 reward=0.7994053 (463.57 it/sec) -training >> step=5216100, episode=870 reward=0.7811651 (471.14 it/sec) -training >> step=5216200, episode=870 reward=0.7742323 (430.77 it/sec) -training >> step=5216300, episode=870 reward=0.7897101 (382.19 it/sec) -training >> step=5216400, episode=870 reward=0.7669953 (454.84 it/sec) -training >> step=5216500, episode=870 reward=0.7727605 (470.92 it/sec) -training >> step=5216600, episode=870 reward=0.7842649 (501.68 it/sec) -training >> step=5216700, episode=870 reward=0.7856098 (453.03 it/sec) -training >> step=5216800, episode=870 reward=0.7973878 (485.32 it/sec) -training >> step=5216900, episode=870 reward=0.7659932 (447.13 it/sec) -training >> step=5217000, episode=870 reward=0.7822452 (427.09 it/sec) -training >> step=5217100, episode=870 reward=0.7897099 (420.34 it/sec) -training >> step=5217200, episode=870 reward=0.7777259 (511.49 it/sec) -training >> step=5217300, episode=870 reward=0.7936595 (481.44 it/sec) -training >> step=5217400, episode=870 reward=0.7618556 (446.37 it/sec) -training >> step=5217500, episode=870 reward=0.7499629 (486.81 it/sec) -training >> step=5217600, episode=870 reward=0.7954433 (445.59 it/sec) -training >> step=5217700, episode=870 reward=0.7730682 (448.62 it/sec) -training >> step=5217800, episode=870 reward=0.7847425 (446.68 it/sec) -training >> step=5217900, episode=870 reward=0.8012836 (490.96 it/sec) -training >> step=5218000, episode=870 reward=0.7754821 (441.88 it/sec) -training >> step=5218100, episode=870 reward=0.7755395 (443.45 it/sec) -training >> step=5218200, episode=870 reward=0.7825707 (436.46 it/sec) -training >> step=5218300, episode=870 reward=0.7863448 (438.07 it/sec) -training >> step=5218400, episode=870 reward=0.7937363 (441.89 it/sec) -training >> step=5218500, episode=870 reward=0.7777429 (464.47 it/sec) -training >> step=5218600, episode=870 reward=0.7913089 (445.39 it/sec) -training >> step=5218700, episode=870 reward=0.7913588 (459.47 it/sec) -training >> step=5218800, episode=870 reward=0.7845742 (464.70 it/sec) -training >> step=5218900, episode=870 reward=0.7712057 (471.79 it/sec) -training >> step=5219000, episode=870 reward=0.7652619 (446.80 it/sec) -training >> step=5219100, episode=870 reward=0.794952 (449.42 it/sec) -training >> step=5219200, episode=870 reward=0.8032157 (465.90 it/sec) -training >> step=5219300, episode=871 reward=0.7924234 (97.40 it/sec) -training >> step=5219400, episode=871 reward=0.7872692 (449.02 it/sec) -training >> step=5219500, episode=871 reward=0.78167 (463.48 it/sec) -training >> step=5219600, episode=871 reward=0.7592155 (442.45 it/sec) -training >> step=5219700, episode=871 reward=0.7994462 (475.88 it/sec) -training >> step=5219800, episode=871 reward=0.7986119 (430.45 it/sec) -training >> step=5219900, episode=871 reward=0.782786 (444.72 it/sec) -training >> step=5220000, episode=871 reward=0.7808679 (449.87 it/sec) -training >> step=5220100, episode=871 reward=0.7532648 (458.89 it/sec) -training >> step=5220200, episode=871 reward=0.7799558 (495.07 it/sec) -training >> step=5220300, episode=871 reward=0.7763985 (445.88 it/sec) -training >> step=5220400, episode=871 reward=0.7822321 (460.89 it/sec) -training >> step=5220500, episode=871 reward=0.7881072 (442.84 it/sec) -training >> step=5220600, episode=871 reward=0.7670969 (393.91 it/sec) -training >> step=5220700, episode=871 reward=0.7848523 (460.74 it/sec) -training >> step=5220800, episode=871 reward=0.7871428 (473.77 it/sec) -training >> step=5220900, episode=871 reward=0.7884916 (471.19 it/sec) -training >> step=5221000, episode=871 reward=0.7586147 (482.24 it/sec) -training >> step=5221100, episode=871 reward=0.7800473 (449.88 it/sec) -training >> step=5221200, episode=871 reward=0.776431 (436.01 it/sec) -training >> step=5221300, episode=871 reward=0.7867376 (487.22 it/sec) -training >> step=5221400, episode=871 reward=0.771948 (467.52 it/sec) -training >> step=5221500, episode=871 reward=0.7753968 (415.75 it/sec) -training >> step=5221600, episode=871 reward=0.7898151 (444.58 it/sec) -training >> step=5221700, episode=871 reward=0.7836336 (473.73 it/sec) -training >> step=5221800, episode=871 reward=0.7892523 (478.47 it/sec) -training >> step=5221900, episode=871 reward=0.7965488 (456.80 it/sec) -training >> step=5222000, episode=871 reward=0.7875158 (507.81 it/sec) -training >> step=5222100, episode=871 reward=0.8012457 (488.09 it/sec) -training >> step=5222200, episode=871 reward=0.7854266 (457.75 it/sec) -training >> step=5222300, episode=871 reward=0.7991488 (460.03 it/sec) -training >> step=5222400, episode=871 reward=0.789453 (486.37 it/sec) -training >> step=5222500, episode=871 reward=0.7842106 (464.73 it/sec) -training >> step=5222600, episode=871 reward=0.7830477 (467.05 it/sec) -training >> step=5222700, episode=871 reward=0.7824239 (485.38 it/sec) -training >> step=5222800, episode=871 reward=0.7864637 (479.25 it/sec) -training >> step=5222900, episode=871 reward=0.7702758 (458.98 it/sec) -training >> step=5223000, episode=871 reward=0.7786529 (516.17 it/sec) -training >> step=5223100, episode=871 reward=0.8113913 (447.69 it/sec) -training >> step=5223200, episode=871 reward=0.7711417 (485.72 it/sec) -training >> step=5223300, episode=871 reward=0.7953508 (488.36 it/sec) -training >> step=5223400, episode=871 reward=0.7789931 (427.37 it/sec) -training >> step=5223500, episode=871 reward=0.7766235 (514.19 it/sec) -training >> step=5223600, episode=871 reward=0.7726725 (473.14 it/sec) -training >> step=5223700, episode=871 reward=0.7746482 (416.24 it/sec) -training >> step=5223800, episode=871 reward=0.7768535 (461.40 it/sec) -training >> step=5223900, episode=871 reward=0.7925047 (486.20 it/sec) -training >> step=5224000, episode=871 reward=0.7847852 (479.38 it/sec) -training >> step=5224100, episode=871 reward=0.7799498 (477.99 it/sec) -training >> step=5224200, episode=871 reward=0.7923794 (539.65 it/sec) -training >> step=5224300, episode=871 reward=0.7748907 (545.70 it/sec) -training >> step=5224400, episode=871 reward=0.7617572 (465.49 it/sec) -training >> step=5224500, episode=871 reward=0.76456 (505.97 it/sec) -training >> step=5224600, episode=871 reward=0.7589282 (543.07 it/sec) -training >> step=5224700, episode=871 reward=0.760605 (490.74 it/sec) -training >> step=5224800, episode=871 reward=0.7919991 (520.77 it/sec) -training >> step=5224900, episode=871 reward=0.7811055 (513.91 it/sec) -training >> step=5225000, episode=871 reward=0.7751542 (503.41 it/sec) -training >> step=5225100, episode=871 reward=0.7924823 (501.25 it/sec) -training >> step=5225200, episode=871 reward=0.7774796 (468.40 it/sec) -training >> step=5225300, episode=872 reward=0.7834908 (89.57 it/sec) -training >> step=5225400, episode=872 reward=0.797245 (448.52 it/sec) -training >> step=5225500, episode=872 reward=0.7838253 (472.43 it/sec) -training >> step=5225600, episode=872 reward=0.7736019 (457.33 it/sec) -training >> step=5225700, episode=872 reward=0.7661217 (512.21 it/sec) -training >> step=5225800, episode=872 reward=0.785578 (466.53 it/sec) -training >> step=5225900, episode=872 reward=0.7653729 (382.25 it/sec) -training >> step=5226000, episode=872 reward=0.7500029 (434.41 it/sec) -training >> step=5226100, episode=872 reward=0.7663637 (472.17 it/sec) -training >> step=5226200, episode=872 reward=0.7844924 (476.02 it/sec) -training >> step=5226300, episode=872 reward=0.7772924 (449.81 it/sec) -training >> step=5226400, episode=872 reward=0.7786976 (501.57 it/sec) -training >> step=5226500, episode=872 reward=0.785244 (487.63 it/sec) -training >> step=5226600, episode=872 reward=0.7826541 (460.54 it/sec) -training >> step=5226700, episode=872 reward=0.790448 (514.47 it/sec) -training >> step=5226800, episode=872 reward=0.7866582 (470.84 it/sec) -training >> step=5226900, episode=872 reward=0.7829337 (424.29 it/sec) -training >> step=5227000, episode=872 reward=0.7902418 (452.78 it/sec) -training >> step=5227100, episode=872 reward=0.7758003 (453.24 it/sec) -training >> step=5227200, episode=872 reward=0.7901089 (404.01 it/sec) -training >> step=5227300, episode=872 reward=0.7837069 (498.78 it/sec) -training >> step=5227400, episode=872 reward=0.7867069 (470.00 it/sec) -training >> step=5227500, episode=872 reward=0.7631878 (409.44 it/sec) -training >> step=5227600, episode=872 reward=0.7838916 (475.90 it/sec) -training >> step=5227700, episode=872 reward=0.7714796 (487.42 it/sec) -training >> step=5227800, episode=872 reward=0.7795581 (460.80 it/sec) -training >> step=5227900, episode=872 reward=0.781706 (512.63 it/sec) -training >> step=5228000, episode=872 reward=0.794235 (452.53 it/sec) -training >> step=5228100, episode=872 reward=0.7847103 (528.76 it/sec) -training >> step=5228200, episode=872 reward=0.7807679 (511.67 it/sec) -training >> step=5228300, episode=872 reward=0.7839951 (495.06 it/sec) -training >> step=5228400, episode=872 reward=0.7725317 (481.62 it/sec) -training >> step=5228500, episode=872 reward=0.7794309 (496.34 it/sec) -training >> step=5228600, episode=872 reward=0.7873515 (476.04 it/sec) -training >> step=5228700, episode=872 reward=0.787329 (501.85 it/sec) -training >> step=5228800, episode=872 reward=0.7724639 (560.31 it/sec) -training >> step=5228900, episode=872 reward=0.8053918 (473.91 it/sec) -training >> step=5229000, episode=872 reward=0.7914051 (506.90 it/sec) -training >> step=5229100, episode=872 reward=0.7927716 (551.13 it/sec) -training >> step=5229200, episode=872 reward=0.7885618 (527.26 it/sec) -training >> step=5229300, episode=872 reward=0.786714 (494.17 it/sec) -training >> step=5229400, episode=872 reward=0.7746162 (473.90 it/sec) -training >> step=5229500, episode=872 reward=0.7824343 (561.98 it/sec) -training >> step=5229600, episode=872 reward=0.7809846 (478.26 it/sec) -training >> step=5229700, episode=872 reward=0.7867937 (497.33 it/sec) -training >> step=5229800, episode=872 reward=0.7887506 (506.99 it/sec) -training >> step=5229900, episode=872 reward=0.7773855 (524.43 it/sec) -training >> step=5230000, episode=872 reward=0.7819334 (524.56 it/sec) -training >> step=5230100, episode=872 reward=0.7578341 (487.58 it/sec) -training >> step=5230200, episode=872 reward=0.7752761 (566.74 it/sec) -training >> step=5230300, episode=872 reward=0.7762439 (495.38 it/sec) -training >> step=5230400, episode=872 reward=0.7748331 (483.36 it/sec) -training >> step=5230500, episode=872 reward=0.7908288 (509.65 it/sec) -training >> step=5230600, episode=872 reward=0.7608567 (512.75 it/sec) -training >> step=5230700, episode=872 reward=0.7740542 (519.85 it/sec) -training >> step=5230800, episode=872 reward=0.7910069 (483.86 it/sec) -training >> step=5230900, episode=872 reward=0.754756 (509.69 it/sec) -training >> step=5231000, episode=872 reward=0.766016 (519.30 it/sec) -training >> step=5231100, episode=872 reward=0.7800961 (520.42 it/sec) -training >> step=5231200, episode=872 reward=0.7952864 (523.19 it/sec) -training >> step=5231300, episode=873 reward=0.7735319 (57.32 it/sec) -training >> step=5231400, episode=873 reward=0.7707616 (365.00 it/sec) -training >> step=5231500, episode=873 reward=0.7739708 (520.90 it/sec) -training >> step=5231600, episode=873 reward=0.7904444 (550.20 it/sec) -training >> step=5231700, episode=873 reward=0.7928506 (517.48 it/sec) -training >> step=5231800, episode=873 reward=0.7791873 (520.97 it/sec) -training >> step=5231900, episode=873 reward=0.786618 (499.76 it/sec) -training >> step=5232000, episode=873 reward=0.787968 (502.55 it/sec) -training >> step=5232100, episode=873 reward=0.7739002 (490.68 it/sec) -training >> step=5232200, episode=873 reward=0.779055 (522.97 it/sec) -training >> step=5232300, episode=873 reward=0.7740272 (521.34 it/sec) -training >> step=5232400, episode=873 reward=0.7908859 (455.15 it/sec) -training >> step=5232500, episode=873 reward=0.7637599 (473.77 it/sec) -training >> step=5232600, episode=873 reward=0.7842984 (512.39 it/sec) -training >> step=5232700, episode=873 reward=0.7802052 (519.19 it/sec) -training >> step=5232800, episode=873 reward=0.7766592 (480.65 it/sec) -training >> step=5232900, episode=873 reward=0.7812414 (532.91 it/sec) -training >> step=5233000, episode=873 reward=0.7847623 (526.77 it/sec) -training >> step=5233100, episode=873 reward=0.7790338 (485.51 it/sec) -training >> step=5233200, episode=873 reward=0.7938226 (537.90 it/sec) -training >> step=5233300, episode=873 reward=0.7891748 (519.77 it/sec) -training >> step=5233400, episode=873 reward=0.7841513 (481.81 it/sec) -training >> step=5233500, episode=873 reward=0.7883664 (475.74 it/sec) -training >> step=5233600, episode=873 reward=0.777102 (512.65 it/sec) -training >> step=5233700, episode=873 reward=0.7714735 (474.71 it/sec) -training >> step=5233800, episode=873 reward=0.7911086 (497.31 it/sec) -training >> step=5233900, episode=873 reward=0.7844031 (456.27 it/sec) -training >> step=5234000, episode=873 reward=0.7801635 (485.58 it/sec) -training >> step=5234100, episode=873 reward=0.7788526 (485.28 it/sec) -training >> step=5234200, episode=873 reward=0.7706875 (514.70 it/sec) -training >> step=5234300, episode=873 reward=0.7832292 (534.08 it/sec) -training >> step=5234400, episode=873 reward=0.7747341 (476.21 it/sec) -training >> step=5234500, episode=873 reward=0.7805745 (468.17 it/sec) -training >> step=5234600, episode=873 reward=0.7787119 (521.28 it/sec) -training >> step=5234700, episode=873 reward=0.7765476 (491.39 it/sec) -training >> step=5234800, episode=873 reward=0.7760552 (487.74 it/sec) -training >> step=5234900, episode=873 reward=0.7779406 (481.31 it/sec) -training >> step=5235000, episode=873 reward=0.7719102 (468.17 it/sec) -training >> step=5235100, episode=873 reward=0.7933072 (453.19 it/sec) -training >> step=5235200, episode=873 reward=0.7815462 (486.34 it/sec) -training >> step=5235300, episode=873 reward=0.7977713 (483.43 it/sec) -training >> step=5235400, episode=873 reward=0.7767675 (481.57 it/sec) -training >> step=5235500, episode=873 reward=0.7733993 (461.25 it/sec) -training >> step=5235600, episode=873 reward=0.7781612 (496.79 it/sec) -training >> step=5235700, episode=873 reward=0.7870538 (515.96 it/sec) -training >> step=5235800, episode=873 reward=0.7815693 (506.19 it/sec) -training >> step=5235900, episode=873 reward=0.7863127 (483.66 it/sec) -training >> step=5236000, episode=873 reward=0.7853112 (472.84 it/sec) -training >> step=5236100, episode=873 reward=0.7698952 (544.50 it/sec) -training >> step=5236200, episode=873 reward=0.7844397 (461.98 it/sec) -training >> step=5236300, episode=873 reward=0.7830967 (493.18 it/sec) -training >> step=5236400, episode=873 reward=0.7814925 (497.48 it/sec) -training >> step=5236500, episode=873 reward=0.7658249 (455.23 it/sec) -training >> step=5236600, episode=873 reward=0.7787968 (501.27 it/sec) -training >> step=5236700, episode=873 reward=0.7926046 (451.98 it/sec) -training >> step=5236800, episode=873 reward=0.7921164 (516.31 it/sec) -training >> step=5236900, episode=873 reward=0.776409 (480.12 it/sec) -training >> step=5237000, episode=873 reward=0.7967879 (493.80 it/sec) -training >> step=5237100, episode=873 reward=0.7810085 (505.67 it/sec) -training >> step=5237200, episode=873 reward=0.7790174 (505.81 it/sec) -training >> step=5237300, episode=874 reward=0.7846689 (69.25 it/sec) -training >> step=5237400, episode=874 reward=0.7811691 (461.43 it/sec) -training >> step=5237500, episode=874 reward=0.7822961 (512.38 it/sec) -training >> step=5237600, episode=874 reward=0.7937045 (490.53 it/sec) -training >> step=5237700, episode=874 reward=0.7794759 (372.07 it/sec) -training >> step=5237800, episode=874 reward=0.7804381 (527.79 it/sec) -training >> step=5237900, episode=874 reward=0.7829633 (502.17 it/sec) -training >> step=5238000, episode=874 reward=0.7735757 (529.15 it/sec) -training >> step=5238100, episode=874 reward=0.7662235 (486.22 it/sec) -training >> step=5238200, episode=874 reward=0.7610738 (455.58 it/sec) -training >> step=5238300, episode=874 reward=0.7815557 (500.43 it/sec) -training >> step=5238400, episode=874 reward=0.771431 (510.35 it/sec) -training >> step=5238500, episode=874 reward=0.7714072 (478.70 it/sec) -training >> step=5238600, episode=874 reward=0.78392 (488.24 it/sec) -training >> step=5238700, episode=874 reward=0.8022409 (514.21 it/sec) -training >> step=5238800, episode=874 reward=0.777428 (516.31 it/sec) -training >> step=5238900, episode=874 reward=0.7895168 (498.70 it/sec) -training >> step=5239000, episode=874 reward=0.7898397 (532.67 it/sec) -training >> step=5239100, episode=874 reward=0.7841278 (522.51 it/sec) -training >> step=5239200, episode=874 reward=0.7794815 (526.87 it/sec) -training >> step=5239300, episode=874 reward=0.7911767 (466.28 it/sec) -training >> step=5239400, episode=874 reward=0.7697796 (493.67 it/sec) -training >> step=5239500, episode=874 reward=0.7831873 (551.18 it/sec) -training >> step=5239600, episode=874 reward=0.8025158 (531.21 it/sec) -training >> step=5239700, episode=874 reward=0.7770674 (484.79 it/sec) -training >> step=5239800, episode=874 reward=0.7625853 (506.31 it/sec) -training >> step=5239900, episode=874 reward=0.7640235 (466.59 it/sec) -training >> step=5240000, episode=874 reward=0.7915686 (544.45 it/sec) -training >> step=5240100, episode=874 reward=0.7834588 (513.58 it/sec) -training >> step=5240200, episode=874 reward=0.7786704 (519.72 it/sec) -training >> step=5240300, episode=874 reward=0.7816966 (509.62 it/sec) -training >> step=5240400, episode=874 reward=0.794797 (489.63 it/sec) -training >> step=5240500, episode=874 reward=0.7983842 (537.39 it/sec) -training >> step=5240600, episode=874 reward=0.7694467 (475.63 it/sec) -training >> step=5240700, episode=874 reward=0.7866989 (524.70 it/sec) -training >> step=5240800, episode=874 reward=0.803821 (513.03 it/sec) -training >> step=5240900, episode=874 reward=0.7875168 (525.68 it/sec) -training >> step=5241000, episode=874 reward=0.761156 (458.46 it/sec) -training >> step=5241100, episode=874 reward=0.7848872 (532.78 it/sec) -training >> step=5241200, episode=874 reward=0.7823808 (539.92 it/sec) -training >> step=5241300, episode=874 reward=0.7734283 (532.01 it/sec) -training >> step=5241400, episode=874 reward=0.797749 (490.23 it/sec) -training >> step=5241500, episode=874 reward=0.7868909 (491.69 it/sec) -training >> step=5241600, episode=874 reward=0.7909352 (552.18 it/sec) -training >> step=5241700, episode=874 reward=0.7809488 (510.21 it/sec) -training >> step=5241800, episode=874 reward=0.7717066 (475.53 it/sec) -training >> step=5241900, episode=874 reward=0.7833923 (504.61 it/sec) -training >> step=5242000, episode=874 reward=0.7701231 (539.75 it/sec) -training >> step=5242100, episode=874 reward=0.7676061 (464.56 it/sec) -training >> step=5242200, episode=874 reward=0.7790368 (493.25 it/sec) -training >> step=5242300, episode=874 reward=0.7947951 (508.61 it/sec) -training >> step=5242400, episode=874 reward=0.77653 (534.75 it/sec) -training >> step=5242500, episode=874 reward=0.7641892 (504.60 it/sec) -training >> step=5242600, episode=874 reward=0.7785137 (520.45 it/sec) -training >> step=5242700, episode=874 reward=0.7775006 (563.83 it/sec) -training >> step=5242800, episode=874 reward=0.7790405 (492.13 it/sec) -training >> step=5242900, episode=874 reward=0.76933 (490.63 it/sec) -training >> step=5243000, episode=874 reward=0.7759628 (503.90 it/sec) -training >> step=5243100, episode=874 reward=0.7950439 (560.52 it/sec) -training >> step=5243200, episode=874 reward=0.7795727 (493.49 it/sec) -training >> step=5243300, episode=875 reward=0.7830546 (138.68 it/sec) -training >> step=5243400, episode=875 reward=0.7753037 (563.45 it/sec) -training >> step=5243500, episode=875 reward=0.7758892 (518.22 it/sec) -training >> step=5243600, episode=875 reward=0.7689832 (501.37 it/sec) -training >> step=5243700, episode=875 reward=0.7727462 (486.08 it/sec) -training >> step=5243800, episode=875 reward=0.7955429 (495.02 it/sec) -training >> step=5243900, episode=875 reward=0.7776826 (319.70 it/sec) -training >> step=5244000, episode=875 reward=0.7719728 (486.45 it/sec) -training >> step=5244100, episode=875 reward=0.7819503 (486.61 it/sec) -training >> step=5244200, episode=875 reward=0.7826864 (495.17 it/sec) -training >> step=5244300, episode=875 reward=0.7796416 (469.08 it/sec) -training >> step=5244400, episode=875 reward=0.7863072 (464.28 it/sec) -training >> step=5244500, episode=875 reward=0.7682158 (481.89 it/sec) -training >> step=5244600, episode=875 reward=0.7750337 (485.00 it/sec) -training >> step=5244700, episode=875 reward=0.7993225 (494.85 it/sec) -training >> step=5244800, episode=875 reward=0.776643 (435.68 it/sec) -training >> step=5244900, episode=875 reward=0.7831443 (537.74 it/sec) -training >> step=5245000, episode=875 reward=0.7792549 (483.27 it/sec) -training >> step=5245100, episode=875 reward=0.7775193 (471.45 it/sec) -training >> step=5245200, episode=875 reward=0.7806829 (490.28 it/sec) -training >> step=5245300, episode=875 reward=0.7658132 (465.65 it/sec) -training >> step=5245400, episode=875 reward=0.7848539 (506.76 it/sec) -training >> step=5245500, episode=875 reward=0.7672063 (479.53 it/sec) -training >> step=5245600, episode=875 reward=0.7735195 (464.16 it/sec) -training >> step=5245700, episode=875 reward=0.7727363 (478.12 it/sec) -training >> step=5245800, episode=875 reward=0.7660855 (493.09 it/sec) -training >> step=5245900, episode=875 reward=0.7768188 (497.18 it/sec) -training >> step=5246000, episode=875 reward=0.780757 (542.04 it/sec) -training >> step=5246100, episode=875 reward=0.7745168 (517.03 it/sec) -training >> step=5246200, episode=875 reward=0.7788591 (506.13 it/sec) -training >> step=5246300, episode=875 reward=0.7800432 (519.21 it/sec) -training >> step=5246400, episode=875 reward=0.7970899 (508.83 it/sec) -training >> step=5246500, episode=875 reward=0.7817495 (527.55 it/sec) -training >> step=5246600, episode=875 reward=0.7969411 (485.45 it/sec) -training >> step=5246700, episode=875 reward=0.8120819 (545.54 it/sec) -training >> step=5246800, episode=875 reward=0.7946572 (503.40 it/sec) -training >> step=5246900, episode=875 reward=0.763259 (468.63 it/sec) -training >> step=5247000, episode=875 reward=0.772882 (521.61 it/sec) -training >> step=5247100, episode=875 reward=0.7861704 (492.02 it/sec) -training >> step=5247200, episode=875 reward=0.7600371 (482.79 it/sec) -training >> step=5247300, episode=875 reward=0.7778934 (451.84 it/sec) -training >> step=5247400, episode=875 reward=0.7900918 (539.85 it/sec) -training >> step=5247500, episode=875 reward=0.7745206 (450.93 it/sec) -training >> step=5247600, episode=875 reward=0.7792588 (476.91 it/sec) -training >> step=5247700, episode=875 reward=0.7660337 (410.46 it/sec) -training >> step=5247800, episode=875 reward=0.7954786 (491.12 it/sec) -training >> step=5247900, episode=875 reward=0.7905747 (474.66 it/sec) -training >> step=5248000, episode=875 reward=0.7823679 (437.17 it/sec) -training >> step=5248100, episode=875 reward=0.7786421 (506.31 it/sec) -training >> step=5248200, episode=875 reward=0.7703676 (465.12 it/sec) -training >> step=5248300, episode=875 reward=0.7742476 (461.36 it/sec) -training >> step=5248400, episode=875 reward=0.770175 (505.35 it/sec) -training >> step=5248500, episode=875 reward=0.7836605 (490.66 it/sec) -training >> step=5248600, episode=875 reward=0.7832154 (484.29 it/sec) -training >> step=5248700, episode=875 reward=0.7654419 (437.08 it/sec) -training >> step=5248800, episode=875 reward=0.7792597 (473.13 it/sec) -training >> step=5248900, episode=875 reward=0.7743758 (493.34 it/sec) -training >> step=5249000, episode=875 reward=0.7625867 (480.85 it/sec) -training >> step=5249100, episode=875 reward=0.7679589 (460.56 it/sec) -training >> step=5249200, episode=875 reward=0.7761953 (511.81 it/sec) -training >> step=5249300, episode=876 reward=0.7536337 (108.29 it/sec) -training >> step=5249400, episode=876 reward=0.7850772 (475.03 it/sec) -training >> step=5249500, episode=876 reward=0.7848747 (473.32 it/sec) -training >> step=5249600, episode=876 reward=0.7815377 (516.25 it/sec) -training >> step=5249700, episode=876 reward=0.7814382 (434.89 it/sec) -training >> step=5249800, episode=876 reward=0.7803068 (468.30 it/sec) -training >> step=5249900, episode=876 reward=0.7670071 (470.71 it/sec) -training >> step=5250000, episode=876 reward=0.7850211 (382.58 it/sec) -training >> step=5250100, episode=876 reward=0.7639427 (466.91 it/sec) -training >> step=5250200, episode=876 reward=0.7891001 (440.57 it/sec) -training >> step=5250300, episode=876 reward=0.7741813 (507.63 it/sec) -training >> step=5250400, episode=876 reward=0.8147578 (470.50 it/sec) -training >> step=5250500, episode=876 reward=0.7858352 (448.02 it/sec) -training >> step=5250600, episode=876 reward=0.7828063 (490.93 it/sec) -training >> step=5250700, episode=876 reward=0.7776411 (471.66 it/sec) -training >> step=5250800, episode=876 reward=0.7899248 (472.63 it/sec) -training >> step=5250900, episode=876 reward=0.7795362 (494.57 it/sec) -training >> step=5251000, episode=876 reward=0.785937 (489.73 it/sec) -training >> step=5251100, episode=876 reward=0.7722946 (466.64 it/sec) -training >> step=5251200, episode=876 reward=0.7757084 (426.16 it/sec) -training >> step=5251300, episode=876 reward=0.7705333 (452.79 it/sec) -training >> step=5251400, episode=876 reward=0.7891688 (476.59 it/sec) -training >> step=5251500, episode=876 reward=0.7621123 (473.96 it/sec) -training >> step=5251600, episode=876 reward=0.7873405 (395.37 it/sec) -training >> step=5251700, episode=876 reward=0.7893258 (477.12 it/sec) -training >> step=5251800, episode=876 reward=0.8044465 (451.40 it/sec) -training >> step=5251900, episode=876 reward=0.7964538 (473.71 it/sec) -training >> step=5252000, episode=876 reward=0.7676756 (507.02 it/sec) -training >> step=5252100, episode=876 reward=0.787259 (475.25 it/sec) -training >> step=5252200, episode=876 reward=0.752402 (448.82 it/sec) -training >> step=5252300, episode=876 reward=0.787487 (435.52 it/sec) -training >> step=5252400, episode=876 reward=0.7790669 (484.63 it/sec) -training >> step=5252500, episode=876 reward=0.7832928 (452.12 it/sec) -training >> step=5252600, episode=876 reward=0.7768812 (412.18 it/sec) -training >> step=5252700, episode=876 reward=0.7784624 (467.18 it/sec) -training >> step=5252800, episode=876 reward=0.7972435 (488.52 it/sec) -training >> step=5252900, episode=876 reward=0.7758362 (469.59 it/sec) -training >> step=5253000, episode=876 reward=0.7990144 (447.61 it/sec) -training >> step=5253100, episode=876 reward=0.7772769 (490.04 it/sec) -training >> step=5253200, episode=876 reward=0.768546 (493.36 it/sec) -training >> step=5253300, episode=876 reward=0.7835748 (497.15 it/sec) -training >> step=5253400, episode=876 reward=0.7845263 (462.06 it/sec) -training >> step=5253500, episode=876 reward=0.7961699 (448.65 it/sec) -training >> step=5253600, episode=876 reward=0.7757406 (442.60 it/sec) -training >> step=5253700, episode=876 reward=0.781745 (495.45 it/sec) -training >> step=5253800, episode=876 reward=0.7848296 (509.92 it/sec) -training >> step=5253900, episode=876 reward=0.7819498 (485.68 it/sec) -training >> step=5254000, episode=876 reward=0.785639 (407.33 it/sec) -training >> step=5254100, episode=876 reward=0.7635745 (462.21 it/sec) -training >> step=5254200, episode=876 reward=0.7686086 (491.92 it/sec) -training >> step=5254300, episode=876 reward=0.8102592 (487.03 it/sec) -training >> step=5254400, episode=876 reward=0.7920175 (496.42 it/sec) -training >> step=5254500, episode=876 reward=0.7740567 (461.85 it/sec) -training >> step=5254600, episode=876 reward=0.8069274 (478.56 it/sec) -training >> step=5254700, episode=876 reward=0.787941 (486.80 it/sec) -training >> step=5254800, episode=876 reward=0.77438 (484.57 it/sec) -training >> step=5254900, episode=876 reward=0.7686304 (506.71 it/sec) -training >> step=5255000, episode=876 reward=0.7745687 (493.60 it/sec) -training >> step=5255100, episode=876 reward=0.7977731 (489.26 it/sec) -training >> step=5255200, episode=876 reward=0.7770466 (472.57 it/sec) -training >> step=5255300, episode=877 reward=0.7603504 (98.83 it/sec) -training >> step=5255400, episode=877 reward=0.792231 (478.14 it/sec) -training >> step=5255500, episode=877 reward=0.7616353 (474.12 it/sec) -training >> step=5255600, episode=877 reward=0.7769702 (495.39 it/sec) -training >> step=5255700, episode=877 reward=0.7749161 (489.76 it/sec) -training >> step=5255800, episode=877 reward=0.7832746 (511.16 it/sec) -training >> step=5255900, episode=877 reward=0.7964224 (485.64 it/sec) -training >> step=5256000, episode=877 reward=0.7682522 (498.95 it/sec) -training >> step=5256100, episode=877 reward=0.7981512 (496.01 it/sec) -training >> step=5256200, episode=877 reward=0.8054525 (342.61 it/sec) -training >> step=5256300, episode=877 reward=0.8011008 (509.19 it/sec) -training >> step=5256400, episode=877 reward=0.7672421 (507.36 it/sec) -training >> step=5256500, episode=877 reward=0.7982249 (488.43 it/sec) -training >> step=5256600, episode=877 reward=0.7938372 (433.82 it/sec) -training >> step=5256700, episode=877 reward=0.7747211 (483.62 it/sec) -training >> step=5256800, episode=877 reward=0.7796648 (504.70 it/sec) -training >> step=5256900, episode=877 reward=0.7769405 (482.75 it/sec) -training >> step=5257000, episode=877 reward=0.7893041 (522.34 it/sec) -training >> step=5257100, episode=877 reward=0.7868578 (436.46 it/sec) -training >> step=5257200, episode=877 reward=0.7762931 (463.43 it/sec) -training >> step=5257300, episode=877 reward=0.7706938 (502.60 it/sec) -training >> step=5257400, episode=877 reward=0.776727 (520.60 it/sec) -training >> step=5257500, episode=877 reward=0.7883049 (502.57 it/sec) -training >> step=5257600, episode=877 reward=0.8031773 (485.95 it/sec) -training >> step=5257700, episode=877 reward=0.7845063 (472.63 it/sec) -training >> step=5257800, episode=877 reward=0.7662544 (470.08 it/sec) -training >> step=5257900, episode=877 reward=0.7876179 (489.82 it/sec) -training >> step=5258000, episode=877 reward=0.7751952 (465.09 it/sec) -training >> step=5258100, episode=877 reward=0.7984517 (517.88 it/sec) -training >> step=5258200, episode=877 reward=0.7676114 (510.93 it/sec) -training >> step=5258300, episode=877 reward=0.7921283 (473.12 it/sec) -training >> step=5258400, episode=877 reward=0.778965 (520.94 it/sec) -training >> step=5258500, episode=877 reward=0.7988402 (526.73 it/sec) -training >> step=5258600, episode=877 reward=0.7882349 (480.01 it/sec) -training >> step=5258700, episode=877 reward=0.7815247 (529.12 it/sec) -training >> step=5258800, episode=877 reward=0.800521 (502.55 it/sec) -training >> step=5258900, episode=877 reward=0.7829651 (514.19 it/sec) -training >> step=5259000, episode=877 reward=0.773224 (525.44 it/sec) -training >> step=5259100, episode=877 reward=0.7804566 (496.13 it/sec) -training >> step=5259200, episode=877 reward=0.7842103 (404.80 it/sec) -training >> step=5259300, episode=877 reward=0.7868102 (475.23 it/sec) -training >> step=5259400, episode=877 reward=0.7645916 (452.01 it/sec) -training >> step=5259500, episode=877 reward=0.7660245 (520.80 it/sec) -training >> step=5259600, episode=877 reward=0.788962 (460.09 it/sec) -training >> step=5259700, episode=877 reward=0.8044671 (424.39 it/sec) -training >> step=5259800, episode=877 reward=0.7881052 (410.23 it/sec) -training >> step=5259900, episode=877 reward=0.7853191 (450.35 it/sec) -training >> step=5260000, episode=877 reward=0.7800003 (438.37 it/sec) -training >> step=5260100, episode=877 reward=0.7653769 (428.09 it/sec) -training >> step=5260200, episode=877 reward=0.7997577 (457.57 it/sec) -training >> step=5260300, episode=877 reward=0.7703589 (483.17 it/sec) -training >> step=5260400, episode=877 reward=0.7774528 (469.60 it/sec) -training >> step=5260500, episode=877 reward=0.8101156 (423.08 it/sec) -training >> step=5260600, episode=877 reward=0.8032292 (443.93 it/sec) -training >> step=5260700, episode=877 reward=0.7795758 (469.29 it/sec) -training >> step=5260800, episode=877 reward=0.7837209 (436.16 it/sec) -training >> step=5260900, episode=877 reward=0.779901 (464.13 it/sec) -training >> step=5261000, episode=877 reward=0.78286 (469.71 it/sec) -training >> step=5261100, episode=877 reward=0.771498 (478.15 it/sec) -training >> step=5261200, episode=877 reward=0.7851439 (442.89 it/sec) -training >> step=5261300, episode=878 reward=0.7931826 (101.04 it/sec) -training >> step=5261400, episode=878 reward=0.7907615 (427.15 it/sec) -training >> step=5261500, episode=878 reward=0.7940234 (450.50 it/sec) -training >> step=5261600, episode=878 reward=0.7958844 (437.28 it/sec) -training >> step=5261700, episode=878 reward=0.7852304 (489.03 it/sec) -training >> step=5261800, episode=878 reward=0.7845308 (463.16 it/sec) -training >> step=5261900, episode=878 reward=0.7842214 (484.01 it/sec) -training >> step=5262000, episode=878 reward=0.7689524 (461.05 it/sec) -training >> step=5262100, episode=878 reward=0.777851 (462.73 it/sec) -training >> step=5262200, episode=878 reward=0.766325 (498.48 it/sec) -training >> step=5262300, episode=878 reward=0.7788199 (330.65 it/sec) -training >> step=5262400, episode=878 reward=0.7858985 (496.84 it/sec) -training >> step=5262500, episode=878 reward=0.7994093 (466.93 it/sec) -training >> step=5262600, episode=878 reward=0.7860462 (474.18 it/sec) -training >> step=5262700, episode=878 reward=0.7771975 (440.51 it/sec) -training >> step=5262800, episode=878 reward=0.7816333 (472.05 it/sec) -training >> step=5262900, episode=878 reward=0.7855764 (512.01 it/sec) -training >> step=5263000, episode=878 reward=0.7677511 (479.78 it/sec) -training >> step=5263100, episode=878 reward=0.7797822 (404.85 it/sec) -training >> step=5263200, episode=878 reward=0.7666255 (496.69 it/sec) -training >> step=5263300, episode=878 reward=0.780017 (477.11 it/sec) -training >> step=5263400, episode=878 reward=0.8057388 (424.68 it/sec) -training >> step=5263500, episode=878 reward=0.7954617 (463.56 it/sec) -training >> step=5263600, episode=878 reward=0.778766 (466.76 it/sec) -training >> step=5263700, episode=878 reward=0.7876467 (469.77 it/sec) -training >> step=5263800, episode=878 reward=0.7974643 (503.55 it/sec) -training >> step=5263900, episode=878 reward=0.7581984 (494.27 it/sec) -training >> step=5264000, episode=878 reward=0.8042538 (505.24 it/sec) -training >> step=5264100, episode=878 reward=0.7826468 (497.32 it/sec) -training >> step=5264200, episode=878 reward=0.7697883 (460.55 it/sec) -training >> step=5264300, episode=878 reward=0.7821394 (485.70 it/sec) -training >> step=5264400, episode=878 reward=0.7814747 (466.65 it/sec) -training >> step=5264500, episode=878 reward=0.7819166 (452.64 it/sec) -training >> step=5264600, episode=878 reward=0.7668777 (484.30 it/sec) -training >> step=5264700, episode=878 reward=0.7703362 (497.60 it/sec) -training >> step=5264800, episode=878 reward=0.782608 (504.63 it/sec) -training >> step=5264900, episode=878 reward=0.8050334 (462.02 it/sec) -training >> step=5265000, episode=878 reward=0.7956641 (484.51 it/sec) -training >> step=5265100, episode=878 reward=0.7728726 (498.44 it/sec) -training >> step=5265200, episode=878 reward=0.7955673 (495.08 it/sec) -training >> step=5265300, episode=878 reward=0.7829182 (500.62 it/sec) -training >> step=5265400, episode=878 reward=0.7649892 (461.59 it/sec) -training >> step=5265500, episode=878 reward=0.7836787 (461.45 it/sec) -training >> step=5265600, episode=878 reward=0.7760034 (497.54 it/sec) -training >> step=5265700, episode=878 reward=0.8022894 (465.44 it/sec) -training >> step=5265800, episode=878 reward=0.7785262 (460.18 it/sec) -training >> step=5265900, episode=878 reward=0.7877389 (493.92 it/sec) -training >> step=5266000, episode=878 reward=0.7932981 (438.27 it/sec) -training >> step=5266100, episode=878 reward=0.7953978 (498.63 it/sec) -training >> step=5266200, episode=878 reward=0.7836556 (488.44 it/sec) -training >> step=5266300, episode=878 reward=0.7666982 (493.11 it/sec) -training >> step=5266400, episode=878 reward=0.782727 (477.28 it/sec) -training >> step=5266500, episode=878 reward=0.7905263 (517.50 it/sec) -training >> step=5266600, episode=878 reward=0.7832782 (511.06 it/sec) -training >> step=5266700, episode=878 reward=0.7837841 (479.62 it/sec) -training >> step=5266800, episode=878 reward=0.7918222 (516.90 it/sec) -training >> step=5266900, episode=878 reward=0.7829496 (495.26 it/sec) -training >> step=5267000, episode=878 reward=0.7650986 (459.29 it/sec) -training >> step=5267100, episode=878 reward=0.7905265 (458.28 it/sec) -training >> step=5267200, episode=878 reward=0.7781665 (473.93 it/sec) -training >> step=5267300, episode=879 reward=0.7699642 (93.84 it/sec) -training >> step=5267400, episode=879 reward=0.7751101 (485.15 it/sec) -training >> step=5267500, episode=879 reward=0.7777602 (474.52 it/sec) -training >> step=5267600, episode=879 reward=0.7782407 (474.66 it/sec) -training >> step=5267700, episode=879 reward=0.7876427 (469.56 it/sec) -training >> step=5267800, episode=879 reward=0.7578275 (489.75 it/sec) -training >> step=5267900, episode=879 reward=0.7938203 (505.30 it/sec) -training >> step=5268000, episode=879 reward=0.7853863 (479.56 it/sec) -training >> step=5268100, episode=879 reward=0.7695123 (476.96 it/sec) -training >> step=5268200, episode=879 reward=0.7539825 (499.34 it/sec) -training >> step=5268300, episode=879 reward=0.7880087 (510.17 it/sec) -training >> step=5268400, episode=879 reward=0.768814 (486.61 it/sec) -training >> step=5268500, episode=879 reward=0.7877669 (336.81 it/sec) -training >> step=5268600, episode=879 reward=0.7654344 (490.45 it/sec) -training >> step=5268700, episode=879 reward=0.7919362 (508.39 it/sec) -training >> step=5268800, episode=879 reward=0.7933673 (495.23 it/sec) -training >> step=5268900, episode=879 reward=0.7854497 (500.24 it/sec) -training >> step=5269000, episode=879 reward=0.7791876 (500.19 it/sec) -training >> step=5269100, episode=879 reward=0.7814639 (471.34 it/sec) -training >> step=5269200, episode=879 reward=0.7785755 (472.19 it/sec) -training >> step=5269300, episode=879 reward=0.7879995 (504.13 it/sec) -training >> step=5269400, episode=879 reward=0.7939087 (535.27 it/sec) -training >> step=5269500, episode=879 reward=0.7715765 (480.84 it/sec) -training >> step=5269600, episode=879 reward=0.7767487 (466.84 it/sec) -training >> step=5269700, episode=879 reward=0.7851925 (508.33 it/sec) -training >> step=5269800, episode=879 reward=0.7833496 (487.21 it/sec) -training >> step=5269900, episode=879 reward=0.7977393 (422.86 it/sec) -training >> step=5270000, episode=879 reward=0.7591625 (492.67 it/sec) -training >> step=5270100, episode=879 reward=0.7776781 (475.32 it/sec) -training >> step=5270200, episode=879 reward=0.7775508 (453.09 it/sec) -training >> step=5270300, episode=879 reward=0.7869308 (465.20 it/sec) -training >> step=5270400, episode=879 reward=0.7746543 (476.47 it/sec) -training >> step=5270500, episode=879 reward=0.801446 (480.79 it/sec) -training >> step=5270600, episode=879 reward=0.7946278 (456.87 it/sec) -training >> step=5270700, episode=879 reward=0.761286 (501.75 it/sec) -training >> step=5270800, episode=879 reward=0.7829612 (499.70 it/sec) -training >> step=5270900, episode=879 reward=0.7610372 (460.28 it/sec) -training >> step=5271000, episode=879 reward=0.7845528 (479.25 it/sec) -training >> step=5271100, episode=879 reward=0.7868413 (499.36 it/sec) -training >> step=5271200, episode=879 reward=0.799931 (536.02 it/sec) -training >> step=5271300, episode=879 reward=0.7931033 (503.87 it/sec) -training >> step=5271400, episode=879 reward=0.7719632 (457.64 it/sec) -training >> step=5271500, episode=879 reward=0.788686 (498.50 it/sec) -training >> step=5271600, episode=879 reward=0.8049496 (456.77 it/sec) -training >> step=5271700, episode=879 reward=0.7771464 (493.74 it/sec) -training >> step=5271800, episode=879 reward=0.8002829 (507.47 it/sec) -training >> step=5271900, episode=879 reward=0.7677117 (485.71 it/sec) -training >> step=5272000, episode=879 reward=0.7689766 (486.81 it/sec) -training >> step=5272100, episode=879 reward=0.7637553 (471.38 it/sec) -training >> step=5272200, episode=879 reward=0.768252 (471.14 it/sec) -training >> step=5272300, episode=879 reward=0.7794193 (506.78 it/sec) -training >> step=5272400, episode=879 reward=0.7761061 (447.91 it/sec) -training >> step=5272500, episode=879 reward=0.7734086 (478.65 it/sec) -training >> step=5272600, episode=879 reward=0.7913798 (496.06 it/sec) -training >> step=5272700, episode=879 reward=0.795004 (469.53 it/sec) -training >> step=5272800, episode=879 reward=0.7697521 (480.30 it/sec) -training >> step=5272900, episode=879 reward=0.778329 (463.80 it/sec) -training >> step=5273000, episode=879 reward=0.7747429 (517.02 it/sec) -training >> step=5273100, episode=879 reward=0.7864071 (468.49 it/sec) -training >> step=5273200, episode=879 reward=0.7571393 (497.69 it/sec) -training >> step=5273300, episode=880 reward=0.7739432 (93.84 it/sec) -training >> step=5273400, episode=880 reward=0.7590564 (485.85 it/sec) -training >> step=5273500, episode=880 reward=0.7913132 (472.80 it/sec) -training >> step=5273600, episode=880 reward=0.78428 (478.94 it/sec) -training >> step=5273700, episode=880 reward=0.7784979 (496.49 it/sec) -training >> step=5273800, episode=880 reward=0.776853 (490.42 it/sec) -training >> step=5273900, episode=880 reward=0.7864453 (509.80 it/sec) -training >> step=5274000, episode=880 reward=0.7564728 (411.30 it/sec) -training >> step=5274100, episode=880 reward=0.7750106 (469.26 it/sec) -training >> step=5274200, episode=880 reward=0.7866243 (463.83 it/sec) -training >> step=5274300, episode=880 reward=0.7809901 (519.84 it/sec) -training >> step=5274400, episode=880 reward=0.7888877 (477.97 it/sec) -training >> step=5274500, episode=880 reward=0.7933272 (447.27 it/sec) -training >> step=5274600, episode=880 reward=0.7816696 (449.38 it/sec) -training >> step=5274700, episode=880 reward=0.7848649 (505.82 it/sec) -training >> step=5274800, episode=880 reward=0.7852947 (347.85 it/sec) -training >> step=5274900, episode=880 reward=0.803433 (431.75 it/sec) -training >> step=5275000, episode=880 reward=0.7753644 (454.38 it/sec) -training >> step=5275100, episode=880 reward=0.7774677 (443.96 it/sec) -training >> step=5275200, episode=880 reward=0.7695568 (477.59 it/sec) -training >> step=5275300, episode=880 reward=0.7850812 (489.77 it/sec) -training >> step=5275400, episode=880 reward=0.7974735 (492.49 it/sec) -training >> step=5275500, episode=880 reward=0.7834268 (466.63 it/sec) -training >> step=5275600, episode=880 reward=0.7777663 (460.15 it/sec) -training >> step=5275700, episode=880 reward=0.7745509 (515.39 it/sec) -training >> step=5275800, episode=880 reward=0.7942477 (460.54 it/sec) -training >> step=5275900, episode=880 reward=0.7858093 (464.92 it/sec) -training >> step=5276000, episode=880 reward=0.8040625 (459.79 it/sec) -training >> step=5276100, episode=880 reward=0.7826387 (491.32 it/sec) -training >> step=5276200, episode=880 reward=0.7773075 (452.44 it/sec) -training >> step=5276300, episode=880 reward=0.7832196 (489.08 it/sec) -training >> step=5276400, episode=880 reward=0.7823097 (471.69 it/sec) -training >> step=5276500, episode=880 reward=0.7835193 (481.44 it/sec) -training >> step=5276600, episode=880 reward=0.7889536 (470.88 it/sec) -training >> step=5276700, episode=880 reward=0.7925303 (479.71 it/sec) -training >> step=5276800, episode=880 reward=0.7742582 (471.82 it/sec) -training >> step=5276900, episode=880 reward=0.7883592 (485.17 it/sec) -training >> step=5277000, episode=880 reward=0.768667 (453.03 it/sec) -training >> step=5277100, episode=880 reward=0.7644778 (473.19 it/sec) -training >> step=5277200, episode=880 reward=0.7667834 (509.42 it/sec) -training >> step=5277300, episode=880 reward=0.7981044 (462.34 it/sec) -training >> step=5277400, episode=880 reward=0.7791719 (439.69 it/sec) -training >> step=5277500, episode=880 reward=0.7994012 (502.53 it/sec) -training >> step=5277600, episode=880 reward=0.7851246 (514.70 it/sec) -training >> step=5277700, episode=880 reward=0.7785379 (493.48 it/sec) -training >> step=5277800, episode=880 reward=0.7811544 (466.69 it/sec) -training >> step=5277900, episode=880 reward=0.7869554 (500.83 it/sec) -training >> step=5278000, episode=880 reward=0.7667809 (396.67 it/sec) -training >> step=5278100, episode=880 reward=0.7789063 (507.83 it/sec) -training >> step=5278200, episode=880 reward=0.7648456 (487.90 it/sec) -training >> step=5278300, episode=880 reward=0.7649805 (474.25 it/sec) -training >> step=5278400, episode=880 reward=0.8011793 (462.64 it/sec) -training >> step=5278500, episode=880 reward=0.7833424 (485.02 it/sec) -training >> step=5278600, episode=880 reward=0.7538722 (505.72 it/sec) -training >> step=5278700, episode=880 reward=0.775987 (478.32 it/sec) -training >> step=5278800, episode=880 reward=0.7788723 (465.28 it/sec) -training >> step=5278900, episode=880 reward=0.7866751 (491.21 it/sec) -training >> step=5279000, episode=880 reward=0.7870833 (480.04 it/sec) -training >> step=5279100, episode=880 reward=0.7728144 (484.86 it/sec) -training >> step=5279200, episode=880 reward=0.7545756 (509.71 it/sec) -training >> step=5279300, episode=881 reward=0.7784613 (86.94 it/sec) -training >> step=5279400, episode=881 reward=0.787546 (430.37 it/sec) -training >> step=5279500, episode=881 reward=0.7941712 (454.87 it/sec) -training >> step=5279600, episode=881 reward=0.7773979 (459.12 it/sec) -training >> step=5279700, episode=881 reward=0.7541553 (468.76 it/sec) -training >> step=5279800, episode=881 reward=0.7820338 (439.15 it/sec) -training >> step=5279900, episode=881 reward=0.7718126 (446.14 it/sec) -training >> step=5280000, episode=881 reward=0.7688998 (431.08 it/sec) -training >> step=5280100, episode=881 reward=0.7655528 (431.11 it/sec) -training >> step=5280200, episode=881 reward=0.7721385 (441.01 it/sec) -training >> step=5280300, episode=881 reward=0.7982324 (486.67 it/sec) -training >> step=5280400, episode=881 reward=0.7628086 (444.17 it/sec) -training >> step=5280500, episode=881 reward=0.8023496 (437.02 it/sec) -training >> step=5280600, episode=881 reward=0.7910376 (465.15 it/sec) -training >> step=5280700, episode=881 reward=0.7704067 (448.96 it/sec) -training >> step=5280800, episode=881 reward=0.7876639 (390.71 it/sec) -training >> step=5280900, episode=881 reward=0.7781597 (476.52 it/sec) -training >> step=5281000, episode=881 reward=0.7709828 (333.08 it/sec) -training >> step=5281100, episode=881 reward=0.776306 (424.02 it/sec) -training >> step=5281200, episode=881 reward=0.792474 (441.50 it/sec) -training >> step=5281300, episode=881 reward=0.794979 (445.22 it/sec) -training >> step=5281400, episode=881 reward=0.7750609 (481.50 it/sec) -training >> step=5281500, episode=881 reward=0.794008 (462.57 it/sec) -training >> step=5281600, episode=881 reward=0.7797027 (443.27 it/sec) -training >> step=5281700, episode=881 reward=0.7861785 (404.45 it/sec) -training >> step=5281800, episode=881 reward=0.786791 (462.73 it/sec) -training >> step=5281900, episode=881 reward=0.780878 (469.79 it/sec) -training >> step=5282000, episode=881 reward=0.7742744 (481.73 it/sec) -training >> step=5282100, episode=881 reward=0.7853932 (450.54 it/sec) -training >> step=5282200, episode=881 reward=0.7902423 (437.96 it/sec) -training >> step=5282300, episode=881 reward=0.7817666 (481.83 it/sec) -training >> step=5282400, episode=881 reward=0.7728951 (476.93 it/sec) -training >> step=5282500, episode=881 reward=0.7775502 (427.96 it/sec) -training >> step=5282600, episode=881 reward=0.8008479 (469.98 it/sec) -training >> step=5282700, episode=881 reward=0.7863278 (470.26 it/sec) -training >> step=5282800, episode=881 reward=0.7889824 (473.68 it/sec) -training >> step=5282900, episode=881 reward=0.782657 (486.85 it/sec) -training >> step=5283000, episode=881 reward=0.7708105 (487.04 it/sec) -training >> step=5283100, episode=881 reward=0.7862638 (460.17 it/sec) -training >> step=5283200, episode=881 reward=0.7961344 (458.65 it/sec) -training >> step=5283300, episode=881 reward=0.7880763 (471.86 it/sec) -training >> step=5283400, episode=881 reward=0.7914208 (456.71 it/sec) -training >> step=5283500, episode=881 reward=0.7876768 (433.47 it/sec) -training >> step=5283600, episode=881 reward=0.7599695 (454.14 it/sec) -training >> step=5283700, episode=881 reward=0.7632419 (460.33 it/sec) -training >> step=5283800, episode=881 reward=0.7810573 (483.68 it/sec) -training >> step=5283900, episode=881 reward=0.7815478 (445.95 it/sec) -training >> step=5284000, episode=881 reward=0.7989241 (461.20 it/sec) -training >> step=5284100, episode=881 reward=0.7740319 (467.43 it/sec) -training >> step=5284200, episode=881 reward=0.791537 (420.08 it/sec) -training >> step=5284300, episode=881 reward=0.7861209 (463.60 it/sec) -training >> step=5284400, episode=881 reward=0.7784109 (457.42 it/sec) -training >> step=5284500, episode=881 reward=0.7758043 (447.31 it/sec) -training >> step=5284600, episode=881 reward=0.7777275 (452.06 it/sec) -training >> step=5284700, episode=881 reward=0.7870687 (497.79 it/sec) -training >> step=5284800, episode=881 reward=0.7771823 (492.17 it/sec) -training >> step=5284900, episode=881 reward=0.7427713 (477.44 it/sec) -training >> step=5285000, episode=881 reward=0.7736883 (458.64 it/sec) -training >> step=5285100, episode=881 reward=0.7907805 (465.82 it/sec) -training >> step=5285200, episode=881 reward=0.7845864 (488.20 it/sec) -training >> step=5285300, episode=882 reward=0.7811706 (92.81 it/sec) -training >> step=5285400, episode=882 reward=0.8054724 (463.51 it/sec) -training >> step=5285500, episode=882 reward=0.7609609 (472.63 it/sec) -training >> step=5285600, episode=882 reward=0.7640175 (483.12 it/sec) -training >> step=5285700, episode=882 reward=0.77885 (409.93 it/sec) -training >> step=5285800, episode=882 reward=0.7905923 (461.65 it/sec) -training >> step=5285900, episode=882 reward=0.7966158 (465.36 it/sec) -training >> step=5286000, episode=882 reward=0.7873676 (461.70 it/sec) -training >> step=5286100, episode=882 reward=0.7647301 (432.39 it/sec) -training >> step=5286200, episode=882 reward=0.7838293 (417.70 it/sec) -training >> step=5286300, episode=882 reward=0.7936186 (438.50 it/sec) -training >> step=5286400, episode=882 reward=0.7863622 (464.84 it/sec) -training >> step=5286500, episode=882 reward=0.7753327 (411.66 it/sec) -training >> step=5286600, episode=882 reward=0.7881369 (469.88 it/sec) -training >> step=5286700, episode=882 reward=0.7830284 (501.58 it/sec) -training >> step=5286800, episode=882 reward=0.7752618 (452.50 it/sec) -training >> step=5286900, episode=882 reward=0.7905469 (485.17 it/sec) -training >> step=5287000, episode=882 reward=0.7837114 (461.25 it/sec) -training >> step=5287100, episode=882 reward=0.7868966 (477.33 it/sec) -training >> step=5287200, episode=882 reward=0.7741846 (331.91 it/sec) -training >> step=5287300, episode=882 reward=0.7767812 (437.90 it/sec) -training >> step=5287400, episode=882 reward=0.7899776 (454.48 it/sec) -training >> step=5287500, episode=882 reward=0.7854558 (477.75 it/sec) -training >> step=5287600, episode=882 reward=0.7844279 (403.89 it/sec) -training >> step=5287700, episode=882 reward=0.7942048 (451.07 it/sec) -training >> step=5287800, episode=882 reward=0.7918856 (467.43 it/sec) -training >> step=5287900, episode=882 reward=0.7857877 (456.55 it/sec) -training >> step=5288000, episode=882 reward=0.8044003 (498.83 it/sec) -training >> step=5288100, episode=882 reward=0.801839 (479.46 it/sec) -training >> step=5288200, episode=882 reward=0.8064819 (421.37 it/sec) -training >> step=5288300, episode=882 reward=0.7734239 (468.96 it/sec) -training >> step=5288400, episode=882 reward=0.780607 (425.36 it/sec) -training >> step=5288500, episode=882 reward=0.7731435 (500.94 it/sec) -training >> step=5288600, episode=882 reward=0.777639 (455.02 it/sec) -training >> step=5288700, episode=882 reward=0.7964761 (423.36 it/sec) -training >> step=5288800, episode=882 reward=0.7987903 (486.62 it/sec) -training >> step=5288900, episode=882 reward=0.7737501 (448.06 it/sec) -training >> step=5289000, episode=882 reward=0.7868465 (481.81 it/sec) -training >> step=5289100, episode=882 reward=0.7780836 (486.77 it/sec) -training >> step=5289200, episode=882 reward=0.7931415 (495.28 it/sec) -training >> step=5289300, episode=882 reward=0.7831427 (439.67 it/sec) -training >> step=5289400, episode=882 reward=0.8062098 (452.40 it/sec) -training >> step=5289500, episode=882 reward=0.7822332 (492.73 it/sec) -training >> step=5289600, episode=882 reward=0.7746125 (489.13 it/sec) -training >> step=5289700, episode=882 reward=0.7706338 (495.09 it/sec) -training >> step=5289800, episode=882 reward=0.7896325 (446.46 it/sec) -training >> step=5289900, episode=882 reward=0.7839254 (454.73 it/sec) -training >> step=5290000, episode=882 reward=0.7648553 (493.92 it/sec) -training >> step=5290100, episode=882 reward=0.7968448 (430.65 it/sec) -training >> step=5290200, episode=882 reward=0.7935688 (482.99 it/sec) -training >> step=5290300, episode=882 reward=0.7842386 (478.65 it/sec) -training >> step=5290400, episode=882 reward=0.7728696 (476.90 it/sec) -training >> step=5290500, episode=882 reward=0.7742349 (451.93 it/sec) -training >> step=5290600, episode=882 reward=0.7862801 (488.67 it/sec) -training >> step=5290700, episode=882 reward=0.7882081 (472.29 it/sec) -training >> step=5290800, episode=882 reward=0.7818729 (488.68 it/sec) -training >> step=5290900, episode=882 reward=0.7940318 (471.58 it/sec) -training >> step=5291000, episode=882 reward=0.7726211 (485.87 it/sec) -training >> step=5291100, episode=882 reward=0.7890059 (433.44 it/sec) -training >> step=5291200, episode=882 reward=0.7449834 (410.77 it/sec) -training >> step=5291300, episode=883 reward=0.7623848 (104.45 it/sec) -training >> step=5291400, episode=883 reward=0.7962047 (440.48 it/sec) -training >> step=5291500, episode=883 reward=0.7569081 (448.71 it/sec) -training >> step=5291600, episode=883 reward=0.7810075 (468.57 it/sec) -training >> step=5291700, episode=883 reward=0.7881252 (432.85 it/sec) -training >> step=5291800, episode=883 reward=0.7812594 (446.01 it/sec) -training >> step=5291900, episode=883 reward=0.7822943 (408.18 it/sec) -training >> step=5292000, episode=883 reward=0.788427 (441.74 it/sec) -training >> step=5292100, episode=883 reward=0.7821319 (479.27 it/sec) -training >> step=5292200, episode=883 reward=0.7875815 (462.63 it/sec) -training >> step=5292300, episode=883 reward=0.791979 (428.25 it/sec) -training >> step=5292400, episode=883 reward=0.7697487 (494.23 it/sec) -training >> step=5292500, episode=883 reward=0.7780804 (478.24 it/sec) -training >> step=5292600, episode=883 reward=0.7685398 (476.18 it/sec) -training >> step=5292700, episode=883 reward=0.7835582 (483.39 it/sec) -training >> step=5292800, episode=883 reward=0.7856776 (461.98 it/sec) -training >> step=5292900, episode=883 reward=0.7796896 (419.90 it/sec) -training >> step=5293000, episode=883 reward=0.7763024 (483.25 it/sec) -training >> step=5293100, episode=883 reward=0.7876217 (442.93 it/sec) -training >> step=5293200, episode=883 reward=0.8129414 (510.36 it/sec) -training >> step=5293300, episode=883 reward=0.7994347 (430.50 it/sec) -training >> step=5293400, episode=883 reward=0.8005884 (309.05 it/sec) -training >> step=5293500, episode=883 reward=0.8030228 (460.62 it/sec) -training >> step=5293600, episode=883 reward=0.7892824 (467.36 it/sec) -training >> step=5293700, episode=883 reward=0.7704395 (471.51 it/sec) -training >> step=5293800, episode=883 reward=0.7702847 (481.23 it/sec) -training >> step=5293900, episode=883 reward=0.7826082 (495.77 it/sec) -training >> step=5294000, episode=883 reward=0.7684135 (466.10 it/sec) -training >> step=5294100, episode=883 reward=0.7722901 (411.93 it/sec) -training >> step=5294200, episode=883 reward=0.8053292 (436.51 it/sec) -training >> step=5294300, episode=883 reward=0.7630296 (487.99 it/sec) -training >> step=5294400, episode=883 reward=0.7737895 (465.99 it/sec) -training >> step=5294500, episode=883 reward=0.7894976 (455.30 it/sec) -training >> step=5294600, episode=883 reward=0.7649969 (456.12 it/sec) -training >> step=5294700, episode=883 reward=0.7926409 (467.08 it/sec) -training >> step=5294800, episode=883 reward=0.8119121 (448.22 it/sec) -training >> step=5294900, episode=883 reward=0.7873151 (429.07 it/sec) -training >> step=5295000, episode=883 reward=0.7867309 (442.82 it/sec) -training >> step=5295100, episode=883 reward=0.784048 (438.17 it/sec) -training >> step=5295200, episode=883 reward=0.7879097 (450.94 it/sec) -training >> step=5295300, episode=883 reward=0.7765178 (487.93 it/sec) -training >> step=5295400, episode=883 reward=0.7730027 (461.94 it/sec) -training >> step=5295500, episode=883 reward=0.7827753 (460.09 it/sec) -training >> step=5295600, episode=883 reward=0.7802714 (405.98 it/sec) -training >> step=5295700, episode=883 reward=0.7774639 (466.04 it/sec) -training >> step=5295800, episode=883 reward=0.7761835 (436.46 it/sec) -training >> step=5295900, episode=883 reward=0.7764909 (444.98 it/sec) -training >> step=5296000, episode=883 reward=0.7863818 (455.30 it/sec) -training >> step=5296100, episode=883 reward=0.7692137 (454.98 it/sec) -training >> step=5296200, episode=883 reward=0.7868288 (457.84 it/sec) -training >> step=5296300, episode=883 reward=0.7837933 (470.80 it/sec) -training >> step=5296400, episode=883 reward=0.7868444 (443.04 it/sec) -training >> step=5296500, episode=883 reward=0.7638502 (427.14 it/sec) -training >> step=5296600, episode=883 reward=0.7747666 (491.35 it/sec) -training >> step=5296700, episode=883 reward=0.796564 (495.62 it/sec) -training >> step=5296800, episode=883 reward=0.7770717 (508.61 it/sec) -training >> step=5296900, episode=883 reward=0.7645593 (474.62 it/sec) -training >> step=5297000, episode=883 reward=0.7980044 (469.77 it/sec) -training >> step=5297100, episode=883 reward=0.775215 (520.06 it/sec) -training >> step=5297200, episode=883 reward=0.7867357 (495.55 it/sec) -training >> step=5297300, episode=884 reward=0.7744951 (75.20 it/sec) -training >> step=5297400, episode=884 reward=0.7771486 (460.89 it/sec) -training >> step=5297500, episode=884 reward=0.7651595 (483.78 it/sec) -training >> step=5297600, episode=884 reward=0.7973531 (475.46 it/sec) -training >> step=5297700, episode=884 reward=0.7757134 (483.05 it/sec) -training >> step=5297800, episode=884 reward=0.7905781 (481.80 it/sec) -training >> step=5297900, episode=884 reward=0.7789865 (471.42 it/sec) -training >> step=5298000, episode=884 reward=0.782385 (490.17 it/sec) -training >> step=5298100, episode=884 reward=0.7636349 (482.15 it/sec) -training >> step=5298200, episode=884 reward=0.7576529 (476.01 it/sec) -training >> step=5298300, episode=884 reward=0.7836635 (427.26 it/sec) -training >> step=5298400, episode=884 reward=0.791379 (477.97 it/sec) -training >> step=5298500, episode=884 reward=0.7738089 (500.47 it/sec) -training >> step=5298600, episode=884 reward=0.7797393 (470.42 it/sec) -training >> step=5298700, episode=884 reward=0.7802502 (504.83 it/sec) -training >> step=5298800, episode=884 reward=0.7839702 (500.84 it/sec) -training >> step=5298900, episode=884 reward=0.7867157 (469.04 it/sec) -training >> step=5299000, episode=884 reward=0.7957464 (473.89 it/sec) -training >> step=5299100, episode=884 reward=0.7842186 (529.25 it/sec) -training >> step=5299200, episode=884 reward=0.7816287 (488.16 it/sec) -training >> step=5299300, episode=884 reward=0.7783229 (501.45 it/sec) -training >> step=5299400, episode=884 reward=0.7877113 (462.22 it/sec) -training >> step=5299500, episode=884 reward=0.8008014 (343.15 it/sec) -training >> step=5299600, episode=884 reward=0.7818126 (473.37 it/sec) -training >> step=5299700, episode=884 reward=0.7674798 (465.21 it/sec) -training >> step=5299800, episode=884 reward=0.7855962 (524.69 it/sec) -training >> step=5299900, episode=884 reward=0.7766739 (470.32 it/sec) -training >> step=5300000, episode=884 reward=0.7864078 (499.98 it/sec) -training >> step=5300100, episode=884 reward=0.7767599 (511.22 it/sec) -training >> step=5300200, episode=884 reward=0.8009455 (470.01 it/sec) -training >> step=5300300, episode=884 reward=0.7811894 (489.69 it/sec) -training >> step=5300400, episode=884 reward=0.7805604 (466.59 it/sec) -training >> step=5300500, episode=884 reward=0.7717295 (508.06 it/sec) -training >> step=5300600, episode=884 reward=0.7685128 (522.91 it/sec) -training >> step=5300700, episode=884 reward=0.7873852 (440.09 it/sec) -training >> step=5300800, episode=884 reward=0.7883997 (456.22 it/sec) -training >> step=5300900, episode=884 reward=0.7817114 (484.82 it/sec) -training >> step=5301000, episode=884 reward=0.7847976 (462.41 it/sec) -training >> step=5301100, episode=884 reward=0.7941325 (424.72 it/sec) -training >> step=5301200, episode=884 reward=0.7911929 (458.54 it/sec) -training >> step=5301300, episode=884 reward=0.7789841 (470.91 it/sec) -training >> step=5301400, episode=884 reward=0.7741444 (465.49 it/sec) -training >> step=5301500, episode=884 reward=0.7912207 (494.99 it/sec) -training >> step=5301600, episode=884 reward=0.7846462 (465.63 it/sec) -training >> step=5301700, episode=884 reward=0.7922602 (397.25 it/sec) -training >> step=5301800, episode=884 reward=0.7726505 (468.69 it/sec) -training >> step=5301900, episode=884 reward=0.7919182 (451.47 it/sec) -training >> step=5302000, episode=884 reward=0.7900813 (533.11 it/sec) -training >> step=5302100, episode=884 reward=0.787383 (505.49 it/sec) -training >> step=5302200, episode=884 reward=0.7921197 (498.13 it/sec) -training >> step=5302300, episode=884 reward=0.7696165 (507.16 it/sec) -training >> step=5302400, episode=884 reward=0.7758889 (549.33 it/sec) -training >> step=5302500, episode=884 reward=0.7725598 (520.76 it/sec) -training >> step=5302600, episode=884 reward=0.7641717 (495.58 it/sec) -training >> step=5302700, episode=884 reward=0.792684 (525.32 it/sec) -training >> step=5302800, episode=884 reward=0.7722219 (513.71 it/sec) -training >> step=5302900, episode=884 reward=0.7709442 (518.97 it/sec) -training >> step=5303000, episode=884 reward=0.7785205 (531.07 it/sec) -training >> step=5303100, episode=884 reward=0.7937292 (548.19 it/sec) -training >> step=5303200, episode=884 reward=0.7781957 (488.41 it/sec) -training >> step=5303300, episode=885 reward=0.7801022 (52.57 it/sec) -training >> step=5303400, episode=885 reward=0.7670314 (484.01 it/sec) -training >> step=5303500, episode=885 reward=0.767684 (499.18 it/sec) -training >> step=5303600, episode=885 reward=0.7841592 (518.17 it/sec) -training >> step=5303700, episode=885 reward=0.7748071 (542.17 it/sec) -training >> step=5303800, episode=885 reward=0.7825028 (502.23 it/sec) -training >> step=5303900, episode=885 reward=0.7755845 (432.17 it/sec) -training >> step=5304000, episode=885 reward=0.7934996 (568.04 it/sec) -training >> step=5304100, episode=885 reward=0.772411 (479.37 it/sec) -training >> step=5304200, episode=885 reward=0.7822205 (530.96 it/sec) -training >> step=5304300, episode=885 reward=0.7727953 (535.06 it/sec) -training >> step=5304400, episode=885 reward=0.7780588 (480.71 it/sec) -training >> step=5304500, episode=885 reward=0.7752369 (512.70 it/sec) -training >> step=5304600, episode=885 reward=0.790528 (541.80 it/sec) -training >> step=5304700, episode=885 reward=0.7936579 (552.96 it/sec) -training >> step=5304800, episode=885 reward=0.7888694 (487.99 it/sec) -training >> step=5304900, episode=885 reward=0.7550611 (468.79 it/sec) -training >> step=5305000, episode=885 reward=0.7746643 (508.43 it/sec) -training >> step=5305100, episode=885 reward=0.77712 (506.40 it/sec) -training >> step=5305200, episode=885 reward=0.7822 (504.34 it/sec) -training >> step=5305300, episode=885 reward=0.7827259 (530.82 it/sec) -training >> step=5305400, episode=885 reward=0.7687568 (532.84 it/sec) -training >> step=5305500, episode=885 reward=0.7914703 (486.65 it/sec) -training >> step=5305600, episode=885 reward=0.7989611 (534.75 it/sec) -training >> step=5305700, episode=885 reward=0.8065438 (527.88 it/sec) -training >> step=5305800, episode=885 reward=0.7711771 (552.87 it/sec) -training >> step=5305900, episode=885 reward=0.7958858 (352.03 it/sec) -training >> step=5306000, episode=885 reward=0.7875845 (545.92 it/sec) -training >> step=5306100, episode=885 reward=0.793282 (512.02 it/sec) -training >> step=5306200, episode=885 reward=0.7855659 (517.42 it/sec) -training >> step=5306300, episode=885 reward=0.7784721 (517.17 it/sec) -training >> step=5306400, episode=885 reward=0.7975159 (495.95 it/sec) -training >> step=5306500, episode=885 reward=0.767607 (467.01 it/sec) -training >> step=5306600, episode=885 reward=0.7708223 (490.44 it/sec) -training >> step=5306700, episode=885 reward=0.7805098 (524.18 it/sec) -training >> step=5306800, episode=885 reward=0.7908594 (530.99 it/sec) -training >> step=5306900, episode=885 reward=0.7871076 (480.39 it/sec) -training >> step=5307000, episode=885 reward=0.7842517 (499.36 it/sec) -training >> step=5307100, episode=885 reward=0.7974921 (484.92 it/sec) -training >> step=5307200, episode=885 reward=0.7897899 (466.89 it/sec) -training >> step=5307300, episode=885 reward=0.7771428 (484.56 it/sec) -training >> step=5307400, episode=885 reward=0.7918542 (501.02 it/sec) -training >> step=5307500, episode=885 reward=0.7728061 (458.71 it/sec) -training >> step=5307600, episode=885 reward=0.765433 (513.62 it/sec) -training >> step=5307700, episode=885 reward=0.8005161 (469.20 it/sec) -training >> step=5307800, episode=885 reward=0.7602426 (485.06 it/sec) -training >> step=5307900, episode=885 reward=0.7874517 (478.76 it/sec) -training >> step=5308000, episode=885 reward=0.790175 (460.64 it/sec) -training >> step=5308100, episode=885 reward=0.7690606 (457.92 it/sec) -training >> step=5308200, episode=885 reward=0.7508707 (476.00 it/sec) -training >> step=5308300, episode=885 reward=0.7673376 (462.05 it/sec) -training >> step=5308400, episode=885 reward=0.7703626 (479.56 it/sec) -training >> step=5308500, episode=885 reward=0.7621289 (490.68 it/sec) -training >> step=5308600, episode=885 reward=0.7602497 (492.77 it/sec) -training >> step=5308700, episode=885 reward=0.7651457 (464.04 it/sec) -training >> step=5308800, episode=885 reward=0.7899824 (539.66 it/sec) -training >> step=5308900, episode=885 reward=0.7847394 (498.33 it/sec) -training >> step=5309000, episode=885 reward=0.7858779 (467.70 it/sec) -training >> step=5309100, episode=885 reward=0.7751476 (474.61 it/sec) -training >> step=5309200, episode=885 reward=0.7842706 (516.59 it/sec) -training >> step=5309300, episode=886 reward=0.77654 (54.80 it/sec) -training >> step=5309400, episode=886 reward=0.7589275 (436.37 it/sec) -training >> step=5309500, episode=886 reward=0.7687639 (499.59 it/sec) -training >> step=5309600, episode=886 reward=0.7607329 (508.13 it/sec) -training >> step=5309700, episode=886 reward=0.7744876 (462.72 it/sec) -training >> step=5309800, episode=886 reward=0.7620541 (491.43 it/sec) -training >> step=5309900, episode=886 reward=0.7778514 (474.07 it/sec) -training >> step=5310000, episode=886 reward=0.7827728 (489.74 it/sec) -training >> step=5310100, episode=886 reward=0.7870461 (481.20 it/sec) -training >> step=5310200, episode=886 reward=0.7688604 (472.53 it/sec) -training >> step=5310300, episode=886 reward=0.7730538 (485.51 it/sec) -training >> step=5310400, episode=886 reward=0.7927758 (452.31 it/sec) -training >> step=5310500, episode=886 reward=0.7701406 (516.59 it/sec) -training >> step=5310600, episode=886 reward=0.7799479 (437.14 it/sec) -training >> step=5310700, episode=886 reward=0.800315 (481.85 it/sec) -training >> step=5310800, episode=886 reward=0.7739329 (491.57 it/sec) -training >> step=5310900, episode=886 reward=0.7766135 (506.53 it/sec) -training >> step=5311000, episode=886 reward=0.7795352 (489.32 it/sec) -training >> step=5311100, episode=886 reward=0.7730507 (509.43 it/sec) -training >> step=5311200, episode=886 reward=0.7888724 (473.62 it/sec) -training >> step=5311300, episode=886 reward=0.8014799 (525.04 it/sec) -training >> step=5311400, episode=886 reward=0.7670599 (472.64 it/sec) -training >> step=5311500, episode=886 reward=0.7552593 (468.43 it/sec) -training >> step=5311600, episode=886 reward=0.7708222 (516.77 it/sec) -training >> step=5311700, episode=886 reward=0.7873569 (489.60 it/sec) -training >> step=5311800, episode=886 reward=0.7742646 (494.39 it/sec) -training >> step=5311900, episode=886 reward=0.7756194 (506.82 it/sec) -training >> step=5312000, episode=886 reward=0.7872756 (530.06 it/sec) -training >> step=5312100, episode=886 reward=0.7804007 (344.71 it/sec) -training >> step=5312200, episode=886 reward=0.7770895 (476.73 it/sec) -training >> step=5312300, episode=886 reward=0.8026286 (466.22 it/sec) -training >> step=5312400, episode=886 reward=0.7598363 (536.47 it/sec) -training >> step=5312500, episode=886 reward=0.7872871 (489.97 it/sec) -training >> step=5312600, episode=886 reward=0.7762403 (467.98 it/sec) -training >> step=5312700, episode=886 reward=0.7729041 (501.57 it/sec) -training >> step=5312800, episode=886 reward=0.7769749 (484.72 it/sec) -training >> step=5312900, episode=886 reward=0.7904491 (504.16 it/sec) -training >> step=5313000, episode=886 reward=0.7980174 (500.57 it/sec) -training >> step=5313100, episode=886 reward=0.7864683 (533.85 it/sec) -training >> step=5313200, episode=886 reward=0.7855212 (474.70 it/sec) -training >> step=5313300, episode=886 reward=0.7819113 (474.32 it/sec) -training >> step=5313400, episode=886 reward=0.7911909 (483.40 it/sec) -training >> step=5313500, episode=886 reward=0.786393 (514.10 it/sec) -training >> step=5313600, episode=886 reward=0.7835705 (507.47 it/sec) -training >> step=5313700, episode=886 reward=0.7886106 (482.14 it/sec) -training >> step=5313800, episode=886 reward=0.749662 (500.11 it/sec) -training >> step=5313900, episode=886 reward=0.7682401 (478.49 it/sec) -training >> step=5314000, episode=886 reward=0.7687327 (490.58 it/sec) -training >> step=5314100, episode=886 reward=0.7867702 (500.70 it/sec) -training >> step=5314200, episode=886 reward=0.7650952 (454.83 it/sec) -training >> step=5314300, episode=886 reward=0.768487 (457.58 it/sec) -training >> step=5314400, episode=886 reward=0.7887194 (480.40 it/sec) -training >> step=5314500, episode=886 reward=0.7834185 (515.67 it/sec) -training >> step=5314600, episode=886 reward=0.768696 (531.08 it/sec) -training >> step=5314700, episode=886 reward=0.7829983 (497.59 it/sec) -training >> step=5314800, episode=886 reward=0.763777 (509.37 it/sec) -training >> step=5314900, episode=886 reward=0.782244 (511.47 it/sec) -training >> step=5315000, episode=886 reward=0.7944749 (516.50 it/sec) -training >> step=5315100, episode=886 reward=0.7894052 (523.24 it/sec) -training >> step=5315200, episode=886 reward=0.7881476 (510.05 it/sec) -training >> step=5315300, episode=887 reward=0.7912202 (95.00 it/sec) -training >> step=5315400, episode=887 reward=0.7715818 (434.91 it/sec) -training >> step=5315500, episode=887 reward=0.7900817 (481.05 it/sec) -training >> step=5315600, episode=887 reward=0.7691647 (506.77 it/sec) -training >> step=5315700, episode=887 reward=0.7859409 (468.13 it/sec) -training >> step=5315800, episode=887 reward=0.7707289 (481.56 it/sec) -training >> step=5315900, episode=887 reward=0.7857296 (509.42 it/sec) -training >> step=5316000, episode=887 reward=0.8036841 (471.79 it/sec) -training >> step=5316100, episode=887 reward=0.7860013 (492.49 it/sec) -training >> step=5316200, episode=887 reward=0.755451 (418.69 it/sec) -training >> step=5316300, episode=887 reward=0.8074479 (466.52 it/sec) -training >> step=5316400, episode=887 reward=0.7771274 (445.29 it/sec) -training >> step=5316500, episode=887 reward=0.7864207 (449.77 it/sec) -training >> step=5316600, episode=887 reward=0.7757373 (385.95 it/sec) -training >> step=5316700, episode=887 reward=0.7691029 (436.21 it/sec) -training >> step=5316800, episode=887 reward=0.773156 (499.55 it/sec) -training >> step=5316900, episode=887 reward=0.7715021 (511.77 it/sec) -training >> step=5317000, episode=887 reward=0.7877681 (521.28 it/sec) -training >> step=5317100, episode=887 reward=0.7779046 (506.03 it/sec) -training >> step=5317200, episode=887 reward=0.7851622 (538.04 it/sec) -training >> step=5317300, episode=887 reward=0.7906351 (501.00 it/sec) -training >> step=5317400, episode=887 reward=0.776898 (510.85 it/sec) -training >> step=5317500, episode=887 reward=0.7748195 (565.22 it/sec) -training >> step=5317600, episode=887 reward=0.7878534 (511.16 it/sec) -training >> step=5317700, episode=887 reward=0.7777776 (503.41 it/sec) -training >> step=5317800, episode=887 reward=0.790082 (463.14 it/sec) -training >> step=5317900, episode=887 reward=0.7818989 (423.96 it/sec) -training >> step=5318000, episode=887 reward=0.7849032 (465.27 it/sec) -training >> step=5318100, episode=887 reward=0.7892224 (463.33 it/sec) -training >> step=5318200, episode=887 reward=0.7825633 (398.59 it/sec) -training >> step=5318300, episode=887 reward=0.7816612 (288.88 it/sec) -training >> step=5318400, episode=887 reward=0.7714574 (453.47 it/sec) -training >> step=5318500, episode=887 reward=0.7837359 (453.59 it/sec) -training >> step=5318600, episode=887 reward=0.8121178 (451.75 it/sec) -training >> step=5318700, episode=887 reward=0.7791615 (405.26 it/sec) -training >> step=5318800, episode=887 reward=0.7622184 (464.72 it/sec) -training >> step=5318900, episode=887 reward=0.7851871 (468.71 it/sec) -training >> step=5319000, episode=887 reward=0.8075939 (446.39 it/sec) -training >> step=5319100, episode=887 reward=0.7826746 (440.55 it/sec) -training >> step=5319200, episode=887 reward=0.7942061 (490.12 it/sec) -training >> step=5319300, episode=887 reward=0.7818902 (430.83 it/sec) -training >> step=5319400, episode=887 reward=0.7538021 (454.72 it/sec) -training >> step=5319500, episode=887 reward=0.7837794 (440.84 it/sec) -training >> step=5319600, episode=887 reward=0.7739852 (456.03 it/sec) -training >> step=5319700, episode=887 reward=0.7903097 (419.42 it/sec) -training >> step=5319800, episode=887 reward=0.7932656 (445.21 it/sec) -training >> step=5319900, episode=887 reward=0.7949799 (497.03 it/sec) -training >> step=5320000, episode=887 reward=0.8000121 (431.26 it/sec) -training >> step=5320100, episode=887 reward=0.7765725 (463.81 it/sec) -training >> step=5320200, episode=887 reward=0.7660437 (502.10 it/sec) -training >> step=5320300, episode=887 reward=0.7915323 (513.25 it/sec) -training >> step=5320400, episode=887 reward=0.7782323 (506.18 it/sec) -training >> step=5320500, episode=887 reward=0.7580268 (503.45 it/sec) -training >> step=5320600, episode=887 reward=0.7844163 (469.83 it/sec) -training >> step=5320700, episode=887 reward=0.7897539 (484.15 it/sec) -training >> step=5320800, episode=887 reward=0.7762824 (512.45 it/sec) -training >> step=5320900, episode=887 reward=0.7766591 (482.61 it/sec) -training >> step=5321000, episode=887 reward=0.7934391 (433.83 it/sec) -training >> step=5321100, episode=887 reward=0.7816679 (440.70 it/sec) -training >> step=5321200, episode=887 reward=0.783618 (448.13 it/sec) -training >> step=5321300, episode=888 reward=0.7929942 (98.00 it/sec) -training >> step=5321400, episode=888 reward=0.769697 (334.02 it/sec) -training >> step=5321500, episode=888 reward=0.7708116 (463.09 it/sec) -training >> step=5321600, episode=888 reward=0.7704847 (451.91 it/sec) -training >> step=5321700, episode=888 reward=0.793546 (415.64 it/sec) -training >> step=5321800, episode=888 reward=0.7595409 (405.95 it/sec) -training >> step=5321900, episode=888 reward=0.8070244 (456.38 it/sec) -training >> step=5322000, episode=888 reward=0.7784107 (458.63 it/sec) -training >> step=5322100, episode=888 reward=0.7905754 (442.23 it/sec) -training >> step=5322200, episode=888 reward=0.7710243 (483.49 it/sec) -training >> step=5322300, episode=888 reward=0.7709445 (414.47 it/sec) -training >> step=5322400, episode=888 reward=0.7854886 (427.73 it/sec) -training >> step=5322500, episode=888 reward=0.7832171 (441.03 it/sec) -training >> step=5322600, episode=888 reward=0.7794975 (458.54 it/sec) -training >> step=5322700, episode=888 reward=0.7862216 (472.20 it/sec) -training >> step=5322800, episode=888 reward=0.8030804 (458.46 it/sec) -training >> step=5322900, episode=888 reward=0.7777862 (442.94 it/sec) -training >> step=5323000, episode=888 reward=0.7657259 (467.56 it/sec) -training >> step=5323100, episode=888 reward=0.7689229 (463.94 it/sec) -training >> step=5323200, episode=888 reward=0.7879845 (459.02 it/sec) -training >> step=5323300, episode=888 reward=0.7608657 (494.71 it/sec) -training >> step=5323400, episode=888 reward=0.7756761 (489.55 it/sec) -training >> step=5323500, episode=888 reward=0.76569 (473.56 it/sec) -training >> step=5323600, episode=888 reward=0.7937838 (465.85 it/sec) -training >> step=5323700, episode=888 reward=0.7910144 (473.10 it/sec) -training >> step=5323800, episode=888 reward=0.7844442 (474.08 it/sec) -training >> step=5323900, episode=888 reward=0.7966864 (465.96 it/sec) -training >> step=5324000, episode=888 reward=0.7813841 (525.13 it/sec) -training >> step=5324100, episode=888 reward=0.800595 (466.76 it/sec) -training >> step=5324200, episode=888 reward=0.77529 (446.50 it/sec) -training >> step=5324300, episode=888 reward=0.8014751 (500.59 it/sec) -training >> step=5324400, episode=888 reward=0.7714792 (483.25 it/sec) -training >> step=5324500, episode=888 reward=0.801891 (355.09 it/sec) -training >> step=5324600, episode=888 reward=0.7763838 (472.86 it/sec) -training >> step=5324700, episode=888 reward=0.7900817 (482.63 it/sec) -training >> step=5324800, episode=888 reward=0.7790921 (444.25 it/sec) -training >> step=5324900, episode=888 reward=0.7804484 (473.24 it/sec) -training >> step=5325000, episode=888 reward=0.7724875 (456.23 it/sec) -training >> step=5325100, episode=888 reward=0.8001824 (515.76 it/sec) -training >> step=5325200, episode=888 reward=0.7659559 (395.18 it/sec) -training >> step=5325300, episode=888 reward=0.7950341 (436.51 it/sec) -training >> step=5325400, episode=888 reward=0.77086 (470.26 it/sec) -training >> step=5325500, episode=888 reward=0.8003651 (431.47 it/sec) -training >> step=5325600, episode=888 reward=0.7743356 (474.51 it/sec) -training >> step=5325700, episode=888 reward=0.792309 (467.89 it/sec) -training >> step=5325800, episode=888 reward=0.7924913 (441.21 it/sec) -training >> step=5325900, episode=888 reward=0.7847188 (442.30 it/sec) -training >> step=5326000, episode=888 reward=0.7894056 (464.86 it/sec) -training >> step=5326100, episode=888 reward=0.7988836 (507.49 it/sec) -training >> step=5326200, episode=888 reward=0.7815589 (473.46 it/sec) -training >> step=5326300, episode=888 reward=0.7834294 (455.40 it/sec) -training >> step=5326400, episode=888 reward=0.7757187 (432.28 it/sec) -training >> step=5326500, episode=888 reward=0.7782418 (478.56 it/sec) -training >> step=5326600, episode=888 reward=0.7896241 (473.55 it/sec) -training >> step=5326700, episode=888 reward=0.7783947 (448.18 it/sec) -training >> step=5326800, episode=888 reward=0.7654998 (472.33 it/sec) -training >> step=5326900, episode=888 reward=0.7644223 (451.35 it/sec) -training >> step=5327000, episode=888 reward=0.7871023 (463.39 it/sec) -training >> step=5327100, episode=888 reward=0.7806873 (431.10 it/sec) -training >> step=5327200, episode=888 reward=0.7745308 (482.44 it/sec) -training >> step=5327300, episode=889 reward=0.7757887 (94.76 it/sec) -training >> step=5327400, episode=889 reward=0.7857792 (505.32 it/sec) -training >> step=5327500, episode=889 reward=0.7447963 (472.15 it/sec) -training >> step=5327600, episode=889 reward=0.7530353 (493.03 it/sec) -training >> step=5327700, episode=889 reward=0.7736232 (428.10 it/sec) -training >> step=5327800, episode=889 reward=0.7877209 (456.69 it/sec) -training >> step=5327900, episode=889 reward=0.7733271 (447.58 it/sec) -training >> step=5328000, episode=889 reward=0.7760698 (467.43 it/sec) -training >> step=5328100, episode=889 reward=0.7804561 (483.62 it/sec) -training >> step=5328200, episode=889 reward=0.7962819 (433.59 it/sec) -training >> step=5328300, episode=889 reward=0.7875881 (438.50 it/sec) -training >> step=5328400, episode=889 reward=0.7777545 (474.12 it/sec) -training >> step=5328500, episode=889 reward=0.7802896 (478.14 it/sec) -training >> step=5328600, episode=889 reward=0.7981775 (452.46 it/sec) -training >> step=5328700, episode=889 reward=0.7703274 (461.49 it/sec) -training >> step=5328800, episode=889 reward=0.7867643 (490.37 it/sec) -training >> step=5328900, episode=889 reward=0.7784353 (447.12 it/sec) -training >> step=5329000, episode=889 reward=0.7846524 (462.31 it/sec) -training >> step=5329100, episode=889 reward=0.7746112 (513.55 it/sec) -training >> step=5329200, episode=889 reward=0.7821599 (465.49 it/sec) -training >> step=5329300, episode=889 reward=0.7839331 (492.03 it/sec) -training >> step=5329400, episode=889 reward=0.7915547 (464.13 it/sec) -training >> step=5329500, episode=889 reward=0.7953917 (478.39 it/sec) -training >> step=5329600, episode=889 reward=0.7756755 (493.52 it/sec) -training >> step=5329700, episode=889 reward=0.7983211 (429.75 it/sec) -training >> step=5329800, episode=889 reward=0.7836885 (469.69 it/sec) -training >> step=5329900, episode=889 reward=0.7864041 (468.33 it/sec) -training >> step=5330000, episode=889 reward=0.7741137 (480.18 it/sec) -training >> step=5330100, episode=889 reward=0.7967878 (470.72 it/sec) -training >> step=5330200, episode=889 reward=0.8134371 (438.41 it/sec) -training >> step=5330300, episode=889 reward=0.7870733 (464.35 it/sec) -training >> step=5330400, episode=889 reward=0.7883953 (466.06 it/sec) -training >> step=5330500, episode=889 reward=0.7780651 (366.55 it/sec) -training >> step=5330600, episode=889 reward=0.7972842 (444.86 it/sec) -training >> step=5330700, episode=889 reward=0.7820619 (450.23 it/sec) -training >> step=5330800, episode=889 reward=0.7958772 (467.76 it/sec) -training >> step=5330900, episode=889 reward=0.7795783 (481.07 it/sec) -training >> step=5331000, episode=889 reward=0.7520933 (441.76 it/sec) -training >> step=5331100, episode=889 reward=0.7797278 (464.33 it/sec) -training >> step=5331200, episode=889 reward=0.7948467 (486.15 it/sec) -training >> step=5331300, episode=889 reward=0.7838877 (432.02 it/sec) -training >> step=5331400, episode=889 reward=0.7900197 (409.68 it/sec) -training >> step=5331500, episode=889 reward=0.7918912 (459.46 it/sec) -training >> step=5331600, episode=889 reward=0.7850279 (458.17 it/sec) -training >> step=5331700, episode=889 reward=0.7907511 (451.50 it/sec) -training >> step=5331800, episode=889 reward=0.7862331 (501.94 it/sec) -training >> step=5331900, episode=889 reward=0.7903976 (501.33 it/sec) -training >> step=5332000, episode=889 reward=0.7942263 (463.43 it/sec) -training >> step=5332100, episode=889 reward=0.7937789 (458.88 it/sec) -training >> step=5332200, episode=889 reward=0.7587922 (484.20 it/sec) -training >> step=5332300, episode=889 reward=0.79002 (459.13 it/sec) -training >> step=5332400, episode=889 reward=0.7935597 (495.32 it/sec) -training >> step=5332500, episode=889 reward=0.7933729 (473.14 it/sec) -training >> step=5332600, episode=889 reward=0.7670072 (474.89 it/sec) -training >> step=5332700, episode=889 reward=0.7900203 (431.91 it/sec) -training >> step=5332800, episode=889 reward=0.779273 (479.04 it/sec) -training >> step=5332900, episode=889 reward=0.7615101 (479.00 it/sec) -training >> step=5333000, episode=889 reward=0.7737904 (457.85 it/sec) -training >> step=5333100, episode=889 reward=0.7549985 (476.23 it/sec) -training >> step=5333200, episode=889 reward=0.7757877 (461.34 it/sec) -training >> step=5333300, episode=890 reward=0.7768608 (97.23 it/sec) -training >> step=5333400, episode=890 reward=0.7672144 (480.36 it/sec) -training >> step=5333500, episode=890 reward=0.7849833 (521.29 it/sec) -training >> step=5333600, episode=890 reward=0.7607276 (488.12 it/sec) -training >> step=5333700, episode=890 reward=0.7768732 (510.18 it/sec) -training >> step=5333800, episode=890 reward=0.7850707 (499.46 it/sec) -training >> step=5333900, episode=890 reward=0.7848147 (510.34 it/sec) -training >> step=5334000, episode=890 reward=0.7847281 (491.14 it/sec) -training >> step=5334100, episode=890 reward=0.7770736 (508.83 it/sec) -training >> step=5334200, episode=890 reward=0.7773598 (512.66 it/sec) -training >> step=5334300, episode=890 reward=0.7816296 (531.15 it/sec) -training >> step=5334400, episode=890 reward=0.7971184 (481.54 it/sec) -training >> step=5334500, episode=890 reward=0.7766897 (499.35 it/sec) -training >> step=5334600, episode=890 reward=0.7804599 (489.78 it/sec) -training >> step=5334700, episode=890 reward=0.7889885 (447.43 it/sec) -training >> step=5334800, episode=890 reward=0.7920845 (445.17 it/sec) -training >> step=5334900, episode=890 reward=0.7935508 (505.89 it/sec) -training >> step=5335000, episode=890 reward=0.7799296 (485.50 it/sec) -training >> step=5335100, episode=890 reward=0.7754317 (481.99 it/sec) -training >> step=5335200, episode=890 reward=0.8094842 (489.94 it/sec) -training >> step=5335300, episode=890 reward=0.7859278 (505.61 it/sec) -training >> step=5335400, episode=890 reward=0.7777284 (529.15 it/sec) -training >> step=5335500, episode=890 reward=0.7858816 (514.94 it/sec) -training >> step=5335600, episode=890 reward=0.7952559 (473.32 it/sec) -training >> step=5335700, episode=890 reward=0.7816252 (548.78 it/sec) -training >> step=5335800, episode=890 reward=0.7707034 (438.11 it/sec) -training >> step=5335900, episode=890 reward=0.7660033 (496.14 it/sec) -training >> step=5336000, episode=890 reward=0.7951834 (498.51 it/sec) -training >> step=5336100, episode=890 reward=0.7996973 (514.20 it/sec) -training >> step=5336200, episode=890 reward=0.776212 (492.14 it/sec) -training >> step=5336300, episode=890 reward=0.7766866 (436.48 it/sec) -training >> step=5336400, episode=890 reward=0.7869882 (540.67 it/sec) -training >> step=5336500, episode=890 reward=0.7889833 (404.16 it/sec) -training >> step=5336600, episode=890 reward=0.7694646 (494.24 it/sec) -training >> step=5336700, episode=890 reward=0.7752389 (483.75 it/sec) -training >> step=5336800, episode=890 reward=0.782636 (516.04 it/sec) -training >> step=5336900, episode=890 reward=0.8130493 (502.06 it/sec) -training >> step=5337000, episode=890 reward=0.7975732 (528.33 it/sec) -training >> step=5337100, episode=890 reward=0.7973003 (495.48 it/sec) -training >> step=5337200, episode=890 reward=0.7780256 (524.79 it/sec) -training >> step=5337300, episode=890 reward=0.7699457 (495.78 it/sec) -training >> step=5337400, episode=890 reward=0.7917635 (458.47 it/sec) -training >> step=5337500, episode=890 reward=0.7913532 (533.64 it/sec) -training >> step=5337600, episode=890 reward=0.7895897 (497.28 it/sec) -training >> step=5337700, episode=890 reward=0.7762264 (497.16 it/sec) -training >> step=5337800, episode=890 reward=0.7840868 (486.48 it/sec) -training >> step=5337900, episode=890 reward=0.7742648 (500.70 it/sec) -training >> step=5338000, episode=890 reward=0.7802503 (509.24 it/sec) -training >> step=5338100, episode=890 reward=0.765156 (464.76 it/sec) -training >> step=5338200, episode=890 reward=0.7933041 (520.04 it/sec) -training >> step=5338300, episode=890 reward=0.770118 (502.96 it/sec) -training >> step=5338400, episode=890 reward=0.7572987 (479.42 it/sec) -training >> step=5338500, episode=890 reward=0.7968535 (517.53 it/sec) -training >> step=5338600, episode=890 reward=0.793379 (482.11 it/sec) -training >> step=5338700, episode=890 reward=0.7892583 (475.39 it/sec) -training >> step=5338800, episode=890 reward=0.766695 (505.43 it/sec) -training >> step=5338900, episode=890 reward=0.7809863 (497.28 it/sec) -training >> step=5339000, episode=890 reward=0.7691442 (520.75 it/sec) -training >> step=5339100, episode=890 reward=0.7780142 (469.90 it/sec) -training >> step=5339200, episode=890 reward=0.7690468 (442.48 it/sec) -training >> step=5339300, episode=891 reward=0.7737364 (113.04 it/sec) -training >> step=5339400, episode=891 reward=0.7813163 (461.46 it/sec) -training >> step=5339500, episode=891 reward=0.7697984 (425.73 it/sec) -training >> step=5339600, episode=891 reward=0.7735811 (472.95 it/sec) -training >> step=5339700, episode=891 reward=0.7719672 (492.82 it/sec) -training >> step=5339800, episode=891 reward=0.8015517 (451.41 it/sec) -training >> step=5339900, episode=891 reward=0.7974562 (474.32 it/sec) -training >> step=5340000, episode=891 reward=0.7796572 (429.56 it/sec) -training >> step=5340100, episode=891 reward=0.7816696 (494.81 it/sec) -training >> step=5340200, episode=891 reward=0.7815381 (498.66 it/sec) -training >> step=5340300, episode=891 reward=0.7937019 (401.17 it/sec) -training >> step=5340400, episode=891 reward=0.7918171 (464.01 it/sec) -training >> step=5340500, episode=891 reward=0.7957222 (453.75 it/sec) -training >> step=5340600, episode=891 reward=0.7889411 (484.68 it/sec) -training >> step=5340700, episode=891 reward=0.7797039 (460.43 it/sec) -training >> step=5340800, episode=891 reward=0.7790655 (511.64 it/sec) -training >> step=5340900, episode=891 reward=0.7816374 (463.66 it/sec) -training >> step=5341000, episode=891 reward=0.7962657 (472.21 it/sec) -training >> step=5341100, episode=891 reward=0.7785115 (454.17 it/sec) -training >> step=5341200, episode=891 reward=0.7789927 (449.90 it/sec) -training >> step=5341300, episode=891 reward=0.7910293 (494.69 it/sec) -training >> step=5341400, episode=891 reward=0.7764605 (438.67 it/sec) -training >> step=5341500, episode=891 reward=0.7829908 (498.74 it/sec) -training >> step=5341600, episode=891 reward=0.7817503 (462.27 it/sec) -training >> step=5341700, episode=891 reward=0.803746 (438.91 it/sec) -training >> step=5341800, episode=891 reward=0.7940342 (462.90 it/sec) -training >> step=5341900, episode=891 reward=0.7791929 (488.98 it/sec) -training >> step=5342000, episode=891 reward=0.7787741 (463.45 it/sec) -training >> step=5342100, episode=891 reward=0.7916536 (487.10 it/sec) -training >> step=5342200, episode=891 reward=0.7796404 (461.31 it/sec) -training >> step=5342300, episode=891 reward=0.7781559 (417.16 it/sec) -training >> step=5342400, episode=891 reward=0.7770412 (471.46 it/sec) -training >> step=5342500, episode=891 reward=0.7688541 (494.11 it/sec) -training >> step=5342600, episode=891 reward=0.7906212 (463.16 it/sec) -training >> step=5342700, episode=891 reward=0.793254 (439.34 it/sec) -training >> step=5342800, episode=891 reward=0.7734175 (335.47 it/sec) -training >> step=5342900, episode=891 reward=0.7958012 (472.03 it/sec) -training >> step=5343000, episode=891 reward=0.7769092 (475.55 it/sec) -training >> step=5343100, episode=891 reward=0.7797073 (473.08 it/sec) -training >> step=5343200, episode=891 reward=0.7773607 (460.29 it/sec) -training >> step=5343300, episode=891 reward=0.7920758 (477.39 it/sec) -training >> step=5343400, episode=891 reward=0.7813401 (474.71 it/sec) -training >> step=5343500, episode=891 reward=0.7871675 (483.69 it/sec) -training >> step=5343600, episode=891 reward=0.7891402 (476.91 it/sec) -training >> step=5343700, episode=891 reward=0.767268 (491.79 it/sec) -training >> step=5343800, episode=891 reward=0.7917576 (445.72 it/sec) -training >> step=5343900, episode=891 reward=0.7820594 (445.05 it/sec) -training >> step=5344000, episode=891 reward=0.7971604 (490.54 it/sec) -training >> step=5344100, episode=891 reward=0.7846505 (489.32 it/sec) -training >> step=5344200, episode=891 reward=0.7801704 (481.98 it/sec) -training >> step=5344300, episode=891 reward=0.7911234 (478.84 it/sec) -training >> step=5344400, episode=891 reward=0.7705976 (507.42 it/sec) -training >> step=5344500, episode=891 reward=0.7706299 (451.21 it/sec) -training >> step=5344600, episode=891 reward=0.7829595 (454.77 it/sec) -training >> step=5344700, episode=891 reward=0.7857924 (484.28 it/sec) -training >> step=5344800, episode=891 reward=0.7716951 (479.96 it/sec) -training >> step=5344900, episode=891 reward=0.7775555 (455.28 it/sec) -training >> step=5345000, episode=891 reward=0.7635371 (421.84 it/sec) -training >> step=5345100, episode=891 reward=0.8026838 (479.70 it/sec) -training >> step=5345200, episode=891 reward=0.7734893 (442.15 it/sec) -training >> step=5345300, episode=892 reward=0.7857651 (86.75 it/sec) -training >> step=5345400, episode=892 reward=0.7698886 (488.12 it/sec) -training >> step=5345500, episode=892 reward=0.7664094 (468.24 it/sec) -training >> step=5345600, episode=892 reward=0.7688377 (449.14 it/sec) -training >> step=5345700, episode=892 reward=0.7804648 (434.82 it/sec) -training >> step=5345800, episode=892 reward=0.7760545 (443.43 it/sec) -training >> step=5345900, episode=892 reward=0.7734599 (527.41 it/sec) -training >> step=5346000, episode=892 reward=0.7948329 (458.82 it/sec) -training >> step=5346100, episode=892 reward=0.7920706 (490.45 it/sec) -training >> step=5346200, episode=892 reward=0.7822864 (464.63 it/sec) -training >> step=5346300, episode=892 reward=0.7889394 (438.37 it/sec) -training >> step=5346400, episode=892 reward=0.7818775 (455.65 it/sec) -training >> step=5346500, episode=892 reward=0.7699122 (476.54 it/sec) -training >> step=5346600, episode=892 reward=0.7670252 (427.29 it/sec) -training >> step=5346700, episode=892 reward=0.7942442 (467.46 it/sec) -training >> step=5346800, episode=892 reward=0.7844384 (421.51 it/sec) -training >> step=5346900, episode=892 reward=0.7947785 (442.66 it/sec) -training >> step=5347000, episode=892 reward=0.7812356 (436.63 it/sec) -training >> step=5347100, episode=892 reward=0.7945877 (391.60 it/sec) -training >> step=5347200, episode=892 reward=0.7861722 (432.44 it/sec) -training >> step=5347300, episode=892 reward=0.7964422 (451.51 it/sec) -training >> step=5347400, episode=892 reward=0.7696858 (446.76 it/sec) -training >> step=5347500, episode=892 reward=0.7769675 (461.74 it/sec) -training >> step=5347600, episode=892 reward=0.7767981 (434.16 it/sec) -training >> step=5347700, episode=892 reward=0.7780829 (489.93 it/sec) -training >> step=5347800, episode=892 reward=0.7813663 (470.91 it/sec) -training >> step=5347900, episode=892 reward=0.7834569 (485.47 it/sec) -training >> step=5348000, episode=892 reward=0.7844733 (471.16 it/sec) -training >> step=5348100, episode=892 reward=0.7897112 (490.40 it/sec) -training >> step=5348200, episode=892 reward=0.7769001 (477.12 it/sec) -training >> step=5348300, episode=892 reward=0.7678982 (477.43 it/sec) -training >> step=5348400, episode=892 reward=0.8045489 (478.97 it/sec) -training >> step=5348500, episode=892 reward=0.7941486 (486.73 it/sec) -training >> step=5348600, episode=892 reward=0.7799593 (456.45 it/sec) -training >> step=5348700, episode=892 reward=0.772737 (478.55 it/sec) -training >> step=5348800, episode=892 reward=0.7513047 (467.38 it/sec) -training >> step=5348900, episode=892 reward=0.7826154 (343.13 it/sec) -training >> step=5349000, episode=892 reward=0.7835469 (461.20 it/sec) -training >> step=5349100, episode=892 reward=0.7913457 (485.02 it/sec) -training >> step=5349200, episode=892 reward=0.7838737 (427.74 it/sec) -training >> step=5349300, episode=892 reward=0.7655068 (470.07 it/sec) -training >> step=5349400, episode=892 reward=0.7756178 (462.92 it/sec) -training >> step=5349500, episode=892 reward=0.7981158 (485.09 it/sec) -training >> step=5349600, episode=892 reward=0.7797703 (490.07 it/sec) -training >> step=5349700, episode=892 reward=0.7861456 (466.99 it/sec) -training >> step=5349800, episode=892 reward=0.7690848 (496.60 it/sec) -training >> step=5349900, episode=892 reward=0.7819092 (501.96 it/sec) -training >> step=5350000, episode=892 reward=0.7887853 (485.11 it/sec) -training >> step=5350100, episode=892 reward=0.7835034 (507.20 it/sec) -training >> step=5350200, episode=892 reward=0.7910068 (493.82 it/sec) -training >> step=5350300, episode=892 reward=0.7709937 (521.99 it/sec) -training >> step=5350400, episode=892 reward=0.7611292 (469.03 it/sec) -training >> step=5350500, episode=892 reward=0.7806256 (472.99 it/sec) -training >> step=5350600, episode=892 reward=0.7851112 (527.00 it/sec) -training >> step=5350700, episode=892 reward=0.8038753 (487.08 it/sec) -training >> step=5350800, episode=892 reward=0.7783511 (484.99 it/sec) -training >> step=5350900, episode=892 reward=0.7781251 (386.48 it/sec) -training >> step=5351000, episode=892 reward=0.7812479 (501.83 it/sec) -training >> step=5351100, episode=892 reward=0.7620121 (510.18 it/sec) -training >> step=5351200, episode=892 reward=0.7739946 (427.40 it/sec) -training >> step=5351300, episode=893 reward=0.7642375 (93.89 it/sec) -training >> step=5351400, episode=893 reward=0.7853425 (488.03 it/sec) -training >> step=5351500, episode=893 reward=0.7969912 (483.54 it/sec) -training >> step=5351600, episode=893 reward=0.7991886 (493.30 it/sec) -training >> step=5351700, episode=893 reward=0.7682378 (490.13 it/sec) -training >> step=5351800, episode=893 reward=0.7760278 (522.36 it/sec) -training >> step=5351900, episode=893 reward=0.7862748 (492.45 it/sec) -training >> step=5352000, episode=893 reward=0.7804676 (511.82 it/sec) -training >> step=5352100, episode=893 reward=0.7991181 (478.24 it/sec) -training >> step=5352200, episode=893 reward=0.7775162 (518.34 it/sec) -training >> step=5352300, episode=893 reward=0.7844177 (507.78 it/sec) -training >> step=5352400, episode=893 reward=0.7833146 (525.26 it/sec) -training >> step=5352500, episode=893 reward=0.7789056 (456.64 it/sec) -training >> step=5352600, episode=893 reward=0.775833 (459.03 it/sec) -training >> step=5352700, episode=893 reward=0.7752274 (499.53 it/sec) -training >> step=5352800, episode=893 reward=0.7780893 (433.28 it/sec) -training >> step=5352900, episode=893 reward=0.7860821 (454.33 it/sec) -training >> step=5353000, episode=893 reward=0.7890184 (491.96 it/sec) -training >> step=5353100, episode=893 reward=0.7741479 (442.60 it/sec) -training >> step=5353200, episode=893 reward=0.7867146 (460.88 it/sec) -training >> step=5353300, episode=893 reward=0.7634826 (435.18 it/sec) -training >> step=5353400, episode=893 reward=0.7799922 (443.38 it/sec) -training >> step=5353500, episode=893 reward=0.7721601 (435.53 it/sec) -training >> step=5353600, episode=893 reward=0.7724934 (436.90 it/sec) -training >> step=5353700, episode=893 reward=0.7837297 (469.73 it/sec) -training >> step=5353800, episode=893 reward=0.8013387 (458.67 it/sec) -training >> step=5353900, episode=893 reward=0.7756925 (455.90 it/sec) -training >> step=5354000, episode=893 reward=0.7784547 (467.57 it/sec) -training >> step=5354100, episode=893 reward=0.7895814 (505.86 it/sec) -training >> step=5354200, episode=893 reward=0.7901165 (482.44 it/sec) -training >> step=5354300, episode=893 reward=0.7899988 (484.85 it/sec) -training >> step=5354400, episode=893 reward=0.7836604 (484.00 it/sec) -training >> step=5354500, episode=893 reward=0.7624108 (432.45 it/sec) -training >> step=5354600, episode=893 reward=0.7802709 (456.38 it/sec) -training >> step=5354700, episode=893 reward=0.775941 (486.29 it/sec) -training >> step=5354800, episode=893 reward=0.7677567 (476.35 it/sec) -training >> step=5354900, episode=893 reward=0.7799026 (487.01 it/sec) -training >> step=5355000, episode=893 reward=0.8000625 (475.13 it/sec) -training >> step=5355100, episode=893 reward=0.7590902 (393.64 it/sec) -training >> step=5355200, episode=893 reward=0.7836155 (481.54 it/sec) -training >> step=5355300, episode=893 reward=0.7761724 (449.71 it/sec) -training >> step=5355400, episode=893 reward=0.790517 (478.14 it/sec) -training >> step=5355500, episode=893 reward=0.7956767 (480.28 it/sec) -training >> step=5355600, episode=893 reward=0.7944438 (467.21 it/sec) -training >> step=5355700, episode=893 reward=0.7550851 (471.48 it/sec) -training >> step=5355800, episode=893 reward=0.7972479 (509.17 it/sec) -training >> step=5355900, episode=893 reward=0.7871662 (474.59 it/sec) -training >> step=5356000, episode=893 reward=0.7946597 (482.80 it/sec) -training >> step=5356100, episode=893 reward=0.7888044 (504.09 it/sec) -training >> step=5356200, episode=893 reward=0.7742473 (492.19 it/sec) -training >> step=5356300, episode=893 reward=0.7801552 (472.68 it/sec) -training >> step=5356400, episode=893 reward=0.7868764 (466.11 it/sec) -training >> step=5356500, episode=893 reward=0.7944027 (485.57 it/sec) -training >> step=5356600, episode=893 reward=0.7813092 (450.07 it/sec) -training >> step=5356700, episode=893 reward=0.7752327 (474.74 it/sec) -training >> step=5356800, episode=893 reward=0.7761372 (464.80 it/sec) -training >> step=5356900, episode=893 reward=0.816448 (473.74 it/sec) -training >> step=5357000, episode=893 reward=0.7588443 (481.16 it/sec) -training >> step=5357100, episode=893 reward=0.7851183 (452.91 it/sec) -training >> step=5357200, episode=893 reward=0.7777451 (456.77 it/sec) -training >> step=5357300, episode=894 reward=0.8006843 (55.43 it/sec) -training >> step=5357400, episode=894 reward=0.7791623 (448.52 it/sec) -training >> step=5357500, episode=894 reward=0.7761304 (475.85 it/sec) -training >> step=5357600, episode=894 reward=0.7919639 (438.20 it/sec) -training >> step=5357700, episode=894 reward=0.7935833 (453.31 it/sec) -training >> step=5357800, episode=894 reward=0.7733408 (466.29 it/sec) -training >> step=5357900, episode=894 reward=0.8150461 (492.20 it/sec) -training >> step=5358000, episode=894 reward=0.7815335 (458.19 it/sec) -training >> step=5358100, episode=894 reward=0.7670993 (488.62 it/sec) -training >> step=5358200, episode=894 reward=0.7829918 (497.49 it/sec) -training >> step=5358300, episode=894 reward=0.8005052 (476.81 it/sec) -training >> step=5358400, episode=894 reward=0.7591246 (479.43 it/sec) -training >> step=5358500, episode=894 reward=0.7978916 (486.10 it/sec) -training >> step=5358600, episode=894 reward=0.7993109 (469.27 it/sec) -training >> step=5358700, episode=894 reward=0.7801676 (482.72 it/sec) -training >> step=5358800, episode=894 reward=0.7935944 (482.30 it/sec) -training >> step=5358900, episode=894 reward=0.7904003 (483.80 it/sec) -training >> step=5359000, episode=894 reward=0.7876983 (474.08 it/sec) -training >> step=5359100, episode=894 reward=0.8008953 (441.04 it/sec) -training >> step=5359200, episode=894 reward=0.8093084 (455.26 it/sec) -training >> step=5359300, episode=894 reward=0.8183748 (452.20 it/sec) -training >> step=5359400, episode=894 reward=0.7912473 (475.87 it/sec) -training >> step=5359500, episode=894 reward=0.7703648 (457.36 it/sec) -training >> step=5359600, episode=894 reward=0.7619264 (475.25 it/sec) -training >> step=5359700, episode=894 reward=0.7870021 (491.79 it/sec) -training >> step=5359800, episode=894 reward=0.787092 (492.43 it/sec) -training >> step=5359900, episode=894 reward=0.7743692 (486.87 it/sec) -training >> step=5360000, episode=894 reward=0.7860621 (469.01 it/sec) -training >> step=5360100, episode=894 reward=0.7934617 (463.81 it/sec) -training >> step=5360200, episode=894 reward=0.7921161 (458.40 it/sec) -training >> step=5360300, episode=894 reward=0.785963 (480.48 it/sec) -training >> step=5360400, episode=894 reward=0.7921445 (479.28 it/sec) -training >> step=5360500, episode=894 reward=0.7719867 (468.46 it/sec) -training >> step=5360600, episode=894 reward=0.7786771 (455.59 it/sec) -training >> step=5360700, episode=894 reward=0.7823002 (489.79 it/sec) -training >> step=5360800, episode=894 reward=0.7827849 (453.25 it/sec) -training >> step=5360900, episode=894 reward=0.7801701 (448.61 it/sec) -training >> step=5361000, episode=894 reward=0.7813917 (489.46 it/sec) -training >> step=5361100, episode=894 reward=0.7690907 (461.51 it/sec) -training >> step=5361200, episode=894 reward=0.7643355 (451.56 it/sec) -training >> step=5361300, episode=894 reward=0.7824185 (362.07 it/sec) -training >> step=5361400, episode=894 reward=0.7699723 (490.77 it/sec) -training >> step=5361500, episode=894 reward=0.7909024 (470.55 it/sec) -training >> step=5361600, episode=894 reward=0.7920462 (463.40 it/sec) -training >> step=5361700, episode=894 reward=0.7769521 (457.03 it/sec) -training >> step=5361800, episode=894 reward=0.7847275 (504.54 it/sec) -training >> step=5361900, episode=894 reward=0.7664926 (423.83 it/sec) -training >> step=5362000, episode=894 reward=0.7876155 (422.60 it/sec) -training >> step=5362100, episode=894 reward=0.781625 (476.59 it/sec) -training >> step=5362200, episode=894 reward=0.7758743 (471.87 it/sec) -training >> step=5362300, episode=894 reward=0.782304 (470.40 it/sec) -training >> step=5362400, episode=894 reward=0.7893655 (466.20 it/sec) -training >> step=5362500, episode=894 reward=0.7621219 (486.65 it/sec) -training >> step=5362600, episode=894 reward=0.790604 (462.37 it/sec) -training >> step=5362700, episode=894 reward=0.7982787 (496.38 it/sec) -training >> step=5362800, episode=894 reward=0.769903 (476.77 it/sec) -training >> step=5362900, episode=894 reward=0.7735369 (484.05 it/sec) -training >> step=5363000, episode=894 reward=0.7710906 (479.99 it/sec) -training >> step=5363100, episode=894 reward=0.8039666 (436.64 it/sec) -training >> step=5363200, episode=894 reward=0.7944892 (476.55 it/sec) -training >> step=5363300, episode=895 reward=0.7436672 (50.32 it/sec) -training >> step=5363400, episode=895 reward=0.7797312 (492.51 it/sec) -training >> step=5363500, episode=895 reward=0.7772186 (483.05 it/sec) -training >> step=5363600, episode=895 reward=0.7821801 (434.05 it/sec) -training >> step=5363700, episode=895 reward=0.7857121 (461.56 it/sec) -training >> step=5363800, episode=895 reward=0.7608841 (474.99 it/sec) -training >> step=5363900, episode=895 reward=0.78918 (462.45 it/sec) -training >> step=5364000, episode=895 reward=0.7790292 (459.67 it/sec) -training >> step=5364100, episode=895 reward=0.7885705 (487.51 it/sec) -training >> step=5364200, episode=895 reward=0.7801556 (476.81 it/sec) -training >> step=5364300, episode=895 reward=0.770942 (455.62 it/sec) -training >> step=5364400, episode=895 reward=0.7764218 (482.97 it/sec) -training >> step=5364500, episode=895 reward=0.7763889 (482.09 it/sec) -training >> step=5364600, episode=895 reward=0.7883695 (445.91 it/sec) -training >> step=5364700, episode=895 reward=0.7958151 (470.50 it/sec) -training >> step=5364800, episode=895 reward=0.8023472 (428.44 it/sec) -training >> step=5364900, episode=895 reward=0.7724822 (459.15 it/sec) -training >> step=5365000, episode=895 reward=0.7934233 (492.31 it/sec) -training >> step=5365100, episode=895 reward=0.7939697 (427.01 it/sec) -training >> step=5365200, episode=895 reward=0.7760343 (406.18 it/sec) -training >> step=5365300, episode=895 reward=0.7806026 (463.92 it/sec) -training >> step=5365400, episode=895 reward=0.7590594 (482.37 it/sec) -training >> step=5365500, episode=895 reward=0.7952112 (454.58 it/sec) -training >> step=5365600, episode=895 reward=0.778839 (467.51 it/sec) -training >> step=5365700, episode=895 reward=0.7985216 (453.78 it/sec) -training >> step=5365800, episode=895 reward=0.7899261 (469.75 it/sec) -training >> step=5365900, episode=895 reward=0.7907617 (494.60 it/sec) -training >> step=5366000, episode=895 reward=0.7861571 (490.35 it/sec) -training >> step=5366100, episode=895 reward=0.7806131 (465.53 it/sec) -training >> step=5366200, episode=895 reward=0.77351 (479.39 it/sec) -training >> step=5366300, episode=895 reward=0.7861697 (507.74 it/sec) -training >> step=5366400, episode=895 reward=0.7741964 (492.97 it/sec) -training >> step=5366500, episode=895 reward=0.7779667 (481.45 it/sec) -training >> step=5366600, episode=895 reward=0.7853304 (483.36 it/sec) -training >> step=5366700, episode=895 reward=0.80948 (459.99 it/sec) -training >> step=5366800, episode=895 reward=0.7755461 (469.06 it/sec) -training >> step=5366900, episode=895 reward=0.7819303 (471.62 it/sec) -training >> step=5367000, episode=895 reward=0.7917988 (485.47 it/sec) -training >> step=5367100, episode=895 reward=0.780405 (471.78 it/sec) -training >> step=5367200, episode=895 reward=0.7979512 (491.17 it/sec) -training >> step=5367300, episode=895 reward=0.78185 (475.20 it/sec) -training >> step=5367400, episode=895 reward=0.7895873 (439.75 it/sec) -training >> step=5367500, episode=895 reward=0.7614574 (480.89 it/sec) -training >> step=5367600, episode=895 reward=0.7885764 (362.81 it/sec) -training >> step=5367700, episode=895 reward=0.7660587 (492.62 it/sec) -training >> step=5367800, episode=895 reward=0.7892149 (477.07 it/sec) -training >> step=5367900, episode=895 reward=0.7936805 (455.35 it/sec) -training >> step=5368000, episode=895 reward=0.7824179 (487.56 it/sec) -training >> step=5368100, episode=895 reward=0.7702612 (499.03 it/sec) -training >> step=5368200, episode=895 reward=0.7913346 (482.57 it/sec) -training >> step=5368300, episode=895 reward=0.7675174 (474.82 it/sec) -training >> step=5368400, episode=895 reward=0.7687775 (478.61 it/sec) -training >> step=5368500, episode=895 reward=0.7857078 (461.97 it/sec) -training >> step=5368600, episode=895 reward=0.7730689 (485.48 it/sec) -training >> step=5368700, episode=895 reward=0.7652445 (493.51 it/sec) -training >> step=5368800, episode=895 reward=0.7820407 (460.37 it/sec) -training >> step=5368900, episode=895 reward=0.7909098 (484.48 it/sec) -training >> step=5369000, episode=895 reward=0.7626036 (494.65 it/sec) -training >> step=5369100, episode=895 reward=0.773208 (452.53 it/sec) -training >> step=5369200, episode=895 reward=0.7693261 (473.53 it/sec) -training >> step=5369300, episode=896 reward=0.7672775 (47.23 it/sec) -training >> step=5369400, episode=896 reward=0.7666746 (495.26 it/sec) -training >> step=5369500, episode=896 reward=0.7734595 (471.00 it/sec) -training >> step=5369600, episode=896 reward=0.76968 (496.44 it/sec) -training >> step=5369700, episode=896 reward=0.7957513 (492.43 it/sec) -training >> step=5369800, episode=896 reward=0.7858611 (499.47 it/sec) -training >> step=5369900, episode=896 reward=0.7882646 (527.01 it/sec) -training >> step=5370000, episode=896 reward=0.7884674 (458.67 it/sec) -training >> step=5370100, episode=896 reward=0.7943081 (487.18 it/sec) -training >> step=5370200, episode=896 reward=0.792228 (547.28 it/sec) -training >> step=5370300, episode=896 reward=0.7662963 (469.87 it/sec) -training >> step=5370400, episode=896 reward=0.7908196 (503.74 it/sec) -training >> step=5370500, episode=896 reward=0.7692429 (528.27 it/sec) -training >> step=5370600, episode=896 reward=0.79728 (495.73 it/sec) -training >> step=5370700, episode=896 reward=0.7859333 (487.64 it/sec) -training >> step=5370800, episode=896 reward=0.7943807 (513.57 it/sec) -training >> step=5370900, episode=896 reward=0.775066 (523.07 it/sec) -training >> step=5371000, episode=896 reward=0.7749578 (451.50 it/sec) -training >> step=5371100, episode=896 reward=0.7728765 (516.62 it/sec) -training >> step=5371200, episode=896 reward=0.7664157 (552.73 it/sec) -training >> step=5371300, episode=896 reward=0.7913808 (500.16 it/sec) -training >> step=5371400, episode=896 reward=0.7752938 (544.11 it/sec) -training >> step=5371500, episode=896 reward=0.7899916 (472.15 it/sec) -training >> step=5371600, episode=896 reward=0.7887791 (469.21 it/sec) -training >> step=5371700, episode=896 reward=0.7769104 (485.82 it/sec) -training >> step=5371800, episode=896 reward=0.7649003 (451.75 it/sec) -training >> step=5371900, episode=896 reward=0.7850627 (497.70 it/sec) -training >> step=5372000, episode=896 reward=0.7923734 (491.66 it/sec) -training >> step=5372100, episode=896 reward=0.7855719 (493.35 it/sec) -training >> step=5372200, episode=896 reward=0.7932922 (545.85 it/sec) -training >> step=5372300, episode=896 reward=0.7955725 (453.80 it/sec) -training >> step=5372400, episode=896 reward=0.7809555 (490.06 it/sec) -training >> step=5372500, episode=896 reward=0.8032432 (449.18 it/sec) -training >> step=5372600, episode=896 reward=0.7938287 (542.24 it/sec) -training >> step=5372700, episode=896 reward=0.7690935 (512.90 it/sec) -training >> step=5372800, episode=896 reward=0.7825137 (485.51 it/sec) -training >> step=5372900, episode=896 reward=0.7867951 (464.80 it/sec) -training >> step=5373000, episode=896 reward=0.7791685 (497.72 it/sec) -training >> step=5373100, episode=896 reward=0.7666011 (512.64 it/sec) -training >> step=5373200, episode=896 reward=0.7948964 (493.66 it/sec) -training >> step=5373300, episode=896 reward=0.7841648 (501.62 it/sec) -training >> step=5373400, episode=896 reward=0.7987493 (530.32 it/sec) -training >> step=5373500, episode=896 reward=0.7698298 (479.70 it/sec) -training >> step=5373600, episode=896 reward=0.7812606 (496.77 it/sec) -training >> step=5373700, episode=896 reward=0.778573 (404.70 it/sec) -training >> step=5373800, episode=896 reward=0.7829375 (494.85 it/sec) -training >> step=5373900, episode=896 reward=0.8100374 (499.79 it/sec) -training >> step=5374000, episode=896 reward=0.7895132 (528.53 it/sec) -training >> step=5374100, episode=896 reward=0.7673252 (468.84 it/sec) -training >> step=5374200, episode=896 reward=0.7973957 (486.17 it/sec) -training >> step=5374300, episode=896 reward=0.7836018 (516.00 it/sec) -training >> step=5374400, episode=896 reward=0.7964749 (508.31 it/sec) -training >> step=5374500, episode=896 reward=0.7740467 (486.32 it/sec) -training >> step=5374600, episode=896 reward=0.7802585 (505.95 it/sec) -training >> step=5374700, episode=896 reward=0.7770492 (503.45 it/sec) -training >> step=5374800, episode=896 reward=0.7724391 (506.34 it/sec) -training >> step=5374900, episode=896 reward=0.7901944 (504.61 it/sec) -training >> step=5375000, episode=896 reward=0.7759964 (427.74 it/sec) -training >> step=5375100, episode=896 reward=0.7690601 (490.72 it/sec) -training >> step=5375200, episode=896 reward=0.7907513 (501.12 it/sec) -training >> step=5375300, episode=897 reward=0.7867136 (61.78 it/sec) -training >> step=5375400, episode=897 reward=0.7746987 (530.86 it/sec) -training >> step=5375500, episode=897 reward=0.7552297 (477.17 it/sec) -training >> step=5375600, episode=897 reward=0.780364 (498.42 it/sec) -training >> step=5375700, episode=897 reward=0.7597983 (550.43 it/sec) -training >> step=5375800, episode=897 reward=0.7767604 (466.97 it/sec) -training >> step=5375900, episode=897 reward=0.7482184 (542.78 it/sec) -training >> step=5376000, episode=897 reward=0.7657499 (531.01 it/sec) -training >> step=5376100, episode=897 reward=0.7748183 (495.01 it/sec) -training >> step=5376200, episode=897 reward=0.7919611 (513.79 it/sec) -training >> step=5376300, episode=897 reward=0.7890339 (515.52 it/sec) -training >> step=5376400, episode=897 reward=0.7694384 (551.79 it/sec) -training >> step=5376500, episode=897 reward=0.7796564 (521.31 it/sec) -training >> step=5376600, episode=897 reward=0.7777774 (514.71 it/sec) -training >> step=5376700, episode=897 reward=0.76634 (525.96 it/sec) -training >> step=5376800, episode=897 reward=0.7740517 (442.84 it/sec) -training >> step=5376900, episode=897 reward=0.7921878 (448.05 it/sec) -training >> step=5377000, episode=897 reward=0.7970323 (485.63 it/sec) -training >> step=5377100, episode=897 reward=0.761605 (515.74 it/sec) -training >> step=5377200, episode=897 reward=0.7842375 (494.69 it/sec) -training >> step=5377300, episode=897 reward=0.7576005 (460.57 it/sec) -training >> step=5377400, episode=897 reward=0.8014472 (532.24 it/sec) -training >> step=5377500, episode=897 reward=0.7938218 (501.94 it/sec) -training >> step=5377600, episode=897 reward=0.7790205 (489.42 it/sec) -training >> step=5377700, episode=897 reward=0.7770534 (513.28 it/sec) -training >> step=5377800, episode=897 reward=0.774785 (517.95 it/sec) -training >> step=5377900, episode=897 reward=0.7883379 (496.83 it/sec) -training >> step=5378000, episode=897 reward=0.7819138 (512.68 it/sec) -training >> step=5378100, episode=897 reward=0.7609808 (516.36 it/sec) -training >> step=5378200, episode=897 reward=0.8058932 (533.66 it/sec) -training >> step=5378300, episode=897 reward=0.7976053 (458.48 it/sec) -training >> step=5378400, episode=897 reward=0.7752483 (486.87 it/sec) -training >> step=5378500, episode=897 reward=0.7760181 (535.06 it/sec) -training >> step=5378600, episode=897 reward=0.7849982 (502.27 it/sec) -training >> step=5378700, episode=897 reward=0.7900809 (480.21 it/sec) -training >> step=5378800, episode=897 reward=0.7785398 (451.22 it/sec) -training >> step=5378900, episode=897 reward=0.7671523 (488.44 it/sec) -training >> step=5379000, episode=897 reward=0.782504 (525.04 it/sec) -training >> step=5379100, episode=897 reward=0.7860708 (523.48 it/sec) -training >> step=5379200, episode=897 reward=0.7869742 (541.20 it/sec) -training >> step=5379300, episode=897 reward=0.7878157 (442.16 it/sec) -training >> step=5379400, episode=897 reward=0.7817293 (480.18 it/sec) -training >> step=5379500, episode=897 reward=0.7848402 (454.56 it/sec) -training >> step=5379600, episode=897 reward=0.7697675 (501.77 it/sec) -training >> step=5379700, episode=897 reward=0.79054 (481.98 it/sec) -training >> step=5379800, episode=897 reward=0.7755964 (466.31 it/sec) -training >> step=5379900, episode=897 reward=0.7841168 (342.87 it/sec) -training >> step=5380000, episode=897 reward=0.7918903 (517.75 it/sec) -training >> step=5380100, episode=897 reward=0.772017 (522.53 it/sec) -training >> step=5380200, episode=897 reward=0.7556283 (469.18 it/sec) -training >> step=5380300, episode=897 reward=0.7984016 (489.63 it/sec) -training >> step=5380400, episode=897 reward=0.7557865 (518.81 it/sec) -training >> step=5380500, episode=897 reward=0.7604997 (507.18 it/sec) -training >> step=5380600, episode=897 reward=0.7747514 (544.01 it/sec) -training >> step=5380700, episode=897 reward=0.7588617 (517.00 it/sec) -training >> step=5380800, episode=897 reward=0.8073454 (479.73 it/sec) -training >> step=5380900, episode=897 reward=0.7852508 (489.54 it/sec) -training >> step=5381000, episode=897 reward=0.7862565 (538.38 it/sec) -training >> step=5381100, episode=897 reward=0.7760676 (465.30 it/sec) -training >> step=5381200, episode=897 reward=0.7756444 (511.17 it/sec) -training >> step=5381300, episode=898 reward=0.7876616 (70.14 it/sec) -training >> step=5381400, episode=898 reward=0.7940866 (473.86 it/sec) -training >> step=5381500, episode=898 reward=0.7839318 (485.55 it/sec) -training >> step=5381600, episode=898 reward=0.7960144 (515.42 it/sec) -training >> step=5381700, episode=898 reward=0.7748086 (470.08 it/sec) -training >> step=5381800, episode=898 reward=0.7941602 (480.59 it/sec) -training >> step=5381900, episode=898 reward=0.7950332 (503.78 it/sec) -training >> step=5382000, episode=898 reward=0.7859615 (484.55 it/sec) -training >> step=5382100, episode=898 reward=0.7735528 (474.05 it/sec) -training >> step=5382200, episode=898 reward=0.7725021 (492.53 it/sec) -training >> step=5382300, episode=898 reward=0.7881631 (449.90 it/sec) -training >> step=5382400, episode=898 reward=0.8234472 (414.45 it/sec) -training >> step=5382500, episode=898 reward=0.8035793 (460.95 it/sec) -training >> step=5382600, episode=898 reward=0.7928906 (510.06 it/sec) -training >> step=5382700, episode=898 reward=0.7815699 (447.59 it/sec) -training >> step=5382800, episode=898 reward=0.7782547 (452.26 it/sec) -training >> step=5382900, episode=898 reward=0.7786248 (546.82 it/sec) -training >> step=5383000, episode=898 reward=0.7963848 (478.12 it/sec) -training >> step=5383100, episode=898 reward=0.8040947 (475.12 it/sec) -training >> step=5383200, episode=898 reward=0.7786995 (449.02 it/sec) -training >> step=5383300, episode=898 reward=0.7681059 (531.08 it/sec) -training >> step=5383400, episode=898 reward=0.7665865 (472.28 it/sec) -training >> step=5383500, episode=898 reward=0.779387 (472.47 it/sec) -training >> step=5383600, episode=898 reward=0.787321 (461.77 it/sec) -training >> step=5383700, episode=898 reward=0.7707011 (459.93 it/sec) -training >> step=5383800, episode=898 reward=0.7792033 (507.45 it/sec) -training >> step=5383900, episode=898 reward=0.7560297 (526.25 it/sec) -training >> step=5384000, episode=898 reward=0.7746944 (514.84 it/sec) -training >> step=5384100, episode=898 reward=0.7984059 (523.95 it/sec) -training >> step=5384200, episode=898 reward=0.7819883 (494.67 it/sec) -training >> step=5384300, episode=898 reward=0.7836253 (467.49 it/sec) -training >> step=5384400, episode=898 reward=0.7798726 (533.75 it/sec) -training >> step=5384500, episode=898 reward=0.7727901 (483.87 it/sec) -training >> step=5384600, episode=898 reward=0.7981123 (513.86 it/sec) -training >> step=5384700, episode=898 reward=0.7782657 (501.41 it/sec) -training >> step=5384800, episode=898 reward=0.7896555 (516.07 it/sec) -training >> step=5384900, episode=898 reward=0.7830036 (535.78 it/sec) -training >> step=5385000, episode=898 reward=0.7847295 (474.79 it/sec) -training >> step=5385100, episode=898 reward=0.7722412 (509.95 it/sec) -training >> step=5385200, episode=898 reward=0.780959 (498.02 it/sec) -training >> step=5385300, episode=898 reward=0.7861664 (513.77 it/sec) -training >> step=5385400, episode=898 reward=0.7681211 (501.85 it/sec) -training >> step=5385500, episode=898 reward=0.7693136 (554.43 it/sec) -training >> step=5385600, episode=898 reward=0.7728618 (492.46 it/sec) -training >> step=5385700, episode=898 reward=0.7846435 (511.28 it/sec) -training >> step=5385800, episode=898 reward=0.7557964 (502.03 it/sec) -training >> step=5385900, episode=898 reward=0.7898961 (549.74 it/sec) -training >> step=5386000, episode=898 reward=0.7942747 (420.67 it/sec) -training >> step=5386100, episode=898 reward=0.7755733 (513.03 it/sec) -training >> step=5386200, episode=898 reward=0.7696081 (556.15 it/sec) -training >> step=5386300, episode=898 reward=0.777984 (462.00 it/sec) -training >> step=5386400, episode=898 reward=0.7624204 (520.26 it/sec) -training >> step=5386500, episode=898 reward=0.7981587 (516.89 it/sec) -training >> step=5386600, episode=898 reward=0.7866346 (543.05 it/sec) -training >> step=5386700, episode=898 reward=0.7605541 (526.28 it/sec) -training >> step=5386800, episode=898 reward=0.8058333 (468.36 it/sec) -training >> step=5386900, episode=898 reward=0.7760862 (506.58 it/sec) -training >> step=5387000, episode=898 reward=0.7625873 (529.15 it/sec) -training >> step=5387100, episode=898 reward=0.7846645 (516.52 it/sec) -training >> step=5387200, episode=898 reward=0.7849256 (540.67 it/sec) -training >> step=5387300, episode=899 reward=0.764398 (117.73 it/sec) -training >> step=5387400, episode=899 reward=0.7651888 (515.36 it/sec) -training >> step=5387500, episode=899 reward=0.7843074 (474.21 it/sec) -training >> step=5387600, episode=899 reward=0.7649254 (528.30 it/sec) -training >> step=5387700, episode=899 reward=0.787821 (519.36 it/sec) -training >> step=5387800, episode=899 reward=0.7765461 (500.02 it/sec) -training >> step=5387900, episode=899 reward=0.7872389 (521.52 it/sec) -training >> step=5388000, episode=899 reward=0.7558861 (504.03 it/sec) -training >> step=5388100, episode=899 reward=0.7785652 (480.36 it/sec) -training >> step=5388200, episode=899 reward=0.8053302 (520.28 it/sec) -training >> step=5388300, episode=899 reward=0.8159535 (468.33 it/sec) -training >> step=5388400, episode=899 reward=0.787117 (501.64 it/sec) -training >> step=5388500, episode=899 reward=0.7860337 (482.99 it/sec) -training >> step=5388600, episode=899 reward=0.7634947 (448.82 it/sec) -training >> step=5388700, episode=899 reward=0.7806192 (507.51 it/sec) -training >> step=5388800, episode=899 reward=0.7624192 (497.74 it/sec) -training >> step=5388900, episode=899 reward=0.7698838 (458.70 it/sec) -training >> step=5389000, episode=899 reward=0.7892974 (512.73 it/sec) -training >> step=5389100, episode=899 reward=0.7823744 (469.63 it/sec) -training >> step=5389200, episode=899 reward=0.7851585 (516.74 it/sec) -training >> step=5389300, episode=899 reward=0.7798734 (505.96 it/sec) -training >> step=5389400, episode=899 reward=0.7555598 (536.08 it/sec) -training >> step=5389500, episode=899 reward=0.7888718 (532.53 it/sec) -training >> step=5389600, episode=899 reward=0.7778082 (464.68 it/sec) -training >> step=5389700, episode=899 reward=0.7651846 (535.85 it/sec) -training >> step=5389800, episode=899 reward=0.7606291 (507.44 it/sec) -training >> step=5389900, episode=899 reward=0.7960358 (525.55 it/sec) -training >> step=5390000, episode=899 reward=0.8064015 (501.63 it/sec) -training >> step=5390100, episode=899 reward=0.7608203 (491.83 it/sec) -training >> step=5390200, episode=899 reward=0.7756578 (514.20 it/sec) -training >> step=5390300, episode=899 reward=0.78723 (522.69 it/sec) -training >> step=5390400, episode=899 reward=0.767709 (475.66 it/sec) -training >> step=5390500, episode=899 reward=0.7994404 (505.76 it/sec) -training >> step=5390600, episode=899 reward=0.8012 (479.43 it/sec) -training >> step=5390700, episode=899 reward=0.7907159 (514.05 it/sec) -training >> step=5390800, episode=899 reward=0.7826098 (561.34 it/sec) -training >> step=5390900, episode=899 reward=0.7646481 (488.74 it/sec) -training >> step=5391000, episode=899 reward=0.7602152 (494.85 it/sec) -training >> step=5391100, episode=899 reward=0.7901624 (467.64 it/sec) -training >> step=5391200, episode=899 reward=0.7613671 (453.41 it/sec) -training >> step=5391300, episode=899 reward=0.7812282 (536.31 it/sec) -training >> step=5391400, episode=899 reward=0.7809086 (476.00 it/sec) -training >> step=5391500, episode=899 reward=0.7720293 (534.70 it/sec) -training >> step=5391600, episode=899 reward=0.797238 (467.29 it/sec) -training >> step=5391700, episode=899 reward=0.7626794 (487.37 it/sec) -training >> step=5391800, episode=899 reward=0.7914777 (523.31 it/sec) -training >> step=5391900, episode=899 reward=0.7755135 (504.94 it/sec) -training >> step=5392000, episode=899 reward=0.7931157 (526.71 it/sec) -training >> step=5392100, episode=899 reward=0.7770158 (489.62 it/sec) -training >> step=5392200, episode=899 reward=0.7804418 (369.50 it/sec) -training >> step=5392300, episode=899 reward=0.7727283 (491.90 it/sec) -training >> step=5392400, episode=899 reward=0.7771444 (487.99 it/sec) -training >> step=5392500, episode=899 reward=0.7912381 (510.08 it/sec) -training >> step=5392600, episode=899 reward=0.7792407 (535.97 it/sec) -training >> step=5392700, episode=899 reward=0.7726799 (445.62 it/sec) -training >> step=5392800, episode=899 reward=0.7656862 (522.18 it/sec) -training >> step=5392900, episode=899 reward=0.8056461 (496.32 it/sec) -training >> step=5393000, episode=899 reward=0.7937142 (523.19 it/sec) -training >> step=5393100, episode=899 reward=0.7843496 (463.82 it/sec) -training >> step=5393200, episode=899 reward=0.7942428 (424.39 it/sec) -training >> step=5393300, episode=900 reward=0.7627829 (118.72 it/sec) -training >> step=5393400, episode=900 reward=0.7853309 (494.48 it/sec) -training >> step=5393500, episode=900 reward=0.7837011 (518.39 it/sec) -training >> step=5393600, episode=900 reward=0.7725483 (503.16 it/sec) -training >> step=5393700, episode=900 reward=0.7898414 (514.72 it/sec) -training >> step=5393800, episode=900 reward=0.7739193 (515.29 it/sec) -training >> step=5393900, episode=900 reward=0.7856734 (461.44 it/sec) -training >> step=5394000, episode=900 reward=0.775421 (519.99 it/sec) -training >> step=5394100, episode=900 reward=0.7735929 (518.18 it/sec) -training >> step=5394200, episode=900 reward=0.7627576 (516.73 it/sec) -training >> step=5394300, episode=900 reward=0.7912493 (502.81 it/sec) -training >> step=5394400, episode=900 reward=0.7623636 (479.09 it/sec) -training >> step=5394500, episode=900 reward=0.7866809 (499.52 it/sec) -training >> step=5394600, episode=900 reward=0.793678 (496.27 it/sec) -training >> step=5394700, episode=900 reward=0.7976336 (511.81 it/sec) -training >> step=5394800, episode=900 reward=0.792616 (550.87 it/sec) -training >> step=5394900, episode=900 reward=0.7924802 (469.14 it/sec) -training >> step=5395000, episode=900 reward=0.8101441 (487.58 it/sec) -training >> step=5395100, episode=900 reward=0.7895601 (542.72 it/sec) -training >> step=5395200, episode=900 reward=0.7879164 (511.14 it/sec) -training >> step=5395300, episode=900 reward=0.7675549 (532.04 it/sec) -training >> step=5395400, episode=900 reward=0.7893274 (500.43 it/sec) -training >> step=5395500, episode=900 reward=0.7916694 (482.44 it/sec) -training >> step=5395600, episode=900 reward=0.7904316 (514.31 it/sec) -training >> step=5395700, episode=900 reward=0.7909226 (495.87 it/sec) -training >> step=5395800, episode=900 reward=0.7856726 (520.63 it/sec) -training >> step=5395900, episode=900 reward=0.7788877 (513.61 it/sec) -training >> step=5396000, episode=900 reward=0.7908456 (490.62 it/sec) -training >> step=5396100, episode=900 reward=0.7825865 (522.84 it/sec) -training >> step=5396200, episode=900 reward=0.7592444 (492.60 it/sec) -training >> step=5396300, episode=900 reward=0.7821293 (482.29 it/sec) -training >> step=5396400, episode=900 reward=0.7701325 (499.48 it/sec) -training >> step=5396500, episode=900 reward=0.792334 (474.94 it/sec) -training >> step=5396600, episode=900 reward=0.7862791 (524.89 it/sec) -training >> step=5396700, episode=900 reward=0.776131 (496.31 it/sec) -training >> step=5396800, episode=900 reward=0.7938847 (505.67 it/sec) -training >> step=5396900, episode=900 reward=0.7976719 (504.98 it/sec) -training >> step=5397000, episode=900 reward=0.80202 (485.90 it/sec) -training >> step=5397100, episode=900 reward=0.7720612 (496.73 it/sec) -training >> step=5397200, episode=900 reward=0.778079 (492.19 it/sec) -training >> step=5397300, episode=900 reward=0.7664157 (528.42 it/sec) -training >> step=5397400, episode=900 reward=0.7689558 (521.22 it/sec) -training >> step=5397500, episode=900 reward=0.7742628 (461.78 it/sec) -training >> step=5397600, episode=900 reward=0.7919443 (521.32 it/sec) -training >> step=5397700, episode=900 reward=0.775667 (513.11 it/sec) -training >> step=5397800, episode=900 reward=0.7786121 (484.39 it/sec) -training >> step=5397900, episode=900 reward=0.7832562 (451.21 it/sec) -training >> step=5398000, episode=900 reward=0.7965246 (444.31 it/sec) -training >> step=5398100, episode=900 reward=0.7722827 (506.35 it/sec) -training >> step=5398200, episode=900 reward=0.7624469 (499.83 it/sec) -training >> step=5398300, episode=900 reward=0.7833762 (396.07 it/sec) -training >> step=5398400, episode=900 reward=0.7862323 (531.33 it/sec) -training >> step=5398500, episode=900 reward=0.7890896 (486.18 it/sec) -training >> step=5398600, episode=900 reward=0.7852625 (500.17 it/sec) -training >> step=5398700, episode=900 reward=0.7928085 (546.92 it/sec) -training >> step=5398800, episode=900 reward=0.7755955 (538.45 it/sec) -training >> step=5398900, episode=900 reward=0.7683687 (540.38 it/sec) -training >> step=5399000, episode=900 reward=0.8028116 (507.35 it/sec) -training >> step=5399100, episode=900 reward=0.7718107 (495.30 it/sec) -training >> step=5399200, episode=900 reward=0.7782242 (495.59 it/sec) -training >> step=5399300, episode=901 reward=0.7781351 (122.85 it/sec) -training >> step=5399400, episode=901 reward=0.7906142 (513.44 it/sec) -training >> step=5399500, episode=901 reward=0.7807 (529.81 it/sec) -training >> step=5399600, episode=901 reward=0.7751483 (490.24 it/sec) -training >> step=5399700, episode=901 reward=0.7559381 (504.46 it/sec) -training >> step=5399800, episode=901 reward=0.7864136 (495.24 it/sec) -training >> step=5399900, episode=901 reward=0.7890621 (504.19 it/sec) -training >> step=5400000, episode=901 reward=0.7809988 (512.81 it/sec) -training >> step=5400100, episode=901 reward=0.7799718 (532.60 it/sec) -training >> step=5400200, episode=901 reward=0.7709044 (514.16 it/sec) -training >> step=5400300, episode=901 reward=0.7875642 (460.32 it/sec) -training >> step=5400400, episode=901 reward=0.7838622 (508.47 it/sec) -training >> step=5400500, episode=901 reward=0.7764975 (526.19 it/sec) -training >> step=5400600, episode=901 reward=0.7912537 (514.65 it/sec) -training >> step=5400700, episode=901 reward=0.7935357 (499.19 it/sec) -training >> step=5400800, episode=901 reward=0.7777278 (473.94 it/sec) -training >> step=5400900, episode=901 reward=0.7731871 (533.25 it/sec) -training >> step=5401000, episode=901 reward=0.7888358 (515.88 it/sec) -training >> step=5401100, episode=901 reward=0.7726781 (471.92 it/sec) -training >> step=5401200, episode=901 reward=0.7888505 (542.08 it/sec) -training >> step=5401300, episode=901 reward=0.7808939 (470.24 it/sec) -training >> step=5401400, episode=901 reward=0.7813286 (464.95 it/sec) -training >> step=5401500, episode=901 reward=0.7886934 (497.11 it/sec) -training >> step=5401600, episode=901 reward=0.7686818 (532.42 it/sec) -training >> step=5401700, episode=901 reward=0.7842106 (496.76 it/sec) -training >> step=5401800, episode=901 reward=0.7634862 (490.20 it/sec) -training >> step=5401900, episode=901 reward=0.7705023 (519.86 it/sec) -training >> step=5402000, episode=901 reward=0.7795382 (549.73 it/sec) -training >> step=5402100, episode=901 reward=0.7650756 (496.03 it/sec) -training >> step=5402200, episode=901 reward=0.7678877 (527.13 it/sec) -training >> step=5402300, episode=901 reward=0.7847902 (516.43 it/sec) -training >> step=5402400, episode=901 reward=0.7998543 (449.58 it/sec) -training >> step=5402500, episode=901 reward=0.7790405 (486.90 it/sec) -training >> step=5402600, episode=901 reward=0.7756782 (452.51 it/sec) -training >> step=5402700, episode=901 reward=0.7691742 (505.85 it/sec) -training >> step=5402800, episode=901 reward=0.785192 (497.60 it/sec) -training >> step=5402900, episode=901 reward=0.7749092 (475.47 it/sec) -training >> step=5403000, episode=901 reward=0.7898136 (557.57 it/sec) -training >> step=5403100, episode=901 reward=0.807399 (482.63 it/sec) -training >> step=5403200, episode=901 reward=0.7771264 (525.04 it/sec) -training >> step=5403300, episode=901 reward=0.7856665 (510.80 it/sec) -training >> step=5403400, episode=901 reward=0.7809423 (497.58 it/sec) -training >> step=5403500, episode=901 reward=0.7961234 (508.42 it/sec) -training >> step=5403600, episode=901 reward=0.7771469 (501.63 it/sec) -training >> step=5403700, episode=901 reward=0.7830971 (512.76 it/sec) -training >> step=5403800, episode=901 reward=0.760389 (541.54 it/sec) -training >> step=5403900, episode=901 reward=0.7685747 (496.55 it/sec) -training >> step=5404000, episode=901 reward=0.774083 (483.18 it/sec) -training >> step=5404100, episode=901 reward=0.770311 (545.51 it/sec) -training >> step=5404200, episode=901 reward=0.7640528 (523.58 it/sec) -training >> step=5404300, episode=901 reward=0.7967041 (460.81 it/sec) -training >> step=5404400, episode=901 reward=0.7840887 (473.70 it/sec) -training >> step=5404500, episode=901 reward=0.7817466 (383.04 it/sec) -training >> step=5404600, episode=901 reward=0.7656714 (501.57 it/sec) -training >> step=5404700, episode=901 reward=0.7710809 (515.57 it/sec) -training >> step=5404800, episode=901 reward=0.7816662 (543.23 it/sec) -training >> step=5404900, episode=901 reward=0.7801688 (510.43 it/sec) -training >> step=5405000, episode=901 reward=0.7826508 (504.61 it/sec) -training >> step=5405100, episode=901 reward=0.7642092 (527.59 it/sec) -training >> step=5405200, episode=901 reward=0.7713227 (497.57 it/sec) -training >> step=5405300, episode=902 reward=0.7813274 (85.46 it/sec) -training >> step=5405400, episode=902 reward=0.7867916 (519.73 it/sec) -training >> step=5405500, episode=902 reward=0.7793565 (495.36 it/sec) -training >> step=5405600, episode=902 reward=0.7725177 (529.89 it/sec) -training >> step=5405700, episode=902 reward=0.7741445 (475.45 it/sec) -training >> step=5405800, episode=902 reward=0.7967283 (499.63 it/sec) -training >> step=5405900, episode=902 reward=0.7950901 (511.56 it/sec) -training >> step=5406000, episode=902 reward=0.8091274 (513.62 it/sec) -training >> step=5406100, episode=902 reward=0.7749642 (505.96 it/sec) -training >> step=5406200, episode=902 reward=0.7824345 (506.26 it/sec) -training >> step=5406300, episode=902 reward=0.7828928 (511.19 it/sec) -training >> step=5406400, episode=902 reward=0.7835795 (507.52 it/sec) -training >> step=5406500, episode=902 reward=0.7884225 (516.10 it/sec) -training >> step=5406600, episode=902 reward=0.7753332 (509.59 it/sec) -training >> step=5406700, episode=902 reward=0.7973917 (510.03 it/sec) -training >> step=5406800, episode=902 reward=0.7791482 (545.48 it/sec) -training >> step=5406900, episode=902 reward=0.7771538 (528.58 it/sec) -training >> step=5407000, episode=902 reward=0.772793 (484.47 it/sec) -training >> step=5407100, episode=902 reward=0.7836751 (519.97 it/sec) -training >> step=5407200, episode=902 reward=0.791088 (510.05 it/sec) -training >> step=5407300, episode=902 reward=0.7742973 (450.38 it/sec) -training >> step=5407400, episode=902 reward=0.7602353 (483.76 it/sec) -training >> step=5407500, episode=902 reward=0.7935153 (511.39 it/sec) -training >> step=5407600, episode=902 reward=0.7846027 (519.69 it/sec) -training >> step=5407700, episode=902 reward=0.8090024 (505.76 it/sec) -training >> step=5407800, episode=902 reward=0.7992606 (487.14 it/sec) -training >> step=5407900, episode=902 reward=0.7842209 (489.56 it/sec) -training >> step=5408000, episode=902 reward=0.7877695 (481.03 it/sec) -training >> step=5408100, episode=902 reward=0.7893937 (463.04 it/sec) -training >> step=5408200, episode=902 reward=0.7753149 (533.73 it/sec) -training >> step=5408300, episode=902 reward=0.7906723 (522.58 it/sec) -training >> step=5408400, episode=902 reward=0.783944 (488.29 it/sec) -training >> step=5408500, episode=902 reward=0.7819827 (503.13 it/sec) -training >> step=5408600, episode=902 reward=0.7914969 (441.41 it/sec) -training >> step=5408700, episode=902 reward=0.786683 (527.60 it/sec) -training >> step=5408800, episode=902 reward=0.794454 (508.40 it/sec) -training >> step=5408900, episode=902 reward=0.7683163 (515.25 it/sec) -training >> step=5409000, episode=902 reward=0.7802924 (495.44 it/sec) -training >> step=5409100, episode=902 reward=0.7730615 (499.48 it/sec) -training >> step=5409200, episode=902 reward=0.7721897 (504.84 it/sec) -training >> step=5409300, episode=902 reward=0.7877577 (499.11 it/sec) -training >> step=5409400, episode=902 reward=0.7907335 (525.20 it/sec) -training >> step=5409500, episode=902 reward=0.7780172 (486.33 it/sec) -training >> step=5409600, episode=902 reward=0.7655697 (440.85 it/sec) -training >> step=5409700, episode=902 reward=0.7855316 (515.15 it/sec) -training >> step=5409800, episode=902 reward=0.7728216 (507.32 it/sec) -training >> step=5409900, episode=902 reward=0.7782089 (496.45 it/sec) -training >> step=5410000, episode=902 reward=0.7631572 (501.40 it/sec) -training >> step=5410100, episode=902 reward=0.7750925 (503.62 it/sec) -training >> step=5410200, episode=902 reward=0.8007424 (531.04 it/sec) -training >> step=5410300, episode=902 reward=0.76253 (518.88 it/sec) -training >> step=5410400, episode=902 reward=0.7701211 (467.80 it/sec) -training >> step=5410500, episode=902 reward=0.7809446 (531.82 it/sec) -training >> step=5410600, episode=902 reward=0.7867637 (473.36 it/sec) -training >> step=5410700, episode=902 reward=0.761639 (392.68 it/sec) -training >> step=5410800, episode=902 reward=0.7690471 (516.58 it/sec) -training >> step=5410900, episode=902 reward=0.782207 (541.58 it/sec) -training >> step=5411000, episode=902 reward=0.7795575 (476.59 it/sec) -training >> step=5411100, episode=902 reward=0.7702714 (499.60 it/sec) -training >> step=5411200, episode=902 reward=0.7662536 (541.09 it/sec) -training >> step=5411300, episode=903 reward=0.7773067 (61.16 it/sec) -training >> step=5411400, episode=903 reward=0.7856459 (493.65 it/sec) -training >> step=5411500, episode=903 reward=0.7774056 (503.50 it/sec) -training >> step=5411600, episode=903 reward=0.78886 (479.06 it/sec) -training >> step=5411700, episode=903 reward=0.7832876 (506.51 it/sec) -training >> step=5411800, episode=903 reward=0.8173233 (522.38 it/sec) -training >> step=5411900, episode=903 reward=0.7943172 (506.82 it/sec) -training >> step=5412000, episode=903 reward=0.7893329 (524.99 it/sec) -training >> step=5412100, episode=903 reward=0.7856176 (553.71 it/sec) -training >> step=5412200, episode=903 reward=0.7578525 (500.63 it/sec) -training >> step=5412300, episode=903 reward=0.7972834 (502.75 it/sec) -training >> step=5412400, episode=903 reward=0.7606942 (513.08 it/sec) -training >> step=5412500, episode=903 reward=0.7903693 (451.49 it/sec) -training >> step=5412600, episode=903 reward=0.7948069 (517.74 it/sec) -training >> step=5412700, episode=903 reward=0.7965205 (534.94 it/sec) -training >> step=5412800, episode=903 reward=0.7717714 (497.87 it/sec) -training >> step=5412900, episode=903 reward=0.777146 (472.06 it/sec) -training >> step=5413000, episode=903 reward=0.8082321 (471.03 it/sec) -training >> step=5413100, episode=903 reward=0.7745123 (525.78 it/sec) -training >> step=5413200, episode=903 reward=0.7832367 (477.65 it/sec) -training >> step=5413300, episode=903 reward=0.7842544 (526.37 it/sec) -training >> step=5413400, episode=903 reward=0.7860833 (516.44 it/sec) -training >> step=5413500, episode=903 reward=0.783034 (488.14 it/sec) -training >> step=5413600, episode=903 reward=0.7839768 (509.79 it/sec) -training >> step=5413700, episode=903 reward=0.7750431 (493.20 it/sec) -training >> step=5413800, episode=903 reward=0.8023844 (476.36 it/sec) -training >> step=5413900, episode=903 reward=0.7671721 (508.24 it/sec) -training >> step=5414000, episode=903 reward=0.7857283 (436.90 it/sec) -training >> step=5414100, episode=903 reward=0.780965 (539.82 it/sec) -training >> step=5414200, episode=903 reward=0.7796687 (481.51 it/sec) -training >> step=5414300, episode=903 reward=0.7997578 (500.59 it/sec) -training >> step=5414400, episode=903 reward=0.7772737 (525.87 it/sec) -training >> step=5414500, episode=903 reward=0.7787892 (495.64 it/sec) -training >> step=5414600, episode=903 reward=0.7736541 (496.45 it/sec) -training >> step=5414700, episode=903 reward=0.7724482 (497.34 it/sec) -training >> step=5414800, episode=903 reward=0.7998018 (515.67 it/sec) -training >> step=5414900, episode=903 reward=0.7924936 (487.02 it/sec) -training >> step=5415000, episode=903 reward=0.7871135 (415.29 it/sec) -training >> step=5415100, episode=903 reward=0.7849211 (518.43 it/sec) -training >> step=5415200, episode=903 reward=0.7794824 (518.52 it/sec) -training >> step=5415300, episode=903 reward=0.7889951 (530.64 it/sec) -training >> step=5415400, episode=903 reward=0.7868372 (488.61 it/sec) -training >> step=5415500, episode=903 reward=0.8025882 (455.23 it/sec) -training >> step=5415600, episode=903 reward=0.7871517 (533.31 it/sec) -training >> step=5415700, episode=903 reward=0.7787446 (519.10 it/sec) -training >> step=5415800, episode=903 reward=0.780187 (507.53 it/sec) -training >> step=5415900, episode=903 reward=0.7673405 (510.04 it/sec) -training >> step=5416000, episode=903 reward=0.7709697 (455.95 it/sec) -training >> step=5416100, episode=903 reward=0.7767798 (511.71 it/sec) -training >> step=5416200, episode=903 reward=0.7441153 (534.06 it/sec) -training >> step=5416300, episode=903 reward=0.7666839 (497.01 it/sec) -training >> step=5416400, episode=903 reward=0.7647162 (518.52 it/sec) -training >> step=5416500, episode=903 reward=0.7699732 (469.37 it/sec) -training >> step=5416600, episode=903 reward=0.7647412 (438.44 it/sec) -training >> step=5416700, episode=903 reward=0.7929658 (528.17 it/sec) -training >> step=5416800, episode=903 reward=0.7582008 (476.78 it/sec) -training >> step=5416900, episode=903 reward=0.7538854 (391.90 it/sec) -training >> step=5417000, episode=903 reward=0.7825287 (479.86 it/sec) -training >> step=5417100, episode=903 reward=0.7486135 (465.22 it/sec) -training >> step=5417200, episode=903 reward=0.7867237 (492.24 it/sec) -training >> step=5417300, episode=904 reward=0.7661211 (58.99 it/sec) -training >> step=5417400, episode=904 reward=0.7825004 (468.60 it/sec) -training >> step=5417500, episode=904 reward=0.7476173 (501.62 it/sec) -training >> step=5417600, episode=904 reward=0.7653231 (504.10 it/sec) -training >> step=5417700, episode=904 reward=0.7795184 (517.82 it/sec) -training >> step=5417800, episode=904 reward=0.7972118 (515.60 it/sec) -training >> step=5417900, episode=904 reward=0.7819354 (486.60 it/sec) -training >> step=5418000, episode=904 reward=0.789623 (508.50 it/sec) -training >> step=5418100, episode=904 reward=0.7847654 (530.40 it/sec) -training >> step=5418200, episode=904 reward=0.7718333 (491.50 it/sec) -training >> step=5418300, episode=904 reward=0.7733747 (460.88 it/sec) -training >> step=5418400, episode=904 reward=0.7715462 (492.70 it/sec) -training >> step=5418500, episode=904 reward=0.7810316 (477.74 it/sec) -training >> step=5418600, episode=904 reward=0.7767899 (501.33 it/sec) -training >> step=5418700, episode=904 reward=0.7979096 (504.44 it/sec) -training >> step=5418800, episode=904 reward=0.7813513 (479.33 it/sec) -training >> step=5418900, episode=904 reward=0.7559379 (513.24 it/sec) -training >> step=5419000, episode=904 reward=0.7853122 (522.47 it/sec) -training >> step=5419100, episode=904 reward=0.7748721 (516.83 it/sec) -training >> step=5419200, episode=904 reward=0.7901559 (437.13 it/sec) -training >> step=5419300, episode=904 reward=0.7838956 (506.77 it/sec) -training >> step=5419400, episode=904 reward=0.7831321 (539.15 it/sec) -training >> step=5419500, episode=904 reward=0.8019284 (507.05 it/sec) -training >> step=5419600, episode=904 reward=0.7619623 (499.73 it/sec) -training >> step=5419700, episode=904 reward=0.7753809 (511.67 it/sec) -training >> step=5419800, episode=904 reward=0.7862018 (565.31 it/sec) -training >> step=5419900, episode=904 reward=0.7911972 (501.53 it/sec) -training >> step=5420000, episode=904 reward=0.7798357 (494.22 it/sec) -training >> step=5420100, episode=904 reward=0.772043 (536.02 it/sec) -training >> step=5420200, episode=904 reward=0.7808859 (512.59 it/sec) -training >> step=5420300, episode=904 reward=0.802192 (494.74 it/sec) -training >> step=5420400, episode=904 reward=0.7867762 (476.36 it/sec) -training >> step=5420500, episode=904 reward=0.7650524 (537.13 it/sec) -training >> step=5420600, episode=904 reward=0.765041 (490.71 it/sec) -training >> step=5420700, episode=904 reward=0.7705414 (511.33 it/sec) -training >> step=5420800, episode=904 reward=0.7818074 (454.71 it/sec) -training >> step=5420900, episode=904 reward=0.7808325 (531.60 it/sec) -training >> step=5421000, episode=904 reward=0.8000714 (494.60 it/sec) -training >> step=5421100, episode=904 reward=0.7704575 (502.00 it/sec) -training >> step=5421200, episode=904 reward=0.7866151 (504.55 it/sec) -training >> step=5421300, episode=904 reward=0.7806419 (525.16 it/sec) -training >> step=5421400, episode=904 reward=0.7812716 (501.41 it/sec) -training >> step=5421500, episode=904 reward=0.7799873 (539.36 it/sec) -training >> step=5421600, episode=904 reward=0.7801788 (518.76 it/sec) -training >> step=5421700, episode=904 reward=0.7738844 (530.33 it/sec) -training >> step=5421800, episode=904 reward=0.7850299 (466.13 it/sec) -training >> step=5421900, episode=904 reward=0.7973604 (530.63 it/sec) -training >> step=5422000, episode=904 reward=0.7671993 (538.17 it/sec) -training >> step=5422100, episode=904 reward=0.7827451 (519.29 it/sec) -training >> step=5422200, episode=904 reward=0.7762686 (523.82 it/sec) -training >> step=5422300, episode=904 reward=0.7711405 (522.07 it/sec) -training >> step=5422400, episode=904 reward=0.7781023 (480.48 it/sec) -training >> step=5422500, episode=904 reward=0.7743 (520.62 it/sec) -training >> step=5422600, episode=904 reward=0.798887 (522.73 it/sec) -training >> step=5422700, episode=904 reward=0.7974898 (533.79 it/sec) -training >> step=5422800, episode=904 reward=0.7696646 (497.06 it/sec) -training >> step=5422900, episode=904 reward=0.7895811 (490.26 it/sec) -training >> step=5423000, episode=904 reward=0.7783011 (529.17 it/sec) -training >> step=5423100, episode=904 reward=0.7959379 (514.41 it/sec) -training >> step=5423200, episode=904 reward=0.786067 (365.25 it/sec) -training >> step=5423300, episode=905 reward=0.7824676 (61.31 it/sec) -training >> step=5423400, episode=905 reward=0.7639661 (462.34 it/sec) -training >> step=5423500, episode=905 reward=0.7708247 (493.72 it/sec) -training >> step=5423600, episode=905 reward=0.7952534 (476.33 it/sec) -training >> step=5423700, episode=905 reward=0.7844054 (501.62 it/sec) -training >> step=5423800, episode=905 reward=0.8050931 (414.17 it/sec) -training >> step=5423900, episode=905 reward=0.7712747 (470.83 it/sec) -training >> step=5424000, episode=905 reward=0.7731647 (440.81 it/sec) -training >> step=5424100, episode=905 reward=0.7843213 (429.68 it/sec) -training >> step=5424200, episode=905 reward=0.7843472 (448.87 it/sec) -training >> step=5424300, episode=905 reward=0.7836691 (467.29 it/sec) -training >> step=5424400, episode=905 reward=0.7900943 (451.64 it/sec) -training >> step=5424500, episode=905 reward=0.7930319 (472.12 it/sec) -training >> step=5424600, episode=905 reward=0.7684857 (509.61 it/sec) -training >> step=5424700, episode=905 reward=0.7931019 (470.42 it/sec) -training >> step=5424800, episode=905 reward=0.7638831 (468.49 it/sec) -training >> step=5424900, episode=905 reward=0.7727901 (498.59 it/sec) -training >> step=5425000, episode=905 reward=0.7730216 (445.14 it/sec) -training >> step=5425100, episode=905 reward=0.7829751 (460.68 it/sec) -training >> step=5425200, episode=905 reward=0.7896905 (468.96 it/sec) -training >> step=5425300, episode=905 reward=0.7975187 (510.54 it/sec) -training >> step=5425400, episode=905 reward=0.767126 (450.00 it/sec) -training >> step=5425500, episode=905 reward=0.7793078 (498.23 it/sec) -training >> step=5425600, episode=905 reward=0.760827 (458.42 it/sec) -training >> step=5425700, episode=905 reward=0.7752134 (472.93 it/sec) -training >> step=5425800, episode=905 reward=0.7945282 (485.99 it/sec) -training >> step=5425900, episode=905 reward=0.7632326 (484.69 it/sec) -training >> step=5426000, episode=905 reward=0.7991798 (461.34 it/sec) -training >> step=5426100, episode=905 reward=0.7692307 (433.95 it/sec) -training >> step=5426200, episode=905 reward=0.7691594 (424.06 it/sec) -training >> step=5426300, episode=905 reward=0.7981755 (508.48 it/sec) -training >> step=5426400, episode=905 reward=0.778416 (460.22 it/sec) -training >> step=5426500, episode=905 reward=0.7798111 (461.11 it/sec) -training >> step=5426600, episode=905 reward=0.777497 (474.38 it/sec) -training >> step=5426700, episode=905 reward=0.7928868 (460.84 it/sec) -training >> step=5426800, episode=905 reward=0.7775834 (463.83 it/sec) -training >> step=5426900, episode=905 reward=0.7778904 (471.36 it/sec) -training >> step=5427000, episode=905 reward=0.7785784 (516.07 it/sec) -training >> step=5427100, episode=905 reward=0.7694052 (415.12 it/sec) -training >> step=5427200, episode=905 reward=0.7987043 (457.03 it/sec) -training >> step=5427300, episode=905 reward=0.7785095 (498.76 it/sec) -training >> step=5427400, episode=905 reward=0.7860152 (486.13 it/sec) -training >> step=5427500, episode=905 reward=0.7993276 (447.49 it/sec) -training >> step=5427600, episode=905 reward=0.7825689 (448.75 it/sec) -training >> step=5427700, episode=905 reward=0.7846378 (423.88 it/sec) -training >> step=5427800, episode=905 reward=0.7594745 (455.16 it/sec) -training >> step=5427900, episode=905 reward=0.7930141 (491.67 it/sec) -training >> step=5428000, episode=905 reward=0.7948809 (476.01 it/sec) -training >> step=5428100, episode=905 reward=0.7791635 (464.75 it/sec) -training >> step=5428200, episode=905 reward=0.7526775 (481.84 it/sec) -training >> step=5428300, episode=905 reward=0.7681302 (464.22 it/sec) -training >> step=5428400, episode=905 reward=0.8072382 (495.79 it/sec) -training >> step=5428500, episode=905 reward=0.7670765 (482.95 it/sec) -training >> step=5428600, episode=905 reward=0.7695669 (467.82 it/sec) -training >> step=5428700, episode=905 reward=0.7750123 (485.60 it/sec) -training >> step=5428800, episode=905 reward=0.7680137 (472.80 it/sec) -training >> step=5428900, episode=905 reward=0.7729617 (509.73 it/sec) -training >> step=5429000, episode=905 reward=0.7659611 (495.28 it/sec) -training >> step=5429100, episode=905 reward=0.7748743 (489.82 it/sec) -training >> step=5429200, episode=905 reward=0.7948167 (509.71 it/sec) -training >> step=5429300, episode=906 reward=0.7779962 (116.81 it/sec) -training >> step=5429400, episode=906 reward=0.7684894 (489.20 it/sec) -training >> step=5429500, episode=906 reward=0.7868235 (493.36 it/sec) -training >> step=5429600, episode=906 reward=0.7758543 (487.22 it/sec) -training >> step=5429700, episode=906 reward=0.7972332 (511.72 it/sec) -training >> step=5429800, episode=906 reward=0.7997618 (506.11 it/sec) -training >> step=5429900, episode=906 reward=0.7781672 (504.21 it/sec) -training >> step=5430000, episode=906 reward=0.7836455 (504.97 it/sec) -training >> step=5430100, episode=906 reward=0.7889223 (483.22 it/sec) -training >> step=5430200, episode=906 reward=0.7722455 (532.73 it/sec) -training >> step=5430300, episode=906 reward=0.7860371 (481.47 it/sec) -training >> step=5430400, episode=906 reward=0.7769194 (468.14 it/sec) -training >> step=5430500, episode=906 reward=0.7925789 (505.71 it/sec) -training >> step=5430600, episode=906 reward=0.7829677 (519.54 it/sec) -training >> step=5430700, episode=906 reward=0.7872297 (499.03 it/sec) -training >> step=5430800, episode=906 reward=0.7858571 (465.72 it/sec) -training >> step=5430900, episode=906 reward=0.779899 (523.94 it/sec) -training >> step=5431000, episode=906 reward=0.793275 (501.77 it/sec) -training >> step=5431100, episode=906 reward=0.7706346 (506.33 it/sec) -training >> step=5431200, episode=906 reward=0.7983999 (512.85 it/sec) -training >> step=5431300, episode=906 reward=0.7956535 (493.31 it/sec) -training >> step=5431400, episode=906 reward=0.7899767 (446.19 it/sec) -training >> step=5431500, episode=906 reward=0.7814065 (502.23 it/sec) -training >> step=5431600, episode=906 reward=0.7677178 (499.62 it/sec) -training >> step=5431700, episode=906 reward=0.8009159 (532.15 it/sec) -training >> step=5431800, episode=906 reward=0.7802143 (483.96 it/sec) -training >> step=5431900, episode=906 reward=0.783721 (512.07 it/sec) -training >> step=5432000, episode=906 reward=0.7631918 (495.34 it/sec) -training >> step=5432100, episode=906 reward=0.7764111 (473.17 it/sec) -training >> step=5432200, episode=906 reward=0.7773098 (501.56 it/sec) -training >> step=5432300, episode=906 reward=0.7953226 (469.37 it/sec) -training >> step=5432400, episode=906 reward=0.7820271 (489.54 it/sec) -training >> step=5432500, episode=906 reward=0.8148388 (449.24 it/sec) -training >> step=5432600, episode=906 reward=0.7809768 (495.66 it/sec) -training >> step=5432700, episode=906 reward=0.7738209 (454.97 it/sec) -training >> step=5432800, episode=906 reward=0.76771 (483.13 it/sec) -training >> step=5432900, episode=906 reward=0.8002654 (502.02 it/sec) -training >> step=5433000, episode=906 reward=0.7958105 (476.99 it/sec) -training >> step=5433100, episode=906 reward=0.7850915 (509.77 it/sec) -training >> step=5433200, episode=906 reward=0.7770135 (479.20 it/sec) -training >> step=5433300, episode=906 reward=0.7925295 (494.52 it/sec) -training >> step=5433400, episode=906 reward=0.7908883 (501.79 it/sec) -training >> step=5433500, episode=906 reward=0.7694386 (449.99 it/sec) -training >> step=5433600, episode=906 reward=0.7706196 (495.78 it/sec) -training >> step=5433700, episode=906 reward=0.7759286 (487.79 it/sec) -training >> step=5433800, episode=906 reward=0.7722533 (499.59 it/sec) -training >> step=5433900, episode=906 reward=0.802538 (482.52 it/sec) -training >> step=5434000, episode=906 reward=0.7688105 (453.24 it/sec) -training >> step=5434100, episode=906 reward=0.7782931 (461.79 it/sec) -training >> step=5434200, episode=906 reward=0.7689965 (523.90 it/sec) -training >> step=5434300, episode=906 reward=0.7926814 (432.12 it/sec) -training >> step=5434400, episode=906 reward=0.7574495 (451.25 it/sec) -training >> step=5434500, episode=906 reward=0.771768 (463.16 it/sec) -training >> step=5434600, episode=906 reward=0.7794654 (512.70 it/sec) -training >> step=5434700, episode=906 reward=0.766621 (507.51 it/sec) -training >> step=5434800, episode=906 reward=0.7987501 (465.94 it/sec) -training >> step=5434900, episode=906 reward=0.7800401 (489.44 it/sec) -training >> step=5435000, episode=906 reward=0.7535648 (479.16 it/sec) -training >> step=5435100, episode=906 reward=0.7818971 (480.45 it/sec) -training >> step=5435200, episode=906 reward=0.7691478 (500.57 it/sec) -training >> step=5435300, episode=907 reward=0.7694773 (84.39 it/sec) -training >> step=5435400, episode=907 reward=0.7649935 (444.23 it/sec) -training >> step=5435500, episode=907 reward=0.7832503 (492.49 it/sec) -training >> step=5435600, episode=907 reward=0.7619887 (502.88 it/sec) -training >> step=5435700, episode=907 reward=0.7853098 (496.60 it/sec) -training >> step=5435800, episode=907 reward=0.8054407 (482.96 it/sec) -training >> step=5435900, episode=907 reward=0.811998 (524.29 it/sec) -training >> step=5436000, episode=907 reward=0.7733814 (489.98 it/sec) -training >> step=5436100, episode=907 reward=0.7955258 (499.81 it/sec) -training >> step=5436200, episode=907 reward=0.775843 (456.64 it/sec) -training >> step=5436300, episode=907 reward=0.7668122 (478.98 it/sec) -training >> step=5436400, episode=907 reward=0.7746649 (486.90 it/sec) -training >> step=5436500, episode=907 reward=0.7756609 (501.60 it/sec) -training >> step=5436600, episode=907 reward=0.7777197 (526.74 it/sec) -training >> step=5436700, episode=907 reward=0.7810254 (479.34 it/sec) -training >> step=5436800, episode=907 reward=0.7723569 (465.71 it/sec) -training >> step=5436900, episode=907 reward=0.774755 (510.88 it/sec) -training >> step=5437000, episode=907 reward=0.7889403 (511.35 it/sec) -training >> step=5437100, episode=907 reward=0.783839 (454.79 it/sec) -training >> step=5437200, episode=907 reward=0.7848547 (513.35 it/sec) -training >> step=5437300, episode=907 reward=0.7694119 (510.41 it/sec) -training >> step=5437400, episode=907 reward=0.7635704 (417.74 it/sec) -training >> step=5437500, episode=907 reward=0.7935143 (503.21 it/sec) -training >> step=5437600, episode=907 reward=0.7587209 (486.69 it/sec) -training >> step=5437700, episode=907 reward=0.78998 (529.35 it/sec) -training >> step=5437800, episode=907 reward=0.7722724 (483.26 it/sec) -training >> step=5437900, episode=907 reward=0.7873999 (454.08 it/sec) -training >> step=5438000, episode=907 reward=0.7865496 (524.75 it/sec) -training >> step=5438100, episode=907 reward=0.765908 (494.10 it/sec) -training >> step=5438200, episode=907 reward=0.7498209 (511.45 it/sec) -training >> step=5438300, episode=907 reward=0.7721684 (501.53 it/sec) -training >> step=5438400, episode=907 reward=0.758396 (486.34 it/sec) -training >> step=5438500, episode=907 reward=0.7998446 (465.22 it/sec) -training >> step=5438600, episode=907 reward=0.800369 (505.03 it/sec) -training >> step=5438700, episode=907 reward=0.7953221 (441.43 it/sec) -training >> step=5438800, episode=907 reward=0.7997584 (436.11 it/sec) -training >> step=5438900, episode=907 reward=0.7688459 (472.73 it/sec) -training >> step=5439000, episode=907 reward=0.7681562 (498.81 it/sec) -training >> step=5439100, episode=907 reward=0.7961814 (520.21 it/sec) -training >> step=5439200, episode=907 reward=0.796123 (506.40 it/sec) -training >> step=5439300, episode=907 reward=0.779016 (503.18 it/sec) -training >> step=5439400, episode=907 reward=0.777294 (484.83 it/sec) -training >> step=5439500, episode=907 reward=0.7989026 (493.31 it/sec) -training >> step=5439600, episode=907 reward=0.7805395 (494.15 it/sec) -training >> step=5439700, episode=907 reward=0.7836123 (507.30 it/sec) -training >> step=5439800, episode=907 reward=0.7768475 (526.37 it/sec) -training >> step=5439900, episode=907 reward=0.7846606 (466.89 it/sec) -training >> step=5440000, episode=907 reward=0.7539446 (505.12 it/sec) -training >> step=5440100, episode=907 reward=0.7765436 (493.71 it/sec) -training >> step=5440200, episode=907 reward=0.7651327 (523.40 it/sec) -training >> step=5440300, episode=907 reward=0.7753623 (485.77 it/sec) -training >> step=5440400, episode=907 reward=0.7717037 (501.20 it/sec) -training >> step=5440500, episode=907 reward=0.7826108 (496.66 it/sec) -training >> step=5440600, episode=907 reward=0.7773662 (481.82 it/sec) -training >> step=5440700, episode=907 reward=0.7767518 (505.61 it/sec) -training >> step=5440800, episode=907 reward=0.7711689 (491.61 it/sec) -training >> step=5440900, episode=907 reward=0.7991846 (519.25 it/sec) -training >> step=5441000, episode=907 reward=0.7829164 (481.18 it/sec) -training >> step=5441100, episode=907 reward=0.7668393 (491.03 it/sec) -training >> step=5441200, episode=907 reward=0.7485604 (473.62 it/sec) -training >> step=5441300, episode=908 reward=0.769175 (100.57 it/sec) -training >> step=5441400, episode=908 reward=0.7638348 (467.70 it/sec) -training >> step=5441500, episode=908 reward=0.7705982 (496.26 it/sec) -training >> step=5441600, episode=908 reward=0.760891 (485.92 it/sec) -training >> step=5441700, episode=908 reward=0.7754318 (497.03 it/sec) -training >> step=5441800, episode=908 reward=0.8040047 (467.19 it/sec) -training >> step=5441900, episode=908 reward=0.7861072 (483.01 it/sec) -training >> step=5442000, episode=908 reward=0.7849426 (492.51 it/sec) -training >> step=5442100, episode=908 reward=0.7827806 (493.74 it/sec) -training >> step=5442200, episode=908 reward=0.7812502 (505.24 it/sec) -training >> step=5442300, episode=908 reward=0.7842404 (512.49 it/sec) -training >> step=5442400, episode=908 reward=0.8022403 (505.43 it/sec) -training >> step=5442500, episode=908 reward=0.7763862 (483.33 it/sec) -training >> step=5442600, episode=908 reward=0.7780962 (517.28 it/sec) -training >> step=5442700, episode=908 reward=0.7770137 (467.76 it/sec) -training >> step=5442800, episode=908 reward=0.7643133 (493.61 it/sec) -training >> step=5442900, episode=908 reward=0.8035527 (503.18 it/sec) -training >> step=5443000, episode=908 reward=0.7874779 (513.73 it/sec) -training >> step=5443100, episode=908 reward=0.7630988 (496.45 it/sec) -training >> step=5443200, episode=908 reward=0.784005 (477.31 it/sec) -training >> step=5443300, episode=908 reward=0.7736527 (496.64 it/sec) -training >> step=5443400, episode=908 reward=0.7888923 (468.76 it/sec) -training >> step=5443500, episode=908 reward=0.7738976 (494.85 it/sec) -training >> step=5443600, episode=908 reward=0.7801142 (487.76 it/sec) -training >> step=5443700, episode=908 reward=0.795582 (452.75 it/sec) -training >> step=5443800, episode=908 reward=0.7855061 (503.36 it/sec) -training >> step=5443900, episode=908 reward=0.780829 (517.93 it/sec) -training >> step=5444000, episode=908 reward=0.8032789 (487.18 it/sec) -training >> step=5444100, episode=908 reward=0.7819716 (493.60 it/sec) -training >> step=5444200, episode=908 reward=0.813906 (511.62 it/sec) -training >> step=5444300, episode=908 reward=0.7797569 (488.42 it/sec) -training >> step=5444400, episode=908 reward=0.8008752 (488.49 it/sec) -training >> step=5444500, episode=908 reward=0.789164 (481.08 it/sec) -training >> step=5444600, episode=908 reward=0.7728779 (475.70 it/sec) -training >> step=5444700, episode=908 reward=0.7699917 (427.74 it/sec) -training >> step=5444800, episode=908 reward=0.7805671 (455.90 it/sec) -training >> step=5444900, episode=908 reward=0.8003808 (521.19 it/sec) -training >> step=5445000, episode=908 reward=0.7828346 (483.92 it/sec) -training >> step=5445100, episode=908 reward=0.7713127 (500.35 it/sec) -training >> step=5445200, episode=908 reward=0.7719925 (471.37 it/sec) -training >> step=5445300, episode=908 reward=0.7796635 (510.47 it/sec) -training >> step=5445400, episode=908 reward=0.7779554 (512.72 it/sec) -training >> step=5445500, episode=908 reward=0.7936062 (498.80 it/sec) -training >> step=5445600, episode=908 reward=0.7940793 (497.20 it/sec) -training >> step=5445700, episode=908 reward=0.7769055 (468.92 it/sec) -training >> step=5445800, episode=908 reward=0.7812892 (487.92 it/sec) -training >> step=5445900, episode=908 reward=0.7866802 (473.87 it/sec) -training >> step=5446000, episode=908 reward=0.7667878 (476.97 it/sec) -training >> step=5446100, episode=908 reward=0.7923262 (489.73 it/sec) -training >> step=5446200, episode=908 reward=0.7682707 (490.52 it/sec) -training >> step=5446300, episode=908 reward=0.7666008 (513.36 it/sec) -training >> step=5446400, episode=908 reward=0.778302 (464.65 it/sec) -training >> step=5446500, episode=908 reward=0.7817785 (510.82 it/sec) -training >> step=5446600, episode=908 reward=0.7635758 (497.94 it/sec) -training >> step=5446700, episode=908 reward=0.7848997 (463.65 it/sec) -training >> step=5446800, episode=908 reward=0.7664637 (475.63 it/sec) -training >> step=5446900, episode=908 reward=0.7618498 (533.50 it/sec) -training >> step=5447000, episode=908 reward=0.7753824 (427.74 it/sec) -training >> step=5447100, episode=908 reward=0.7736247 (507.66 it/sec) -training >> step=5447200, episode=908 reward=0.7635797 (532.82 it/sec) -training >> step=5447300, episode=909 reward=0.7729453 (96.10 it/sec) -training >> step=5447400, episode=909 reward=0.7833956 (486.24 it/sec) -training >> step=5447500, episode=909 reward=0.7769133 (483.75 it/sec) -training >> step=5447600, episode=909 reward=0.7846143 (501.92 it/sec) -training >> step=5447700, episode=909 reward=0.7856296 (490.09 it/sec) -training >> step=5447800, episode=909 reward=0.7814788 (462.78 it/sec) -training >> step=5447900, episode=909 reward=0.7979648 (478.33 it/sec) -training >> step=5448000, episode=909 reward=0.7761724 (527.78 it/sec) -training >> step=5448100, episode=909 reward=0.7947494 (485.64 it/sec) -training >> step=5448200, episode=909 reward=0.7906245 (487.08 it/sec) -training >> step=5448300, episode=909 reward=0.7893962 (512.83 it/sec) -training >> step=5448400, episode=909 reward=0.7969893 (466.42 it/sec) -training >> step=5448500, episode=909 reward=0.7739617 (515.88 it/sec) -training >> step=5448600, episode=909 reward=0.780131 (503.05 it/sec) -training >> step=5448700, episode=909 reward=0.7758822 (523.17 it/sec) -training >> step=5448800, episode=909 reward=0.7982218 (463.16 it/sec) -training >> step=5448900, episode=909 reward=0.7691969 (507.35 it/sec) -training >> step=5449000, episode=909 reward=0.7731784 (504.93 it/sec) -training >> step=5449100, episode=909 reward=0.7981921 (509.25 it/sec) -training >> step=5449200, episode=909 reward=0.7653434 (481.47 it/sec) -training >> step=5449300, episode=909 reward=0.7765893 (479.09 it/sec) -training >> step=5449400, episode=909 reward=0.7832749 (448.31 it/sec) -training >> step=5449500, episode=909 reward=0.7857615 (495.53 it/sec) -training >> step=5449600, episode=909 reward=0.789762 (490.93 it/sec) -training >> step=5449700, episode=909 reward=0.7579276 (477.07 it/sec) -training >> step=5449800, episode=909 reward=0.7965629 (483.99 it/sec) -training >> step=5449900, episode=909 reward=0.7854788 (509.85 it/sec) -training >> step=5450000, episode=909 reward=0.7847254 (527.26 it/sec) -training >> step=5450100, episode=909 reward=0.7933717 (493.27 it/sec) -training >> step=5450200, episode=909 reward=0.7830023 (420.55 it/sec) -training >> step=5450300, episode=909 reward=0.7816662 (456.18 it/sec) -training >> step=5450400, episode=909 reward=0.7876756 (480.16 it/sec) -training >> step=5450500, episode=909 reward=0.7966874 (477.67 it/sec) -training >> step=5450600, episode=909 reward=0.7829385 (491.39 it/sec) -training >> step=5450700, episode=909 reward=0.818801 (509.06 it/sec) -training >> step=5450800, episode=909 reward=0.767 (497.45 it/sec) -training >> step=5450900, episode=909 reward=0.7675517 (482.02 it/sec) -training >> step=5451000, episode=909 reward=0.7699652 (518.73 it/sec) -training >> step=5451100, episode=909 reward=0.7897817 (514.78 it/sec) -training >> step=5451200, episode=909 reward=0.7871307 (452.90 it/sec) -training >> step=5451300, episode=909 reward=0.7800979 (478.36 it/sec) -training >> step=5451400, episode=909 reward=0.7627517 (476.10 it/sec) -training >> step=5451500, episode=909 reward=0.7873127 (553.55 it/sec) -training >> step=5451600, episode=909 reward=0.785297 (509.52 it/sec) -training >> step=5451700, episode=909 reward=0.784752 (497.75 it/sec) -training >> step=5451800, episode=909 reward=0.7546785 (502.76 it/sec) -training >> step=5451900, episode=909 reward=0.7669497 (499.17 it/sec) -training >> step=5452000, episode=909 reward=0.7869834 (476.71 it/sec) -training >> step=5452100, episode=909 reward=0.7825569 (515.25 it/sec) -training >> step=5452200, episode=909 reward=0.7628338 (496.12 it/sec) -training >> step=5452300, episode=909 reward=0.7741826 (482.90 it/sec) -training >> step=5452400, episode=909 reward=0.7807544 (501.01 it/sec) -training >> step=5452500, episode=909 reward=0.7730953 (530.48 it/sec) -training >> step=5452600, episode=909 reward=0.7924333 (495.69 it/sec) -training >> step=5452700, episode=909 reward=0.7822403 (467.33 it/sec) -training >> step=5452800, episode=909 reward=0.7758048 (451.00 it/sec) -training >> step=5452900, episode=909 reward=0.7931917 (505.18 it/sec) -training >> step=5453000, episode=909 reward=0.7792922 (503.30 it/sec) -training >> step=5453100, episode=909 reward=0.7820129 (503.43 it/sec) -training >> step=5453200, episode=909 reward=0.7708929 (479.00 it/sec) -training >> step=5453300, episode=910 reward=0.7543637 (106.26 it/sec) -training >> step=5453400, episode=910 reward=0.769008 (466.41 it/sec) -training >> step=5453500, episode=910 reward=0.7821288 (485.24 it/sec) -training >> step=5453600, episode=910 reward=0.7639282 (526.69 it/sec) -training >> step=5453700, episode=910 reward=0.7933474 (475.90 it/sec) -training >> step=5453800, episode=910 reward=0.7823851 (469.22 it/sec) -training >> step=5453900, episode=910 reward=0.7764344 (472.31 it/sec) -training >> step=5454000, episode=910 reward=0.7893922 (447.41 it/sec) -training >> step=5454100, episode=910 reward=0.7793779 (505.29 it/sec) -training >> step=5454200, episode=910 reward=0.7720957 (517.87 it/sec) -training >> step=5454300, episode=910 reward=0.7833341 (503.60 it/sec) -training >> step=5454400, episode=910 reward=0.778626 (468.30 it/sec) -training >> step=5454500, episode=910 reward=0.7940023 (498.72 it/sec) -training >> step=5454600, episode=910 reward=0.7855421 (470.89 it/sec) -training >> step=5454700, episode=910 reward=0.7542351 (486.87 it/sec) -training >> step=5454800, episode=910 reward=0.7792208 (486.69 it/sec) -training >> step=5454900, episode=910 reward=0.7540219 (486.21 it/sec) -training >> step=5455000, episode=910 reward=0.7941308 (543.79 it/sec) -training >> step=5455100, episode=910 reward=0.7872461 (474.04 it/sec) -training >> step=5455200, episode=910 reward=0.7811173 (461.24 it/sec) -training >> step=5455300, episode=910 reward=0.7881084 (519.89 it/sec) -training >> step=5455400, episode=910 reward=0.7767847 (477.81 it/sec) -training >> step=5455500, episode=910 reward=0.7887017 (478.63 it/sec) -training >> step=5455600, episode=910 reward=0.7710375 (494.11 it/sec) -training >> step=5455700, episode=910 reward=0.7882249 (534.64 it/sec) -training >> step=5455800, episode=910 reward=0.7803681 (495.75 it/sec) -training >> step=5455900, episode=910 reward=0.7918044 (456.02 it/sec) -training >> step=5456000, episode=910 reward=0.7949212 (490.99 it/sec) -training >> step=5456100, episode=910 reward=0.7835616 (486.23 it/sec) -training >> step=5456200, episode=910 reward=0.7604876 (509.43 it/sec) -training >> step=5456300, episode=910 reward=0.7752666 (477.83 it/sec) -training >> step=5456400, episode=910 reward=0.778156 (508.68 it/sec) -training >> step=5456500, episode=910 reward=0.7757539 (455.62 it/sec) -training >> step=5456600, episode=910 reward=0.7839116 (487.98 it/sec) -training >> step=5456700, episode=910 reward=0.8022697 (512.26 it/sec) -training >> step=5456800, episode=910 reward=0.7916286 (462.04 it/sec) -training >> step=5456900, episode=910 reward=0.7808002 (448.31 it/sec) -training >> step=5457000, episode=910 reward=0.7884704 (503.58 it/sec) -training >> step=5457100, episode=910 reward=0.7745839 (486.30 it/sec) -training >> step=5457200, episode=910 reward=0.7870691 (500.12 it/sec) -training >> step=5457300, episode=910 reward=0.7995206 (465.88 it/sec) -training >> step=5457400, episode=910 reward=0.7928454 (462.84 it/sec) -training >> step=5457500, episode=910 reward=0.7953326 (498.04 it/sec) -training >> step=5457600, episode=910 reward=0.7755691 (485.40 it/sec) -training >> step=5457700, episode=910 reward=0.7822328 (487.76 it/sec) -training >> step=5457800, episode=910 reward=0.7894889 (518.89 it/sec) -training >> step=5457900, episode=910 reward=0.7750068 (481.42 it/sec) -training >> step=5458000, episode=910 reward=0.7859181 (492.80 it/sec) -training >> step=5458100, episode=910 reward=0.7679638 (478.63 it/sec) -training >> step=5458200, episode=910 reward=0.7603161 (546.05 it/sec) -training >> step=5458300, episode=910 reward=0.7705847 (461.72 it/sec) -training >> step=5458400, episode=910 reward=0.7806954 (481.88 it/sec) -training >> step=5458500, episode=910 reward=0.7841274 (505.88 it/sec) -training >> step=5458600, episode=910 reward=0.7620317 (537.01 it/sec) -training >> step=5458700, episode=910 reward=0.7994058 (495.53 it/sec) -training >> step=5458800, episode=910 reward=0.7874151 (461.23 it/sec) -training >> step=5458900, episode=910 reward=0.7699568 (515.20 it/sec) -training >> step=5459000, episode=910 reward=0.7591535 (483.55 it/sec) -training >> step=5459100, episode=910 reward=0.7825807 (511.89 it/sec) -training >> step=5459200, episode=910 reward=0.7688289 (497.83 it/sec) -training >> step=5459300, episode=911 reward=0.7877 (116.28 it/sec) -training >> step=5459400, episode=911 reward=0.7848324 (506.86 it/sec) -training >> step=5459500, episode=911 reward=0.7737572 (480.87 it/sec) -training >> step=5459600, episode=911 reward=0.7723699 (450.95 it/sec) -training >> step=5459700, episode=911 reward=0.7925515 (501.92 it/sec) -training >> step=5459800, episode=911 reward=0.7672482 (520.25 it/sec) -training >> step=5459900, episode=911 reward=0.7802295 (513.65 it/sec) -training >> step=5460000, episode=911 reward=0.7983521 (471.68 it/sec) -training >> step=5460100, episode=911 reward=0.754757 (512.82 it/sec) -training >> step=5460200, episode=911 reward=0.7742875 (481.75 it/sec) -training >> step=5460300, episode=911 reward=0.7845351 (526.48 it/sec) -training >> step=5460400, episode=911 reward=0.7914612 (498.07 it/sec) -training >> step=5460500, episode=911 reward=0.7796323 (438.60 it/sec) -training >> step=5460600, episode=911 reward=0.7902991 (478.05 it/sec) -training >> step=5460700, episode=911 reward=0.7830018 (485.42 it/sec) -training >> step=5460800, episode=911 reward=0.7621811 (485.20 it/sec) -training >> step=5460900, episode=911 reward=0.780574 (492.54 it/sec) -training >> step=5461000, episode=911 reward=0.799988 (502.58 it/sec) -training >> step=5461100, episode=911 reward=0.7746803 (489.65 it/sec) -training >> step=5461200, episode=911 reward=0.7864829 (453.44 it/sec) -training >> step=5461300, episode=911 reward=0.7778162 (478.47 it/sec) -training >> step=5461400, episode=911 reward=0.7796658 (528.79 it/sec) -training >> step=5461500, episode=911 reward=0.7848867 (501.65 it/sec) -training >> step=5461600, episode=911 reward=0.7849508 (489.01 it/sec) -training >> step=5461700, episode=911 reward=0.7757318 (465.48 it/sec) -training >> step=5461800, episode=911 reward=0.7832147 (483.91 it/sec) -training >> step=5461900, episode=911 reward=0.7935278 (488.12 it/sec) -training >> step=5462000, episode=911 reward=0.8046374 (466.21 it/sec) -training >> step=5462100, episode=911 reward=0.7831394 (486.74 it/sec) -training >> step=5462200, episode=911 reward=0.7896995 (512.05 it/sec) -training >> step=5462300, episode=911 reward=0.7607258 (481.22 it/sec) -training >> step=5462400, episode=911 reward=0.7765504 (509.76 it/sec) -training >> step=5462500, episode=911 reward=0.7889857 (529.73 it/sec) -training >> step=5462600, episode=911 reward=0.7891923 (443.85 it/sec) -training >> step=5462700, episode=911 reward=0.7770746 (477.99 it/sec) -training >> step=5462800, episode=911 reward=0.7905641 (514.96 it/sec) -training >> step=5462900, episode=911 reward=0.7693733 (541.49 it/sec) -training >> step=5463000, episode=911 reward=0.7851143 (523.92 it/sec) -training >> step=5463100, episode=911 reward=0.7943138 (438.55 it/sec) -training >> step=5463200, episode=911 reward=0.7689523 (509.16 it/sec) -training >> step=5463300, episode=911 reward=0.7659323 (502.20 it/sec) -training >> step=5463400, episode=911 reward=0.7829797 (472.89 it/sec) -training >> step=5463500, episode=911 reward=0.765682 (507.18 it/sec) -training >> step=5463600, episode=911 reward=0.7814001 (495.77 it/sec) -training >> step=5463700, episode=911 reward=0.7874089 (501.98 it/sec) -training >> step=5463800, episode=911 reward=0.795018 (484.63 it/sec) -training >> step=5463900, episode=911 reward=0.777867 (518.51 it/sec) -training >> step=5464000, episode=911 reward=0.7752644 (533.74 it/sec) -training >> step=5464100, episode=911 reward=0.8060692 (455.78 it/sec) -training >> step=5464200, episode=911 reward=0.7797787 (468.25 it/sec) -training >> step=5464300, episode=911 reward=0.7820628 (540.29 it/sec) -training >> step=5464400, episode=911 reward=0.7813129 (509.36 it/sec) -training >> step=5464500, episode=911 reward=0.7954661 (509.61 it/sec) -training >> step=5464600, episode=911 reward=0.7741424 (435.24 it/sec) -training >> step=5464700, episode=911 reward=0.7702971 (487.21 it/sec) -training >> step=5464800, episode=911 reward=0.7749254 (479.74 it/sec) -training >> step=5464900, episode=911 reward=0.7843897 (465.17 it/sec) -training >> step=5465000, episode=911 reward=0.7852637 (478.27 it/sec) -training >> step=5465100, episode=911 reward=0.7783541 (413.41 it/sec) -training >> step=5465200, episode=911 reward=0.7708749 (472.95 it/sec) -training >> step=5465300, episode=912 reward=0.7907994 (94.83 it/sec) -training >> step=5465400, episode=912 reward=0.7792066 (499.26 it/sec) -training >> step=5465500, episode=912 reward=0.7914045 (557.19 it/sec) -training >> step=5465600, episode=912 reward=0.7758523 (524.99 it/sec) -training >> step=5465700, episode=912 reward=0.7824566 (481.22 it/sec) -training >> step=5465800, episode=912 reward=0.8004048 (568.90 it/sec) -training >> step=5465900, episode=912 reward=0.7977335 (537.18 it/sec) -training >> step=5466000, episode=912 reward=0.7945259 (547.52 it/sec) -training >> step=5466100, episode=912 reward=0.7920575 (552.07 it/sec) -training >> step=5466200, episode=912 reward=0.7774737 (499.15 it/sec) -training >> step=5466300, episode=912 reward=0.8034803 (514.12 it/sec) -training >> step=5466400, episode=912 reward=0.78819 (530.78 it/sec) -training >> step=5466500, episode=912 reward=0.8066728 (502.63 it/sec) -training >> step=5466600, episode=912 reward=0.7870342 (493.66 it/sec) -training >> step=5466700, episode=912 reward=0.7854288 (473.66 it/sec) -training >> step=5466800, episode=912 reward=0.7660941 (518.63 it/sec) -training >> step=5466900, episode=912 reward=0.7897483 (469.16 it/sec) -training >> step=5467000, episode=912 reward=0.7764971 (546.08 it/sec) -training >> step=5467100, episode=912 reward=0.7816863 (542.11 it/sec) -training >> step=5467200, episode=912 reward=0.771049 (543.63 it/sec) -training >> step=5467300, episode=912 reward=0.783768 (493.55 it/sec) -training >> step=5467400, episode=912 reward=0.7933447 (533.02 it/sec) -training >> step=5467500, episode=912 reward=0.7770715 (550.25 it/sec) -training >> step=5467600, episode=912 reward=0.784934 (556.39 it/sec) -training >> step=5467700, episode=912 reward=0.785139 (518.54 it/sec) -training >> step=5467800, episode=912 reward=0.7553442 (454.44 it/sec) -training >> step=5467900, episode=912 reward=0.7648264 (511.71 it/sec) -training >> step=5468000, episode=912 reward=0.7933934 (546.29 it/sec) -training >> step=5468100, episode=912 reward=0.779272 (502.31 it/sec) -training >> step=5468200, episode=912 reward=0.7954064 (467.46 it/sec) -training >> step=5468300, episode=912 reward=0.7807912 (476.40 it/sec) -training >> step=5468400, episode=912 reward=0.8019485 (447.23 it/sec) -training >> step=5468500, episode=912 reward=0.7970201 (504.10 it/sec) -training >> step=5468600, episode=912 reward=0.7609786 (512.19 it/sec) -training >> step=5468700, episode=912 reward=0.7900575 (531.84 it/sec) -training >> step=5468800, episode=912 reward=0.764704 (360.82 it/sec) -training >> step=5468900, episode=912 reward=0.7845343 (451.64 it/sec) -training >> step=5469000, episode=912 reward=0.7783437 (438.53 it/sec) -training >> step=5469100, episode=912 reward=0.7787074 (474.81 it/sec) -training >> step=5469200, episode=912 reward=0.7846931 (472.95 it/sec) -training >> step=5469300, episode=912 reward=0.7907234 (475.59 it/sec) -training >> step=5469400, episode=912 reward=0.7773011 (482.50 it/sec) -training >> step=5469500, episode=912 reward=0.7858152 (475.89 it/sec) -training >> step=5469600, episode=912 reward=0.7955505 (470.46 it/sec) -training >> step=5469700, episode=912 reward=0.7999552 (509.46 it/sec) -training >> step=5469800, episode=912 reward=0.7912494 (454.98 it/sec) -training >> step=5469900, episode=912 reward=0.784532 (492.42 it/sec) -training >> step=5470000, episode=912 reward=0.7614267 (515.41 it/sec) -training >> step=5470100, episode=912 reward=0.7652628 (519.73 it/sec) -training >> step=5470200, episode=912 reward=0.7766705 (498.10 it/sec) -training >> step=5470300, episode=912 reward=0.7814949 (493.02 it/sec) -training >> step=5470400, episode=912 reward=0.7887749 (477.36 it/sec) -training >> step=5470500, episode=912 reward=0.7791086 (443.27 it/sec) -training >> step=5470600, episode=912 reward=0.7787644 (501.30 it/sec) -training >> step=5470700, episode=912 reward=0.7791903 (514.60 it/sec) -training >> step=5470800, episode=912 reward=0.7786347 (470.11 it/sec) -training >> step=5470900, episode=912 reward=0.784081 (475.30 it/sec) -training >> step=5471000, episode=912 reward=0.7733294 (511.17 it/sec) -training >> step=5471100, episode=912 reward=0.7934355 (486.03 it/sec) -training >> step=5471200, episode=912 reward=0.787801 (494.37 it/sec) -training >> step=5471300, episode=913 reward=0.7914359 (64.97 it/sec) -training >> step=5471400, episode=913 reward=0.7938089 (476.62 it/sec) -training >> step=5471500, episode=913 reward=0.7801346 (436.81 it/sec) -training >> step=5471600, episode=913 reward=0.7835217 (441.81 it/sec) -training >> step=5471700, episode=913 reward=0.7950843 (458.19 it/sec) -training >> step=5471800, episode=913 reward=0.8009673 (440.53 it/sec) -training >> step=5471900, episode=913 reward=0.7989101 (469.55 it/sec) -training >> step=5472000, episode=913 reward=0.7817602 (500.07 it/sec) -training >> step=5472100, episode=913 reward=0.8027214 (436.53 it/sec) -training >> step=5472200, episode=913 reward=0.803834 (466.92 it/sec) -training >> step=5472300, episode=913 reward=0.7910247 (502.46 it/sec) -training >> step=5472400, episode=913 reward=0.7804496 (445.36 it/sec) -training >> step=5472500, episode=913 reward=0.7895637 (454.02 it/sec) -training >> step=5472600, episode=913 reward=0.7744409 (452.01 it/sec) -training >> step=5472700, episode=913 reward=0.7549229 (466.62 it/sec) -training >> step=5472800, episode=913 reward=0.7720346 (455.10 it/sec) -training >> step=5472900, episode=913 reward=0.7750116 (500.78 it/sec) -training >> step=5473000, episode=913 reward=0.7859616 (462.17 it/sec) -training >> step=5473100, episode=913 reward=0.7832009 (408.70 it/sec) -training >> step=5473200, episode=913 reward=0.7786928 (426.14 it/sec) -training >> step=5473300, episode=913 reward=0.7763543 (512.59 it/sec) -training >> step=5473400, episode=913 reward=0.7878889 (486.87 it/sec) -training >> step=5473500, episode=913 reward=0.782612 (455.77 it/sec) -training >> step=5473600, episode=913 reward=0.7842106 (437.33 it/sec) -training >> step=5473700, episode=913 reward=0.7877276 (485.87 it/sec) -training >> step=5473800, episode=913 reward=0.7691826 (484.78 it/sec) -training >> step=5473900, episode=913 reward=0.7787207 (485.00 it/sec) -training >> step=5474000, episode=913 reward=0.7987581 (489.96 it/sec) -training >> step=5474100, episode=913 reward=0.7919052 (473.93 it/sec) -training >> step=5474200, episode=913 reward=0.7945207 (455.69 it/sec) -training >> step=5474300, episode=913 reward=0.7777625 (494.99 it/sec) -training >> step=5474400, episode=913 reward=0.7714198 (496.98 it/sec) -training >> step=5474500, episode=913 reward=0.7865677 (421.80 it/sec) -training >> step=5474600, episode=913 reward=0.7605655 (464.31 it/sec) -training >> step=5474700, episode=913 reward=0.7630038 (489.73 it/sec) -training >> step=5474800, episode=913 reward=0.7798603 (458.21 it/sec) -training >> step=5474900, episode=913 reward=0.7737173 (479.85 it/sec) -training >> step=5475000, episode=913 reward=0.7768496 (448.70 it/sec) -training >> step=5475100, episode=913 reward=0.7787035 (460.83 it/sec) -training >> step=5475200, episode=913 reward=0.7857925 (503.29 it/sec) -training >> step=5475300, episode=913 reward=0.7897852 (476.52 it/sec) -training >> step=5475400, episode=913 reward=0.790885 (490.04 it/sec) -training >> step=5475500, episode=913 reward=0.790707 (402.21 it/sec) -training >> step=5475600, episode=913 reward=0.7856795 (488.77 it/sec) -training >> step=5475700, episode=913 reward=0.7924775 (495.37 it/sec) -training >> step=5475800, episode=913 reward=0.7704563 (495.81 it/sec) -training >> step=5475900, episode=913 reward=0.7887771 (417.93 it/sec) -training >> step=5476000, episode=913 reward=0.7991093 (484.27 it/sec) -training >> step=5476100, episode=913 reward=0.7757487 (496.98 it/sec) -training >> step=5476200, episode=913 reward=0.7840791 (498.24 it/sec) -training >> step=5476300, episode=913 reward=0.7848969 (497.14 it/sec) -training >> step=5476400, episode=913 reward=0.7742919 (506.44 it/sec) -training >> step=5476500, episode=913 reward=0.7910589 (497.72 it/sec) -training >> step=5476600, episode=913 reward=0.758919 (486.02 it/sec) -training >> step=5476700, episode=913 reward=0.7925892 (503.43 it/sec) -training >> step=5476800, episode=913 reward=0.7857034 (548.49 it/sec) -training >> step=5476900, episode=913 reward=0.7708085 (498.72 it/sec) -training >> step=5477000, episode=913 reward=0.7805533 (453.03 it/sec) -training >> step=5477100, episode=913 reward=0.7826891 (488.62 it/sec) -training >> step=5477200, episode=913 reward=0.760493 (513.08 it/sec) -training >> step=5477300, episode=914 reward=0.7636606 (97.04 it/sec) -training >> step=5477400, episode=914 reward=0.7840926 (512.25 it/sec) -training >> step=5477500, episode=914 reward=0.7933899 (552.09 it/sec) -training >> step=5477600, episode=914 reward=0.7860321 (504.58 it/sec) -training >> step=5477700, episode=914 reward=0.7995097 (519.89 it/sec) -training >> step=5477800, episode=914 reward=0.7674074 (536.62 it/sec) -training >> step=5477900, episode=914 reward=0.7806748 (549.62 it/sec) -training >> step=5478000, episode=914 reward=0.7733052 (537.32 it/sec) -training >> step=5478100, episode=914 reward=0.7899653 (491.72 it/sec) -training >> step=5478200, episode=914 reward=0.7996118 (559.84 it/sec) -training >> step=5478300, episode=914 reward=0.7995486 (523.72 it/sec) -training >> step=5478400, episode=914 reward=0.7852455 (539.90 it/sec) -training >> step=5478500, episode=914 reward=0.7843207 (585.07 it/sec) -training >> step=5478600, episode=914 reward=0.7553675 (537.23 it/sec) -training >> step=5478700, episode=914 reward=0.7932617 (509.94 it/sec) -training >> step=5478800, episode=914 reward=0.7866264 (546.16 it/sec) -training >> step=5478900, episode=914 reward=0.788509 (549.43 it/sec) -training >> step=5479000, episode=914 reward=0.7826779 (529.98 it/sec) -training >> step=5479100, episode=914 reward=0.7811004 (507.98 it/sec) -training >> step=5479200, episode=914 reward=0.7776285 (503.03 it/sec) -training >> step=5479300, episode=914 reward=0.800856 (521.23 it/sec) -training >> step=5479400, episode=914 reward=0.7887363 (506.29 it/sec) -training >> step=5479500, episode=914 reward=0.7803345 (514.18 it/sec) -training >> step=5479600, episode=914 reward=0.80604 (491.60 it/sec) -training >> step=5479700, episode=914 reward=0.7942595 (478.38 it/sec) -training >> step=5479800, episode=914 reward=0.7962324 (477.91 it/sec) -training >> step=5479900, episode=914 reward=0.7769636 (531.89 it/sec) -training >> step=5480000, episode=914 reward=0.7778105 (510.54 it/sec) -training >> step=5480100, episode=914 reward=0.8039588 (491.64 it/sec) -training >> step=5480200, episode=914 reward=0.783026 (444.04 it/sec) -training >> step=5480300, episode=914 reward=0.7959159 (488.11 it/sec) -training >> step=5480400, episode=914 reward=0.783303 (477.71 it/sec) -training >> step=5480500, episode=914 reward=0.7868373 (497.29 it/sec) -training >> step=5480600, episode=914 reward=0.7965051 (543.67 it/sec) -training >> step=5480700, episode=914 reward=0.7883002 (444.66 it/sec) -training >> step=5480800, episode=914 reward=0.7853071 (486.62 it/sec) -training >> step=5480900, episode=914 reward=0.7731467 (500.54 it/sec) -training >> step=5481000, episode=914 reward=0.7917374 (456.53 it/sec) -training >> step=5481100, episode=914 reward=0.7801599 (517.64 it/sec) -training >> step=5481200, episode=914 reward=0.7832356 (452.25 it/sec) -training >> step=5481300, episode=914 reward=0.7696056 (471.41 it/sec) -training >> step=5481400, episode=914 reward=0.7759957 (441.84 it/sec) -training >> step=5481500, episode=914 reward=0.7866663 (529.79 it/sec) -training >> step=5481600, episode=914 reward=0.7743481 (493.60 it/sec) -training >> step=5481700, episode=914 reward=0.7846287 (499.90 it/sec) -training >> step=5481800, episode=914 reward=0.7903067 (467.33 it/sec) -training >> step=5481900, episode=914 reward=0.7875248 (472.73 it/sec) -training >> step=5482000, episode=914 reward=0.7767909 (481.04 it/sec) -training >> step=5482100, episode=914 reward=0.7836509 (553.68 it/sec) -training >> step=5482200, episode=914 reward=0.7815091 (490.60 it/sec) -training >> step=5482300, episode=914 reward=0.7859095 (414.74 it/sec) -training >> step=5482400, episode=914 reward=0.7799898 (535.03 it/sec) -training >> step=5482500, episode=914 reward=0.7722461 (486.65 it/sec) -training >> step=5482600, episode=914 reward=0.7757323 (503.24 it/sec) -training >> step=5482700, episode=914 reward=0.791119 (447.38 it/sec) -training >> step=5482800, episode=914 reward=0.8032598 (507.88 it/sec) -training >> step=5482900, episode=914 reward=0.8003061 (506.35 it/sec) -training >> step=5483000, episode=914 reward=0.7696684 (510.53 it/sec) -training >> step=5483100, episode=914 reward=0.7907682 (510.84 it/sec) -training >> step=5483200, episode=914 reward=0.8022426 (468.33 it/sec) -training >> step=5483300, episode=915 reward=0.7662428 (95.35 it/sec) -training >> step=5483400, episode=915 reward=0.7754282 (471.23 it/sec) -training >> step=5483500, episode=915 reward=0.7859102 (523.73 it/sec) -training >> step=5483600, episode=915 reward=0.7843273 (468.63 it/sec) -training >> step=5483700, episode=915 reward=0.7839619 (505.68 it/sec) -training >> step=5483800, episode=915 reward=0.7955176 (483.56 it/sec) -training >> step=5483900, episode=915 reward=0.8032503 (473.70 it/sec) -training >> step=5484000, episode=915 reward=0.7951803 (519.80 it/sec) -training >> step=5484100, episode=915 reward=0.7708771 (520.41 it/sec) -training >> step=5484200, episode=915 reward=0.8045021 (441.04 it/sec) -training >> step=5484300, episode=915 reward=0.7861439 (457.21 it/sec) -training >> step=5484400, episode=915 reward=0.7808831 (498.22 it/sec) -training >> step=5484500, episode=915 reward=0.7895431 (508.17 it/sec) -training >> step=5484600, episode=915 reward=0.7539268 (486.56 it/sec) -training >> step=5484700, episode=915 reward=0.7823321 (451.40 it/sec) -training >> step=5484800, episode=915 reward=0.7975138 (423.41 it/sec) -training >> step=5484900, episode=915 reward=0.7928932 (485.99 it/sec) -training >> step=5485000, episode=915 reward=0.7878869 (488.89 it/sec) -training >> step=5485100, episode=915 reward=0.7692773 (526.58 it/sec) -training >> step=5485200, episode=915 reward=0.778229 (532.70 it/sec) -training >> step=5485300, episode=915 reward=0.7809762 (457.24 it/sec) -training >> step=5485400, episode=915 reward=0.7790248 (480.79 it/sec) -training >> step=5485500, episode=915 reward=0.7786164 (527.36 it/sec) -training >> step=5485600, episode=915 reward=0.7795731 (513.51 it/sec) -training >> step=5485700, episode=915 reward=0.7942564 (455.85 it/sec) -training >> step=5485800, episode=915 reward=0.7779105 (487.73 it/sec) -training >> step=5485900, episode=915 reward=0.7772413 (511.36 it/sec) -training >> step=5486000, episode=915 reward=0.7914032 (498.54 it/sec) -training >> step=5486100, episode=915 reward=0.7895006 (522.73 it/sec) -training >> step=5486200, episode=915 reward=0.796271 (516.99 it/sec) -training >> step=5486300, episode=915 reward=0.7782221 (495.94 it/sec) -training >> step=5486400, episode=915 reward=0.7859209 (478.65 it/sec) -training >> step=5486500, episode=915 reward=0.7906858 (499.50 it/sec) -training >> step=5486600, episode=915 reward=0.7701768 (535.32 it/sec) -training >> step=5486700, episode=915 reward=0.7688645 (497.30 it/sec) -training >> step=5486800, episode=915 reward=0.7932866 (499.93 it/sec) -training >> step=5486900, episode=915 reward=0.7550462 (468.41 it/sec) -training >> step=5487000, episode=915 reward=0.7889169 (525.82 it/sec) -training >> step=5487100, episode=915 reward=0.7854358 (502.57 it/sec) -training >> step=5487200, episode=915 reward=0.7711457 (516.65 it/sec) -training >> step=5487300, episode=915 reward=0.7968554 (478.03 it/sec) -training >> step=5487400, episode=915 reward=0.8038789 (454.74 it/sec) -training >> step=5487500, episode=915 reward=0.7891239 (499.85 it/sec) -training >> step=5487600, episode=915 reward=0.7585547 (537.58 it/sec) -training >> step=5487700, episode=915 reward=0.7892065 (531.63 it/sec) -training >> step=5487800, episode=915 reward=0.7832335 (484.78 it/sec) -training >> step=5487900, episode=915 reward=0.7887096 (442.50 it/sec) -training >> step=5488000, episode=915 reward=0.7680328 (481.42 it/sec) -training >> step=5488100, episode=915 reward=0.7635565 (492.40 it/sec) -training >> step=5488200, episode=915 reward=0.7836171 (497.92 it/sec) -training >> step=5488300, episode=915 reward=0.7887301 (465.67 it/sec) -training >> step=5488400, episode=915 reward=0.7863032 (472.45 it/sec) -training >> step=5488500, episode=915 reward=0.8026568 (512.79 it/sec) -training >> step=5488600, episode=915 reward=0.7787547 (512.41 it/sec) -training >> step=5488700, episode=915 reward=0.7931142 (507.30 it/sec) -training >> step=5488800, episode=915 reward=0.803256 (518.18 it/sec) -training >> step=5488900, episode=915 reward=0.7721311 (436.84 it/sec) -training >> step=5489000, episode=915 reward=0.7864112 (474.75 it/sec) -training >> step=5489100, episode=915 reward=0.7605459 (481.97 it/sec) -training >> step=5489200, episode=915 reward=0.776279 (433.04 it/sec) -training >> step=5489300, episode=916 reward=0.7795303 (78.56 it/sec) -training >> step=5489400, episode=916 reward=0.7787054 (357.19 it/sec) -training >> step=5489500, episode=916 reward=0.7992783 (435.97 it/sec) -training >> step=5489600, episode=916 reward=0.7750216 (414.89 it/sec) -training >> step=5489700, episode=916 reward=0.7806017 (452.69 it/sec) -training >> step=5489800, episode=916 reward=0.7546621 (511.86 it/sec) -training >> step=5489900, episode=916 reward=0.7961913 (441.72 it/sec) -training >> step=5490000, episode=916 reward=0.7924177 (501.55 it/sec) -training >> step=5490100, episode=916 reward=0.7737364 (466.61 it/sec) -training >> step=5490200, episode=916 reward=0.7965474 (476.98 it/sec) -training >> step=5490300, episode=916 reward=0.7761061 (460.62 it/sec) -training >> step=5490400, episode=916 reward=0.7822096 (479.10 it/sec) -training >> step=5490500, episode=916 reward=0.7886609 (509.73 it/sec) -training >> step=5490600, episode=916 reward=0.7811194 (503.60 it/sec) -training >> step=5490700, episode=916 reward=0.7983591 (444.27 it/sec) -training >> step=5490800, episode=916 reward=0.7685514 (489.31 it/sec) -training >> step=5490900, episode=916 reward=0.7835607 (511.52 it/sec) -training >> step=5491000, episode=916 reward=0.7845572 (497.97 it/sec) -training >> step=5491100, episode=916 reward=0.7729757 (519.38 it/sec) -training >> step=5491200, episode=916 reward=0.7976222 (537.30 it/sec) -training >> step=5491300, episode=916 reward=0.7719814 (468.16 it/sec) -training >> step=5491400, episode=916 reward=0.7778204 (505.30 it/sec) -training >> step=5491500, episode=916 reward=0.7651466 (538.49 it/sec) -training >> step=5491600, episode=916 reward=0.7716624 (516.87 it/sec) -training >> step=5491700, episode=916 reward=0.7883661 (500.87 it/sec) -training >> step=5491800, episode=916 reward=0.7786751 (491.15 it/sec) -training >> step=5491900, episode=916 reward=0.7362138 (520.51 it/sec) -training >> step=5492000, episode=916 reward=0.7721595 (450.09 it/sec) -training >> step=5492100, episode=916 reward=0.7837561 (501.01 it/sec) -training >> step=5492200, episode=916 reward=0.7739573 (557.27 it/sec) -training >> step=5492300, episode=916 reward=0.7665836 (473.55 it/sec) -training >> step=5492400, episode=916 reward=0.7829939 (474.59 it/sec) -training >> step=5492500, episode=916 reward=0.7835374 (539.95 it/sec) -training >> step=5492600, episode=916 reward=0.8071951 (563.60 it/sec) -training >> step=5492700, episode=916 reward=0.7693118 (523.21 it/sec) -training >> step=5492800, episode=916 reward=0.7799162 (488.63 it/sec) -training >> step=5492900, episode=916 reward=0.7984967 (437.50 it/sec) -training >> step=5493000, episode=916 reward=0.7992896 (404.70 it/sec) -training >> step=5493100, episode=916 reward=0.7885562 (486.05 it/sec) -training >> step=5493200, episode=916 reward=0.7846484 (530.60 it/sec) -training >> step=5493300, episode=916 reward=0.793391 (471.56 it/sec) -training >> step=5493400, episode=916 reward=0.8023506 (438.70 it/sec) -training >> step=5493500, episode=916 reward=0.7802073 (493.66 it/sec) -training >> step=5493600, episode=916 reward=0.7848007 (526.67 it/sec) -training >> step=5493700, episode=916 reward=0.7954628 (468.92 it/sec) -training >> step=5493800, episode=916 reward=0.7821373 (487.25 it/sec) -training >> step=5493900, episode=916 reward=0.775917 (443.96 it/sec) -training >> step=5494000, episode=916 reward=0.7809476 (463.39 it/sec) -training >> step=5494100, episode=916 reward=0.7697542 (418.31 it/sec) -training >> step=5494200, episode=916 reward=0.7869291 (471.52 it/sec) -training >> step=5494300, episode=916 reward=0.806842 (471.24 it/sec) -training >> step=5494400, episode=916 reward=0.7745762 (469.86 it/sec) -training >> step=5494500, episode=916 reward=0.7869123 (450.69 it/sec) -training >> step=5494600, episode=916 reward=0.783801 (466.70 it/sec) -training >> step=5494700, episode=916 reward=0.7799806 (491.87 it/sec) -training >> step=5494800, episode=916 reward=0.7899147 (477.73 it/sec) -training >> step=5494900, episode=916 reward=0.7660848 (438.67 it/sec) -training >> step=5495000, episode=916 reward=0.7798641 (445.05 it/sec) -training >> step=5495100, episode=916 reward=0.7876996 (487.54 it/sec) -training >> step=5495200, episode=916 reward=0.8003882 (476.34 it/sec) -training >> step=5495300, episode=917 reward=0.8023288 (62.75 it/sec) -training >> step=5495400, episode=917 reward=0.7818025 (486.71 it/sec) -training >> step=5495500, episode=917 reward=0.783212 (397.70 it/sec) -training >> step=5495600, episode=917 reward=0.7984472 (477.55 it/sec) -training >> step=5495700, episode=917 reward=0.7692145 (442.45 it/sec) -training >> step=5495800, episode=917 reward=0.7875658 (414.00 it/sec) -training >> step=5495900, episode=917 reward=0.7718638 (423.97 it/sec) -training >> step=5496000, episode=917 reward=0.7674749 (460.17 it/sec) -training >> step=5496100, episode=917 reward=0.7701629 (420.30 it/sec) -training >> step=5496200, episode=917 reward=0.7653488 (458.45 it/sec) -training >> step=5496300, episode=917 reward=0.8038961 (442.46 it/sec) -training >> step=5496400, episode=917 reward=0.7757117 (466.05 it/sec) -training >> step=5496500, episode=917 reward=0.7860156 (486.26 it/sec) -training >> step=5496600, episode=917 reward=0.7758774 (475.26 it/sec) -training >> step=5496700, episode=917 reward=0.7820665 (469.61 it/sec) -training >> step=5496800, episode=917 reward=0.7899247 (520.99 it/sec) -training >> step=5496900, episode=917 reward=0.7880623 (481.09 it/sec) -training >> step=5497000, episode=917 reward=0.7838836 (478.41 it/sec) -training >> step=5497100, episode=917 reward=0.7780255 (490.12 it/sec) -training >> step=5497200, episode=917 reward=0.7686531 (493.69 it/sec) -training >> step=5497300, episode=917 reward=0.7974019 (531.21 it/sec) -training >> step=5497400, episode=917 reward=0.7931458 (513.66 it/sec) -training >> step=5497500, episode=917 reward=0.7579372 (466.18 it/sec) -training >> step=5497600, episode=917 reward=0.7866918 (478.44 it/sec) -training >> step=5497700, episode=917 reward=0.7970272 (529.15 it/sec) -training >> step=5497800, episode=917 reward=0.7786582 (481.32 it/sec) -training >> step=5497900, episode=917 reward=0.7826073 (470.39 it/sec) -training >> step=5498000, episode=917 reward=0.7718615 (462.47 it/sec) -training >> step=5498100, episode=917 reward=0.7865232 (462.11 it/sec) -training >> step=5498200, episode=917 reward=0.7881516 (395.83 it/sec) -training >> step=5498300, episode=917 reward=0.7929883 (438.16 it/sec) -training >> step=5498400, episode=917 reward=0.7769423 (476.88 it/sec) -training >> step=5498500, episode=917 reward=0.7903462 (426.34 it/sec) -training >> step=5498600, episode=917 reward=0.7825448 (479.87 it/sec) -training >> step=5498700, episode=917 reward=0.7883374 (493.60 it/sec) -training >> step=5498800, episode=917 reward=0.7816759 (522.11 it/sec) -training >> step=5498900, episode=917 reward=0.7784471 (458.05 it/sec) -training >> step=5499000, episode=917 reward=0.7723653 (466.66 it/sec) -training >> step=5499100, episode=917 reward=0.7904594 (458.18 it/sec) -training >> step=5499200, episode=917 reward=0.7549869 (377.84 it/sec) -training >> step=5499300, episode=917 reward=0.7769338 (481.73 it/sec) -training >> step=5499400, episode=917 reward=0.7798157 (468.44 it/sec) -training >> step=5499500, episode=917 reward=0.7737352 (475.25 it/sec) -training >> step=5499600, episode=917 reward=0.7832823 (475.51 it/sec) -training >> step=5499700, episode=917 reward=0.7787061 (456.79 it/sec) -training >> step=5499800, episode=917 reward=0.7936597 (504.71 it/sec) -training >> step=5499900, episode=917 reward=0.7824423 (426.14 it/sec) -training >> step=5500000, episode=917 reward=0.7599057 (504.51 it/sec) -training >> step=5500100, episode=917 reward=0.7912483 (452.87 it/sec) -training >> step=5500200, episode=917 reward=0.7839236 (486.80 it/sec) -training >> step=5500300, episode=917 reward=0.7854046 (435.94 it/sec) -training >> step=5500400, episode=917 reward=0.7654769 (463.26 it/sec) -training >> step=5500500, episode=917 reward=0.7853873 (443.14 it/sec) -training >> step=5500600, episode=917 reward=0.7719054 (524.97 it/sec) -training >> step=5500700, episode=917 reward=0.7818449 (502.54 it/sec) -training >> step=5500800, episode=917 reward=0.7675867 (463.92 it/sec) -training >> step=5500900, episode=917 reward=0.7802921 (485.56 it/sec) -training >> step=5501000, episode=917 reward=0.756478 (500.14 it/sec) -training >> step=5501100, episode=917 reward=0.7695931 (506.34 it/sec) -training >> step=5501200, episode=917 reward=0.7647266 (495.67 it/sec) -training >> step=5501300, episode=918 reward=0.7865237 (73.17 it/sec) -training >> step=5501400, episode=918 reward=0.7832682 (476.69 it/sec) -training >> step=5501500, episode=918 reward=0.7708998 (466.66 it/sec) -training >> step=5501600, episode=918 reward=0.7886155 (491.70 it/sec) -training >> step=5501700, episode=918 reward=0.7895681 (485.90 it/sec) -training >> step=5501800, episode=918 reward=0.7749968 (497.38 it/sec) -training >> step=5501900, episode=918 reward=0.7922902 (501.70 it/sec) -training >> step=5502000, episode=918 reward=0.7678691 (493.40 it/sec) -training >> step=5502100, episode=918 reward=0.7934707 (480.87 it/sec) -training >> step=5502200, episode=918 reward=0.8076797 (468.68 it/sec) -training >> step=5502300, episode=918 reward=0.7953508 (475.72 it/sec) -training >> step=5502400, episode=918 reward=0.7868214 (495.75 it/sec) -training >> step=5502500, episode=918 reward=0.7939255 (457.38 it/sec) -training >> step=5502600, episode=918 reward=0.7947003 (502.57 it/sec) -training >> step=5502700, episode=918 reward=0.7830693 (481.39 it/sec) -training >> step=5502800, episode=918 reward=0.7907647 (455.06 it/sec) -training >> step=5502900, episode=918 reward=0.7947258 (474.42 it/sec) -training >> step=5503000, episode=918 reward=0.7889411 (507.05 it/sec) -training >> step=5503100, episode=918 reward=0.798332 (495.53 it/sec) -training >> step=5503200, episode=918 reward=0.7868201 (518.19 it/sec) -training >> step=5503300, episode=918 reward=0.768804 (452.58 it/sec) -training >> step=5503400, episode=918 reward=0.767571 (503.82 it/sec) -training >> step=5503500, episode=918 reward=0.779461 (513.03 it/sec) -training >> step=5503600, episode=918 reward=0.7998466 (544.45 it/sec) -training >> step=5503700, episode=918 reward=0.8059606 (482.57 it/sec) -training >> step=5503800, episode=918 reward=0.7796896 (484.42 it/sec) -training >> step=5503900, episode=918 reward=0.776803 (525.17 it/sec) -training >> step=5504000, episode=918 reward=0.7882663 (511.00 it/sec) -training >> step=5504100, episode=918 reward=0.7844432 (488.39 it/sec) -training >> step=5504200, episode=918 reward=0.7931896 (484.50 it/sec) -training >> step=5504300, episode=918 reward=0.7984045 (477.21 it/sec) -training >> step=5504400, episode=918 reward=0.8021344 (458.75 it/sec) -training >> step=5504500, episode=918 reward=0.7969959 (449.07 it/sec) -training >> step=5504600, episode=918 reward=0.7603634 (506.74 it/sec) -training >> step=5504700, episode=918 reward=0.7835124 (468.73 it/sec) -training >> step=5504800, episode=918 reward=0.7767521 (446.34 it/sec) -training >> step=5504900, episode=918 reward=0.7984228 (441.04 it/sec) -training >> step=5505000, episode=918 reward=0.8016005 (499.39 it/sec) -training >> step=5505100, episode=918 reward=0.7912483 (486.45 it/sec) -training >> step=5505200, episode=918 reward=0.7887961 (477.56 it/sec) -training >> step=5505300, episode=918 reward=0.7757457 (428.75 it/sec) -training >> step=5505400, episode=918 reward=0.7718362 (455.09 it/sec) -training >> step=5505500, episode=918 reward=0.800868 (458.71 it/sec) -training >> step=5505600, episode=918 reward=0.7749265 (486.62 it/sec) -training >> step=5505700, episode=918 reward=0.775703 (535.86 it/sec) -training >> step=5505800, episode=918 reward=0.7936929 (500.60 it/sec) -training >> step=5505900, episode=918 reward=0.7932365 (511.66 it/sec) -training >> step=5506000, episode=918 reward=0.7951038 (525.87 it/sec) -training >> step=5506100, episode=918 reward=0.7851914 (534.38 it/sec) -training >> step=5506200, episode=918 reward=0.7794034 (501.89 it/sec) -training >> step=5506300, episode=918 reward=0.7585459 (513.99 it/sec) -training >> step=5506400, episode=918 reward=0.7845953 (511.63 it/sec) -training >> step=5506500, episode=918 reward=0.7786686 (548.78 it/sec) -training >> step=5506600, episode=918 reward=0.7580799 (514.97 it/sec) -training >> step=5506700, episode=918 reward=0.7701725 (448.49 it/sec) -training >> step=5506800, episode=918 reward=0.7831589 (446.91 it/sec) -training >> step=5506900, episode=918 reward=0.7772186 (542.06 it/sec) -training >> step=5507000, episode=918 reward=0.8088102 (503.70 it/sec) -training >> step=5507100, episode=918 reward=0.7742605 (494.73 it/sec) -training >> step=5507200, episode=918 reward=0.7462939 (502.74 it/sec) -training >> step=5507300, episode=919 reward=0.7998839 (118.78 it/sec) -training >> step=5507400, episode=919 reward=0.770875 (503.75 it/sec) -training >> step=5507500, episode=919 reward=0.7761772 (489.25 it/sec) -training >> step=5507600, episode=919 reward=0.8055773 (527.64 it/sec) -training >> step=5507700, episode=919 reward=0.7912642 (526.10 it/sec) -training >> step=5507800, episode=919 reward=0.7703698 (544.77 it/sec) -training >> step=5507900, episode=919 reward=0.8012066 (474.76 it/sec) -training >> step=5508000, episode=919 reward=0.7717553 (506.47 it/sec) -training >> step=5508100, episode=919 reward=0.7848599 (459.76 it/sec) -training >> step=5508200, episode=919 reward=0.7801091 (508.17 it/sec) -training >> step=5508300, episode=919 reward=0.7977018 (508.13 it/sec) -training >> step=5508400, episode=919 reward=0.7749953 (454.00 it/sec) -training >> step=5508500, episode=919 reward=0.7954709 (492.77 it/sec) -training >> step=5508600, episode=919 reward=0.7949291 (506.39 it/sec) -training >> step=5508700, episode=919 reward=0.7923303 (504.74 it/sec) -training >> step=5508800, episode=919 reward=0.7948856 (484.12 it/sec) -training >> step=5508900, episode=919 reward=0.7865577 (458.38 it/sec) -training >> step=5509000, episode=919 reward=0.7899078 (511.53 it/sec) -training >> step=5509100, episode=919 reward=0.7761523 (447.08 it/sec) -training >> step=5509200, episode=919 reward=0.7838942 (516.29 it/sec) -training >> step=5509300, episode=919 reward=0.7903793 (492.58 it/sec) -training >> step=5509400, episode=919 reward=0.7886366 (521.86 it/sec) -training >> step=5509500, episode=919 reward=0.7821434 (447.52 it/sec) -training >> step=5509600, episode=919 reward=0.7871435 (451.46 it/sec) -training >> step=5509700, episode=919 reward=0.7962138 (517.94 it/sec) -training >> step=5509800, episode=919 reward=0.7929977 (497.76 it/sec) -training >> step=5509900, episode=919 reward=0.7784622 (465.04 it/sec) -training >> step=5510000, episode=919 reward=0.7794606 (485.04 it/sec) -training >> step=5510100, episode=919 reward=0.7713842 (478.66 it/sec) -training >> step=5510200, episode=919 reward=0.8017017 (510.07 it/sec) -training >> step=5510300, episode=919 reward=0.7848348 (465.63 it/sec) -training >> step=5510400, episode=919 reward=0.7777337 (487.87 it/sec) -training >> step=5510500, episode=919 reward=0.7879447 (474.08 it/sec) -training >> step=5510600, episode=919 reward=0.7754454 (466.97 it/sec) -training >> step=5510700, episode=919 reward=0.7779621 (490.40 it/sec) -training >> step=5510800, episode=919 reward=0.7749593 (510.00 it/sec) -training >> step=5510900, episode=919 reward=0.766609 (472.57 it/sec) -training >> step=5511000, episode=919 reward=0.7817794 (502.69 it/sec) -training >> step=5511100, episode=919 reward=0.8158579 (470.04 it/sec) -training >> step=5511200, episode=919 reward=0.7823306 (511.57 it/sec) -training >> step=5511300, episode=919 reward=0.8281841 (467.19 it/sec) -training >> step=5511400, episode=919 reward=0.7758875 (442.21 it/sec) -training >> step=5511500, episode=919 reward=0.7645532 (530.59 it/sec) -training >> step=5511600, episode=919 reward=0.778583 (476.07 it/sec) -training >> step=5511700, episode=919 reward=0.7880282 (467.65 it/sec) -training >> step=5511800, episode=919 reward=0.7783543 (393.54 it/sec) -training >> step=5511900, episode=919 reward=0.7785122 (478.51 it/sec) -training >> step=5512000, episode=919 reward=0.7780904 (444.04 it/sec) -training >> step=5512100, episode=919 reward=0.7691726 (500.44 it/sec) -training >> step=5512200, episode=919 reward=0.8005745 (464.86 it/sec) -training >> step=5512300, episode=919 reward=0.7781886 (527.84 it/sec) -training >> step=5512400, episode=919 reward=0.7870092 (478.55 it/sec) -training >> step=5512500, episode=919 reward=0.779905 (461.77 it/sec) -training >> step=5512600, episode=919 reward=0.7894032 (487.24 it/sec) -training >> step=5512700, episode=919 reward=0.7658474 (510.55 it/sec) -training >> step=5512800, episode=919 reward=0.7894645 (493.59 it/sec) -training >> step=5512900, episode=919 reward=0.7760813 (468.32 it/sec) -training >> step=5513000, episode=919 reward=0.7891973 (494.12 it/sec) -training >> step=5513100, episode=919 reward=0.7809871 (479.55 it/sec) -training >> step=5513200, episode=919 reward=0.773953 (488.29 it/sec) -training >> step=5513300, episode=920 reward=0.7557256 (73.08 it/sec) -training >> step=5513400, episode=920 reward=0.7796331 (486.53 it/sec) -training >> step=5513500, episode=920 reward=0.7754961 (480.12 it/sec) -training >> step=5513600, episode=920 reward=0.7878653 (517.02 it/sec) -training >> step=5513700, episode=920 reward=0.7852111 (478.96 it/sec) -training >> step=5513800, episode=920 reward=0.7840543 (475.86 it/sec) -training >> step=5513900, episode=920 reward=0.7678065 (509.69 it/sec) -training >> step=5514000, episode=920 reward=0.8038704 (462.45 it/sec) -training >> step=5514100, episode=920 reward=0.786275 (478.20 it/sec) -training >> step=5514200, episode=920 reward=0.7558958 (482.45 it/sec) -training >> step=5514300, episode=920 reward=0.7829847 (468.37 it/sec) -training >> step=5514400, episode=920 reward=0.7793112 (464.31 it/sec) -training >> step=5514500, episode=920 reward=0.7952895 (482.34 it/sec) -training >> step=5514600, episode=920 reward=0.7944236 (476.44 it/sec) -training >> step=5514700, episode=920 reward=0.8011478 (492.79 it/sec) -training >> step=5514800, episode=920 reward=0.7746285 (487.23 it/sec) -training >> step=5514900, episode=920 reward=0.7904105 (417.57 it/sec) -training >> step=5515000, episode=920 reward=0.7776334 (484.43 it/sec) -training >> step=5515100, episode=920 reward=0.7958034 (483.85 it/sec) -training >> step=5515200, episode=920 reward=0.7703041 (466.72 it/sec) -training >> step=5515300, episode=920 reward=0.8056531 (485.48 it/sec) -training >> step=5515400, episode=920 reward=0.8033562 (472.78 it/sec) -training >> step=5515500, episode=920 reward=0.7751922 (512.68 it/sec) -training >> step=5515600, episode=920 reward=0.7831483 (496.11 it/sec) -training >> step=5515700, episode=920 reward=0.7790562 (471.58 it/sec) -training >> step=5515800, episode=920 reward=0.785887 (490.15 it/sec) -training >> step=5515900, episode=920 reward=0.7983001 (447.71 it/sec) -training >> step=5516000, episode=920 reward=0.7952988 (453.74 it/sec) -training >> step=5516100, episode=920 reward=0.7727559 (388.30 it/sec) -training >> step=5516200, episode=920 reward=0.792694 (473.89 it/sec) -training >> step=5516300, episode=920 reward=0.7781917 (458.71 it/sec) -training >> step=5516400, episode=920 reward=0.7713603 (471.21 it/sec) -training >> step=5516500, episode=920 reward=0.7623376 (475.51 it/sec) -training >> step=5516600, episode=920 reward=0.7899461 (523.93 it/sec) -training >> step=5516700, episode=920 reward=0.7766117 (486.90 it/sec) -training >> step=5516800, episode=920 reward=0.793779 (463.91 it/sec) -training >> step=5516900, episode=920 reward=0.771727 (490.80 it/sec) -training >> step=5517000, episode=920 reward=0.799905 (491.49 it/sec) -training >> step=5517100, episode=920 reward=0.7753313 (494.55 it/sec) -training >> step=5517200, episode=920 reward=0.7683809 (460.09 it/sec) -training >> step=5517300, episode=920 reward=0.7909068 (529.85 it/sec) -training >> step=5517400, episode=920 reward=0.7830303 (435.97 it/sec) -training >> step=5517500, episode=920 reward=0.7836565 (475.22 it/sec) -training >> step=5517600, episode=920 reward=0.7965755 (468.44 it/sec) -training >> step=5517700, episode=920 reward=0.7772393 (500.16 it/sec) -training >> step=5517800, episode=920 reward=0.7752851 (494.83 it/sec) -training >> step=5517900, episode=920 reward=0.7822654 (477.91 it/sec) -training >> step=5518000, episode=920 reward=0.7774601 (482.82 it/sec) -training >> step=5518100, episode=920 reward=0.7847365 (481.65 it/sec) -training >> step=5518200, episode=920 reward=0.7834743 (471.29 it/sec) -training >> step=5518300, episode=920 reward=0.7882784 (464.46 it/sec) -training >> step=5518400, episode=920 reward=0.7558523 (482.10 it/sec) -training >> step=5518500, episode=920 reward=0.7672731 (509.02 it/sec) -training >> step=5518600, episode=920 reward=0.770952 (476.51 it/sec) -training >> step=5518700, episode=920 reward=0.7729667 (508.18 it/sec) -training >> step=5518800, episode=920 reward=0.7816514 (502.14 it/sec) -training >> step=5518900, episode=920 reward=0.7751202 (462.29 it/sec) -training >> step=5519000, episode=920 reward=0.7981758 (472.13 it/sec) -training >> step=5519100, episode=920 reward=0.79872 (518.66 it/sec) -training >> step=5519200, episode=920 reward=0.7826915 (462.08 it/sec) -training >> step=5519300, episode=921 reward=0.7726107 (60.85 it/sec) -training >> step=5519400, episode=921 reward=0.7592497 (498.42 it/sec) -training >> step=5519500, episode=921 reward=0.7621228 (513.36 it/sec) -training >> step=5519600, episode=921 reward=0.7729549 (537.19 it/sec) -training >> step=5519700, episode=921 reward=0.7938406 (518.55 it/sec) -training >> step=5519800, episode=921 reward=0.8008971 (514.20 it/sec) -training >> step=5519900, episode=921 reward=0.7794707 (531.43 it/sec) -training >> step=5520000, episode=921 reward=0.7938172 (512.02 it/sec) -training >> step=5520100, episode=921 reward=0.8004734 (520.39 it/sec) -training >> step=5520200, episode=921 reward=0.8007575 (515.07 it/sec) -training >> step=5520300, episode=921 reward=0.7777912 (513.80 it/sec) -training >> step=5520400, episode=921 reward=0.7984818 (443.28 it/sec) -training >> step=5520500, episode=921 reward=0.7819485 (478.79 it/sec) -training >> step=5520600, episode=921 reward=0.7732516 (515.64 it/sec) -training >> step=5520700, episode=921 reward=0.783597 (486.41 it/sec) -training >> step=5520800, episode=921 reward=0.7912288 (514.74 it/sec) -training >> step=5520900, episode=921 reward=0.7913582 (525.45 it/sec) -training >> step=5521000, episode=921 reward=0.8100957 (492.81 it/sec) -training >> step=5521100, episode=921 reward=0.7956169 (504.82 it/sec) -training >> step=5521200, episode=921 reward=0.7770336 (509.07 it/sec) -training >> step=5521300, episode=921 reward=0.7824126 (545.48 it/sec) -training >> step=5521400, episode=921 reward=0.7859034 (541.90 it/sec) -training >> step=5521500, episode=921 reward=0.7898317 (509.05 it/sec) -training >> step=5521600, episode=921 reward=0.7710157 (488.36 it/sec) -training >> step=5521700, episode=921 reward=0.7688901 (535.11 it/sec) -training >> step=5521800, episode=921 reward=0.7975746 (486.68 it/sec) -training >> step=5521900, episode=921 reward=0.7689203 (504.74 it/sec) -training >> step=5522000, episode=921 reward=0.7760186 (510.09 it/sec) -training >> step=5522100, episode=921 reward=0.7597225 (536.32 it/sec) -training >> step=5522200, episode=921 reward=0.7903696 (520.38 it/sec) -training >> step=5522300, episode=921 reward=0.8127489 (432.89 it/sec) -training >> step=5522400, episode=921 reward=0.7935967 (536.13 it/sec) -training >> step=5522500, episode=921 reward=0.7767765 (540.40 it/sec) -training >> step=5522600, episode=921 reward=0.7694693 (508.77 it/sec) -training >> step=5522700, episode=921 reward=0.8171675 (495.82 it/sec) -training >> step=5522800, episode=921 reward=0.7793817 (460.28 it/sec) -training >> step=5522900, episode=921 reward=0.7752762 (486.08 it/sec) -training >> step=5523000, episode=921 reward=0.7760456 (473.75 it/sec) -training >> step=5523100, episode=921 reward=0.7660223 (483.27 it/sec) -training >> step=5523200, episode=921 reward=0.7783341 (531.89 it/sec) -training >> step=5523300, episode=921 reward=0.7857348 (505.21 it/sec) -training >> step=5523400, episode=921 reward=0.8076184 (526.98 it/sec) -training >> step=5523500, episode=921 reward=0.7697706 (493.72 it/sec) -training >> step=5523600, episode=921 reward=0.80731 (460.22 it/sec) -training >> step=5523700, episode=921 reward=0.7999206 (501.45 it/sec) -training >> step=5523800, episode=921 reward=0.7942922 (475.98 it/sec) -training >> step=5523900, episode=921 reward=0.8054265 (483.98 it/sec) -training >> step=5524000, episode=921 reward=0.7870411 (431.10 it/sec) -training >> step=5524100, episode=921 reward=0.7937796 (454.52 it/sec) -training >> step=5524200, episode=921 reward=0.7996537 (511.64 it/sec) -training >> step=5524300, episode=921 reward=0.7975938 (458.73 it/sec) -training >> step=5524400, episode=921 reward=0.7858175 (488.70 it/sec) -training >> step=5524500, episode=921 reward=0.7861142 (481.03 it/sec) -training >> step=5524600, episode=921 reward=0.788206 (489.22 it/sec) -training >> step=5524700, episode=921 reward=0.7777325 (502.00 it/sec) -training >> step=5524800, episode=921 reward=0.7855669 (466.48 it/sec) -training >> step=5524900, episode=921 reward=0.7646313 (521.31 it/sec) -training >> step=5525000, episode=921 reward=0.7944671 (503.88 it/sec) -training >> step=5525100, episode=921 reward=0.8004311 (442.13 it/sec) -training >> step=5525200, episode=921 reward=0.7944186 (472.51 it/sec) -training >> step=5525300, episode=922 reward=0.7924139 (39.73 it/sec) -training >> step=5525400, episode=922 reward=0.7694402 (489.79 it/sec) -training >> step=5525500, episode=922 reward=0.7810168 (470.36 it/sec) -training >> step=5525600, episode=922 reward=0.7722933 (476.28 it/sec) -training >> step=5525700, episode=922 reward=0.7621449 (483.91 it/sec) -training >> step=5525800, episode=922 reward=0.7777085 (432.98 it/sec) -training >> step=5525900, episode=922 reward=0.7922776 (464.05 it/sec) -training >> step=5526000, episode=922 reward=0.7695912 (452.82 it/sec) -training >> step=5526100, episode=922 reward=0.7779914 (489.55 it/sec) -training >> step=5526200, episode=922 reward=0.7627954 (477.44 it/sec) -training >> step=5526300, episode=922 reward=0.7879696 (495.74 it/sec) -training >> step=5526400, episode=922 reward=0.7872163 (484.64 it/sec) -training >> step=5526500, episode=922 reward=0.7854926 (472.86 it/sec) -training >> step=5526600, episode=922 reward=0.779444 (493.09 it/sec) -training >> step=5526700, episode=922 reward=0.7757086 (496.48 it/sec) -training >> step=5526800, episode=922 reward=0.7909707 (500.59 it/sec) -training >> step=5526900, episode=922 reward=0.8027446 (507.34 it/sec) -training >> step=5527000, episode=922 reward=0.7838972 (476.37 it/sec) -training >> step=5527100, episode=922 reward=0.7958567 (488.30 it/sec) -training >> step=5527200, episode=922 reward=0.7931926 (457.18 it/sec) -training >> step=5527300, episode=922 reward=0.7728334 (499.93 it/sec) -training >> step=5527400, episode=922 reward=0.7916827 (480.38 it/sec) -training >> step=5527500, episode=922 reward=0.7870257 (477.23 it/sec) -training >> step=5527600, episode=922 reward=0.7798317 (464.78 it/sec) -training >> step=5527700, episode=922 reward=0.7753531 (459.52 it/sec) -training >> step=5527800, episode=922 reward=0.7899758 (468.13 it/sec) -training >> step=5527900, episode=922 reward=0.7789175 (455.93 it/sec) -training >> step=5528000, episode=922 reward=0.7964977 (506.18 it/sec) -training >> step=5528100, episode=922 reward=0.7868842 (472.88 it/sec) -training >> step=5528200, episode=922 reward=0.7801138 (432.01 it/sec) -training >> step=5528300, episode=922 reward=0.7984322 (493.45 it/sec) -training >> step=5528400, episode=922 reward=0.7709826 (458.76 it/sec) -training >> step=5528500, episode=922 reward=0.7718832 (493.01 it/sec) -training >> step=5528600, episode=922 reward=0.7834582 (512.59 it/sec) -training >> step=5528700, episode=922 reward=0.7936804 (494.93 it/sec) -training >> step=5528800, episode=922 reward=0.8065127 (444.77 it/sec) -training >> step=5528900, episode=922 reward=0.7875357 (482.57 it/sec) -training >> step=5529000, episode=922 reward=0.7668946 (431.23 it/sec) -training >> step=5529100, episode=922 reward=0.7963142 (446.50 it/sec) -training >> step=5529200, episode=922 reward=0.8026486 (479.19 it/sec) -training >> step=5529300, episode=922 reward=0.7651495 (499.78 it/sec) -training >> step=5529400, episode=922 reward=0.7831109 (482.12 it/sec) -training >> step=5529500, episode=922 reward=0.7931511 (490.21 it/sec) -training >> step=5529600, episode=922 reward=0.7859241 (456.04 it/sec) -training >> step=5529700, episode=922 reward=0.7836824 (499.82 it/sec) -training >> step=5529800, episode=922 reward=0.7918176 (508.24 it/sec) -training >> step=5529900, episode=922 reward=0.7755256 (461.64 it/sec) -training >> step=5530000, episode=922 reward=0.7735735 (494.58 it/sec) -training >> step=5530100, episode=922 reward=0.7873755 (424.70 it/sec) -training >> step=5530200, episode=922 reward=0.7849761 (463.66 it/sec) -training >> step=5530300, episode=922 reward=0.768629 (490.41 it/sec) -training >> step=5530400, episode=922 reward=0.7628576 (441.56 it/sec) -training >> step=5530500, episode=922 reward=0.7888474 (480.20 it/sec) -training >> step=5530600, episode=922 reward=0.7833655 (457.95 it/sec) -training >> step=5530700, episode=922 reward=0.7835971 (495.13 it/sec) -training >> step=5530800, episode=922 reward=0.7811131 (490.64 it/sec) -training >> step=5530900, episode=922 reward=0.7619421 (476.67 it/sec) -training >> step=5531000, episode=922 reward=0.7679916 (459.97 it/sec) -training >> step=5531100, episode=922 reward=0.7902967 (496.89 it/sec) -training >> step=5531200, episode=922 reward=0.7838266 (468.04 it/sec) -training >> step=5531300, episode=923 reward=0.7637455 (45.04 it/sec) -training >> step=5531400, episode=923 reward=0.7602688 (447.96 it/sec) -training >> step=5531500, episode=923 reward=0.7742767 (488.47 it/sec) -training >> step=5531600, episode=923 reward=0.7941274 (433.98 it/sec) -training >> step=5531700, episode=923 reward=0.760387 (505.60 it/sec) -training >> step=5531800, episode=923 reward=0.788985 (478.51 it/sec) -training >> step=5531900, episode=923 reward=0.7832783 (475.56 it/sec) -training >> step=5532000, episode=923 reward=0.783825 (482.25 it/sec) -training >> step=5532100, episode=923 reward=0.7902071 (497.01 it/sec) -training >> step=5532200, episode=923 reward=0.7985967 (484.71 it/sec) -training >> step=5532300, episode=923 reward=0.7595797 (486.26 it/sec) -training >> step=5532400, episode=923 reward=0.7675825 (415.25 it/sec) -training >> step=5532500, episode=923 reward=0.7728758 (435.00 it/sec) -training >> step=5532600, episode=923 reward=0.758251 (438.40 it/sec) -training >> step=5532700, episode=923 reward=0.7810239 (505.72 it/sec) -training >> step=5532800, episode=923 reward=0.7601699 (472.94 it/sec) -training >> step=5532900, episode=923 reward=0.778183 (465.98 it/sec) -training >> step=5533000, episode=923 reward=0.7685258 (465.10 it/sec) -training >> step=5533100, episode=923 reward=0.7840618 (464.62 it/sec) -training >> step=5533200, episode=923 reward=0.7920449 (448.72 it/sec) -training >> step=5533300, episode=923 reward=0.7610968 (435.12 it/sec) -training >> step=5533400, episode=923 reward=0.77679 (496.20 it/sec) -training >> step=5533500, episode=923 reward=0.7984682 (449.47 it/sec) -training >> step=5533600, episode=923 reward=0.7930358 (437.62 it/sec) -training >> step=5533700, episode=923 reward=0.7941568 (466.51 it/sec) -training >> step=5533800, episode=923 reward=0.7762445 (447.64 it/sec) -training >> step=5533900, episode=923 reward=0.8014635 (441.43 it/sec) -training >> step=5534000, episode=923 reward=0.7989226 (471.93 it/sec) -training >> step=5534100, episode=923 reward=0.7937996 (491.10 it/sec) -training >> step=5534200, episode=923 reward=0.7969607 (422.15 it/sec) -training >> step=5534300, episode=923 reward=0.7865231 (453.61 it/sec) -training >> step=5534400, episode=923 reward=0.7728015 (457.62 it/sec) -training >> step=5534500, episode=923 reward=0.7992533 (499.30 it/sec) -training >> step=5534600, episode=923 reward=0.7648094 (445.01 it/sec) -training >> step=5534700, episode=923 reward=0.8039666 (465.55 it/sec) -training >> step=5534800, episode=923 reward=0.7707549 (447.22 it/sec) -training >> step=5534900, episode=923 reward=0.7724622 (429.27 it/sec) -training >> step=5535000, episode=923 reward=0.7901272 (498.31 it/sec) -training >> step=5535100, episode=923 reward=0.7689276 (471.45 it/sec) -training >> step=5535200, episode=923 reward=0.7921906 (436.31 it/sec) -training >> step=5535300, episode=923 reward=0.7828451 (388.11 it/sec) -training >> step=5535400, episode=923 reward=0.7809874 (447.44 it/sec) -training >> step=5535500, episode=923 reward=0.7918372 (491.53 it/sec) -training >> step=5535600, episode=923 reward=0.7805654 (462.70 it/sec) -training >> step=5535700, episode=923 reward=0.7881358 (449.52 it/sec) -training >> step=5535800, episode=923 reward=0.8013388 (443.72 it/sec) -training >> step=5535900, episode=923 reward=0.8066531 (465.06 it/sec) -training >> step=5536000, episode=923 reward=0.7815036 (496.66 it/sec) -training >> step=5536100, episode=923 reward=0.7758684 (499.56 it/sec) -training >> step=5536200, episode=923 reward=0.761202 (454.61 it/sec) -training >> step=5536300, episode=923 reward=0.7820024 (477.03 it/sec) -training >> step=5536400, episode=923 reward=0.7681587 (505.49 it/sec) -training >> step=5536500, episode=923 reward=0.7802265 (427.16 it/sec) -training >> step=5536600, episode=923 reward=0.7847884 (502.38 it/sec) -training >> step=5536700, episode=923 reward=0.7700939 (472.59 it/sec) -training >> step=5536800, episode=923 reward=0.7766553 (464.12 it/sec) -training >> step=5536900, episode=923 reward=0.7836268 (495.65 it/sec) -training >> step=5537000, episode=923 reward=0.7720771 (522.71 it/sec) -training >> step=5537100, episode=923 reward=0.7571052 (495.98 it/sec) -training >> step=5537200, episode=923 reward=0.7989057 (471.35 it/sec) -training >> step=5537300, episode=924 reward=0.7968277 (75.23 it/sec) -training >> step=5537400, episode=924 reward=0.798023 (476.70 it/sec) -training >> step=5537500, episode=924 reward=0.7749033 (468.21 it/sec) -training >> step=5537600, episode=924 reward=0.7833861 (447.22 it/sec) -training >> step=5537700, episode=924 reward=0.7641531 (420.46 it/sec) -training >> step=5537800, episode=924 reward=0.7754286 (487.02 it/sec) -training >> step=5537900, episode=924 reward=0.7846671 (492.13 it/sec) -training >> step=5538000, episode=924 reward=0.7960098 (489.38 it/sec) -training >> step=5538100, episode=924 reward=0.7951164 (449.25 it/sec) -training >> step=5538200, episode=924 reward=0.7761093 (464.60 it/sec) -training >> step=5538300, episode=924 reward=0.7715609 (451.58 it/sec) -training >> step=5538400, episode=924 reward=0.7791959 (439.51 it/sec) -training >> step=5538500, episode=924 reward=0.797198 (439.18 it/sec) -training >> step=5538600, episode=924 reward=0.7823302 (447.85 it/sec) -training >> step=5538700, episode=924 reward=0.7572922 (474.16 it/sec) -training >> step=5538800, episode=924 reward=0.7851339 (489.75 it/sec) -training >> step=5538900, episode=924 reward=0.7793868 (481.83 it/sec) -training >> step=5539000, episode=924 reward=0.7894251 (452.75 it/sec) -training >> step=5539100, episode=924 reward=0.7820342 (389.50 it/sec) -training >> step=5539200, episode=924 reward=0.7911223 (478.77 it/sec) -training >> step=5539300, episode=924 reward=0.7764761 (496.04 it/sec) -training >> step=5539400, episode=924 reward=0.7933334 (459.26 it/sec) -training >> step=5539500, episode=924 reward=0.7704781 (482.64 it/sec) -training >> step=5539600, episode=924 reward=0.7679548 (429.53 it/sec) -training >> step=5539700, episode=924 reward=0.7833793 (476.28 it/sec) -training >> step=5539800, episode=924 reward=0.7918125 (435.63 it/sec) -training >> step=5539900, episode=924 reward=0.7908899 (463.08 it/sec) -training >> step=5540000, episode=924 reward=0.7786006 (459.30 it/sec) -training >> step=5540100, episode=924 reward=0.7755535 (431.36 it/sec) -training >> step=5540200, episode=924 reward=0.7791799 (477.33 it/sec) -training >> step=5540300, episode=924 reward=0.7847158 (478.28 it/sec) -training >> step=5540400, episode=924 reward=0.7671971 (491.67 it/sec) -training >> step=5540500, episode=924 reward=0.7989077 (438.29 it/sec) -training >> step=5540600, episode=924 reward=0.7733644 (494.76 it/sec) -training >> step=5540700, episode=924 reward=0.7996966 (477.82 it/sec) -training >> step=5540800, episode=924 reward=0.7829698 (506.48 it/sec) -training >> step=5540900, episode=924 reward=0.7657571 (483.25 it/sec) -training >> step=5541000, episode=924 reward=0.7816933 (487.15 it/sec) -training >> step=5541100, episode=924 reward=0.7611681 (429.62 it/sec) -training >> step=5541200, episode=924 reward=0.7905086 (476.91 it/sec) -training >> step=5541300, episode=924 reward=0.7911087 (477.14 it/sec) -training >> step=5541400, episode=924 reward=0.7721257 (492.22 it/sec) -training >> step=5541500, episode=924 reward=0.7754816 (480.33 it/sec) -training >> step=5541600, episode=924 reward=0.7815353 (488.63 it/sec) -training >> step=5541700, episode=924 reward=0.7833421 (502.09 it/sec) -training >> step=5541800, episode=924 reward=0.7682233 (503.42 it/sec) -training >> step=5541900, episode=924 reward=0.7774112 (506.95 it/sec) -training >> step=5542000, episode=924 reward=0.7795466 (522.86 it/sec) -training >> step=5542100, episode=924 reward=0.7781802 (515.80 it/sec) -training >> step=5542200, episode=924 reward=0.7730809 (482.54 it/sec) -training >> step=5542300, episode=924 reward=0.7756557 (480.30 it/sec) -training >> step=5542400, episode=924 reward=0.7792647 (520.19 it/sec) -training >> step=5542500, episode=924 reward=0.7719566 (436.86 it/sec) -training >> step=5542600, episode=924 reward=0.7645914 (521.02 it/sec) -training >> step=5542700, episode=924 reward=0.7737557 (501.23 it/sec) -training >> step=5542800, episode=924 reward=0.7599643 (491.83 it/sec) -training >> step=5542900, episode=924 reward=0.7578099 (454.05 it/sec) -training >> step=5543000, episode=924 reward=0.7761902 (469.26 it/sec) -training >> step=5543100, episode=924 reward=0.7643936 (505.69 it/sec) -training >> step=5543200, episode=924 reward=0.7592642 (514.27 it/sec) -training >> step=5543300, episode=925 reward=0.7864203 (86.19 it/sec) -training >> step=5543400, episode=925 reward=0.7793576 (474.64 it/sec) -training >> step=5543500, episode=925 reward=0.7803876 (459.70 it/sec) -training >> step=5543600, episode=925 reward=0.7840066 (475.19 it/sec) -training >> step=5543700, episode=925 reward=0.7797654 (470.35 it/sec) -training >> step=5543800, episode=925 reward=0.7766137 (492.10 it/sec) -training >> step=5543900, episode=925 reward=0.8010666 (465.22 it/sec) -training >> step=5544000, episode=925 reward=0.7884257 (448.73 it/sec) -training >> step=5544100, episode=925 reward=0.7937427 (511.39 it/sec) -training >> step=5544200, episode=925 reward=0.7974108 (499.89 it/sec) -training >> step=5544300, episode=925 reward=0.7871324 (455.74 it/sec) -training >> step=5544400, episode=925 reward=0.7652826 (534.33 it/sec) -training >> step=5544500, episode=925 reward=0.76454 (459.49 it/sec) -training >> step=5544600, episode=925 reward=0.775811 (508.69 it/sec) -training >> step=5544700, episode=925 reward=0.7809865 (495.95 it/sec) -training >> step=5544800, episode=925 reward=0.7740899 (496.37 it/sec) -training >> step=5544900, episode=925 reward=0.8046888 (473.75 it/sec) -training >> step=5545000, episode=925 reward=0.7926332 (484.83 it/sec) -training >> step=5545100, episode=925 reward=0.7921724 (465.66 it/sec) -training >> step=5545200, episode=925 reward=0.760125 (488.14 it/sec) -training >> step=5545300, episode=925 reward=0.7716722 (447.48 it/sec) -training >> step=5545400, episode=925 reward=0.7707978 (462.66 it/sec) -training >> step=5545500, episode=925 reward=0.7752504 (463.65 it/sec) -training >> step=5545600, episode=925 reward=0.7765189 (447.19 it/sec) -training >> step=5545700, episode=925 reward=0.7947239 (459.47 it/sec) -training >> step=5545800, episode=925 reward=0.7994988 (456.71 it/sec) -training >> step=5545900, episode=925 reward=0.7895297 (429.08 it/sec) -training >> step=5546000, episode=925 reward=0.7755813 (374.86 it/sec) -training >> step=5546100, episode=925 reward=0.7851752 (422.41 it/sec) -training >> step=5546200, episode=925 reward=0.7822523 (449.56 it/sec) -training >> step=5546300, episode=925 reward=0.7782208 (448.24 it/sec) -training >> step=5546400, episode=925 reward=0.8022767 (429.62 it/sec) -training >> step=5546500, episode=925 reward=0.7810842 (426.48 it/sec) -training >> step=5546600, episode=925 reward=0.7753855 (472.05 it/sec) -training >> step=5546700, episode=925 reward=0.7879095 (456.59 it/sec) -training >> step=5546800, episode=925 reward=0.7805133 (463.24 it/sec) -training >> step=5546900, episode=925 reward=0.7831736 (457.76 it/sec) -training >> step=5547000, episode=925 reward=0.8059019 (483.90 it/sec) -training >> step=5547100, episode=925 reward=0.7745834 (458.20 it/sec) -training >> step=5547200, episode=925 reward=0.7768136 (481.57 it/sec) -training >> step=5547300, episode=925 reward=0.7842481 (441.67 it/sec) -training >> step=5547400, episode=925 reward=0.7890735 (453.92 it/sec) -training >> step=5547500, episode=925 reward=0.7869306 (445.71 it/sec) -training >> step=5547600, episode=925 reward=0.7562733 (458.71 it/sec) -training >> step=5547700, episode=925 reward=0.7706452 (484.83 it/sec) -training >> step=5547800, episode=925 reward=0.752531 (447.61 it/sec) -training >> step=5547900, episode=925 reward=0.7936117 (448.31 it/sec) -training >> step=5548000, episode=925 reward=0.7898374 (462.48 it/sec) -training >> step=5548100, episode=925 reward=0.7671728 (464.00 it/sec) -training >> step=5548200, episode=925 reward=0.7679244 (399.57 it/sec) -training >> step=5548300, episode=925 reward=0.7568577 (429.76 it/sec) -training >> step=5548400, episode=925 reward=0.7590889 (455.69 it/sec) -training >> step=5548500, episode=925 reward=0.7918665 (446.12 it/sec) -training >> step=5548600, episode=925 reward=0.7790773 (458.31 it/sec) -training >> step=5548700, episode=925 reward=0.7792694 (458.99 it/sec) -training >> step=5548800, episode=925 reward=0.7745972 (467.95 it/sec) -training >> step=5548900, episode=925 reward=0.7739263 (435.67 it/sec) -training >> step=5549000, episode=925 reward=0.7803617 (423.47 it/sec) -training >> step=5549100, episode=925 reward=0.762665 (474.87 it/sec) -training >> step=5549200, episode=925 reward=0.7764668 (448.46 it/sec) -training >> step=5549300, episode=926 reward=0.7891081 (74.69 it/sec) -training >> step=5549400, episode=926 reward=0.7673692 (527.96 it/sec) -training >> step=5549500, episode=926 reward=0.7851326 (417.58 it/sec) -training >> step=5549600, episode=926 reward=0.7918785 (446.34 it/sec) -training >> step=5549700, episode=926 reward=0.7834792 (475.46 it/sec) -training >> step=5549800, episode=926 reward=0.7774332 (458.17 it/sec) -training >> step=5549900, episode=926 reward=0.7875501 (476.75 it/sec) -training >> step=5550000, episode=926 reward=0.7915324 (463.99 it/sec) -training >> step=5550100, episode=926 reward=0.7829234 (473.09 it/sec) -training >> step=5550200, episode=926 reward=0.8027918 (456.89 it/sec) -training >> step=5550300, episode=926 reward=0.7739584 (465.30 it/sec) -training >> step=5550400, episode=926 reward=0.7782605 (476.05 it/sec) -training >> step=5550500, episode=926 reward=0.781196 (451.71 it/sec) -training >> step=5550600, episode=926 reward=0.7890451 (468.00 it/sec) -training >> step=5550700, episode=926 reward=0.8045825 (458.34 it/sec) -training >> step=5550800, episode=926 reward=0.7673598 (438.00 it/sec) -training >> step=5550900, episode=926 reward=0.7947487 (486.30 it/sec) -training >> step=5551000, episode=926 reward=0.7998357 (469.89 it/sec) -training >> step=5551100, episode=926 reward=0.7876706 (455.69 it/sec) -training >> step=5551200, episode=926 reward=0.798084 (458.08 it/sec) -training >> step=5551300, episode=926 reward=0.7581158 (406.66 it/sec) -training >> step=5551400, episode=926 reward=0.795826 (485.17 it/sec) -training >> step=5551500, episode=926 reward=0.7903852 (422.76 it/sec) -training >> step=5551600, episode=926 reward=0.7933833 (447.15 it/sec) -training >> step=5551700, episode=926 reward=0.7668061 (452.26 it/sec) -training >> step=5551800, episode=926 reward=0.7650049 (484.10 it/sec) -training >> step=5551900, episode=926 reward=0.7853022 (454.58 it/sec) -training >> step=5552000, episode=926 reward=0.7875711 (458.42 it/sec) -training >> step=5552100, episode=926 reward=0.8158772 (457.09 it/sec) -training >> step=5552200, episode=926 reward=0.7637318 (483.49 it/sec) -training >> step=5552300, episode=926 reward=0.7808658 (451.16 it/sec) -training >> step=5552400, episode=926 reward=0.7842918 (453.05 it/sec) -training >> step=5552500, episode=926 reward=0.805085 (454.20 it/sec) -training >> step=5552600, episode=926 reward=0.7878034 (456.04 it/sec) -training >> step=5552700, episode=926 reward=0.7868647 (442.51 it/sec) -training >> step=5552800, episode=926 reward=0.7732497 (462.82 it/sec) -training >> step=5552900, episode=926 reward=0.7870731 (466.42 it/sec) -training >> step=5553000, episode=926 reward=0.7845287 (442.69 it/sec) -training >> step=5553100, episode=926 reward=0.7776453 (448.02 it/sec) -training >> step=5553200, episode=926 reward=0.7803825 (479.91 it/sec) -training >> step=5553300, episode=926 reward=0.7955369 (467.18 it/sec) -training >> step=5553400, episode=926 reward=0.7863904 (435.19 it/sec) -training >> step=5553500, episode=926 reward=0.7703708 (459.61 it/sec) -training >> step=5553600, episode=926 reward=0.7663726 (441.41 it/sec) -training >> step=5553700, episode=926 reward=0.7840596 (445.40 it/sec) -training >> step=5553800, episode=926 reward=0.7714816 (486.64 it/sec) -training >> step=5553900, episode=926 reward=0.770328 (473.83 it/sec) -training >> step=5554000, episode=926 reward=0.7763892 (420.37 it/sec) -training >> step=5554100, episode=926 reward=0.7806363 (435.27 it/sec) -training >> step=5554200, episode=926 reward=0.7754792 (433.76 it/sec) -training >> step=5554300, episode=926 reward=0.7534707 (498.27 it/sec) -training >> step=5554400, episode=926 reward=0.7567931 (374.94 it/sec) -training >> step=5554500, episode=926 reward=0.7666568 (429.71 it/sec) -training >> step=5554600, episode=926 reward=0.7703702 (430.28 it/sec) -training >> step=5554700, episode=926 reward=0.7914968 (458.80 it/sec) -training >> step=5554800, episode=926 reward=0.7600189 (439.14 it/sec) -training >> step=5554900, episode=926 reward=0.7639458 (475.15 it/sec) -training >> step=5555000, episode=926 reward=0.7693993 (469.79 it/sec) -training >> step=5555100, episode=926 reward=0.76909 (449.41 it/sec) -training >> step=5555200, episode=926 reward=0.7515849 (463.54 it/sec) -training >> step=5555300, episode=927 reward=0.7756464 (72.61 it/sec) -training >> step=5555400, episode=927 reward=0.7900214 (444.12 it/sec) -training >> step=5555500, episode=927 reward=0.7691758 (455.06 it/sec) -training >> step=5555600, episode=927 reward=0.7821369 (514.70 it/sec) -training >> step=5555700, episode=927 reward=0.7595968 (422.95 it/sec) -training >> step=5555800, episode=927 reward=0.7959394 (425.57 it/sec) -training >> step=5555900, episode=927 reward=0.7955811 (461.03 it/sec) -training >> step=5556000, episode=927 reward=0.8051396 (495.40 it/sec) -training >> step=5556100, episode=927 reward=0.8001747 (482.72 it/sec) -training >> step=5556200, episode=927 reward=0.7799897 (425.43 it/sec) -training >> step=5556300, episode=927 reward=0.7630224 (449.65 it/sec) -training >> step=5556400, episode=927 reward=0.7779654 (470.50 it/sec) -training >> step=5556500, episode=927 reward=0.7948177 (499.92 it/sec) -training >> step=5556600, episode=927 reward=0.7909649 (445.60 it/sec) -training >> step=5556700, episode=927 reward=0.7890233 (448.77 it/sec) -training >> step=5556800, episode=927 reward=0.7969543 (486.72 it/sec) -training >> step=5556900, episode=927 reward=0.7838486 (428.84 it/sec) -training >> step=5557000, episode=927 reward=0.7765214 (463.35 it/sec) -training >> step=5557100, episode=927 reward=0.7805671 (444.97 it/sec) -training >> step=5557200, episode=927 reward=0.7745675 (432.84 it/sec) -training >> step=5557300, episode=927 reward=0.7777936 (452.88 it/sec) -training >> step=5557400, episode=927 reward=0.7933432 (438.20 it/sec) -training >> step=5557500, episode=927 reward=0.7734036 (461.65 it/sec) -training >> step=5557600, episode=927 reward=0.7755592 (460.34 it/sec) -training >> step=5557700, episode=927 reward=0.7851158 (452.26 it/sec) -training >> step=5557800, episode=927 reward=0.7693241 (467.82 it/sec) -training >> step=5557900, episode=927 reward=0.7802984 (503.70 it/sec) -training >> step=5558000, episode=927 reward=0.7769513 (467.96 it/sec) -training >> step=5558100, episode=927 reward=0.7615067 (418.60 it/sec) -training >> step=5558200, episode=927 reward=0.7847593 (470.01 it/sec) -training >> step=5558300, episode=927 reward=0.8070518 (450.10 it/sec) -training >> step=5558400, episode=927 reward=0.7890846 (436.96 it/sec) -training >> step=5558500, episode=927 reward=0.7892748 (413.49 it/sec) -training >> step=5558600, episode=927 reward=0.7805363 (436.68 it/sec) -training >> step=5558700, episode=927 reward=0.7732587 (429.08 it/sec) -training >> step=5558800, episode=927 reward=0.790177 (466.16 it/sec) -training >> step=5558900, episode=927 reward=0.7909735 (453.03 it/sec) -training >> step=5559000, episode=927 reward=0.8048692 (458.77 it/sec) -training >> step=5559100, episode=927 reward=0.7856333 (432.90 it/sec) -training >> step=5559200, episode=927 reward=0.7905826 (467.54 it/sec) -training >> step=5559300, episode=927 reward=0.804039 (480.16 it/sec) -training >> step=5559400, episode=927 reward=0.79012 (450.01 it/sec) -training >> step=5559500, episode=927 reward=0.7639714 (448.29 it/sec) -training >> step=5559600, episode=927 reward=0.7626937 (443.59 it/sec) -training >> step=5559700, episode=927 reward=0.7727553 (470.22 it/sec) -training >> step=5559800, episode=927 reward=0.7789599 (469.55 it/sec) -training >> step=5559900, episode=927 reward=0.7731215 (432.04 it/sec) -training >> step=5560000, episode=927 reward=0.78826 (489.82 it/sec) -training >> step=5560100, episode=927 reward=0.7583897 (438.00 it/sec) -training >> step=5560200, episode=927 reward=0.7764935 (450.08 it/sec) -training >> step=5560300, episode=927 reward=0.7795227 (455.15 it/sec) -training >> step=5560400, episode=927 reward=0.7655924 (467.42 it/sec) -training >> step=5560500, episode=927 reward=0.7828556 (431.31 it/sec) -training >> step=5560600, episode=927 reward=0.7346704 (442.77 it/sec) -training >> step=5560700, episode=927 reward=0.7734707 (468.19 it/sec) -training >> step=5560800, episode=927 reward=0.7896817 (478.53 it/sec) -training >> step=5560900, episode=927 reward=0.7516477 (429.82 it/sec) -training >> step=5561000, episode=927 reward=0.7592829 (458.00 it/sec) -training >> step=5561100, episode=927 reward=0.7673607 (468.74 it/sec) -training >> step=5561200, episode=927 reward=0.7567297 (454.94 it/sec) -training >> step=5561300, episode=928 reward=0.7886745 (74.90 it/sec) -training >> step=5561400, episode=928 reward=0.7732884 (457.54 it/sec) -training >> step=5561500, episode=928 reward=0.7793401 (463.98 it/sec) -training >> step=5561600, episode=928 reward=0.7869969 (462.64 it/sec) -training >> step=5561700, episode=928 reward=0.7787012 (442.76 it/sec) -training >> step=5561800, episode=928 reward=0.7931817 (451.91 it/sec) -training >> step=5561900, episode=928 reward=0.7801733 (465.60 it/sec) -training >> step=5562000, episode=928 reward=0.7718137 (478.15 it/sec) -training >> step=5562100, episode=928 reward=0.7633225 (468.58 it/sec) -training >> step=5562200, episode=928 reward=0.7944206 (440.07 it/sec) -training >> step=5562300, episode=928 reward=0.7940388 (451.31 it/sec) -training >> step=5562400, episode=928 reward=0.7900397 (476.28 it/sec) -training >> step=5562500, episode=928 reward=0.7931964 (460.65 it/sec) -training >> step=5562600, episode=928 reward=0.7815146 (454.22 it/sec) -training >> step=5562700, episode=928 reward=0.7854743 (455.78 it/sec) -training >> step=5562800, episode=928 reward=0.786904 (459.92 it/sec) -training >> step=5562900, episode=928 reward=0.7723134 (420.62 it/sec) -training >> step=5563000, episode=928 reward=0.7801949 (475.18 it/sec) -training >> step=5563100, episode=928 reward=0.7932047 (454.05 it/sec) -training >> step=5563200, episode=928 reward=0.7890821 (454.81 it/sec) -training >> step=5563300, episode=928 reward=0.7559888 (444.92 it/sec) -training >> step=5563400, episode=928 reward=0.8082821 (464.83 it/sec) -training >> step=5563500, episode=928 reward=0.7851483 (460.19 it/sec) -training >> step=5563600, episode=928 reward=0.7662574 (433.34 it/sec) -training >> step=5563700, episode=928 reward=0.7899016 (435.25 it/sec) -training >> step=5563800, episode=928 reward=0.7890465 (457.45 it/sec) -training >> step=5563900, episode=928 reward=0.7861235 (424.15 it/sec) -training >> step=5564000, episode=928 reward=0.7900518 (450.64 it/sec) -training >> step=5564100, episode=928 reward=0.7903377 (426.64 it/sec) -training >> step=5564200, episode=928 reward=0.8069304 (462.34 it/sec) -training >> step=5564300, episode=928 reward=0.7674266 (430.00 it/sec) -training >> step=5564400, episode=928 reward=0.7683433 (471.47 it/sec) -training >> step=5564500, episode=928 reward=0.7833429 (456.29 it/sec) -training >> step=5564600, episode=928 reward=0.7900335 (438.52 it/sec) -training >> step=5564700, episode=928 reward=0.7834393 (388.47 it/sec) -training >> step=5564800, episode=928 reward=0.7859766 (477.50 it/sec) -training >> step=5564900, episode=928 reward=0.7872962 (431.49 it/sec) -training >> step=5565000, episode=928 reward=0.7838042 (412.92 it/sec) -training >> step=5565100, episode=928 reward=0.7753503 (392.68 it/sec) -training >> step=5565200, episode=928 reward=0.7914115 (476.77 it/sec) -training >> step=5565300, episode=928 reward=0.7902033 (462.44 it/sec) -training >> step=5565400, episode=928 reward=0.7752284 (429.34 it/sec) -training >> step=5565500, episode=928 reward=0.7787135 (461.97 it/sec) -training >> step=5565600, episode=928 reward=0.7829766 (485.55 it/sec) -training >> step=5565700, episode=928 reward=0.7540776 (463.28 it/sec) -training >> step=5565800, episode=928 reward=0.7814168 (486.89 it/sec) -training >> step=5565900, episode=928 reward=0.776641 (466.35 it/sec) -training >> step=5566000, episode=928 reward=0.7969671 (466.23 it/sec) -training >> step=5566100, episode=928 reward=0.7670405 (441.70 it/sec) -training >> step=5566200, episode=928 reward=0.7719851 (468.18 it/sec) -training >> step=5566300, episode=928 reward=0.7912796 (472.85 it/sec) -training >> step=5566400, episode=928 reward=0.7703668 (450.10 it/sec) -training >> step=5566500, episode=928 reward=0.763917 (424.60 it/sec) -training >> step=5566600, episode=928 reward=0.7797229 (467.78 it/sec) -training >> step=5566700, episode=928 reward=0.7830761 (462.88 it/sec) -training >> step=5566800, episode=928 reward=0.7685672 (453.44 it/sec) -training >> step=5566900, episode=928 reward=0.7765552 (441.86 it/sec) -training >> step=5567000, episode=928 reward=0.7590751 (470.61 it/sec) -training >> step=5567100, episode=928 reward=0.7478154 (435.74 it/sec) -training >> step=5567200, episode=928 reward=0.7518969 (448.73 it/sec) -training >> step=5567300, episode=929 reward=0.7660885 (84.14 it/sec) -training >> step=5567400, episode=929 reward=0.7699154 (484.83 it/sec) -training >> step=5567500, episode=929 reward=0.7810426 (462.11 it/sec) -training >> step=5567600, episode=929 reward=0.7820379 (515.73 it/sec) -training >> step=5567700, episode=929 reward=0.7939074 (488.22 it/sec) -training >> step=5567800, episode=929 reward=0.782217 (462.51 it/sec) -training >> step=5567900, episode=929 reward=0.7794217 (442.45 it/sec) -training >> step=5568000, episode=929 reward=0.7854437 (448.09 it/sec) -training >> step=5568100, episode=929 reward=0.780037 (509.81 it/sec) -training >> step=5568200, episode=929 reward=0.7949215 (468.54 it/sec) -training >> step=5568300, episode=929 reward=0.7858905 (486.73 it/sec) -training >> step=5568400, episode=929 reward=0.7629719 (454.48 it/sec) -training >> step=5568500, episode=929 reward=0.7811817 (449.64 it/sec) -training >> step=5568600, episode=929 reward=0.7952627 (485.13 it/sec) -training >> step=5568700, episode=929 reward=0.7834744 (511.30 it/sec) -training >> step=5568800, episode=929 reward=0.790985 (430.63 it/sec) -training >> step=5568900, episode=929 reward=0.7545203 (475.04 it/sec) -training >> step=5569000, episode=929 reward=0.7896405 (486.70 it/sec) -training >> step=5569100, episode=929 reward=0.7777342 (487.95 it/sec) -training >> step=5569200, episode=929 reward=0.7984464 (474.42 it/sec) -training >> step=5569300, episode=929 reward=0.7884468 (470.29 it/sec) -training >> step=5569400, episode=929 reward=0.7505797 (470.23 it/sec) -training >> step=5569500, episode=929 reward=0.7579095 (473.22 it/sec) -training >> step=5569600, episode=929 reward=0.759052 (464.53 it/sec) -training >> step=5569700, episode=929 reward=0.7759676 (486.79 it/sec) -training >> step=5569800, episode=929 reward=0.7962334 (476.99 it/sec) -training >> step=5569900, episode=929 reward=0.7460206 (494.65 it/sec) -training >> step=5570000, episode=929 reward=0.7851781 (480.68 it/sec) -training >> step=5570100, episode=929 reward=0.7661447 (505.51 it/sec) -training >> step=5570200, episode=929 reward=0.7583228 (471.76 it/sec) -training >> step=5570300, episode=929 reward=0.7891204 (454.01 it/sec) -training >> step=5570400, episode=929 reward=0.7906885 (480.87 it/sec) -training >> step=5570500, episode=929 reward=0.7733322 (437.82 it/sec) -training >> step=5570600, episode=929 reward=0.7747075 (500.61 it/sec) -training >> step=5570700, episode=929 reward=0.7873241 (509.48 it/sec) -training >> step=5570800, episode=929 reward=0.789253 (466.20 it/sec) -training >> step=5570900, episode=929 reward=0.7903256 (492.28 it/sec) -training >> step=5571000, episode=929 reward=0.7569937 (519.83 it/sec) -training >> step=5571100, episode=929 reward=0.7832952 (490.43 it/sec) -training >> step=5571200, episode=929 reward=0.7723612 (519.33 it/sec) -training >> step=5571300, episode=929 reward=0.7624028 (460.71 it/sec) -training >> step=5571400, episode=929 reward=0.779938 (456.07 it/sec) -training >> step=5571500, episode=929 reward=0.7931813 (502.58 it/sec) -training >> step=5571600, episode=929 reward=0.7629963 (496.61 it/sec) -training >> step=5571700, episode=929 reward=0.7683287 (495.23 it/sec) -training >> step=5571800, episode=929 reward=0.7832937 (461.17 it/sec) -training >> step=5571900, episode=929 reward=0.7785569 (530.05 it/sec) -training >> step=5572000, episode=929 reward=0.7971442 (513.42 it/sec) -training >> step=5572100, episode=929 reward=0.7730143 (466.28 it/sec) -training >> step=5572200, episode=929 reward=0.7638136 (492.90 it/sec) -training >> step=5572300, episode=929 reward=0.7858263 (502.06 it/sec) -training >> step=5572400, episode=929 reward=0.7849739 (497.72 it/sec) -training >> step=5572500, episode=929 reward=0.7661484 (505.41 it/sec) -training >> step=5572600, episode=929 reward=0.778308 (489.15 it/sec) -training >> step=5572700, episode=929 reward=0.7703884 (513.20 it/sec) -training >> step=5572800, episode=929 reward=0.7903295 (482.49 it/sec) -training >> step=5572900, episode=929 reward=0.7662993 (511.11 it/sec) -training >> step=5573000, episode=929 reward=0.7541401 (498.13 it/sec) -training >> step=5573100, episode=929 reward=0.7774396 (493.84 it/sec) -training >> step=5573200, episode=929 reward=0.7735569 (482.04 it/sec) -training >> step=5573300, episode=930 reward=0.7708431 (105.96 it/sec) -training >> step=5573400, episode=930 reward=0.7730081 (482.49 it/sec) -training >> step=5573500, episode=930 reward=0.7813716 (485.97 it/sec) -training >> step=5573600, episode=930 reward=0.7895532 (471.17 it/sec) -training >> step=5573700, episode=930 reward=0.7780559 (503.58 it/sec) -training >> step=5573800, episode=930 reward=0.7808147 (472.69 it/sec) -training >> step=5573900, episode=930 reward=0.7806993 (447.93 it/sec) -training >> step=5574000, episode=930 reward=0.7715554 (504.41 it/sec) -training >> step=5574100, episode=930 reward=0.7747003 (520.06 it/sec) -training >> step=5574200, episode=930 reward=0.760752 (478.02 it/sec) -training >> step=5574300, episode=930 reward=0.7756599 (482.97 it/sec) -training >> step=5574400, episode=930 reward=0.7786936 (467.39 it/sec) -training >> step=5574500, episode=930 reward=0.7645687 (492.71 it/sec) -training >> step=5574600, episode=930 reward=0.7738841 (484.93 it/sec) -training >> step=5574700, episode=930 reward=0.7814986 (481.99 it/sec) -training >> step=5574800, episode=930 reward=0.7801102 (495.11 it/sec) -training >> step=5574900, episode=930 reward=0.7866602 (458.24 it/sec) -training >> step=5575000, episode=930 reward=0.7751952 (424.20 it/sec) -training >> step=5575100, episode=930 reward=0.7920106 (395.68 it/sec) -training >> step=5575200, episode=930 reward=0.797118 (523.15 it/sec) -training >> step=5575300, episode=930 reward=0.7899069 (482.17 it/sec) -training >> step=5575400, episode=930 reward=0.7882631 (443.07 it/sec) -training >> step=5575500, episode=930 reward=0.7882406 (488.44 it/sec) -training >> step=5575600, episode=930 reward=0.7851173 (451.12 it/sec) -training >> step=5575700, episode=930 reward=0.7931806 (464.03 it/sec) -training >> step=5575800, episode=930 reward=0.790396 (490.54 it/sec) -training >> step=5575900, episode=930 reward=0.7805604 (485.97 it/sec) -training >> step=5576000, episode=930 reward=0.7585794 (488.35 it/sec) -training >> step=5576100, episode=930 reward=0.8132047 (473.49 it/sec) -training >> step=5576200, episode=930 reward=0.7787329 (483.21 it/sec) -training >> step=5576300, episode=930 reward=0.7972909 (504.78 it/sec) -training >> step=5576400, episode=930 reward=0.7704054 (490.71 it/sec) -training >> step=5576500, episode=930 reward=0.8146148 (493.58 it/sec) -training >> step=5576600, episode=930 reward=0.8010136 (510.14 it/sec) -training >> step=5576700, episode=930 reward=0.7960432 (466.45 it/sec) -training >> step=5576800, episode=930 reward=0.7833238 (435.23 it/sec) -training >> step=5576900, episode=930 reward=0.7783148 (457.88 it/sec) -training >> step=5577000, episode=930 reward=0.7888031 (468.70 it/sec) -training >> step=5577100, episode=930 reward=0.7684528 (474.93 it/sec) -training >> step=5577200, episode=930 reward=0.8061841 (482.67 it/sec) -training >> step=5577300, episode=930 reward=0.8048913 (494.23 it/sec) -training >> step=5577400, episode=930 reward=0.7739518 (473.40 it/sec) -training >> step=5577500, episode=930 reward=0.7716814 (508.59 it/sec) -training >> step=5577600, episode=930 reward=0.7804694 (512.24 it/sec) -training >> step=5577700, episode=930 reward=0.7835721 (518.13 it/sec) -training >> step=5577800, episode=930 reward=0.7976329 (494.34 it/sec) -training >> step=5577900, episode=930 reward=0.7883545 (449.95 it/sec) -training >> step=5578000, episode=930 reward=0.774909 (515.41 it/sec) -training >> step=5578100, episode=930 reward=0.7738355 (496.58 it/sec) -training >> step=5578200, episode=930 reward=0.8011346 (500.63 it/sec) -training >> step=5578300, episode=930 reward=0.7724434 (489.58 it/sec) -training >> step=5578400, episode=930 reward=0.7783851 (477.67 it/sec) -training >> step=5578500, episode=930 reward=0.7736686 (450.74 it/sec) -training >> step=5578600, episode=930 reward=0.7844939 (461.11 it/sec) -training >> step=5578700, episode=930 reward=0.7597096 (495.94 it/sec) -training >> step=5578800, episode=930 reward=0.7777908 (542.51 it/sec) -training >> step=5578900, episode=930 reward=0.7834446 (480.56 it/sec) -training >> step=5579000, episode=930 reward=0.7862982 (493.35 it/sec) -training >> step=5579100, episode=930 reward=0.7754127 (543.33 it/sec) -training >> step=5579200, episode=930 reward=0.7863878 (518.69 it/sec) -training >> step=5579300, episode=931 reward=0.7691474 (109.15 it/sec) -training >> step=5579400, episode=931 reward=0.7800573 (377.43 it/sec) -training >> step=5579500, episode=931 reward=0.7831556 (530.90 it/sec) -training >> step=5579600, episode=931 reward=0.7747027 (464.73 it/sec) -training >> step=5579700, episode=931 reward=0.7812771 (445.66 it/sec) -training >> step=5579800, episode=931 reward=0.7949076 (500.99 it/sec) -training >> step=5579900, episode=931 reward=0.78041 (510.97 it/sec) -training >> step=5580000, episode=931 reward=0.7935121 (486.74 it/sec) -training >> step=5580100, episode=931 reward=0.7721396 (507.99 it/sec) -training >> step=5580200, episode=931 reward=0.7721438 (532.83 it/sec) -training >> step=5580300, episode=931 reward=0.7855788 (479.61 it/sec) -training >> step=5580400, episode=931 reward=0.7751801 (470.62 it/sec) -training >> step=5580500, episode=931 reward=0.7654937 (493.95 it/sec) -training >> step=5580600, episode=931 reward=0.7881683 (515.20 it/sec) -training >> step=5580700, episode=931 reward=0.7720616 (532.52 it/sec) -training >> step=5580800, episode=931 reward=0.7854109 (475.90 it/sec) -training >> step=5580900, episode=931 reward=0.7782997 (532.80 it/sec) -training >> step=5581000, episode=931 reward=0.7783887 (504.18 it/sec) -training >> step=5581100, episode=931 reward=0.7879568 (517.99 it/sec) -training >> step=5581200, episode=931 reward=0.7948169 (526.50 it/sec) -training >> step=5581300, episode=931 reward=0.7880358 (535.06 it/sec) -training >> step=5581400, episode=931 reward=0.7845118 (535.25 it/sec) -training >> step=5581500, episode=931 reward=0.7771709 (490.29 it/sec) -training >> step=5581600, episode=931 reward=0.7969123 (470.35 it/sec) -training >> step=5581700, episode=931 reward=0.7825133 (540.54 it/sec) -training >> step=5581800, episode=931 reward=0.7801753 (512.96 it/sec) -training >> step=5581900, episode=931 reward=0.7736093 (520.80 it/sec) -training >> step=5582000, episode=931 reward=0.772307 (518.85 it/sec) -training >> step=5582100, episode=931 reward=0.7919844 (481.82 it/sec) -training >> step=5582200, episode=931 reward=0.7764544 (535.37 it/sec) -training >> step=5582300, episode=931 reward=0.7959234 (521.98 it/sec) -training >> step=5582400, episode=931 reward=0.7985922 (530.41 it/sec) -training >> step=5582500, episode=931 reward=0.7917877 (514.25 it/sec) -training >> step=5582600, episode=931 reward=0.7835692 (478.43 it/sec) -training >> step=5582700, episode=931 reward=0.813518 (529.62 it/sec) -training >> step=5582800, episode=931 reward=0.7712733 (527.85 it/sec) -training >> step=5582900, episode=931 reward=0.7785754 (538.96 it/sec) -training >> step=5583000, episode=931 reward=0.7954742 (518.21 it/sec) -training >> step=5583100, episode=931 reward=0.7937158 (541.05 it/sec) -training >> step=5583200, episode=931 reward=0.8023033 (505.64 it/sec) -training >> step=5583300, episode=931 reward=0.7746829 (442.38 it/sec) -training >> step=5583400, episode=931 reward=0.7786709 (482.05 it/sec) -training >> step=5583500, episode=931 reward=0.7830711 (496.84 it/sec) -training >> step=5583600, episode=931 reward=0.7856923 (494.31 it/sec) -training >> step=5583700, episode=931 reward=0.7694033 (496.37 it/sec) -training >> step=5583800, episode=931 reward=0.7859739 (533.76 it/sec) -training >> step=5583900, episode=931 reward=0.7733004 (487.98 it/sec) -training >> step=5584000, episode=931 reward=0.7787504 (513.49 it/sec) -training >> step=5584100, episode=931 reward=0.7873113 (512.52 it/sec) -training >> step=5584200, episode=931 reward=0.7979574 (518.63 it/sec) -training >> step=5584300, episode=931 reward=0.7802478 (511.39 it/sec) -training >> step=5584400, episode=931 reward=0.7884353 (501.41 it/sec) -training >> step=5584500, episode=931 reward=0.789683 (528.41 it/sec) -training >> step=5584600, episode=931 reward=0.7776167 (510.47 it/sec) -training >> step=5584700, episode=931 reward=0.7743999 (510.57 it/sec) -training >> step=5584800, episode=931 reward=0.7770375 (511.73 it/sec) -training >> step=5584900, episode=931 reward=0.7963862 (540.06 it/sec) -training >> step=5585000, episode=931 reward=0.7783682 (524.14 it/sec) -training >> step=5585100, episode=931 reward=0.7861432 (510.47 it/sec) -training >> step=5585200, episode=931 reward=0.7876794 (534.28 it/sec) -training >> step=5585300, episode=932 reward=0.7664444 (89.46 it/sec) -training >> step=5585400, episode=932 reward=0.8027149 (512.75 it/sec) -training >> step=5585500, episode=932 reward=0.7962713 (402.89 it/sec) -training >> step=5585600, episode=932 reward=0.8005704 (538.76 it/sec) -training >> step=5585700, episode=932 reward=0.7951407 (517.89 it/sec) -training >> step=5585800, episode=932 reward=0.7751634 (497.12 it/sec) -training >> step=5585900, episode=932 reward=0.7791512 (511.46 it/sec) -training >> step=5586000, episode=932 reward=0.777269 (530.62 it/sec) -training >> step=5586100, episode=932 reward=0.7827879 (496.80 it/sec) -training >> step=5586200, episode=932 reward=0.7830684 (477.77 it/sec) -training >> step=5586300, episode=932 reward=0.7708076 (519.33 it/sec) -training >> step=5586400, episode=932 reward=0.7822219 (465.83 it/sec) -training >> step=5586500, episode=932 reward=0.7549741 (469.63 it/sec) -training >> step=5586600, episode=932 reward=0.7925545 (472.62 it/sec) -training >> step=5586700, episode=932 reward=0.7671355 (507.91 it/sec) -training >> step=5586800, episode=932 reward=0.7881866 (490.22 it/sec) -training >> step=5586900, episode=932 reward=0.7942827 (493.00 it/sec) -training >> step=5587000, episode=932 reward=0.7809707 (517.39 it/sec) -training >> step=5587100, episode=932 reward=0.7634653 (520.12 it/sec) -training >> step=5587200, episode=932 reward=0.7720773 (469.26 it/sec) -training >> step=5587300, episode=932 reward=0.7943039 (490.75 it/sec) -training >> step=5587400, episode=932 reward=0.7883438 (536.14 it/sec) -training >> step=5587500, episode=932 reward=0.7886592 (485.11 it/sec) -training >> step=5587600, episode=932 reward=0.7952436 (504.82 it/sec) -training >> step=5587700, episode=932 reward=0.7815207 (503.83 it/sec) -training >> step=5587800, episode=932 reward=0.7956736 (508.50 it/sec) -training >> step=5587900, episode=932 reward=0.7983334 (480.19 it/sec) -training >> step=5588000, episode=932 reward=0.7838085 (496.97 it/sec) -training >> step=5588100, episode=932 reward=0.7637964 (525.74 it/sec) -training >> step=5588200, episode=932 reward=0.7989317 (489.97 it/sec) -training >> step=5588300, episode=932 reward=0.7675286 (493.71 it/sec) -training >> step=5588400, episode=932 reward=0.7723101 (473.42 it/sec) -training >> step=5588500, episode=932 reward=0.7810361 (533.83 it/sec) -training >> step=5588600, episode=932 reward=0.7672791 (481.19 it/sec) -training >> step=5588700, episode=932 reward=0.7735971 (521.07 it/sec) -training >> step=5588800, episode=932 reward=0.7664094 (557.82 it/sec) -training >> step=5588900, episode=932 reward=0.7824876 (489.38 it/sec) -training >> step=5589000, episode=932 reward=0.7810262 (522.64 it/sec) -training >> step=5589100, episode=932 reward=0.7864391 (504.54 it/sec) -training >> step=5589200, episode=932 reward=0.7895006 (551.75 it/sec) -training >> step=5589300, episode=932 reward=0.7892584 (493.02 it/sec) -training >> step=5589400, episode=932 reward=0.7808929 (445.40 it/sec) -training >> step=5589500, episode=932 reward=0.7883215 (532.17 it/sec) -training >> step=5589600, episode=932 reward=0.7964016 (508.52 it/sec) -training >> step=5589700, episode=932 reward=0.7875044 (475.57 it/sec) -training >> step=5589800, episode=932 reward=0.7651892 (467.84 it/sec) -training >> step=5589900, episode=932 reward=0.7879488 (462.74 it/sec) -training >> step=5590000, episode=932 reward=0.7859987 (470.98 it/sec) -training >> step=5590100, episode=932 reward=0.7828772 (430.16 it/sec) -training >> step=5590200, episode=932 reward=0.7970127 (488.22 it/sec) -training >> step=5590300, episode=932 reward=0.796158 (479.83 it/sec) -training >> step=5590400, episode=932 reward=0.7792285 (446.34 it/sec) -training >> step=5590500, episode=932 reward=0.790561 (486.98 it/sec) -training >> step=5590600, episode=932 reward=0.750348 (510.01 it/sec) -training >> step=5590700, episode=932 reward=0.7716662 (490.96 it/sec) -training >> step=5590800, episode=932 reward=0.7638463 (495.70 it/sec) -training >> step=5590900, episode=932 reward=0.7852705 (460.23 it/sec) -training >> step=5591000, episode=932 reward=0.7849743 (500.67 it/sec) -training >> step=5591100, episode=932 reward=0.7946082 (461.41 it/sec) -training >> step=5591200, episode=932 reward=0.7783564 (488.46 it/sec) -training >> step=5591300, episode=933 reward=0.7742746 (121.20 it/sec) -training >> step=5591400, episode=933 reward=0.7413359 (499.84 it/sec) -training >> step=5591500, episode=933 reward=0.781082 (484.61 it/sec) -training >> step=5591600, episode=933 reward=0.7790858 (392.27 it/sec) -training >> step=5591700, episode=933 reward=0.7636572 (520.57 it/sec) -training >> step=5591800, episode=933 reward=0.7803861 (497.81 it/sec) -training >> step=5591900, episode=933 reward=0.7849636 (480.22 it/sec) -training >> step=5592000, episode=933 reward=0.7869316 (486.35 it/sec) -training >> step=5592100, episode=933 reward=0.7818387 (490.11 it/sec) -training >> step=5592200, episode=933 reward=0.7672696 (540.18 it/sec) -training >> step=5592300, episode=933 reward=0.799122 (439.68 it/sec) -training >> step=5592400, episode=933 reward=0.7495524 (453.62 it/sec) -training >> step=5592500, episode=933 reward=0.7835669 (479.00 it/sec) -training >> step=5592600, episode=933 reward=0.7914624 (481.84 it/sec) -training >> step=5592700, episode=933 reward=0.7668865 (513.87 it/sec) -training >> step=5592800, episode=933 reward=0.7777159 (521.76 it/sec) -training >> step=5592900, episode=933 reward=0.7681991 (490.04 it/sec) -training >> step=5593000, episode=933 reward=0.7934843 (488.38 it/sec) -training >> step=5593100, episode=933 reward=0.7784404 (467.01 it/sec) -training >> step=5593200, episode=933 reward=0.785869 (503.58 it/sec) -training >> step=5593300, episode=933 reward=0.7795284 (490.76 it/sec) -training >> step=5593400, episode=933 reward=0.7980865 (511.24 it/sec) -training >> step=5593500, episode=933 reward=0.7701874 (506.82 it/sec) -training >> step=5593600, episode=933 reward=0.7872643 (483.29 it/sec) -training >> step=5593700, episode=933 reward=0.7903349 (515.42 it/sec) -training >> step=5593800, episode=933 reward=0.777994 (502.72 it/sec) -training >> step=5593900, episode=933 reward=0.7844137 (483.43 it/sec) -training >> step=5594000, episode=933 reward=0.7739779 (472.08 it/sec) -training >> step=5594100, episode=933 reward=0.7939051 (439.25 it/sec) -training >> step=5594200, episode=933 reward=0.7944055 (505.20 it/sec) -training >> step=5594300, episode=933 reward=0.7563645 (474.93 it/sec) -training >> step=5594400, episode=933 reward=0.7719339 (426.74 it/sec) -training >> step=5594500, episode=933 reward=0.7713466 (412.26 it/sec) -training >> step=5594600, episode=933 reward=0.770672 (469.08 it/sec) -training >> step=5594700, episode=933 reward=0.7835537 (398.67 it/sec) -training >> step=5594800, episode=933 reward=0.7788908 (452.32 it/sec) -training >> step=5594900, episode=933 reward=0.7702683 (476.87 it/sec) -training >> step=5595000, episode=933 reward=0.7936198 (454.46 it/sec) -training >> step=5595100, episode=933 reward=0.7786497 (468.24 it/sec) -training >> step=5595200, episode=933 reward=0.8036008 (476.85 it/sec) -training >> step=5595300, episode=933 reward=0.7767918 (470.14 it/sec) -training >> step=5595400, episode=933 reward=0.7945443 (456.64 it/sec) -training >> step=5595500, episode=933 reward=0.7978198 (464.72 it/sec) -training >> step=5595600, episode=933 reward=0.7743423 (449.36 it/sec) -training >> step=5595700, episode=933 reward=0.7582752 (504.12 it/sec) -training >> step=5595800, episode=933 reward=0.7938926 (438.09 it/sec) -training >> step=5595900, episode=933 reward=0.7648325 (442.85 it/sec) -training >> step=5596000, episode=933 reward=0.7901289 (487.79 it/sec) -training >> step=5596100, episode=933 reward=0.7935494 (464.58 it/sec) -training >> step=5596200, episode=933 reward=0.7796497 (469.94 it/sec) -training >> step=5596300, episode=933 reward=0.7803981 (457.53 it/sec) -training >> step=5596400, episode=933 reward=0.7949161 (458.31 it/sec) -training >> step=5596500, episode=933 reward=0.7970504 (471.77 it/sec) -training >> step=5596600, episode=933 reward=0.7630841 (444.68 it/sec) -training >> step=5596700, episode=933 reward=0.7788544 (475.77 it/sec) -training >> step=5596800, episode=933 reward=0.7829658 (457.96 it/sec) -training >> step=5596900, episode=933 reward=0.7846876 (455.24 it/sec) -training >> step=5597000, episode=933 reward=0.7722526 (469.35 it/sec) -training >> step=5597100, episode=933 reward=0.760455 (440.28 it/sec) -training >> step=5597200, episode=933 reward=0.7820894 (435.89 it/sec) -training >> step=5597300, episode=934 reward=0.7838427 (79.12 it/sec) -training >> step=5597400, episode=934 reward=0.7754819 (449.58 it/sec) -training >> step=5597500, episode=934 reward=0.7862298 (472.63 it/sec) -training >> step=5597600, episode=934 reward=0.8015167 (449.46 it/sec) -training >> step=5597700, episode=934 reward=0.7915117 (494.40 it/sec) -training >> step=5597800, episode=934 reward=0.7727044 (327.91 it/sec) -training >> step=5597900, episode=934 reward=0.7712066 (438.78 it/sec) -training >> step=5598000, episode=934 reward=0.7824206 (438.04 it/sec) -training >> step=5598100, episode=934 reward=0.780148 (506.51 it/sec) -training >> step=5598200, episode=934 reward=0.7808872 (411.73 it/sec) -training >> step=5598300, episode=934 reward=0.7797529 (436.66 it/sec) -training >> step=5598400, episode=934 reward=0.7855638 (449.15 it/sec) -training >> step=5598500, episode=934 reward=0.7722511 (456.75 it/sec) -training >> step=5598600, episode=934 reward=0.7855482 (448.60 it/sec) -training >> step=5598700, episode=934 reward=0.7911773 (437.37 it/sec) -training >> step=5598800, episode=934 reward=0.7912676 (465.69 it/sec) -training >> step=5598900, episode=934 reward=0.7757818 (466.73 it/sec) -training >> step=5599000, episode=934 reward=0.7866359 (424.70 it/sec) -training >> step=5599100, episode=934 reward=0.7770519 (453.19 it/sec) -training >> step=5599200, episode=934 reward=0.7744758 (488.11 it/sec) -training >> step=5599300, episode=934 reward=0.8163207 (448.04 it/sec) -training >> step=5599400, episode=934 reward=0.7761574 (470.77 it/sec) -training >> step=5599500, episode=934 reward=0.7932944 (486.60 it/sec) -training >> step=5599600, episode=934 reward=0.7886124 (486.43 it/sec) -training >> step=5599700, episode=934 reward=0.7956849 (483.81 it/sec) -training >> step=5599800, episode=934 reward=0.7759879 (503.17 it/sec) -training >> step=5599900, episode=934 reward=0.7943865 (451.91 it/sec) -training >> step=5600000, episode=934 reward=0.7928663 (469.36 it/sec) -training >> step=5600100, episode=934 reward=0.8115975 (427.63 it/sec) -training >> step=5600200, episode=934 reward=0.8049443 (447.08 it/sec) -training >> step=5600300, episode=934 reward=0.7762368 (487.39 it/sec) -training >> step=5600400, episode=934 reward=0.7664081 (438.79 it/sec) -training >> step=5600500, episode=934 reward=0.7928783 (491.64 it/sec) -training >> step=5600600, episode=934 reward=0.7925135 (454.35 it/sec) -training >> step=5600700, episode=934 reward=0.7716247 (477.11 it/sec) -training >> step=5600800, episode=934 reward=0.7745886 (471.77 it/sec) -training >> step=5600900, episode=934 reward=0.7938752 (498.86 it/sec) -training >> step=5601000, episode=934 reward=0.7698907 (471.09 it/sec) -training >> step=5601100, episode=934 reward=0.7735724 (455.65 it/sec) -training >> step=5601200, episode=934 reward=0.7973187 (474.14 it/sec) -training >> step=5601300, episode=934 reward=0.8028623 (481.44 it/sec) -training >> step=5601400, episode=934 reward=0.8043549 (467.54 it/sec) -training >> step=5601500, episode=934 reward=0.7732922 (465.11 it/sec) -training >> step=5601600, episode=934 reward=0.7953274 (450.26 it/sec) -training >> step=5601700, episode=934 reward=0.7872205 (461.38 it/sec) -training >> step=5601800, episode=934 reward=0.7772768 (454.50 it/sec) -training >> step=5601900, episode=934 reward=0.7924903 (476.42 it/sec) -training >> step=5602000, episode=934 reward=0.7903338 (539.71 it/sec) -training >> step=5602100, episode=934 reward=0.7923908 (461.64 it/sec) -training >> step=5602200, episode=934 reward=0.7873608 (471.17 it/sec) -training >> step=5602300, episode=934 reward=0.7972062 (447.47 it/sec) -training >> step=5602400, episode=934 reward=0.793246 (466.25 it/sec) -training >> step=5602500, episode=934 reward=0.7869927 (478.10 it/sec) -training >> step=5602600, episode=934 reward=0.8012401 (462.97 it/sec) -training >> step=5602700, episode=934 reward=0.7920957 (439.58 it/sec) -training >> step=5602800, episode=934 reward=0.7820681 (464.98 it/sec) -training >> step=5602900, episode=934 reward=0.7807192 (465.27 it/sec) -training >> step=5603000, episode=934 reward=0.7791829 (500.71 it/sec) -training >> step=5603100, episode=934 reward=0.7947618 (491.94 it/sec) -training >> step=5603200, episode=934 reward=0.7790477 (439.32 it/sec) -training >> step=5603300, episode=935 reward=0.771401 (64.98 it/sec) -training >> step=5603400, episode=935 reward=0.7864701 (423.74 it/sec) -training >> step=5603500, episode=935 reward=0.7861992 (474.77 it/sec) -training >> step=5603600, episode=935 reward=0.7696681 (467.98 it/sec) -training >> step=5603700, episode=935 reward=0.7636696 (486.65 it/sec) -training >> step=5603800, episode=935 reward=0.7902715 (465.58 it/sec) -training >> step=5603900, episode=935 reward=0.765165 (423.46 it/sec) -training >> step=5604000, episode=935 reward=0.7629182 (448.76 it/sec) -training >> step=5604100, episode=935 reward=0.7839025 (317.08 it/sec) -training >> step=5604200, episode=935 reward=0.7821731 (470.05 it/sec) -training >> step=5604300, episode=935 reward=0.7861305 (482.89 it/sec) -training >> step=5604400, episode=935 reward=0.782497 (484.62 it/sec) -training >> step=5604500, episode=935 reward=0.8018787 (475.81 it/sec) -training >> step=5604600, episode=935 reward=0.7623016 (452.95 it/sec) -training >> step=5604700, episode=935 reward=0.7855586 (503.09 it/sec) -training >> step=5604800, episode=935 reward=0.7847081 (479.11 it/sec) -training >> step=5604900, episode=935 reward=0.7834119 (455.37 it/sec) -training >> step=5605000, episode=935 reward=0.7928956 (415.11 it/sec) -training >> step=5605100, episode=935 reward=0.7836105 (419.99 it/sec) -training >> step=5605200, episode=935 reward=0.7923031 (366.51 it/sec) -training >> step=5605300, episode=935 reward=0.7763349 (461.51 it/sec) -training >> step=5605400, episode=935 reward=0.8036526 (424.37 it/sec) -training >> step=5605500, episode=935 reward=0.7813045 (419.74 it/sec) -training >> step=5605600, episode=935 reward=0.7921261 (413.68 it/sec) -training >> step=5605700, episode=935 reward=0.7992876 (458.91 it/sec) -training >> step=5605800, episode=935 reward=0.7919658 (479.12 it/sec) -training >> step=5605900, episode=935 reward=0.7787558 (481.09 it/sec) -training >> step=5606000, episode=935 reward=0.7847537 (451.57 it/sec) -training >> step=5606100, episode=935 reward=0.7778739 (477.80 it/sec) -training >> step=5606200, episode=935 reward=0.7737396 (471.58 it/sec) -training >> step=5606300, episode=935 reward=0.7931252 (476.04 it/sec) -training >> step=5606400, episode=935 reward=0.7914323 (489.01 it/sec) -training >> step=5606500, episode=935 reward=0.8028145 (457.55 it/sec) -training >> step=5606600, episode=935 reward=0.7754727 (424.28 it/sec) -training >> step=5606700, episode=935 reward=0.7812484 (460.34 it/sec) -training >> step=5606800, episode=935 reward=0.7900221 (478.81 it/sec) -training >> step=5606900, episode=935 reward=0.7844583 (472.61 it/sec) -training >> step=5607000, episode=935 reward=0.7813051 (474.62 it/sec) -training >> step=5607100, episode=935 reward=0.8069028 (491.52 it/sec) -training >> step=5607200, episode=935 reward=0.7918913 (454.87 it/sec) -training >> step=5607300, episode=935 reward=0.7663992 (437.52 it/sec) -training >> step=5607400, episode=935 reward=0.7927969 (476.95 it/sec) -training >> step=5607500, episode=935 reward=0.7732441 (513.27 it/sec) -training >> step=5607600, episode=935 reward=0.7945359 (445.84 it/sec) -training >> step=5607700, episode=935 reward=0.7740387 (445.87 it/sec) -training >> step=5607800, episode=935 reward=0.7642145 (487.67 it/sec) -training >> step=5607900, episode=935 reward=0.7723075 (442.34 it/sec) -training >> step=5608000, episode=935 reward=0.7776883 (477.39 it/sec) -training >> step=5608100, episode=935 reward=0.791062 (450.44 it/sec) -training >> step=5608200, episode=935 reward=0.7921972 (456.33 it/sec) -training >> step=5608300, episode=935 reward=0.79137 (467.47 it/sec) -training >> step=5608400, episode=935 reward=0.7944291 (445.76 it/sec) -training >> step=5608500, episode=935 reward=0.7970509 (516.25 it/sec) -training >> step=5608600, episode=935 reward=0.7922934 (472.39 it/sec) -training >> step=5608700, episode=935 reward=0.7795462 (473.49 it/sec) -training >> step=5608800, episode=935 reward=0.7724086 (434.02 it/sec) -training >> step=5608900, episode=935 reward=0.7938094 (483.48 it/sec) -training >> step=5609000, episode=935 reward=0.7961079 (444.67 it/sec) -training >> step=5609100, episode=935 reward=0.7819521 (494.99 it/sec) -training >> step=5609200, episode=935 reward=0.7533712 (440.93 it/sec) -training >> step=5609300, episode=936 reward=0.7701978 (69.49 it/sec) -training >> step=5609400, episode=936 reward=0.7644987 (435.66 it/sec) -training >> step=5609500, episode=936 reward=0.7830654 (490.63 it/sec) -training >> step=5609600, episode=936 reward=0.7659741 (475.54 it/sec) -training >> step=5609700, episode=936 reward=0.7774237 (446.47 it/sec) -training >> step=5609800, episode=936 reward=0.789325 (468.43 it/sec) -training >> step=5609900, episode=936 reward=0.7975919 (453.56 it/sec) -training >> step=5610000, episode=936 reward=0.7745847 (485.64 it/sec) -training >> step=5610100, episode=936 reward=0.7949303 (479.04 it/sec) -training >> step=5610200, episode=936 reward=0.7795495 (504.86 it/sec) -training >> step=5610300, episode=936 reward=0.7808778 (466.81 it/sec) -training >> step=5610400, episode=936 reward=0.7713107 (317.43 it/sec) -training >> step=5610500, episode=936 reward=0.7971241 (475.10 it/sec) -training >> step=5610600, episode=936 reward=0.7698804 (423.08 it/sec) -training >> step=5610700, episode=936 reward=0.7797802 (493.02 it/sec) -training >> step=5610800, episode=936 reward=0.7881246 (434.01 it/sec) -training >> step=5610900, episode=936 reward=0.7774543 (470.94 it/sec) -training >> step=5611000, episode=936 reward=0.7814134 (471.05 it/sec) -training >> step=5611100, episode=936 reward=0.7651609 (483.23 it/sec) -training >> step=5611200, episode=936 reward=0.7771723 (488.18 it/sec) -training >> step=5611300, episode=936 reward=0.7890009 (444.32 it/sec) -training >> step=5611400, episode=936 reward=0.7917519 (451.47 it/sec) -training >> step=5611500, episode=936 reward=0.8126531 (471.89 it/sec) -training >> step=5611600, episode=936 reward=0.7799777 (457.43 it/sec) -training >> step=5611700, episode=936 reward=0.7996349 (464.21 it/sec) -training >> step=5611800, episode=936 reward=0.767855 (445.65 it/sec) -training >> step=5611900, episode=936 reward=0.76317 (443.70 it/sec) -training >> step=5612000, episode=936 reward=0.7837908 (431.21 it/sec) -training >> step=5612100, episode=936 reward=0.7904954 (477.33 it/sec) -training >> step=5612200, episode=936 reward=0.7894884 (402.73 it/sec) -training >> step=5612300, episode=936 reward=0.7725827 (481.45 it/sec) -training >> step=5612400, episode=936 reward=0.7702237 (428.38 it/sec) -training >> step=5612500, episode=936 reward=0.7707663 (470.83 it/sec) -training >> step=5612600, episode=936 reward=0.791445 (469.08 it/sec) -training >> step=5612700, episode=936 reward=0.7867615 (391.11 it/sec) -training >> step=5612800, episode=936 reward=0.8049012 (447.45 it/sec) -training >> step=5612900, episode=936 reward=0.8105845 (458.77 it/sec) -training >> step=5613000, episode=936 reward=0.7814315 (487.84 it/sec) -training >> step=5613100, episode=936 reward=0.770843 (457.86 it/sec) -training >> step=5613200, episode=936 reward=0.7976816 (415.88 it/sec) -training >> step=5613300, episode=936 reward=0.7913909 (454.83 it/sec) -training >> step=5613400, episode=936 reward=0.7936304 (487.26 it/sec) -training >> step=5613500, episode=936 reward=0.7787095 (493.67 it/sec) -training >> step=5613600, episode=936 reward=0.7733256 (447.07 it/sec) -training >> step=5613700, episode=936 reward=0.7719947 (461.80 it/sec) -training >> step=5613800, episode=936 reward=0.7724837 (434.79 it/sec) -training >> step=5613900, episode=936 reward=0.7750438 (431.83 it/sec) -training >> step=5614000, episode=936 reward=0.7939537 (494.01 it/sec) -training >> step=5614100, episode=936 reward=0.7722703 (449.39 it/sec) -training >> step=5614200, episode=936 reward=0.7955999 (448.51 it/sec) -training >> step=5614300, episode=936 reward=0.7748986 (476.99 it/sec) -training >> step=5614400, episode=936 reward=0.7823102 (479.18 it/sec) -training >> step=5614500, episode=936 reward=0.786809 (430.66 it/sec) -training >> step=5614600, episode=936 reward=0.7932004 (460.12 it/sec) -training >> step=5614700, episode=936 reward=0.7799406 (463.41 it/sec) -training >> step=5614800, episode=936 reward=0.7764201 (507.25 it/sec) -training >> step=5614900, episode=936 reward=0.7793662 (512.50 it/sec) -training >> step=5615000, episode=936 reward=0.7746539 (463.51 it/sec) -training >> step=5615100, episode=936 reward=0.766453 (472.53 it/sec) -training >> step=5615200, episode=936 reward=0.7780682 (510.99 it/sec) -training >> step=5615300, episode=937 reward=0.7801658 (91.81 it/sec) -training >> step=5615400, episode=937 reward=0.7720033 (457.76 it/sec) -training >> step=5615500, episode=937 reward=0.786711 (476.06 it/sec) -training >> step=5615600, episode=937 reward=0.7776413 (403.98 it/sec) -training >> step=5615700, episode=937 reward=0.7669863 (428.50 it/sec) -training >> step=5615800, episode=937 reward=0.7975205 (503.43 it/sec) -training >> step=5615900, episode=937 reward=0.7863172 (459.36 it/sec) -training >> step=5616000, episode=937 reward=0.7724507 (437.78 it/sec) -training >> step=5616100, episode=937 reward=0.7705671 (487.63 it/sec) -training >> step=5616200, episode=937 reward=0.7838488 (460.56 it/sec) -training >> step=5616300, episode=937 reward=0.7924458 (494.62 it/sec) -training >> step=5616400, episode=937 reward=0.7677603 (462.04 it/sec) -training >> step=5616500, episode=937 reward=0.7842597 (381.26 it/sec) -training >> step=5616600, episode=937 reward=0.7867846 (466.19 it/sec) -training >> step=5616700, episode=937 reward=0.8046314 (487.06 it/sec) -training >> step=5616800, episode=937 reward=0.7820292 (475.09 it/sec) -training >> step=5616900, episode=937 reward=0.7658979 (460.92 it/sec) -training >> step=5617000, episode=937 reward=0.7755634 (449.27 it/sec) -training >> step=5617100, episode=937 reward=0.8069295 (477.94 it/sec) -training >> step=5617200, episode=937 reward=0.7744554 (461.86 it/sec) -training >> step=5617300, episode=937 reward=0.7788965 (480.77 it/sec) -training >> step=5617400, episode=937 reward=0.7916358 (478.65 it/sec) -training >> step=5617500, episode=937 reward=0.7789282 (490.86 it/sec) -training >> step=5617600, episode=937 reward=0.7881013 (471.96 it/sec) -training >> step=5617700, episode=937 reward=0.7796598 (410.83 it/sec) -training >> step=5617800, episode=937 reward=0.7747383 (467.70 it/sec) -training >> step=5617900, episode=937 reward=0.7852938 (470.01 it/sec) -training >> step=5618000, episode=937 reward=0.7826179 (454.80 it/sec) -training >> step=5618100, episode=937 reward=0.8020494 (490.47 it/sec) -training >> step=5618200, episode=937 reward=0.7828678 (491.66 it/sec) -training >> step=5618300, episode=937 reward=0.7934477 (511.46 it/sec) -training >> step=5618400, episode=937 reward=0.7654944 (466.80 it/sec) -training >> step=5618500, episode=937 reward=0.7881956 (406.83 it/sec) -training >> step=5618600, episode=937 reward=0.790583 (477.78 it/sec) -training >> step=5618700, episode=937 reward=0.7668285 (471.80 it/sec) -training >> step=5618800, episode=937 reward=0.7711199 (502.08 it/sec) -training >> step=5618900, episode=937 reward=0.7702743 (437.07 it/sec) -training >> step=5619000, episode=937 reward=0.7862655 (480.17 it/sec) -training >> step=5619100, episode=937 reward=0.8013768 (466.75 it/sec) -training >> step=5619200, episode=937 reward=0.7904547 (462.99 it/sec) -training >> step=5619300, episode=937 reward=0.7924477 (461.29 it/sec) -training >> step=5619400, episode=937 reward=0.7991285 (486.20 it/sec) -training >> step=5619500, episode=937 reward=0.776948 (465.23 it/sec) -training >> step=5619600, episode=937 reward=0.7792062 (445.03 it/sec) -training >> step=5619700, episode=937 reward=0.7833679 (519.30 it/sec) -training >> step=5619800, episode=937 reward=0.7757981 (446.77 it/sec) -training >> step=5619900, episode=937 reward=0.7868585 (445.00 it/sec) -training >> step=5620000, episode=937 reward=0.8010249 (428.59 it/sec) -training >> step=5620100, episode=937 reward=0.7794513 (550.12 it/sec) -training >> step=5620200, episode=937 reward=0.7741376 (491.19 it/sec) -training >> step=5620300, episode=937 reward=0.7832197 (451.04 it/sec) -training >> step=5620400, episode=937 reward=0.7918607 (483.14 it/sec) -training >> step=5620500, episode=937 reward=0.7843557 (471.08 it/sec) -training >> step=5620600, episode=937 reward=0.7752188 (514.46 it/sec) -training >> step=5620700, episode=937 reward=0.7896186 (490.02 it/sec) -training >> step=5620800, episode=937 reward=0.7703578 (504.41 it/sec) -training >> step=5620900, episode=937 reward=0.7859295 (508.80 it/sec) -training >> step=5621000, episode=937 reward=0.7808955 (456.23 it/sec) -training >> step=5621100, episode=937 reward=0.7789937 (508.52 it/sec) -training >> step=5621200, episode=937 reward=0.7527791 (517.15 it/sec) -training >> step=5621300, episode=938 reward=0.7633675 (74.80 it/sec) -training >> step=5621400, episode=938 reward=0.7746664 (472.89 it/sec) -training >> step=5621500, episode=938 reward=0.7783873 (484.58 it/sec) -training >> step=5621600, episode=938 reward=0.7746972 (485.39 it/sec) -training >> step=5621700, episode=938 reward=0.7607788 (505.82 it/sec) -training >> step=5621800, episode=938 reward=0.794125 (461.34 it/sec) -training >> step=5621900, episode=938 reward=0.7668369 (477.02 it/sec) -training >> step=5622000, episode=938 reward=0.783833 (526.08 it/sec) -training >> step=5622100, episode=938 reward=0.7897065 (486.21 it/sec) -training >> step=5622200, episode=938 reward=0.7756085 (468.83 it/sec) -training >> step=5622300, episode=938 reward=0.7850498 (451.00 it/sec) -training >> step=5622400, episode=938 reward=0.7714171 (485.19 it/sec) -training >> step=5622500, episode=938 reward=0.7781687 (492.82 it/sec) -training >> step=5622600, episode=938 reward=0.7832306 (343.71 it/sec) -training >> step=5622700, episode=938 reward=0.7718264 (462.95 it/sec) -training >> step=5622800, episode=938 reward=0.7961783 (448.76 it/sec) -training >> step=5622900, episode=938 reward=0.7946317 (454.68 it/sec) -training >> step=5623000, episode=938 reward=0.7800593 (522.16 it/sec) -training >> step=5623100, episode=938 reward=0.7772202 (476.77 it/sec) -training >> step=5623200, episode=938 reward=0.7854404 (491.23 it/sec) -training >> step=5623300, episode=938 reward=0.7769771 (500.48 it/sec) -training >> step=5623400, episode=938 reward=0.7986804 (471.68 it/sec) -training >> step=5623500, episode=938 reward=0.7990956 (492.32 it/sec) -training >> step=5623600, episode=938 reward=0.7682732 (506.43 it/sec) -training >> step=5623700, episode=938 reward=0.800837 (492.91 it/sec) -training >> step=5623800, episode=938 reward=0.7865891 (426.95 it/sec) -training >> step=5623900, episode=938 reward=0.7783452 (453.91 it/sec) -training >> step=5624000, episode=938 reward=0.7895185 (472.38 it/sec) -training >> step=5624100, episode=938 reward=0.8062158 (493.99 it/sec) -training >> step=5624200, episode=938 reward=0.7595419 (510.88 it/sec) -training >> step=5624300, episode=938 reward=0.7913393 (464.48 it/sec) -training >> step=5624400, episode=938 reward=0.77087 (476.73 it/sec) -training >> step=5624500, episode=938 reward=0.7967254 (436.23 it/sec) -training >> step=5624600, episode=938 reward=0.804664 (473.35 it/sec) -training >> step=5624700, episode=938 reward=0.7735513 (523.95 it/sec) -training >> step=5624800, episode=938 reward=0.7913414 (431.56 it/sec) -training >> step=5624900, episode=938 reward=0.7830028 (477.03 it/sec) -training >> step=5625000, episode=938 reward=0.7818938 (501.93 it/sec) -training >> step=5625100, episode=938 reward=0.7864094 (511.54 it/sec) -training >> step=5625200, episode=938 reward=0.7840974 (479.49 it/sec) -training >> step=5625300, episode=938 reward=0.794463 (480.01 it/sec) -training >> step=5625400, episode=938 reward=0.7789638 (448.63 it/sec) -training >> step=5625500, episode=938 reward=0.7749415 (500.89 it/sec) -training >> step=5625600, episode=938 reward=0.7885786 (489.97 it/sec) -training >> step=5625700, episode=938 reward=0.7710593 (457.14 it/sec) -training >> step=5625800, episode=938 reward=0.7673024 (487.95 it/sec) -training >> step=5625900, episode=938 reward=0.7915261 (503.34 it/sec) -training >> step=5626000, episode=938 reward=0.7824484 (495.92 it/sec) -training >> step=5626100, episode=938 reward=0.7692279 (503.32 it/sec) -training >> step=5626200, episode=938 reward=0.7888563 (525.11 it/sec) -training >> step=5626300, episode=938 reward=0.7844489 (453.07 it/sec) -training >> step=5626400, episode=938 reward=0.7723437 (448.34 it/sec) -training >> step=5626500, episode=938 reward=0.7782552 (509.77 it/sec) -training >> step=5626600, episode=938 reward=0.7661836 (501.37 it/sec) -training >> step=5626700, episode=938 reward=0.7872824 (449.11 it/sec) -training >> step=5626800, episode=938 reward=0.7779869 (455.09 it/sec) -training >> step=5626900, episode=938 reward=0.7883126 (506.71 it/sec) -training >> step=5627000, episode=938 reward=0.7994463 (473.13 it/sec) -training >> step=5627100, episode=938 reward=0.7464226 (481.21 it/sec) -training >> step=5627200, episode=938 reward=0.7742391 (496.06 it/sec) -training >> step=5627300, episode=939 reward=0.780104 (110.80 it/sec) -training >> step=5627400, episode=939 reward=0.7888732 (457.75 it/sec) -training >> step=5627500, episode=939 reward=0.7752589 (417.71 it/sec) -training >> step=5627600, episode=939 reward=0.7999061 (480.16 it/sec) -training >> step=5627700, episode=939 reward=0.7620676 (443.38 it/sec) -training >> step=5627800, episode=939 reward=0.7953721 (463.56 it/sec) -training >> step=5627900, episode=939 reward=0.7806597 (449.73 it/sec) -training >> step=5628000, episode=939 reward=0.8093805 (486.89 it/sec) -training >> step=5628100, episode=939 reward=0.7795314 (460.81 it/sec) -training >> step=5628200, episode=939 reward=0.7679787 (440.71 it/sec) -training >> step=5628300, episode=939 reward=0.785063 (401.08 it/sec) -training >> step=5628400, episode=939 reward=0.7757044 (482.36 it/sec) -training >> step=5628500, episode=939 reward=0.7746136 (483.44 it/sec) -training >> step=5628600, episode=939 reward=0.7729841 (443.22 it/sec) -training >> step=5628700, episode=939 reward=0.8067902 (493.65 it/sec) -training >> step=5628800, episode=939 reward=0.8075229 (313.97 it/sec) -training >> step=5628900, episode=939 reward=0.7934176 (460.04 it/sec) -training >> step=5629000, episode=939 reward=0.7839318 (480.37 it/sec) -training >> step=5629100, episode=939 reward=0.7956855 (440.29 it/sec) -training >> step=5629200, episode=939 reward=0.7731058 (429.88 it/sec) -training >> step=5629300, episode=939 reward=0.7791436 (446.35 it/sec) -training >> step=5629400, episode=939 reward=0.806313 (489.62 it/sec) -training >> step=5629500, episode=939 reward=0.7742758 (512.64 it/sec) -training >> step=5629600, episode=939 reward=0.7879182 (475.05 it/sec) -training >> step=5629700, episode=939 reward=0.7928398 (415.28 it/sec) -training >> step=5629800, episode=939 reward=0.7980185 (433.13 it/sec) -training >> step=5629900, episode=939 reward=0.7633672 (453.76 it/sec) -training >> step=5630000, episode=939 reward=0.7951529 (465.56 it/sec) -training >> step=5630100, episode=939 reward=0.773961 (467.83 it/sec) -training >> step=5630200, episode=939 reward=0.7691203 (461.82 it/sec) -training >> step=5630300, episode=939 reward=0.7783676 (454.05 it/sec) -training >> step=5630400, episode=939 reward=0.7744837 (397.12 it/sec) -training >> step=5630500, episode=939 reward=0.7721627 (505.22 it/sec) -training >> step=5630600, episode=939 reward=0.7762471 (425.66 it/sec) -training >> step=5630700, episode=939 reward=0.7646041 (461.94 it/sec) -training >> step=5630800, episode=939 reward=0.7860702 (446.58 it/sec) -training >> step=5630900, episode=939 reward=0.7987235 (508.49 it/sec) -training >> step=5631000, episode=939 reward=0.7562847 (456.66 it/sec) -training >> step=5631100, episode=939 reward=0.7689393 (462.41 it/sec) -training >> step=5631200, episode=939 reward=0.788366 (470.68 it/sec) -training >> step=5631300, episode=939 reward=0.7843949 (489.80 it/sec) -training >> step=5631400, episode=939 reward=0.787102 (455.81 it/sec) -training >> step=5631500, episode=939 reward=0.7761775 (439.40 it/sec) -training >> step=5631600, episode=939 reward=0.7996102 (411.30 it/sec) -training >> step=5631700, episode=939 reward=0.7760975 (440.52 it/sec) -training >> step=5631800, episode=939 reward=0.7816135 (497.51 it/sec) -training >> step=5631900, episode=939 reward=0.7755518 (483.54 it/sec) -training >> step=5632000, episode=939 reward=0.8017847 (491.86 it/sec) -training >> step=5632100, episode=939 reward=0.7945746 (457.80 it/sec) -training >> step=5632200, episode=939 reward=0.767479 (482.94 it/sec) -training >> step=5632300, episode=939 reward=0.7854117 (481.08 it/sec) -training >> step=5632400, episode=939 reward=0.7779742 (473.63 it/sec) -training >> step=5632500, episode=939 reward=0.7854723 (480.89 it/sec) -training >> step=5632600, episode=939 reward=0.7601755 (443.05 it/sec) -training >> step=5632700, episode=939 reward=0.7819202 (499.05 it/sec) -training >> step=5632800, episode=939 reward=0.7895401 (505.30 it/sec) -training >> step=5632900, episode=939 reward=0.7978695 (488.74 it/sec) -training >> step=5633000, episode=939 reward=0.7665772 (430.59 it/sec) -training >> step=5633100, episode=939 reward=0.7742951 (495.66 it/sec) -training >> step=5633200, episode=939 reward=0.7800204 (491.26 it/sec) -training >> step=5633300, episode=940 reward=0.7751105 (73.64 it/sec) -training >> step=5633400, episode=940 reward=0.7856833 (495.55 it/sec) -training >> step=5633500, episode=940 reward=0.7888243 (445.46 it/sec) -training >> step=5633600, episode=940 reward=0.7621214 (482.95 it/sec) -training >> step=5633700, episode=940 reward=0.7649783 (509.09 it/sec) -training >> step=5633800, episode=940 reward=0.7738262 (450.35 it/sec) -training >> step=5633900, episode=940 reward=0.7873112 (502.07 it/sec) -training >> step=5634000, episode=940 reward=0.7876992 (491.40 it/sec) -training >> step=5634100, episode=940 reward=0.7962703 (499.21 it/sec) -training >> step=5634200, episode=940 reward=0.7790982 (480.90 it/sec) -training >> step=5634300, episode=940 reward=0.7680928 (511.31 it/sec) -training >> step=5634400, episode=940 reward=0.7701841 (477.31 it/sec) -training >> step=5634500, episode=940 reward=0.7680598 (473.77 it/sec) -training >> step=5634600, episode=940 reward=0.7865606 (501.33 it/sec) -training >> step=5634700, episode=940 reward=0.7764415 (493.84 it/sec) -training >> step=5634800, episode=940 reward=0.7651312 (449.82 it/sec) -training >> step=5634900, episode=940 reward=0.7671462 (471.35 it/sec) -training >> step=5635000, episode=940 reward=0.7839547 (341.88 it/sec) -training >> step=5635100, episode=940 reward=0.7914336 (496.84 it/sec) -training >> step=5635200, episode=940 reward=0.7999004 (498.03 it/sec) -training >> step=5635300, episode=940 reward=0.7936872 (515.22 it/sec) -training >> step=5635400, episode=940 reward=0.7723789 (463.37 it/sec) -training >> step=5635500, episode=940 reward=0.7595942 (461.72 it/sec) -training >> step=5635600, episode=940 reward=0.8205758 (492.57 it/sec) -training >> step=5635700, episode=940 reward=0.7758371 (482.63 it/sec) -training >> step=5635800, episode=940 reward=0.7996331 (493.31 it/sec) -training >> step=5635900, episode=940 reward=0.7777817 (428.11 it/sec) -training >> step=5636000, episode=940 reward=0.7700261 (475.45 it/sec) -training >> step=5636100, episode=940 reward=0.7906644 (458.28 it/sec) -training >> step=5636200, episode=940 reward=0.7935945 (474.99 it/sec) -training >> step=5636300, episode=940 reward=0.7834936 (495.73 it/sec) -training >> step=5636400, episode=940 reward=0.7940468 (428.35 it/sec) -training >> step=5636500, episode=940 reward=0.7837826 (478.68 it/sec) -training >> step=5636600, episode=940 reward=0.7776878 (468.31 it/sec) -training >> step=5636700, episode=940 reward=0.7746629 (447.51 it/sec) -training >> step=5636800, episode=940 reward=0.7795402 (465.09 it/sec) -training >> step=5636900, episode=940 reward=0.7891746 (450.37 it/sec) -training >> step=5637000, episode=940 reward=0.7815848 (515.52 it/sec) -training >> step=5637100, episode=940 reward=0.7861039 (499.43 it/sec) -training >> step=5637200, episode=940 reward=0.7958838 (483.81 it/sec) -training >> step=5637300, episode=940 reward=0.7713439 (444.06 it/sec) -training >> step=5637400, episode=940 reward=0.7783019 (463.35 it/sec) -training >> step=5637500, episode=940 reward=0.7672556 (460.53 it/sec) -training >> step=5637600, episode=940 reward=0.7860624 (466.14 it/sec) -training >> step=5637700, episode=940 reward=0.7846792 (423.60 it/sec) -training >> step=5637800, episode=940 reward=0.7676836 (467.02 it/sec) -training >> step=5637900, episode=940 reward=0.7742037 (494.85 it/sec) -training >> step=5638000, episode=940 reward=0.7771817 (507.37 it/sec) -training >> step=5638100, episode=940 reward=0.7757589 (468.17 it/sec) -training >> step=5638200, episode=940 reward=0.7802352 (460.11 it/sec) -training >> step=5638300, episode=940 reward=0.7982476 (437.94 it/sec) -training >> step=5638400, episode=940 reward=0.7890082 (461.00 it/sec) -training >> step=5638500, episode=940 reward=0.7690213 (532.45 it/sec) -training >> step=5638600, episode=940 reward=0.7777737 (472.21 it/sec) -training >> step=5638700, episode=940 reward=0.783196 (469.48 it/sec) -training >> step=5638800, episode=940 reward=0.8024314 (480.94 it/sec) -training >> step=5638900, episode=940 reward=0.7651482 (482.69 it/sec) -training >> step=5639000, episode=940 reward=0.7819495 (487.17 it/sec) -training >> step=5639100, episode=940 reward=0.7792403 (473.87 it/sec) -training >> step=5639200, episode=940 reward=0.7714303 (454.57 it/sec) -training >> step=5639300, episode=941 reward=0.7895537 (101.81 it/sec) -training >> step=5639400, episode=941 reward=0.7881309 (445.57 it/sec) -training >> step=5639500, episode=941 reward=0.7607351 (450.57 it/sec) -training >> step=5639600, episode=941 reward=0.7733394 (493.54 it/sec) -training >> step=5639700, episode=941 reward=0.7617625 (443.63 it/sec) -training >> step=5639800, episode=941 reward=0.7779201 (458.09 it/sec) -training >> step=5639900, episode=941 reward=0.7719222 (463.91 it/sec) -training >> step=5640000, episode=941 reward=0.7691789 (416.32 it/sec) -training >> step=5640100, episode=941 reward=0.7777331 (459.55 it/sec) -training >> step=5640200, episode=941 reward=0.7984251 (474.35 it/sec) -training >> step=5640300, episode=941 reward=0.7897169 (497.29 it/sec) -training >> step=5640400, episode=941 reward=0.7949063 (447.27 it/sec) -training >> step=5640500, episode=941 reward=0.7824332 (463.73 it/sec) -training >> step=5640600, episode=941 reward=0.7832016 (491.17 it/sec) -training >> step=5640700, episode=941 reward=0.7885232 (499.85 it/sec) -training >> step=5640800, episode=941 reward=0.7844213 (473.25 it/sec) -training >> step=5640900, episode=941 reward=0.7790768 (437.89 it/sec) -training >> step=5641000, episode=941 reward=0.7628575 (516.89 it/sec) -training >> step=5641100, episode=941 reward=0.7897472 (453.39 it/sec) -training >> step=5641200, episode=941 reward=0.766999 (345.85 it/sec) -training >> step=5641300, episode=941 reward=0.7828306 (435.79 it/sec) -training >> step=5641400, episode=941 reward=0.7736666 (512.80 it/sec) -training >> step=5641500, episode=941 reward=0.7775072 (490.02 it/sec) -training >> step=5641600, episode=941 reward=0.7742767 (442.34 it/sec) -training >> step=5641700, episode=941 reward=0.7920814 (494.62 it/sec) -training >> step=5641800, episode=941 reward=0.8019755 (435.95 it/sec) -training >> step=5641900, episode=941 reward=0.7686307 (487.92 it/sec) -training >> step=5642000, episode=941 reward=0.7906057 (446.69 it/sec) -training >> step=5642100, episode=941 reward=0.7981995 (475.02 it/sec) -training >> step=5642200, episode=941 reward=0.7740105 (402.05 it/sec) -training >> step=5642300, episode=941 reward=0.7734105 (424.62 it/sec) -training >> step=5642400, episode=941 reward=0.7923516 (441.67 it/sec) -training >> step=5642500, episode=941 reward=0.782044 (480.74 it/sec) -training >> step=5642600, episode=941 reward=0.7839671 (495.78 it/sec) -training >> step=5642700, episode=941 reward=0.7622368 (477.40 it/sec) -training >> step=5642800, episode=941 reward=0.7930031 (484.17 it/sec) -training >> step=5642900, episode=941 reward=0.7786555 (512.91 it/sec) -training >> step=5643000, episode=941 reward=0.7989776 (499.63 it/sec) -training >> step=5643100, episode=941 reward=0.8006148 (505.00 it/sec) -training >> step=5643200, episode=941 reward=0.7614064 (535.41 it/sec) -training >> step=5643300, episode=941 reward=0.7876362 (455.28 it/sec) -training >> step=5643400, episode=941 reward=0.7903856 (493.73 it/sec) -training >> step=5643500, episode=941 reward=0.7933434 (519.46 it/sec) -training >> step=5643600, episode=941 reward=0.7824293 (438.29 it/sec) -training >> step=5643700, episode=941 reward=0.7719889 (505.32 it/sec) -training >> step=5643800, episode=941 reward=0.7730449 (454.83 it/sec) -training >> step=5643900, episode=941 reward=0.7801618 (494.55 it/sec) -training >> step=5644000, episode=941 reward=0.7709277 (444.73 it/sec) -training >> step=5644100, episode=941 reward=0.7850039 (493.80 it/sec) -training >> step=5644200, episode=941 reward=0.7799588 (478.21 it/sec) -training >> step=5644300, episode=941 reward=0.7920362 (533.86 it/sec) -training >> step=5644400, episode=941 reward=0.7729999 (479.08 it/sec) -training >> step=5644500, episode=941 reward=0.7814989 (501.21 it/sec) -training >> step=5644600, episode=941 reward=0.756476 (519.99 it/sec) -training >> step=5644700, episode=941 reward=0.7902785 (502.89 it/sec) -training >> step=5644800, episode=941 reward=0.7649634 (518.70 it/sec) -training >> step=5644900, episode=941 reward=0.7819203 (475.53 it/sec) -training >> step=5645000, episode=941 reward=0.7845799 (516.83 it/sec) -training >> step=5645100, episode=941 reward=0.7753892 (513.90 it/sec) -training >> step=5645200, episode=941 reward=0.7887349 (492.07 it/sec) -training >> step=5645300, episode=942 reward=0.7803901 (102.68 it/sec) -training >> step=5645400, episode=942 reward=0.7726135 (529.17 it/sec) -training >> step=5645500, episode=942 reward=0.7629386 (534.86 it/sec) -training >> step=5645600, episode=942 reward=0.7691331 (482.63 it/sec) -training >> step=5645700, episode=942 reward=0.7621957 (535.53 it/sec) -training >> step=5645800, episode=942 reward=0.7910964 (512.89 it/sec) -training >> step=5645900, episode=942 reward=0.77951 (543.35 it/sec) -training >> step=5646000, episode=942 reward=0.7856222 (548.48 it/sec) -training >> step=5646100, episode=942 reward=0.8089652 (476.63 it/sec) -training >> step=5646200, episode=942 reward=0.7958848 (515.86 it/sec) -training >> step=5646300, episode=942 reward=0.7647906 (545.19 it/sec) -training >> step=5646400, episode=942 reward=0.7789044 (495.92 it/sec) -training >> step=5646500, episode=942 reward=0.7839726 (544.50 it/sec) -training >> step=5646600, episode=942 reward=0.7955807 (471.12 it/sec) -training >> step=5646700, episode=942 reward=0.7660112 (500.14 it/sec) -training >> step=5646800, episode=942 reward=0.8054073 (541.39 it/sec) -training >> step=5646900, episode=942 reward=0.7930607 (541.53 it/sec) -training >> step=5647000, episode=942 reward=0.7914157 (548.80 it/sec) -training >> step=5647100, episode=942 reward=0.7881721 (526.82 it/sec) -training >> step=5647200, episode=942 reward=0.7866131 (472.81 it/sec) -training >> step=5647300, episode=942 reward=0.7919673 (534.28 it/sec) -training >> step=5647400, episode=942 reward=0.779195 (524.53 it/sec) -training >> step=5647500, episode=942 reward=0.7611293 (378.55 it/sec) -training >> step=5647600, episode=942 reward=0.7698887 (558.27 it/sec) -training >> step=5647700, episode=942 reward=0.7928513 (462.10 it/sec) -training >> step=5647800, episode=942 reward=0.7898423 (527.12 it/sec) -training >> step=5647900, episode=942 reward=0.78523 (545.94 it/sec) -training >> step=5648000, episode=942 reward=0.7919991 (534.86 it/sec) -training >> step=5648100, episode=942 reward=0.7892884 (525.42 it/sec) -training >> step=5648200, episode=942 reward=0.803695 (471.11 it/sec) -training >> step=5648300, episode=942 reward=0.7678 (551.42 it/sec) -training >> step=5648400, episode=942 reward=0.7839813 (532.21 it/sec) -training >> step=5648500, episode=942 reward=0.7702812 (513.59 it/sec) -training >> step=5648600, episode=942 reward=0.7803898 (539.21 it/sec) -training >> step=5648700, episode=942 reward=0.7590355 (525.04 it/sec) -training >> step=5648800, episode=942 reward=0.7853651 (485.11 it/sec) -training >> step=5648900, episode=942 reward=0.7800131 (530.23 it/sec) -training >> step=5649000, episode=942 reward=0.7924349 (517.72 it/sec) -training >> step=5649100, episode=942 reward=0.7928218 (559.02 it/sec) -training >> step=5649200, episode=942 reward=0.7895759 (519.69 it/sec) -training >> step=5649300, episode=942 reward=0.7757409 (474.86 it/sec) -training >> step=5649400, episode=942 reward=0.781777 (532.63 it/sec) -training >> step=5649500, episode=942 reward=0.7757127 (529.20 it/sec) -training >> step=5649600, episode=942 reward=0.7783695 (501.89 it/sec) -training >> step=5649700, episode=942 reward=0.7689375 (485.69 it/sec) -training >> step=5649800, episode=942 reward=0.7766493 (481.10 it/sec) -training >> step=5649900, episode=942 reward=0.7849581 (511.66 it/sec) -training >> step=5650000, episode=942 reward=0.7944225 (506.63 it/sec) -training >> step=5650100, episode=942 reward=0.7782022 (519.12 it/sec) -training >> step=5650200, episode=942 reward=0.7911775 (466.45 it/sec) -training >> step=5650300, episode=942 reward=0.7869299 (439.28 it/sec) -training >> step=5650400, episode=942 reward=0.8045539 (499.36 it/sec) -training >> step=5650500, episode=942 reward=0.7832968 (501.51 it/sec) -training >> step=5650600, episode=942 reward=0.7908464 (511.74 it/sec) -training >> step=5650700, episode=942 reward=0.7771353 (487.75 it/sec) -training >> step=5650800, episode=942 reward=0.776803 (449.55 it/sec) -training >> step=5650900, episode=942 reward=0.7856421 (523.56 it/sec) -training >> step=5651000, episode=942 reward=0.7636896 (490.79 it/sec) -training >> step=5651100, episode=942 reward=0.7762486 (494.72 it/sec) -training >> step=5651200, episode=942 reward=0.7715556 (468.77 it/sec) -training >> step=5651300, episode=943 reward=0.7664684 (117.70 it/sec) -training >> step=5651400, episode=943 reward=0.7608618 (456.75 it/sec) -training >> step=5651500, episode=943 reward=0.7855936 (461.43 it/sec) -training >> step=5651600, episode=943 reward=0.7739053 (521.82 it/sec) -training >> step=5651700, episode=943 reward=0.7856322 (471.77 it/sec) -training >> step=5651800, episode=943 reward=0.7832296 (470.35 it/sec) -training >> step=5651900, episode=943 reward=0.7812344 (491.34 it/sec) -training >> step=5652000, episode=943 reward=0.7935239 (445.33 it/sec) -training >> step=5652100, episode=943 reward=0.7966851 (509.74 it/sec) -training >> step=5652200, episode=943 reward=0.7832362 (440.40 it/sec) -training >> step=5652300, episode=943 reward=0.7769147 (492.19 it/sec) -training >> step=5652400, episode=943 reward=0.7733636 (438.86 it/sec) -training >> step=5652500, episode=943 reward=0.7854884 (456.04 it/sec) -training >> step=5652600, episode=943 reward=0.811142 (533.08 it/sec) -training >> step=5652700, episode=943 reward=0.8097329 (498.72 it/sec) -training >> step=5652800, episode=943 reward=0.79276 (504.61 it/sec) -training >> step=5652900, episode=943 reward=0.7676023 (457.65 it/sec) -training >> step=5653000, episode=943 reward=0.7990965 (484.10 it/sec) -training >> step=5653100, episode=943 reward=0.7796113 (494.90 it/sec) -training >> step=5653200, episode=943 reward=0.79122 (522.44 it/sec) -training >> step=5653300, episode=943 reward=0.8116283 (470.70 it/sec) -training >> step=5653400, episode=943 reward=0.7835175 (499.20 it/sec) -training >> step=5653500, episode=943 reward=0.7863572 (428.88 it/sec) -training >> step=5653600, episode=943 reward=0.7855401 (383.34 it/sec) -training >> step=5653700, episode=943 reward=0.765579 (488.27 it/sec) -training >> step=5653800, episode=943 reward=0.7937083 (521.60 it/sec) -training >> step=5653900, episode=943 reward=0.7897238 (491.33 it/sec) -training >> step=5654000, episode=943 reward=0.7845076 (464.04 it/sec) -training >> step=5654100, episode=943 reward=0.779302 (516.05 it/sec) -training >> step=5654200, episode=943 reward=0.805958 (482.56 it/sec) -training >> step=5654300, episode=943 reward=0.8037922 (517.74 it/sec) -training >> step=5654400, episode=943 reward=0.7856451 (485.91 it/sec) -training >> step=5654500, episode=943 reward=0.7768809 (464.06 it/sec) -training >> step=5654600, episode=943 reward=0.7664523 (497.05 it/sec) -training >> step=5654700, episode=943 reward=0.7774507 (470.10 it/sec) -training >> step=5654800, episode=943 reward=0.7844616 (505.22 it/sec) -training >> step=5654900, episode=943 reward=0.784313 (485.16 it/sec) -training >> step=5655000, episode=943 reward=0.7884308 (480.51 it/sec) -training >> step=5655100, episode=943 reward=0.7902293 (508.35 it/sec) -training >> step=5655200, episode=943 reward=0.7935082 (498.82 it/sec) -training >> step=5655300, episode=943 reward=0.7952078 (471.93 it/sec) -training >> step=5655400, episode=943 reward=0.8087986 (477.99 it/sec) -training >> step=5655500, episode=943 reward=0.7675111 (501.06 it/sec) -training >> step=5655600, episode=943 reward=0.7612268 (502.86 it/sec) -training >> step=5655700, episode=943 reward=0.7748911 (478.42 it/sec) -training >> step=5655800, episode=943 reward=0.8000562 (461.30 it/sec) -training >> step=5655900, episode=943 reward=0.7701936 (466.18 it/sec) -training >> step=5656000, episode=943 reward=0.7764075 (484.41 it/sec) -training >> step=5656100, episode=943 reward=0.7804323 (532.43 it/sec) -training >> step=5656200, episode=943 reward=0.7767819 (445.13 it/sec) -training >> step=5656300, episode=943 reward=0.7665437 (514.60 it/sec) -training >> step=5656400, episode=943 reward=0.7933055 (470.93 it/sec) -training >> step=5656500, episode=943 reward=0.7882036 (455.09 it/sec) -training >> step=5656600, episode=943 reward=0.7696178 (520.95 it/sec) -training >> step=5656700, episode=943 reward=0.7936897 (518.00 it/sec) -training >> step=5656800, episode=943 reward=0.7839803 (508.06 it/sec) -training >> step=5656900, episode=943 reward=0.7539157 (461.56 it/sec) -training >> step=5657000, episode=943 reward=0.7842717 (516.77 it/sec) -training >> step=5657100, episode=943 reward=0.788125 (480.92 it/sec) -training >> step=5657200, episode=943 reward=0.7718945 (477.17 it/sec) -training >> step=5657300, episode=944 reward=0.8206765 (93.59 it/sec) -training >> step=5657400, episode=944 reward=0.7706245 (514.24 it/sec) -training >> step=5657500, episode=944 reward=0.7711111 (473.08 it/sec) -training >> step=5657600, episode=944 reward=0.7861587 (476.23 it/sec) -training >> step=5657700, episode=944 reward=0.8064725 (472.28 it/sec) -training >> step=5657800, episode=944 reward=0.7816885 (477.09 it/sec) -training >> step=5657900, episode=944 reward=0.7928478 (507.73 it/sec) -training >> step=5658000, episode=944 reward=0.7660761 (494.99 it/sec) -training >> step=5658100, episode=944 reward=0.8011808 (482.87 it/sec) -training >> step=5658200, episode=944 reward=0.7856312 (488.85 it/sec) -training >> step=5658300, episode=944 reward=0.7689685 (519.71 it/sec) -training >> step=5658400, episode=944 reward=0.7907817 (510.30 it/sec) -training >> step=5658500, episode=944 reward=0.7723467 (458.92 it/sec) -training >> step=5658600, episode=944 reward=0.7729603 (493.99 it/sec) -training >> step=5658700, episode=944 reward=0.7797933 (516.28 it/sec) -training >> step=5658800, episode=944 reward=0.7947122 (460.23 it/sec) -training >> step=5658900, episode=944 reward=0.7971757 (491.68 it/sec) -training >> step=5659000, episode=944 reward=0.7737111 (455.13 it/sec) -training >> step=5659100, episode=944 reward=0.7879175 (489.60 it/sec) -training >> step=5659200, episode=944 reward=0.7834694 (463.85 it/sec) -training >> step=5659300, episode=944 reward=0.7709465 (470.12 it/sec) -training >> step=5659400, episode=944 reward=0.7908192 (523.27 it/sec) -training >> step=5659500, episode=944 reward=0.7758811 (466.94 it/sec) -training >> step=5659600, episode=944 reward=0.7811213 (490.41 it/sec) -training >> step=5659700, episode=944 reward=0.8087671 (479.64 it/sec) -training >> step=5659800, episode=944 reward=0.7925624 (397.26 it/sec) -training >> step=5659900, episode=944 reward=0.7603984 (473.24 it/sec) -training >> step=5660000, episode=944 reward=0.7661819 (451.17 it/sec) -training >> step=5660100, episode=944 reward=0.8046975 (487.40 it/sec) -training >> step=5660200, episode=944 reward=0.7788546 (484.84 it/sec) -training >> step=5660300, episode=944 reward=0.7950608 (517.66 it/sec) -training >> step=5660400, episode=944 reward=0.7794137 (481.03 it/sec) -training >> step=5660500, episode=944 reward=0.7821901 (469.04 it/sec) -training >> step=5660600, episode=944 reward=0.7839203 (459.65 it/sec) -training >> step=5660700, episode=944 reward=0.7705115 (455.70 it/sec) -training >> step=5660800, episode=944 reward=0.7959699 (487.81 it/sec) -training >> step=5660900, episode=944 reward=0.7804236 (512.50 it/sec) -training >> step=5661000, episode=944 reward=0.7791311 (444.81 it/sec) -training >> step=5661100, episode=944 reward=0.7733917 (508.41 it/sec) -training >> step=5661200, episode=944 reward=0.7877946 (483.79 it/sec) -training >> step=5661300, episode=944 reward=0.7847461 (518.26 it/sec) -training >> step=5661400, episode=944 reward=0.7768895 (487.67 it/sec) -training >> step=5661500, episode=944 reward=0.7775064 (471.35 it/sec) -training >> step=5661600, episode=944 reward=0.7824822 (530.55 it/sec) -training >> step=5661700, episode=944 reward=0.7769163 (498.27 it/sec) -training >> step=5661800, episode=944 reward=0.7829607 (496.41 it/sec) -training >> step=5661900, episode=944 reward=0.7882418 (493.87 it/sec) -training >> step=5662000, episode=944 reward=0.771595 (477.34 it/sec) -training >> step=5662100, episode=944 reward=0.7673519 (430.08 it/sec) -training >> step=5662200, episode=944 reward=0.7913117 (486.96 it/sec) -training >> step=5662300, episode=944 reward=0.7848878 (484.69 it/sec) -training >> step=5662400, episode=944 reward=0.7807333 (504.03 it/sec) -training >> step=5662500, episode=944 reward=0.8040059 (469.67 it/sec) -training >> step=5662600, episode=944 reward=0.7597524 (490.16 it/sec) -training >> step=5662700, episode=944 reward=0.7692605 (523.37 it/sec) -training >> step=5662800, episode=944 reward=0.7713411 (507.88 it/sec) -training >> step=5662900, episode=944 reward=0.7860252 (491.56 it/sec) -training >> step=5663000, episode=944 reward=0.7720858 (383.04 it/sec) -training >> step=5663100, episode=944 reward=0.7766855 (490.90 it/sec) -training >> step=5663200, episode=944 reward=0.782517 (508.24 it/sec) -training >> step=5663300, episode=945 reward=0.7675738 (93.73 it/sec) -training >> step=5663400, episode=945 reward=0.7725977 (521.91 it/sec) -training >> step=5663500, episode=945 reward=0.7653725 (481.57 it/sec) -training >> step=5663600, episode=945 reward=0.7686474 (466.60 it/sec) -training >> step=5663700, episode=945 reward=0.7972973 (492.15 it/sec) -training >> step=5663800, episode=945 reward=0.7829857 (511.76 it/sec) -training >> step=5663900, episode=945 reward=0.7492375 (476.39 it/sec) -training >> step=5664000, episode=945 reward=0.7786563 (508.86 it/sec) -training >> step=5664100, episode=945 reward=0.781354 (485.09 it/sec) -training >> step=5664200, episode=945 reward=0.7866601 (489.35 it/sec) -training >> step=5664300, episode=945 reward=0.7849975 (497.68 it/sec) -training >> step=5664400, episode=945 reward=0.7849227 (533.06 it/sec) -training >> step=5664500, episode=945 reward=0.7898666 (454.83 it/sec) -training >> step=5664600, episode=945 reward=0.7727597 (471.86 it/sec) -training >> step=5664700, episode=945 reward=0.7756485 (506.68 it/sec) -training >> step=5664800, episode=945 reward=0.7786748 (525.24 it/sec) -training >> step=5664900, episode=945 reward=0.7979202 (484.36 it/sec) -training >> step=5665000, episode=945 reward=0.7796537 (475.69 it/sec) -training >> step=5665100, episode=945 reward=0.7811967 (463.79 it/sec) -training >> step=5665200, episode=945 reward=0.7931964 (492.16 it/sec) -training >> step=5665300, episode=945 reward=0.7637809 (485.89 it/sec) -training >> step=5665400, episode=945 reward=0.788816 (491.83 it/sec) -training >> step=5665500, episode=945 reward=0.7777526 (517.92 it/sec) -training >> step=5665600, episode=945 reward=0.7855991 (433.71 it/sec) -training >> step=5665700, episode=945 reward=0.7843959 (496.40 it/sec) -training >> step=5665800, episode=945 reward=0.787587 (504.83 it/sec) -training >> step=5665900, episode=945 reward=0.7910651 (540.86 it/sec) -training >> step=5666000, episode=945 reward=0.7737702 (323.32 it/sec) -training >> step=5666100, episode=945 reward=0.7808632 (500.88 it/sec) -training >> step=5666200, episode=945 reward=0.7814838 (515.81 it/sec) -training >> step=5666300, episode=945 reward=0.7750317 (485.31 it/sec) -training >> step=5666400, episode=945 reward=0.7768375 (478.76 it/sec) -training >> step=5666500, episode=945 reward=0.7795455 (467.76 it/sec) -training >> step=5666600, episode=945 reward=0.7888441 (492.99 it/sec) -training >> step=5666700, episode=945 reward=0.7881229 (515.34 it/sec) -training >> step=5666800, episode=945 reward=0.7809736 (497.71 it/sec) -training >> step=5666900, episode=945 reward=0.7697107 (511.85 it/sec) -training >> step=5667000, episode=945 reward=0.7911691 (485.99 it/sec) -training >> step=5667100, episode=945 reward=0.7795537 (468.35 it/sec) -training >> step=5667200, episode=945 reward=0.7931066 (470.36 it/sec) -training >> step=5667300, episode=945 reward=0.7990627 (521.84 it/sec) -training >> step=5667400, episode=945 reward=0.7902473 (491.29 it/sec) -training >> step=5667500, episode=945 reward=0.7958032 (505.66 it/sec) -training >> step=5667600, episode=945 reward=0.7780957 (488.15 it/sec) -training >> step=5667700, episode=945 reward=0.788353 (520.43 it/sec) -training >> step=5667800, episode=945 reward=0.7907035 (471.07 it/sec) -training >> step=5667900, episode=945 reward=0.7861282 (484.97 it/sec) -training >> step=5668000, episode=945 reward=0.7783682 (504.89 it/sec) -training >> step=5668100, episode=945 reward=0.7704191 (459.56 it/sec) -training >> step=5668200, episode=945 reward=0.7824431 (485.96 it/sec) -training >> step=5668300, episode=945 reward=0.7840213 (488.64 it/sec) -training >> step=5668400, episode=945 reward=0.792139 (524.85 it/sec) -training >> step=5668500, episode=945 reward=0.7973609 (479.98 it/sec) -training >> step=5668600, episode=945 reward=0.7439756 (471.50 it/sec) -training >> step=5668700, episode=945 reward=0.7880818 (531.98 it/sec) -training >> step=5668800, episode=945 reward=0.7833371 (500.36 it/sec) -training >> step=5668900, episode=945 reward=0.773825 (498.56 it/sec) -training >> step=5669000, episode=945 reward=0.7783307 (501.42 it/sec) -training >> step=5669100, episode=945 reward=0.7523871 (498.76 it/sec) -training >> step=5669200, episode=945 reward=0.7693558 (484.36 it/sec) -training >> step=5669300, episode=946 reward=0.7752589 (95.17 it/sec) -training >> step=5669400, episode=946 reward=0.7722086 (496.94 it/sec) -training >> step=5669500, episode=946 reward=0.7645755 (513.63 it/sec) -training >> step=5669600, episode=946 reward=0.8193827 (435.97 it/sec) -training >> step=5669700, episode=946 reward=0.8016434 (436.09 it/sec) -training >> step=5669800, episode=946 reward=0.8007368 (502.30 it/sec) -training >> step=5669900, episode=946 reward=0.7853178 (484.85 it/sec) -training >> step=5670000, episode=946 reward=0.7835189 (492.43 it/sec) -training >> step=5670100, episode=946 reward=0.7716054 (524.36 it/sec) -training >> step=5670200, episode=946 reward=0.7993158 (465.84 it/sec) -training >> step=5670300, episode=946 reward=0.7790736 (477.92 it/sec) -training >> step=5670400, episode=946 reward=0.7695798 (409.82 it/sec) -training >> step=5670500, episode=946 reward=0.7758128 (451.50 it/sec) -training >> step=5670600, episode=946 reward=0.788949 (449.99 it/sec) -training >> step=5670700, episode=946 reward=0.7944279 (477.67 it/sec) -training >> step=5670800, episode=946 reward=0.7635759 (499.29 it/sec) -training >> step=5670900, episode=946 reward=0.7697306 (477.97 it/sec) -training >> step=5671000, episode=946 reward=0.7844953 (443.63 it/sec) -training >> step=5671100, episode=946 reward=0.7751643 (466.43 it/sec) -training >> step=5671200, episode=946 reward=0.8048056 (486.25 it/sec) -training >> step=5671300, episode=946 reward=0.7864054 (486.10 it/sec) -training >> step=5671400, episode=946 reward=0.7944994 (463.57 it/sec) -training >> step=5671500, episode=946 reward=0.780361 (469.54 it/sec) -training >> step=5671600, episode=946 reward=0.8002976 (455.39 it/sec) -training >> step=5671700, episode=946 reward=0.7839128 (481.45 it/sec) -training >> step=5671800, episode=946 reward=0.7975256 (478.18 it/sec) -training >> step=5671900, episode=946 reward=0.7966585 (481.41 it/sec) -training >> step=5672000, episode=946 reward=0.8006164 (477.82 it/sec) -training >> step=5672100, episode=946 reward=0.7978674 (342.62 it/sec) -training >> step=5672200, episode=946 reward=0.7600918 (554.88 it/sec) -training >> step=5672300, episode=946 reward=0.7801036 (495.83 it/sec) -training >> step=5672400, episode=946 reward=0.7965684 (495.52 it/sec) -training >> step=5672500, episode=946 reward=0.7749806 (473.57 it/sec) -training >> step=5672600, episode=946 reward=0.776455 (513.19 it/sec) -training >> step=5672700, episode=946 reward=0.7870988 (436.00 it/sec) -training >> step=5672800, episode=946 reward=0.7668362 (447.21 it/sec) -training >> step=5672900, episode=946 reward=0.8021685 (502.65 it/sec) -training >> step=5673000, episode=946 reward=0.7883586 (467.41 it/sec) -training >> step=5673100, episode=946 reward=0.7907578 (452.95 it/sec) -training >> step=5673200, episode=946 reward=0.77797 (490.55 it/sec) -training >> step=5673300, episode=946 reward=0.7943512 (441.12 it/sec) -training >> step=5673400, episode=946 reward=0.7914547 (493.38 it/sec) -training >> step=5673500, episode=946 reward=0.7788359 (509.28 it/sec) -training >> step=5673600, episode=946 reward=0.7842139 (469.25 it/sec) -training >> step=5673700, episode=946 reward=0.7801138 (475.81 it/sec) -training >> step=5673800, episode=946 reward=0.7669242 (482.53 it/sec) -training >> step=5673900, episode=946 reward=0.7865999 (438.66 it/sec) -training >> step=5674000, episode=946 reward=0.7544455 (412.61 it/sec) -training >> step=5674100, episode=946 reward=0.7773508 (462.37 it/sec) -training >> step=5674200, episode=946 reward=0.784325 (472.15 it/sec) -training >> step=5674300, episode=946 reward=0.8024237 (450.70 it/sec) -training >> step=5674400, episode=946 reward=0.776413 (479.05 it/sec) -training >> step=5674500, episode=946 reward=0.7769825 (472.11 it/sec) -training >> step=5674600, episode=946 reward=0.7903259 (530.24 it/sec) -training >> step=5674700, episode=946 reward=0.7853577 (508.32 it/sec) -training >> step=5674800, episode=946 reward=0.7708719 (488.26 it/sec) -training >> step=5674900, episode=946 reward=0.776369 (522.90 it/sec) -training >> step=5675000, episode=946 reward=0.7651329 (489.95 it/sec) -training >> step=5675100, episode=946 reward=0.7706112 (521.59 it/sec) -training >> step=5675200, episode=946 reward=0.7622776 (499.77 it/sec) -training >> step=5675300, episode=947 reward=0.7698168 (94.37 it/sec) -training >> step=5675400, episode=947 reward=0.774213 (480.36 it/sec) -training >> step=5675500, episode=947 reward=0.7788894 (453.68 it/sec) -training >> step=5675600, episode=947 reward=0.79666 (472.65 it/sec) -training >> step=5675700, episode=947 reward=0.778714 (435.38 it/sec) -training >> step=5675800, episode=947 reward=0.7813186 (398.86 it/sec) -training >> step=5675900, episode=947 reward=0.7797176 (506.19 it/sec) -training >> step=5676000, episode=947 reward=0.7962108 (522.39 it/sec) -training >> step=5676100, episode=947 reward=0.7928914 (479.64 it/sec) -training >> step=5676200, episode=947 reward=0.7833323 (387.11 it/sec) -training >> step=5676300, episode=947 reward=0.8073062 (413.88 it/sec) -training >> step=5676400, episode=947 reward=0.7977515 (464.69 it/sec) -training >> step=5676500, episode=947 reward=0.7890404 (412.36 it/sec) -training >> step=5676600, episode=947 reward=0.7972161 (481.90 it/sec) -training >> step=5676700, episode=947 reward=0.7975894 (488.02 it/sec) -training >> step=5676800, episode=947 reward=0.7697902 (442.17 it/sec) -training >> step=5676900, episode=947 reward=0.7792884 (441.42 it/sec) -training >> step=5677000, episode=947 reward=0.7869127 (463.86 it/sec) -training >> step=5677100, episode=947 reward=0.7881638 (445.03 it/sec) -training >> step=5677200, episode=947 reward=0.771971 (465.34 it/sec) -training >> step=5677300, episode=947 reward=0.7728739 (485.13 it/sec) -training >> step=5677400, episode=947 reward=0.7771626 (499.44 it/sec) -training >> step=5677500, episode=947 reward=0.7777781 (440.95 it/sec) -training >> step=5677600, episode=947 reward=0.7886612 (465.19 it/sec) -training >> step=5677700, episode=947 reward=0.7809846 (495.70 it/sec) -training >> step=5677800, episode=947 reward=0.7799325 (429.21 it/sec) -training >> step=5677900, episode=947 reward=0.771521 (465.90 it/sec) -training >> step=5678000, episode=947 reward=0.8102731 (464.01 it/sec) -training >> step=5678100, episode=947 reward=0.7971929 (462.73 it/sec) -training >> step=5678200, episode=947 reward=0.7757519 (483.65 it/sec) -training >> step=5678300, episode=947 reward=0.782532 (447.30 it/sec) -training >> step=5678400, episode=947 reward=0.7934027 (356.47 it/sec) -training >> step=5678500, episode=947 reward=0.7705534 (429.96 it/sec) -training >> step=5678600, episode=947 reward=0.8090144 (441.50 it/sec) -training >> step=5678700, episode=947 reward=0.8088743 (429.50 it/sec) -training >> step=5678800, episode=947 reward=0.773945 (474.33 it/sec) -training >> step=5678900, episode=947 reward=0.7795748 (438.41 it/sec) -training >> step=5679000, episode=947 reward=0.7668684 (479.42 it/sec) -training >> step=5679100, episode=947 reward=0.7747058 (475.83 it/sec) -training >> step=5679200, episode=947 reward=0.7920769 (472.61 it/sec) -training >> step=5679300, episode=947 reward=0.7781138 (460.43 it/sec) -training >> step=5679400, episode=947 reward=0.7935732 (470.22 it/sec) -training >> step=5679500, episode=947 reward=0.7943239 (445.88 it/sec) -training >> step=5679600, episode=947 reward=0.7928145 (472.54 it/sec) -training >> step=5679700, episode=947 reward=0.7749691 (446.65 it/sec) -training >> step=5679800, episode=947 reward=0.7734653 (483.93 it/sec) -training >> step=5679900, episode=947 reward=0.7848468 (473.27 it/sec) -training >> step=5680000, episode=947 reward=0.7963887 (407.97 it/sec) -training >> step=5680100, episode=947 reward=0.776534 (450.24 it/sec) -training >> step=5680200, episode=947 reward=0.7709688 (506.11 it/sec) -training >> step=5680300, episode=947 reward=0.779792 (423.34 it/sec) -training >> step=5680400, episode=947 reward=0.7724293 (407.05 it/sec) -training >> step=5680500, episode=947 reward=0.7632749 (469.83 it/sec) -training >> step=5680600, episode=947 reward=0.7839958 (425.17 it/sec) -training >> step=5680700, episode=947 reward=0.7828394 (454.20 it/sec) -training >> step=5680800, episode=947 reward=0.7623737 (462.72 it/sec) -training >> step=5680900, episode=947 reward=0.7796252 (483.90 it/sec) -training >> step=5681000, episode=947 reward=0.7664083 (481.95 it/sec) -training >> step=5681100, episode=947 reward=0.7804936 (419.81 it/sec) -training >> step=5681200, episode=947 reward=0.7747623 (468.48 it/sec) -training >> step=5681300, episode=948 reward=0.7860228 (112.67 it/sec) -training >> step=5681400, episode=948 reward=0.7751451 (469.58 it/sec) -training >> step=5681500, episode=948 reward=0.7706359 (478.21 it/sec) -training >> step=5681600, episode=948 reward=0.755724 (450.05 it/sec) -training >> step=5681700, episode=948 reward=0.795922 (457.84 it/sec) -training >> step=5681800, episode=948 reward=0.7969907 (485.52 it/sec) -training >> step=5681900, episode=948 reward=0.7852413 (434.73 it/sec) -training >> step=5682000, episode=948 reward=0.7922314 (481.30 it/sec) -training >> step=5682100, episode=948 reward=0.787962 (469.96 it/sec) -training >> step=5682200, episode=948 reward=0.7840007 (441.37 it/sec) -training >> step=5682300, episode=948 reward=0.7806289 (497.27 it/sec) -training >> step=5682400, episode=948 reward=0.7777338 (460.17 it/sec) -training >> step=5682500, episode=948 reward=0.7811865 (460.14 it/sec) -training >> step=5682600, episode=948 reward=0.7727503 (435.94 it/sec) -training >> step=5682700, episode=948 reward=0.7998053 (441.20 it/sec) -training >> step=5682800, episode=948 reward=0.7868295 (516.90 it/sec) -training >> step=5682900, episode=948 reward=0.7727847 (438.27 it/sec) -training >> step=5683000, episode=948 reward=0.781267 (463.29 it/sec) -training >> step=5683100, episode=948 reward=0.7956084 (471.96 it/sec) -training >> step=5683200, episode=948 reward=0.8194416 (421.93 it/sec) -training >> step=5683300, episode=948 reward=0.7763209 (432.02 it/sec) -training >> step=5683400, episode=948 reward=0.7890955 (449.20 it/sec) -training >> step=5683500, episode=948 reward=0.777648 (445.90 it/sec) -training >> step=5683600, episode=948 reward=0.795611 (474.84 it/sec) -training >> step=5683700, episode=948 reward=0.7964788 (465.61 it/sec) -training >> step=5683800, episode=948 reward=0.779014 (475.73 it/sec) -training >> step=5683900, episode=948 reward=0.789495 (495.60 it/sec) -training >> step=5684000, episode=948 reward=0.7895377 (406.17 it/sec) -training >> step=5684100, episode=948 reward=0.7721355 (442.52 it/sec) -training >> step=5684200, episode=948 reward=0.7760872 (500.46 it/sec) -training >> step=5684300, episode=948 reward=0.7748419 (471.36 it/sec) -training >> step=5684400, episode=948 reward=0.778771 (438.70 it/sec) -training >> step=5684500, episode=948 reward=0.7830067 (504.72 it/sec) -training >> step=5684600, episode=948 reward=0.8053693 (364.78 it/sec) -training >> step=5684700, episode=948 reward=0.7865661 (472.19 it/sec) -training >> step=5684800, episode=948 reward=0.7953827 (471.17 it/sec) -training >> step=5684900, episode=948 reward=0.7840058 (441.25 it/sec) -training >> step=5685000, episode=948 reward=0.7655954 (464.19 it/sec) -training >> step=5685100, episode=948 reward=0.7668101 (475.25 it/sec) -training >> step=5685200, episode=948 reward=0.7855361 (475.29 it/sec) -training >> step=5685300, episode=948 reward=0.7835059 (496.39 it/sec) -training >> step=5685400, episode=948 reward=0.8014507 (455.74 it/sec) -training >> step=5685500, episode=948 reward=0.7870355 (477.21 it/sec) -training >> step=5685600, episode=948 reward=0.7864343 (457.07 it/sec) -training >> step=5685700, episode=948 reward=0.7830915 (498.38 it/sec) -training >> step=5685800, episode=948 reward=0.7885469 (442.51 it/sec) -training >> step=5685900, episode=948 reward=0.7791529 (466.09 it/sec) -training >> step=5686000, episode=948 reward=0.7588159 (425.47 it/sec) -training >> step=5686100, episode=948 reward=0.7809325 (478.14 it/sec) -training >> step=5686200, episode=948 reward=0.7601674 (489.74 it/sec) -training >> step=5686300, episode=948 reward=0.7578718 (475.37 it/sec) -training >> step=5686400, episode=948 reward=0.7703568 (543.80 it/sec) -training >> step=5686500, episode=948 reward=0.7822437 (479.48 it/sec) -training >> step=5686600, episode=948 reward=0.7526525 (509.19 it/sec) -training >> step=5686700, episode=948 reward=0.7611977 (503.49 it/sec) -training >> step=5686800, episode=948 reward=0.7703919 (527.10 it/sec) -training >> step=5686900, episode=948 reward=0.7810404 (512.74 it/sec) -training >> step=5687000, episode=948 reward=0.8017852 (490.60 it/sec) -training >> step=5687100, episode=948 reward=0.7402021 (546.96 it/sec) -training >> step=5687200, episode=948 reward=0.7748564 (503.89 it/sec) -training >> step=5687300, episode=949 reward=0.7837525 (64.46 it/sec) -training >> step=5687400, episode=949 reward=0.7734125 (509.45 it/sec) -training >> step=5687500, episode=949 reward=0.7883333 (487.60 it/sec) -training >> step=5687600, episode=949 reward=0.7977308 (479.33 it/sec) -training >> step=5687700, episode=949 reward=0.7876954 (503.62 it/sec) -training >> step=5687800, episode=949 reward=0.7774075 (505.46 it/sec) -training >> step=5687900, episode=949 reward=0.8023584 (499.80 it/sec) -training >> step=5688000, episode=949 reward=0.7863958 (544.97 it/sec) -training >> step=5688100, episode=949 reward=0.7858337 (502.16 it/sec) -training >> step=5688200, episode=949 reward=0.7694775 (464.88 it/sec) -training >> step=5688300, episode=949 reward=0.7792901 (530.24 it/sec) -training >> step=5688400, episode=949 reward=0.7934386 (489.42 it/sec) -training >> step=5688500, episode=949 reward=0.7813355 (504.59 it/sec) -training >> step=5688600, episode=949 reward=0.7828232 (505.17 it/sec) -training >> step=5688700, episode=949 reward=0.7592606 (508.81 it/sec) -training >> step=5688800, episode=949 reward=0.7757644 (474.09 it/sec) -training >> step=5688900, episode=949 reward=0.7738339 (498.70 it/sec) -training >> step=5689000, episode=949 reward=0.77285 (516.72 it/sec) -training >> step=5689100, episode=949 reward=0.7709174 (528.46 it/sec) -training >> step=5689200, episode=949 reward=0.7854925 (505.91 it/sec) -training >> step=5689300, episode=949 reward=0.7844108 (482.66 it/sec) -training >> step=5689400, episode=949 reward=0.7783512 (494.41 it/sec) -training >> step=5689500, episode=949 reward=0.7938887 (499.54 it/sec) -training >> step=5689600, episode=949 reward=0.7823442 (510.70 it/sec) -training >> step=5689700, episode=949 reward=0.793034 (526.62 it/sec) -training >> step=5689800, episode=949 reward=0.7914525 (503.86 it/sec) -training >> step=5689900, episode=949 reward=0.8006858 (527.10 it/sec) -training >> step=5690000, episode=949 reward=0.7779832 (497.21 it/sec) -training >> step=5690100, episode=949 reward=0.7810922 (497.88 it/sec) -training >> step=5690200, episode=949 reward=0.7812204 (486.42 it/sec) -training >> step=5690300, episode=949 reward=0.8169665 (453.83 it/sec) -training >> step=5690400, episode=949 reward=0.7875438 (398.25 it/sec) -training >> step=5690500, episode=949 reward=0.78398 (468.33 it/sec) -training >> step=5690600, episode=949 reward=0.7865295 (496.36 it/sec) -training >> step=5690700, episode=949 reward=0.7929201 (482.73 it/sec) -training >> step=5690800, episode=949 reward=0.787233 (487.74 it/sec) -training >> step=5690900, episode=949 reward=0.7740158 (473.54 it/sec) -training >> step=5691000, episode=949 reward=0.7647917 (479.83 it/sec) -training >> step=5691100, episode=949 reward=0.7723704 (485.67 it/sec) -training >> step=5691200, episode=949 reward=0.7962419 (506.26 it/sec) -training >> step=5691300, episode=949 reward=0.7743168 (527.08 it/sec) -training >> step=5691400, episode=949 reward=0.7897808 (528.12 it/sec) -training >> step=5691500, episode=949 reward=0.7735727 (475.48 it/sec) -training >> step=5691600, episode=949 reward=0.7785412 (492.21 it/sec) -training >> step=5691700, episode=949 reward=0.7747793 (520.62 it/sec) -training >> step=5691800, episode=949 reward=0.7912959 (465.52 it/sec) -training >> step=5691900, episode=949 reward=0.7737363 (447.75 it/sec) -training >> step=5692000, episode=949 reward=0.7562331 (483.11 it/sec) -training >> step=5692100, episode=949 reward=0.7657502 (509.09 it/sec) -training >> step=5692200, episode=949 reward=0.7709209 (483.39 it/sec) -training >> step=5692300, episode=949 reward=0.7595126 (465.59 it/sec) -training >> step=5692400, episode=949 reward=0.7583355 (439.97 it/sec) -training >> step=5692500, episode=949 reward=0.7524902 (467.06 it/sec) -training >> step=5692600, episode=949 reward=0.7526231 (502.14 it/sec) -training >> step=5692700, episode=949 reward=0.775538 (513.03 it/sec) -training >> step=5692800, episode=949 reward=0.7521278 (500.15 it/sec) -training >> step=5692900, episode=949 reward=0.7598339 (482.83 it/sec) -training >> step=5693000, episode=949 reward=0.765577 (520.68 it/sec) -training >> step=5693100, episode=949 reward=0.7688771 (514.72 it/sec) -training >> step=5693200, episode=949 reward=0.7861155 (494.72 it/sec) -training >> step=5693300, episode=950 reward=0.789966 (129.88 it/sec) -training >> step=5693400, episode=950 reward=0.8008481 (481.10 it/sec) -training >> step=5693500, episode=950 reward=0.772563 (472.43 it/sec) -training >> step=5693600, episode=950 reward=0.7849312 (507.40 it/sec) -training >> step=5693700, episode=950 reward=0.7966496 (495.57 it/sec) -training >> step=5693800, episode=950 reward=0.792249 (525.14 it/sec) -training >> step=5693900, episode=950 reward=0.8060841 (496.40 it/sec) -training >> step=5694000, episode=950 reward=0.7879679 (479.87 it/sec) -training >> step=5694100, episode=950 reward=0.778253 (527.81 it/sec) -training >> step=5694200, episode=950 reward=0.7558227 (474.39 it/sec) -training >> step=5694300, episode=950 reward=0.7939798 (524.14 it/sec) -training >> step=5694400, episode=950 reward=0.7764879 (495.07 it/sec) -training >> step=5694500, episode=950 reward=0.7639099 (513.07 it/sec) -training >> step=5694600, episode=950 reward=0.784943 (527.17 it/sec) -training >> step=5694700, episode=950 reward=0.7973674 (496.70 it/sec) -training >> step=5694800, episode=950 reward=0.7879062 (511.05 it/sec) -training >> step=5694900, episode=950 reward=0.790877 (480.09 it/sec) -training >> step=5695000, episode=950 reward=0.7797158 (497.01 it/sec) -training >> step=5695100, episode=950 reward=0.7719409 (481.57 it/sec) -training >> step=5695200, episode=950 reward=0.7882676 (535.44 it/sec) -training >> step=5695300, episode=950 reward=0.7960152 (502.60 it/sec) -training >> step=5695400, episode=950 reward=0.7906924 (493.05 it/sec) -training >> step=5695500, episode=950 reward=0.7927595 (510.50 it/sec) -training >> step=5695600, episode=950 reward=0.7780556 (529.24 it/sec) -training >> step=5695700, episode=950 reward=0.7868918 (500.76 it/sec) -training >> step=5695800, episode=950 reward=0.7862396 (466.85 it/sec) -training >> step=5695900, episode=950 reward=0.7778051 (509.24 it/sec) -training >> step=5696000, episode=950 reward=0.797794 (526.02 it/sec) -training >> step=5696100, episode=950 reward=0.7769989 (501.89 it/sec) -training >> step=5696200, episode=950 reward=0.8123555 (530.69 it/sec) -training >> step=5696300, episode=950 reward=0.792115 (440.71 it/sec) -training >> step=5696400, episode=950 reward=0.7723522 (432.67 it/sec) -training >> step=5696500, episode=950 reward=0.7888778 (426.25 it/sec) -training >> step=5696600, episode=950 reward=0.7665214 (312.50 it/sec) -training >> step=5696700, episode=950 reward=0.7994881 (451.43 it/sec) -training >> step=5696800, episode=950 reward=0.7860326 (401.51 it/sec) -training >> step=5696900, episode=950 reward=0.7763515 (451.48 it/sec) -training >> step=5697000, episode=950 reward=0.7579261 (481.76 it/sec) -training >> step=5697100, episode=950 reward=0.7731073 (445.46 it/sec) -training >> step=5697200, episode=950 reward=0.7843233 (524.95 it/sec) -training >> step=5697300, episode=950 reward=0.7618706 (534.01 it/sec) -training >> step=5697400, episode=950 reward=0.7912117 (521.03 it/sec) -training >> step=5697500, episode=950 reward=0.7980034 (541.86 it/sec) -training >> step=5697600, episode=950 reward=0.779817 (515.54 it/sec) -training >> step=5697700, episode=950 reward=0.764733 (506.94 it/sec) -training >> step=5697800, episode=950 reward=0.7842273 (494.96 it/sec) -training >> step=5697900, episode=950 reward=0.7539065 (517.61 it/sec) -training >> step=5698000, episode=950 reward=0.7825212 (514.76 it/sec) -training >> step=5698100, episode=950 reward=0.7945438 (468.81 it/sec) -training >> step=5698200, episode=950 reward=0.7711351 (501.69 it/sec) -training >> step=5698300, episode=950 reward=0.741482 (514.85 it/sec) -training >> step=5698400, episode=950 reward=0.7705501 (512.16 it/sec) -training >> step=5698500, episode=950 reward=0.776889 (510.31 it/sec) -training >> step=5698600, episode=950 reward=0.7522162 (556.44 it/sec) -training >> step=5698700, episode=950 reward=0.7371123 (503.77 it/sec) -training >> step=5698800, episode=950 reward=0.7693728 (501.38 it/sec) -training >> step=5698900, episode=950 reward=0.7801789 (452.07 it/sec) -training >> step=5699000, episode=950 reward=0.7750946 (525.38 it/sec) -training >> step=5699100, episode=950 reward=0.7637917 (491.45 it/sec) -training >> step=5699200, episode=950 reward=0.7810208 (493.35 it/sec) -training >> step=5699300, episode=951 reward=0.7760645 (63.90 it/sec) -training >> step=5699400, episode=951 reward=0.7853974 (506.15 it/sec) -training >> step=5699500, episode=951 reward=0.7937436 (477.73 it/sec) -training >> step=5699600, episode=951 reward=0.772094 (530.27 it/sec) -training >> step=5699700, episode=951 reward=0.7811847 (471.92 it/sec) -training >> step=5699800, episode=951 reward=0.7820057 (505.14 it/sec) -training >> step=5699900, episode=951 reward=0.7812307 (495.33 it/sec) -training >> step=5700000, episode=951 reward=0.7692322 (508.30 it/sec) -training >> step=5700100, episode=951 reward=0.7816434 (479.92 it/sec) -training >> step=5700200, episode=951 reward=0.7988876 (508.19 it/sec) -training >> step=5700300, episode=951 reward=0.7889575 (460.64 it/sec) -training >> step=5700400, episode=951 reward=0.7577607 (521.06 it/sec) -training >> step=5700500, episode=951 reward=0.8002607 (521.44 it/sec) -training >> step=5700600, episode=951 reward=0.7621586 (394.85 it/sec) -training >> step=5700700, episode=951 reward=0.7660575 (472.66 it/sec) -training >> step=5700800, episode=951 reward=0.7749963 (482.92 it/sec) -training >> step=5700900, episode=951 reward=0.7921794 (495.18 it/sec) -training >> step=5701000, episode=951 reward=0.8138384 (514.73 it/sec) -training >> step=5701100, episode=951 reward=0.7841583 (502.04 it/sec) -training >> step=5701200, episode=951 reward=0.7733559 (515.07 it/sec) -training >> step=5701300, episode=951 reward=0.7908563 (515.58 it/sec) -training >> step=5701400, episode=951 reward=0.8081255 (496.98 it/sec) -training >> step=5701500, episode=951 reward=0.7727233 (517.44 it/sec) -training >> step=5701600, episode=951 reward=0.7945213 (484.30 it/sec) -training >> step=5701700, episode=951 reward=0.7599846 (497.40 it/sec) -training >> step=5701800, episode=951 reward=0.8067199 (477.36 it/sec) -training >> step=5701900, episode=951 reward=0.7874376 (549.15 it/sec) -training >> step=5702000, episode=951 reward=0.7891814 (514.64 it/sec) -training >> step=5702100, episode=951 reward=0.7765241 (438.48 it/sec) -training >> step=5702200, episode=951 reward=0.7683286 (512.38 it/sec) -training >> step=5702300, episode=951 reward=0.7631956 (526.07 it/sec) -training >> step=5702400, episode=951 reward=0.7873458 (497.12 it/sec) -training >> step=5702500, episode=951 reward=0.7921216 (465.53 it/sec) -training >> step=5702600, episode=951 reward=0.8050218 (500.94 it/sec) -training >> step=5702700, episode=951 reward=0.7893725 (542.07 it/sec) -training >> step=5702800, episode=951 reward=0.7857617 (462.75 it/sec) -training >> step=5702900, episode=951 reward=0.7858743 (355.59 it/sec) -training >> step=5703000, episode=951 reward=0.7923488 (479.88 it/sec) -training >> step=5703100, episode=951 reward=0.7733701 (498.20 it/sec) -training >> step=5703200, episode=951 reward=0.7918463 (489.87 it/sec) -training >> step=5703300, episode=951 reward=0.767362 (478.39 it/sec) -training >> step=5703400, episode=951 reward=0.8036616 (497.93 it/sec) -training >> step=5703500, episode=951 reward=0.7758607 (503.71 it/sec) -training >> step=5703600, episode=951 reward=0.7875752 (514.50 it/sec) -training >> step=5703700, episode=951 reward=0.769932 (477.79 it/sec) -training >> step=5703800, episode=951 reward=0.773542 (522.49 it/sec) -training >> step=5703900, episode=951 reward=0.7790331 (512.45 it/sec) -training >> step=5704000, episode=951 reward=0.7842168 (497.69 it/sec) -training >> step=5704100, episode=951 reward=0.7844305 (523.91 it/sec) -training >> step=5704200, episode=951 reward=0.7836415 (505.55 it/sec) -training >> step=5704300, episode=951 reward=0.8014863 (498.95 it/sec) -training >> step=5704400, episode=951 reward=0.7792674 (521.88 it/sec) -training >> step=5704500, episode=951 reward=0.7771703 (498.50 it/sec) -training >> step=5704600, episode=951 reward=0.7780656 (478.55 it/sec) -training >> step=5704700, episode=951 reward=0.7763087 (425.34 it/sec) -training >> step=5704800, episode=951 reward=0.7501599 (493.06 it/sec) -training >> step=5704900, episode=951 reward=0.7636554 (558.28 it/sec) -training >> step=5705000, episode=951 reward=0.7682392 (511.46 it/sec) -training >> step=5705100, episode=951 reward=0.7825066 (463.41 it/sec) -training >> step=5705200, episode=951 reward=0.7742202 (535.28 it/sec) -training >> step=5705300, episode=952 reward=0.7982031 (46.13 it/sec) -training >> step=5705400, episode=952 reward=0.7943333 (507.67 it/sec) -training >> step=5705500, episode=952 reward=0.766628 (494.10 it/sec) -training >> step=5705600, episode=952 reward=0.7944459 (507.67 it/sec) -training >> step=5705700, episode=952 reward=0.7869828 (449.12 it/sec) -training >> step=5705800, episode=952 reward=0.7852466 (542.75 it/sec) -training >> step=5705900, episode=952 reward=0.7680897 (494.96 it/sec) -training >> step=5706000, episode=952 reward=0.7799345 (535.21 it/sec) -training >> step=5706100, episode=952 reward=0.81315 (541.85 it/sec) -training >> step=5706200, episode=952 reward=0.7859424 (512.23 it/sec) -training >> step=5706300, episode=952 reward=0.7954707 (495.04 it/sec) -training >> step=5706400, episode=952 reward=0.7861457 (523.33 it/sec) -training >> step=5706500, episode=952 reward=0.7817968 (473.51 it/sec) -training >> step=5706600, episode=952 reward=0.7749404 (516.12 it/sec) -training >> step=5706700, episode=952 reward=0.7764053 (527.24 it/sec) -training >> step=5706800, episode=952 reward=0.7773141 (537.40 it/sec) -training >> step=5706900, episode=952 reward=0.798699 (504.91 it/sec) -training >> step=5707000, episode=952 reward=0.7827915 (469.50 it/sec) -training >> step=5707100, episode=952 reward=0.7711427 (514.00 it/sec) -training >> step=5707200, episode=952 reward=0.769075 (421.89 it/sec) -training >> step=5707300, episode=952 reward=0.788762 (486.93 it/sec) -training >> step=5707400, episode=952 reward=0.7749564 (503.71 it/sec) -training >> step=5707500, episode=952 reward=0.7903928 (471.33 it/sec) -training >> step=5707600, episode=952 reward=0.7993932 (494.45 it/sec) -training >> step=5707700, episode=952 reward=0.767731 (485.39 it/sec) -training >> step=5707800, episode=952 reward=0.7957523 (510.38 it/sec) -training >> step=5707900, episode=952 reward=0.7640849 (544.84 it/sec) -training >> step=5708000, episode=952 reward=0.7948197 (512.50 it/sec) -training >> step=5708100, episode=952 reward=0.7867789 (516.17 it/sec) -training >> step=5708200, episode=952 reward=0.7894203 (484.62 it/sec) -training >> step=5708300, episode=952 reward=0.7905819 (499.16 it/sec) -training >> step=5708400, episode=952 reward=0.8184431 (501.21 it/sec) -training >> step=5708500, episode=952 reward=0.764339 (493.09 it/sec) -training >> step=5708600, episode=952 reward=0.7919926 (545.62 it/sec) -training >> step=5708700, episode=952 reward=0.7772979 (490.58 it/sec) -training >> step=5708800, episode=952 reward=0.7631276 (494.18 it/sec) -training >> step=5708900, episode=952 reward=0.7718333 (499.16 it/sec) -training >> step=5709000, episode=952 reward=0.7811145 (384.41 it/sec) -training >> step=5709100, episode=952 reward=0.7803215 (538.45 it/sec) -training >> step=5709200, episode=952 reward=0.7722312 (454.50 it/sec) -training >> step=5709300, episode=952 reward=0.778001 (493.25 it/sec) -training >> step=5709400, episode=952 reward=0.8078832 (516.76 it/sec) -training >> step=5709500, episode=952 reward=0.7690943 (494.37 it/sec) -training >> step=5709600, episode=952 reward=0.7697843 (535.80 it/sec) -training >> step=5709700, episode=952 reward=0.7713669 (576.73 it/sec) -training >> step=5709800, episode=952 reward=0.7604402 (465.36 it/sec) -training >> step=5709900, episode=952 reward=0.7748969 (527.32 it/sec) -training >> step=5710000, episode=952 reward=0.7674551 (493.75 it/sec) -training >> step=5710100, episode=952 reward=0.7731028 (557.34 it/sec) -training >> step=5710200, episode=952 reward=0.7809021 (496.52 it/sec) -training >> step=5710300, episode=952 reward=0.7897698 (510.63 it/sec) -training >> step=5710400, episode=952 reward=0.7583582 (528.77 it/sec) -training >> step=5710500, episode=952 reward=0.7784153 (507.94 it/sec) -training >> step=5710600, episode=952 reward=0.7631882 (485.74 it/sec) -training >> step=5710700, episode=952 reward=0.7694667 (515.44 it/sec) -training >> step=5710800, episode=952 reward=0.7392287 (540.40 it/sec) -training >> step=5710900, episode=952 reward=0.7956411 (470.21 it/sec) -training >> step=5711000, episode=952 reward=0.7889291 (524.51 it/sec) -training >> step=5711100, episode=952 reward=0.7591223 (461.23 it/sec) -training >> step=5711200, episode=952 reward=0.774745 (543.39 it/sec) -training >> step=5711300, episode=953 reward=0.7986568 (69.68 it/sec) -training >> step=5711400, episode=953 reward=0.7834353 (513.99 it/sec) -training >> step=5711500, episode=953 reward=0.7626362 (478.63 it/sec) -training >> step=5711600, episode=953 reward=0.7716017 (438.29 it/sec) -training >> step=5711700, episode=953 reward=0.7772415 (538.68 it/sec) -training >> step=5711800, episode=953 reward=0.790779 (533.36 it/sec) -training >> step=5711900, episode=953 reward=0.8096133 (534.06 it/sec) -training >> step=5712000, episode=953 reward=0.7745166 (516.12 it/sec) -training >> step=5712100, episode=953 reward=0.79598 (516.26 it/sec) -training >> step=5712200, episode=953 reward=0.7809548 (506.36 it/sec) -training >> step=5712300, episode=953 reward=0.7732009 (520.86 it/sec) -training >> step=5712400, episode=953 reward=0.7841979 (475.94 it/sec) -training >> step=5712500, episode=953 reward=0.7705137 (472.71 it/sec) -training >> step=5712600, episode=953 reward=0.7848094 (509.39 it/sec) -training >> step=5712700, episode=953 reward=0.7877288 (513.98 it/sec) -training >> step=5712800, episode=953 reward=0.7863685 (473.24 it/sec) -training >> step=5712900, episode=953 reward=0.7723799 (415.65 it/sec) -training >> step=5713000, episode=953 reward=0.8054569 (383.36 it/sec) -training >> step=5713100, episode=953 reward=0.7926741 (471.60 it/sec) -training >> step=5713200, episode=953 reward=0.7747354 (499.91 it/sec) -training >> step=5713300, episode=953 reward=0.7793643 (451.14 it/sec) -training >> step=5713400, episode=953 reward=0.7816962 (475.38 it/sec) -training >> step=5713500, episode=953 reward=0.7759026 (478.20 it/sec) -training >> step=5713600, episode=953 reward=0.7683796 (514.86 it/sec) -training >> step=5713700, episode=953 reward=0.7724217 (483.44 it/sec) -training >> step=5713800, episode=953 reward=0.7766903 (464.94 it/sec) -training >> step=5713900, episode=953 reward=0.7851214 (491.87 it/sec) -training >> step=5714000, episode=953 reward=0.7887639 (516.71 it/sec) -training >> step=5714100, episode=953 reward=0.7763923 (527.64 it/sec) -training >> step=5714200, episode=953 reward=0.8003731 (411.67 it/sec) -training >> step=5714300, episode=953 reward=0.7898094 (483.03 it/sec) -training >> step=5714400, episode=953 reward=0.7591439 (522.98 it/sec) -training >> step=5714500, episode=953 reward=0.7868349 (473.80 it/sec) -training >> step=5714600, episode=953 reward=0.7977979 (509.22 it/sec) -training >> step=5714700, episode=953 reward=0.7703214 (504.15 it/sec) -training >> step=5714800, episode=953 reward=0.782456 (446.13 it/sec) -training >> step=5714900, episode=953 reward=0.7791988 (405.17 it/sec) -training >> step=5715000, episode=953 reward=0.7946982 (476.14 it/sec) -training >> step=5715100, episode=953 reward=0.7967898 (352.97 it/sec) -training >> step=5715200, episode=953 reward=0.7723356 (477.72 it/sec) -training >> step=5715300, episode=953 reward=0.8020689 (467.97 it/sec) -training >> step=5715400, episode=953 reward=0.7893506 (494.13 it/sec) -training >> step=5715500, episode=953 reward=0.7958397 (496.46 it/sec) -training >> step=5715600, episode=953 reward=0.7744451 (442.07 it/sec) -training >> step=5715700, episode=953 reward=0.7939023 (447.90 it/sec) -training >> step=5715800, episode=953 reward=0.7827743 (522.57 it/sec) -training >> step=5715900, episode=953 reward=0.7819288 (508.74 it/sec) -training >> step=5716000, episode=953 reward=0.768882 (490.34 it/sec) -training >> step=5716100, episode=953 reward=0.7878377 (484.00 it/sec) -training >> step=5716200, episode=953 reward=0.7725231 (502.16 it/sec) -training >> step=5716300, episode=953 reward=0.7624403 (491.11 it/sec) -training >> step=5716400, episode=953 reward=0.7967868 (461.23 it/sec) -training >> step=5716500, episode=953 reward=0.7828435 (441.57 it/sec) -training >> step=5716600, episode=953 reward=0.8079214 (480.71 it/sec) -training >> step=5716700, episode=953 reward=0.7354252 (447.12 it/sec) -training >> step=5716800, episode=953 reward=0.7920306 (521.96 it/sec) -training >> step=5716900, episode=953 reward=0.7940356 (500.16 it/sec) -training >> step=5717000, episode=953 reward=0.7660059 (416.54 it/sec) -training >> step=5717100, episode=953 reward=0.772189 (444.39 it/sec) -training >> step=5717200, episode=953 reward=0.7625546 (495.39 it/sec) -training >> step=5717300, episode=954 reward=0.7781686 (120.64 it/sec) -training >> step=5717400, episode=954 reward=0.7688299 (498.69 it/sec) -training >> step=5717500, episode=954 reward=0.7742558 (486.28 it/sec) -training >> step=5717600, episode=954 reward=0.7907819 (510.91 it/sec) -training >> step=5717700, episode=954 reward=0.766225 (490.14 it/sec) -training >> step=5717800, episode=954 reward=0.7675749 (523.89 it/sec) -training >> step=5717900, episode=954 reward=0.7781546 (512.15 it/sec) -training >> step=5718000, episode=954 reward=0.7804933 (529.47 it/sec) -training >> step=5718100, episode=954 reward=0.7822369 (543.19 it/sec) -training >> step=5718200, episode=954 reward=0.7746865 (505.75 it/sec) -training >> step=5718300, episode=954 reward=0.7965939 (511.24 it/sec) -training >> step=5718400, episode=954 reward=0.7587146 (472.59 it/sec) -training >> step=5718500, episode=954 reward=0.811276 (501.82 it/sec) -training >> step=5718600, episode=954 reward=0.7689134 (508.69 it/sec) -training >> step=5718700, episode=954 reward=0.7724018 (435.58 it/sec) -training >> step=5718800, episode=954 reward=0.7980986 (484.26 it/sec) -training >> step=5718900, episode=954 reward=0.7832221 (480.77 it/sec) -training >> step=5719000, episode=954 reward=0.7892488 (474.23 it/sec) -training >> step=5719100, episode=954 reward=0.7862004 (568.69 it/sec) -training >> step=5719200, episode=954 reward=0.7747804 (411.38 it/sec) -training >> step=5719300, episode=954 reward=0.8000622 (443.82 it/sec) -training >> step=5719400, episode=954 reward=0.779734 (478.80 it/sec) -training >> step=5719500, episode=954 reward=0.786281 (545.57 it/sec) -training >> step=5719600, episode=954 reward=0.7824846 (517.25 it/sec) -training >> step=5719700, episode=954 reward=0.7902642 (488.51 it/sec) -training >> step=5719800, episode=954 reward=0.8060018 (500.73 it/sec) -training >> step=5719900, episode=954 reward=0.7916816 (475.74 it/sec) -training >> step=5720000, episode=954 reward=0.7961278 (483.52 it/sec) -training >> step=5720100, episode=954 reward=0.7765279 (469.58 it/sec) -training >> step=5720200, episode=954 reward=0.7856548 (498.13 it/sec) -training >> step=5720300, episode=954 reward=0.7892746 (482.66 it/sec) -training >> step=5720400, episode=954 reward=0.7783012 (481.06 it/sec) -training >> step=5720500, episode=954 reward=0.7675634 (483.90 it/sec) -training >> step=5720600, episode=954 reward=0.7898503 (491.27 it/sec) -training >> step=5720700, episode=954 reward=0.7941173 (469.29 it/sec) -training >> step=5720800, episode=954 reward=0.7655668 (503.05 it/sec) -training >> step=5720900, episode=954 reward=0.7841429 (433.28 it/sec) -training >> step=5721000, episode=954 reward=0.798366 (450.77 it/sec) -training >> step=5721100, episode=954 reward=0.7872025 (491.05 it/sec) -training >> step=5721200, episode=954 reward=0.7919211 (503.74 it/sec) -training >> step=5721300, episode=954 reward=0.7900459 (383.08 it/sec) -training >> step=5721400, episode=954 reward=0.796612 (504.06 it/sec) -training >> step=5721500, episode=954 reward=0.7684346 (464.40 it/sec) -training >> step=5721600, episode=954 reward=0.7898537 (517.33 it/sec) -training >> step=5721700, episode=954 reward=0.7966413 (481.86 it/sec) -training >> step=5721800, episode=954 reward=0.7990243 (509.15 it/sec) -training >> step=5721900, episode=954 reward=0.7626716 (506.52 it/sec) -training >> step=5722000, episode=954 reward=0.8017222 (508.01 it/sec) -training >> step=5722100, episode=954 reward=0.7855427 (494.81 it/sec) -training >> step=5722200, episode=954 reward=0.7805542 (502.48 it/sec) -training >> step=5722300, episode=954 reward=0.7908298 (525.79 it/sec) -training >> step=5722400, episode=954 reward=0.7653672 (512.06 it/sec) -training >> step=5722500, episode=954 reward=0.7721795 (527.90 it/sec) -training >> step=5722600, episode=954 reward=0.7945959 (515.60 it/sec) -training >> step=5722700, episode=954 reward=0.7698059 (537.73 it/sec) -training >> step=5722800, episode=954 reward=0.7849365 (516.46 it/sec) -training >> step=5722900, episode=954 reward=0.7658795 (469.29 it/sec) -training >> step=5723000, episode=954 reward=0.7749506 (502.38 it/sec) -training >> step=5723100, episode=954 reward=0.7759995 (515.01 it/sec) -training >> step=5723200, episode=954 reward=0.7736123 (485.25 it/sec) -training >> step=5723300, episode=955 reward=0.7804395 (51.97 it/sec) -training >> step=5723400, episode=955 reward=0.7963064 (403.48 it/sec) -training >> step=5723500, episode=955 reward=0.7854243 (383.62 it/sec) -training >> step=5723600, episode=955 reward=0.7817684 (438.37 it/sec) -training >> step=5723700, episode=955 reward=0.7993825 (482.29 it/sec) -training >> step=5723800, episode=955 reward=0.7669896 (459.97 it/sec) -training >> step=5723900, episode=955 reward=0.7978712 (524.60 it/sec) -training >> step=5724000, episode=955 reward=0.785888 (478.66 it/sec) -training >> step=5724100, episode=955 reward=0.7812548 (489.43 it/sec) -training >> step=5724200, episode=955 reward=0.7891037 (504.08 it/sec) -training >> step=5724300, episode=955 reward=0.7956014 (531.66 it/sec) -training >> step=5724400, episode=955 reward=0.7939572 (522.85 it/sec) -training >> step=5724500, episode=955 reward=0.7914978 (504.55 it/sec) -training >> step=5724600, episode=955 reward=0.7757081 (491.92 it/sec) -training >> step=5724700, episode=955 reward=0.779267 (476.52 it/sec) -training >> step=5724800, episode=955 reward=0.7898291 (453.43 it/sec) -training >> step=5724900, episode=955 reward=0.7784153 (504.61 it/sec) -training >> step=5725000, episode=955 reward=0.7783709 (477.81 it/sec) -training >> step=5725100, episode=955 reward=0.7572265 (466.09 it/sec) -training >> step=5725200, episode=955 reward=0.7781163 (485.40 it/sec) -training >> step=5725300, episode=955 reward=0.7946766 (468.13 it/sec) -training >> step=5725400, episode=955 reward=0.7636114 (543.68 it/sec) -training >> step=5725500, episode=955 reward=0.787143 (490.14 it/sec) -training >> step=5725600, episode=955 reward=0.8035662 (450.72 it/sec) -training >> step=5725700, episode=955 reward=0.7856338 (427.53 it/sec) -training >> step=5725800, episode=955 reward=0.7954263 (447.23 it/sec) -training >> step=5725900, episode=955 reward=0.780403 (399.44 it/sec) -training >> step=5726000, episode=955 reward=0.7770884 (426.03 it/sec) -training >> step=5726100, episode=955 reward=0.7766148 (433.38 it/sec) -training >> step=5726200, episode=955 reward=0.7807591 (390.68 it/sec) -training >> step=5726300, episode=955 reward=0.7839059 (400.67 it/sec) -training >> step=5726400, episode=955 reward=0.7858942 (395.38 it/sec) -training >> step=5726500, episode=955 reward=0.7810739 (378.87 it/sec) -training >> step=5726600, episode=955 reward=0.7909369 (458.94 it/sec) -training >> step=5726700, episode=955 reward=0.7780107 (473.40 it/sec) -training >> step=5726800, episode=955 reward=0.7753256 (471.56 it/sec) -training >> step=5726900, episode=955 reward=0.7928736 (474.44 it/sec) -training >> step=5727000, episode=955 reward=0.7996376 (469.90 it/sec) -training >> step=5727100, episode=955 reward=0.7728981 (450.46 it/sec) -training >> step=5727200, episode=955 reward=0.7912588 (488.09 it/sec) -training >> step=5727300, episode=955 reward=0.7664391 (464.05 it/sec) -training >> step=5727400, episode=955 reward=0.8040267 (418.59 it/sec) -training >> step=5727500, episode=955 reward=0.7969702 (455.46 it/sec) -training >> step=5727600, episode=955 reward=0.7854338 (353.44 it/sec) -training >> step=5727700, episode=955 reward=0.7757071 (445.01 it/sec) -training >> step=5727800, episode=955 reward=0.7874237 (453.12 it/sec) -training >> step=5727900, episode=955 reward=0.7865147 (417.36 it/sec) -training >> step=5728000, episode=955 reward=0.7704988 (440.54 it/sec) -training >> step=5728100, episode=955 reward=0.7928573 (439.79 it/sec) -training >> step=5728200, episode=955 reward=0.7771727 (414.42 it/sec) -training >> step=5728300, episode=955 reward=0.7773678 (408.39 it/sec) -training >> step=5728400, episode=955 reward=0.785597 (439.91 it/sec) -training >> step=5728500, episode=955 reward=0.7724637 (469.08 it/sec) -training >> step=5728600, episode=955 reward=0.7715743 (452.40 it/sec) -training >> step=5728700, episode=955 reward=0.7779426 (450.03 it/sec) -training >> step=5728800, episode=955 reward=0.7559767 (475.35 it/sec) -training >> step=5728900, episode=955 reward=0.7745153 (466.72 it/sec) -training >> step=5729000, episode=955 reward=0.7609107 (484.85 it/sec) -training >> step=5729100, episode=955 reward=0.7775429 (453.93 it/sec) -training >> step=5729200, episode=955 reward=0.7822468 (427.01 it/sec) -training >> step=5729300, episode=956 reward=0.7666348 (90.52 it/sec) -training >> step=5729400, episode=956 reward=0.7955929 (424.32 it/sec) -training >> step=5729500, episode=956 reward=0.7646953 (434.23 it/sec) -training >> step=5729600, episode=956 reward=0.7957647 (441.50 it/sec) -training >> step=5729700, episode=956 reward=0.7740467 (477.78 it/sec) -training >> step=5729800, episode=956 reward=0.8071275 (417.16 it/sec) -training >> step=5729900, episode=956 reward=0.7724556 (449.72 it/sec) -training >> step=5730000, episode=956 reward=0.7795635 (473.97 it/sec) -training >> step=5730100, episode=956 reward=0.783682 (418.09 it/sec) -training >> step=5730200, episode=956 reward=0.789659 (448.93 it/sec) -training >> step=5730300, episode=956 reward=0.7718553 (477.98 it/sec) -training >> step=5730400, episode=956 reward=0.76653 (436.96 it/sec) -training >> step=5730500, episode=956 reward=0.7753324 (435.55 it/sec) -training >> step=5730600, episode=956 reward=0.7775809 (463.46 it/sec) -training >> step=5730700, episode=956 reward=0.7945352 (416.99 it/sec) -training >> step=5730800, episode=956 reward=0.7955396 (435.33 it/sec) -training >> step=5730900, episode=956 reward=0.7786065 (478.14 it/sec) -training >> step=5731000, episode=956 reward=0.783446 (487.50 it/sec) -training >> step=5731100, episode=956 reward=0.7680972 (445.59 it/sec) -training >> step=5731200, episode=956 reward=0.7768954 (457.07 it/sec) -training >> step=5731300, episode=956 reward=0.8028838 (466.14 it/sec) -training >> step=5731400, episode=956 reward=0.7757677 (470.32 it/sec) -training >> step=5731500, episode=956 reward=0.7665882 (412.07 it/sec) -training >> step=5731600, episode=956 reward=0.7849429 (433.96 it/sec) -training >> step=5731700, episode=956 reward=0.775044 (443.15 it/sec) -training >> step=5731800, episode=956 reward=0.7903345 (452.12 it/sec) -training >> step=5731900, episode=956 reward=0.7697515 (462.39 it/sec) -training >> step=5732000, episode=956 reward=0.783756 (455.40 it/sec) -training >> step=5732100, episode=956 reward=0.7716314 (487.17 it/sec) -training >> step=5732200, episode=956 reward=0.7930886 (439.40 it/sec) -training >> step=5732300, episode=956 reward=0.7802598 (450.89 it/sec) -training >> step=5732400, episode=956 reward=0.8018233 (471.00 it/sec) -training >> step=5732500, episode=956 reward=0.7652448 (436.62 it/sec) -training >> step=5732600, episode=956 reward=0.7942255 (467.07 it/sec) -training >> step=5732700, episode=956 reward=0.7634162 (445.08 it/sec) -training >> step=5732800, episode=956 reward=0.7753618 (483.56 it/sec) -training >> step=5732900, episode=956 reward=0.7800956 (436.99 it/sec) -training >> step=5733000, episode=956 reward=0.801468 (451.75 it/sec) -training >> step=5733100, episode=956 reward=0.8011056 (461.53 it/sec) -training >> step=5733200, episode=956 reward=0.7704048 (445.48 it/sec) -training >> step=5733300, episode=956 reward=0.7941747 (469.47 it/sec) -training >> step=5733400, episode=956 reward=0.7746891 (451.39 it/sec) -training >> step=5733500, episode=956 reward=0.7674038 (476.68 it/sec) -training >> step=5733600, episode=956 reward=0.7863421 (446.31 it/sec) -training >> step=5733700, episode=956 reward=0.76989 (416.21 it/sec) -training >> step=5733800, episode=956 reward=0.7909955 (461.77 it/sec) -training >> step=5733900, episode=956 reward=0.7796623 (455.30 it/sec) -training >> step=5734000, episode=956 reward=0.7902657 (301.64 it/sec) -training >> step=5734100, episode=956 reward=0.7627287 (457.58 it/sec) -training >> step=5734200, episode=956 reward=0.7756538 (505.73 it/sec) -training >> step=5734300, episode=956 reward=0.760977 (489.97 it/sec) -training >> step=5734400, episode=956 reward=0.7982222 (500.70 it/sec) -training >> step=5734500, episode=956 reward=0.7728669 (503.41 it/sec) -training >> step=5734600, episode=956 reward=0.7653229 (503.65 it/sec) -training >> step=5734700, episode=956 reward=0.7739071 (456.47 it/sec) -training >> step=5734800, episode=956 reward=0.7804436 (487.49 it/sec) -training >> step=5734900, episode=956 reward=0.7715999 (501.56 it/sec) -training >> step=5735000, episode=956 reward=0.7857223 (525.39 it/sec) -training >> step=5735100, episode=956 reward=0.783273 (492.18 it/sec) -training >> step=5735200, episode=956 reward=0.7593802 (497.18 it/sec) -training >> step=5735300, episode=957 reward=0.7923912 (90.81 it/sec) -training >> step=5735400, episode=957 reward=0.7743329 (438.89 it/sec) -training >> step=5735500, episode=957 reward=0.7684493 (504.86 it/sec) -training >> step=5735600, episode=957 reward=0.7556649 (490.17 it/sec) -training >> step=5735700, episode=957 reward=0.7998311 (481.61 it/sec) -training >> step=5735800, episode=957 reward=0.7721753 (492.35 it/sec) -training >> step=5735900, episode=957 reward=0.8102257 (511.14 it/sec) -training >> step=5736000, episode=957 reward=0.7730277 (515.48 it/sec) -training >> step=5736100, episode=957 reward=0.7766415 (503.13 it/sec) -training >> step=5736200, episode=957 reward=0.7823777 (482.41 it/sec) -training >> step=5736300, episode=957 reward=0.7864023 (541.38 it/sec) -training >> step=5736400, episode=957 reward=0.7937371 (501.78 it/sec) -training >> step=5736500, episode=957 reward=0.8001368 (489.74 it/sec) -training >> step=5736600, episode=957 reward=0.795204 (465.19 it/sec) -training >> step=5736700, episode=957 reward=0.783884 (505.15 it/sec) -training >> step=5736800, episode=957 reward=0.7620007 (495.89 it/sec) -training >> step=5736900, episode=957 reward=0.7834309 (418.99 it/sec) -training >> step=5737000, episode=957 reward=0.7855502 (464.86 it/sec) -training >> step=5737100, episode=957 reward=0.7771711 (479.19 it/sec) -training >> step=5737200, episode=957 reward=0.7907344 (499.81 it/sec) -training >> step=5737300, episode=957 reward=0.800127 (504.90 it/sec) -training >> step=5737400, episode=957 reward=0.7754955 (525.56 it/sec) -training >> step=5737500, episode=957 reward=0.7974756 (477.82 it/sec) -training >> step=5737600, episode=957 reward=0.7819057 (513.34 it/sec) -training >> step=5737700, episode=957 reward=0.7765502 (539.35 it/sec) -training >> step=5737800, episode=957 reward=0.7832991 (517.17 it/sec) -training >> step=5737900, episode=957 reward=0.7857196 (457.79 it/sec) -training >> step=5738000, episode=957 reward=0.7935705 (475.33 it/sec) -training >> step=5738100, episode=957 reward=0.7923478 (550.08 it/sec) -training >> step=5738200, episode=957 reward=0.776641 (507.87 it/sec) -training >> step=5738300, episode=957 reward=0.7936599 (480.45 it/sec) -training >> step=5738400, episode=957 reward=0.7984465 (502.61 it/sec) -training >> step=5738500, episode=957 reward=0.7928303 (484.31 it/sec) -training >> step=5738600, episode=957 reward=0.7769283 (495.50 it/sec) -training >> step=5738700, episode=957 reward=0.7785571 (479.51 it/sec) -training >> step=5738800, episode=957 reward=0.7846245 (522.07 it/sec) -training >> step=5738900, episode=957 reward=0.7835843 (442.31 it/sec) -training >> step=5739000, episode=957 reward=0.7840603 (477.61 it/sec) -training >> step=5739100, episode=957 reward=0.7776993 (509.93 it/sec) -training >> step=5739200, episode=957 reward=0.7863421 (517.51 it/sec) -training >> step=5739300, episode=957 reward=0.7953445 (458.41 it/sec) -training >> step=5739400, episode=957 reward=0.7824149 (481.56 it/sec) -training >> step=5739500, episode=957 reward=0.7870722 (471.87 it/sec) -training >> step=5739600, episode=957 reward=0.7917664 (520.78 it/sec) -training >> step=5739700, episode=957 reward=0.7894771 (508.01 it/sec) -training >> step=5739800, episode=957 reward=0.7896148 (496.22 it/sec) -training >> step=5739900, episode=957 reward=0.7812234 (502.97 it/sec) -training >> step=5740000, episode=957 reward=0.7891867 (474.11 it/sec) -training >> step=5740100, episode=957 reward=0.7596812 (511.04 it/sec) -training >> step=5740200, episode=957 reward=0.7751164 (369.75 it/sec) -training >> step=5740300, episode=957 reward=0.7636619 (535.34 it/sec) -training >> step=5740400, episode=957 reward=0.7784531 (500.30 it/sec) -training >> step=5740500, episode=957 reward=0.768842 (521.35 it/sec) -training >> step=5740600, episode=957 reward=0.7733874 (443.79 it/sec) -training >> step=5740700, episode=957 reward=0.7859317 (452.09 it/sec) -training >> step=5740800, episode=957 reward=0.766302 (548.51 it/sec) -training >> step=5740900, episode=957 reward=0.7742563 (521.05 it/sec) -training >> step=5741000, episode=957 reward=0.7543124 (571.48 it/sec) -training >> step=5741100, episode=957 reward=0.7857552 (546.47 it/sec) -training >> step=5741200, episode=957 reward=0.7731144 (496.01 it/sec) -training >> step=5741300, episode=958 reward=0.7675955 (137.00 it/sec) -training >> step=5741400, episode=958 reward=0.7771049 (538.68 it/sec) -training >> step=5741500, episode=958 reward=0.7768896 (503.22 it/sec) -training >> step=5741600, episode=958 reward=0.7607322 (510.27 it/sec) -training >> step=5741700, episode=958 reward=0.7806523 (551.89 it/sec) -training >> step=5741800, episode=958 reward=0.7833335 (518.01 it/sec) -training >> step=5741900, episode=958 reward=0.7730154 (520.66 it/sec) -training >> step=5742000, episode=958 reward=0.7688665 (514.57 it/sec) -training >> step=5742100, episode=958 reward=0.7887159 (556.96 it/sec) -training >> step=5742200, episode=958 reward=0.7806072 (542.93 it/sec) -training >> step=5742300, episode=958 reward=0.7677921 (525.24 it/sec) -training >> step=5742400, episode=958 reward=0.7950331 (521.23 it/sec) -training >> step=5742500, episode=958 reward=0.775483 (535.67 it/sec) -training >> step=5742600, episode=958 reward=0.7767272 (510.67 it/sec) -training >> step=5742700, episode=958 reward=0.7906515 (514.46 it/sec) -training >> step=5742800, episode=958 reward=0.7905728 (475.90 it/sec) -training >> step=5742900, episode=958 reward=0.7730644 (478.83 it/sec) -training >> step=5743000, episode=958 reward=0.7868282 (516.52 it/sec) -training >> step=5743100, episode=958 reward=0.8067849 (525.40 it/sec) -training >> step=5743200, episode=958 reward=0.7903943 (532.92 it/sec) -training >> step=5743300, episode=958 reward=0.7767439 (527.48 it/sec) -training >> step=5743400, episode=958 reward=0.7768559 (510.81 it/sec) -training >> step=5743500, episode=958 reward=0.8078265 (571.14 it/sec) -training >> step=5743600, episode=958 reward=0.774422 (491.95 it/sec) -training >> step=5743700, episode=958 reward=0.7825425 (496.76 it/sec) -training >> step=5743800, episode=958 reward=0.7951574 (524.48 it/sec) -training >> step=5743900, episode=958 reward=0.8012666 (538.31 it/sec) -training >> step=5744000, episode=958 reward=0.7741029 (537.19 it/sec) -training >> step=5744100, episode=958 reward=0.7593665 (535.68 it/sec) -training >> step=5744200, episode=958 reward=0.7858931 (527.87 it/sec) -training >> step=5744300, episode=958 reward=0.7859134 (499.07 it/sec) -training >> step=5744400, episode=958 reward=0.7776342 (523.81 it/sec) -training >> step=5744500, episode=958 reward=0.7932124 (488.35 it/sec) -training >> step=5744600, episode=958 reward=0.7840531 (552.78 it/sec) -training >> step=5744700, episode=958 reward=0.8033249 (415.59 it/sec) -training >> step=5744800, episode=958 reward=0.7718238 (520.57 it/sec) -training >> step=5744900, episode=958 reward=0.795668 (540.04 it/sec) -training >> step=5745000, episode=958 reward=0.7845608 (487.72 it/sec) -training >> step=5745100, episode=958 reward=0.7932957 (547.37 it/sec) -training >> step=5745200, episode=958 reward=0.8019817 (492.28 it/sec) -training >> step=5745300, episode=958 reward=0.7727684 (496.29 it/sec) -training >> step=5745400, episode=958 reward=0.7874888 (534.71 it/sec) -training >> step=5745500, episode=958 reward=0.7970531 (495.69 it/sec) -training >> step=5745600, episode=958 reward=0.7899221 (491.31 it/sec) -training >> step=5745700, episode=958 reward=0.7760459 (505.30 it/sec) -training >> step=5745800, episode=958 reward=0.7871556 (516.20 it/sec) -training >> step=5745900, episode=958 reward=0.7865927 (530.53 it/sec) -training >> step=5746000, episode=958 reward=0.7980097 (539.73 it/sec) -training >> step=5746100, episode=958 reward=0.7824618 (547.05 it/sec) -training >> step=5746200, episode=958 reward=0.7545387 (515.99 it/sec) -training >> step=5746300, episode=958 reward=0.7524959 (403.63 it/sec) -training >> step=5746400, episode=958 reward=0.7588102 (547.38 it/sec) -training >> step=5746500, episode=958 reward=0.7696806 (519.99 it/sec) -training >> step=5746600, episode=958 reward=0.7774498 (508.02 it/sec) -training >> step=5746700, episode=958 reward=0.7778189 (566.94 it/sec) -training >> step=5746800, episode=958 reward=0.7709396 (467.56 it/sec) -training >> step=5746900, episode=958 reward=0.7760893 (499.17 it/sec) -training >> step=5747000, episode=958 reward=0.7824942 (493.76 it/sec) -training >> step=5747100, episode=958 reward=0.7809294 (509.87 it/sec) -training >> step=5747200, episode=958 reward=0.7841226 (511.25 it/sec) -training >> step=5747300, episode=959 reward=0.7844606 (115.65 it/sec) -training >> step=5747400, episode=959 reward=0.7692487 (520.94 it/sec) -training >> step=5747500, episode=959 reward=0.7631859 (491.65 it/sec) -training >> step=5747600, episode=959 reward=0.7820732 (509.27 it/sec) -training >> step=5747700, episode=959 reward=0.7734528 (477.80 it/sec) -training >> step=5747800, episode=959 reward=0.7773755 (523.58 it/sec) -training >> step=5747900, episode=959 reward=0.7775652 (497.15 it/sec) -training >> step=5748000, episode=959 reward=0.797655 (467.16 it/sec) -training >> step=5748100, episode=959 reward=0.788493 (500.68 it/sec) -training >> step=5748200, episode=959 reward=0.7834895 (523.15 it/sec) -training >> step=5748300, episode=959 reward=0.7872874 (489.22 it/sec) -training >> step=5748400, episode=959 reward=0.7830367 (519.55 it/sec) -training >> step=5748500, episode=959 reward=0.7861343 (492.15 it/sec) -training >> step=5748600, episode=959 reward=0.7886091 (475.17 it/sec) -training >> step=5748700, episode=959 reward=0.7641025 (497.65 it/sec) -training >> step=5748800, episode=959 reward=0.7865126 (469.29 it/sec) -training >> step=5748900, episode=959 reward=0.7905271 (536.36 it/sec) -training >> step=5749000, episode=959 reward=0.7783349 (495.00 it/sec) -training >> step=5749100, episode=959 reward=0.8089582 (453.28 it/sec) -training >> step=5749200, episode=959 reward=0.7934345 (493.56 it/sec) -training >> step=5749300, episode=959 reward=0.7930965 (471.87 it/sec) -training >> step=5749400, episode=959 reward=0.8022569 (524.58 it/sec) -training >> step=5749500, episode=959 reward=0.7886657 (481.06 it/sec) -training >> step=5749600, episode=959 reward=0.7791247 (477.57 it/sec) -training >> step=5749700, episode=959 reward=0.8010122 (500.19 it/sec) -training >> step=5749800, episode=959 reward=0.7956315 (460.97 it/sec) -training >> step=5749900, episode=959 reward=0.7910633 (515.06 it/sec) -training >> step=5750000, episode=959 reward=0.7947847 (532.90 it/sec) -training >> step=5750100, episode=959 reward=0.7774566 (503.85 it/sec) -training >> step=5750200, episode=959 reward=0.7882716 (490.94 it/sec) -training >> step=5750300, episode=959 reward=0.7714545 (490.32 it/sec) -training >> step=5750400, episode=959 reward=0.7852316 (481.92 it/sec) -training >> step=5750500, episode=959 reward=0.796061 (470.23 it/sec) -training >> step=5750600, episode=959 reward=0.782069 (512.65 it/sec) -training >> step=5750700, episode=959 reward=0.7689931 (535.33 it/sec) -training >> step=5750800, episode=959 reward=0.7804111 (456.49 it/sec) -training >> step=5750900, episode=959 reward=0.7872747 (509.24 it/sec) -training >> step=5751000, episode=959 reward=0.7696649 (515.42 it/sec) -training >> step=5751100, episode=959 reward=0.7858982 (450.40 it/sec) -training >> step=5751200, episode=959 reward=0.7834565 (520.03 it/sec) -training >> step=5751300, episode=959 reward=0.7896544 (477.14 it/sec) -training >> step=5751400, episode=959 reward=0.7707579 (508.41 it/sec) -training >> step=5751500, episode=959 reward=0.7920577 (501.27 it/sec) -training >> step=5751600, episode=959 reward=0.7769218 (451.28 it/sec) -training >> step=5751700, episode=959 reward=0.7551083 (517.90 it/sec) -training >> step=5751800, episode=959 reward=0.7861301 (477.16 it/sec) -training >> step=5751900, episode=959 reward=0.7769954 (494.50 it/sec) -training >> step=5752000, episode=959 reward=0.7923385 (518.44 it/sec) -training >> step=5752100, episode=959 reward=0.7755611 (522.75 it/sec) -training >> step=5752200, episode=959 reward=0.7959445 (502.41 it/sec) -training >> step=5752300, episode=959 reward=0.7991907 (467.66 it/sec) -training >> step=5752400, episode=959 reward=0.7586766 (517.85 it/sec) -training >> step=5752500, episode=959 reward=0.7945867 (413.54 it/sec) -training >> step=5752600, episode=959 reward=0.7712212 (457.17 it/sec) -training >> step=5752700, episode=959 reward=0.7622464 (458.06 it/sec) -training >> step=5752800, episode=959 reward=0.7565851 (459.54 it/sec) -training >> step=5752900, episode=959 reward=0.8029403 (462.07 it/sec) -training >> step=5753000, episode=959 reward=0.7844343 (521.78 it/sec) -training >> step=5753100, episode=959 reward=0.7667768 (476.55 it/sec) -training >> step=5753200, episode=959 reward=0.7829705 (539.02 it/sec) -training >> step=5753300, episode=960 reward=0.777192 (113.46 it/sec) -training >> step=5753400, episode=960 reward=0.7694758 (459.24 it/sec) -training >> step=5753500, episode=960 reward=0.7774395 (482.31 it/sec) -training >> step=5753600, episode=960 reward=0.7886328 (487.32 it/sec) -training >> step=5753700, episode=960 reward=0.7733722 (477.17 it/sec) -training >> step=5753800, episode=960 reward=0.7864762 (505.53 it/sec) -training >> step=5753900, episode=960 reward=0.7782921 (522.76 it/sec) -training >> step=5754000, episode=960 reward=0.7804279 (476.23 it/sec) -training >> step=5754100, episode=960 reward=0.8028389 (516.58 it/sec) -training >> step=5754200, episode=960 reward=0.7707629 (504.32 it/sec) -training >> step=5754300, episode=960 reward=0.7887331 (525.58 it/sec) -training >> step=5754400, episode=960 reward=0.7971273 (499.85 it/sec) -training >> step=5754500, episode=960 reward=0.7769648 (455.85 it/sec) -training >> step=5754600, episode=960 reward=0.79273 (502.87 it/sec) -training >> step=5754700, episode=960 reward=0.7819231 (507.50 it/sec) -training >> step=5754800, episode=960 reward=0.790047 (469.49 it/sec) -training >> step=5754900, episode=960 reward=0.7872269 (451.46 it/sec) -training >> step=5755000, episode=960 reward=0.7831465 (500.41 it/sec) -training >> step=5755100, episode=960 reward=0.7706882 (518.91 it/sec) -training >> step=5755200, episode=960 reward=0.7850223 (464.22 it/sec) -training >> step=5755300, episode=960 reward=0.8041607 (405.55 it/sec) -training >> step=5755400, episode=960 reward=0.7774879 (498.95 it/sec) -training >> step=5755500, episode=960 reward=0.7877495 (472.35 it/sec) -training >> step=5755600, episode=960 reward=0.7973731 (500.36 it/sec) -training >> step=5755700, episode=960 reward=0.7856175 (511.59 it/sec) -training >> step=5755800, episode=960 reward=0.8044385 (463.21 it/sec) -training >> step=5755900, episode=960 reward=0.7820234 (486.48 it/sec) -training >> step=5756000, episode=960 reward=0.7558457 (478.95 it/sec) -training >> step=5756100, episode=960 reward=0.7972701 (508.08 it/sec) -training >> step=5756200, episode=960 reward=0.7784522 (477.73 it/sec) -training >> step=5756300, episode=960 reward=0.7825744 (459.29 it/sec) -training >> step=5756400, episode=960 reward=0.7791675 (475.10 it/sec) -training >> step=5756500, episode=960 reward=0.7647951 (466.59 it/sec) -training >> step=5756600, episode=960 reward=0.7845924 (494.69 it/sec) -training >> step=5756700, episode=960 reward=0.7874485 (505.96 it/sec) -training >> step=5756800, episode=960 reward=0.7819596 (487.68 it/sec) -training >> step=5756900, episode=960 reward=0.7747411 (484.79 it/sec) -training >> step=5757000, episode=960 reward=0.771615 (493.25 it/sec) -training >> step=5757100, episode=960 reward=0.7968247 (496.21 it/sec) -training >> step=5757200, episode=960 reward=0.8100337 (526.67 it/sec) -training >> step=5757300, episode=960 reward=0.7952133 (460.99 it/sec) -training >> step=5757400, episode=960 reward=0.7962471 (478.05 it/sec) -training >> step=5757500, episode=960 reward=0.815557 (534.98 it/sec) -training >> step=5757600, episode=960 reward=0.7740424 (489.18 it/sec) -training >> step=5757700, episode=960 reward=0.7892507 (485.41 it/sec) -training >> step=5757800, episode=960 reward=0.7889618 (466.77 it/sec) -training >> step=5757900, episode=960 reward=0.7716376 (513.67 it/sec) -training >> step=5758000, episode=960 reward=0.7709762 (456.16 it/sec) -training >> step=5758100, episode=960 reward=0.7849448 (492.07 it/sec) -training >> step=5758200, episode=960 reward=0.784259 (500.91 it/sec) -training >> step=5758300, episode=960 reward=0.7696488 (491.81 it/sec) -training >> step=5758400, episode=960 reward=0.7780005 (492.93 it/sec) -training >> step=5758500, episode=960 reward=0.7929471 (349.88 it/sec) -training >> step=5758600, episode=960 reward=0.7887098 (495.58 it/sec) -training >> step=5758700, episode=960 reward=0.7858503 (452.29 it/sec) -training >> step=5758800, episode=960 reward=0.7831163 (481.77 it/sec) -training >> step=5758900, episode=960 reward=0.7815399 (503.10 it/sec) -training >> step=5759000, episode=960 reward=0.7974992 (484.58 it/sec) -training >> step=5759100, episode=960 reward=0.7903715 (462.15 it/sec) -training >> step=5759200, episode=960 reward=0.775782 (500.75 it/sec) -training >> step=5759300, episode=961 reward=0.7868825 (149.53 it/sec) -training >> step=5759400, episode=961 reward=0.7770137 (456.15 it/sec) -training >> step=5759500, episode=961 reward=0.7660908 (470.94 it/sec) -training >> step=5759600, episode=961 reward=0.7858884 (473.30 it/sec) -training >> step=5759700, episode=961 reward=0.7780082 (502.48 it/sec) -training >> step=5759800, episode=961 reward=0.7984539 (534.33 it/sec) -training >> step=5759900, episode=961 reward=0.7972453 (490.00 it/sec) -training >> step=5760000, episode=961 reward=0.7791212 (479.13 it/sec) -training >> step=5760100, episode=961 reward=0.7928895 (525.86 it/sec) -training >> step=5760200, episode=961 reward=0.7795129 (500.95 it/sec) -training >> step=5760300, episode=961 reward=0.7792091 (454.10 it/sec) -training >> step=5760400, episode=961 reward=0.7739665 (501.51 it/sec) -training >> step=5760500, episode=961 reward=0.7857762 (435.11 it/sec) -training >> step=5760600, episode=961 reward=0.7709674 (429.59 it/sec) -training >> step=5760700, episode=961 reward=0.7815253 (459.41 it/sec) -training >> step=5760800, episode=961 reward=0.7620438 (560.36 it/sec) -training >> step=5760900, episode=961 reward=0.782345 (501.66 it/sec) -training >> step=5761000, episode=961 reward=0.7904984 (464.48 it/sec) -training >> step=5761100, episode=961 reward=0.786352 (516.94 it/sec) -training >> step=5761200, episode=961 reward=0.7788432 (476.93 it/sec) -training >> step=5761300, episode=961 reward=0.7721132 (527.96 it/sec) -training >> step=5761400, episode=961 reward=0.7734556 (475.89 it/sec) -training >> step=5761500, episode=961 reward=0.8087454 (471.25 it/sec) -training >> step=5761600, episode=961 reward=0.7856395 (506.78 it/sec) -training >> step=5761700, episode=961 reward=0.781583 (452.05 it/sec) -training >> step=5761800, episode=961 reward=0.7904485 (509.37 it/sec) -training >> step=5761900, episode=961 reward=0.7876974 (452.38 it/sec) -training >> step=5762000, episode=961 reward=0.7928908 (463.63 it/sec) -training >> step=5762100, episode=961 reward=0.7940187 (500.05 it/sec) -training >> step=5762200, episode=961 reward=0.7875559 (500.10 it/sec) -training >> step=5762300, episode=961 reward=0.7968431 (488.59 it/sec) -training >> step=5762400, episode=961 reward=0.7777305 (499.88 it/sec) -training >> step=5762500, episode=961 reward=0.7697101 (447.46 it/sec) -training >> step=5762600, episode=961 reward=0.7806992 (516.76 it/sec) -training >> step=5762700, episode=961 reward=0.7876161 (441.14 it/sec) -training >> step=5762800, episode=961 reward=0.7985073 (543.95 it/sec) -training >> step=5762900, episode=961 reward=0.7903458 (494.69 it/sec) -training >> step=5763000, episode=961 reward=0.769056 (516.79 it/sec) -training >> step=5763100, episode=961 reward=0.7789652 (510.75 it/sec) -training >> step=5763200, episode=961 reward=0.7896937 (506.16 it/sec) -training >> step=5763300, episode=961 reward=0.7722659 (455.54 it/sec) -training >> step=5763400, episode=961 reward=0.777717 (468.02 it/sec) -training >> step=5763500, episode=961 reward=0.7723041 (488.65 it/sec) -training >> step=5763600, episode=961 reward=0.7801062 (488.52 it/sec) -training >> step=5763700, episode=961 reward=0.7851732 (494.11 it/sec) -training >> step=5763800, episode=961 reward=0.7909705 (510.38 it/sec) -training >> step=5763900, episode=961 reward=0.7851437 (477.81 it/sec) -training >> step=5764000, episode=961 reward=0.7764358 (539.81 it/sec) -training >> step=5764100, episode=961 reward=0.7809962 (480.44 it/sec) -training >> step=5764200, episode=961 reward=0.7778413 (494.37 it/sec) -training >> step=5764300, episode=961 reward=0.7704222 (482.57 it/sec) -training >> step=5764400, episode=961 reward=0.7784855 (463.40 it/sec) -training >> step=5764500, episode=961 reward=0.7826815 (354.92 it/sec) -training >> step=5764600, episode=961 reward=0.7605429 (492.76 it/sec) -training >> step=5764700, episode=961 reward=0.7810738 (505.73 it/sec) -training >> step=5764800, episode=961 reward=0.7642738 (524.12 it/sec) -training >> step=5764900, episode=961 reward=0.7807636 (478.72 it/sec) -training >> step=5765000, episode=961 reward=0.7755964 (479.32 it/sec) -training >> step=5765100, episode=961 reward=0.7768421 (483.70 it/sec) -training >> step=5765200, episode=961 reward=0.7814797 (498.47 it/sec) -training >> step=5765300, episode=962 reward=0.7849332 (134.36 it/sec) -training >> step=5765400, episode=962 reward=0.7671036 (469.90 it/sec) -training >> step=5765500, episode=962 reward=0.7829651 (499.30 it/sec) -training >> step=5765600, episode=962 reward=0.7865278 (465.47 it/sec) -training >> step=5765700, episode=962 reward=0.7867917 (532.09 it/sec) -training >> step=5765800, episode=962 reward=0.7974695 (481.78 it/sec) -training >> step=5765900, episode=962 reward=0.7915779 (487.51 it/sec) -training >> step=5766000, episode=962 reward=0.7771542 (535.73 it/sec) -training >> step=5766100, episode=962 reward=0.7862918 (497.21 it/sec) -training >> step=5766200, episode=962 reward=0.7533086 (497.01 it/sec) -training >> step=5766300, episode=962 reward=0.7890736 (489.65 it/sec) -training >> step=5766400, episode=962 reward=0.7854058 (506.32 it/sec) -training >> step=5766500, episode=962 reward=0.7839935 (472.15 it/sec) -training >> step=5766600, episode=962 reward=0.7752134 (467.92 it/sec) -training >> step=5766700, episode=962 reward=0.7864196 (525.98 it/sec) -training >> step=5766800, episode=962 reward=0.7951347 (497.18 it/sec) -training >> step=5766900, episode=962 reward=0.779289 (503.07 it/sec) -training >> step=5767000, episode=962 reward=0.7817225 (493.67 it/sec) -training >> step=5767100, episode=962 reward=0.7947033 (487.28 it/sec) -training >> step=5767200, episode=962 reward=0.7926645 (476.79 it/sec) -training >> step=5767300, episode=962 reward=0.7764362 (529.56 it/sec) -training >> step=5767400, episode=962 reward=0.7718792 (452.11 it/sec) -training >> step=5767500, episode=962 reward=0.7934724 (487.28 it/sec) -training >> step=5767600, episode=962 reward=0.7872508 (516.59 it/sec) -training >> step=5767700, episode=962 reward=0.7822573 (489.12 it/sec) -training >> step=5767800, episode=962 reward=0.7819809 (499.56 it/sec) -training >> step=5767900, episode=962 reward=0.7730738 (522.97 it/sec) -training >> step=5768000, episode=962 reward=0.7941301 (502.81 it/sec) -training >> step=5768100, episode=962 reward=0.7886522 (494.58 it/sec) -training >> step=5768200, episode=962 reward=0.7730523 (482.17 it/sec) -training >> step=5768300, episode=962 reward=0.7943563 (507.88 it/sec) -training >> step=5768400, episode=962 reward=0.7699028 (457.90 it/sec) -training >> step=5768500, episode=962 reward=0.8050276 (477.75 it/sec) -training >> step=5768600, episode=962 reward=0.7716366 (477.65 it/sec) -training >> step=5768700, episode=962 reward=0.7915227 (572.86 it/sec) -training >> step=5768800, episode=962 reward=0.7996686 (467.97 it/sec) -training >> step=5768900, episode=962 reward=0.8010444 (498.10 it/sec) -training >> step=5769000, episode=962 reward=0.7762686 (457.44 it/sec) -training >> step=5769100, episode=962 reward=0.7767671 (450.90 it/sec) -training >> step=5769200, episode=962 reward=0.765699 (515.17 it/sec) -training >> step=5769300, episode=962 reward=0.7670248 (454.18 it/sec) -training >> step=5769400, episode=962 reward=0.7795425 (484.67 it/sec) -training >> step=5769500, episode=962 reward=0.7705473 (527.39 it/sec) -training >> step=5769600, episode=962 reward=0.7717224 (516.88 it/sec) -training >> step=5769700, episode=962 reward=0.7666412 (518.19 it/sec) -training >> step=5769800, episode=962 reward=0.7675345 (521.17 it/sec) -training >> step=5769900, episode=962 reward=0.7750278 (456.02 it/sec) -training >> step=5770000, episode=962 reward=0.780448 (542.15 it/sec) -training >> step=5770100, episode=962 reward=0.7855927 (489.52 it/sec) -training >> step=5770200, episode=962 reward=0.7795813 (507.27 it/sec) -training >> step=5770300, episode=962 reward=0.7716039 (506.66 it/sec) -training >> step=5770400, episode=962 reward=0.7647099 (447.37 it/sec) -training >> step=5770500, episode=962 reward=0.7676166 (539.38 it/sec) -training >> step=5770600, episode=962 reward=0.7670484 (456.28 it/sec) -training >> step=5770700, episode=962 reward=0.7693503 (297.59 it/sec) -training >> step=5770800, episode=962 reward=0.7607821 (450.80 it/sec) -training >> step=5770900, episode=962 reward=0.7793111 (438.45 it/sec) -training >> step=5771000, episode=962 reward=0.7750951 (386.88 it/sec) -training >> step=5771100, episode=962 reward=0.7780581 (437.27 it/sec) -training >> step=5771200, episode=962 reward=0.7759451 (401.66 it/sec) -training >> step=5771300, episode=963 reward=0.7887402 (62.39 it/sec) -training >> step=5771400, episode=963 reward=0.7831907 (473.63 it/sec) -training >> step=5771500, episode=963 reward=0.7855901 (433.98 it/sec) -training >> step=5771600, episode=963 reward=0.7874161 (455.21 it/sec) -training >> step=5771700, episode=963 reward=0.7792953 (494.66 it/sec) -training >> step=5771800, episode=963 reward=0.7835497 (464.39 it/sec) -training >> step=5771900, episode=963 reward=0.7981704 (554.21 it/sec) -training >> step=5772000, episode=963 reward=0.7715091 (539.32 it/sec) -training >> step=5772100, episode=963 reward=0.7914505 (499.46 it/sec) -training >> step=5772200, episode=963 reward=0.8055245 (502.81 it/sec) -training >> step=5772300, episode=963 reward=0.7863042 (458.44 it/sec) -training >> step=5772400, episode=963 reward=0.7855761 (501.52 it/sec) -training >> step=5772500, episode=963 reward=0.7664294 (392.39 it/sec) -training >> step=5772600, episode=963 reward=0.7846245 (422.84 it/sec) -training >> step=5772700, episode=963 reward=0.796143 (422.08 it/sec) -training >> step=5772800, episode=963 reward=0.7772614 (482.55 it/sec) -training >> step=5772900, episode=963 reward=0.7977653 (524.79 it/sec) -training >> step=5773000, episode=963 reward=0.7798449 (433.59 it/sec) -training >> step=5773100, episode=963 reward=0.7914596 (463.97 it/sec) -training >> step=5773200, episode=963 reward=0.7950284 (462.03 it/sec) -training >> step=5773300, episode=963 reward=0.7886406 (507.40 it/sec) -training >> step=5773400, episode=963 reward=0.7790034 (498.83 it/sec) -training >> step=5773500, episode=963 reward=0.7965504 (465.07 it/sec) -training >> step=5773600, episode=963 reward=0.8101839 (445.04 it/sec) -training >> step=5773700, episode=963 reward=0.7883385 (479.03 it/sec) -training >> step=5773800, episode=963 reward=0.7779182 (458.82 it/sec) -training >> step=5773900, episode=963 reward=0.7955191 (548.98 it/sec) -training >> step=5774000, episode=963 reward=0.7996246 (487.56 it/sec) -training >> step=5774100, episode=963 reward=0.7685178 (538.39 it/sec) -training >> step=5774200, episode=963 reward=0.7892551 (483.63 it/sec) -training >> step=5774300, episode=963 reward=0.7857839 (494.40 it/sec) -training >> step=5774400, episode=963 reward=0.7865606 (484.10 it/sec) -training >> step=5774500, episode=963 reward=0.7855456 (460.77 it/sec) -training >> step=5774600, episode=963 reward=0.7619334 (465.06 it/sec) -training >> step=5774700, episode=963 reward=0.7801958 (390.42 it/sec) -training >> step=5774800, episode=963 reward=0.7913105 (438.19 it/sec) -training >> step=5774900, episode=963 reward=0.7865742 (470.43 it/sec) -training >> step=5775000, episode=963 reward=0.7799379 (450.39 it/sec) -training >> step=5775100, episode=963 reward=0.7891008 (499.52 it/sec) -training >> step=5775200, episode=963 reward=0.7733439 (427.00 it/sec) -training >> step=5775300, episode=963 reward=0.7907211 (500.85 it/sec) -training >> step=5775400, episode=963 reward=0.7941005 (500.63 it/sec) -training >> step=5775500, episode=963 reward=0.7698117 (444.42 it/sec) -training >> step=5775600, episode=963 reward=0.7792419 (458.59 it/sec) -training >> step=5775700, episode=963 reward=0.7978455 (441.13 it/sec) -training >> step=5775800, episode=963 reward=0.7886612 (518.80 it/sec) -training >> step=5775900, episode=963 reward=0.7695613 (421.91 it/sec) -training >> step=5776000, episode=963 reward=0.7833929 (473.76 it/sec) -training >> step=5776100, episode=963 reward=0.7818108 (515.07 it/sec) -training >> step=5776200, episode=963 reward=0.7759348 (519.45 it/sec) -training >> step=5776300, episode=963 reward=0.7881727 (512.61 it/sec) -training >> step=5776400, episode=963 reward=0.7734859 (477.82 it/sec) -training >> step=5776500, episode=963 reward=0.7815159 (483.37 it/sec) -training >> step=5776600, episode=963 reward=0.7579421 (429.41 it/sec) -training >> step=5776700, episode=963 reward=0.7652987 (491.51 it/sec) -training >> step=5776800, episode=963 reward=0.7489135 (462.58 it/sec) -training >> step=5776900, episode=963 reward=0.7389178 (351.07 it/sec) -training >> step=5777000, episode=963 reward=0.7864949 (528.75 it/sec) -training >> step=5777100, episode=963 reward=0.776539 (429.32 it/sec) -training >> step=5777200, episode=963 reward=0.7644245 (505.75 it/sec) -training >> step=5777300, episode=964 reward=0.7676775 (100.50 it/sec) -training >> step=5777400, episode=964 reward=0.7806168 (420.22 it/sec) -training >> step=5777500, episode=964 reward=0.7771092 (424.00 it/sec) -training >> step=5777600, episode=964 reward=0.7815192 (474.40 it/sec) -training >> step=5777700, episode=964 reward=0.7976312 (442.73 it/sec) -training >> step=5777800, episode=964 reward=0.7819788 (451.55 it/sec) -training >> step=5777900, episode=964 reward=0.7631081 (457.60 it/sec) -training >> step=5778000, episode=964 reward=0.7577063 (408.18 it/sec) -training >> step=5778100, episode=964 reward=0.7821147 (439.71 it/sec) -training >> step=5778200, episode=964 reward=0.7696334 (510.81 it/sec) -training >> step=5778300, episode=964 reward=0.7849131 (547.20 it/sec) -training >> step=5778400, episode=964 reward=0.7794557 (471.14 it/sec) -training >> step=5778500, episode=964 reward=0.7781159 (475.41 it/sec) -training >> step=5778600, episode=964 reward=0.793512 (439.07 it/sec) -training >> step=5778700, episode=964 reward=0.7757335 (537.16 it/sec) -training >> step=5778800, episode=964 reward=0.7949272 (437.68 it/sec) -training >> step=5778900, episode=964 reward=0.7900844 (492.92 it/sec) -training >> step=5779000, episode=964 reward=0.7758125 (479.67 it/sec) -training >> step=5779100, episode=964 reward=0.780268 (437.56 it/sec) -training >> step=5779200, episode=964 reward=0.7851002 (501.43 it/sec) -training >> step=5779300, episode=964 reward=0.7967266 (493.39 it/sec) -training >> step=5779400, episode=964 reward=0.7979656 (418.57 it/sec) -training >> step=5779500, episode=964 reward=0.788246 (520.22 it/sec) -training >> step=5779600, episode=964 reward=0.7966051 (511.08 it/sec) -training >> step=5779700, episode=964 reward=0.7875274 (458.72 it/sec) -training >> step=5779800, episode=964 reward=0.7899486 (491.01 it/sec) -training >> step=5779900, episode=964 reward=0.7936075 (464.35 it/sec) -training >> step=5780000, episode=964 reward=0.8011464 (480.01 it/sec) -training >> step=5780100, episode=964 reward=0.8063358 (495.43 it/sec) -training >> step=5780200, episode=964 reward=0.8001516 (487.85 it/sec) -training >> step=5780300, episode=964 reward=0.7901077 (508.99 it/sec) -training >> step=5780400, episode=964 reward=0.8046155 (469.77 it/sec) -training >> step=5780500, episode=964 reward=0.7790407 (512.66 it/sec) -training >> step=5780600, episode=964 reward=0.7888077 (485.12 it/sec) -training >> step=5780700, episode=964 reward=0.7730988 (509.61 it/sec) -training >> step=5780800, episode=964 reward=0.7827778 (485.95 it/sec) -training >> step=5780900, episode=964 reward=0.78085 (460.02 it/sec) -training >> step=5781000, episode=964 reward=0.7804538 (460.98 it/sec) -training >> step=5781100, episode=964 reward=0.7957622 (461.89 it/sec) -training >> step=5781200, episode=964 reward=0.7804884 (508.78 it/sec) -training >> step=5781300, episode=964 reward=0.7795267 (497.38 it/sec) -training >> step=5781400, episode=964 reward=0.7822412 (460.35 it/sec) -training >> step=5781500, episode=964 reward=0.7851522 (515.59 it/sec) -training >> step=5781600, episode=964 reward=0.7879435 (482.00 it/sec) -training >> step=5781700, episode=964 reward=0.7870755 (502.12 it/sec) -training >> step=5781800, episode=964 reward=0.7579561 (515.73 it/sec) -training >> step=5781900, episode=964 reward=0.7837704 (483.06 it/sec) -training >> step=5782000, episode=964 reward=0.7634134 (525.10 it/sec) -training >> step=5782100, episode=964 reward=0.7806795 (489.91 it/sec) -training >> step=5782200, episode=964 reward=0.7671638 (479.66 it/sec) -training >> step=5782300, episode=964 reward=0.7773421 (431.08 it/sec) -training >> step=5782400, episode=964 reward=0.7772064 (456.14 it/sec) -training >> step=5782500, episode=964 reward=0.7665319 (484.94 it/sec) -training >> step=5782600, episode=964 reward=0.7869143 (496.04 it/sec) -training >> step=5782700, episode=964 reward=0.7438446 (490.89 it/sec) -training >> step=5782800, episode=964 reward=0.777277 (480.81 it/sec) -training >> step=5782900, episode=964 reward=0.7661422 (452.66 it/sec) -training >> step=5783000, episode=964 reward=0.775252 (500.10 it/sec) -training >> step=5783100, episode=964 reward=0.7697442 (368.24 it/sec) -training >> step=5783200, episode=964 reward=0.7496951 (499.54 it/sec) -training >> step=5783300, episode=965 reward=0.7683818 (85.68 it/sec) -training >> step=5783400, episode=965 reward=0.7622354 (367.99 it/sec) -training >> step=5783500, episode=965 reward=0.7693176 (302.14 it/sec) -training >> step=5783600, episode=965 reward=0.781975 (404.66 it/sec) -training >> step=5783700, episode=965 reward=0.7840751 (461.14 it/sec) -training >> step=5783800, episode=965 reward=0.7811676 (480.38 it/sec) -training >> step=5783900, episode=965 reward=0.7963395 (497.42 it/sec) -training >> step=5784000, episode=965 reward=0.7965986 (496.63 it/sec) -training >> step=5784100, episode=965 reward=0.788722 (484.64 it/sec) -training >> step=5784200, episode=965 reward=0.7826834 (498.01 it/sec) -training >> step=5784300, episode=965 reward=0.7734283 (478.20 it/sec) -training >> step=5784400, episode=965 reward=0.7767659 (541.88 it/sec) -training >> step=5784500, episode=965 reward=0.7762525 (451.59 it/sec) -training >> step=5784600, episode=965 reward=0.7710642 (460.49 it/sec) -training >> step=5784700, episode=965 reward=0.8136295 (521.79 it/sec) -training >> step=5784800, episode=965 reward=0.8023278 (465.83 it/sec) -training >> step=5784900, episode=965 reward=0.8035194 (484.17 it/sec) -training >> step=5785000, episode=965 reward=0.7901834 (468.05 it/sec) -training >> step=5785100, episode=965 reward=0.7914896 (505.06 it/sec) -training >> step=5785200, episode=965 reward=0.7736403 (450.03 it/sec) -training >> step=5785300, episode=965 reward=0.7978936 (488.74 it/sec) -training >> step=5785400, episode=965 reward=0.7784705 (444.27 it/sec) -training >> step=5785500, episode=965 reward=0.7844163 (454.44 it/sec) -training >> step=5785600, episode=965 reward=0.7957146 (516.39 it/sec) -training >> step=5785700, episode=965 reward=0.7700999 (494.42 it/sec) -training >> step=5785800, episode=965 reward=0.7588927 (466.15 it/sec) -training >> step=5785900, episode=965 reward=0.7997781 (482.83 it/sec) -training >> step=5786000, episode=965 reward=0.7900317 (494.10 it/sec) -training >> step=5786100, episode=965 reward=0.770956 (493.20 it/sec) -training >> step=5786200, episode=965 reward=0.791136 (494.56 it/sec) -training >> step=5786300, episode=965 reward=0.7889492 (508.57 it/sec) -training >> step=5786400, episode=965 reward=0.7753585 (470.10 it/sec) -training >> step=5786500, episode=965 reward=0.7795472 (485.43 it/sec) -training >> step=5786600, episode=965 reward=0.8072942 (491.55 it/sec) -training >> step=5786700, episode=965 reward=0.7904948 (473.63 it/sec) -training >> step=5786800, episode=965 reward=0.815084 (504.04 it/sec) -training >> step=5786900, episode=965 reward=0.7745165 (474.18 it/sec) -training >> step=5787000, episode=965 reward=0.7931068 (430.73 it/sec) -training >> step=5787100, episode=965 reward=0.7623357 (460.46 it/sec) -training >> step=5787200, episode=965 reward=0.7874992 (433.16 it/sec) -training >> step=5787300, episode=965 reward=0.7922999 (453.42 it/sec) -training >> step=5787400, episode=965 reward=0.7773523 (483.68 it/sec) -training >> step=5787500, episode=965 reward=0.8063724 (430.37 it/sec) -training >> step=5787600, episode=965 reward=0.7747334 (505.35 it/sec) -training >> step=5787700, episode=965 reward=0.7830213 (474.52 it/sec) -training >> step=5787800, episode=965 reward=0.780927 (418.19 it/sec) -training >> step=5787900, episode=965 reward=0.7864454 (479.48 it/sec) -training >> step=5788000, episode=965 reward=0.7713091 (460.37 it/sec) -training >> step=5788100, episode=965 reward=0.7776493 (472.77 it/sec) -training >> step=5788200, episode=965 reward=0.7805015 (476.51 it/sec) -training >> step=5788300, episode=965 reward=0.7537273 (490.83 it/sec) -training >> step=5788400, episode=965 reward=0.7759871 (472.13 it/sec) -training >> step=5788500, episode=965 reward=0.7591147 (507.34 it/sec) -training >> step=5788600, episode=965 reward=0.7862288 (448.93 it/sec) -training >> step=5788700, episode=965 reward=0.7450718 (520.28 it/sec) -training >> step=5788800, episode=965 reward=0.7951124 (506.71 it/sec) -training >> step=5788900, episode=965 reward=0.7778143 (475.12 it/sec) -training >> step=5789000, episode=965 reward=0.773923 (489.13 it/sec) -training >> step=5789100, episode=965 reward=0.7639411 (382.80 it/sec) -training >> step=5789200, episode=965 reward=0.763124 (327.54 it/sec) -training >> step=5789300, episode=966 reward=0.7842483 (96.79 it/sec) -training >> step=5789400, episode=966 reward=0.7800932 (500.58 it/sec) -training >> step=5789500, episode=966 reward=0.7887366 (522.47 it/sec) -training >> step=5789600, episode=966 reward=0.7754422 (536.29 it/sec) -training >> step=5789700, episode=966 reward=0.7786247 (451.69 it/sec) -training >> step=5789800, episode=966 reward=0.7629717 (433.96 it/sec) -training >> step=5789900, episode=966 reward=0.7841868 (409.17 it/sec) -training >> step=5790000, episode=966 reward=0.7729302 (391.09 it/sec) -training >> step=5790100, episode=966 reward=0.7819488 (410.75 it/sec) -training >> step=5790200, episode=966 reward=0.7921311 (412.73 it/sec) -training >> step=5790300, episode=966 reward=0.7926909 (404.05 it/sec) -training >> step=5790400, episode=966 reward=0.7903107 (406.55 it/sec) -training >> step=5790500, episode=966 reward=0.7880352 (488.38 it/sec) -training >> step=5790600, episode=966 reward=0.7828188 (465.64 it/sec) -training >> step=5790700, episode=966 reward=0.7722582 (488.56 it/sec) -training >> step=5790800, episode=966 reward=0.7952995 (506.58 it/sec) -training >> step=5790900, episode=966 reward=0.79423 (500.82 it/sec) -training >> step=5791000, episode=966 reward=0.7866281 (448.62 it/sec) -training >> step=5791100, episode=966 reward=0.7710847 (486.27 it/sec) -training >> step=5791200, episode=966 reward=0.7781936 (507.14 it/sec) -training >> step=5791300, episode=966 reward=0.7820753 (517.32 it/sec) -training >> step=5791400, episode=966 reward=0.7709805 (484.04 it/sec) -training >> step=5791500, episode=966 reward=0.7692073 (534.66 it/sec) -training >> step=5791600, episode=966 reward=0.8022807 (491.44 it/sec) -training >> step=5791700, episode=966 reward=0.7897128 (482.00 it/sec) -training >> step=5791800, episode=966 reward=0.7834774 (476.70 it/sec) -training >> step=5791900, episode=966 reward=0.7904574 (431.92 it/sec) -training >> step=5792000, episode=966 reward=0.781837 (395.82 it/sec) -training >> step=5792100, episode=966 reward=0.7887464 (421.40 it/sec) -training >> step=5792200, episode=966 reward=0.7772454 (467.05 it/sec) -training >> step=5792300, episode=966 reward=0.7820183 (519.62 it/sec) -training >> step=5792400, episode=966 reward=0.7678064 (438.10 it/sec) -training >> step=5792500, episode=966 reward=0.7842648 (436.76 it/sec) -training >> step=5792600, episode=966 reward=0.7764323 (492.13 it/sec) -training >> step=5792700, episode=966 reward=0.7676279 (504.71 it/sec) -training >> step=5792800, episode=966 reward=0.7977586 (437.86 it/sec) -training >> step=5792900, episode=966 reward=0.7955168 (514.66 it/sec) -training >> step=5793000, episode=966 reward=0.7830009 (510.12 it/sec) -training >> step=5793100, episode=966 reward=0.7960303 (453.05 it/sec) -training >> step=5793200, episode=966 reward=0.7829196 (457.23 it/sec) -training >> step=5793300, episode=966 reward=0.7936001 (480.95 it/sec) -training >> step=5793400, episode=966 reward=0.8097196 (524.75 it/sec) -training >> step=5793500, episode=966 reward=0.7643318 (463.25 it/sec) -training >> step=5793600, episode=966 reward=0.7343653 (448.46 it/sec) -training >> step=5793700, episode=966 reward=0.7857871 (513.44 it/sec) -training >> step=5793800, episode=966 reward=0.7654529 (488.11 it/sec) -training >> step=5793900, episode=966 reward=0.7868032 (471.69 it/sec) -training >> step=5794000, episode=966 reward=0.7564371 (453.68 it/sec) -training >> step=5794100, episode=966 reward=0.7803002 (496.44 it/sec) -training >> step=5794200, episode=966 reward=0.7616993 (458.77 it/sec) -training >> step=5794300, episode=966 reward=0.7815714 (486.51 it/sec) -training >> step=5794400, episode=966 reward=0.7666197 (447.40 it/sec) -training >> step=5794500, episode=966 reward=0.7833641 (461.45 it/sec) -training >> step=5794600, episode=966 reward=0.7793776 (472.54 it/sec) -training >> step=5794700, episode=966 reward=0.7779183 (512.41 it/sec) -training >> step=5794800, episode=966 reward=0.7587979 (493.33 it/sec) -training >> step=5794900, episode=966 reward=0.7648609 (420.32 it/sec) -training >> step=5795000, episode=966 reward=0.7788854 (419.75 it/sec) -training >> step=5795100, episode=966 reward=0.7856393 (498.06 it/sec) -training >> step=5795200, episode=966 reward=0.7606597 (482.78 it/sec) -training >> step=5795300, episode=967 reward=0.7916583 (49.89 it/sec) -training >> step=5795400, episode=967 reward=0.7783307 (470.44 it/sec) -training >> step=5795500, episode=967 reward=0.7848828 (508.10 it/sec) -training >> step=5795600, episode=967 reward=0.7884973 (489.17 it/sec) -training >> step=5795700, episode=967 reward=0.7801604 (527.01 it/sec) -training >> step=5795800, episode=967 reward=0.7757135 (494.45 it/sec) -training >> step=5795900, episode=967 reward=0.7798263 (474.86 it/sec) -training >> step=5796000, episode=967 reward=0.7876636 (439.55 it/sec) -training >> step=5796100, episode=967 reward=0.7729458 (521.43 it/sec) -training >> step=5796200, episode=967 reward=0.7944618 (531.28 it/sec) -training >> step=5796300, episode=967 reward=0.7974887 (507.38 it/sec) -training >> step=5796400, episode=967 reward=0.7989402 (540.01 it/sec) -training >> step=5796500, episode=967 reward=0.7828971 (524.03 it/sec) -training >> step=5796600, episode=967 reward=0.789915 (507.40 it/sec) -training >> step=5796700, episode=967 reward=0.7989537 (538.92 it/sec) -training >> step=5796800, episode=967 reward=0.7817738 (524.84 it/sec) -training >> step=5796900, episode=967 reward=0.7920328 (537.92 it/sec) -training >> step=5797000, episode=967 reward=0.8048177 (499.02 it/sec) -training >> step=5797100, episode=967 reward=0.7823325 (489.60 it/sec) -training >> step=5797200, episode=967 reward=0.784059 (510.17 it/sec) -training >> step=5797300, episode=967 reward=0.7903749 (526.82 it/sec) -training >> step=5797400, episode=967 reward=0.7920846 (512.04 it/sec) -training >> step=5797500, episode=967 reward=0.7859795 (509.30 it/sec) -training >> step=5797600, episode=967 reward=0.7865843 (543.10 it/sec) -training >> step=5797700, episode=967 reward=0.8015201 (523.28 it/sec) -training >> step=5797800, episode=967 reward=0.7948658 (531.11 it/sec) -training >> step=5797900, episode=967 reward=0.7757376 (488.42 it/sec) -training >> step=5798000, episode=967 reward=0.8042721 (518.31 it/sec) -training >> step=5798100, episode=967 reward=0.7828241 (544.21 it/sec) -training >> step=5798200, episode=967 reward=0.7718614 (475.37 it/sec) -training >> step=5798300, episode=967 reward=0.7910528 (515.24 it/sec) -training >> step=5798400, episode=967 reward=0.7853027 (512.21 it/sec) -training >> step=5798500, episode=967 reward=0.8049757 (533.26 it/sec) -training >> step=5798600, episode=967 reward=0.775057 (512.77 it/sec) -training >> step=5798700, episode=967 reward=0.7969705 (543.14 it/sec) -training >> step=5798800, episode=967 reward=0.7741102 (495.79 it/sec) -training >> step=5798900, episode=967 reward=0.8019614 (515.24 it/sec) -training >> step=5799000, episode=967 reward=0.7961684 (504.88 it/sec) -training >> step=5799100, episode=967 reward=0.7808211 (529.41 it/sec) -training >> step=5799200, episode=967 reward=0.785871 (489.40 it/sec) -training >> step=5799300, episode=967 reward=0.7890769 (470.93 it/sec) -training >> step=5799400, episode=967 reward=0.776938 (524.94 it/sec) -training >> step=5799500, episode=967 reward=0.784973 (498.47 it/sec) -training >> step=5799600, episode=967 reward=0.7793379 (504.78 it/sec) -training >> step=5799700, episode=967 reward=0.7651156 (496.49 it/sec) -training >> step=5799800, episode=967 reward=0.781261 (498.20 it/sec) -training >> step=5799900, episode=967 reward=0.7797347 (487.65 it/sec) -training >> step=5800000, episode=967 reward=0.7929758 (521.50 it/sec) -training >> step=5800100, episode=967 reward=0.7668786 (518.79 it/sec) -training >> step=5800200, episode=967 reward=0.7885866 (503.59 it/sec) -training >> step=5800300, episode=967 reward=0.7875324 (485.56 it/sec) -training >> step=5800400, episode=967 reward=0.7879444 (506.47 it/sec) -training >> step=5800500, episode=967 reward=0.78711 (481.32 it/sec) -training >> step=5800600, episode=967 reward=0.7528934 (521.83 it/sec) -training >> step=5800700, episode=967 reward=0.7682871 (456.56 it/sec) -training >> step=5800800, episode=967 reward=0.7801038 (473.03 it/sec) -training >> step=5800900, episode=967 reward=0.7903641 (507.58 it/sec) -training >> step=5801000, episode=967 reward=0.7744021 (482.22 it/sec) -training >> step=5801100, episode=967 reward=0.7833307 (523.84 it/sec) -training >> step=5801200, episode=967 reward=0.7744762 (535.07 it/sec) -training >> step=5801300, episode=968 reward=0.7704167 (128.49 it/sec) -training >> step=5801400, episode=968 reward=0.7831364 (503.44 it/sec) -training >> step=5801500, episode=968 reward=0.7680355 (516.48 it/sec) -training >> step=5801600, episode=968 reward=0.7997676 (524.12 it/sec) -training >> step=5801700, episode=968 reward=0.786955 (508.53 it/sec) -training >> step=5801800, episode=968 reward=0.7690051 (514.22 it/sec) -training >> step=5801900, episode=968 reward=0.7843447 (537.56 it/sec) -training >> step=5802000, episode=968 reward=0.7747486 (541.56 it/sec) -training >> step=5802100, episode=968 reward=0.7611262 (474.78 it/sec) -training >> step=5802200, episode=968 reward=0.7806152 (503.92 it/sec) -training >> step=5802300, episode=968 reward=0.7825714 (515.00 it/sec) -training >> step=5802400, episode=968 reward=0.7976516 (539.94 it/sec) -training >> step=5802500, episode=968 reward=0.7813758 (499.33 it/sec) -training >> step=5802600, episode=968 reward=0.7750275 (500.10 it/sec) -training >> step=5802700, episode=968 reward=0.7892802 (505.95 it/sec) -training >> step=5802800, episode=968 reward=0.8068182 (474.79 it/sec) -training >> step=5802900, episode=968 reward=0.7908236 (526.37 it/sec) -training >> step=5803000, episode=968 reward=0.8049527 (501.25 it/sec) -training >> step=5803100, episode=968 reward=0.7948633 (520.42 it/sec) -training >> step=5803200, episode=968 reward=0.7651333 (504.24 it/sec) -training >> step=5803300, episode=968 reward=0.7693839 (498.78 it/sec) -training >> step=5803400, episode=968 reward=0.7873908 (567.12 it/sec) -training >> step=5803500, episode=968 reward=0.8031301 (513.95 it/sec) -training >> step=5803600, episode=968 reward=0.7671914 (497.07 it/sec) -training >> step=5803700, episode=968 reward=0.7477703 (488.04 it/sec) -training >> step=5803800, episode=968 reward=0.7721058 (519.61 it/sec) -training >> step=5803900, episode=968 reward=0.7705889 (509.11 it/sec) -training >> step=5804000, episode=968 reward=0.7783273 (490.91 it/sec) -training >> step=5804100, episode=968 reward=0.8019538 (529.10 it/sec) -training >> step=5804200, episode=968 reward=0.7960492 (485.90 it/sec) -training >> step=5804300, episode=968 reward=0.7601871 (482.30 it/sec) -training >> step=5804400, episode=968 reward=0.7920168 (467.00 it/sec) -training >> step=5804500, episode=968 reward=0.7919643 (472.95 it/sec) -training >> step=5804600, episode=968 reward=0.7956945 (463.82 it/sec) -training >> step=5804700, episode=968 reward=0.7946878 (498.70 it/sec) -training >> step=5804800, episode=968 reward=0.7938238 (472.56 it/sec) -training >> step=5804900, episode=968 reward=0.7684913 (520.63 it/sec) -training >> step=5805000, episode=968 reward=0.7626464 (479.16 it/sec) -training >> step=5805100, episode=968 reward=0.8042591 (442.84 it/sec) -training >> step=5805200, episode=968 reward=0.7906718 (515.70 it/sec) -training >> step=5805300, episode=968 reward=0.7763471 (477.91 it/sec) -training >> step=5805400, episode=968 reward=0.7842285 (471.82 it/sec) -training >> step=5805500, episode=968 reward=0.7929881 (477.11 it/sec) -training >> step=5805600, episode=968 reward=0.7834214 (530.00 it/sec) -training >> step=5805700, episode=968 reward=0.7917477 (482.72 it/sec) -training >> step=5805800, episode=968 reward=0.7770191 (468.35 it/sec) -training >> step=5805900, episode=968 reward=0.7781845 (465.86 it/sec) -training >> step=5806000, episode=968 reward=0.7986506 (532.92 it/sec) -training >> step=5806100, episode=968 reward=0.7770718 (509.77 it/sec) -training >> step=5806200, episode=968 reward=0.7786595 (430.93 it/sec) -training >> step=5806300, episode=968 reward=0.7782694 (520.49 it/sec) -training >> step=5806400, episode=968 reward=0.7545986 (472.79 it/sec) -training >> step=5806500, episode=968 reward=0.7593601 (487.90 it/sec) -training >> step=5806600, episode=968 reward=0.7616765 (414.61 it/sec) -training >> step=5806700, episode=968 reward=0.7752269 (474.88 it/sec) -training >> step=5806800, episode=968 reward=0.7465026 (478.47 it/sec) -training >> step=5806900, episode=968 reward=0.742087 (448.29 it/sec) -training >> step=5807000, episode=968 reward=0.7731915 (524.11 it/sec) -training >> step=5807100, episode=968 reward=0.7718487 (500.45 it/sec) -training >> step=5807200, episode=968 reward=0.7647905 (472.25 it/sec) -training >> step=5807300, episode=969 reward=0.7621912 (118.52 it/sec) -training >> step=5807400, episode=969 reward=0.7826527 (433.67 it/sec) -training >> step=5807500, episode=969 reward=0.7780697 (471.71 it/sec) -training >> step=5807600, episode=969 reward=0.7895317 (508.60 it/sec) -training >> step=5807700, episode=969 reward=0.7789044 (485.90 it/sec) -training >> step=5807800, episode=969 reward=0.7874346 (412.73 it/sec) -training >> step=5807900, episode=969 reward=0.7779361 (525.69 it/sec) -training >> step=5808000, episode=969 reward=0.7912114 (475.80 it/sec) -training >> step=5808100, episode=969 reward=0.7841205 (480.15 it/sec) -training >> step=5808200, episode=969 reward=0.7740309 (488.75 it/sec) -training >> step=5808300, episode=969 reward=0.7794219 (479.54 it/sec) -training >> step=5808400, episode=969 reward=0.7857698 (501.22 it/sec) -training >> step=5808500, episode=969 reward=0.7665362 (517.25 it/sec) -training >> step=5808600, episode=969 reward=0.7820903 (505.15 it/sec) -training >> step=5808700, episode=969 reward=0.8042667 (486.17 it/sec) -training >> step=5808800, episode=969 reward=0.7795113 (479.34 it/sec) -training >> step=5808900, episode=969 reward=0.7884454 (449.39 it/sec) -training >> step=5809000, episode=969 reward=0.8013153 (458.89 it/sec) -training >> step=5809100, episode=969 reward=0.788749 (473.13 it/sec) -training >> step=5809200, episode=969 reward=0.8128673 (490.07 it/sec) -training >> step=5809300, episode=969 reward=0.7834589 (442.75 it/sec) -training >> step=5809400, episode=969 reward=0.7742457 (460.08 it/sec) -training >> step=5809500, episode=969 reward=0.7923785 (456.84 it/sec) -training >> step=5809600, episode=969 reward=0.7642183 (472.46 it/sec) -training >> step=5809700, episode=969 reward=0.7910314 (447.62 it/sec) -training >> step=5809800, episode=969 reward=0.7719024 (459.90 it/sec) -training >> step=5809900, episode=969 reward=0.7835277 (489.78 it/sec) -training >> step=5810000, episode=969 reward=0.7860377 (488.90 it/sec) -training >> step=5810100, episode=969 reward=0.7793538 (429.37 it/sec) -training >> step=5810200, episode=969 reward=0.7778951 (448.40 it/sec) -training >> step=5810300, episode=969 reward=0.7855114 (499.26 it/sec) -training >> step=5810400, episode=969 reward=0.7949951 (465.59 it/sec) -training >> step=5810500, episode=969 reward=0.7978886 (443.92 it/sec) -training >> step=5810600, episode=969 reward=0.7879604 (480.17 it/sec) -training >> step=5810700, episode=969 reward=0.7839473 (458.51 it/sec) -training >> step=5810800, episode=969 reward=0.7989734 (408.87 it/sec) -training >> step=5810900, episode=969 reward=0.7948608 (480.36 it/sec) -training >> step=5811000, episode=969 reward=0.7882686 (510.44 it/sec) -training >> step=5811100, episode=969 reward=0.7965012 (467.81 it/sec) -training >> step=5811200, episode=969 reward=0.7881135 (445.03 it/sec) -training >> step=5811300, episode=969 reward=0.7547693 (477.58 it/sec) -training >> step=5811400, episode=969 reward=0.796244 (508.46 it/sec) -training >> step=5811500, episode=969 reward=0.7797349 (471.03 it/sec) -training >> step=5811600, episode=969 reward=0.7794104 (504.15 it/sec) -training >> step=5811700, episode=969 reward=0.7683827 (475.46 it/sec) -training >> step=5811800, episode=969 reward=0.7533432 (535.18 it/sec) -training >> step=5811900, episode=969 reward=0.7965588 (511.35 it/sec) -training >> step=5812000, episode=969 reward=0.7640164 (520.09 it/sec) -training >> step=5812100, episode=969 reward=0.7839674 (527.15 it/sec) -training >> step=5812200, episode=969 reward=0.776324 (460.60 it/sec) -training >> step=5812300, episode=969 reward=0.7701707 (501.38 it/sec) -training >> step=5812400, episode=969 reward=0.7724817 (494.19 it/sec) -training >> step=5812500, episode=969 reward=0.758517 (539.93 it/sec) -training >> step=5812600, episode=969 reward=0.7518871 (509.88 it/sec) -training >> step=5812700, episode=969 reward=0.7789682 (493.74 it/sec) -training >> step=5812800, episode=969 reward=0.760847 (539.59 it/sec) -training >> step=5812900, episode=969 reward=0.7653611 (524.26 it/sec) -training >> step=5813000, episode=969 reward=0.7544317 (520.91 it/sec) -training >> step=5813100, episode=969 reward=0.7697778 (517.01 it/sec) -training >> step=5813200, episode=969 reward=0.7596446 (550.91 it/sec) -training >> step=5813300, episode=970 reward=0.7968854 (74.15 it/sec) -training >> step=5813400, episode=970 reward=0.7777096 (499.56 it/sec) -training >> step=5813500, episode=970 reward=0.7961172 (527.75 it/sec) -training >> step=5813600, episode=970 reward=0.7739605 (510.58 it/sec) -training >> step=5813700, episode=970 reward=0.7713004 (482.24 it/sec) -training >> step=5813800, episode=970 reward=0.7802866 (528.44 it/sec) -training >> step=5813900, episode=970 reward=0.8180021 (474.74 it/sec) -training >> step=5814000, episode=970 reward=0.7971677 (527.88 it/sec) -training >> step=5814100, episode=970 reward=0.7697597 (495.30 it/sec) -training >> step=5814200, episode=970 reward=0.796177 (497.10 it/sec) -training >> step=5814300, episode=970 reward=0.7753071 (523.36 it/sec) -training >> step=5814400, episode=970 reward=0.7807075 (536.06 it/sec) -training >> step=5814500, episode=970 reward=0.7831759 (485.89 it/sec) -training >> step=5814600, episode=970 reward=0.7594101 (539.44 it/sec) -training >> step=5814700, episode=970 reward=0.7856314 (530.22 it/sec) -training >> step=5814800, episode=970 reward=0.8011221 (527.16 it/sec) -training >> step=5814900, episode=970 reward=0.7922222 (521.36 it/sec) -training >> step=5815000, episode=970 reward=0.7835132 (475.44 it/sec) -training >> step=5815100, episode=970 reward=0.7813531 (513.73 it/sec) -training >> step=5815200, episode=970 reward=0.7942445 (446.89 it/sec) -training >> step=5815300, episode=970 reward=0.7986481 (407.05 it/sec) -training >> step=5815400, episode=970 reward=0.7800515 (476.38 it/sec) -training >> step=5815500, episode=970 reward=0.7899346 (444.95 it/sec) -training >> step=5815600, episode=970 reward=0.7870243 (490.30 it/sec) -training >> step=5815700, episode=970 reward=0.7781129 (384.99 it/sec) -training >> step=5815800, episode=970 reward=0.7731361 (443.12 it/sec) -training >> step=5815900, episode=970 reward=0.7812536 (430.58 it/sec) -training >> step=5816000, episode=970 reward=0.809124 (477.24 it/sec) -training >> step=5816100, episode=970 reward=0.7959319 (470.32 it/sec) -training >> step=5816200, episode=970 reward=0.7959973 (416.72 it/sec) -training >> step=5816300, episode=970 reward=0.7977276 (441.46 it/sec) -training >> step=5816400, episode=970 reward=0.7795368 (495.97 it/sec) -training >> step=5816500, episode=970 reward=0.7673831 (522.54 it/sec) -training >> step=5816600, episode=970 reward=0.7952309 (506.20 it/sec) -training >> step=5816700, episode=970 reward=0.8007382 (454.05 it/sec) -training >> step=5816800, episode=970 reward=0.7963865 (482.62 it/sec) -training >> step=5816900, episode=970 reward=0.7804523 (493.26 it/sec) -training >> step=5817000, episode=970 reward=0.7825103 (465.86 it/sec) -training >> step=5817100, episode=970 reward=0.7902202 (490.57 it/sec) -training >> step=5817200, episode=970 reward=0.7753338 (452.75 it/sec) -training >> step=5817300, episode=970 reward=0.7765194 (454.74 it/sec) -training >> step=5817400, episode=970 reward=0.7868544 (502.61 it/sec) -training >> step=5817500, episode=970 reward=0.8065386 (491.57 it/sec) -training >> step=5817600, episode=970 reward=0.7765269 (478.56 it/sec) -training >> step=5817700, episode=970 reward=0.7900555 (441.80 it/sec) -training >> step=5817800, episode=970 reward=0.7981991 (481.89 it/sec) -training >> step=5817900, episode=970 reward=0.7962811 (486.07 it/sec) -training >> step=5818000, episode=970 reward=0.7746943 (451.71 it/sec) -training >> step=5818100, episode=970 reward=0.7770861 (420.80 it/sec) -training >> step=5818200, episode=970 reward=0.7806231 (438.43 it/sec) -training >> step=5818300, episode=970 reward=0.7743317 (472.05 it/sec) -training >> step=5818400, episode=970 reward=0.7617754 (491.43 it/sec) -training >> step=5818500, episode=970 reward=0.7599384 (516.73 it/sec) -training >> step=5818600, episode=970 reward=0.7576793 (478.73 it/sec) -training >> step=5818700, episode=970 reward=0.7830905 (508.67 it/sec) -training >> step=5818800, episode=970 reward=0.7710665 (487.27 it/sec) -training >> step=5818900, episode=970 reward=0.7393794 (466.38 it/sec) -training >> step=5819000, episode=970 reward=0.7560751 (469.63 it/sec) -training >> step=5819100, episode=970 reward=0.772023 (503.68 it/sec) -training >> step=5819200, episode=970 reward=0.7565866 (450.57 it/sec) -training >> step=5819300, episode=971 reward=0.7836356 (55.90 it/sec) -training >> step=5819400, episode=971 reward=0.7870904 (459.38 it/sec) -training >> step=5819500, episode=971 reward=0.8049033 (456.42 it/sec) -training >> step=5819600, episode=971 reward=0.7769567 (502.93 it/sec) -training >> step=5819700, episode=971 reward=0.7756013 (552.46 it/sec) -training >> step=5819800, episode=971 reward=0.7536777 (527.75 it/sec) -training >> step=5819900, episode=971 reward=0.7888793 (474.56 it/sec) -training >> step=5820000, episode=971 reward=0.7915245 (547.98 it/sec) -training >> step=5820100, episode=971 reward=0.7701638 (503.77 it/sec) -training >> step=5820200, episode=971 reward=0.7732137 (477.42 it/sec) -training >> step=5820300, episode=971 reward=0.7757909 (524.14 it/sec) -training >> step=5820400, episode=971 reward=0.7949455 (518.87 it/sec) -training >> step=5820500, episode=971 reward=0.7847214 (481.35 it/sec) -training >> step=5820600, episode=971 reward=0.7823073 (467.46 it/sec) -training >> step=5820700, episode=971 reward=0.7819573 (532.14 it/sec) -training >> step=5820800, episode=971 reward=0.7798349 (504.52 it/sec) -training >> step=5820900, episode=971 reward=0.7884673 (511.78 it/sec) -training >> step=5821000, episode=971 reward=0.7854246 (472.09 it/sec) -training >> step=5821100, episode=971 reward=0.7763333 (499.26 it/sec) -training >> step=5821200, episode=971 reward=0.7909172 (537.96 it/sec) -training >> step=5821300, episode=971 reward=0.7731941 (476.73 it/sec) -training >> step=5821400, episode=971 reward=0.7687207 (530.25 it/sec) -training >> step=5821500, episode=971 reward=0.8047146 (467.81 it/sec) -training >> step=5821600, episode=971 reward=0.7701442 (475.92 it/sec) -training >> step=5821700, episode=971 reward=0.7966659 (533.17 it/sec) -training >> step=5821800, episode=971 reward=0.7629902 (525.34 it/sec) -training >> step=5821900, episode=971 reward=0.8008471 (517.03 it/sec) -training >> step=5822000, episode=971 reward=0.7870365 (523.45 it/sec) -training >> step=5822100, episode=971 reward=0.7683389 (549.52 it/sec) -training >> step=5822200, episode=971 reward=0.7848877 (472.56 it/sec) -training >> step=5822300, episode=971 reward=0.7802365 (476.97 it/sec) -training >> step=5822400, episode=971 reward=0.786105 (501.16 it/sec) -training >> step=5822500, episode=971 reward=0.7655501 (518.50 it/sec) -training >> step=5822600, episode=971 reward=0.7954227 (494.93 it/sec) -training >> step=5822700, episode=971 reward=0.8102439 (505.25 it/sec) -training >> step=5822800, episode=971 reward=0.7783165 (470.91 it/sec) -training >> step=5822900, episode=971 reward=0.7844025 (528.10 it/sec) -training >> step=5823000, episode=971 reward=0.8040354 (520.93 it/sec) -training >> step=5823100, episode=971 reward=0.764079 (511.11 it/sec) -training >> step=5823200, episode=971 reward=0.7729189 (481.58 it/sec) -training >> step=5823300, episode=971 reward=0.776032 (510.96 it/sec) -training >> step=5823400, episode=971 reward=0.7765609 (463.54 it/sec) -training >> step=5823500, episode=971 reward=0.8033673 (466.10 it/sec) -training >> step=5823600, episode=971 reward=0.7704428 (493.48 it/sec) -training >> step=5823700, episode=971 reward=0.78798 (491.60 it/sec) -training >> step=5823800, episode=971 reward=0.7898487 (484.41 it/sec) -training >> step=5823900, episode=971 reward=0.7761368 (572.73 it/sec) -training >> step=5824000, episode=971 reward=0.7858279 (499.60 it/sec) -training >> step=5824100, episode=971 reward=0.7642643 (508.30 it/sec) -training >> step=5824200, episode=971 reward=0.777115 (467.46 it/sec) -training >> step=5824300, episode=971 reward=0.7777175 (472.52 it/sec) -training >> step=5824400, episode=971 reward=0.7630097 (507.99 it/sec) -training >> step=5824500, episode=971 reward=0.7623887 (493.35 it/sec) -training >> step=5824600, episode=971 reward=0.7636777 (471.78 it/sec) -training >> step=5824700, episode=971 reward=0.7654358 (525.44 it/sec) -training >> step=5824800, episode=971 reward=0.7609539 (508.54 it/sec) -training >> step=5824900, episode=971 reward=0.7694479 (492.85 it/sec) -training >> step=5825000, episode=971 reward=0.7800304 (459.11 it/sec) -training >> step=5825100, episode=971 reward=0.7736169 (459.38 it/sec) -training >> step=5825200, episode=971 reward=0.7635472 (494.80 it/sec) -training >> step=5825300, episode=972 reward=0.7750086 (63.12 it/sec) -training >> step=5825400, episode=972 reward=0.7849607 (424.72 it/sec) -training >> step=5825500, episode=972 reward=0.7882809 (474.86 it/sec) -training >> step=5825600, episode=972 reward=0.7634445 (473.33 it/sec) -training >> step=5825700, episode=972 reward=0.7975584 (484.13 it/sec) -training >> step=5825800, episode=972 reward=0.7874881 (488.43 it/sec) -training >> step=5825900, episode=972 reward=0.7857338 (468.63 it/sec) -training >> step=5826000, episode=972 reward=0.7919483 (455.77 it/sec) -training >> step=5826100, episode=972 reward=0.7582054 (480.71 it/sec) -training >> step=5826200, episode=972 reward=0.7972744 (510.67 it/sec) -training >> step=5826300, episode=972 reward=0.8032861 (479.50 it/sec) -training >> step=5826400, episode=972 reward=0.7649941 (489.69 it/sec) -training >> step=5826500, episode=972 reward=0.7945495 (460.53 it/sec) -training >> step=5826600, episode=972 reward=0.7985303 (430.29 it/sec) -training >> step=5826700, episode=972 reward=0.7852012 (461.48 it/sec) -training >> step=5826800, episode=972 reward=0.7860391 (468.52 it/sec) -training >> step=5826900, episode=972 reward=0.7815567 (444.81 it/sec) -training >> step=5827000, episode=972 reward=0.7859445 (433.59 it/sec) -training >> step=5827100, episode=972 reward=0.7726273 (474.93 it/sec) -training >> step=5827200, episode=972 reward=0.7816889 (497.39 it/sec) -training >> step=5827300, episode=972 reward=0.7736745 (493.50 it/sec) -training >> step=5827400, episode=972 reward=0.7712997 (439.92 it/sec) -training >> step=5827500, episode=972 reward=0.7858728 (482.20 it/sec) -training >> step=5827600, episode=972 reward=0.7788023 (508.93 it/sec) -training >> step=5827700, episode=972 reward=0.782968 (449.32 it/sec) -training >> step=5827800, episode=972 reward=0.7989374 (491.50 it/sec) -training >> step=5827900, episode=972 reward=0.7928153 (466.26 it/sec) -training >> step=5828000, episode=972 reward=0.7701248 (440.78 it/sec) -training >> step=5828100, episode=972 reward=0.7789268 (473.80 it/sec) -training >> step=5828200, episode=972 reward=0.7821791 (456.21 it/sec) -training >> step=5828300, episode=972 reward=0.7847373 (489.12 it/sec) -training >> step=5828400, episode=972 reward=0.7799491 (456.69 it/sec) -training >> step=5828500, episode=972 reward=0.7818484 (495.20 it/sec) -training >> step=5828600, episode=972 reward=0.7888229 (465.27 it/sec) -training >> step=5828700, episode=972 reward=0.7551275 (464.63 it/sec) -training >> step=5828800, episode=972 reward=0.7846998 (482.40 it/sec) -training >> step=5828900, episode=972 reward=0.7948343 (482.13 it/sec) -training >> step=5829000, episode=972 reward=0.7942321 (509.91 it/sec) -training >> step=5829100, episode=972 reward=0.7943671 (495.79 it/sec) -training >> step=5829200, episode=972 reward=0.7912303 (448.01 it/sec) -training >> step=5829300, episode=972 reward=0.7611228 (471.96 it/sec) -training >> step=5829400, episode=972 reward=0.7961081 (479.84 it/sec) -training >> step=5829500, episode=972 reward=0.8045897 (503.59 it/sec) -training >> step=5829600, episode=972 reward=0.7805535 (470.42 it/sec) -training >> step=5829700, episode=972 reward=0.7914084 (480.09 it/sec) -training >> step=5829800, episode=972 reward=0.771773 (431.22 it/sec) -training >> step=5829900, episode=972 reward=0.7651656 (466.41 it/sec) -training >> step=5830000, episode=972 reward=0.7830097 (482.80 it/sec) -training >> step=5830100, episode=972 reward=0.7792394 (523.51 it/sec) -training >> step=5830200, episode=972 reward=0.7850314 (466.70 it/sec) -training >> step=5830300, episode=972 reward=0.7515888 (415.20 it/sec) -training >> step=5830400, episode=972 reward=0.7790427 (488.04 it/sec) -training >> step=5830500, episode=972 reward=0.7774062 (469.91 it/sec) -training >> step=5830600, episode=972 reward=0.7454008 (501.68 it/sec) -training >> step=5830700, episode=972 reward=0.7754423 (489.64 it/sec) -training >> step=5830800, episode=972 reward=0.7699611 (503.35 it/sec) -training >> step=5830900, episode=972 reward=0.7756332 (462.35 it/sec) -training >> step=5831000, episode=972 reward=0.775004 (499.76 it/sec) -training >> step=5831100, episode=972 reward=0.7485645 (494.19 it/sec) -training >> step=5831200, episode=972 reward=0.79249 (483.35 it/sec) -training >> step=5831300, episode=973 reward=0.7732984 (129.18 it/sec) -training >> step=5831400, episode=973 reward=0.7642443 (481.44 it/sec) -training >> step=5831500, episode=973 reward=0.7873375 (449.86 it/sec) -training >> step=5831600, episode=973 reward=0.778468 (466.21 it/sec) -training >> step=5831700, episode=973 reward=0.7883283 (450.23 it/sec) -training >> step=5831800, episode=973 reward=0.7943076 (445.83 it/sec) -training >> step=5831900, episode=973 reward=0.7833853 (495.18 it/sec) -training >> step=5832000, episode=973 reward=0.7749366 (480.84 it/sec) -training >> step=5832100, episode=973 reward=0.7963119 (440.44 it/sec) -training >> step=5832200, episode=973 reward=0.7849336 (480.39 it/sec) -training >> step=5832300, episode=973 reward=0.8044595 (493.32 it/sec) -training >> step=5832400, episode=973 reward=0.8003535 (430.68 it/sec) -training >> step=5832500, episode=973 reward=0.7827857 (468.09 it/sec) -training >> step=5832600, episode=973 reward=0.7871628 (494.07 it/sec) -training >> step=5832700, episode=973 reward=0.791791 (498.26 it/sec) -training >> step=5832800, episode=973 reward=0.7745391 (521.44 it/sec) -training >> step=5832900, episode=973 reward=0.770768 (514.67 it/sec) -training >> step=5833000, episode=973 reward=0.787894 (475.52 it/sec) -training >> step=5833100, episode=973 reward=0.7869067 (495.63 it/sec) -training >> step=5833200, episode=973 reward=0.7825691 (475.84 it/sec) -training >> step=5833300, episode=973 reward=0.8065005 (494.23 it/sec) -training >> step=5833400, episode=973 reward=0.782227 (492.08 it/sec) -training >> step=5833500, episode=973 reward=0.7787525 (475.59 it/sec) -training >> step=5833600, episode=973 reward=0.8048347 (506.26 it/sec) -training >> step=5833700, episode=973 reward=0.787562 (502.81 it/sec) -training >> step=5833800, episode=973 reward=0.7870705 (523.90 it/sec) -training >> step=5833900, episode=973 reward=0.7897135 (493.60 it/sec) -training >> step=5834000, episode=973 reward=0.7599704 (510.62 it/sec) -training >> step=5834100, episode=973 reward=0.7875757 (503.72 it/sec) -training >> step=5834200, episode=973 reward=0.7936695 (526.55 it/sec) -training >> step=5834300, episode=973 reward=0.7843969 (491.77 it/sec) -training >> step=5834400, episode=973 reward=0.7938282 (510.02 it/sec) -training >> step=5834500, episode=973 reward=0.7806337 (511.12 it/sec) -training >> step=5834600, episode=973 reward=0.7882549 (480.79 it/sec) -training >> step=5834700, episode=973 reward=0.7961064 (515.54 it/sec) -training >> step=5834800, episode=973 reward=0.7735507 (490.84 it/sec) -training >> step=5834900, episode=973 reward=0.7840875 (519.93 it/sec) -training >> step=5835000, episode=973 reward=0.7819734 (495.87 it/sec) -training >> step=5835100, episode=973 reward=0.776068 (526.08 it/sec) -training >> step=5835200, episode=973 reward=0.7776753 (536.02 it/sec) -training >> step=5835300, episode=973 reward=0.7816429 (480.68 it/sec) -training >> step=5835400, episode=973 reward=0.7816996 (516.42 it/sec) -training >> step=5835500, episode=973 reward=0.7857571 (499.70 it/sec) -training >> step=5835600, episode=973 reward=0.7794569 (529.94 it/sec) -training >> step=5835700, episode=973 reward=0.7765504 (530.05 it/sec) -training >> step=5835800, episode=973 reward=0.779827 (582.76 it/sec) -training >> step=5835900, episode=973 reward=0.7749068 (464.86 it/sec) -training >> step=5836000, episode=973 reward=0.7495744 (450.25 it/sec) -training >> step=5836100, episode=973 reward=0.779098 (525.61 it/sec) -training >> step=5836200, episode=973 reward=0.764234 (473.38 it/sec) -training >> step=5836300, episode=973 reward=0.7803108 (503.40 it/sec) -training >> step=5836400, episode=973 reward=0.7477484 (500.71 it/sec) -training >> step=5836500, episode=973 reward=0.7859686 (446.31 it/sec) -training >> step=5836600, episode=973 reward=0.7719517 (521.12 it/sec) -training >> step=5836700, episode=973 reward=0.7601438 (498.24 it/sec) -training >> step=5836800, episode=973 reward=0.7874464 (501.79 it/sec) -training >> step=5836900, episode=973 reward=0.7533438 (543.50 it/sec) -training >> step=5837000, episode=973 reward=0.763173 (487.04 it/sec) -training >> step=5837100, episode=973 reward=0.7714194 (493.78 it/sec) -training >> step=5837200, episode=973 reward=0.7778913 (530.44 it/sec) -training >> step=5837300, episode=974 reward=0.7659912 (57.65 it/sec) -training >> step=5837400, episode=974 reward=0.7650362 (482.85 it/sec) -training >> step=5837500, episode=974 reward=0.7620414 (448.97 it/sec) -training >> step=5837600, episode=974 reward=0.7732579 (466.16 it/sec) -training >> step=5837700, episode=974 reward=0.7729073 (440.81 it/sec) -training >> step=5837800, episode=974 reward=0.7883501 (446.67 it/sec) -training >> step=5837900, episode=974 reward=0.7985925 (478.73 it/sec) -training >> step=5838000, episode=974 reward=0.7941547 (458.64 it/sec) -training >> step=5838100, episode=974 reward=0.7899629 (500.80 it/sec) -training >> step=5838200, episode=974 reward=0.7972593 (478.11 it/sec) -training >> step=5838300, episode=974 reward=0.7998701 (457.30 it/sec) -training >> step=5838400, episode=974 reward=0.7853711 (463.63 it/sec) -training >> step=5838500, episode=974 reward=0.7764516 (501.35 it/sec) -training >> step=5838600, episode=974 reward=0.786558 (518.06 it/sec) -training >> step=5838700, episode=974 reward=0.7974686 (461.23 it/sec) -training >> step=5838800, episode=974 reward=0.7954059 (451.52 it/sec) -training >> step=5838900, episode=974 reward=0.7692477 (474.84 it/sec) -training >> step=5839000, episode=974 reward=0.7865496 (530.36 it/sec) -training >> step=5839100, episode=974 reward=0.7897965 (507.10 it/sec) -training >> step=5839200, episode=974 reward=0.7778327 (480.20 it/sec) -training >> step=5839300, episode=974 reward=0.7938158 (480.28 it/sec) -training >> step=5839400, episode=974 reward=0.7867506 (506.57 it/sec) -training >> step=5839500, episode=974 reward=0.7994896 (479.38 it/sec) -training >> step=5839600, episode=974 reward=0.7704949 (525.91 it/sec) -training >> step=5839700, episode=974 reward=0.7802387 (467.41 it/sec) -training >> step=5839800, episode=974 reward=0.7708592 (483.16 it/sec) -training >> step=5839900, episode=974 reward=0.779917 (491.82 it/sec) -training >> step=5840000, episode=974 reward=0.7889037 (498.94 it/sec) -training >> step=5840100, episode=974 reward=0.7995328 (448.39 it/sec) -training >> step=5840200, episode=974 reward=0.7953361 (485.81 it/sec) -training >> step=5840300, episode=974 reward=0.7892917 (477.41 it/sec) -training >> step=5840400, episode=974 reward=0.7819927 (477.54 it/sec) -training >> step=5840500, episode=974 reward=0.7993407 (476.77 it/sec) -training >> step=5840600, episode=974 reward=0.794281 (494.79 it/sec) -training >> step=5840700, episode=974 reward=0.7931038 (451.39 it/sec) -training >> step=5840800, episode=974 reward=0.7677767 (495.42 it/sec) -training >> step=5840900, episode=974 reward=0.7766765 (473.94 it/sec) -training >> step=5841000, episode=974 reward=0.7935843 (509.19 it/sec) -training >> step=5841100, episode=974 reward=0.7804287 (516.70 it/sec) -training >> step=5841200, episode=974 reward=0.7717193 (444.55 it/sec) -training >> step=5841300, episode=974 reward=0.7795947 (459.54 it/sec) -training >> step=5841400, episode=974 reward=0.7751809 (481.44 it/sec) -training >> step=5841500, episode=974 reward=0.7785663 (493.37 it/sec) -training >> step=5841600, episode=974 reward=0.7850588 (438.53 it/sec) -training >> step=5841700, episode=974 reward=0.7856266 (460.14 it/sec) -training >> step=5841800, episode=974 reward=0.767452 (488.58 it/sec) -training >> step=5841900, episode=974 reward=0.7582736 (477.19 it/sec) -training >> step=5842000, episode=974 reward=0.7761824 (500.58 it/sec) -training >> step=5842100, episode=974 reward=0.7756091 (488.18 it/sec) -training >> step=5842200, episode=974 reward=0.7567954 (461.12 it/sec) -training >> step=5842300, episode=974 reward=0.7639121 (486.68 it/sec) -training >> step=5842400, episode=974 reward=0.7656066 (480.91 it/sec) -training >> step=5842500, episode=974 reward=0.7813346 (503.98 it/sec) -training >> step=5842600, episode=974 reward=0.7771417 (481.64 it/sec) -training >> step=5842700, episode=974 reward=0.7607052 (473.91 it/sec) -training >> step=5842800, episode=974 reward=0.7526951 (470.03 it/sec) -training >> step=5842900, episode=974 reward=0.7634746 (462.76 it/sec) -training >> step=5843000, episode=974 reward=0.7423809 (510.61 it/sec) -training >> step=5843100, episode=974 reward=0.7722923 (474.42 it/sec) -training >> step=5843200, episode=974 reward=0.7736125 (469.66 it/sec) -training >> step=5843300, episode=975 reward=0.7686384 (74.19 it/sec) -training >> step=5843400, episode=975 reward=0.775309 (471.49 it/sec) -training >> step=5843500, episode=975 reward=0.7865801 (509.42 it/sec) -training >> step=5843600, episode=975 reward=0.79768 (445.54 it/sec) -training >> step=5843700, episode=975 reward=0.7627113 (460.83 it/sec) -training >> step=5843800, episode=975 reward=0.7832058 (520.80 it/sec) -training >> step=5843900, episode=975 reward=0.7847816 (531.35 it/sec) -training >> step=5844000, episode=975 reward=0.7799278 (519.81 it/sec) -training >> step=5844100, episode=975 reward=0.772105 (467.64 it/sec) -training >> step=5844200, episode=975 reward=0.7809257 (494.30 it/sec) -training >> step=5844300, episode=975 reward=0.7857397 (455.28 it/sec) -training >> step=5844400, episode=975 reward=0.779841 (502.25 it/sec) -training >> step=5844500, episode=975 reward=0.797976 (551.69 it/sec) -training >> step=5844600, episode=975 reward=0.7878183 (452.54 it/sec) -training >> step=5844700, episode=975 reward=0.7874231 (531.58 it/sec) -training >> step=5844800, episode=975 reward=0.7923657 (493.22 it/sec) -training >> step=5844900, episode=975 reward=0.783371 (431.13 it/sec) -training >> step=5845000, episode=975 reward=0.7851509 (525.74 it/sec) -training >> step=5845100, episode=975 reward=0.7656056 (530.29 it/sec) -training >> step=5845200, episode=975 reward=0.7842473 (468.61 it/sec) -training >> step=5845300, episode=975 reward=0.7851087 (464.27 it/sec) -training >> step=5845400, episode=975 reward=0.7806862 (468.28 it/sec) -training >> step=5845500, episode=975 reward=0.7706032 (488.03 it/sec) -training >> step=5845600, episode=975 reward=0.7971882 (502.55 it/sec) -training >> step=5845700, episode=975 reward=0.7761685 (520.77 it/sec) -training >> step=5845800, episode=975 reward=0.7690743 (533.66 it/sec) -training >> step=5845900, episode=975 reward=0.7834252 (550.84 it/sec) -training >> step=5846000, episode=975 reward=0.8007305 (490.89 it/sec) -training >> step=5846100, episode=975 reward=0.7825666 (519.62 it/sec) -training >> step=5846200, episode=975 reward=0.7734317 (507.65 it/sec) -training >> step=5846300, episode=975 reward=0.7809469 (488.99 it/sec) -training >> step=5846400, episode=975 reward=0.7764437 (499.54 it/sec) -training >> step=5846500, episode=975 reward=0.7639436 (539.03 it/sec) -training >> step=5846600, episode=975 reward=0.7839019 (530.31 it/sec) -training >> step=5846700, episode=975 reward=0.7753851 (502.13 it/sec) -training >> step=5846800, episode=975 reward=0.7802096 (530.29 it/sec) -training >> step=5846900, episode=975 reward=0.8018319 (564.01 it/sec) -training >> step=5847000, episode=975 reward=0.7819384 (544.95 it/sec) -training >> step=5847100, episode=975 reward=0.7939076 (519.66 it/sec) -training >> step=5847200, episode=975 reward=0.7748741 (520.09 it/sec) -training >> step=5847300, episode=975 reward=0.773835 (468.77 it/sec) -training >> step=5847400, episode=975 reward=0.8016109 (525.69 it/sec) -training >> step=5847500, episode=975 reward=0.7873099 (502.66 it/sec) -training >> step=5847600, episode=975 reward=0.7819853 (544.11 it/sec) -training >> step=5847700, episode=975 reward=0.7774222 (519.07 it/sec) -training >> step=5847800, episode=975 reward=0.7558962 (498.14 it/sec) -training >> step=5847900, episode=975 reward=0.7862676 (549.69 it/sec) -training >> step=5848000, episode=975 reward=0.7693663 (527.86 it/sec) -training >> step=5848100, episode=975 reward=0.7788771 (511.36 it/sec) -training >> step=5848200, episode=975 reward=0.7768937 (548.98 it/sec) -training >> step=5848300, episode=975 reward=0.7806697 (494.07 it/sec) -training >> step=5848400, episode=975 reward=0.7662947 (484.24 it/sec) -training >> step=5848500, episode=975 reward=0.7496478 (478.24 it/sec) -training >> step=5848600, episode=975 reward=0.7526124 (482.40 it/sec) -training >> step=5848700, episode=975 reward=0.7907038 (500.88 it/sec) -training >> step=5848800, episode=975 reward=0.7622214 (491.74 it/sec) -training >> step=5848900, episode=975 reward=0.759847 (444.62 it/sec) -training >> step=5849000, episode=975 reward=0.7512396 (495.70 it/sec) -training >> step=5849100, episode=975 reward=0.7531776 (516.40 it/sec) -training >> step=5849200, episode=975 reward=0.7662946 (467.98 it/sec) -training >> step=5849300, episode=976 reward=0.772377 (92.08 it/sec) -training >> step=5849400, episode=976 reward=0.7874612 (442.85 it/sec) -training >> step=5849500, episode=976 reward=0.781426 (540.47 it/sec) -training >> step=5849600, episode=976 reward=0.7891907 (475.87 it/sec) -training >> step=5849700, episode=976 reward=0.7760232 (508.49 it/sec) -training >> step=5849800, episode=976 reward=0.7809049 (491.59 it/sec) -training >> step=5849900, episode=976 reward=0.7875397 (460.06 it/sec) -training >> step=5850000, episode=976 reward=0.7889698 (507.62 it/sec) -training >> step=5850100, episode=976 reward=0.7706652 (507.06 it/sec) -training >> step=5850200, episode=976 reward=0.7786193 (479.42 it/sec) -training >> step=5850300, episode=976 reward=0.8071902 (524.12 it/sec) -training >> step=5850400, episode=976 reward=0.7943105 (484.32 it/sec) -training >> step=5850500, episode=976 reward=0.7842946 (444.99 it/sec) -training >> step=5850600, episode=976 reward=0.8028627 (514.94 it/sec) -training >> step=5850700, episode=976 reward=0.7791962 (543.10 it/sec) -training >> step=5850800, episode=976 reward=0.7959357 (498.16 it/sec) -training >> step=5850900, episode=976 reward=0.7994846 (463.84 it/sec) -training >> step=5851000, episode=976 reward=0.7945674 (497.73 it/sec) -training >> step=5851100, episode=976 reward=0.7925009 (481.65 it/sec) -training >> step=5851200, episode=976 reward=0.7802506 (527.27 it/sec) -training >> step=5851300, episode=976 reward=0.7920351 (486.71 it/sec) -training >> step=5851400, episode=976 reward=0.7848795 (512.27 it/sec) -training >> step=5851500, episode=976 reward=0.7743229 (509.75 it/sec) -training >> step=5851600, episode=976 reward=0.7710744 (480.27 it/sec) -training >> step=5851700, episode=976 reward=0.7879869 (469.20 it/sec) -training >> step=5851800, episode=976 reward=0.7666311 (484.42 it/sec) -training >> step=5851900, episode=976 reward=0.7884398 (482.26 it/sec) -training >> step=5852000, episode=976 reward=0.7735428 (469.74 it/sec) -training >> step=5852100, episode=976 reward=0.790171 (518.26 it/sec) -training >> step=5852200, episode=976 reward=0.7741451 (517.65 it/sec) -training >> step=5852300, episode=976 reward=0.7992431 (453.18 it/sec) -training >> step=5852400, episode=976 reward=0.7839668 (501.05 it/sec) -training >> step=5852500, episode=976 reward=0.7856256 (494.71 it/sec) -training >> step=5852600, episode=976 reward=0.7814033 (475.66 it/sec) -training >> step=5852700, episode=976 reward=0.7784473 (487.97 it/sec) -training >> step=5852800, episode=976 reward=0.7717671 (465.90 it/sec) -training >> step=5852900, episode=976 reward=0.7794381 (402.16 it/sec) -training >> step=5853000, episode=976 reward=0.7981609 (510.60 it/sec) -training >> step=5853100, episode=976 reward=0.8053947 (460.48 it/sec) -training >> step=5853200, episode=976 reward=0.78563 (518.98 it/sec) -training >> step=5853300, episode=976 reward=0.7950904 (506.65 it/sec) -training >> step=5853400, episode=976 reward=0.792908 (453.53 it/sec) -training >> step=5853500, episode=976 reward=0.7844534 (429.38 it/sec) -training >> step=5853600, episode=976 reward=0.7857035 (513.23 it/sec) -training >> step=5853700, episode=976 reward=0.7923526 (484.12 it/sec) -training >> step=5853800, episode=976 reward=0.7597917 (462.13 it/sec) -training >> step=5853900, episode=976 reward=0.7805277 (440.26 it/sec) -training >> step=5854000, episode=976 reward=0.7626188 (451.02 it/sec) -training >> step=5854100, episode=976 reward=0.7683464 (518.21 it/sec) -training >> step=5854200, episode=976 reward=0.7838572 (467.12 it/sec) -training >> step=5854300, episode=976 reward=0.768463 (466.63 it/sec) -training >> step=5854400, episode=976 reward=0.7792434 (442.66 it/sec) -training >> step=5854500, episode=976 reward=0.7558022 (459.57 it/sec) -training >> step=5854600, episode=976 reward=0.7835135 (511.29 it/sec) -training >> step=5854700, episode=976 reward=0.7693021 (501.69 it/sec) -training >> step=5854800, episode=976 reward=0.7474703 (491.70 it/sec) -training >> step=5854900, episode=976 reward=0.7813114 (464.73 it/sec) -training >> step=5855000, episode=976 reward=0.756792 (489.41 it/sec) -training >> step=5855100, episode=976 reward=0.7741613 (484.36 it/sec) -training >> step=5855200, episode=976 reward=0.7612615 (480.35 it/sec) -training >> step=5855300, episode=977 reward=0.7924973 (66.94 it/sec) -training >> step=5855400, episode=977 reward=0.770913 (441.53 it/sec) -training >> step=5855500, episode=977 reward=0.7853012 (499.02 it/sec) -training >> step=5855600, episode=977 reward=0.8010553 (481.50 it/sec) -training >> step=5855700, episode=977 reward=0.7801072 (409.85 it/sec) -training >> step=5855800, episode=977 reward=0.7748441 (497.88 it/sec) -training >> step=5855900, episode=977 reward=0.7906083 (510.90 it/sec) -training >> step=5856000, episode=977 reward=0.7928862 (436.21 it/sec) -training >> step=5856100, episode=977 reward=0.7515048 (466.81 it/sec) -training >> step=5856200, episode=977 reward=0.7810057 (427.73 it/sec) -training >> step=5856300, episode=977 reward=0.7763184 (461.50 it/sec) -training >> step=5856400, episode=977 reward=0.7705477 (499.36 it/sec) -training >> step=5856500, episode=977 reward=0.7974061 (493.23 it/sec) -training >> step=5856600, episode=977 reward=0.7848434 (473.52 it/sec) -training >> step=5856700, episode=977 reward=0.8005652 (437.60 it/sec) -training >> step=5856800, episode=977 reward=0.7829068 (446.14 it/sec) -training >> step=5856900, episode=977 reward=0.7817799 (509.45 it/sec) -training >> step=5857000, episode=977 reward=0.8085176 (495.09 it/sec) -training >> step=5857100, episode=977 reward=0.795872 (469.06 it/sec) -training >> step=5857200, episode=977 reward=0.8103972 (455.72 it/sec) -training >> step=5857300, episode=977 reward=0.7582296 (460.21 it/sec) -training >> step=5857400, episode=977 reward=0.7619271 (506.25 it/sec) -training >> step=5857500, episode=977 reward=0.7878039 (517.81 it/sec) -training >> step=5857600, episode=977 reward=0.8045042 (486.66 it/sec) -training >> step=5857700, episode=977 reward=0.7926359 (436.02 it/sec) -training >> step=5857800, episode=977 reward=0.7579685 (479.00 it/sec) -training >> step=5857900, episode=977 reward=0.7887357 (445.90 it/sec) -training >> step=5858000, episode=977 reward=0.7834744 (447.03 it/sec) -training >> step=5858100, episode=977 reward=0.7828512 (486.68 it/sec) -training >> step=5858200, episode=977 reward=0.7749972 (458.43 it/sec) -training >> step=5858300, episode=977 reward=0.7940426 (478.49 it/sec) -training >> step=5858400, episode=977 reward=0.7849799 (503.63 it/sec) -training >> step=5858500, episode=977 reward=0.8053646 (492.27 it/sec) -training >> step=5858600, episode=977 reward=0.7807399 (490.15 it/sec) -training >> step=5858700, episode=977 reward=0.7787955 (494.26 it/sec) -training >> step=5858800, episode=977 reward=0.7632485 (494.42 it/sec) -training >> step=5858900, episode=977 reward=0.7885894 (451.34 it/sec) -training >> step=5859000, episode=977 reward=0.7853575 (490.68 it/sec) -training >> step=5859100, episode=977 reward=0.7994839 (513.38 it/sec) -training >> step=5859200, episode=977 reward=0.7868667 (450.87 it/sec) -training >> step=5859300, episode=977 reward=0.7931547 (502.06 it/sec) -training >> step=5859400, episode=977 reward=0.7965157 (493.70 it/sec) -training >> step=5859500, episode=977 reward=0.7707065 (505.27 it/sec) -training >> step=5859600, episode=977 reward=0.790899 (498.37 it/sec) -training >> step=5859700, episode=977 reward=0.7741425 (470.47 it/sec) -training >> step=5859800, episode=977 reward=0.7567545 (559.97 it/sec) -training >> step=5859900, episode=977 reward=0.7806888 (511.61 it/sec) -training >> step=5860000, episode=977 reward=0.7676495 (539.19 it/sec) -training >> step=5860100, episode=977 reward=0.7756826 (481.61 it/sec) -training >> step=5860200, episode=977 reward=0.7681627 (536.49 it/sec) -training >> step=5860300, episode=977 reward=0.7937962 (448.96 it/sec) -training >> step=5860400, episode=977 reward=0.7814979 (487.93 it/sec) -training >> step=5860500, episode=977 reward=0.7732407 (512.14 it/sec) -training >> step=5860600, episode=977 reward=0.7777867 (500.44 it/sec) -training >> step=5860700, episode=977 reward=0.7689819 (482.46 it/sec) -training >> step=5860800, episode=977 reward=0.7768987 (470.91 it/sec) -training >> step=5860900, episode=977 reward=0.7855369 (474.43 it/sec) -training >> step=5861000, episode=977 reward=0.7782238 (499.33 it/sec) -training >> step=5861100, episode=977 reward=0.7676035 (516.12 it/sec) -training >> step=5861200, episode=977 reward=0.7815505 (535.02 it/sec) -training >> step=5861300, episode=978 reward=0.7803632 (94.68 it/sec) -training >> step=5861400, episode=978 reward=0.8013098 (461.20 it/sec) -training >> step=5861500, episode=978 reward=0.7789659 (518.50 it/sec) -training >> step=5861600, episode=978 reward=0.7957039 (497.75 it/sec) -training >> step=5861700, episode=978 reward=0.7802482 (490.85 it/sec) -training >> step=5861800, episode=978 reward=0.7821615 (492.71 it/sec) -training >> step=5861900, episode=978 reward=0.7756535 (487.35 it/sec) -training >> step=5862000, episode=978 reward=0.7840736 (529.77 it/sec) -training >> step=5862100, episode=978 reward=0.7814403 (532.98 it/sec) -training >> step=5862200, episode=978 reward=0.7896189 (538.98 it/sec) -training >> step=5862300, episode=978 reward=0.777747 (520.89 it/sec) -training >> step=5862400, episode=978 reward=0.7810212 (494.27 it/sec) -training >> step=5862500, episode=978 reward=0.7850598 (471.05 it/sec) -training >> step=5862600, episode=978 reward=0.8005199 (494.79 it/sec) -training >> step=5862700, episode=978 reward=0.7608944 (469.22 it/sec) -training >> step=5862800, episode=978 reward=0.7800127 (514.62 it/sec) -training >> step=5862900, episode=978 reward=0.7939543 (539.16 it/sec) -training >> step=5863000, episode=978 reward=0.7859974 (466.26 it/sec) -training >> step=5863100, episode=978 reward=0.7951479 (529.10 it/sec) -training >> step=5863200, episode=978 reward=0.789032 (555.43 it/sec) -training >> step=5863300, episode=978 reward=0.7829366 (531.39 it/sec) -training >> step=5863400, episode=978 reward=0.7713278 (501.36 it/sec) -training >> step=5863500, episode=978 reward=0.7792492 (518.34 it/sec) -training >> step=5863600, episode=978 reward=0.8019076 (545.64 it/sec) -training >> step=5863700, episode=978 reward=0.7855453 (523.06 it/sec) -training >> step=5863800, episode=978 reward=0.7920032 (513.57 it/sec) -training >> step=5863900, episode=978 reward=0.7685122 (536.10 it/sec) -training >> step=5864000, episode=978 reward=0.7921681 (488.18 it/sec) -training >> step=5864100, episode=978 reward=0.7962316 (501.48 it/sec) -training >> step=5864200, episode=978 reward=0.7802284 (539.00 it/sec) -training >> step=5864300, episode=978 reward=0.7904968 (526.69 it/sec) -training >> step=5864400, episode=978 reward=0.7786486 (523.55 it/sec) -training >> step=5864500, episode=978 reward=0.7894778 (488.83 it/sec) -training >> step=5864600, episode=978 reward=0.7962507 (421.08 it/sec) -training >> step=5864700, episode=978 reward=0.7936814 (454.35 it/sec) -training >> step=5864800, episode=978 reward=0.8006606 (527.40 it/sec) -training >> step=5864900, episode=978 reward=0.7730758 (521.16 it/sec) -training >> step=5865000, episode=978 reward=0.7704591 (529.62 it/sec) -training >> step=5865100, episode=978 reward=0.7813113 (478.60 it/sec) -training >> step=5865200, episode=978 reward=0.7643679 (495.49 it/sec) -training >> step=5865300, episode=978 reward=0.7670854 (551.86 it/sec) -training >> step=5865400, episode=978 reward=0.784063 (541.26 it/sec) -training >> step=5865500, episode=978 reward=0.7773264 (494.75 it/sec) -training >> step=5865600, episode=978 reward=0.7887706 (515.04 it/sec) -training >> step=5865700, episode=978 reward=0.7669765 (490.32 it/sec) -training >> step=5865800, episode=978 reward=0.786262 (478.21 it/sec) -training >> step=5865900, episode=978 reward=0.7943104 (531.63 it/sec) -training >> step=5866000, episode=978 reward=0.7722552 (533.55 it/sec) -training >> step=5866100, episode=978 reward=0.7638206 (534.60 it/sec) -training >> step=5866200, episode=978 reward=0.7819949 (493.69 it/sec) -training >> step=5866300, episode=978 reward=0.7772295 (486.73 it/sec) -training >> step=5866400, episode=978 reward=0.7690828 (537.75 it/sec) -training >> step=5866500, episode=978 reward=0.7584849 (532.43 it/sec) -training >> step=5866600, episode=978 reward=0.7707456 (541.06 it/sec) -training >> step=5866700, episode=978 reward=0.7749887 (496.66 it/sec) -training >> step=5866800, episode=978 reward=0.7679725 (526.98 it/sec) -training >> step=5866900, episode=978 reward=0.765209 (511.71 it/sec) -training >> step=5867000, episode=978 reward=0.7793121 (504.98 it/sec) -training >> step=5867100, episode=978 reward=0.7606447 (506.61 it/sec) -training >> step=5867200, episode=978 reward=0.7724546 (477.47 it/sec) -training >> step=5867300, episode=979 reward=0.7681556 (112.96 it/sec) -training >> step=5867400, episode=979 reward=0.7884134 (441.03 it/sec) -training >> step=5867500, episode=979 reward=0.7920596 (519.12 it/sec) -training >> step=5867600, episode=979 reward=0.778971 (499.03 it/sec) -training >> step=5867700, episode=979 reward=0.7575268 (438.16 it/sec) -training >> step=5867800, episode=979 reward=0.7893527 (462.81 it/sec) -training >> step=5867900, episode=979 reward=0.8006524 (496.45 it/sec) -training >> step=5868000, episode=979 reward=0.8029255 (537.68 it/sec) -training >> step=5868100, episode=979 reward=0.7757065 (494.60 it/sec) -training >> step=5868200, episode=979 reward=0.7658812 (509.33 it/sec) -training >> step=5868300, episode=979 reward=0.7654845 (449.27 it/sec) -training >> step=5868400, episode=979 reward=0.7829465 (419.79 it/sec) -training >> step=5868500, episode=979 reward=0.7827783 (523.08 it/sec) -training >> step=5868600, episode=979 reward=0.785919 (536.65 it/sec) -training >> step=5868700, episode=979 reward=0.7835324 (523.48 it/sec) -training >> step=5868800, episode=979 reward=0.7892376 (479.57 it/sec) -training >> step=5868900, episode=979 reward=0.7865525 (523.90 it/sec) -training >> step=5869000, episode=979 reward=0.7891082 (451.37 it/sec) -training >> step=5869100, episode=979 reward=0.793175 (494.30 it/sec) -training >> step=5869200, episode=979 reward=0.7936797 (526.89 it/sec) -training >> step=5869300, episode=979 reward=0.8055233 (541.29 it/sec) -training >> step=5869400, episode=979 reward=0.7777237 (486.18 it/sec) -training >> step=5869500, episode=979 reward=0.7996649 (451.39 it/sec) -training >> step=5869600, episode=979 reward=0.788525 (540.24 it/sec) -training >> step=5869700, episode=979 reward=0.7658388 (521.58 it/sec) -training >> step=5869800, episode=979 reward=0.7787778 (517.17 it/sec) -training >> step=5869900, episode=979 reward=0.7548874 (484.50 it/sec) -training >> step=5870000, episode=979 reward=0.784931 (487.65 it/sec) -training >> step=5870100, episode=979 reward=0.7651854 (502.59 it/sec) -training >> step=5870200, episode=979 reward=0.7847047 (526.73 it/sec) -training >> step=5870300, episode=979 reward=0.7698073 (518.81 it/sec) -training >> step=5870400, episode=979 reward=0.8007703 (520.43 it/sec) -training >> step=5870500, episode=979 reward=0.7744249 (439.17 it/sec) -training >> step=5870600, episode=979 reward=0.7824931 (499.51 it/sec) -training >> step=5870700, episode=979 reward=0.7807651 (503.07 it/sec) -training >> step=5870800, episode=979 reward=0.7704382 (475.24 it/sec) -training >> step=5870900, episode=979 reward=0.7812376 (459.21 it/sec) -training >> step=5871000, episode=979 reward=0.7748356 (448.23 it/sec) -training >> step=5871100, episode=979 reward=0.7903333 (469.14 it/sec) -training >> step=5871200, episode=979 reward=0.7862524 (435.58 it/sec) -training >> step=5871300, episode=979 reward=0.773215 (440.79 it/sec) -training >> step=5871400, episode=979 reward=0.7884136 (490.41 it/sec) -training >> step=5871500, episode=979 reward=0.7901992 (427.80 it/sec) -training >> step=5871600, episode=979 reward=0.8141627 (472.11 it/sec) -training >> step=5871700, episode=979 reward=0.7763376 (430.19 it/sec) -training >> step=5871800, episode=979 reward=0.7827195 (485.51 it/sec) -training >> step=5871900, episode=979 reward=0.7840845 (456.07 it/sec) -training >> step=5872000, episode=979 reward=0.7652181 (451.48 it/sec) -training >> step=5872100, episode=979 reward=0.776477 (453.44 it/sec) -training >> step=5872200, episode=979 reward=0.7770799 (414.70 it/sec) -training >> step=5872300, episode=979 reward=0.7927161 (404.80 it/sec) -training >> step=5872400, episode=979 reward=0.7700987 (436.32 it/sec) -training >> step=5872500, episode=979 reward=0.7794572 (443.53 it/sec) -training >> step=5872600, episode=979 reward=0.7441355 (476.44 it/sec) -training >> step=5872700, episode=979 reward=0.7651904 (495.96 it/sec) -training >> step=5872800, episode=979 reward=0.7758871 (501.91 it/sec) -training >> step=5872900, episode=979 reward=0.7685183 (490.24 it/sec) -training >> step=5873000, episode=979 reward=0.7676042 (497.70 it/sec) -training >> step=5873100, episode=979 reward=0.7619444 (502.10 it/sec) -training >> step=5873200, episode=979 reward=0.7480727 (487.17 it/sec) -training >> step=5873300, episode=980 reward=0.7907772 (88.43 it/sec) -training >> step=5873400, episode=980 reward=0.7851639 (479.35 it/sec) -training >> step=5873500, episode=980 reward=0.7927502 (412.02 it/sec) -training >> step=5873600, episode=980 reward=0.7763135 (498.35 it/sec) -training >> step=5873700, episode=980 reward=0.7745259 (481.75 it/sec) -training >> step=5873800, episode=980 reward=0.8046574 (514.07 it/sec) -training >> step=5873900, episode=980 reward=0.776966 (482.44 it/sec) -training >> step=5874000, episode=980 reward=0.7791204 (462.84 it/sec) -training >> step=5874100, episode=980 reward=0.7700334 (489.88 it/sec) -training >> step=5874200, episode=980 reward=0.7808577 (488.80 it/sec) -training >> step=5874300, episode=980 reward=0.7873993 (465.95 it/sec) -training >> step=5874400, episode=980 reward=0.8022788 (473.28 it/sec) -training >> step=5874500, episode=980 reward=0.7854646 (483.34 it/sec) -training >> step=5874600, episode=980 reward=0.7684757 (527.27 it/sec) -training >> step=5874700, episode=980 reward=0.7685567 (518.31 it/sec) -training >> step=5874800, episode=980 reward=0.7703823 (507.59 it/sec) -training >> step=5874900, episode=980 reward=0.794381 (524.33 it/sec) -training >> step=5875000, episode=980 reward=0.7890519 (406.74 it/sec) -training >> step=5875100, episode=980 reward=0.777713 (519.46 it/sec) -training >> step=5875200, episode=980 reward=0.7945032 (514.92 it/sec) -training >> step=5875300, episode=980 reward=0.8024542 (539.57 it/sec) -training >> step=5875400, episode=980 reward=0.8044515 (545.29 it/sec) -training >> step=5875500, episode=980 reward=0.7805787 (473.45 it/sec) -training >> step=5875600, episode=980 reward=0.7806826 (475.29 it/sec) -training >> step=5875700, episode=980 reward=0.7795193 (482.91 it/sec) -training >> step=5875800, episode=980 reward=0.7643163 (476.36 it/sec) -training >> step=5875900, episode=980 reward=0.7941929 (501.06 it/sec) -training >> step=5876000, episode=980 reward=0.7778343 (553.42 it/sec) -training >> step=5876100, episode=980 reward=0.7926862 (449.94 it/sec) -training >> step=5876200, episode=980 reward=0.788851 (463.41 it/sec) -training >> step=5876300, episode=980 reward=0.8117276 (465.78 it/sec) -training >> step=5876400, episode=980 reward=0.7977194 (481.63 it/sec) -training >> step=5876500, episode=980 reward=0.7676157 (495.04 it/sec) -training >> step=5876600, episode=980 reward=0.7742176 (421.80 it/sec) -training >> step=5876700, episode=980 reward=0.7896012 (491.44 it/sec) -training >> step=5876800, episode=980 reward=0.7813169 (464.69 it/sec) -training >> step=5876900, episode=980 reward=0.7927113 (489.71 it/sec) -training >> step=5877000, episode=980 reward=0.7848606 (490.69 it/sec) -training >> step=5877100, episode=980 reward=0.7936547 (459.58 it/sec) -training >> step=5877200, episode=980 reward=0.7951411 (499.83 it/sec) -training >> step=5877300, episode=980 reward=0.7839605 (488.43 it/sec) -training >> step=5877400, episode=980 reward=0.7908474 (508.14 it/sec) -training >> step=5877500, episode=980 reward=0.7752761 (490.87 it/sec) -training >> step=5877600, episode=980 reward=0.7743286 (451.20 it/sec) -training >> step=5877700, episode=980 reward=0.7678012 (485.02 it/sec) -training >> step=5877800, episode=980 reward=0.7618241 (510.77 it/sec) -training >> step=5877900, episode=980 reward=0.7853631 (490.65 it/sec) -training >> step=5878000, episode=980 reward=0.7727372 (498.44 it/sec) -training >> step=5878100, episode=980 reward=0.7733779 (444.56 it/sec) -training >> step=5878200, episode=980 reward=0.772647 (497.82 it/sec) -training >> step=5878300, episode=980 reward=0.778165 (499.19 it/sec) -training >> step=5878400, episode=980 reward=0.7613726 (491.30 it/sec) -training >> step=5878500, episode=980 reward=0.7695624 (493.62 it/sec) -training >> step=5878600, episode=980 reward=0.7605687 (449.50 it/sec) -training >> step=5878700, episode=980 reward=0.7667109 (474.50 it/sec) -training >> step=5878800, episode=980 reward=0.7593219 (472.47 it/sec) -training >> step=5878900, episode=980 reward=0.7766615 (549.24 it/sec) -training >> step=5879000, episode=980 reward=0.7662258 (496.49 it/sec) -training >> step=5879100, episode=980 reward=0.7467147 (427.04 it/sec) -training >> step=5879200, episode=980 reward=0.7986682 (463.61 it/sec) -training >> step=5879300, episode=981 reward=0.7784783 (85.22 it/sec) -training >> step=5879400, episode=981 reward=0.7855142 (477.44 it/sec) -training >> step=5879500, episode=981 reward=0.769452 (492.95 it/sec) -training >> step=5879600, episode=981 reward=0.7865573 (508.23 it/sec) -training >> step=5879700, episode=981 reward=0.7771629 (434.03 it/sec) -training >> step=5879800, episode=981 reward=0.7926688 (492.01 it/sec) -training >> step=5879900, episode=981 reward=0.7871763 (430.77 it/sec) -training >> step=5880000, episode=981 reward=0.7853597 (483.14 it/sec) -training >> step=5880100, episode=981 reward=0.7806376 (427.68 it/sec) -training >> step=5880200, episode=981 reward=0.7834061 (449.44 it/sec) -training >> step=5880300, episode=981 reward=0.7881386 (446.26 it/sec) -training >> step=5880400, episode=981 reward=0.7896953 (451.68 it/sec) -training >> step=5880500, episode=981 reward=0.8091606 (458.64 it/sec) -training >> step=5880600, episode=981 reward=0.7977749 (434.84 it/sec) -training >> step=5880700, episode=981 reward=0.7865545 (465.61 it/sec) -training >> step=5880800, episode=981 reward=0.7895368 (472.42 it/sec) -training >> step=5880900, episode=981 reward=0.7792217 (462.70 it/sec) -training >> step=5881000, episode=981 reward=0.7672747 (433.67 it/sec) -training >> step=5881100, episode=981 reward=0.768848 (413.32 it/sec) -training >> step=5881200, episode=981 reward=0.7704548 (440.21 it/sec) -training >> step=5881300, episode=981 reward=0.8008428 (488.88 it/sec) -training >> step=5881400, episode=981 reward=0.8096768 (460.93 it/sec) -training >> step=5881500, episode=981 reward=0.8014511 (432.74 it/sec) -training >> step=5881600, episode=981 reward=0.7815234 (468.11 it/sec) -training >> step=5881700, episode=981 reward=0.7941265 (449.73 it/sec) -training >> step=5881800, episode=981 reward=0.7849199 (462.73 it/sec) -training >> step=5881900, episode=981 reward=0.7842162 (465.51 it/sec) -training >> step=5882000, episode=981 reward=0.7939833 (424.49 it/sec) -training >> step=5882100, episode=981 reward=0.8022732 (437.49 it/sec) -training >> step=5882200, episode=981 reward=0.7992173 (458.40 it/sec) -training >> step=5882300, episode=981 reward=0.7874029 (385.80 it/sec) -training >> step=5882400, episode=981 reward=0.7828636 (414.26 it/sec) -training >> step=5882500, episode=981 reward=0.7857756 (435.77 it/sec) -training >> step=5882600, episode=981 reward=0.7990841 (444.66 it/sec) -training >> step=5882700, episode=981 reward=0.7678161 (493.11 it/sec) -training >> step=5882800, episode=981 reward=0.7917354 (479.42 it/sec) -training >> step=5882900, episode=981 reward=0.7709916 (477.44 it/sec) -training >> step=5883000, episode=981 reward=0.786445 (474.88 it/sec) -training >> step=5883100, episode=981 reward=0.7978664 (503.32 it/sec) -training >> step=5883200, episode=981 reward=0.7836701 (459.34 it/sec) -training >> step=5883300, episode=981 reward=0.7888721 (470.95 it/sec) -training >> step=5883400, episode=981 reward=0.7735441 (463.59 it/sec) -training >> step=5883500, episode=981 reward=0.7874461 (499.38 it/sec) -training >> step=5883600, episode=981 reward=0.777245 (453.85 it/sec) -training >> step=5883700, episode=981 reward=0.7774755 (430.07 it/sec) -training >> step=5883800, episode=981 reward=0.7746885 (493.70 it/sec) -training >> step=5883900, episode=981 reward=0.7979288 (427.70 it/sec) -training >> step=5884000, episode=981 reward=0.7750202 (457.71 it/sec) -training >> step=5884100, episode=981 reward=0.7820635 (470.80 it/sec) -training >> step=5884200, episode=981 reward=0.7735935 (481.80 it/sec) -training >> step=5884300, episode=981 reward=0.7644107 (468.91 it/sec) -training >> step=5884400, episode=981 reward=0.793403 (465.26 it/sec) -training >> step=5884500, episode=981 reward=0.7768958 (447.78 it/sec) -training >> step=5884600, episode=981 reward=0.7488952 (541.09 it/sec) -training >> step=5884700, episode=981 reward=0.7940655 (530.00 it/sec) -training >> step=5884800, episode=981 reward=0.7498559 (524.91 it/sec) -training >> step=5884900, episode=981 reward=0.7476071 (515.40 it/sec) -training >> step=5885000, episode=981 reward=0.7609193 (535.46 it/sec) -training >> step=5885100, episode=981 reward=0.7567413 (536.07 it/sec) -training >> step=5885200, episode=981 reward=0.7790673 (551.19 it/sec) -training >> step=5885300, episode=982 reward=0.7762535 (110.68 it/sec) -training >> step=5885400, episode=982 reward=0.7873964 (494.22 it/sec) -training >> step=5885500, episode=982 reward=0.7749068 (502.91 it/sec) -training >> step=5885600, episode=982 reward=0.7935733 (487.93 it/sec) -training >> step=5885700, episode=982 reward=0.7963069 (490.00 it/sec) -training >> step=5885800, episode=982 reward=0.7691056 (536.74 it/sec) -training >> step=5885900, episode=982 reward=0.7852901 (532.56 it/sec) -training >> step=5886000, episode=982 reward=0.7880824 (506.81 it/sec) -training >> step=5886100, episode=982 reward=0.7807955 (444.26 it/sec) -training >> step=5886200, episode=982 reward=0.7938021 (455.85 it/sec) -training >> step=5886300, episode=982 reward=0.7813283 (479.37 it/sec) -training >> step=5886400, episode=982 reward=0.7724891 (511.89 it/sec) -training >> step=5886500, episode=982 reward=0.77032 (487.03 it/sec) -training >> step=5886600, episode=982 reward=0.7564808 (455.00 it/sec) -training >> step=5886700, episode=982 reward=0.7962636 (481.55 it/sec) -training >> step=5886800, episode=982 reward=0.7657402 (467.06 it/sec) -training >> step=5886900, episode=982 reward=0.7913788 (490.94 it/sec) -training >> step=5887000, episode=982 reward=0.7778441 (539.53 it/sec) -training >> step=5887100, episode=982 reward=0.78795 (443.77 it/sec) -training >> step=5887200, episode=982 reward=0.7971383 (504.36 it/sec) -training >> step=5887300, episode=982 reward=0.7871113 (478.19 it/sec) -training >> step=5887400, episode=982 reward=0.7759755 (513.19 it/sec) -training >> step=5887500, episode=982 reward=0.7804346 (466.42 it/sec) -training >> step=5887600, episode=982 reward=0.8023504 (455.36 it/sec) -training >> step=5887700, episode=982 reward=0.7973891 (482.60 it/sec) -training >> step=5887800, episode=982 reward=0.7814314 (521.45 it/sec) -training >> step=5887900, episode=982 reward=0.7953085 (489.18 it/sec) -training >> step=5888000, episode=982 reward=0.8161148 (457.21 it/sec) -training >> step=5888100, episode=982 reward=0.7954209 (476.04 it/sec) -training >> step=5888200, episode=982 reward=0.7673336 (499.41 it/sec) -training >> step=5888300, episode=982 reward=0.7970476 (495.65 it/sec) -training >> step=5888400, episode=982 reward=0.7865886 (500.38 it/sec) -training >> step=5888500, episode=982 reward=0.7932149 (465.09 it/sec) -training >> step=5888600, episode=982 reward=0.7994645 (431.23 it/sec) -training >> step=5888700, episode=982 reward=0.7735242 (498.15 it/sec) -training >> step=5888800, episode=982 reward=0.7842505 (517.68 it/sec) -training >> step=5888900, episode=982 reward=0.7837448 (509.17 it/sec) -training >> step=5889000, episode=982 reward=0.8005352 (440.64 it/sec) -training >> step=5889100, episode=982 reward=0.7954295 (440.98 it/sec) -training >> step=5889200, episode=982 reward=0.7922558 (497.35 it/sec) -training >> step=5889300, episode=982 reward=0.7868555 (501.13 it/sec) -training >> step=5889400, episode=982 reward=0.7807584 (508.72 it/sec) -training >> step=5889500, episode=982 reward=0.79096 (465.84 it/sec) -training >> step=5889600, episode=982 reward=0.7770646 (467.17 it/sec) -training >> step=5889700, episode=982 reward=0.7873826 (460.82 it/sec) -training >> step=5889800, episode=982 reward=0.7873325 (513.37 it/sec) -training >> step=5889900, episode=982 reward=0.7821728 (493.04 it/sec) -training >> step=5890000, episode=982 reward=0.7905456 (495.47 it/sec) -training >> step=5890100, episode=982 reward=0.7848872 (447.07 it/sec) -training >> step=5890200, episode=982 reward=0.7541822 (454.78 it/sec) -training >> step=5890300, episode=982 reward=0.7696798 (492.68 it/sec) -training >> step=5890400, episode=982 reward=0.7782754 (477.16 it/sec) -training >> step=5890500, episode=982 reward=0.7801266 (437.86 it/sec) -training >> step=5890600, episode=982 reward=0.7781518 (478.63 it/sec) -training >> step=5890700, episode=982 reward=0.7814936 (432.13 it/sec) -training >> step=5890800, episode=982 reward=0.7774318 (483.49 it/sec) -training >> step=5890900, episode=982 reward=0.7726504 (471.35 it/sec) -training >> step=5891000, episode=982 reward=0.7752981 (454.09 it/sec) -training >> step=5891100, episode=982 reward=0.7686903 (495.40 it/sec) -training >> step=5891200, episode=982 reward=0.7757967 (477.78 it/sec) -training >> step=5891300, episode=983 reward=0.7861146 (88.58 it/sec) -training >> step=5891400, episode=983 reward=0.7771303 (505.26 it/sec) -training >> step=5891500, episode=983 reward=0.7981905 (434.08 it/sec) -training >> step=5891600, episode=983 reward=0.7917084 (455.87 it/sec) -training >> step=5891700, episode=983 reward=0.7887773 (524.06 it/sec) -training >> step=5891800, episode=983 reward=0.7760893 (491.22 it/sec) -training >> step=5891900, episode=983 reward=0.7969337 (480.45 it/sec) -training >> step=5892000, episode=983 reward=0.7807735 (475.96 it/sec) -training >> step=5892100, episode=983 reward=0.7744218 (446.65 it/sec) -training >> step=5892200, episode=983 reward=0.7694508 (482.33 it/sec) -training >> step=5892300, episode=983 reward=0.7904801 (515.13 it/sec) -training >> step=5892400, episode=983 reward=0.7815024 (500.45 it/sec) -training >> step=5892500, episode=983 reward=0.7881733 (443.12 it/sec) -training >> step=5892600, episode=983 reward=0.7596478 (499.15 it/sec) -training >> step=5892700, episode=983 reward=0.7846395 (483.55 it/sec) -training >> step=5892800, episode=983 reward=0.7693079 (420.69 it/sec) -training >> step=5892900, episode=983 reward=0.7962349 (507.40 it/sec) -training >> step=5893000, episode=983 reward=0.8054566 (458.18 it/sec) -training >> step=5893100, episode=983 reward=0.7884097 (474.48 it/sec) -training >> step=5893200, episode=983 reward=0.7857866 (480.47 it/sec) -training >> step=5893300, episode=983 reward=0.782091 (462.30 it/sec) -training >> step=5893400, episode=983 reward=0.7717946 (504.45 it/sec) -training >> step=5893500, episode=983 reward=0.7872583 (479.16 it/sec) -training >> step=5893600, episode=983 reward=0.7889135 (477.62 it/sec) -training >> step=5893700, episode=983 reward=0.7930403 (505.57 it/sec) -training >> step=5893800, episode=983 reward=0.8141838 (504.08 it/sec) -training >> step=5893900, episode=983 reward=0.8015795 (468.68 it/sec) -training >> step=5894000, episode=983 reward=0.7645518 (471.29 it/sec) -training >> step=5894100, episode=983 reward=0.7871407 (487.36 it/sec) -training >> step=5894200, episode=983 reward=0.7741177 (498.89 it/sec) -training >> step=5894300, episode=983 reward=0.7757239 (501.39 it/sec) -training >> step=5894400, episode=983 reward=0.7971384 (463.09 it/sec) -training >> step=5894500, episode=983 reward=0.8008126 (428.26 it/sec) -training >> step=5894600, episode=983 reward=0.7864169 (409.15 it/sec) -training >> step=5894700, episode=983 reward=0.7621783 (458.06 it/sec) -training >> step=5894800, episode=983 reward=0.7734965 (503.85 it/sec) -training >> step=5894900, episode=983 reward=0.7930419 (485.30 it/sec) -training >> step=5895000, episode=983 reward=0.777785 (424.32 it/sec) -training >> step=5895100, episode=983 reward=0.7815797 (466.42 it/sec) -training >> step=5895200, episode=983 reward=0.7929016 (510.36 it/sec) -training >> step=5895300, episode=983 reward=0.8055637 (492.21 it/sec) -training >> step=5895400, episode=983 reward=0.7765273 (448.51 it/sec) -training >> step=5895500, episode=983 reward=0.795233 (462.96 it/sec) -training >> step=5895600, episode=983 reward=0.7955126 (459.04 it/sec) -training >> step=5895700, episode=983 reward=0.801544 (466.58 it/sec) -training >> step=5895800, episode=983 reward=0.7704684 (457.08 it/sec) -training >> step=5895900, episode=983 reward=0.7659252 (473.78 it/sec) -training >> step=5896000, episode=983 reward=0.7473185 (492.77 it/sec) -training >> step=5896100, episode=983 reward=0.760199 (470.84 it/sec) -training >> step=5896200, episode=983 reward=0.7773641 (489.87 it/sec) -training >> step=5896300, episode=983 reward=0.7682797 (507.73 it/sec) -training >> step=5896400, episode=983 reward=0.7789674 (459.73 it/sec) -training >> step=5896500, episode=983 reward=0.7600163 (473.72 it/sec) -training >> step=5896600, episode=983 reward=0.7773076 (473.84 it/sec) -training >> step=5896700, episode=983 reward=0.7725443 (520.00 it/sec) -training >> step=5896800, episode=983 reward=0.7944293 (505.78 it/sec) -training >> step=5896900, episode=983 reward=0.7464399 (481.42 it/sec) -training >> step=5897000, episode=983 reward=0.7653637 (499.10 it/sec) -training >> step=5897100, episode=983 reward=0.7782437 (549.42 it/sec) -training >> step=5897200, episode=983 reward=0.7720633 (489.34 it/sec) -training >> step=5897300, episode=984 reward=0.7844597 (100.23 it/sec) -training >> step=5897400, episode=984 reward=0.7677031 (509.76 it/sec) -training >> step=5897500, episode=984 reward=0.7887329 (472.01 it/sec) -training >> step=5897600, episode=984 reward=0.7764961 (474.73 it/sec) -training >> step=5897700, episode=984 reward=0.7912091 (531.50 it/sec) -training >> step=5897800, episode=984 reward=0.7712032 (512.70 it/sec) -training >> step=5897900, episode=984 reward=0.7845902 (503.35 it/sec) -training >> step=5898000, episode=984 reward=0.7948131 (488.76 it/sec) -training >> step=5898100, episode=984 reward=0.8156311 (474.41 it/sec) -training >> step=5898200, episode=984 reward=0.7962519 (490.06 it/sec) -training >> step=5898300, episode=984 reward=0.7821546 (521.83 it/sec) -training >> step=5898400, episode=984 reward=0.7803777 (489.69 it/sec) -training >> step=5898500, episode=984 reward=0.7809314 (510.67 it/sec) -training >> step=5898600, episode=984 reward=0.7876149 (494.54 it/sec) -training >> step=5898700, episode=984 reward=0.799176 (498.84 it/sec) -training >> step=5898800, episode=984 reward=0.7927685 (483.51 it/sec) -training >> step=5898900, episode=984 reward=0.7722204 (465.09 it/sec) -training >> step=5899000, episode=984 reward=0.7771275 (485.36 it/sec) -training >> step=5899100, episode=984 reward=0.7948631 (490.81 it/sec) -training >> step=5899200, episode=984 reward=0.7906438 (479.23 it/sec) -training >> step=5899300, episode=984 reward=0.7783521 (502.75 it/sec) -training >> step=5899400, episode=984 reward=0.8030847 (475.77 it/sec) -training >> step=5899500, episode=984 reward=0.776315 (499.30 it/sec) -training >> step=5899600, episode=984 reward=0.7990133 (487.93 it/sec) -training >> step=5899700, episode=984 reward=0.7728021 (508.01 it/sec) -training >> step=5899800, episode=984 reward=0.7998759 (497.24 it/sec) -training >> step=5899900, episode=984 reward=0.7932224 (483.52 it/sec) -training >> step=5900000, episode=984 reward=0.7912006 (522.56 it/sec) -training >> step=5900100, episode=984 reward=0.7887262 (474.83 it/sec) -training >> step=5900200, episode=984 reward=0.7919406 (524.43 it/sec) -training >> step=5900300, episode=984 reward=0.7950716 (486.89 it/sec) -training >> step=5900400, episode=984 reward=0.7900411 (532.78 it/sec) -training >> step=5900500, episode=984 reward=0.8177036 (483.31 it/sec) -training >> step=5900600, episode=984 reward=0.7863833 (476.14 it/sec) -training >> step=5900700, episode=984 reward=0.7676431 (496.66 it/sec) -training >> step=5900800, episode=984 reward=0.7819262 (547.97 it/sec) -training >> step=5900900, episode=984 reward=0.7662126 (516.80 it/sec) -training >> step=5901000, episode=984 reward=0.796887 (511.67 it/sec) -training >> step=5901100, episode=984 reward=0.8055418 (504.25 it/sec) -training >> step=5901200, episode=984 reward=0.7953759 (506.81 it/sec) -training >> step=5901300, episode=984 reward=0.776791 (497.64 it/sec) -training >> step=5901400, episode=984 reward=0.7917512 (517.24 it/sec) -training >> step=5901500, episode=984 reward=0.7972693 (486.67 it/sec) -training >> step=5901600, episode=984 reward=0.7578202 (451.54 it/sec) -training >> step=5901700, episode=984 reward=0.7877433 (491.91 it/sec) -training >> step=5901800, episode=984 reward=0.7769674 (541.24 it/sec) -training >> step=5901900, episode=984 reward=0.7852871 (511.27 it/sec) -training >> step=5902000, episode=984 reward=0.7665052 (511.07 it/sec) -training >> step=5902100, episode=984 reward=0.7588308 (489.62 it/sec) -training >> step=5902200, episode=984 reward=0.7800481 (473.56 it/sec) -training >> step=5902300, episode=984 reward=0.783167 (506.61 it/sec) -training >> step=5902400, episode=984 reward=0.7727427 (514.25 it/sec) -training >> step=5902500, episode=984 reward=0.7688205 (479.61 it/sec) -training >> step=5902600, episode=984 reward=0.7794887 (497.78 it/sec) -training >> step=5902700, episode=984 reward=0.7640855 (504.08 it/sec) -training >> step=5902800, episode=984 reward=0.7440297 (490.30 it/sec) -training >> step=5902900, episode=984 reward=0.7766628 (522.58 it/sec) -training >> step=5903000, episode=984 reward=0.7606604 (527.07 it/sec) -training >> step=5903100, episode=984 reward=0.7629776 (514.76 it/sec) -training >> step=5903200, episode=984 reward=0.7759331 (506.07 it/sec) -training >> step=5903300, episode=985 reward=0.7892215 (129.65 it/sec) -training >> step=5903400, episode=985 reward=0.7822807 (499.20 it/sec) -training >> step=5903500, episode=985 reward=0.78501 (463.90 it/sec) -training >> step=5903600, episode=985 reward=0.7881889 (470.02 it/sec) -training >> step=5903700, episode=985 reward=0.7883132 (436.49 it/sec) -training >> step=5903800, episode=985 reward=0.7709574 (449.89 it/sec) -training >> step=5903900, episode=985 reward=0.7689725 (458.82 it/sec) -training >> step=5904000, episode=985 reward=0.7563227 (486.40 it/sec) -training >> step=5904100, episode=985 reward=0.7728775 (521.32 it/sec) -training >> step=5904200, episode=985 reward=0.7818106 (460.27 it/sec) -training >> step=5904300, episode=985 reward=0.7907243 (467.10 it/sec) -training >> step=5904400, episode=985 reward=0.7985002 (469.86 it/sec) -training >> step=5904500, episode=985 reward=0.8035916 (446.85 it/sec) -training >> step=5904600, episode=985 reward=0.8002862 (469.02 it/sec) -training >> step=5904700, episode=985 reward=0.7932397 (464.43 it/sec) -training >> step=5904800, episode=985 reward=0.799849 (429.82 it/sec) -training >> step=5904900, episode=985 reward=0.758163 (469.41 it/sec) -training >> step=5905000, episode=985 reward=0.7761841 (461.29 it/sec) -training >> step=5905100, episode=985 reward=0.7621771 (473.27 it/sec) -training >> step=5905200, episode=985 reward=0.8041717 (481.57 it/sec) -training >> step=5905300, episode=985 reward=0.7823263 (439.79 it/sec) -training >> step=5905400, episode=985 reward=0.7815112 (492.14 it/sec) -training >> step=5905500, episode=985 reward=0.777194 (499.56 it/sec) -training >> step=5905600, episode=985 reward=0.784738 (479.92 it/sec) -training >> step=5905700, episode=985 reward=0.7835737 (451.75 it/sec) -training >> step=5905800, episode=985 reward=0.7652416 (485.30 it/sec) -training >> step=5905900, episode=985 reward=0.7793556 (463.28 it/sec) -training >> step=5906000, episode=985 reward=0.7897823 (467.33 it/sec) -training >> step=5906100, episode=985 reward=0.8008254 (481.44 it/sec) -training >> step=5906200, episode=985 reward=0.7765335 (496.07 it/sec) -training >> step=5906300, episode=985 reward=0.7694003 (481.26 it/sec) -training >> step=5906400, episode=985 reward=0.7913924 (453.80 it/sec) -training >> step=5906500, episode=985 reward=0.8047491 (477.61 it/sec) -training >> step=5906600, episode=985 reward=0.787145 (450.09 it/sec) -training >> step=5906700, episode=985 reward=0.7952499 (493.45 it/sec) -training >> step=5906800, episode=985 reward=0.7808006 (456.94 it/sec) -training >> step=5906900, episode=985 reward=0.792264 (526.63 it/sec) -training >> step=5907000, episode=985 reward=0.8002579 (454.74 it/sec) -training >> step=5907100, episode=985 reward=0.7943315 (456.84 it/sec) -training >> step=5907200, episode=985 reward=0.7753882 (531.44 it/sec) -training >> step=5907300, episode=985 reward=0.7681188 (444.42 it/sec) -training >> step=5907400, episode=985 reward=0.7834259 (471.83 it/sec) -training >> step=5907500, episode=985 reward=0.8028629 (499.47 it/sec) -training >> step=5907600, episode=985 reward=0.795457 (459.49 it/sec) -training >> step=5907700, episode=985 reward=0.7479904 (440.57 it/sec) -training >> step=5907800, episode=985 reward=0.7731991 (475.83 it/sec) -training >> step=5907900, episode=985 reward=0.7697272 (514.63 it/sec) -training >> step=5908000, episode=985 reward=0.8035295 (546.56 it/sec) -training >> step=5908100, episode=985 reward=0.7786181 (417.02 it/sec) -training >> step=5908200, episode=985 reward=0.7727206 (489.89 it/sec) -training >> step=5908300, episode=985 reward=0.7792459 (459.98 it/sec) -training >> step=5908400, episode=985 reward=0.769596 (454.42 it/sec) -training >> step=5908500, episode=985 reward=0.7724314 (440.20 it/sec) -training >> step=5908600, episode=985 reward=0.7641602 (519.20 it/sec) -training >> step=5908700, episode=985 reward=0.7722087 (457.08 it/sec) -training >> step=5908800, episode=985 reward=0.7765321 (481.26 it/sec) -training >> step=5908900, episode=985 reward=0.763712 (497.97 it/sec) -training >> step=5909000, episode=985 reward=0.7734745 (497.31 it/sec) -training >> step=5909100, episode=985 reward=0.7825165 (494.59 it/sec) -training >> step=5909200, episode=985 reward=0.7794002 (495.34 it/sec) -training >> step=5909300, episode=986 reward=0.7681701 (95.38 it/sec) -training >> step=5909400, episode=986 reward=0.7933708 (466.28 it/sec) -training >> step=5909500, episode=986 reward=0.7671084 (482.28 it/sec) -training >> step=5909600, episode=986 reward=0.7881057 (446.64 it/sec) -training >> step=5909700, episode=986 reward=0.7871147 (497.01 it/sec) -training >> step=5909800, episode=986 reward=0.8045253 (458.34 it/sec) -training >> step=5909900, episode=986 reward=0.8048639 (492.12 it/sec) -training >> step=5910000, episode=986 reward=0.7925807 (491.24 it/sec) -training >> step=5910100, episode=986 reward=0.7776006 (473.40 it/sec) -training >> step=5910200, episode=986 reward=0.7898734 (458.87 it/sec) -training >> step=5910300, episode=986 reward=0.7673683 (460.48 it/sec) -training >> step=5910400, episode=986 reward=0.7734681 (479.45 it/sec) -training >> step=5910500, episode=986 reward=0.7811433 (492.31 it/sec) -training >> step=5910600, episode=986 reward=0.7799619 (485.62 it/sec) -training >> step=5910700, episode=986 reward=0.7818307 (463.19 it/sec) -training >> step=5910800, episode=986 reward=0.7866021 (451.42 it/sec) -training >> step=5910900, episode=986 reward=0.786292 (500.08 it/sec) -training >> step=5911000, episode=986 reward=0.787652 (466.63 it/sec) -training >> step=5911100, episode=986 reward=0.7925161 (450.80 it/sec) -training >> step=5911200, episode=986 reward=0.7971673 (475.32 it/sec) -training >> step=5911300, episode=986 reward=0.7763483 (471.84 it/sec) -training >> step=5911400, episode=986 reward=0.7855428 (453.17 it/sec) -training >> step=5911500, episode=986 reward=0.7954963 (468.04 it/sec) -training >> step=5911600, episode=986 reward=0.786524 (464.66 it/sec) -training >> step=5911700, episode=986 reward=0.7721934 (519.32 it/sec) -training >> step=5911800, episode=986 reward=0.7978776 (355.98 it/sec) -training >> step=5911900, episode=986 reward=0.7784564 (467.64 it/sec) -training >> step=5912000, episode=986 reward=0.786205 (476.78 it/sec) -training >> step=5912100, episode=986 reward=0.7842289 (464.02 it/sec) -training >> step=5912200, episode=986 reward=0.7800077 (480.40 it/sec) -training >> step=5912300, episode=986 reward=0.7718388 (453.52 it/sec) -training >> step=5912400, episode=986 reward=0.7781914 (497.87 it/sec) -training >> step=5912500, episode=986 reward=0.7845865 (441.58 it/sec) -training >> step=5912600, episode=986 reward=0.7912737 (486.29 it/sec) -training >> step=5912700, episode=986 reward=0.7956875 (443.89 it/sec) -training >> step=5912800, episode=986 reward=0.7907147 (421.23 it/sec) -training >> step=5912900, episode=986 reward=0.7963518 (456.18 it/sec) -training >> step=5913000, episode=986 reward=0.7827312 (470.59 it/sec) -training >> step=5913100, episode=986 reward=0.7974299 (490.62 it/sec) -training >> step=5913200, episode=986 reward=0.7784338 (450.41 it/sec) -training >> step=5913300, episode=986 reward=0.7861 (423.75 it/sec) -training >> step=5913400, episode=986 reward=0.7740264 (465.58 it/sec) -training >> step=5913500, episode=986 reward=0.7551155 (488.23 it/sec) -training >> step=5913600, episode=986 reward=0.7788205 (461.50 it/sec) -training >> step=5913700, episode=986 reward=0.7616866 (429.26 it/sec) -training >> step=5913800, episode=986 reward=0.7886933 (517.15 it/sec) -training >> step=5913900, episode=986 reward=0.7644723 (468.76 it/sec) -training >> step=5914000, episode=986 reward=0.7461247 (483.19 it/sec) -training >> step=5914100, episode=986 reward=0.7762288 (424.13 it/sec) -training >> step=5914200, episode=986 reward=0.7668984 (505.02 it/sec) -training >> step=5914300, episode=986 reward=0.7479437 (505.54 it/sec) -training >> step=5914400, episode=986 reward=0.7665886 (458.08 it/sec) -training >> step=5914500, episode=986 reward=0.7697342 (498.24 it/sec) -training >> step=5914600, episode=986 reward=0.7455636 (470.76 it/sec) -training >> step=5914700, episode=986 reward=0.7772858 (482.55 it/sec) -training >> step=5914800, episode=986 reward=0.7606578 (469.76 it/sec) -training >> step=5914900, episode=986 reward=0.7646109 (484.06 it/sec) -training >> step=5915000, episode=986 reward=0.7400796 (536.68 it/sec) -training >> step=5915100, episode=986 reward=0.7819245 (454.41 it/sec) -training >> step=5915200, episode=986 reward=0.7821491 (497.27 it/sec) -training >> step=5915300, episode=987 reward=0.7942377 (98.36 it/sec) -training >> step=5915400, episode=987 reward=0.787377 (447.14 it/sec) -training >> step=5915500, episode=987 reward=0.7874245 (459.12 it/sec) -training >> step=5915600, episode=987 reward=0.7742469 (504.91 it/sec) -training >> step=5915700, episode=987 reward=0.7604017 (427.15 it/sec) -training >> step=5915800, episode=987 reward=0.7867213 (481.92 it/sec) -training >> step=5915900, episode=987 reward=0.7989115 (500.94 it/sec) -training >> step=5916000, episode=987 reward=0.7752051 (481.90 it/sec) -training >> step=5916100, episode=987 reward=0.7913339 (481.03 it/sec) -training >> step=5916200, episode=987 reward=0.7748873 (421.23 it/sec) -training >> step=5916300, episode=987 reward=0.7931981 (496.67 it/sec) -training >> step=5916400, episode=987 reward=0.7664112 (512.09 it/sec) -training >> step=5916500, episode=987 reward=0.7742987 (492.43 it/sec) -training >> step=5916600, episode=987 reward=0.7710903 (478.60 it/sec) -training >> step=5916700, episode=987 reward=0.7826309 (481.51 it/sec) -training >> step=5916800, episode=987 reward=0.7702353 (503.77 it/sec) -training >> step=5916900, episode=987 reward=0.7701548 (508.75 it/sec) -training >> step=5917000, episode=987 reward=0.7649936 (523.10 it/sec) -training >> step=5917100, episode=987 reward=0.7676632 (476.55 it/sec) -training >> step=5917200, episode=987 reward=0.7801602 (417.64 it/sec) -training >> step=5917300, episode=987 reward=0.7647805 (464.56 it/sec) -training >> step=5917400, episode=987 reward=0.7965476 (468.44 it/sec) -training >> step=5917500, episode=987 reward=0.784043 (456.89 it/sec) -training >> step=5917600, episode=987 reward=0.8014854 (498.65 it/sec) -training >> step=5917700, episode=987 reward=0.7931892 (423.41 it/sec) -training >> step=5917800, episode=987 reward=0.7789459 (526.34 it/sec) -training >> step=5917900, episode=987 reward=0.7651026 (480.59 it/sec) -training >> step=5918000, episode=987 reward=0.7853966 (470.22 it/sec) -training >> step=5918100, episode=987 reward=0.7802976 (490.41 it/sec) -training >> step=5918200, episode=987 reward=0.7807515 (460.48 it/sec) -training >> step=5918300, episode=987 reward=0.7892942 (459.59 it/sec) -training >> step=5918400, episode=987 reward=0.8006572 (455.45 it/sec) -training >> step=5918500, episode=987 reward=0.77573 (495.59 it/sec) -training >> step=5918600, episode=987 reward=0.8038231 (435.98 it/sec) -training >> step=5918700, episode=987 reward=0.7872874 (463.52 it/sec) -training >> step=5918800, episode=987 reward=0.7859554 (452.25 it/sec) -training >> step=5918900, episode=987 reward=0.7776043 (413.67 it/sec) -training >> step=5919000, episode=987 reward=0.7858022 (459.12 it/sec) -training >> step=5919100, episode=987 reward=0.7920932 (425.36 it/sec) -training >> step=5919200, episode=987 reward=0.7833835 (534.50 it/sec) -training >> step=5919300, episode=987 reward=0.7549981 (480.05 it/sec) -training >> step=5919400, episode=987 reward=0.7720829 (446.62 it/sec) -training >> step=5919500, episode=987 reward=0.7684807 (457.62 it/sec) -training >> step=5919600, episode=987 reward=0.7641207 (470.80 it/sec) -training >> step=5919700, episode=987 reward=0.7911757 (466.36 it/sec) -training >> step=5919800, episode=987 reward=0.7802261 (484.73 it/sec) -training >> step=5919900, episode=987 reward=0.7632077 (495.08 it/sec) -training >> step=5920000, episode=987 reward=0.7509775 (473.47 it/sec) -training >> step=5920100, episode=987 reward=0.7687962 (453.55 it/sec) -training >> step=5920200, episode=987 reward=0.7743427 (483.80 it/sec) -training >> step=5920300, episode=987 reward=0.7757305 (504.44 it/sec) -training >> step=5920400, episode=987 reward=0.7779873 (463.93 it/sec) -training >> step=5920500, episode=987 reward=0.774257 (474.58 it/sec) -training >> step=5920600, episode=987 reward=0.7426153 (443.42 it/sec) -training >> step=5920700, episode=987 reward=0.7662758 (502.92 it/sec) -training >> step=5920800, episode=987 reward=0.7642897 (456.49 it/sec) -training >> step=5920900, episode=987 reward=0.7558065 (502.62 it/sec) -training >> step=5921000, episode=987 reward=0.734123 (502.62 it/sec) -training >> step=5921100, episode=987 reward=0.7610443 (463.68 it/sec) -training >> step=5921200, episode=987 reward=0.7681661 (507.79 it/sec) -training >> step=5921300, episode=988 reward=0.7578025 (108.89 it/sec) -training >> step=5921400, episode=988 reward=0.7966667 (494.61 it/sec) -training >> step=5921500, episode=988 reward=0.794783 (444.20 it/sec) -training >> step=5921600, episode=988 reward=0.766993 (450.97 it/sec) -training >> step=5921700, episode=988 reward=0.7623875 (476.11 it/sec) -training >> step=5921800, episode=988 reward=0.7907947 (513.69 it/sec) -training >> step=5921900, episode=988 reward=0.7836371 (496.78 it/sec) -training >> step=5922000, episode=988 reward=0.7808341 (474.04 it/sec) -training >> step=5922100, episode=988 reward=0.782266 (453.63 it/sec) -training >> step=5922200, episode=988 reward=0.7805801 (429.32 it/sec) -training >> step=5922300, episode=988 reward=0.7808496 (501.47 it/sec) -training >> step=5922400, episode=988 reward=0.7687574 (495.60 it/sec) -training >> step=5922500, episode=988 reward=0.7968007 (488.66 it/sec) -training >> step=5922600, episode=988 reward=0.7861242 (474.06 it/sec) -training >> step=5922700, episode=988 reward=0.7717544 (477.36 it/sec) -training >> step=5922800, episode=988 reward=0.776231 (517.25 it/sec) -training >> step=5922900, episode=988 reward=0.7851029 (503.48 it/sec) -training >> step=5923000, episode=988 reward=0.7695821 (501.26 it/sec) -training >> step=5923100, episode=988 reward=0.7960829 (424.57 it/sec) -training >> step=5923200, episode=988 reward=0.7676222 (464.55 it/sec) -training >> step=5923300, episode=988 reward=0.784544 (488.98 it/sec) -training >> step=5923400, episode=988 reward=0.7767985 (459.19 it/sec) -training >> step=5923500, episode=988 reward=0.7894003 (507.44 it/sec) -training >> step=5923600, episode=988 reward=0.786088 (465.33 it/sec) -training >> step=5923700, episode=988 reward=0.8060092 (489.57 it/sec) -training >> step=5923800, episode=988 reward=0.774939 (480.18 it/sec) -training >> step=5923900, episode=988 reward=0.7855729 (499.26 it/sec) -training >> step=5924000, episode=988 reward=0.7948166 (482.71 it/sec) -training >> step=5924100, episode=988 reward=0.7833098 (501.95 it/sec) -training >> step=5924200, episode=988 reward=0.7847804 (480.29 it/sec) -training >> step=5924300, episode=988 reward=0.7875435 (484.14 it/sec) -training >> step=5924400, episode=988 reward=0.7818303 (480.76 it/sec) -training >> step=5924500, episode=988 reward=0.7752107 (471.00 it/sec) -training >> step=5924600, episode=988 reward=0.7838182 (492.59 it/sec) -training >> step=5924700, episode=988 reward=0.7765309 (468.38 it/sec) -training >> step=5924800, episode=988 reward=0.7983068 (503.34 it/sec) -training >> step=5924900, episode=988 reward=0.7758391 (541.35 it/sec) -training >> step=5925000, episode=988 reward=0.7865348 (536.15 it/sec) -training >> step=5925100, episode=988 reward=0.8022782 (510.00 it/sec) -training >> step=5925200, episode=988 reward=0.7712862 (533.04 it/sec) -training >> step=5925300, episode=988 reward=0.7977151 (552.61 it/sec) -training >> step=5925400, episode=988 reward=0.7986234 (509.10 it/sec) -training >> step=5925500, episode=988 reward=0.784535 (527.16 it/sec) -training >> step=5925600, episode=988 reward=0.7888874 (513.12 it/sec) -training >> step=5925700, episode=988 reward=0.7777806 (530.39 it/sec) -training >> step=5925800, episode=988 reward=0.7921794 (505.60 it/sec) -training >> step=5925900, episode=988 reward=0.7873554 (492.26 it/sec) -training >> step=5926000, episode=988 reward=0.7747989 (560.64 it/sec) -training >> step=5926100, episode=988 reward=0.7707796 (528.09 it/sec) -training >> step=5926200, episode=988 reward=0.7803334 (492.38 it/sec) -training >> step=5926300, episode=988 reward=0.7591775 (513.23 it/sec) -training >> step=5926400, episode=988 reward=0.7655239 (560.91 it/sec) -training >> step=5926500, episode=988 reward=0.762638 (481.10 it/sec) -training >> step=5926600, episode=988 reward=0.7590095 (493.59 it/sec) -training >> step=5926700, episode=988 reward=0.764453 (511.32 it/sec) -training >> step=5926800, episode=988 reward=0.7587577 (534.80 it/sec) -training >> step=5926900, episode=988 reward=0.7700703 (526.33 it/sec) -training >> step=5927000, episode=988 reward=0.7525648 (496.73 it/sec) -training >> step=5927100, episode=988 reward=0.7816131 (520.20 it/sec) -training >> step=5927200, episode=988 reward=0.7741351 (500.38 it/sec) -training >> step=5927300, episode=989 reward=0.7739455 (111.01 it/sec) -training >> step=5927400, episode=989 reward=0.7572133 (379.33 it/sec) -training >> step=5927500, episode=989 reward=0.7926238 (449.19 it/sec) -training >> step=5927600, episode=989 reward=0.8007833 (495.02 it/sec) -training >> step=5927700, episode=989 reward=0.7918595 (520.83 it/sec) -training >> step=5927800, episode=989 reward=0.7874591 (509.55 it/sec) -training >> step=5927900, episode=989 reward=0.7630377 (493.51 it/sec) -training >> step=5928000, episode=989 reward=0.7841606 (495.60 it/sec) -training >> step=5928100, episode=989 reward=0.771186 (540.77 it/sec) -training >> step=5928200, episode=989 reward=0.8124333 (477.88 it/sec) -training >> step=5928300, episode=989 reward=0.7705159 (461.29 it/sec) -training >> step=5928400, episode=989 reward=0.7935113 (505.00 it/sec) -training >> step=5928500, episode=989 reward=0.7868503 (505.01 it/sec) -training >> step=5928600, episode=989 reward=0.7802739 (503.14 it/sec) -training >> step=5928700, episode=989 reward=0.7828597 (493.73 it/sec) -training >> step=5928800, episode=989 reward=0.762507 (528.85 it/sec) -training >> step=5928900, episode=989 reward=0.800844 (504.04 it/sec) -training >> step=5929000, episode=989 reward=0.7748231 (510.67 it/sec) -training >> step=5929100, episode=989 reward=0.7810816 (523.37 it/sec) -training >> step=5929200, episode=989 reward=0.8077931 (532.64 it/sec) -training >> step=5929300, episode=989 reward=0.7739745 (512.58 it/sec) -training >> step=5929400, episode=989 reward=0.7842318 (521.87 it/sec) -training >> step=5929500, episode=989 reward=0.7903937 (495.46 it/sec) -training >> step=5929600, episode=989 reward=0.7756453 (511.22 it/sec) -training >> step=5929700, episode=989 reward=0.7905402 (525.66 it/sec) -training >> step=5929800, episode=989 reward=0.7723547 (501.95 it/sec) -training >> step=5929900, episode=989 reward=0.7999948 (500.07 it/sec) -training >> step=5930000, episode=989 reward=0.7951761 (545.85 it/sec) -training >> step=5930100, episode=989 reward=0.7877271 (479.73 it/sec) -training >> step=5930200, episode=989 reward=0.7821876 (534.46 it/sec) -training >> step=5930300, episode=989 reward=0.7781003 (520.09 it/sec) -training >> step=5930400, episode=989 reward=0.7880511 (497.15 it/sec) -training >> step=5930500, episode=989 reward=0.775683 (494.35 it/sec) -training >> step=5930600, episode=989 reward=0.7886238 (521.14 it/sec) -training >> step=5930700, episode=989 reward=0.7798508 (543.01 it/sec) -training >> step=5930800, episode=989 reward=0.7771024 (500.91 it/sec) -training >> step=5930900, episode=989 reward=0.7903036 (486.91 it/sec) -training >> step=5931000, episode=989 reward=0.8083141 (523.46 it/sec) -training >> step=5931100, episode=989 reward=0.8028161 (459.76 it/sec) -training >> step=5931200, episode=989 reward=0.784588 (502.96 it/sec) -training >> step=5931300, episode=989 reward=0.7623142 (475.82 it/sec) -training >> step=5931400, episode=989 reward=0.7857289 (502.70 it/sec) -training >> step=5931500, episode=989 reward=0.7885616 (493.21 it/sec) -training >> step=5931600, episode=989 reward=0.7823403 (494.62 it/sec) -training >> step=5931700, episode=989 reward=0.7719256 (490.75 it/sec) -training >> step=5931800, episode=989 reward=0.7594443 (481.07 it/sec) -training >> step=5931900, episode=989 reward=0.7780067 (461.32 it/sec) -training >> step=5932000, episode=989 reward=0.7711438 (502.35 it/sec) -training >> step=5932100, episode=989 reward=0.7690998 (544.32 it/sec) -training >> step=5932200, episode=989 reward=0.7804599 (488.31 it/sec) -training >> step=5932300, episode=989 reward=0.7703566 (494.90 it/sec) -training >> step=5932400, episode=989 reward=0.7841777 (540.69 it/sec) -training >> step=5932500, episode=989 reward=0.7559988 (515.73 it/sec) -training >> step=5932600, episode=989 reward=0.7555704 (528.29 it/sec) -training >> step=5932700, episode=989 reward=0.7526854 (506.77 it/sec) -training >> step=5932800, episode=989 reward=0.7394127 (559.44 it/sec) -training >> step=5932900, episode=989 reward=0.7738481 (472.20 it/sec) -training >> step=5933000, episode=989 reward=0.7712635 (520.02 it/sec) -training >> step=5933100, episode=989 reward=0.7726415 (533.61 it/sec) -training >> step=5933200, episode=989 reward=0.7710013 (490.31 it/sec) -training >> step=5933300, episode=990 reward=0.789961 (125.02 it/sec) -training >> step=5933400, episode=990 reward=0.7864196 (503.85 it/sec) -training >> step=5933500, episode=990 reward=0.7819256 (490.72 it/sec) -training >> step=5933600, episode=990 reward=0.7876475 (516.93 it/sec) -training >> step=5933700, episode=990 reward=0.7696614 (505.99 it/sec) -training >> step=5933800, episode=990 reward=0.7822781 (522.37 it/sec) -training >> step=5933900, episode=990 reward=0.7875754 (542.98 it/sec) -training >> step=5934000, episode=990 reward=0.7759734 (486.16 it/sec) -training >> step=5934100, episode=990 reward=0.7726048 (501.26 it/sec) -training >> step=5934200, episode=990 reward=0.7719706 (515.47 it/sec) -training >> step=5934300, episode=990 reward=0.8021373 (519.36 it/sec) -training >> step=5934400, episode=990 reward=0.7889263 (516.57 it/sec) -training >> step=5934500, episode=990 reward=0.7805604 (459.75 it/sec) -training >> step=5934600, episode=990 reward=0.7813197 (552.80 it/sec) -training >> step=5934700, episode=990 reward=0.7774068 (467.94 it/sec) -training >> step=5934800, episode=990 reward=0.7688547 (435.14 it/sec) -training >> step=5934900, episode=990 reward=0.8007544 (493.88 it/sec) -training >> step=5935000, episode=990 reward=0.7849271 (518.15 it/sec) -training >> step=5935100, episode=990 reward=0.7751403 (513.53 it/sec) -training >> step=5935200, episode=990 reward=0.7822819 (481.32 it/sec) -training >> step=5935300, episode=990 reward=0.7894578 (558.86 it/sec) -training >> step=5935400, episode=990 reward=0.7921911 (491.34 it/sec) -training >> step=5935500, episode=990 reward=0.7690915 (505.04 it/sec) -training >> step=5935600, episode=990 reward=0.789979 (495.02 it/sec) -training >> step=5935700, episode=990 reward=0.8067428 (511.32 it/sec) -training >> step=5935800, episode=990 reward=0.8003454 (462.43 it/sec) -training >> step=5935900, episode=990 reward=0.775103 (488.11 it/sec) -training >> step=5936000, episode=990 reward=0.7897496 (432.08 it/sec) -training >> step=5936100, episode=990 reward=0.7749361 (544.59 it/sec) -training >> step=5936200, episode=990 reward=0.7782436 (469.51 it/sec) -training >> step=5936300, episode=990 reward=0.7658107 (495.69 it/sec) -training >> step=5936400, episode=990 reward=0.7644942 (507.09 it/sec) -training >> step=5936500, episode=990 reward=0.8032161 (507.37 it/sec) -training >> step=5936600, episode=990 reward=0.7769452 (459.42 it/sec) -training >> step=5936700, episode=990 reward=0.7523843 (440.89 it/sec) -training >> step=5936800, episode=990 reward=0.7882151 (479.44 it/sec) -training >> step=5936900, episode=990 reward=0.7820355 (504.34 it/sec) -training >> step=5937000, episode=990 reward=0.7830847 (480.66 it/sec) -training >> step=5937100, episode=990 reward=0.792309 (505.99 it/sec) -training >> step=5937200, episode=990 reward=0.7786157 (497.62 it/sec) -training >> step=5937300, episode=990 reward=0.7899753 (460.75 it/sec) -training >> step=5937400, episode=990 reward=0.7917098 (488.99 it/sec) -training >> step=5937500, episode=990 reward=0.7861766 (548.28 it/sec) -training >> step=5937600, episode=990 reward=0.7932135 (481.14 it/sec) -training >> step=5937700, episode=990 reward=0.7969061 (494.60 it/sec) -training >> step=5937800, episode=990 reward=0.7870067 (497.84 it/sec) -training >> step=5937900, episode=990 reward=0.7771377 (551.95 it/sec) -training >> step=5938000, episode=990 reward=0.7909108 (488.28 it/sec) -training >> step=5938100, episode=990 reward=0.7581171 (487.86 it/sec) -training >> step=5938200, episode=990 reward=0.7561482 (481.70 it/sec) -training >> step=5938300, episode=990 reward=0.761358 (501.70 it/sec) -training >> step=5938400, episode=990 reward=0.7768876 (513.49 it/sec) -training >> step=5938500, episode=990 reward=0.7567308 (536.01 it/sec) -training >> step=5938600, episode=990 reward=0.751085 (493.07 it/sec) -training >> step=5938700, episode=990 reward=0.7935715 (491.72 it/sec) -training >> step=5938800, episode=990 reward=0.7645414 (431.21 it/sec) -training >> step=5938900, episode=990 reward=0.763656 (523.33 it/sec) -training >> step=5939000, episode=990 reward=0.7529119 (517.74 it/sec) -training >> step=5939100, episode=990 reward=0.765 (485.76 it/sec) -training >> step=5939200, episode=990 reward=0.7691052 (471.85 it/sec) -training >> step=5939300, episode=991 reward=0.765521 (69.16 it/sec) -training >> step=5939400, episode=991 reward=0.7703828 (496.51 it/sec) -training >> step=5939500, episode=991 reward=0.78591 (504.60 it/sec) -training >> step=5939600, episode=991 reward=0.7862313 (478.09 it/sec) -training >> step=5939700, episode=991 reward=0.7951937 (495.17 it/sec) -training >> step=5939800, episode=991 reward=0.7706467 (496.84 it/sec) -training >> step=5939900, episode=991 reward=0.7882175 (521.73 it/sec) -training >> step=5940000, episode=991 reward=0.7714677 (373.86 it/sec) -training >> step=5940100, episode=991 reward=0.8058111 (441.87 it/sec) -training >> step=5940200, episode=991 reward=0.7968835 (490.47 it/sec) -training >> step=5940300, episode=991 reward=0.7802297 (459.96 it/sec) -training >> step=5940400, episode=991 reward=0.8073729 (492.97 it/sec) -training >> step=5940500, episode=991 reward=0.7867626 (519.02 it/sec) -training >> step=5940600, episode=991 reward=0.7747729 (477.20 it/sec) -training >> step=5940700, episode=991 reward=0.7987218 (523.34 it/sec) -training >> step=5940800, episode=991 reward=0.7933683 (535.57 it/sec) -training >> step=5940900, episode=991 reward=0.7654564 (547.36 it/sec) -training >> step=5941000, episode=991 reward=0.7881362 (452.28 it/sec) -training >> step=5941100, episode=991 reward=0.7668188 (497.00 it/sec) -training >> step=5941200, episode=991 reward=0.7876961 (523.75 it/sec) -training >> step=5941300, episode=991 reward=0.7938259 (530.65 it/sec) -training >> step=5941400, episode=991 reward=0.7801551 (527.67 it/sec) -training >> step=5941500, episode=991 reward=0.7831722 (519.77 it/sec) -training >> step=5941600, episode=991 reward=0.7769834 (540.84 it/sec) -training >> step=5941700, episode=991 reward=0.762116 (486.49 it/sec) -training >> step=5941800, episode=991 reward=0.7845451 (523.48 it/sec) -training >> step=5941900, episode=991 reward=0.7800853 (538.51 it/sec) -training >> step=5942000, episode=991 reward=0.7841339 (502.41 it/sec) -training >> step=5942100, episode=991 reward=0.7625649 (506.74 it/sec) -training >> step=5942200, episode=991 reward=0.7883931 (526.65 it/sec) -training >> step=5942300, episode=991 reward=0.7870144 (488.04 it/sec) -training >> step=5942400, episode=991 reward=0.795282 (540.53 it/sec) -training >> step=5942500, episode=991 reward=0.7689398 (530.94 it/sec) -training >> step=5942600, episode=991 reward=0.7640166 (526.89 it/sec) -training >> step=5942700, episode=991 reward=0.7745756 (488.84 it/sec) -training >> step=5942800, episode=991 reward=0.7962989 (442.57 it/sec) -training >> step=5942900, episode=991 reward=0.7994011 (546.24 it/sec) -training >> step=5943000, episode=991 reward=0.784162 (500.36 it/sec) -training >> step=5943100, episode=991 reward=0.7756464 (511.32 it/sec) -training >> step=5943200, episode=991 reward=0.787113 (509.59 it/sec) -training >> step=5943300, episode=991 reward=0.7984703 (505.96 it/sec) -training >> step=5943400, episode=991 reward=0.7673375 (512.81 it/sec) -training >> step=5943500, episode=991 reward=0.7937783 (518.21 it/sec) -training >> step=5943600, episode=991 reward=0.7743988 (515.50 it/sec) -training >> step=5943700, episode=991 reward=0.7912978 (565.04 it/sec) -training >> step=5943800, episode=991 reward=0.7782215 (484.13 it/sec) -training >> step=5943900, episode=991 reward=0.7818368 (523.70 it/sec) -training >> step=5944000, episode=991 reward=0.7564827 (543.15 it/sec) -training >> step=5944100, episode=991 reward=0.7469144 (520.93 it/sec) -training >> step=5944200, episode=991 reward=0.7612004 (505.15 it/sec) -training >> step=5944300, episode=991 reward=0.755609 (515.34 it/sec) -training >> step=5944400, episode=991 reward=0.7442165 (537.69 it/sec) -training >> step=5944500, episode=991 reward=0.7659022 (527.56 it/sec) -training >> step=5944600, episode=991 reward=0.7565224 (488.84 it/sec) -training >> step=5944700, episode=991 reward=0.7694001 (522.81 it/sec) -training >> step=5944800, episode=991 reward=0.7771369 (537.72 it/sec) -training >> step=5944900, episode=991 reward=0.7886558 (499.89 it/sec) -training >> step=5945000, episode=991 reward=0.7705887 (520.29 it/sec) -training >> step=5945100, episode=991 reward=0.7612942 (526.63 it/sec) -training >> step=5945200, episode=991 reward=0.7546691 (492.87 it/sec) -training >> step=5945300, episode=992 reward=0.7819198 (73.03 it/sec) -training >> step=5945400, episode=992 reward=0.776058 (520.19 it/sec) -training >> step=5945500, episode=992 reward=0.7638552 (512.65 it/sec) -training >> step=5945600, episode=992 reward=0.7621623 (518.93 it/sec) -training >> step=5945700, episode=992 reward=0.7865343 (518.95 it/sec) -training >> step=5945800, episode=992 reward=0.7850134 (513.48 it/sec) -training >> step=5945900, episode=992 reward=0.7785314 (477.46 it/sec) -training >> step=5946000, episode=992 reward=0.7919961 (560.08 it/sec) -training >> step=5946100, episode=992 reward=0.776916 (518.84 it/sec) -training >> step=5946200, episode=992 reward=0.780441 (516.12 it/sec) -training >> step=5946300, episode=992 reward=0.7921917 (531.31 it/sec) -training >> step=5946400, episode=992 reward=0.78988 (502.82 it/sec) -training >> step=5946500, episode=992 reward=0.8085243 (456.59 it/sec) -training >> step=5946600, episode=992 reward=0.7942489 (542.48 it/sec) -training >> step=5946700, episode=992 reward=0.7783048 (518.56 it/sec) -training >> step=5946800, episode=992 reward=0.8076249 (521.01 it/sec) -training >> step=5946900, episode=992 reward=0.7997102 (500.18 it/sec) -training >> step=5947000, episode=992 reward=0.8048959 (524.05 it/sec) -training >> step=5947100, episode=992 reward=0.795828 (500.54 it/sec) -training >> step=5947200, episode=992 reward=0.7733479 (509.16 it/sec) -training >> step=5947300, episode=992 reward=0.7781836 (519.04 it/sec) -training >> step=5947400, episode=992 reward=0.7535735 (509.51 it/sec) -training >> step=5947500, episode=992 reward=0.7895952 (502.28 it/sec) -training >> step=5947600, episode=992 reward=0.8001115 (539.13 it/sec) -training >> step=5947700, episode=992 reward=0.786896 (496.37 it/sec) -training >> step=5947800, episode=992 reward=0.7796092 (508.96 it/sec) -training >> step=5947900, episode=992 reward=0.8007334 (487.26 it/sec) -training >> step=5948000, episode=992 reward=0.7874324 (523.24 it/sec) -training >> step=5948100, episode=992 reward=0.7903954 (512.36 it/sec) -training >> step=5948200, episode=992 reward=0.7966055 (481.24 it/sec) -training >> step=5948300, episode=992 reward=0.7864466 (514.60 it/sec) -training >> step=5948400, episode=992 reward=0.7852051 (520.10 it/sec) -training >> step=5948500, episode=992 reward=0.8037181 (484.66 it/sec) -training >> step=5948600, episode=992 reward=0.7892447 (499.97 it/sec) -training >> step=5948700, episode=992 reward=0.7927345 (536.41 it/sec) -training >> step=5948800, episode=992 reward=0.7898415 (523.59 it/sec) -training >> step=5948900, episode=992 reward=0.7966447 (495.89 it/sec) -training >> step=5949000, episode=992 reward=0.7720916 (490.02 it/sec) -training >> step=5949100, episode=992 reward=0.7895803 (536.30 it/sec) -training >> step=5949200, episode=992 reward=0.8087727 (491.09 it/sec) -training >> step=5949300, episode=992 reward=0.7823821 (549.68 it/sec) -training >> step=5949400, episode=992 reward=0.7791243 (515.40 it/sec) -training >> step=5949500, episode=992 reward=0.7775736 (514.89 it/sec) -training >> step=5949600, episode=992 reward=0.7881297 (468.73 it/sec) -training >> step=5949700, episode=992 reward=0.7793592 (480.36 it/sec) -training >> step=5949800, episode=992 reward=0.7607284 (536.83 it/sec) -training >> step=5949900, episode=992 reward=0.7918029 (463.97 it/sec) -training >> step=5950000, episode=992 reward=0.773276 (482.08 it/sec) -training >> step=5950100, episode=992 reward=0.7639318 (499.97 it/sec) -training >> step=5950200, episode=992 reward=0.7870448 (504.63 it/sec) -training >> step=5950300, episode=992 reward=0.773622 (450.95 it/sec) -training >> step=5950400, episode=992 reward=0.7582002 (488.56 it/sec) -training >> step=5950500, episode=992 reward=0.7733123 (481.96 it/sec) -training >> step=5950600, episode=992 reward=0.7701475 (544.83 it/sec) -training >> step=5950700, episode=992 reward=0.770121 (464.14 it/sec) -training >> step=5950800, episode=992 reward=0.7896089 (454.01 it/sec) -training >> step=5950900, episode=992 reward=0.7583259 (502.66 it/sec) -training >> step=5951000, episode=992 reward=0.7694306 (497.64 it/sec) -training >> step=5951100, episode=992 reward=0.7550693 (494.70 it/sec) -training >> step=5951200, episode=992 reward=0.7762947 (494.97 it/sec) -training >> step=5951300, episode=993 reward=0.7653514 (72.42 it/sec) -training >> step=5951400, episode=993 reward=0.7890738 (379.77 it/sec) -training >> step=5951500, episode=993 reward=0.7611125 (418.26 it/sec) -training >> step=5951600, episode=993 reward=0.7861635 (386.77 it/sec) -training >> step=5951700, episode=993 reward=0.7768492 (370.67 it/sec) -training >> step=5951800, episode=993 reward=0.7725576 (404.33 it/sec) -training >> step=5951900, episode=993 reward=0.7900947 (410.40 it/sec) -training >> step=5952000, episode=993 reward=0.7708719 (396.91 it/sec) -training >> step=5952100, episode=993 reward=0.7646262 (435.66 it/sec) -training >> step=5952200, episode=993 reward=0.7783434 (429.14 it/sec) -training >> step=5952300, episode=993 reward=0.7681922 (467.78 it/sec) -training >> step=5952400, episode=993 reward=0.7958383 (446.28 it/sec) -training >> step=5952500, episode=993 reward=0.7443562 (513.26 it/sec) -training >> step=5952600, episode=993 reward=0.8006753 (427.65 it/sec) -training >> step=5952700, episode=993 reward=0.7770625 (502.97 it/sec) -training >> step=5952800, episode=993 reward=0.7845119 (528.43 it/sec) -training >> step=5952900, episode=993 reward=0.7947449 (541.95 it/sec) -training >> step=5953000, episode=993 reward=0.7804258 (531.08 it/sec) -training >> step=5953100, episode=993 reward=0.7885901 (520.97 it/sec) -training >> step=5953200, episode=993 reward=0.7828202 (487.07 it/sec) -training >> step=5953300, episode=993 reward=0.7853866 (478.51 it/sec) -training >> step=5953400, episode=993 reward=0.7887614 (472.14 it/sec) -training >> step=5953500, episode=993 reward=0.7884045 (473.42 it/sec) -training >> step=5953600, episode=993 reward=0.79118 (450.76 it/sec) -training >> step=5953700, episode=993 reward=0.7851016 (459.82 it/sec) -training >> step=5953800, episode=993 reward=0.8036581 (442.40 it/sec) -training >> step=5953900, episode=993 reward=0.7815886 (494.34 it/sec) -training >> step=5954000, episode=993 reward=0.8271169 (484.18 it/sec) -training >> step=5954100, episode=993 reward=0.7863364 (452.60 it/sec) -training >> step=5954200, episode=993 reward=0.7857709 (492.79 it/sec) -training >> step=5954300, episode=993 reward=0.7797879 (506.97 it/sec) -training >> step=5954400, episode=993 reward=0.7901453 (444.63 it/sec) -training >> step=5954500, episode=993 reward=0.7747848 (474.60 it/sec) -training >> step=5954600, episode=993 reward=0.7821952 (486.21 it/sec) -training >> step=5954700, episode=993 reward=0.8107524 (500.58 it/sec) -training >> step=5954800, episode=993 reward=0.7694212 (478.50 it/sec) -training >> step=5954900, episode=993 reward=0.7870392 (505.30 it/sec) -training >> step=5955000, episode=993 reward=0.7950225 (519.19 it/sec) -training >> step=5955100, episode=993 reward=0.7915273 (495.37 it/sec) -training >> step=5955200, episode=993 reward=0.7961452 (512.78 it/sec) -training >> step=5955300, episode=993 reward=0.7819496 (464.88 it/sec) -training >> step=5955400, episode=993 reward=0.7908344 (441.74 it/sec) -training >> step=5955500, episode=993 reward=0.775884 (452.87 it/sec) -training >> step=5955600, episode=993 reward=0.7881913 (512.62 it/sec) -training >> step=5955700, episode=993 reward=0.7743124 (416.29 it/sec) -training >> step=5955800, episode=993 reward=0.7932916 (510.73 it/sec) -training >> step=5955900, episode=993 reward=0.7767568 (509.16 it/sec) -training >> step=5956000, episode=993 reward=0.7829448 (508.39 it/sec) -training >> step=5956100, episode=993 reward=0.7532827 (489.67 it/sec) -training >> step=5956200, episode=993 reward=0.7679659 (481.54 it/sec) -training >> step=5956300, episode=993 reward=0.7583531 (431.56 it/sec) -training >> step=5956400, episode=993 reward=0.7552983 (493.50 it/sec) -training >> step=5956500, episode=993 reward=0.7467397 (520.98 it/sec) -training >> step=5956600, episode=993 reward=0.7619437 (514.71 it/sec) -training >> step=5956700, episode=993 reward=0.758373 (540.92 it/sec) -training >> step=5956800, episode=993 reward=0.7936026 (451.44 it/sec) -training >> step=5956900, episode=993 reward=0.751681 (493.22 it/sec) -training >> step=5957000, episode=993 reward=0.7708679 (509.03 it/sec) -training >> step=5957100, episode=993 reward=0.7455409 (527.58 it/sec) -training >> step=5957200, episode=993 reward=0.7765644 (499.47 it/sec) -training >> step=5957300, episode=994 reward=0.7808146 (107.13 it/sec) -training >> step=5957400, episode=994 reward=0.7793813 (507.66 it/sec) -training >> step=5957500, episode=994 reward=0.7743728 (509.44 it/sec) -training >> step=5957600, episode=994 reward=0.8016652 (528.26 it/sec) -training >> step=5957700, episode=994 reward=0.7858497 (522.33 it/sec) -training >> step=5957800, episode=994 reward=0.7580817 (516.85 it/sec) -training >> step=5957900, episode=994 reward=0.7793304 (504.61 it/sec) -training >> step=5958000, episode=994 reward=0.7646189 (503.82 it/sec) -training >> step=5958100, episode=994 reward=0.790898 (542.56 it/sec) -training >> step=5958200, episode=994 reward=0.798739 (527.23 it/sec) -training >> step=5958300, episode=994 reward=0.7861676 (483.00 it/sec) -training >> step=5958400, episode=994 reward=0.7730273 (526.46 it/sec) -training >> step=5958500, episode=994 reward=0.7920051 (515.94 it/sec) -training >> step=5958600, episode=994 reward=0.7849532 (498.74 it/sec) -training >> step=5958700, episode=994 reward=0.781655 (493.53 it/sec) -training >> step=5958800, episode=994 reward=0.7759204 (511.60 it/sec) -training >> step=5958900, episode=994 reward=0.8117108 (521.89 it/sec) -training >> step=5959000, episode=994 reward=0.785504 (504.64 it/sec) -training >> step=5959100, episode=994 reward=0.7901914 (418.04 it/sec) -training >> step=5959200, episode=994 reward=0.7741868 (548.37 it/sec) -training >> step=5959300, episode=994 reward=0.8151523 (492.91 it/sec) -training >> step=5959400, episode=994 reward=0.779552 (499.56 it/sec) -training >> step=5959500, episode=994 reward=0.7922108 (500.08 it/sec) -training >> step=5959600, episode=994 reward=0.7723674 (534.79 it/sec) -training >> step=5959700, episode=994 reward=0.7834915 (503.63 it/sec) -training >> step=5959800, episode=994 reward=0.7813911 (494.84 it/sec) -training >> step=5959900, episode=994 reward=0.7998388 (456.64 it/sec) -training >> step=5960000, episode=994 reward=0.7777008 (500.93 it/sec) -training >> step=5960100, episode=994 reward=0.7706257 (505.02 it/sec) -training >> step=5960200, episode=994 reward=0.778485 (515.68 it/sec) -training >> step=5960300, episode=994 reward=0.7936027 (514.06 it/sec) -training >> step=5960400, episode=994 reward=0.7929903 (491.63 it/sec) -training >> step=5960500, episode=994 reward=0.7991327 (509.29 it/sec) -training >> step=5960600, episode=994 reward=0.7966686 (533.91 it/sec) -training >> step=5960700, episode=994 reward=0.7865611 (530.46 it/sec) -training >> step=5960800, episode=994 reward=0.791323 (491.03 it/sec) -training >> step=5960900, episode=994 reward=0.7585193 (484.13 it/sec) -training >> step=5961000, episode=994 reward=0.7793092 (475.83 it/sec) -training >> step=5961100, episode=994 reward=0.773101 (466.95 it/sec) -training >> step=5961200, episode=994 reward=0.7843874 (499.72 it/sec) -training >> step=5961300, episode=994 reward=0.7771097 (501.49 it/sec) -training >> step=5961400, episode=994 reward=0.8007919 (558.48 it/sec) -training >> step=5961500, episode=994 reward=0.8001361 (477.53 it/sec) -training >> step=5961600, episode=994 reward=0.8097779 (481.05 it/sec) -training >> step=5961700, episode=994 reward=0.7897121 (467.78 it/sec) -training >> step=5961800, episode=994 reward=0.7676535 (521.09 it/sec) -training >> step=5961900, episode=994 reward=0.7774668 (500.86 it/sec) -training >> step=5962000, episode=994 reward=0.759082 (488.46 it/sec) -training >> step=5962100, episode=994 reward=0.7650512 (504.52 it/sec) -training >> step=5962200, episode=994 reward=0.764738 (484.59 it/sec) -training >> step=5962300, episode=994 reward=0.7663106 (457.20 it/sec) -training >> step=5962400, episode=994 reward=0.7670776 (465.76 it/sec) -training >> step=5962500, episode=994 reward=0.7763892 (481.10 it/sec) -training >> step=5962600, episode=994 reward=0.7550375 (521.31 it/sec) -training >> step=5962700, episode=994 reward=0.7463213 (511.15 it/sec) -training >> step=5962800, episode=994 reward=0.777965 (513.18 it/sec) -training >> step=5962900, episode=994 reward=0.7563572 (471.67 it/sec) -training >> step=5963000, episode=994 reward=0.7711925 (504.81 it/sec) -training >> step=5963100, episode=994 reward=0.7819797 (502.08 it/sec) -training >> step=5963200, episode=994 reward=0.7654646 (508.74 it/sec) -training >> step=5963300, episode=995 reward=0.7940233 (72.83 it/sec) -training >> step=5963400, episode=995 reward=0.7647516 (514.87 it/sec) -training >> step=5963500, episode=995 reward=0.7765025 (492.55 it/sec) -training >> step=5963600, episode=995 reward=0.7949554 (510.25 it/sec) -training >> step=5963700, episode=995 reward=0.7922221 (479.79 it/sec) -training >> step=5963800, episode=995 reward=0.7858104 (479.45 it/sec) -training >> step=5963900, episode=995 reward=0.7912161 (451.74 it/sec) -training >> step=5964000, episode=995 reward=0.7716191 (490.59 it/sec) -training >> step=5964100, episode=995 reward=0.7856345 (505.90 it/sec) -training >> step=5964200, episode=995 reward=0.788514 (499.64 it/sec) -training >> step=5964300, episode=995 reward=0.7976846 (461.84 it/sec) -training >> step=5964400, episode=995 reward=0.7861515 (525.77 it/sec) -training >> step=5964500, episode=995 reward=0.7826625 (510.36 it/sec) -training >> step=5964600, episode=995 reward=0.7769632 (495.06 it/sec) -training >> step=5964700, episode=995 reward=0.7772327 (475.89 it/sec) -training >> step=5964800, episode=995 reward=0.8047043 (515.10 it/sec) -training >> step=5964900, episode=995 reward=0.7845894 (472.44 it/sec) -training >> step=5965000, episode=995 reward=0.7787839 (497.50 it/sec) -training >> step=5965100, episode=995 reward=0.8033301 (512.08 it/sec) -training >> step=5965200, episode=995 reward=0.7887345 (438.93 it/sec) -training >> step=5965300, episode=995 reward=0.7586178 (463.71 it/sec) -training >> step=5965400, episode=995 reward=0.777746 (460.69 it/sec) -training >> step=5965500, episode=995 reward=0.790621 (515.57 it/sec) -training >> step=5965600, episode=995 reward=0.7944804 (481.81 it/sec) -training >> step=5965700, episode=995 reward=0.7823699 (519.30 it/sec) -training >> step=5965800, episode=995 reward=0.7764249 (486.34 it/sec) -training >> step=5965900, episode=995 reward=0.7858042 (415.15 it/sec) -training >> step=5966000, episode=995 reward=0.7942209 (460.01 it/sec) -training >> step=5966100, episode=995 reward=0.7962941 (497.30 it/sec) -training >> step=5966200, episode=995 reward=0.786828 (488.43 it/sec) -training >> step=5966300, episode=995 reward=0.7874933 (461.55 it/sec) -training >> step=5966400, episode=995 reward=0.8061731 (427.82 it/sec) -training >> step=5966500, episode=995 reward=0.7788297 (467.02 it/sec) -training >> step=5966600, episode=995 reward=0.7671827 (426.55 it/sec) -training >> step=5966700, episode=995 reward=0.7863873 (513.03 it/sec) -training >> step=5966800, episode=995 reward=0.7946683 (513.05 it/sec) -training >> step=5966900, episode=995 reward=0.7817346 (479.90 it/sec) -training >> step=5967000, episode=995 reward=0.7689506 (457.99 it/sec) -training >> step=5967100, episode=995 reward=0.7838757 (489.50 it/sec) -training >> step=5967200, episode=995 reward=0.7676573 (475.14 it/sec) -training >> step=5967300, episode=995 reward=0.7761945 (501.24 it/sec) -training >> step=5967400, episode=995 reward=0.7779042 (433.97 it/sec) -training >> step=5967500, episode=995 reward=0.7998027 (502.53 it/sec) -training >> step=5967600, episode=995 reward=0.769005 (492.84 it/sec) -training >> step=5967700, episode=995 reward=0.7604316 (483.19 it/sec) -training >> step=5967800, episode=995 reward=0.7745143 (480.49 it/sec) -training >> step=5967900, episode=995 reward=0.7701138 (448.31 it/sec) -training >> step=5968000, episode=995 reward=0.7602227 (450.80 it/sec) -training >> step=5968100, episode=995 reward=0.7687566 (472.03 it/sec) -training >> step=5968200, episode=995 reward=0.7623044 (420.94 it/sec) -training >> step=5968300, episode=995 reward=0.7702227 (461.80 it/sec) -training >> step=5968400, episode=995 reward=0.7349303 (495.84 it/sec) -training >> step=5968500, episode=995 reward=0.7405533 (472.60 it/sec) -training >> step=5968600, episode=995 reward=0.7484166 (490.39 it/sec) -training >> step=5968700, episode=995 reward=0.7560348 (493.16 it/sec) -training >> step=5968800, episode=995 reward=0.7473379 (489.96 it/sec) -training >> step=5968900, episode=995 reward=0.7553326 (492.48 it/sec) -training >> step=5969000, episode=995 reward=0.7547488 (490.13 it/sec) -training >> step=5969100, episode=995 reward=0.7675382 (511.02 it/sec) -training >> step=5969200, episode=995 reward=0.7410654 (486.56 it/sec) -training >> step=5969300, episode=996 reward=0.7659841 (65.00 it/sec) -training >> step=5969400, episode=996 reward=0.7649512 (344.01 it/sec) -training >> step=5969500, episode=996 reward=0.7633569 (466.11 it/sec) -training >> step=5969600, episode=996 reward=0.7893212 (486.57 it/sec) -training >> step=5969700, episode=996 reward=0.7803426 (453.41 it/sec) -training >> step=5969800, episode=996 reward=0.7758877 (496.62 it/sec) -training >> step=5969900, episode=996 reward=0.7734308 (489.36 it/sec) -training >> step=5970000, episode=996 reward=0.7929241 (525.17 it/sec) -training >> step=5970100, episode=996 reward=0.7815561 (511.02 it/sec) -training >> step=5970200, episode=996 reward=0.7688866 (430.76 it/sec) -training >> step=5970300, episode=996 reward=0.7877498 (464.27 it/sec) -training >> step=5970400, episode=996 reward=0.7871182 (488.18 it/sec) -training >> step=5970500, episode=996 reward=0.7856132 (464.56 it/sec) -training >> step=5970600, episode=996 reward=0.7836868 (531.21 it/sec) -training >> step=5970700, episode=996 reward=0.787545 (476.60 it/sec) -training >> step=5970800, episode=996 reward=0.7918677 (501.76 it/sec) -training >> step=5970900, episode=996 reward=0.7747872 (512.64 it/sec) -training >> step=5971000, episode=996 reward=0.7590097 (507.23 it/sec) -training >> step=5971100, episode=996 reward=0.78494 (506.49 it/sec) -training >> step=5971200, episode=996 reward=0.7923816 (493.63 it/sec) -training >> step=5971300, episode=996 reward=0.7844033 (516.97 it/sec) -training >> step=5971400, episode=996 reward=0.8016769 (520.54 it/sec) -training >> step=5971500, episode=996 reward=0.7941216 (475.78 it/sec) -training >> step=5971600, episode=996 reward=0.8024205 (477.98 it/sec) -training >> step=5971700, episode=996 reward=0.7851075 (490.88 it/sec) -training >> step=5971800, episode=996 reward=0.7898059 (499.08 it/sec) -training >> step=5971900, episode=996 reward=0.7895272 (459.63 it/sec) -training >> step=5972000, episode=996 reward=0.7999215 (510.35 it/sec) -training >> step=5972100, episode=996 reward=0.7877181 (463.14 it/sec) -training >> step=5972200, episode=996 reward=0.7760544 (473.23 it/sec) -training >> step=5972300, episode=996 reward=0.7787376 (493.09 it/sec) -training >> step=5972400, episode=996 reward=0.7915238 (466.88 it/sec) -training >> step=5972500, episode=996 reward=0.7933612 (495.54 it/sec) -training >> step=5972600, episode=996 reward=0.7881656 (493.54 it/sec) -training >> step=5972700, episode=996 reward=0.7686074 (482.76 it/sec) -training >> step=5972800, episode=996 reward=0.7753247 (468.56 it/sec) -training >> step=5972900, episode=996 reward=0.7814884 (484.28 it/sec) -training >> step=5973000, episode=996 reward=0.7752456 (517.88 it/sec) -training >> step=5973100, episode=996 reward=0.7844388 (491.92 it/sec) -training >> step=5973200, episode=996 reward=0.7690908 (467.38 it/sec) -training >> step=5973300, episode=996 reward=0.8130657 (482.05 it/sec) -training >> step=5973400, episode=996 reward=0.7493658 (448.73 it/sec) -training >> step=5973500, episode=996 reward=0.7939117 (478.06 it/sec) -training >> step=5973600, episode=996 reward=0.791234 (515.07 it/sec) -training >> step=5973700, episode=996 reward=0.7805727 (487.66 it/sec) -training >> step=5973800, episode=996 reward=0.7687698 (491.04 it/sec) -training >> step=5973900, episode=996 reward=0.7943854 (473.48 it/sec) -training >> step=5974000, episode=996 reward=0.7751099 (460.94 it/sec) -training >> step=5974100, episode=996 reward=0.7795467 (508.99 it/sec) -training >> step=5974200, episode=996 reward=0.775337 (474.29 it/sec) -training >> step=5974300, episode=996 reward=0.763225 (478.76 it/sec) -training >> step=5974400, episode=996 reward=0.7940875 (499.20 it/sec) -training >> step=5974500, episode=996 reward=0.7932168 (522.39 it/sec) -training >> step=5974600, episode=996 reward=0.7570578 (498.32 it/sec) -training >> step=5974700, episode=996 reward=0.7851635 (439.79 it/sec) -training >> step=5974800, episode=996 reward=0.7735116 (453.90 it/sec) -training >> step=5974900, episode=996 reward=0.7713187 (489.13 it/sec) -training >> step=5975000, episode=996 reward=0.7866139 (503.57 it/sec) -training >> step=5975100, episode=996 reward=0.7716156 (436.80 it/sec) -training >> step=5975200, episode=996 reward=0.7561362 (382.87 it/sec) -training >> step=5975300, episode=997 reward=0.7783466 (48.92 it/sec) -training >> step=5975400, episode=997 reward=0.7745756 (453.84 it/sec) -training >> step=5975500, episode=997 reward=0.7853292 (498.33 it/sec) -training >> step=5975600, episode=997 reward=0.7799677 (430.78 it/sec) -training >> step=5975700, episode=997 reward=0.7918611 (374.19 it/sec) -training >> step=5975800, episode=997 reward=0.7875677 (483.24 it/sec) -training >> step=5975900, episode=997 reward=0.7743024 (503.73 it/sec) -training >> step=5976000, episode=997 reward=0.7771034 (467.46 it/sec) -training >> step=5976100, episode=997 reward=0.7705079 (524.31 it/sec) -training >> step=5976200, episode=997 reward=0.7600534 (515.57 it/sec) -training >> step=5976300, episode=997 reward=0.7895175 (482.72 it/sec) -training >> step=5976400, episode=997 reward=0.7793173 (458.25 it/sec) -training >> step=5976500, episode=997 reward=0.7761156 (472.41 it/sec) -training >> step=5976600, episode=997 reward=0.7886656 (482.03 it/sec) -training >> step=5976700, episode=997 reward=0.797712 (526.06 it/sec) -training >> step=5976800, episode=997 reward=0.7725618 (449.00 it/sec) -training >> step=5976900, episode=997 reward=0.7694462 (445.85 it/sec) -training >> step=5977000, episode=997 reward=0.7856106 (507.37 it/sec) -training >> step=5977100, episode=997 reward=0.8081973 (501.19 it/sec) -training >> step=5977200, episode=997 reward=0.7833788 (521.57 it/sec) -training >> step=5977300, episode=997 reward=0.7700065 (526.57 it/sec) -training >> step=5977400, episode=997 reward=0.7942481 (508.44 it/sec) -training >> step=5977500, episode=997 reward=0.7826787 (508.51 it/sec) -training >> step=5977600, episode=997 reward=0.78376 (480.28 it/sec) -training >> step=5977700, episode=997 reward=0.775731 (520.47 it/sec) -training >> step=5977800, episode=997 reward=0.7783897 (530.95 it/sec) -training >> step=5977900, episode=997 reward=0.7912523 (476.33 it/sec) -training >> step=5978000, episode=997 reward=0.7987332 (485.98 it/sec) -training >> step=5978100, episode=997 reward=0.7853634 (461.51 it/sec) -training >> step=5978200, episode=997 reward=0.7993479 (453.64 it/sec) -training >> step=5978300, episode=997 reward=0.791244 (511.57 it/sec) -training >> step=5978400, episode=997 reward=0.7670901 (448.35 it/sec) -training >> step=5978500, episode=997 reward=0.769959 (514.82 it/sec) -training >> step=5978600, episode=997 reward=0.7836776 (510.20 it/sec) -training >> step=5978700, episode=997 reward=0.7574418 (500.03 it/sec) -training >> step=5978800, episode=997 reward=0.7935838 (519.67 it/sec) -training >> step=5978900, episode=997 reward=0.7813231 (485.33 it/sec) -training >> step=5979000, episode=997 reward=0.7862189 (498.64 it/sec) -training >> step=5979100, episode=997 reward=0.7729344 (514.39 it/sec) -training >> step=5979200, episode=997 reward=0.7979018 (520.21 it/sec) -training >> step=5979300, episode=997 reward=0.7902235 (464.59 it/sec) -training >> step=5979400, episode=997 reward=0.7862132 (424.81 it/sec) -training >> step=5979500, episode=997 reward=0.7752513 (491.30 it/sec) -training >> step=5979600, episode=997 reward=0.7937846 (510.82 it/sec) -training >> step=5979700, episode=997 reward=0.7820603 (524.35 it/sec) -training >> step=5979800, episode=997 reward=0.7771872 (543.82 it/sec) -training >> step=5979900, episode=997 reward=0.781798 (508.36 it/sec) -training >> step=5980000, episode=997 reward=0.7890978 (454.44 it/sec) -training >> step=5980100, episode=997 reward=0.7707994 (469.81 it/sec) -training >> step=5980200, episode=997 reward=0.7931943 (511.49 it/sec) -training >> step=5980300, episode=997 reward=0.7901083 (504.07 it/sec) -training >> step=5980400, episode=997 reward=0.7873714 (504.73 it/sec) -training >> step=5980500, episode=997 reward=0.7790757 (482.45 it/sec) -training >> step=5980600, episode=997 reward=0.7679585 (519.75 it/sec) -training >> step=5980700, episode=997 reward=0.771659 (524.49 it/sec) -training >> step=5980800, episode=997 reward=0.7812585 (537.02 it/sec) -training >> step=5980900, episode=997 reward=0.7760289 (485.30 it/sec) -training >> step=5981000, episode=997 reward=0.7542397 (486.99 it/sec) -training >> step=5981100, episode=997 reward=0.7718214 (493.19 it/sec) -training >> step=5981200, episode=997 reward=0.7642815 (481.70 it/sec) -training >> step=5981300, episode=998 reward=0.751754 (45.17 it/sec) -training >> step=5981400, episode=998 reward=0.7801992 (419.02 it/sec) -training >> step=5981500, episode=998 reward=0.7783563 (497.59 it/sec) -training >> step=5981600, episode=998 reward=0.7947097 (542.66 it/sec) -training >> step=5981700, episode=998 reward=0.783388 (487.46 it/sec) -training >> step=5981800, episode=998 reward=0.7884827 (516.96 it/sec) -training >> step=5981900, episode=998 reward=0.7888699 (516.27 it/sec) -training >> step=5982000, episode=998 reward=0.790381 (364.45 it/sec) -training >> step=5982100, episode=998 reward=0.7641762 (531.24 it/sec) -training >> step=5982200, episode=998 reward=0.7899942 (485.27 it/sec) -training >> step=5982300, episode=998 reward=0.7697847 (509.17 it/sec) -training >> step=5982400, episode=998 reward=0.7595431 (524.23 it/sec) -training >> step=5982500, episode=998 reward=0.7764067 (491.75 it/sec) -training >> step=5982600, episode=998 reward=0.7899939 (518.35 it/sec) -training >> step=5982700, episode=998 reward=0.7613572 (527.94 it/sec) -training >> step=5982800, episode=998 reward=0.7677241 (525.15 it/sec) -training >> step=5982900, episode=998 reward=0.7989749 (506.29 it/sec) -training >> step=5983000, episode=998 reward=0.7920013 (480.86 it/sec) -training >> step=5983100, episode=998 reward=0.7760564 (551.31 it/sec) -training >> step=5983200, episode=998 reward=0.7717023 (481.68 it/sec) -training >> step=5983300, episode=998 reward=0.7776831 (526.40 it/sec) -training >> step=5983400, episode=998 reward=0.7954809 (492.56 it/sec) -training >> step=5983500, episode=998 reward=0.7828308 (462.02 it/sec) -training >> step=5983600, episode=998 reward=0.7994251 (466.20 it/sec) -training >> step=5983700, episode=998 reward=0.7923699 (511.29 it/sec) -training >> step=5983800, episode=998 reward=0.7938061 (522.76 it/sec) -training >> step=5983900, episode=998 reward=0.7988272 (563.14 it/sec) -training >> step=5984000, episode=998 reward=0.77765 (482.84 it/sec) -training >> step=5984100, episode=998 reward=0.7984688 (507.69 it/sec) -training >> step=5984200, episode=998 reward=0.7857546 (472.52 it/sec) -training >> step=5984300, episode=998 reward=0.7685201 (491.75 it/sec) -training >> step=5984400, episode=998 reward=0.7773114 (495.24 it/sec) -training >> step=5984500, episode=998 reward=0.7916625 (500.20 it/sec) -training >> step=5984600, episode=998 reward=0.7922635 (516.18 it/sec) -training >> step=5984700, episode=998 reward=0.8049131 (514.38 it/sec) -training >> step=5984800, episode=998 reward=0.7776325 (487.56 it/sec) -training >> step=5984900, episode=998 reward=0.7954976 (492.10 it/sec) -training >> step=5985000, episode=998 reward=0.7807063 (502.30 it/sec) -training >> step=5985100, episode=998 reward=0.7778433 (477.63 it/sec) -training >> step=5985200, episode=998 reward=0.7772413 (497.30 it/sec) -training >> step=5985300, episode=998 reward=0.7790405 (538.93 it/sec) -training >> step=5985400, episode=998 reward=0.7724519 (480.96 it/sec) -training >> step=5985500, episode=998 reward=0.7755243 (498.48 it/sec) -training >> step=5985600, episode=998 reward=0.7862967 (486.62 it/sec) -training >> step=5985700, episode=998 reward=0.7653734 (515.39 it/sec) -training >> step=5985800, episode=998 reward=0.7626085 (516.48 it/sec) -training >> step=5985900, episode=998 reward=0.7667991 (531.58 it/sec) -training >> step=5986000, episode=998 reward=0.7833526 (488.50 it/sec) -training >> step=5986100, episode=998 reward=0.7723351 (502.63 it/sec) -training >> step=5986200, episode=998 reward=0.7672575 (542.80 it/sec) -training >> step=5986300, episode=998 reward=0.7540081 (485.44 it/sec) -training >> step=5986400, episode=998 reward=0.7562171 (548.72 it/sec) -training >> step=5986500, episode=998 reward=0.7630497 (496.06 it/sec) -training >> step=5986600, episode=998 reward=0.7654953 (446.96 it/sec) -training >> step=5986700, episode=998 reward=0.7278987 (543.91 it/sec) -training >> step=5986800, episode=998 reward=0.7455984 (511.08 it/sec) -training >> step=5986900, episode=998 reward=0.7467307 (508.90 it/sec) -training >> step=5987000, episode=998 reward=0.7607651 (505.78 it/sec) -training >> step=5987100, episode=998 reward=0.7564772 (523.46 it/sec) -training >> step=5987200, episode=998 reward=0.7568852 (494.51 it/sec) -training >> step=5987300, episode=999 reward=0.7701443 (42.28 it/sec) -training >> step=5987400, episode=999 reward=0.7686242 (531.69 it/sec) -training >> step=5987500, episode=999 reward=0.7818295 (451.63 it/sec) -training >> step=5987600, episode=999 reward=0.7733006 (514.78 it/sec) -training >> step=5987700, episode=999 reward=0.7969693 (541.44 it/sec) -training >> step=5987800, episode=999 reward=0.7618429 (508.51 it/sec) -training >> step=5987900, episode=999 reward=0.7802671 (499.21 it/sec) -training >> step=5988000, episode=999 reward=0.7967541 (527.41 it/sec) -training >> step=5988100, episode=999 reward=0.7830396 (514.10 it/sec) -training >> step=5988200, episode=999 reward=0.7799431 (354.62 it/sec) -training >> step=5988300, episode=999 reward=0.7643933 (492.01 it/sec) -training >> step=5988400, episode=999 reward=0.7812998 (532.78 it/sec) -training >> step=5988500, episode=999 reward=0.7929466 (468.74 it/sec) -training >> step=5988600, episode=999 reward=0.7963474 (507.38 it/sec) -training >> step=5988700, episode=999 reward=0.7929517 (483.54 it/sec) -training >> step=5988800, episode=999 reward=0.7776297 (516.17 it/sec) -training >> step=5988900, episode=999 reward=0.777683 (522.93 it/sec) -training >> step=5989000, episode=999 reward=0.7831646 (528.33 it/sec) -training >> step=5989100, episode=999 reward=0.7874604 (486.84 it/sec) -training >> step=5989200, episode=999 reward=0.7703077 (486.07 it/sec) -training >> step=5989300, episode=999 reward=0.7876563 (525.40 it/sec) -training >> step=5989400, episode=999 reward=0.7857493 (541.49 it/sec) -training >> step=5989500, episode=999 reward=0.770772 (483.29 it/sec) -training >> step=5989600, episode=999 reward=0.7706391 (465.87 it/sec) -training >> step=5989700, episode=999 reward=0.7797906 (527.37 it/sec) -training >> step=5989800, episode=999 reward=0.7752426 (529.12 it/sec) -training >> step=5989900, episode=999 reward=0.7681106 (504.07 it/sec) -training >> step=5990000, episode=999 reward=0.7763327 (488.67 it/sec) -training >> step=5990100, episode=999 reward=0.8002585 (524.23 it/sec) -training >> step=5990200, episode=999 reward=0.7798195 (469.00 it/sec) -training >> step=5990300, episode=999 reward=0.7799364 (523.45 it/sec) -training >> step=5990400, episode=999 reward=0.7904014 (512.28 it/sec) -training >> step=5990500, episode=999 reward=0.8004831 (560.86 it/sec) -training >> step=5990600, episode=999 reward=0.814657 (477.57 it/sec) -training >> step=5990700, episode=999 reward=0.790062 (506.02 it/sec) -training >> step=5990800, episode=999 reward=0.7731664 (480.46 it/sec) -training >> step=5990900, episode=999 reward=0.8067835 (458.86 it/sec) -training >> step=5991000, episode=999 reward=0.7772645 (475.40 it/sec) -training >> step=5991100, episode=999 reward=0.783884 (508.35 it/sec) -training >> step=5991200, episode=999 reward=0.7876934 (530.73 it/sec) -training >> step=5991300, episode=999 reward=0.7928598 (489.35 it/sec) -training >> step=5991400, episode=999 reward=0.7824342 (530.99 it/sec) -training >> step=5991500, episode=999 reward=0.7715626 (529.67 it/sec) -training >> step=5991600, episode=999 reward=0.7668345 (499.32 it/sec) -training >> step=5991700, episode=999 reward=0.7691499 (515.45 it/sec) -training >> step=5991800, episode=999 reward=0.7728455 (534.87 it/sec) -training >> step=5991900, episode=999 reward=0.7765523 (509.08 it/sec) -training >> step=5992000, episode=999 reward=0.7770304 (541.63 it/sec) -training >> step=5992100, episode=999 reward=0.7731116 (483.83 it/sec) -training >> step=5992200, episode=999 reward=0.7709354 (519.37 it/sec) -training >> step=5992300, episode=999 reward=0.7520084 (533.99 it/sec) -training >> step=5992400, episode=999 reward=0.7544667 (516.08 it/sec) -training >> step=5992500, episode=999 reward=0.7747641 (511.24 it/sec) -training >> step=5992600, episode=999 reward=0.7608286 (521.92 it/sec) -training >> step=5992700, episode=999 reward=0.7463211 (496.53 it/sec) -training >> step=5992800, episode=999 reward=0.783096 (503.39 it/sec) -training >> step=5992900, episode=999 reward=0.7398215 (528.51 it/sec) -training >> step=5993000, episode=999 reward=0.7504975 (540.36 it/sec) -training >> step=5993100, episode=999 reward=0.7562343 (447.42 it/sec) -training >> step=5993200, episode=999 reward=0.7531403 (479.99 it/sec) -training >> step=5993300, episode=1000 reward=0.7707359 (46.39 it/sec) -training >> step=5993400, episode=1000 reward=0.7618665 (495.07 it/sec) -training >> step=5993500, episode=1000 reward=0.7897587 (514.98 it/sec) -training >> step=5993600, episode=1000 reward=0.7821667 (536.96 it/sec) -training >> step=5993700, episode=1000 reward=0.7736214 (458.13 it/sec) -training >> step=5993800, episode=1000 reward=0.7775703 (497.45 it/sec) -training >> step=5993900, episode=1000 reward=0.8057134 (522.49 it/sec) -training >> step=5994000, episode=1000 reward=0.7702853 (521.69 it/sec) -training >> step=5994100, episode=1000 reward=0.7807217 (536.85 it/sec) -training >> step=5994200, episode=1000 reward=0.7636205 (494.08 it/sec) -training >> step=5994300, episode=1000 reward=0.7992439 (493.07 it/sec) -training >> step=5994400, episode=1000 reward=0.7855043 (503.30 it/sec) -training >> step=5994500, episode=1000 reward=0.7733192 (363.63 it/sec) -training >> step=5994600, episode=1000 reward=0.7891784 (492.49 it/sec) -training >> step=5994700, episode=1000 reward=0.775903 (492.67 it/sec) -training >> step=5994800, episode=1000 reward=0.7940659 (532.36 it/sec) -training >> step=5994900, episode=1000 reward=0.7901886 (501.56 it/sec) -training >> step=5995000, episode=1000 reward=0.7871369 (498.70 it/sec) -training >> step=5995100, episode=1000 reward=0.7858901 (524.64 it/sec) -training >> step=5995200, episode=1000 reward=0.7778934 (527.19 it/sec) -training >> step=5995300, episode=1000 reward=0.7899741 (508.09 it/sec) -training >> step=5995400, episode=1000 reward=0.7723642 (499.59 it/sec) -training >> step=5995500, episode=1000 reward=0.8128003 (534.01 it/sec) -training >> step=5995600, episode=1000 reward=0.7895671 (506.00 it/sec) -training >> step=5995700, episode=1000 reward=0.7945263 (498.53 it/sec) -training >> step=5995800, episode=1000 reward=0.7919039 (514.51 it/sec) -training >> step=5995900, episode=1000 reward=0.7743494 (538.48 it/sec) -training >> step=5996000, episode=1000 reward=0.7870454 (486.22 it/sec) -training >> step=5996100, episode=1000 reward=0.7937209 (512.95 it/sec) -training >> step=5996200, episode=1000 reward=0.7985976 (522.27 it/sec) -training >> step=5996300, episode=1000 reward=0.7877573 (469.44 it/sec) -training >> step=5996400, episode=1000 reward=0.7840285 (484.99 it/sec) -training >> step=5996500, episode=1000 reward=0.7861692 (482.95 it/sec) -training >> step=5996600, episode=1000 reward=0.7931589 (561.71 it/sec) -training >> step=5996700, episode=1000 reward=0.806864 (515.06 it/sec) -training >> step=5996800, episode=1000 reward=0.786143 (459.59 it/sec) -training >> step=5996900, episode=1000 reward=0.8037037 (535.34 it/sec) -training >> step=5997000, episode=1000 reward=0.7957497 (486.61 it/sec) -training >> step=5997100, episode=1000 reward=0.7726271 (518.28 it/sec) -training >> step=5997200, episode=1000 reward=0.78713 (449.05 it/sec) -training >> step=5997300, episode=1000 reward=0.7686632 (528.36 it/sec) -training >> step=5997400, episode=1000 reward=0.7907404 (518.58 it/sec) -training >> step=5997500, episode=1000 reward=0.7891194 (475.19 it/sec) -training >> step=5997600, episode=1000 reward=0.792762 (525.03 it/sec) -training >> step=5997700, episode=1000 reward=0.7720194 (534.27 it/sec) -training >> step=5997800, episode=1000 reward=0.7688001 (509.81 it/sec) -training >> step=5997900, episode=1000 reward=0.7808911 (476.22 it/sec) -training >> step=5998000, episode=1000 reward=0.7887748 (516.12 it/sec) -training >> step=5998100, episode=1000 reward=0.769443 (452.40 it/sec) -training >> step=5998200, episode=1000 reward=0.7738181 (504.37 it/sec) -training >> step=5998300, episode=1000 reward=0.790265 (520.05 it/sec) -training >> step=5998400, episode=1000 reward=0.7692198 (484.82 it/sec) -training >> step=5998500, episode=1000 reward=0.789045 (500.45 it/sec) -training >> step=5998600, episode=1000 reward=0.7620254 (497.14 it/sec) -training >> step=5998700, episode=1000 reward=0.7616056 (520.07 it/sec) -training >> step=5998800, episode=1000 reward=0.747516 (523.53 it/sec) -training >> step=5998900, episode=1000 reward=0.7788789 (504.61 it/sec) -training >> step=5999000, episode=1000 reward=0.7404233 (505.17 it/sec) -training >> step=5999100, episode=1000 reward=0.7447588 (503.69 it/sec) -training >> step=5999200, episode=1000 reward=0.7487317 (487.32 it/sec) -training >> step=5999300, episode=1001 reward=0.755856 (48.85 it/sec) -training >> step=5999400, episode=1001 reward=0.7773897 (442.08 it/sec) -training >> step=5999500, episode=1001 reward=0.7785246 (483.53 it/sec) -training >> step=5999600, episode=1001 reward=0.7994282 (488.80 it/sec) -training >> step=5999700, episode=1001 reward=0.7867598 (509.70 it/sec) -training >> step=5999800, episode=1001 reward=0.7819982 (516.85 it/sec) -training >> step=5999900, episode=1001 reward=0.7612954 (496.05 it/sec) -training >> step=6000000, episode=1001 reward=0.7899438 (470.50 it/sec) -training >> step=6000100, episode=1001 reward=0.7824909 (446.04 it/sec) -training >> step=6000200, episode=1001 reward=0.7815387 (441.59 it/sec) -training >> step=6000300, episode=1001 reward=0.7777103 (520.34 it/sec) -training >> step=6000400, episode=1001 reward=0.8015403 (435.06 it/sec) -training >> step=6000500, episode=1001 reward=0.7705664 (485.44 it/sec) -training >> step=6000600, episode=1001 reward=0.7785674 (486.78 it/sec) -training >> step=6000700, episode=1001 reward=0.7976452 (479.81 it/sec) -training >> step=6000800, episode=1001 reward=0.7786004 (324.25 it/sec) -training >> step=6000900, episode=1001 reward=0.7768043 (493.12 it/sec) -training >> step=6001000, episode=1001 reward=0.7842099 (500.86 it/sec) -training >> step=6001100, episode=1001 reward=0.7818812 (485.48 it/sec) -training >> step=6001200, episode=1001 reward=0.7987965 (470.75 it/sec) -training >> step=6001300, episode=1001 reward=0.7789282 (505.02 it/sec) -training >> step=6001400, episode=1001 reward=0.7835062 (486.09 it/sec) -training >> step=6001500, episode=1001 reward=0.7883113 (515.16 it/sec) -training >> step=6001600, episode=1001 reward=0.7814922 (535.82 it/sec) -training >> step=6001700, episode=1001 reward=0.8036402 (474.89 it/sec) -training >> step=6001800, episode=1001 reward=0.8056295 (480.80 it/sec) -training >> step=6001900, episode=1001 reward=0.7899147 (501.62 it/sec) -training >> step=6002000, episode=1001 reward=0.7860127 (464.08 it/sec) -training >> step=6002100, episode=1001 reward=0.786006 (435.08 it/sec) -training >> step=6002200, episode=1001 reward=0.7940777 (438.68 it/sec) -training >> step=6002300, episode=1001 reward=0.786997 (456.63 it/sec) -training >> step=6002400, episode=1001 reward=0.7899387 (466.97 it/sec) -training >> step=6002500, episode=1001 reward=0.7825581 (476.32 it/sec) -training >> step=6002600, episode=1001 reward=0.7871011 (484.42 it/sec) -training >> step=6002700, episode=1001 reward=0.7959766 (518.20 it/sec) -training >> step=6002800, episode=1001 reward=0.7755652 (458.72 it/sec) -training >> step=6002900, episode=1001 reward=0.7744178 (510.59 it/sec) -training >> step=6003000, episode=1001 reward=0.7828543 (477.60 it/sec) -training >> step=6003100, episode=1001 reward=0.7898217 (520.90 it/sec) -training >> step=6003200, episode=1001 reward=0.7956626 (485.25 it/sec) -training >> step=6003300, episode=1001 reward=0.7954021 (507.31 it/sec) -training >> step=6003400, episode=1001 reward=0.7744992 (507.74 it/sec) -training >> step=6003500, episode=1001 reward=0.768943 (519.01 it/sec) -training >> step=6003600, episode=1001 reward=0.7824167 (497.35 it/sec) -training >> step=6003700, episode=1001 reward=0.7959409 (509.41 it/sec) -training >> step=6003800, episode=1001 reward=0.7701303 (528.41 it/sec) -training >> step=6003900, episode=1001 reward=0.7735047 (482.55 it/sec) -training >> step=6004000, episode=1001 reward=0.7898558 (485.30 it/sec) -training >> step=6004100, episode=1001 reward=0.7810315 (486.76 it/sec) -training >> step=6004200, episode=1001 reward=0.7720016 (487.71 it/sec) -training >> step=6004300, episode=1001 reward=0.7713061 (483.75 it/sec) -training >> step=6004400, episode=1001 reward=0.777647 (477.60 it/sec) -training >> step=6004500, episode=1001 reward=0.7819391 (485.74 it/sec) -training >> step=6004600, episode=1001 reward=0.7776802 (447.44 it/sec) -training >> step=6004700, episode=1001 reward=0.7686803 (474.87 it/sec) -training >> step=6004800, episode=1001 reward=0.7621488 (451.99 it/sec) -training >> step=6004900, episode=1001 reward=0.7880478 (467.58 it/sec) -training >> step=6005000, episode=1001 reward=0.7686117 (463.73 it/sec) -training >> step=6005100, episode=1001 reward=0.7700311 (474.01 it/sec) -training >> step=6005200, episode=1001 reward=0.745178 (436.71 it/sec) -training >> step=6005300, episode=1002 reward=0.808187 (93.29 it/sec) -training >> step=6005400, episode=1002 reward=0.783071 (501.13 it/sec) -training >> step=6005500, episode=1002 reward=0.7853069 (427.76 it/sec) -training >> step=6005600, episode=1002 reward=0.7919069 (501.14 it/sec) -training >> step=6005700, episode=1002 reward=0.7872249 (477.01 it/sec) -training >> step=6005800, episode=1002 reward=0.783241 (516.79 it/sec) -training >> step=6005900, episode=1002 reward=0.7892459 (531.68 it/sec) -training >> step=6006000, episode=1002 reward=0.779614 (540.57 it/sec) -training >> step=6006100, episode=1002 reward=0.814495 (547.19 it/sec) -training >> step=6006200, episode=1002 reward=0.7675356 (519.95 it/sec) -training >> step=6006300, episode=1002 reward=0.7920236 (402.17 it/sec) -training >> step=6006400, episode=1002 reward=0.7715966 (445.25 it/sec) -training >> step=6006500, episode=1002 reward=0.770058 (378.70 it/sec) -training >> step=6006600, episode=1002 reward=0.8082761 (452.91 it/sec) -training >> step=6006700, episode=1002 reward=0.7775638 (450.25 it/sec) -training >> step=6006800, episode=1002 reward=0.7751764 (345.34 it/sec) -training >> step=6006900, episode=1002 reward=0.7781473 (498.44 it/sec) -training >> step=6007000, episode=1002 reward=0.7912369 (488.15 it/sec) -training >> step=6007100, episode=1002 reward=0.8038598 (464.90 it/sec) -training >> step=6007200, episode=1002 reward=0.7637285 (519.53 it/sec) -training >> step=6007300, episode=1002 reward=0.7739297 (558.17 it/sec) -training >> step=6007400, episode=1002 reward=0.8051857 (430.44 it/sec) -training >> step=6007500, episode=1002 reward=0.8061351 (455.02 it/sec) -training >> step=6007600, episode=1002 reward=0.7922234 (568.00 it/sec) -training >> step=6007700, episode=1002 reward=0.7818149 (527.69 it/sec) -training >> step=6007800, episode=1002 reward=0.7873589 (482.41 it/sec) -training >> step=6007900, episode=1002 reward=0.7865086 (542.73 it/sec) -training >> step=6008000, episode=1002 reward=0.7904742 (555.74 it/sec) -training >> step=6008100, episode=1002 reward=0.7812043 (516.70 it/sec) -training >> step=6008200, episode=1002 reward=0.7803484 (529.57 it/sec) -training >> step=6008300, episode=1002 reward=0.7929019 (525.44 it/sec) -training >> step=6008400, episode=1002 reward=0.7933692 (495.36 it/sec) -training >> step=6008500, episode=1002 reward=0.7958741 (512.48 it/sec) -training >> step=6008600, episode=1002 reward=0.7926217 (509.42 it/sec) -training >> step=6008700, episode=1002 reward=0.7758937 (532.96 it/sec) -training >> step=6008800, episode=1002 reward=0.7817036 (525.88 it/sec) -training >> step=6008900, episode=1002 reward=0.7792097 (553.18 it/sec) -training >> step=6009000, episode=1002 reward=0.7872686 (563.54 it/sec) -training >> step=6009100, episode=1002 reward=0.7796612 (565.41 it/sec) -training >> step=6009200, episode=1002 reward=0.8125463 (549.49 it/sec) -training >> step=6009300, episode=1002 reward=0.7974844 (544.32 it/sec) -training >> step=6009400, episode=1002 reward=0.7904757 (502.82 it/sec) -training >> step=6009500, episode=1002 reward=0.7765335 (554.41 it/sec) -training >> step=6009600, episode=1002 reward=0.7838669 (541.61 it/sec) -training >> step=6009700, episode=1002 reward=0.790522 (522.23 it/sec) -training >> step=6009800, episode=1002 reward=0.7744877 (577.27 it/sec) -training >> step=6009900, episode=1002 reward=0.7869554 (528.07 it/sec) -training >> step=6010000, episode=1002 reward=0.7783157 (551.68 it/sec) -training >> step=6010100, episode=1002 reward=0.7675409 (545.90 it/sec) -training >> step=6010200, episode=1002 reward=0.7985378 (552.56 it/sec) -training >> step=6010300, episode=1002 reward=0.7676885 (507.39 it/sec) -training >> step=6010400, episode=1002 reward=0.7770951 (537.07 it/sec) -training >> step=6010500, episode=1002 reward=0.736787 (472.26 it/sec) -training >> step=6010600, episode=1002 reward=0.7747166 (536.36 it/sec) -training >> step=6010700, episode=1002 reward=0.7573611 (558.99 it/sec) -training >> step=6010800, episode=1002 reward=0.7434868 (518.33 it/sec) -training >> step=6010900, episode=1002 reward=0.7759181 (553.56 it/sec) -training >> step=6011000, episode=1002 reward=0.7723361 (470.31 it/sec) -training >> step=6011100, episode=1002 reward=0.7492986 (521.81 it/sec) -training >> step=6011200, episode=1002 reward=0.7633244 (546.58 it/sec) -training >> step=6011300, episode=1003 reward=0.7759901 (106.82 it/sec) -training >> step=6011400, episode=1003 reward=0.7970164 (398.07 it/sec) -training >> step=6011500, episode=1003 reward=0.7840519 (400.89 it/sec) -training >> step=6011600, episode=1003 reward=0.7886489 (418.53 it/sec) -training >> step=6011700, episode=1003 reward=0.7752714 (378.42 it/sec) -training >> step=6011800, episode=1003 reward=0.7579051 (482.40 it/sec) -training >> step=6011900, episode=1003 reward=0.768236 (469.43 it/sec) -training >> step=6012000, episode=1003 reward=0.783411 (573.60 it/sec) -training >> step=6012100, episode=1003 reward=0.7776691 (484.54 it/sec) -training >> step=6012200, episode=1003 reward=0.7724943 (488.00 it/sec) -training >> step=6012300, episode=1003 reward=0.7642476 (487.60 it/sec) -training >> step=6012400, episode=1003 reward=0.7963383 (463.23 it/sec) -training >> step=6012500, episode=1003 reward=0.7770115 (474.88 it/sec) -training >> step=6012600, episode=1003 reward=0.7988071 (452.83 it/sec) -training >> step=6012700, episode=1003 reward=0.7866188 (471.07 it/sec) -training >> step=6012800, episode=1003 reward=0.8069674 (444.91 it/sec) -training >> step=6012900, episode=1003 reward=0.7746988 (399.09 it/sec) -training >> step=6013000, episode=1003 reward=0.7970685 (529.25 it/sec) -training >> step=6013100, episode=1003 reward=0.7584257 (512.03 it/sec) -training >> step=6013200, episode=1003 reward=0.7619009 (443.40 it/sec) -training >> step=6013300, episode=1003 reward=0.7932192 (466.61 it/sec) -training >> step=6013400, episode=1003 reward=0.78396 (501.41 it/sec) -training >> step=6013500, episode=1003 reward=0.7945126 (528.54 it/sec) -training >> step=6013600, episode=1003 reward=0.7935652 (474.85 it/sec) -training >> step=6013700, episode=1003 reward=0.7868952 (448.90 it/sec) -training >> step=6013800, episode=1003 reward=0.7899504 (475.20 it/sec) -training >> step=6013900, episode=1003 reward=0.7830949 (455.16 it/sec) -training >> step=6014000, episode=1003 reward=0.7799616 (525.25 it/sec) -training >> step=6014100, episode=1003 reward=0.7886517 (554.33 it/sec) -training >> step=6014200, episode=1003 reward=0.7814227 (475.66 it/sec) -training >> step=6014300, episode=1003 reward=0.7763104 (459.17 it/sec) -training >> step=6014400, episode=1003 reward=0.7773384 (482.33 it/sec) -training >> step=6014500, episode=1003 reward=0.7875934 (573.83 it/sec) -training >> step=6014600, episode=1003 reward=0.7762399 (463.97 it/sec) -training >> step=6014700, episode=1003 reward=0.8040959 (522.35 it/sec) -training >> step=6014800, episode=1003 reward=0.7851797 (579.85 it/sec) -training >> step=6014900, episode=1003 reward=0.7955592 (487.43 it/sec) -training >> step=6015000, episode=1003 reward=0.7839255 (538.77 it/sec) -training >> step=6015100, episode=1003 reward=0.786522 (547.63 it/sec) -training >> step=6015200, episode=1003 reward=0.79374 (525.60 it/sec) -training >> step=6015300, episode=1003 reward=0.761002 (558.76 it/sec) -training >> step=6015400, episode=1003 reward=0.7884984 (522.87 it/sec) -training >> step=6015500, episode=1003 reward=0.7862393 (577.06 it/sec) -training >> step=6015600, episode=1003 reward=0.7778878 (518.23 it/sec) -training >> step=6015700, episode=1003 reward=0.7949967 (483.28 it/sec) -training >> step=6015800, episode=1003 reward=0.7626346 (531.39 it/sec) -training >> step=6015900, episode=1003 reward=0.7677796 (539.83 it/sec) -training >> step=6016000, episode=1003 reward=0.7854809 (482.29 it/sec) -training >> step=6016100, episode=1003 reward=0.7982944 (384.83 it/sec) -training >> step=6016200, episode=1003 reward=0.7692767 (390.69 it/sec) -training >> step=6016300, episode=1003 reward=0.7550201 (396.68 it/sec) -training >> step=6016400, episode=1003 reward=0.7661356 (390.88 it/sec) -training >> step=6016500, episode=1003 reward=0.7734662 (392.27 it/sec) -training >> step=6016600, episode=1003 reward=0.7946129 (418.99 it/sec) -training >> step=6016700, episode=1003 reward=0.7558967 (400.65 it/sec) -training >> step=6016800, episode=1003 reward=0.7622137 (416.80 it/sec) -training >> step=6016900, episode=1003 reward=0.7782636 (490.42 it/sec) -training >> step=6017000, episode=1003 reward=0.7705905 (486.55 it/sec) -training >> step=6017100, episode=1003 reward=0.7659537 (466.92 it/sec) -training >> step=6017200, episode=1003 reward=0.790346 (544.94 it/sec) -training >> step=6017300, episode=1004 reward=0.7687271 (118.18 it/sec) -training >> step=6017400, episode=1004 reward=0.7763262 (423.02 it/sec) -training >> step=6017500, episode=1004 reward=0.7943595 (497.31 it/sec) -training >> step=6017600, episode=1004 reward=0.7871984 (532.76 it/sec) -training >> step=6017700, episode=1004 reward=0.7877421 (504.10 it/sec) -training >> step=6017800, episode=1004 reward=0.774625 (513.53 it/sec) -training >> step=6017900, episode=1004 reward=0.7761527 (532.79 it/sec) -training >> step=6018000, episode=1004 reward=0.7820561 (544.76 it/sec) -training >> step=6018100, episode=1004 reward=0.7807754 (508.49 it/sec) -training >> step=6018200, episode=1004 reward=0.7734255 (504.05 it/sec) -training >> step=6018300, episode=1004 reward=0.7960691 (543.03 it/sec) -training >> step=6018400, episode=1004 reward=0.7998572 (522.15 it/sec) -training >> step=6018500, episode=1004 reward=0.7749674 (441.25 it/sec) -training >> step=6018600, episode=1004 reward=0.7690879 (514.24 it/sec) -training >> step=6018700, episode=1004 reward=0.7824307 (542.22 it/sec) -training >> step=6018800, episode=1004 reward=0.791647 (538.65 it/sec) -training >> step=6018900, episode=1004 reward=0.7883754 (455.98 it/sec) -training >> step=6019000, episode=1004 reward=0.7860236 (499.36 it/sec) -training >> step=6019100, episode=1004 reward=0.7784036 (518.65 it/sec) -training >> step=6019200, episode=1004 reward=0.7788553 (318.67 it/sec) -training >> step=6019300, episode=1004 reward=0.7820665 (429.24 it/sec) -training >> step=6019400, episode=1004 reward=0.7982399 (445.01 it/sec) -training >> step=6019500, episode=1004 reward=0.7941306 (475.66 it/sec) -training >> step=6019600, episode=1004 reward=0.8122992 (533.02 it/sec) -training >> step=6019700, episode=1004 reward=0.7646896 (555.75 it/sec) -training >> step=6019800, episode=1004 reward=0.7925691 (548.77 it/sec) -training >> step=6019900, episode=1004 reward=0.80049 (482.11 it/sec) -training >> step=6020000, episode=1004 reward=0.7769781 (463.21 it/sec) -training >> step=6020100, episode=1004 reward=0.7982755 (542.31 it/sec) -training >> step=6020200, episode=1004 reward=0.8082083 (542.28 it/sec) -training >> step=6020300, episode=1004 reward=0.7718701 (539.64 it/sec) -training >> step=6020400, episode=1004 reward=0.8093073 (468.68 it/sec) -training >> step=6020500, episode=1004 reward=0.7803454 (566.90 it/sec) -training >> step=6020600, episode=1004 reward=0.7806706 (440.26 it/sec) -training >> step=6020700, episode=1004 reward=0.7902125 (499.17 it/sec) -training >> step=6020800, episode=1004 reward=0.7790113 (552.66 it/sec) -training >> step=6020900, episode=1004 reward=0.771568 (569.60 it/sec) -training >> step=6021000, episode=1004 reward=0.7835132 (486.58 it/sec) -training >> step=6021100, episode=1004 reward=0.8123829 (488.53 it/sec) -training >> step=6021200, episode=1004 reward=0.7873449 (538.74 it/sec) -training >> step=6021300, episode=1004 reward=0.7934293 (489.69 it/sec) -training >> step=6021400, episode=1004 reward=0.7791087 (521.27 it/sec) -training >> step=6021500, episode=1004 reward=0.7931417 (547.13 it/sec) -training >> step=6021600, episode=1004 reward=0.7931755 (523.44 it/sec) -training >> step=6021700, episode=1004 reward=0.7693634 (502.68 it/sec) -training >> step=6021800, episode=1004 reward=0.7942767 (507.29 it/sec) -training >> step=6021900, episode=1004 reward=0.7918669 (500.62 it/sec) -training >> step=6022000, episode=1004 reward=0.7767993 (566.57 it/sec) -training >> step=6022100, episode=1004 reward=0.7736678 (534.94 it/sec) -training >> step=6022200, episode=1004 reward=0.7847611 (460.71 it/sec) -training >> step=6022300, episode=1004 reward=0.7725799 (552.45 it/sec) -training >> step=6022400, episode=1004 reward=0.7845581 (476.35 it/sec) -training >> step=6022500, episode=1004 reward=0.7420201 (505.60 it/sec) -training >> step=6022600, episode=1004 reward=0.7867998 (503.20 it/sec) -training >> step=6022700, episode=1004 reward=0.7690253 (468.88 it/sec) -training >> step=6022800, episode=1004 reward=0.7526899 (566.15 it/sec) -training >> step=6022900, episode=1004 reward=0.7712158 (533.18 it/sec) -training >> step=6023000, episode=1004 reward=0.7575638 (547.06 it/sec) -training >> step=6023100, episode=1004 reward=0.7958524 (535.21 it/sec) -training >> step=6023200, episode=1004 reward=0.763257 (526.50 it/sec) -training >> step=6023300, episode=1005 reward=0.7799413 (135.85 it/sec) -training >> step=6023400, episode=1005 reward=0.7946675 (503.36 it/sec) -training >> step=6023500, episode=1005 reward=0.7811443 (508.89 it/sec) -training >> step=6023600, episode=1005 reward=0.7760569 (451.40 it/sec) -training >> step=6023700, episode=1005 reward=0.7553074 (498.43 it/sec) -training >> step=6023800, episode=1005 reward=0.7814021 (516.03 it/sec) -training >> step=6023900, episode=1005 reward=0.7720605 (441.81 it/sec) -training >> step=6024000, episode=1005 reward=0.7895062 (478.18 it/sec) -training >> step=6024100, episode=1005 reward=0.7796586 (554.28 it/sec) -training >> step=6024200, episode=1005 reward=0.7817571 (478.54 it/sec) -training >> step=6024300, episode=1005 reward=0.7989481 (461.00 it/sec) -training >> step=6024400, episode=1005 reward=0.7956873 (420.33 it/sec) -training >> step=6024500, episode=1005 reward=0.7826338 (483.04 it/sec) -training >> step=6024600, episode=1005 reward=0.7948246 (449.99 it/sec) -training >> step=6024700, episode=1005 reward=0.7589008 (413.26 it/sec) -training >> step=6024800, episode=1005 reward=0.7880397 (458.28 it/sec) -training >> step=6024900, episode=1005 reward=0.7842957 (406.11 it/sec) -training >> step=6025000, episode=1005 reward=0.787257 (467.45 it/sec) -training >> step=6025100, episode=1005 reward=0.7920883 (457.85 it/sec) -training >> step=6025200, episode=1005 reward=0.7760531 (439.31 it/sec) -training >> step=6025300, episode=1005 reward=0.783701 (426.35 it/sec) -training >> step=6025400, episode=1005 reward=0.7759557 (352.87 it/sec) -training >> step=6025500, episode=1005 reward=0.7608156 (503.30 it/sec) -training >> step=6025600, episode=1005 reward=0.7861027 (495.14 it/sec) -training >> step=6025700, episode=1005 reward=0.7880862 (498.09 it/sec) -training >> step=6025800, episode=1005 reward=0.7833639 (439.80 it/sec) -training >> step=6025900, episode=1005 reward=0.7888533 (488.16 it/sec) -training >> step=6026000, episode=1005 reward=0.7821711 (477.79 it/sec) -training >> step=6026100, episode=1005 reward=0.7941261 (487.22 it/sec) -training >> step=6026200, episode=1005 reward=0.7838405 (509.29 it/sec) -training >> step=6026300, episode=1005 reward=0.7829481 (484.29 it/sec) -training >> step=6026400, episode=1005 reward=0.7783268 (456.25 it/sec) -training >> step=6026500, episode=1005 reward=0.7878826 (488.58 it/sec) -training >> step=6026600, episode=1005 reward=0.7739415 (450.14 it/sec) -training >> step=6026700, episode=1005 reward=0.8018534 (476.93 it/sec) -training >> step=6026800, episode=1005 reward=0.7872403 (381.54 it/sec) -training >> step=6026900, episode=1005 reward=0.7915649 (444.26 it/sec) -training >> step=6027000, episode=1005 reward=0.7794008 (453.99 it/sec) -training >> step=6027100, episode=1005 reward=0.7714025 (442.84 it/sec) -training >> step=6027200, episode=1005 reward=0.7836564 (464.24 it/sec) -training >> step=6027300, episode=1005 reward=0.7963908 (504.73 it/sec) -training >> step=6027400, episode=1005 reward=0.8002142 (516.55 it/sec) -training >> step=6027500, episode=1005 reward=0.7674721 (497.26 it/sec) -training >> step=6027600, episode=1005 reward=0.7855415 (492.71 it/sec) -training >> step=6027700, episode=1005 reward=0.7934724 (506.67 it/sec) -training >> step=6027800, episode=1005 reward=0.7771682 (471.03 it/sec) -training >> step=6027900, episode=1005 reward=0.7636371 (503.84 it/sec) -training >> step=6028000, episode=1005 reward=0.7645174 (460.93 it/sec) -training >> step=6028100, episode=1005 reward=0.7842487 (497.38 it/sec) -training >> step=6028200, episode=1005 reward=0.7831498 (458.26 it/sec) -training >> step=6028300, episode=1005 reward=0.792516 (473.38 it/sec) -training >> step=6028400, episode=1005 reward=0.7722515 (516.23 it/sec) -training >> step=6028500, episode=1005 reward=0.7682335 (485.05 it/sec) -training >> step=6028600, episode=1005 reward=0.7801455 (476.32 it/sec) -training >> step=6028700, episode=1005 reward=0.7711974 (473.89 it/sec) -training >> step=6028800, episode=1005 reward=0.7636477 (513.83 it/sec) -training >> step=6028900, episode=1005 reward=0.7653359 (508.15 it/sec) -training >> step=6029000, episode=1005 reward=0.7828713 (446.28 it/sec) -training >> step=6029100, episode=1005 reward=0.7767026 (502.22 it/sec) -training >> step=6029200, episode=1005 reward=0.7693668 (497.75 it/sec) -training >> step=6029300, episode=1006 reward=0.7869785 (124.40 it/sec) -training >> step=6029400, episode=1006 reward=0.783874 (471.41 it/sec) -training >> step=6029500, episode=1006 reward=0.7800189 (475.75 it/sec) -training >> step=6029600, episode=1006 reward=0.7983862 (507.60 it/sec) -training >> step=6029700, episode=1006 reward=0.785149 (419.97 it/sec) -training >> step=6029800, episode=1006 reward=0.7731242 (394.08 it/sec) -training >> step=6029900, episode=1006 reward=0.7800481 (371.26 it/sec) -training >> step=6030000, episode=1006 reward=0.7598365 (371.69 it/sec) -training >> step=6030100, episode=1006 reward=0.7889864 (455.28 it/sec) -training >> step=6030200, episode=1006 reward=0.7550994 (495.63 it/sec) -training >> step=6030300, episode=1006 reward=0.7714325 (481.28 it/sec) -training >> step=6030400, episode=1006 reward=0.7989614 (479.70 it/sec) -training >> step=6030500, episode=1006 reward=0.7830549 (487.90 it/sec) -training >> step=6030600, episode=1006 reward=0.7749194 (473.06 it/sec) -training >> step=6030700, episode=1006 reward=0.7798519 (477.38 it/sec) -training >> step=6030800, episode=1006 reward=0.7863208 (453.91 it/sec) -training >> step=6030900, episode=1006 reward=0.7954525 (506.47 it/sec) -training >> step=6031000, episode=1006 reward=0.7677246 (539.18 it/sec) -training >> step=6031100, episode=1006 reward=0.77662 (457.67 it/sec) -training >> step=6031200, episode=1006 reward=0.7933799 (479.80 it/sec) -training >> step=6031300, episode=1006 reward=0.8027967 (522.24 it/sec) -training >> step=6031400, episode=1006 reward=0.7764104 (454.08 it/sec) -training >> step=6031500, episode=1006 reward=0.7716197 (488.16 it/sec) -training >> step=6031600, episode=1006 reward=0.7934462 (344.99 it/sec) -training >> step=6031700, episode=1006 reward=0.7813227 (504.22 it/sec) -training >> step=6031800, episode=1006 reward=0.7850662 (491.12 it/sec) -training >> step=6031900, episode=1006 reward=0.788981 (521.37 it/sec) -training >> step=6032000, episode=1006 reward=0.8016469 (505.82 it/sec) -training >> step=6032100, episode=1006 reward=0.7833835 (516.60 it/sec) -training >> step=6032200, episode=1006 reward=0.7949873 (476.36 it/sec) -training >> step=6032300, episode=1006 reward=0.7905023 (450.00 it/sec) -training >> step=6032400, episode=1006 reward=0.7792103 (494.61 it/sec) -training >> step=6032500, episode=1006 reward=0.7969323 (499.49 it/sec) -training >> step=6032600, episode=1006 reward=0.7821298 (468.96 it/sec) -training >> step=6032700, episode=1006 reward=0.7996945 (479.90 it/sec) -training >> step=6032800, episode=1006 reward=0.7775654 (507.35 it/sec) -training >> step=6032900, episode=1006 reward=0.7893571 (495.35 it/sec) -training >> step=6033000, episode=1006 reward=0.7834335 (475.87 it/sec) -training >> step=6033100, episode=1006 reward=0.8014385 (514.75 it/sec) -training >> step=6033200, episode=1006 reward=0.7776825 (479.81 it/sec) -training >> step=6033300, episode=1006 reward=0.8068647 (461.09 it/sec) -training >> step=6033400, episode=1006 reward=0.7835618 (492.56 it/sec) -training >> step=6033500, episode=1006 reward=0.7880961 (474.68 it/sec) -training >> step=6033600, episode=1006 reward=0.764844 (457.92 it/sec) -training >> step=6033700, episode=1006 reward=0.7747979 (476.56 it/sec) -training >> step=6033800, episode=1006 reward=0.7833803 (477.62 it/sec) -training >> step=6033900, episode=1006 reward=0.7722416 (524.78 it/sec) -training >> step=6034000, episode=1006 reward=0.785657 (480.99 it/sec) -training >> step=6034100, episode=1006 reward=0.7722306 (480.53 it/sec) -training >> step=6034200, episode=1006 reward=0.7573464 (438.12 it/sec) -training >> step=6034300, episode=1006 reward=0.7772849 (475.59 it/sec) -training >> step=6034400, episode=1006 reward=0.7782854 (496.88 it/sec) -training >> step=6034500, episode=1006 reward=0.7861199 (424.46 it/sec) -training >> step=6034600, episode=1006 reward=0.759976 (501.45 it/sec) -training >> step=6034700, episode=1006 reward=0.7764496 (470.22 it/sec) -training >> step=6034800, episode=1006 reward=0.7756957 (462.84 it/sec) -training >> step=6034900, episode=1006 reward=0.779887 (484.50 it/sec) -training >> step=6035000, episode=1006 reward=0.7633064 (489.96 it/sec) -training >> step=6035100, episode=1006 reward=0.7772127 (505.83 it/sec) -training >> step=6035200, episode=1006 reward=0.7549809 (474.71 it/sec) -training >> step=6035300, episode=1007 reward=0.7899563 (163.69 it/sec) -training >> step=6035400, episode=1007 reward=0.7794149 (483.59 it/sec) -training >> step=6035500, episode=1007 reward=0.7901534 (472.79 it/sec) -training >> step=6035600, episode=1007 reward=0.7762422 (486.29 it/sec) -training >> step=6035700, episode=1007 reward=0.7664775 (526.90 it/sec) -training >> step=6035800, episode=1007 reward=0.7986219 (493.79 it/sec) -training >> step=6035900, episode=1007 reward=0.7752768 (473.24 it/sec) -training >> step=6036000, episode=1007 reward=0.7878962 (502.90 it/sec) -training >> step=6036100, episode=1007 reward=0.7917097 (457.59 it/sec) -training >> step=6036200, episode=1007 reward=0.7815154 (502.93 it/sec) -training >> step=6036300, episode=1007 reward=0.8139952 (477.63 it/sec) -training >> step=6036400, episode=1007 reward=0.7705236 (462.85 it/sec) -training >> step=6036500, episode=1007 reward=0.777815 (440.35 it/sec) -training >> step=6036600, episode=1007 reward=0.7559211 (471.69 it/sec) -training >> step=6036700, episode=1007 reward=0.7900125 (464.24 it/sec) -training >> step=6036800, episode=1007 reward=0.7925733 (446.99 it/sec) -training >> step=6036900, episode=1007 reward=0.7777135 (410.43 it/sec) -training >> step=6037000, episode=1007 reward=0.770084 (442.94 it/sec) -training >> step=6037100, episode=1007 reward=0.7937492 (482.85 it/sec) -training >> step=6037200, episode=1007 reward=0.8001066 (460.42 it/sec) -training >> step=6037300, episode=1007 reward=0.7892231 (467.41 it/sec) -training >> step=6037400, episode=1007 reward=0.7783858 (458.11 it/sec) -training >> step=6037500, episode=1007 reward=0.7903394 (498.93 it/sec) -training >> step=6037600, episode=1007 reward=0.7840444 (443.49 it/sec) -training >> step=6037700, episode=1007 reward=0.7814136 (461.78 it/sec) -training >> step=6037800, episode=1007 reward=0.7812434 (299.52 it/sec) -training >> step=6037900, episode=1007 reward=0.7636532 (482.25 it/sec) -training >> step=6038000, episode=1007 reward=0.7901489 (456.27 it/sec) -training >> step=6038100, episode=1007 reward=0.7972184 (461.84 it/sec) -training >> step=6038200, episode=1007 reward=0.7703503 (511.68 it/sec) -training >> step=6038300, episode=1007 reward=0.7899466 (507.68 it/sec) -training >> step=6038400, episode=1007 reward=0.7797753 (483.22 it/sec) -training >> step=6038500, episode=1007 reward=0.7822835 (469.05 it/sec) -training >> step=6038600, episode=1007 reward=0.7829365 (475.01 it/sec) -training >> step=6038700, episode=1007 reward=0.7935009 (464.94 it/sec) -training >> step=6038800, episode=1007 reward=0.7725725 (504.10 it/sec) -training >> step=6038900, episode=1007 reward=0.7656447 (482.66 it/sec) -training >> step=6039000, episode=1007 reward=0.7823672 (459.30 it/sec) -training >> step=6039100, episode=1007 reward=0.7753867 (482.92 it/sec) -training >> step=6039200, episode=1007 reward=0.7878654 (507.51 it/sec) -training >> step=6039300, episode=1007 reward=0.7882163 (517.00 it/sec) -training >> step=6039400, episode=1007 reward=0.7781872 (508.32 it/sec) -training >> step=6039500, episode=1007 reward=0.7830544 (510.83 it/sec) -training >> step=6039600, episode=1007 reward=0.7733575 (537.48 it/sec) -training >> step=6039700, episode=1007 reward=0.7940677 (529.49 it/sec) -training >> step=6039800, episode=1007 reward=0.7668578 (533.32 it/sec) -training >> step=6039900, episode=1007 reward=0.7714103 (519.31 it/sec) -training >> step=6040000, episode=1007 reward=0.7945457 (535.53 it/sec) -training >> step=6040100, episode=1007 reward=0.7708808 (528.28 it/sec) -training >> step=6040200, episode=1007 reward=0.7768648 (453.46 it/sec) -training >> step=6040300, episode=1007 reward=0.7730554 (520.60 it/sec) -training >> step=6040400, episode=1007 reward=0.7783105 (547.41 it/sec) -training >> step=6040500, episode=1007 reward=0.7724576 (510.04 it/sec) -training >> step=6040600, episode=1007 reward=0.7750127 (527.08 it/sec) -training >> step=6040700, episode=1007 reward=0.7882944 (526.42 it/sec) -training >> step=6040800, episode=1007 reward=0.7679366 (449.69 it/sec) -training >> step=6040900, episode=1007 reward=0.7723994 (495.78 it/sec) -training >> step=6041000, episode=1007 reward=0.77321 (489.52 it/sec) -training >> step=6041100, episode=1007 reward=0.7673599 (537.79 it/sec) -training >> step=6041200, episode=1007 reward=0.7843925 (527.13 it/sec) -training >> step=6041300, episode=1008 reward=0.7793298 (121.52 it/sec) -training >> step=6041400, episode=1008 reward=0.7987052 (549.84 it/sec) -training >> step=6041500, episode=1008 reward=0.7973272 (470.47 it/sec) -training >> step=6041600, episode=1008 reward=0.7860544 (407.74 it/sec) -training >> step=6041700, episode=1008 reward=0.7870447 (486.99 it/sec) -training >> step=6041800, episode=1008 reward=0.801353 (528.50 it/sec) -training >> step=6041900, episode=1008 reward=0.7932251 (427.30 it/sec) -training >> step=6042000, episode=1008 reward=0.7683699 (495.48 it/sec) -training >> step=6042100, episode=1008 reward=0.8110349 (517.38 it/sec) -training >> step=6042200, episode=1008 reward=0.7861723 (472.80 it/sec) -training >> step=6042300, episode=1008 reward=0.7707217 (482.68 it/sec) -training >> step=6042400, episode=1008 reward=0.8009493 (469.22 it/sec) -training >> step=6042500, episode=1008 reward=0.7801196 (522.67 it/sec) -training >> step=6042600, episode=1008 reward=0.7727718 (451.59 it/sec) -training >> step=6042700, episode=1008 reward=0.777254 (498.41 it/sec) -training >> step=6042800, episode=1008 reward=0.786348 (509.20 it/sec) -training >> step=6042900, episode=1008 reward=0.7777995 (486.02 it/sec) -training >> step=6043000, episode=1008 reward=0.7888221 (496.86 it/sec) -training >> step=6043100, episode=1008 reward=0.7869282 (464.44 it/sec) -training >> step=6043200, episode=1008 reward=0.7681702 (464.59 it/sec) -training >> step=6043300, episode=1008 reward=0.7597759 (485.20 it/sec) -training >> step=6043400, episode=1008 reward=0.7686067 (479.67 it/sec) -training >> step=6043500, episode=1008 reward=0.7775249 (445.54 it/sec) -training >> step=6043600, episode=1008 reward=0.7777099 (469.63 it/sec) -training >> step=6043700, episode=1008 reward=0.7715884 (470.86 it/sec) -training >> step=6043800, episode=1008 reward=0.7770851 (354.49 it/sec) -training >> step=6043900, episode=1008 reward=0.7741088 (470.78 it/sec) -training >> step=6044000, episode=1008 reward=0.777833 (504.09 it/sec) -training >> step=6044100, episode=1008 reward=0.7929615 (483.83 it/sec) -training >> step=6044200, episode=1008 reward=0.7682058 (482.70 it/sec) -training >> step=6044300, episode=1008 reward=0.7822087 (500.29 it/sec) -training >> step=6044400, episode=1008 reward=0.7911363 (438.69 it/sec) -training >> step=6044500, episode=1008 reward=0.791059 (505.22 it/sec) -training >> step=6044600, episode=1008 reward=0.7771876 (484.89 it/sec) -training >> step=6044700, episode=1008 reward=0.7930688 (510.67 it/sec) -training >> step=6044800, episode=1008 reward=0.7633926 (509.86 it/sec) -training >> step=6044900, episode=1008 reward=0.7877692 (446.58 it/sec) -training >> step=6045000, episode=1008 reward=0.7708774 (524.02 it/sec) -training >> step=6045100, episode=1008 reward=0.8139523 (495.93 it/sec) -training >> step=6045200, episode=1008 reward=0.7945771 (498.55 it/sec) -training >> step=6045300, episode=1008 reward=0.7678955 (524.71 it/sec) -training >> step=6045400, episode=1008 reward=0.7882748 (478.26 it/sec) -training >> step=6045500, episode=1008 reward=0.7969757 (497.71 it/sec) -training >> step=6045600, episode=1008 reward=0.7633775 (492.60 it/sec) -training >> step=6045700, episode=1008 reward=0.7807898 (528.97 it/sec) -training >> step=6045800, episode=1008 reward=0.781026 (513.22 it/sec) -training >> step=6045900, episode=1008 reward=0.7864999 (515.80 it/sec) -training >> step=6046000, episode=1008 reward=0.7815074 (460.24 it/sec) -training >> step=6046100, episode=1008 reward=0.8054373 (498.88 it/sec) -training >> step=6046200, episode=1008 reward=0.7723209 (492.44 it/sec) -training >> step=6046300, episode=1008 reward=0.7870153 (485.03 it/sec) -training >> step=6046400, episode=1008 reward=0.752369 (433.28 it/sec) -training >> step=6046500, episode=1008 reward=0.7793505 (502.85 it/sec) -training >> step=6046600, episode=1008 reward=0.800929 (495.27 it/sec) -training >> step=6046700, episode=1008 reward=0.7899303 (469.05 it/sec) -training >> step=6046800, episode=1008 reward=0.7755352 (526.81 it/sec) -training >> step=6046900, episode=1008 reward=0.7525867 (516.30 it/sec) -training >> step=6047000, episode=1008 reward=0.7687179 (486.47 it/sec) -training >> step=6047100, episode=1008 reward=0.7784331 (519.77 it/sec) -training >> step=6047200, episode=1008 reward=0.7729934 (495.60 it/sec) -training >> step=6047300, episode=1009 reward=0.7706612 (116.97 it/sec) -training >> step=6047400, episode=1009 reward=0.7484614 (444.69 it/sec) -training >> step=6047500, episode=1009 reward=0.7675983 (498.15 it/sec) -training >> step=6047600, episode=1009 reward=0.7892985 (472.47 it/sec) -training >> step=6047700, episode=1009 reward=0.7947772 (465.72 it/sec) -training >> step=6047800, episode=1009 reward=0.7734106 (508.33 it/sec) -training >> step=6047900, episode=1009 reward=0.7725891 (507.06 it/sec) -training >> step=6048000, episode=1009 reward=0.7857329 (470.89 it/sec) -training >> step=6048100, episode=1009 reward=0.8033274 (458.34 it/sec) -training >> step=6048200, episode=1009 reward=0.7886607 (456.89 it/sec) -training >> step=6048300, episode=1009 reward=0.7829084 (492.37 it/sec) -training >> step=6048400, episode=1009 reward=0.7964816 (508.63 it/sec) -training >> step=6048500, episode=1009 reward=0.7600957 (475.53 it/sec) -training >> step=6048600, episode=1009 reward=0.7735961 (506.22 it/sec) -training >> step=6048700, episode=1009 reward=0.8056584 (487.22 it/sec) -training >> step=6048800, episode=1009 reward=0.7891768 (474.38 it/sec) -training >> step=6048900, episode=1009 reward=0.7776421 (505.20 it/sec) -training >> step=6049000, episode=1009 reward=0.7726145 (461.39 it/sec) -training >> step=6049100, episode=1009 reward=0.8084715 (433.71 it/sec) -training >> step=6049200, episode=1009 reward=0.7834509 (494.73 it/sec) -training >> step=6049300, episode=1009 reward=0.7763154 (500.51 it/sec) -training >> step=6049400, episode=1009 reward=0.7662702 (495.54 it/sec) -training >> step=6049500, episode=1009 reward=0.7711554 (516.12 it/sec) -training >> step=6049600, episode=1009 reward=0.8009279 (480.46 it/sec) -training >> step=6049700, episode=1009 reward=0.8023336 (502.40 it/sec) -training >> step=6049800, episode=1009 reward=0.8007276 (474.52 it/sec) -training >> step=6049900, episode=1009 reward=0.7885741 (452.66 it/sec) -training >> step=6050000, episode=1009 reward=0.7592445 (486.43 it/sec) -training >> step=6050100, episode=1009 reward=0.7854886 (369.38 it/sec) -training >> step=6050200, episode=1009 reward=0.7954922 (502.14 it/sec) -training >> step=6050300, episode=1009 reward=0.7710466 (450.82 it/sec) -training >> step=6050400, episode=1009 reward=0.7831885 (473.49 it/sec) -training >> step=6050500, episode=1009 reward=0.7922039 (472.57 it/sec) -training >> step=6050600, episode=1009 reward=0.7961495 (505.65 it/sec) -training >> step=6050700, episode=1009 reward=0.7919201 (481.21 it/sec) -training >> step=6050800, episode=1009 reward=0.7927184 (519.04 it/sec) -training >> step=6050900, episode=1009 reward=0.7712751 (502.84 it/sec) -training >> step=6051000, episode=1009 reward=0.7976766 (492.75 it/sec) -training >> step=6051100, episode=1009 reward=0.7973592 (471.04 it/sec) -training >> step=6051200, episode=1009 reward=0.7977017 (496.76 it/sec) -training >> step=6051300, episode=1009 reward=0.8011926 (487.32 it/sec) -training >> step=6051400, episode=1009 reward=0.8006824 (510.02 it/sec) -training >> step=6051500, episode=1009 reward=0.7903485 (488.12 it/sec) -training >> step=6051600, episode=1009 reward=0.7900185 (470.66 it/sec) -training >> step=6051700, episode=1009 reward=0.7565396 (490.69 it/sec) -training >> step=6051800, episode=1009 reward=0.8046628 (456.28 it/sec) -training >> step=6051900, episode=1009 reward=0.7881547 (489.93 it/sec) -training >> step=6052000, episode=1009 reward=0.7604053 (493.47 it/sec) -training >> step=6052100, episode=1009 reward=0.78787 (475.25 it/sec) -training >> step=6052200, episode=1009 reward=0.7731353 (495.20 it/sec) -training >> step=6052300, episode=1009 reward=0.7906867 (513.27 it/sec) -training >> step=6052400, episode=1009 reward=0.765582 (481.12 it/sec) -training >> step=6052500, episode=1009 reward=0.7714893 (475.84 it/sec) -training >> step=6052600, episode=1009 reward=0.7696887 (486.73 it/sec) -training >> step=6052700, episode=1009 reward=0.7965564 (488.29 it/sec) -training >> step=6052800, episode=1009 reward=0.7763517 (471.63 it/sec) -training >> step=6052900, episode=1009 reward=0.7698187 (521.76 it/sec) -training >> step=6053000, episode=1009 reward=0.7778825 (536.59 it/sec) -training >> step=6053100, episode=1009 reward=0.772629 (448.77 it/sec) -training >> step=6053200, episode=1009 reward=0.7503781 (469.68 it/sec) -training >> step=6053300, episode=1010 reward=0.780243 (126.63 it/sec) -training >> step=6053400, episode=1010 reward=0.7704704 (524.04 it/sec) -training >> step=6053500, episode=1010 reward=0.8019292 (500.49 it/sec) -training >> step=6053600, episode=1010 reward=0.76623 (492.70 it/sec) -training >> step=6053700, episode=1010 reward=0.7821694 (527.48 it/sec) -training >> step=6053800, episode=1010 reward=0.7640604 (548.31 it/sec) -training >> step=6053900, episode=1010 reward=0.7651485 (480.47 it/sec) -training >> step=6054000, episode=1010 reward=0.7962579 (497.70 it/sec) -training >> step=6054100, episode=1010 reward=0.7731217 (519.84 it/sec) -training >> step=6054200, episode=1010 reward=0.7732993 (517.89 it/sec) -training >> step=6054300, episode=1010 reward=0.8051777 (479.54 it/sec) -training >> step=6054400, episode=1010 reward=0.7913827 (502.01 it/sec) -training >> step=6054500, episode=1010 reward=0.7748829 (453.53 it/sec) -training >> step=6054600, episode=1010 reward=0.7933921 (493.87 it/sec) -training >> step=6054700, episode=1010 reward=0.7934024 (488.64 it/sec) -training >> step=6054800, episode=1010 reward=0.791537 (455.89 it/sec) -training >> step=6054900, episode=1010 reward=0.7842653 (498.91 it/sec) -training >> step=6055000, episode=1010 reward=0.7915859 (481.76 it/sec) -training >> step=6055100, episode=1010 reward=0.7956694 (485.31 it/sec) -training >> step=6055200, episode=1010 reward=0.7797084 (518.77 it/sec) -training >> step=6055300, episode=1010 reward=0.7775196 (513.24 it/sec) -training >> step=6055400, episode=1010 reward=0.7829633 (484.04 it/sec) -training >> step=6055500, episode=1010 reward=0.7900419 (548.43 it/sec) -training >> step=6055600, episode=1010 reward=0.7626915 (482.71 it/sec) -training >> step=6055700, episode=1010 reward=0.7990866 (499.24 it/sec) -training >> step=6055800, episode=1010 reward=0.7551908 (513.38 it/sec) -training >> step=6055900, episode=1010 reward=0.7956136 (472.32 it/sec) -training >> step=6056000, episode=1010 reward=0.7743485 (500.76 it/sec) -training >> step=6056100, episode=1010 reward=0.7907758 (471.33 it/sec) -training >> step=6056200, episode=1010 reward=0.7895757 (462.22 it/sec) -training >> step=6056300, episode=1010 reward=0.7995391 (370.57 it/sec) -training >> step=6056400, episode=1010 reward=0.7911633 (490.09 it/sec) -training >> step=6056500, episode=1010 reward=0.7753443 (498.08 it/sec) -training >> step=6056600, episode=1010 reward=0.7930709 (442.90 it/sec) -training >> step=6056700, episode=1010 reward=0.7677785 (475.36 it/sec) -training >> step=6056800, episode=1010 reward=0.7829295 (457.19 it/sec) -training >> step=6056900, episode=1010 reward=0.802453 (503.99 it/sec) -training >> step=6057000, episode=1010 reward=0.7874579 (511.12 it/sec) -training >> step=6057100, episode=1010 reward=0.8003928 (490.10 it/sec) -training >> step=6057200, episode=1010 reward=0.8028924 (490.40 it/sec) -training >> step=6057300, episode=1010 reward=0.7815716 (528.92 it/sec) -training >> step=6057400, episode=1010 reward=0.7658529 (460.72 it/sec) -training >> step=6057500, episode=1010 reward=0.7928361 (474.84 it/sec) -training >> step=6057600, episode=1010 reward=0.7861433 (518.33 it/sec) -training >> step=6057700, episode=1010 reward=0.7979348 (485.34 it/sec) -training >> step=6057800, episode=1010 reward=0.7825186 (448.77 it/sec) -training >> step=6057900, episode=1010 reward=0.7877897 (500.53 it/sec) -training >> step=6058000, episode=1010 reward=0.7724779 (554.88 it/sec) -training >> step=6058100, episode=1010 reward=0.7734057 (496.67 it/sec) -training >> step=6058200, episode=1010 reward=0.7737428 (509.99 it/sec) -training >> step=6058300, episode=1010 reward=0.7790123 (520.09 it/sec) -training >> step=6058400, episode=1010 reward=0.7904666 (499.07 it/sec) -training >> step=6058500, episode=1010 reward=0.7735707 (533.20 it/sec) -training >> step=6058600, episode=1010 reward=0.7714763 (501.85 it/sec) -training >> step=6058700, episode=1010 reward=0.7733936 (488.84 it/sec) -training >> step=6058800, episode=1010 reward=0.7964192 (461.88 it/sec) -training >> step=6058900, episode=1010 reward=0.7622773 (473.34 it/sec) -training >> step=6059000, episode=1010 reward=0.7692617 (415.83 it/sec) -training >> step=6059100, episode=1010 reward=0.7830526 (499.30 it/sec) -training >> step=6059200, episode=1010 reward=0.7704696 (488.91 it/sec) -training >> step=6059300, episode=1011 reward=0.7882606 (124.98 it/sec) -training >> step=6059400, episode=1011 reward=0.7759222 (541.11 it/sec) -training >> step=6059500, episode=1011 reward=0.7910113 (508.06 it/sec) -training >> step=6059600, episode=1011 reward=0.7907891 (512.50 it/sec) -training >> step=6059700, episode=1011 reward=0.7827964 (465.89 it/sec) -training >> step=6059800, episode=1011 reward=0.764116 (533.55 it/sec) -training >> step=6059900, episode=1011 reward=0.7943585 (451.15 it/sec) -training >> step=6060000, episode=1011 reward=0.7778764 (456.60 it/sec) -training >> step=6060100, episode=1011 reward=0.7643011 (492.87 it/sec) -training >> step=6060200, episode=1011 reward=0.7797071 (523.15 it/sec) -training >> step=6060300, episode=1011 reward=0.7809825 (500.48 it/sec) -training >> step=6060400, episode=1011 reward=0.7764598 (463.59 it/sec) -training >> step=6060500, episode=1011 reward=0.7685612 (449.96 it/sec) -training >> step=6060600, episode=1011 reward=0.7939889 (495.82 it/sec) -training >> step=6060700, episode=1011 reward=0.7892597 (495.23 it/sec) -training >> step=6060800, episode=1011 reward=0.7851775 (480.92 it/sec) -training >> step=6060900, episode=1011 reward=0.7944819 (495.14 it/sec) -training >> step=6061000, episode=1011 reward=0.7934037 (507.91 it/sec) -training >> step=6061100, episode=1011 reward=0.793516 (473.73 it/sec) -training >> step=6061200, episode=1011 reward=0.7801942 (509.56 it/sec) -training >> step=6061300, episode=1011 reward=0.7700256 (489.95 it/sec) -training >> step=6061400, episode=1011 reward=0.7900645 (470.42 it/sec) -training >> step=6061500, episode=1011 reward=0.7926662 (503.05 it/sec) -training >> step=6061600, episode=1011 reward=0.7873049 (502.28 it/sec) -training >> step=6061700, episode=1011 reward=0.7740297 (483.27 it/sec) -training >> step=6061800, episode=1011 reward=0.7994008 (504.54 it/sec) -training >> step=6061900, episode=1011 reward=0.7715645 (501.72 it/sec) -training >> step=6062000, episode=1011 reward=0.7966841 (554.11 it/sec) -training >> step=6062100, episode=1011 reward=0.7846946 (503.18 it/sec) -training >> step=6062200, episode=1011 reward=0.8029348 (510.18 it/sec) -training >> step=6062300, episode=1011 reward=0.7749617 (504.26 it/sec) -training >> step=6062400, episode=1011 reward=0.8045621 (354.27 it/sec) -training >> step=6062500, episode=1011 reward=0.7700794 (503.28 it/sec) -training >> step=6062600, episode=1011 reward=0.7880996 (437.86 it/sec) -training >> step=6062700, episode=1011 reward=0.7854939 (550.60 it/sec) -training >> step=6062800, episode=1011 reward=0.7949356 (482.57 it/sec) -training >> step=6062900, episode=1011 reward=0.8035464 (482.29 it/sec) -training >> step=6063000, episode=1011 reward=0.7921474 (444.12 it/sec) -training >> step=6063100, episode=1011 reward=0.7715121 (448.33 it/sec) -training >> step=6063200, episode=1011 reward=0.7759752 (507.00 it/sec) -training >> step=6063300, episode=1011 reward=0.7976097 (462.77 it/sec) -training >> step=6063400, episode=1011 reward=0.7714813 (472.92 it/sec) -training >> step=6063500, episode=1011 reward=0.778755 (483.32 it/sec) -training >> step=6063600, episode=1011 reward=0.7942181 (482.60 it/sec) -training >> step=6063700, episode=1011 reward=0.7861589 (508.31 it/sec) -training >> step=6063800, episode=1011 reward=0.7670469 (511.05 it/sec) -training >> step=6063900, episode=1011 reward=0.7702233 (487.05 it/sec) -training >> step=6064000, episode=1011 reward=0.7701731 (447.73 it/sec) -training >> step=6064100, episode=1011 reward=0.7781842 (520.46 it/sec) -training >> step=6064200, episode=1011 reward=0.7885858 (519.33 it/sec) -training >> step=6064300, episode=1011 reward=0.780264 (490.05 it/sec) -training >> step=6064400, episode=1011 reward=0.7642374 (458.88 it/sec) -training >> step=6064500, episode=1011 reward=0.761999 (540.78 it/sec) -training >> step=6064600, episode=1011 reward=0.7459758 (485.59 it/sec) -training >> step=6064700, episode=1011 reward=0.7841598 (491.47 it/sec) -training >> step=6064800, episode=1011 reward=0.7875637 (534.83 it/sec) -training >> step=6064900, episode=1011 reward=0.7684236 (509.45 it/sec) -training >> step=6065000, episode=1011 reward=0.770476 (510.61 it/sec) -training >> step=6065100, episode=1011 reward=0.7739989 (461.73 it/sec) -training >> step=6065200, episode=1011 reward=0.7655765 (513.52 it/sec) -training >> step=6065300, episode=1012 reward=0.7930307 (122.16 it/sec) -training >> step=6065400, episode=1012 reward=0.7983453 (526.38 it/sec) -training >> step=6065500, episode=1012 reward=0.7482274 (455.51 it/sec) -training >> step=6065600, episode=1012 reward=0.7984728 (500.10 it/sec) -training >> step=6065700, episode=1012 reward=0.7730749 (484.58 it/sec) -training >> step=6065800, episode=1012 reward=0.7826905 (466.76 it/sec) -training >> step=6065900, episode=1012 reward=0.7799369 (556.46 it/sec) -training >> step=6066000, episode=1012 reward=0.7772213 (466.88 it/sec) -training >> step=6066100, episode=1012 reward=0.7847829 (510.67 it/sec) -training >> step=6066200, episode=1012 reward=0.7711492 (461.30 it/sec) -training >> step=6066300, episode=1012 reward=0.7713097 (479.89 it/sec) -training >> step=6066400, episode=1012 reward=0.7927386 (449.02 it/sec) -training >> step=6066500, episode=1012 reward=0.7819894 (497.22 it/sec) -training >> step=6066600, episode=1012 reward=0.772873 (490.09 it/sec) -training >> step=6066700, episode=1012 reward=0.7808151 (496.45 it/sec) -training >> step=6066800, episode=1012 reward=0.7816854 (520.42 it/sec) -training >> step=6066900, episode=1012 reward=0.7889949 (499.79 it/sec) -training >> step=6067000, episode=1012 reward=0.7918278 (491.51 it/sec) -training >> step=6067100, episode=1012 reward=0.7696413 (454.29 it/sec) -training >> step=6067200, episode=1012 reward=0.7769325 (463.95 it/sec) -training >> step=6067300, episode=1012 reward=0.7840196 (469.98 it/sec) -training >> step=6067400, episode=1012 reward=0.7605841 (489.81 it/sec) -training >> step=6067500, episode=1012 reward=0.79067 (484.20 it/sec) -training >> step=6067600, episode=1012 reward=0.7670397 (516.95 it/sec) -training >> step=6067700, episode=1012 reward=0.7760853 (524.09 it/sec) -training >> step=6067800, episode=1012 reward=0.7811869 (416.54 it/sec) -training >> step=6067900, episode=1012 reward=0.7707267 (493.67 it/sec) -training >> step=6068000, episode=1012 reward=0.7851788 (469.30 it/sec) -training >> step=6068100, episode=1012 reward=0.7889616 (519.87 it/sec) -training >> step=6068200, episode=1012 reward=0.787717 (443.62 it/sec) -training >> step=6068300, episode=1012 reward=0.7737122 (359.20 it/sec) -training >> step=6068400, episode=1012 reward=0.7965471 (431.22 it/sec) -training >> step=6068500, episode=1012 reward=0.7794111 (484.23 it/sec) -training >> step=6068600, episode=1012 reward=0.7806132 (504.53 it/sec) -training >> step=6068700, episode=1012 reward=0.7989767 (476.91 it/sec) -training >> step=6068800, episode=1012 reward=0.7805854 (543.62 it/sec) -training >> step=6068900, episode=1012 reward=0.7666301 (517.27 it/sec) -training >> step=6069000, episode=1012 reward=0.7839822 (474.41 it/sec) -training >> step=6069100, episode=1012 reward=0.774132 (519.32 it/sec) -training >> step=6069200, episode=1012 reward=0.7699831 (521.38 it/sec) -training >> step=6069300, episode=1012 reward=0.7852984 (492.03 it/sec) -training >> step=6069400, episode=1012 reward=0.7725415 (507.88 it/sec) -training >> step=6069500, episode=1012 reward=0.7777822 (525.59 it/sec) -training >> step=6069600, episode=1012 reward=0.7893322 (495.79 it/sec) -training >> step=6069700, episode=1012 reward=0.783468 (504.04 it/sec) -training >> step=6069800, episode=1012 reward=0.7725065 (497.44 it/sec) -training >> step=6069900, episode=1012 reward=0.7841089 (548.18 it/sec) -training >> step=6070000, episode=1012 reward=0.7899286 (486.79 it/sec) -training >> step=6070100, episode=1012 reward=0.7889668 (474.10 it/sec) -training >> step=6070200, episode=1012 reward=0.7828736 (495.46 it/sec) -training >> step=6070300, episode=1012 reward=0.7724904 (502.69 it/sec) -training >> step=6070400, episode=1012 reward=0.7768817 (499.83 it/sec) -training >> step=6070500, episode=1012 reward=0.7842327 (499.11 it/sec) -training >> step=6070600, episode=1012 reward=0.7957295 (465.68 it/sec) -training >> step=6070700, episode=1012 reward=0.7850857 (498.70 it/sec) -training >> step=6070800, episode=1012 reward=0.7756076 (486.99 it/sec) -training >> step=6070900, episode=1012 reward=0.7591204 (521.65 it/sec) -training >> step=6071000, episode=1012 reward=0.7654386 (491.61 it/sec) -training >> step=6071100, episode=1012 reward=0.7798264 (498.39 it/sec) -training >> step=6071200, episode=1012 reward=0.7898672 (491.70 it/sec) -training >> step=6071300, episode=1013 reward=0.7945288 (139.20 it/sec) -training >> step=6071400, episode=1013 reward=0.8025213 (527.02 it/sec) -training >> step=6071500, episode=1013 reward=0.7646843 (455.05 it/sec) -training >> step=6071600, episode=1013 reward=0.786918 (483.82 it/sec) -training >> step=6071700, episode=1013 reward=0.7984724 (473.57 it/sec) -training >> step=6071800, episode=1013 reward=0.7963172 (470.94 it/sec) -training >> step=6071900, episode=1013 reward=0.783881 (530.05 it/sec) -training >> step=6072000, episode=1013 reward=0.7747043 (514.44 it/sec) -training >> step=6072100, episode=1013 reward=0.7736115 (461.61 it/sec) -training >> step=6072200, episode=1013 reward=0.8066617 (519.47 it/sec) -training >> step=6072300, episode=1013 reward=0.7941918 (482.01 it/sec) -training >> step=6072400, episode=1013 reward=0.7747874 (527.54 it/sec) -training >> step=6072500, episode=1013 reward=0.7669352 (467.99 it/sec) -training >> step=6072600, episode=1013 reward=0.7665246 (516.28 it/sec) -training >> step=6072700, episode=1013 reward=0.7906766 (511.24 it/sec) -training >> step=6072800, episode=1013 reward=0.7765312 (449.97 it/sec) -training >> step=6072900, episode=1013 reward=0.7748783 (490.36 it/sec) -training >> step=6073000, episode=1013 reward=0.7896606 (477.06 it/sec) -training >> step=6073100, episode=1013 reward=0.7719969 (479.19 it/sec) -training >> step=6073200, episode=1013 reward=0.8166794 (476.94 it/sec) -training >> step=6073300, episode=1013 reward=0.7905302 (494.58 it/sec) -training >> step=6073400, episode=1013 reward=0.7855656 (525.65 it/sec) -training >> step=6073500, episode=1013 reward=0.785818 (488.27 it/sec) -training >> step=6073600, episode=1013 reward=0.7966132 (512.93 it/sec) -training >> step=6073700, episode=1013 reward=0.7766907 (530.19 it/sec) -training >> step=6073800, episode=1013 reward=0.7808249 (467.70 it/sec) -training >> step=6073900, episode=1013 reward=0.7597698 (500.83 it/sec) -training >> step=6074000, episode=1013 reward=0.7758843 (467.88 it/sec) -training >> step=6074100, episode=1013 reward=0.7746855 (524.79 it/sec) -training >> step=6074200, episode=1013 reward=0.7869332 (462.44 it/sec) -training >> step=6074300, episode=1013 reward=0.7832442 (484.41 it/sec) -training >> step=6074400, episode=1013 reward=0.7523157 (486.79 it/sec) -training >> step=6074500, episode=1013 reward=0.8032244 (393.40 it/sec) -training >> step=6074600, episode=1013 reward=0.7736299 (485.59 it/sec) -training >> step=6074700, episode=1013 reward=0.7957821 (479.15 it/sec) -training >> step=6074800, episode=1013 reward=0.7887809 (457.44 it/sec) -training >> step=6074900, episode=1013 reward=0.7698373 (520.27 it/sec) -training >> step=6075000, episode=1013 reward=0.8013493 (441.55 it/sec) -training >> step=6075100, episode=1013 reward=0.7787603 (527.42 it/sec) -training >> step=6075200, episode=1013 reward=0.7914925 (523.31 it/sec) -training >> step=6075300, episode=1013 reward=0.7756393 (491.27 it/sec) -training >> step=6075400, episode=1013 reward=0.8010694 (456.43 it/sec) -training >> step=6075500, episode=1013 reward=0.7775251 (542.34 it/sec) -training >> step=6075600, episode=1013 reward=0.7777362 (497.91 it/sec) -training >> step=6075700, episode=1013 reward=0.7652616 (510.06 it/sec) -training >> step=6075800, episode=1013 reward=0.7712843 (463.69 it/sec) -training >> step=6075900, episode=1013 reward=0.7809008 (531.67 it/sec) -training >> step=6076000, episode=1013 reward=0.7612 (520.13 it/sec) -training >> step=6076100, episode=1013 reward=0.7860701 (513.39 it/sec) -training >> step=6076200, episode=1013 reward=0.7710053 (520.64 it/sec) -training >> step=6076300, episode=1013 reward=0.7756559 (500.36 it/sec) -training >> step=6076400, episode=1013 reward=0.7859411 (431.27 it/sec) -training >> step=6076500, episode=1013 reward=0.7802745 (484.50 it/sec) -training >> step=6076600, episode=1013 reward=0.7840436 (503.88 it/sec) -training >> step=6076700, episode=1013 reward=0.7899482 (482.62 it/sec) -training >> step=6076800, episode=1013 reward=0.7613174 (461.28 it/sec) -training >> step=6076900, episode=1013 reward=0.7775213 (511.40 it/sec) -training >> step=6077000, episode=1013 reward=0.7905539 (509.45 it/sec) -training >> step=6077100, episode=1013 reward=0.7590596 (485.69 it/sec) -training >> step=6077200, episode=1013 reward=0.7867731 (469.59 it/sec) -training >> step=6077300, episode=1014 reward=0.8033121 (120.64 it/sec) -training >> step=6077400, episode=1014 reward=0.7755474 (500.26 it/sec) -training >> step=6077500, episode=1014 reward=0.7923657 (477.76 it/sec) -training >> step=6077600, episode=1014 reward=0.7705621 (491.72 it/sec) -training >> step=6077700, episode=1014 reward=0.7489936 (457.20 it/sec) -training >> step=6077800, episode=1014 reward=0.792461 (482.04 it/sec) -training >> step=6077900, episode=1014 reward=0.780121 (487.75 it/sec) -training >> step=6078000, episode=1014 reward=0.796557 (502.82 it/sec) -training >> step=6078100, episode=1014 reward=0.7865098 (503.62 it/sec) -training >> step=6078200, episode=1014 reward=0.7771915 (508.53 it/sec) -training >> step=6078300, episode=1014 reward=0.7743906 (507.81 it/sec) -training >> step=6078400, episode=1014 reward=0.7887532 (511.96 it/sec) -training >> step=6078500, episode=1014 reward=0.7883826 (479.27 it/sec) -training >> step=6078600, episode=1014 reward=0.7912941 (472.80 it/sec) -training >> step=6078700, episode=1014 reward=0.7941935 (497.12 it/sec) -training >> step=6078800, episode=1014 reward=0.7785416 (474.84 it/sec) -training >> step=6078900, episode=1014 reward=0.7770638 (482.85 it/sec) -training >> step=6079000, episode=1014 reward=0.7719676 (495.46 it/sec) -training >> step=6079100, episode=1014 reward=0.7986529 (502.35 it/sec) -training >> step=6079200, episode=1014 reward=0.7911912 (470.24 it/sec) -training >> step=6079300, episode=1014 reward=0.784028 (452.18 it/sec) -training >> step=6079400, episode=1014 reward=0.8040383 (482.79 it/sec) -training >> step=6079500, episode=1014 reward=0.7941574 (457.38 it/sec) -training >> step=6079600, episode=1014 reward=0.7756556 (489.28 it/sec) -training >> step=6079700, episode=1014 reward=0.7901537 (471.59 it/sec) -training >> step=6079800, episode=1014 reward=0.7789584 (475.01 it/sec) -training >> step=6079900, episode=1014 reward=0.7947446 (503.65 it/sec) -training >> step=6080000, episode=1014 reward=0.7813269 (447.64 it/sec) -training >> step=6080100, episode=1014 reward=0.7886159 (511.15 it/sec) -training >> step=6080200, episode=1014 reward=0.7978609 (530.01 it/sec) -training >> step=6080300, episode=1014 reward=0.7926613 (487.65 it/sec) -training >> step=6080400, episode=1014 reward=0.7761829 (435.48 it/sec) -training >> step=6080500, episode=1014 reward=0.7730025 (375.50 it/sec) -training >> step=6080600, episode=1014 reward=0.7867538 (486.44 it/sec) -training >> step=6080700, episode=1014 reward=0.7763731 (526.27 it/sec) -training >> step=6080800, episode=1014 reward=0.8039785 (498.19 it/sec) -training >> step=6080900, episode=1014 reward=0.7543461 (509.27 it/sec) -training >> step=6081000, episode=1014 reward=0.7832221 (471.64 it/sec) -training >> step=6081100, episode=1014 reward=0.7860867 (444.07 it/sec) -training >> step=6081200, episode=1014 reward=0.7843256 (485.09 it/sec) -training >> step=6081300, episode=1014 reward=0.7810029 (489.70 it/sec) -training >> step=6081400, episode=1014 reward=0.8011671 (478.26 it/sec) -training >> step=6081500, episode=1014 reward=0.8065398 (479.93 it/sec) -training >> step=6081600, episode=1014 reward=0.7732518 (464.82 it/sec) -training >> step=6081700, episode=1014 reward=0.7829131 (484.41 it/sec) -training >> step=6081800, episode=1014 reward=0.7956842 (501.50 it/sec) -training >> step=6081900, episode=1014 reward=0.7751909 (491.35 it/sec) -training >> step=6082000, episode=1014 reward=0.7884979 (501.68 it/sec) -training >> step=6082100, episode=1014 reward=0.7949092 (475.20 it/sec) -training >> step=6082200, episode=1014 reward=0.7655023 (485.43 it/sec) -training >> step=6082300, episode=1014 reward=0.777279 (477.96 it/sec) -training >> step=6082400, episode=1014 reward=0.7738074 (510.39 it/sec) -training >> step=6082500, episode=1014 reward=0.761899 (483.13 it/sec) -training >> step=6082600, episode=1014 reward=0.7780964 (501.86 it/sec) -training >> step=6082700, episode=1014 reward=0.7712409 (494.11 it/sec) -training >> step=6082800, episode=1014 reward=0.7664473 (457.07 it/sec) -training >> step=6082900, episode=1014 reward=0.7800567 (491.69 it/sec) -training >> step=6083000, episode=1014 reward=0.7870798 (477.76 it/sec) -training >> step=6083100, episode=1014 reward=0.7555045 (470.69 it/sec) -training >> step=6083200, episode=1014 reward=0.7568946 (478.46 it/sec) -training >> step=6083300, episode=1015 reward=0.7848528 (128.23 it/sec) -training >> step=6083400, episode=1015 reward=0.7689022 (492.59 it/sec) -training >> step=6083500, episode=1015 reward=0.7855958 (451.89 it/sec) -training >> step=6083600, episode=1015 reward=0.7635665 (479.92 it/sec) -training >> step=6083700, episode=1015 reward=0.7890381 (499.81 it/sec) -training >> step=6083800, episode=1015 reward=0.7642365 (471.68 it/sec) -training >> step=6083900, episode=1015 reward=0.782795 (513.53 it/sec) -training >> step=6084000, episode=1015 reward=0.7769667 (455.74 it/sec) -training >> step=6084100, episode=1015 reward=0.7503307 (499.52 it/sec) -training >> step=6084200, episode=1015 reward=0.776998 (469.18 it/sec) -training >> step=6084300, episode=1015 reward=0.7917517 (476.87 it/sec) -training >> step=6084400, episode=1015 reward=0.7871898 (509.56 it/sec) -training >> step=6084500, episode=1015 reward=0.776654 (442.20 it/sec) -training >> step=6084600, episode=1015 reward=0.797418 (475.76 it/sec) -training >> step=6084700, episode=1015 reward=0.7849489 (462.26 it/sec) -training >> step=6084800, episode=1015 reward=0.7620121 (476.29 it/sec) -training >> step=6084900, episode=1015 reward=0.7832489 (482.11 it/sec) -training >> step=6085000, episode=1015 reward=0.7673399 (484.92 it/sec) -training >> step=6085100, episode=1015 reward=0.777926 (475.42 it/sec) -training >> step=6085200, episode=1015 reward=0.7906687 (487.70 it/sec) -training >> step=6085300, episode=1015 reward=0.7857406 (465.38 it/sec) -training >> step=6085400, episode=1015 reward=0.7764975 (477.95 it/sec) -training >> step=6085500, episode=1015 reward=0.7822176 (475.05 it/sec) -training >> step=6085600, episode=1015 reward=0.7947879 (504.17 it/sec) -training >> step=6085700, episode=1015 reward=0.7807052 (450.17 it/sec) -training >> step=6085800, episode=1015 reward=0.8002383 (481.88 it/sec) -training >> step=6085900, episode=1015 reward=0.7727951 (510.99 it/sec) -training >> step=6086000, episode=1015 reward=0.7681159 (497.95 it/sec) -training >> step=6086100, episode=1015 reward=0.7973062 (475.38 it/sec) -training >> step=6086200, episode=1015 reward=0.7937093 (456.42 it/sec) -training >> step=6086300, episode=1015 reward=0.7756679 (495.82 it/sec) -training >> step=6086400, episode=1015 reward=0.7866265 (471.16 it/sec) -training >> step=6086500, episode=1015 reward=0.7848862 (446.20 it/sec) -training >> step=6086600, episode=1015 reward=0.7924572 (355.20 it/sec) -training >> step=6086700, episode=1015 reward=0.7780933 (508.81 it/sec) -training >> step=6086800, episode=1015 reward=0.7798318 (497.27 it/sec) -training >> step=6086900, episode=1015 reward=0.7959287 (479.91 it/sec) -training >> step=6087000, episode=1015 reward=0.7632425 (516.11 it/sec) -training >> step=6087100, episode=1015 reward=0.7866136 (434.98 it/sec) -training >> step=6087200, episode=1015 reward=0.7789344 (414.09 it/sec) -training >> step=6087300, episode=1015 reward=0.7938979 (435.98 it/sec) -training >> step=6087400, episode=1015 reward=0.7783535 (492.57 it/sec) -training >> step=6087500, episode=1015 reward=0.7962123 (474.76 it/sec) -training >> step=6087600, episode=1015 reward=0.7857069 (490.48 it/sec) -training >> step=6087700, episode=1015 reward=0.7909799 (487.90 it/sec) -training >> step=6087800, episode=1015 reward=0.7679442 (455.42 it/sec) -training >> step=6087900, episode=1015 reward=0.8040656 (484.20 it/sec) -training >> step=6088000, episode=1015 reward=0.7611313 (425.53 it/sec) -training >> step=6088100, episode=1015 reward=0.7905366 (489.17 it/sec) -training >> step=6088200, episode=1015 reward=0.7866665 (465.52 it/sec) -training >> step=6088300, episode=1015 reward=0.7725412 (467.75 it/sec) -training >> step=6088400, episode=1015 reward=0.7792905 (477.28 it/sec) -training >> step=6088500, episode=1015 reward=0.7999451 (441.29 it/sec) -training >> step=6088600, episode=1015 reward=0.7806108 (468.50 it/sec) -training >> step=6088700, episode=1015 reward=0.7941844 (496.19 it/sec) -training >> step=6088800, episode=1015 reward=0.7860155 (484.06 it/sec) -training >> step=6088900, episode=1015 reward=0.7761075 (525.12 it/sec) -training >> step=6089000, episode=1015 reward=0.7788452 (510.28 it/sec) -training >> step=6089100, episode=1015 reward=0.7942837 (536.38 it/sec) -training >> step=6089200, episode=1015 reward=0.7641953 (538.83 it/sec) -training >> step=6089300, episode=1016 reward=0.7913571 (138.34 it/sec) -training >> step=6089400, episode=1016 reward=0.7884864 (502.36 it/sec) -training >> step=6089500, episode=1016 reward=0.7889246 (528.37 it/sec) -training >> step=6089600, episode=1016 reward=0.7867385 (531.69 it/sec) -training >> step=6089700, episode=1016 reward=0.7718754 (529.90 it/sec) -training >> step=6089800, episode=1016 reward=0.7681007 (544.11 it/sec) -training >> step=6089900, episode=1016 reward=0.7777055 (540.40 it/sec) -training >> step=6090000, episode=1016 reward=0.7746086 (510.88 it/sec) -training >> step=6090100, episode=1016 reward=0.7757338 (503.51 it/sec) -training >> step=6090200, episode=1016 reward=0.7758076 (517.25 it/sec) -training >> step=6090300, episode=1016 reward=0.771613 (484.82 it/sec) -training >> step=6090400, episode=1016 reward=0.78859 (518.25 it/sec) -training >> step=6090500, episode=1016 reward=0.7883969 (523.48 it/sec) -training >> step=6090600, episode=1016 reward=0.7567568 (566.20 it/sec) -training >> step=6090700, episode=1016 reward=0.7684445 (524.07 it/sec) -training >> step=6090800, episode=1016 reward=0.7742604 (496.93 it/sec) -training >> step=6090900, episode=1016 reward=0.7650945 (550.49 it/sec) -training >> step=6091000, episode=1016 reward=0.7902474 (535.21 it/sec) -training >> step=6091100, episode=1016 reward=0.7985016 (511.67 it/sec) -training >> step=6091200, episode=1016 reward=0.7862566 (525.79 it/sec) -training >> step=6091300, episode=1016 reward=0.7953119 (517.49 it/sec) -training >> step=6091400, episode=1016 reward=0.7976533 (507.38 it/sec) -training >> step=6091500, episode=1016 reward=0.784574 (525.89 it/sec) -training >> step=6091600, episode=1016 reward=0.7947139 (526.77 it/sec) -training >> step=6091700, episode=1016 reward=0.7896399 (538.16 it/sec) -training >> step=6091800, episode=1016 reward=0.7837217 (530.83 it/sec) -training >> step=6091900, episode=1016 reward=0.7750062 (520.60 it/sec) -training >> step=6092000, episode=1016 reward=0.7847248 (521.83 it/sec) -training >> step=6092100, episode=1016 reward=0.7946966 (527.98 it/sec) -training >> step=6092200, episode=1016 reward=0.7743554 (527.79 it/sec) -training >> step=6092300, episode=1016 reward=0.7760201 (501.31 it/sec) -training >> step=6092400, episode=1016 reward=0.7861726 (513.63 it/sec) -training >> step=6092500, episode=1016 reward=0.7875955 (496.84 it/sec) -training >> step=6092600, episode=1016 reward=0.8141062 (492.30 it/sec) -training >> step=6092700, episode=1016 reward=0.7998136 (527.15 it/sec) -training >> step=6092800, episode=1016 reward=0.7911237 (423.99 it/sec) -training >> step=6092900, episode=1016 reward=0.8005776 (521.07 it/sec) -training >> step=6093000, episode=1016 reward=0.780326 (517.85 it/sec) -training >> step=6093100, episode=1016 reward=0.798438 (509.18 it/sec) -training >> step=6093200, episode=1016 reward=0.776915 (494.33 it/sec) -training >> step=6093300, episode=1016 reward=0.7641981 (521.69 it/sec) -training >> step=6093400, episode=1016 reward=0.780686 (486.52 it/sec) -training >> step=6093500, episode=1016 reward=0.7742485 (537.27 it/sec) -training >> step=6093600, episode=1016 reward=0.7615135 (517.97 it/sec) -training >> step=6093700, episode=1016 reward=0.7830059 (459.06 it/sec) -training >> step=6093800, episode=1016 reward=0.7908534 (518.80 it/sec) -training >> step=6093900, episode=1016 reward=0.7691885 (509.04 it/sec) -training >> step=6094000, episode=1016 reward=0.7956787 (509.66 it/sec) -training >> step=6094100, episode=1016 reward=0.7712765 (514.06 it/sec) -training >> step=6094200, episode=1016 reward=0.769962 (512.84 it/sec) -training >> step=6094300, episode=1016 reward=0.7770895 (501.65 it/sec) -training >> step=6094400, episode=1016 reward=0.7977786 (510.07 it/sec) -training >> step=6094500, episode=1016 reward=0.7828388 (528.29 it/sec) -training >> step=6094600, episode=1016 reward=0.7868912 (565.10 it/sec) -training >> step=6094700, episode=1016 reward=0.7831852 (509.76 it/sec) -training >> step=6094800, episode=1016 reward=0.7682148 (503.46 it/sec) -training >> step=6094900, episode=1016 reward=0.7978964 (518.02 it/sec) -training >> step=6095000, episode=1016 reward=0.7847646 (491.32 it/sec) -training >> step=6095100, episode=1016 reward=0.7866359 (534.33 it/sec) -training >> step=6095200, episode=1016 reward=0.7508995 (524.31 it/sec) -training >> step=6095300, episode=1017 reward=0.7776979 (122.95 it/sec) -training >> step=6095400, episode=1017 reward=0.7831888 (473.84 it/sec) -training >> step=6095500, episode=1017 reward=0.7856628 (450.26 it/sec) -training >> step=6095600, episode=1017 reward=0.7767944 (516.12 it/sec) -training >> step=6095700, episode=1017 reward=0.7777192 (527.92 it/sec) -training >> step=6095800, episode=1017 reward=0.7763458 (524.24 it/sec) -training >> step=6095900, episode=1017 reward=0.7732936 (498.32 it/sec) -training >> step=6096000, episode=1017 reward=0.7712411 (522.77 it/sec) -training >> step=6096100, episode=1017 reward=0.7843976 (525.19 it/sec) -training >> step=6096200, episode=1017 reward=0.7757648 (513.79 it/sec) -training >> step=6096300, episode=1017 reward=0.7885253 (520.90 it/sec) -training >> step=6096400, episode=1017 reward=0.8078142 (538.61 it/sec) -training >> step=6096500, episode=1017 reward=0.7827125 (492.69 it/sec) -training >> step=6096600, episode=1017 reward=0.777266 (468.41 it/sec) -training >> step=6096700, episode=1017 reward=0.7899327 (541.61 it/sec) -training >> step=6096800, episode=1017 reward=0.7979552 (510.19 it/sec) -training >> step=6096900, episode=1017 reward=0.7954084 (517.17 it/sec) -training >> step=6097000, episode=1017 reward=0.761441 (510.26 it/sec) -training >> step=6097100, episode=1017 reward=0.7902057 (502.92 it/sec) -training >> step=6097200, episode=1017 reward=0.783181 (505.40 it/sec) -training >> step=6097300, episode=1017 reward=0.7804552 (492.80 it/sec) -training >> step=6097400, episode=1017 reward=0.7905263 (505.24 it/sec) -training >> step=6097500, episode=1017 reward=0.7610037 (510.82 it/sec) -training >> step=6097600, episode=1017 reward=0.7804478 (530.99 it/sec) -training >> step=6097700, episode=1017 reward=0.7649528 (483.77 it/sec) -training >> step=6097800, episode=1017 reward=0.7761757 (529.81 it/sec) -training >> step=6097900, episode=1017 reward=0.762022 (507.92 it/sec) -training >> step=6098000, episode=1017 reward=0.7933536 (518.23 it/sec) -training >> step=6098100, episode=1017 reward=0.798411 (517.68 it/sec) -training >> step=6098200, episode=1017 reward=0.7848679 (524.64 it/sec) -training >> step=6098300, episode=1017 reward=0.7964914 (471.37 it/sec) -training >> step=6098400, episode=1017 reward=0.790383 (475.34 it/sec) -training >> step=6098500, episode=1017 reward=0.7881329 (530.91 it/sec) -training >> step=6098600, episode=1017 reward=0.8034068 (502.76 it/sec) -training >> step=6098700, episode=1017 reward=0.7908146 (503.14 it/sec) -training >> step=6098800, episode=1017 reward=0.7873869 (521.75 it/sec) -training >> step=6098900, episode=1017 reward=0.7918204 (388.32 it/sec) -training >> step=6099000, episode=1017 reward=0.7775075 (492.27 it/sec) -training >> step=6099100, episode=1017 reward=0.7875554 (515.13 it/sec) -training >> step=6099200, episode=1017 reward=0.7816247 (536.22 it/sec) -training >> step=6099300, episode=1017 reward=0.7760873 (536.17 it/sec) -training >> step=6099400, episode=1017 reward=0.7816001 (487.37 it/sec) -training >> step=6099500, episode=1017 reward=0.7979584 (509.31 it/sec) -training >> step=6099600, episode=1017 reward=0.7928205 (507.21 it/sec) -training >> step=6099700, episode=1017 reward=0.7858484 (509.49 it/sec) -training >> step=6099800, episode=1017 reward=0.7734539 (514.11 it/sec) -training >> step=6099900, episode=1017 reward=0.757829 (479.59 it/sec) -training >> step=6100000, episode=1017 reward=0.7900566 (544.57 it/sec) -training >> step=6100100, episode=1017 reward=0.7746398 (495.50 it/sec) -training >> step=6100200, episode=1017 reward=0.7794601 (435.66 it/sec) -training >> step=6100300, episode=1017 reward=0.7867721 (484.18 it/sec) -training >> step=6100400, episode=1017 reward=0.7934883 (495.03 it/sec) -training >> step=6100500, episode=1017 reward=0.7767187 (508.97 it/sec) -training >> step=6100600, episode=1017 reward=0.7458366 (524.40 it/sec) -training >> step=6100700, episode=1017 reward=0.7856103 (505.29 it/sec) -training >> step=6100800, episode=1017 reward=0.8003811 (492.50 it/sec) -training >> step=6100900, episode=1017 reward=0.7731419 (497.79 it/sec) -training >> step=6101000, episode=1017 reward=0.7920128 (514.48 it/sec) -training >> step=6101100, episode=1017 reward=0.7861602 (532.56 it/sec) -training >> step=6101200, episode=1017 reward=0.780656 (508.12 it/sec) -training >> step=6101300, episode=1018 reward=0.80033 (121.03 it/sec) -training >> step=6101400, episode=1018 reward=0.7780209 (499.58 it/sec) -training >> step=6101500, episode=1018 reward=0.8041955 (426.06 it/sec) -training >> step=6101600, episode=1018 reward=0.7717845 (459.16 it/sec) -training >> step=6101700, episode=1018 reward=0.7626421 (496.17 it/sec) -training >> step=6101800, episode=1018 reward=0.7653749 (501.93 it/sec) -training >> step=6101900, episode=1018 reward=0.7760879 (513.10 it/sec) -training >> step=6102000, episode=1018 reward=0.7813991 (484.17 it/sec) -training >> step=6102100, episode=1018 reward=0.7731391 (469.59 it/sec) -training >> step=6102200, episode=1018 reward=0.8005378 (537.38 it/sec) -training >> step=6102300, episode=1018 reward=0.7805272 (492.48 it/sec) -training >> step=6102400, episode=1018 reward=0.788421 (505.98 it/sec) -training >> step=6102500, episode=1018 reward=0.7716744 (507.29 it/sec) -training >> step=6102600, episode=1018 reward=0.7979938 (417.15 it/sec) -training >> step=6102700, episode=1018 reward=0.7808833 (509.13 it/sec) -training >> step=6102800, episode=1018 reward=0.7975888 (506.27 it/sec) -training >> step=6102900, episode=1018 reward=0.7958165 (540.27 it/sec) -training >> step=6103000, episode=1018 reward=0.77478 (510.26 it/sec) -training >> step=6103100, episode=1018 reward=0.7737172 (509.51 it/sec) -training >> step=6103200, episode=1018 reward=0.7842283 (500.56 it/sec) -training >> step=6103300, episode=1018 reward=0.7955877 (528.74 it/sec) -training >> step=6103400, episode=1018 reward=0.7927286 (508.67 it/sec) -training >> step=6103500, episode=1018 reward=0.7867124 (496.29 it/sec) -training >> step=6103600, episode=1018 reward=0.7860066 (519.83 it/sec) -training >> step=6103700, episode=1018 reward=0.7809356 (468.20 it/sec) -training >> step=6103800, episode=1018 reward=0.7943317 (459.18 it/sec) -training >> step=6103900, episode=1018 reward=0.7860605 (495.10 it/sec) -training >> step=6104000, episode=1018 reward=0.7744157 (520.10 it/sec) -training >> step=6104100, episode=1018 reward=0.7545806 (507.87 it/sec) -training >> step=6104200, episode=1018 reward=0.7975581 (468.59 it/sec) -training >> step=6104300, episode=1018 reward=0.7743636 (518.78 it/sec) -training >> step=6104400, episode=1018 reward=0.7843143 (481.10 it/sec) -training >> step=6104500, episode=1018 reward=0.7683817 (516.28 it/sec) -training >> step=6104600, episode=1018 reward=0.7968949 (511.19 it/sec) -training >> step=6104700, episode=1018 reward=0.7779264 (489.39 it/sec) -training >> step=6104800, episode=1018 reward=0.816303 (507.72 it/sec) -training >> step=6104900, episode=1018 reward=0.771578 (450.64 it/sec) -training >> step=6105000, episode=1018 reward=0.7548898 (502.23 it/sec) -training >> step=6105100, episode=1018 reward=0.8049384 (420.46 it/sec) -training >> step=6105200, episode=1018 reward=0.7674314 (493.40 it/sec) -training >> step=6105300, episode=1018 reward=0.7757466 (524.27 it/sec) -training >> step=6105400, episode=1018 reward=0.7873673 (543.66 it/sec) -training >> step=6105500, episode=1018 reward=0.7704564 (508.80 it/sec) -training >> step=6105600, episode=1018 reward=0.7820828 (516.84 it/sec) -training >> step=6105700, episode=1018 reward=0.7698964 (549.04 it/sec) -training >> step=6105800, episode=1018 reward=0.7774742 (488.63 it/sec) -training >> step=6105900, episode=1018 reward=0.7859537 (506.00 it/sec) -training >> step=6106000, episode=1018 reward=0.7770404 (515.68 it/sec) -training >> step=6106100, episode=1018 reward=0.7809669 (552.40 it/sec) -training >> step=6106200, episode=1018 reward=0.7907101 (508.26 it/sec) -training >> step=6106300, episode=1018 reward=0.7965282 (481.68 it/sec) -training >> step=6106400, episode=1018 reward=0.7780897 (534.37 it/sec) -training >> step=6106500, episode=1018 reward=0.7689289 (531.59 it/sec) -training >> step=6106600, episode=1018 reward=0.7647856 (518.69 it/sec) -training >> step=6106700, episode=1018 reward=0.7700689 (512.70 it/sec) -training >> step=6106800, episode=1018 reward=0.781849 (523.82 it/sec) -training >> step=6106900, episode=1018 reward=0.7682432 (478.06 it/sec) -training >> step=6107000, episode=1018 reward=0.7641593 (526.55 it/sec) -training >> step=6107100, episode=1018 reward=0.7723176 (496.01 it/sec) -training >> step=6107200, episode=1018 reward=0.7855827 (529.21 it/sec) -training >> step=6107300, episode=1019 reward=0.76609 (128.54 it/sec) -training >> step=6107400, episode=1019 reward=0.7716339 (489.16 it/sec) -training >> step=6107500, episode=1019 reward=0.7767817 (511.31 it/sec) -training >> step=6107600, episode=1019 reward=0.7916968 (492.00 it/sec) -training >> step=6107700, episode=1019 reward=0.7962095 (438.65 it/sec) -training >> step=6107800, episode=1019 reward=0.7782587 (482.04 it/sec) -training >> step=6107900, episode=1019 reward=0.7774428 (510.89 it/sec) -training >> step=6108000, episode=1019 reward=0.7747567 (547.18 it/sec) -training >> step=6108100, episode=1019 reward=0.7745798 (470.28 it/sec) -training >> step=6108200, episode=1019 reward=0.791903 (525.36 it/sec) -training >> step=6108300, episode=1019 reward=0.8118028 (550.54 it/sec) -training >> step=6108400, episode=1019 reward=0.7908819 (507.91 it/sec) -training >> step=6108500, episode=1019 reward=0.7754858 (525.22 it/sec) -training >> step=6108600, episode=1019 reward=0.7792507 (536.73 it/sec) -training >> step=6108700, episode=1019 reward=0.8042175 (419.69 it/sec) -training >> step=6108800, episode=1019 reward=0.7891096 (497.93 it/sec) -training >> step=6108900, episode=1019 reward=0.7908586 (527.36 it/sec) -training >> step=6109000, episode=1019 reward=0.7924762 (534.12 it/sec) -training >> step=6109100, episode=1019 reward=0.7951282 (511.03 it/sec) -training >> step=6109200, episode=1019 reward=0.7795588 (464.07 it/sec) -training >> step=6109300, episode=1019 reward=0.7728127 (530.92 it/sec) -training >> step=6109400, episode=1019 reward=0.7732078 (538.29 it/sec) -training >> step=6109500, episode=1019 reward=0.7928837 (537.26 it/sec) -training >> step=6109600, episode=1019 reward=0.7975404 (495.43 it/sec) -training >> step=6109700, episode=1019 reward=0.7638206 (459.25 it/sec) -training >> step=6109800, episode=1019 reward=0.7536851 (483.77 it/sec) -training >> step=6109900, episode=1019 reward=0.8014305 (485.68 it/sec) -training >> step=6110000, episode=1019 reward=0.7652252 (536.56 it/sec) -training >> step=6110100, episode=1019 reward=0.7757472 (539.95 it/sec) -training >> step=6110200, episode=1019 reward=0.7724268 (518.92 it/sec) -training >> step=6110300, episode=1019 reward=0.8007535 (454.49 it/sec) -training >> step=6110400, episode=1019 reward=0.777493 (527.04 it/sec) -training >> step=6110500, episode=1019 reward=0.7957388 (507.38 it/sec) -training >> step=6110600, episode=1019 reward=0.7584633 (510.54 it/sec) -training >> step=6110700, episode=1019 reward=0.8127444 (509.23 it/sec) -training >> step=6110800, episode=1019 reward=0.8016528 (436.56 it/sec) -training >> step=6110900, episode=1019 reward=0.7712287 (491.56 it/sec) -training >> step=6111000, episode=1019 reward=0.7971544 (470.44 it/sec) -training >> step=6111100, episode=1019 reward=0.7837778 (545.08 it/sec) -training >> step=6111200, episode=1019 reward=0.7920589 (521.36 it/sec) -training >> step=6111300, episode=1019 reward=0.7890904 (463.11 it/sec) -training >> step=6111400, episode=1019 reward=0.7884585 (392.86 it/sec) -training >> step=6111500, episode=1019 reward=0.7746103 (565.81 it/sec) -training >> step=6111600, episode=1019 reward=0.7796636 (521.68 it/sec) -training >> step=6111700, episode=1019 reward=0.7833698 (515.95 it/sec) -training >> step=6111800, episode=1019 reward=0.7926317 (491.38 it/sec) -training >> step=6111900, episode=1019 reward=0.7879616 (520.26 it/sec) -training >> step=6112000, episode=1019 reward=0.7810549 (536.77 it/sec) -training >> step=6112100, episode=1019 reward=0.7810335 (524.54 it/sec) -training >> step=6112200, episode=1019 reward=0.777973 (533.73 it/sec) -training >> step=6112300, episode=1019 reward=0.7822566 (444.89 it/sec) -training >> step=6112400, episode=1019 reward=0.7774034 (465.53 it/sec) -training >> step=6112500, episode=1019 reward=0.7715153 (513.16 it/sec) -training >> step=6112600, episode=1019 reward=0.7799937 (538.18 it/sec) -training >> step=6112700, episode=1019 reward=0.7677478 (500.53 it/sec) -training >> step=6112800, episode=1019 reward=0.7848227 (461.03 it/sec) -training >> step=6112900, episode=1019 reward=0.8024059 (488.37 it/sec) -training >> step=6113000, episode=1019 reward=0.7745904 (499.56 it/sec) -training >> step=6113100, episode=1019 reward=0.7798068 (450.81 it/sec) -training >> step=6113200, episode=1019 reward=0.7741133 (455.55 it/sec) -training >> step=6113300, episode=1020 reward=0.7660376 (118.83 it/sec) -training >> step=6113400, episode=1020 reward=0.7698662 (495.10 it/sec) -training >> step=6113500, episode=1020 reward=0.7699357 (452.56 it/sec) -training >> step=6113600, episode=1020 reward=0.7933459 (495.09 it/sec) -training >> step=6113700, episode=1020 reward=0.769522 (492.23 it/sec) -training >> step=6113800, episode=1020 reward=0.7976287 (496.67 it/sec) -training >> step=6113900, episode=1020 reward=0.7729833 (508.45 it/sec) -training >> step=6114000, episode=1020 reward=0.7685444 (470.18 it/sec) -training >> step=6114100, episode=1020 reward=0.7916849 (496.43 it/sec) -training >> step=6114200, episode=1020 reward=0.7764946 (515.47 it/sec) -training >> step=6114300, episode=1020 reward=0.7934952 (468.59 it/sec) -training >> step=6114400, episode=1020 reward=0.7633712 (476.84 it/sec) -training >> step=6114500, episode=1020 reward=0.779927 (437.17 it/sec) -training >> step=6114600, episode=1020 reward=0.785975 (495.81 it/sec) -training >> step=6114700, episode=1020 reward=0.7958128 (453.66 it/sec) -training >> step=6114800, episode=1020 reward=0.7724253 (466.33 it/sec) -training >> step=6114900, episode=1020 reward=0.769711 (505.59 it/sec) -training >> step=6115000, episode=1020 reward=0.7962844 (457.04 it/sec) -training >> step=6115100, episode=1020 reward=0.8040599 (508.88 it/sec) -training >> step=6115200, episode=1020 reward=0.7894989 (514.92 it/sec) -training >> step=6115300, episode=1020 reward=0.7764562 (500.54 it/sec) -training >> step=6115400, episode=1020 reward=0.7757987 (527.27 it/sec) -training >> step=6115500, episode=1020 reward=0.7972291 (473.82 it/sec) -training >> step=6115600, episode=1020 reward=0.7852077 (442.12 it/sec) -training >> step=6115700, episode=1020 reward=0.7749786 (512.46 it/sec) -training >> step=6115800, episode=1020 reward=0.7781622 (529.80 it/sec) -training >> step=6115900, episode=1020 reward=0.7764006 (485.16 it/sec) -training >> step=6116000, episode=1020 reward=0.7849489 (489.84 it/sec) -training >> step=6116100, episode=1020 reward=0.7972819 (441.14 it/sec) -training >> step=6116200, episode=1020 reward=0.7829083 (384.99 it/sec) -training >> step=6116300, episode=1020 reward=0.7945343 (381.20 it/sec) -training >> step=6116400, episode=1020 reward=0.7985712 (464.81 it/sec) -training >> step=6116500, episode=1020 reward=0.7816694 (449.78 it/sec) -training >> step=6116600, episode=1020 reward=0.7860049 (494.95 it/sec) -training >> step=6116700, episode=1020 reward=0.7914578 (509.15 it/sec) -training >> step=6116800, episode=1020 reward=0.7744614 (496.15 it/sec) -training >> step=6116900, episode=1020 reward=0.8148335 (455.07 it/sec) -training >> step=6117000, episode=1020 reward=0.7674054 (328.08 it/sec) -training >> step=6117100, episode=1020 reward=0.7845867 (396.18 it/sec) -training >> step=6117200, episode=1020 reward=0.7927164 (376.50 it/sec) -training >> step=6117300, episode=1020 reward=0.7836412 (396.99 it/sec) -training >> step=6117400, episode=1020 reward=0.7907264 (396.70 it/sec) -training >> step=6117500, episode=1020 reward=0.7654107 (421.03 it/sec) -training >> step=6117600, episode=1020 reward=0.7796529 (323.14 it/sec) -training >> step=6117700, episode=1020 reward=0.7965397 (467.26 it/sec) -training >> step=6117800, episode=1020 reward=0.7887142 (414.83 it/sec) -training >> step=6117900, episode=1020 reward=0.799527 (457.18 it/sec) -training >> step=6118000, episode=1020 reward=0.7848649 (455.82 it/sec) -training >> step=6118100, episode=1020 reward=0.8004377 (443.31 it/sec) -training >> step=6118200, episode=1020 reward=0.7840335 (448.88 it/sec) -training >> step=6118300, episode=1020 reward=0.7760444 (457.60 it/sec) -training >> step=6118400, episode=1020 reward=0.7757168 (481.65 it/sec) -training >> step=6118500, episode=1020 reward=0.7905187 (464.34 it/sec) -training >> step=6118600, episode=1020 reward=0.7826697 (470.16 it/sec) -training >> step=6118700, episode=1020 reward=0.7783481 (448.51 it/sec) -training >> step=6118800, episode=1020 reward=0.7962899 (432.75 it/sec) -training >> step=6118900, episode=1020 reward=0.7921166 (472.79 it/sec) -training >> step=6119000, episode=1020 reward=0.7961504 (465.31 it/sec) -training >> step=6119100, episode=1020 reward=0.7973269 (483.08 it/sec) -training >> step=6119200, episode=1020 reward=0.7902246 (399.37 it/sec) -training >> step=6119300, episode=1021 reward=0.7860639 (134.74 it/sec) -training >> step=6119400, episode=1021 reward=0.7938218 (433.91 it/sec) -training >> step=6119500, episode=1021 reward=0.7946556 (507.38 it/sec) -training >> step=6119600, episode=1021 reward=0.77976 (499.31 it/sec) -training >> step=6119700, episode=1021 reward=0.789564 (470.94 it/sec) -training >> step=6119800, episode=1021 reward=0.764019 (527.54 it/sec) -training >> step=6119900, episode=1021 reward=0.7806607 (484.82 it/sec) -training >> step=6120000, episode=1021 reward=0.7492457 (447.92 it/sec) -training >> step=6120100, episode=1021 reward=0.769712 (523.84 it/sec) -training >> step=6120200, episode=1021 reward=0.7646258 (546.16 it/sec) -training >> step=6120300, episode=1021 reward=0.7856054 (496.21 it/sec) -training >> step=6120400, episode=1021 reward=0.7722198 (479.45 it/sec) -training >> step=6120500, episode=1021 reward=0.7599857 (492.39 it/sec) -training >> step=6120600, episode=1021 reward=0.7950962 (505.03 it/sec) -training >> step=6120700, episode=1021 reward=0.7964733 (480.64 it/sec) -training >> step=6120800, episode=1021 reward=0.7612315 (488.58 it/sec) -training >> step=6120900, episode=1021 reward=0.7947085 (483.68 it/sec) -training >> step=6121000, episode=1021 reward=0.785857 (412.98 it/sec) -training >> step=6121100, episode=1021 reward=0.7568618 (494.32 it/sec) -training >> step=6121200, episode=1021 reward=0.7765363 (520.05 it/sec) -training >> step=6121300, episode=1021 reward=0.7891957 (522.69 it/sec) -training >> step=6121400, episode=1021 reward=0.7688828 (482.16 it/sec) -training >> step=6121500, episode=1021 reward=0.7849631 (431.41 it/sec) -training >> step=6121600, episode=1021 reward=0.7835394 (548.51 it/sec) -training >> step=6121700, episode=1021 reward=0.7762353 (518.94 it/sec) -training >> step=6121800, episode=1021 reward=0.8017595 (506.67 it/sec) -training >> step=6121900, episode=1021 reward=0.7712007 (503.44 it/sec) -training >> step=6122000, episode=1021 reward=0.7874228 (544.07 it/sec) -training >> step=6122100, episode=1021 reward=0.7716135 (492.18 it/sec) -training >> step=6122200, episode=1021 reward=0.7907611 (506.96 it/sec) -training >> step=6122300, episode=1021 reward=0.8055871 (485.33 it/sec) -training >> step=6122400, episode=1021 reward=0.7898299 (512.65 it/sec) -training >> step=6122500, episode=1021 reward=0.7847752 (502.92 it/sec) -training >> step=6122600, episode=1021 reward=0.7957683 (490.95 it/sec) -training >> step=6122700, episode=1021 reward=0.7824994 (544.88 it/sec) -training >> step=6122800, episode=1021 reward=0.789283 (518.15 it/sec) -training >> step=6122900, episode=1021 reward=0.776601 (494.26 it/sec) -training >> step=6123000, episode=1021 reward=0.797972 (502.98 it/sec) -training >> step=6123100, episode=1021 reward=0.774411 (493.67 it/sec) -training >> step=6123200, episode=1021 reward=0.7928786 (528.67 it/sec) -training >> step=6123300, episode=1021 reward=0.8071499 (444.95 it/sec) -training >> step=6123400, episode=1021 reward=0.7954297 (503.44 it/sec) -training >> step=6123500, episode=1021 reward=0.7990633 (459.19 it/sec) -training >> step=6123600, episode=1021 reward=0.7698167 (453.41 it/sec) -training >> step=6123700, episode=1021 reward=0.7867218 (376.56 it/sec) -training >> step=6123800, episode=1021 reward=0.7957673 (516.51 it/sec) -training >> step=6123900, episode=1021 reward=0.796263 (502.04 it/sec) -training >> step=6124000, episode=1021 reward=0.7865514 (512.25 it/sec) -training >> step=6124100, episode=1021 reward=0.801194 (491.36 it/sec) -training >> step=6124200, episode=1021 reward=0.7688226 (493.47 it/sec) -training >> step=6124300, episode=1021 reward=0.7866384 (435.55 it/sec) -training >> step=6124400, episode=1021 reward=0.7641964 (452.57 it/sec) -training >> step=6124500, episode=1021 reward=0.768056 (507.03 it/sec) -training >> step=6124600, episode=1021 reward=0.7731801 (522.94 it/sec) -training >> step=6124700, episode=1021 reward=0.7838131 (519.26 it/sec) -training >> step=6124800, episode=1021 reward=0.7943828 (481.58 it/sec) -training >> step=6124900, episode=1021 reward=0.7732745 (517.34 it/sec) -training >> step=6125000, episode=1021 reward=0.7806448 (486.23 it/sec) -training >> step=6125100, episode=1021 reward=0.7964274 (494.06 it/sec) -training >> step=6125200, episode=1021 reward=0.7822146 (472.76 it/sec) -training >> step=6125300, episode=1022 reward=0.7864721 (119.04 it/sec) -training >> step=6125400, episode=1022 reward=0.7713733 (523.21 it/sec) -training >> step=6125500, episode=1022 reward=0.7736312 (407.67 it/sec) -training >> step=6125600, episode=1022 reward=0.7888736 (434.96 it/sec) -training >> step=6125700, episode=1022 reward=0.7834531 (477.59 it/sec) -training >> step=6125800, episode=1022 reward=0.7765138 (440.47 it/sec) -training >> step=6125900, episode=1022 reward=0.7795954 (402.02 it/sec) -training >> step=6126000, episode=1022 reward=0.801181 (478.91 it/sec) -training >> step=6126100, episode=1022 reward=0.7726985 (445.40 it/sec) -training >> step=6126200, episode=1022 reward=0.7758642 (454.61 it/sec) -training >> step=6126300, episode=1022 reward=0.8022432 (488.99 it/sec) -training >> step=6126400, episode=1022 reward=0.7848408 (470.26 it/sec) -training >> step=6126500, episode=1022 reward=0.800972 (478.37 it/sec) -training >> step=6126600, episode=1022 reward=0.793703 (465.81 it/sec) -training >> step=6126700, episode=1022 reward=0.767715 (469.40 it/sec) -training >> step=6126800, episode=1022 reward=0.7935508 (489.07 it/sec) -training >> step=6126900, episode=1022 reward=0.7939673 (461.35 it/sec) -training >> step=6127000, episode=1022 reward=0.7779415 (456.74 it/sec) -training >> step=6127100, episode=1022 reward=0.7714165 (510.90 it/sec) -training >> step=6127200, episode=1022 reward=0.7797992 (453.67 it/sec) -training >> step=6127300, episode=1022 reward=0.8113223 (493.93 it/sec) -training >> step=6127400, episode=1022 reward=0.7837473 (480.31 it/sec) -training >> step=6127500, episode=1022 reward=0.7678886 (455.59 it/sec) -training >> step=6127600, episode=1022 reward=0.7862533 (489.41 it/sec) -training >> step=6127700, episode=1022 reward=0.7694674 (473.64 it/sec) -training >> step=6127800, episode=1022 reward=0.7965694 (491.32 it/sec) -training >> step=6127900, episode=1022 reward=0.7931961 (447.32 it/sec) -training >> step=6128000, episode=1022 reward=0.777422 (450.79 it/sec) -training >> step=6128100, episode=1022 reward=0.792586 (451.10 it/sec) -training >> step=6128200, episode=1022 reward=0.7969587 (452.14 it/sec) -training >> step=6128300, episode=1022 reward=0.7662419 (458.24 it/sec) -training >> step=6128400, episode=1022 reward=0.7668655 (485.94 it/sec) -training >> step=6128500, episode=1022 reward=0.7973384 (445.11 it/sec) -training >> step=6128600, episode=1022 reward=0.7955558 (475.55 it/sec) -training >> step=6128700, episode=1022 reward=0.7744618 (466.65 it/sec) -training >> step=6128800, episode=1022 reward=0.769099 (471.21 it/sec) -training >> step=6128900, episode=1022 reward=0.7971195 (463.53 it/sec) -training >> step=6129000, episode=1022 reward=0.7810669 (457.90 it/sec) -training >> step=6129100, episode=1022 reward=0.7756689 (437.68 it/sec) -training >> step=6129200, episode=1022 reward=0.8040312 (501.68 it/sec) -training >> step=6129300, episode=1022 reward=0.7819866 (482.88 it/sec) -training >> step=6129400, episode=1022 reward=0.7747417 (480.99 it/sec) -training >> step=6129500, episode=1022 reward=0.7784836 (498.45 it/sec) -training >> step=6129600, episode=1022 reward=0.7824897 (484.51 it/sec) -training >> step=6129700, episode=1022 reward=0.7696633 (457.13 it/sec) -training >> step=6129800, episode=1022 reward=0.7833048 (484.15 it/sec) -training >> step=6129900, episode=1022 reward=0.8009983 (345.71 it/sec) -training >> step=6130000, episode=1022 reward=0.7789245 (470.86 it/sec) -training >> step=6130100, episode=1022 reward=0.7829557 (470.91 it/sec) -training >> step=6130200, episode=1022 reward=0.784692 (432.27 it/sec) -training >> step=6130300, episode=1022 reward=0.805449 (510.00 it/sec) -training >> step=6130400, episode=1022 reward=0.7825297 (471.57 it/sec) -training >> step=6130500, episode=1022 reward=0.7722174 (490.20 it/sec) -training >> step=6130600, episode=1022 reward=0.7649513 (477.99 it/sec) -training >> step=6130700, episode=1022 reward=0.7696394 (490.18 it/sec) -training >> step=6130800, episode=1022 reward=0.7735169 (465.90 it/sec) -training >> step=6130900, episode=1022 reward=0.7771658 (471.28 it/sec) -training >> step=6131000, episode=1022 reward=0.7889187 (474.43 it/sec) -training >> step=6131100, episode=1022 reward=0.7851934 (477.63 it/sec) -training >> step=6131200, episode=1022 reward=0.7793024 (424.79 it/sec) -training >> step=6131300, episode=1023 reward=0.7927 (122.50 it/sec) -training >> step=6131400, episode=1023 reward=0.7776161 (480.83 it/sec) -training >> step=6131500, episode=1023 reward=0.7820885 (470.23 it/sec) -training >> step=6131600, episode=1023 reward=0.7743676 (478.60 it/sec) -training >> step=6131700, episode=1023 reward=0.7637783 (479.71 it/sec) -training >> step=6131800, episode=1023 reward=0.778103 (464.27 it/sec) -training >> step=6131900, episode=1023 reward=0.7854465 (482.80 it/sec) -training >> step=6132000, episode=1023 reward=0.7882199 (472.16 it/sec) -training >> step=6132100, episode=1023 reward=0.7832319 (446.55 it/sec) -training >> step=6132200, episode=1023 reward=0.7981446 (482.62 it/sec) -training >> step=6132300, episode=1023 reward=0.8015268 (434.13 it/sec) -training >> step=6132400, episode=1023 reward=0.7945711 (457.71 it/sec) -training >> step=6132500, episode=1023 reward=0.7732224 (482.86 it/sec) -training >> step=6132600, episode=1023 reward=0.786077 (429.30 it/sec) -training >> step=6132700, episode=1023 reward=0.7892733 (441.55 it/sec) -training >> step=6132800, episode=1023 reward=0.7861523 (477.90 it/sec) -training >> step=6132900, episode=1023 reward=0.7745349 (500.17 it/sec) -training >> step=6133000, episode=1023 reward=0.7767991 (480.11 it/sec) -training >> step=6133100, episode=1023 reward=0.8147289 (439.04 it/sec) -training >> step=6133200, episode=1023 reward=0.7881225 (519.73 it/sec) -training >> step=6133300, episode=1023 reward=0.8030313 (472.76 it/sec) -training >> step=6133400, episode=1023 reward=0.7727801 (452.97 it/sec) -training >> step=6133500, episode=1023 reward=0.8046125 (484.68 it/sec) -training >> step=6133600, episode=1023 reward=0.7712021 (529.50 it/sec) -training >> step=6133700, episode=1023 reward=0.795785 (481.64 it/sec) -training >> step=6133800, episode=1023 reward=0.7837173 (478.29 it/sec) -training >> step=6133900, episode=1023 reward=0.7834811 (509.35 it/sec) -training >> step=6134000, episode=1023 reward=0.7690976 (525.77 it/sec) -training >> step=6134100, episode=1023 reward=0.7654378 (496.15 it/sec) -training >> step=6134200, episode=1023 reward=0.7841617 (504.74 it/sec) -training >> step=6134300, episode=1023 reward=0.7882186 (512.25 it/sec) -training >> step=6134400, episode=1023 reward=0.7757798 (527.29 it/sec) -training >> step=6134500, episode=1023 reward=0.7946662 (518.56 it/sec) -training >> step=6134600, episode=1023 reward=0.7827049 (481.87 it/sec) -training >> step=6134700, episode=1023 reward=0.7994813 (530.10 it/sec) -training >> step=6134800, episode=1023 reward=0.7780026 (478.84 it/sec) -training >> step=6134900, episode=1023 reward=0.8054031 (517.78 it/sec) -training >> step=6135000, episode=1023 reward=0.7935527 (520.98 it/sec) -training >> step=6135100, episode=1023 reward=0.7948229 (439.73 it/sec) -training >> step=6135200, episode=1023 reward=0.7829912 (482.33 it/sec) -training >> step=6135300, episode=1023 reward=0.7760112 (520.68 it/sec) -training >> step=6135400, episode=1023 reward=0.7933487 (455.70 it/sec) -training >> step=6135500, episode=1023 reward=0.7870947 (449.03 it/sec) -training >> step=6135600, episode=1023 reward=0.7624496 (473.81 it/sec) -training >> step=6135700, episode=1023 reward=0.7883622 (501.44 it/sec) -training >> step=6135800, episode=1023 reward=0.7780322 (520.38 it/sec) -training >> step=6135900, episode=1023 reward=0.771852 (539.46 it/sec) -training >> step=6136000, episode=1023 reward=0.8106218 (361.80 it/sec) -training >> step=6136100, episode=1023 reward=0.7806474 (502.13 it/sec) -training >> step=6136200, episode=1023 reward=0.7827626 (510.75 it/sec) -training >> step=6136300, episode=1023 reward=0.7956958 (483.68 it/sec) -training >> step=6136400, episode=1023 reward=0.7673201 (536.49 it/sec) -training >> step=6136500, episode=1023 reward=0.7704648 (484.49 it/sec) -training >> step=6136600, episode=1023 reward=0.7955165 (491.40 it/sec) -training >> step=6136700, episode=1023 reward=0.7983485 (504.01 it/sec) -training >> step=6136800, episode=1023 reward=0.7969573 (486.26 it/sec) -training >> step=6136900, episode=1023 reward=0.7874131 (495.63 it/sec) -training >> step=6137000, episode=1023 reward=0.8182513 (472.20 it/sec) -training >> step=6137100, episode=1023 reward=0.7803512 (490.81 it/sec) -training >> step=6137200, episode=1023 reward=0.7832217 (492.36 it/sec) -training >> step=6137300, episode=1024 reward=0.7738453 (124.17 it/sec) -training >> step=6137400, episode=1024 reward=0.7797057 (497.54 it/sec) -training >> step=6137500, episode=1024 reward=0.7655281 (476.19 it/sec) -training >> step=6137600, episode=1024 reward=0.8000509 (510.86 it/sec) -training >> step=6137700, episode=1024 reward=0.7650672 (509.00 it/sec) -training >> step=6137800, episode=1024 reward=0.7770699 (508.61 it/sec) -training >> step=6137900, episode=1024 reward=0.7686763 (491.95 it/sec) -training >> step=6138000, episode=1024 reward=0.811422 (516.08 it/sec) -training >> step=6138100, episode=1024 reward=0.774374 (489.42 it/sec) -training >> step=6138200, episode=1024 reward=0.7673754 (441.09 it/sec) -training >> step=6138300, episode=1024 reward=0.7865657 (515.51 it/sec) -training >> step=6138400, episode=1024 reward=0.770651 (520.56 it/sec) -training >> step=6138500, episode=1024 reward=0.7883111 (502.29 it/sec) -training >> step=6138600, episode=1024 reward=0.7909179 (506.44 it/sec) -training >> step=6138700, episode=1024 reward=0.784525 (488.37 it/sec) -training >> step=6138800, episode=1024 reward=0.7724361 (457.93 it/sec) -training >> step=6138900, episode=1024 reward=0.7841875 (497.56 it/sec) -training >> step=6139000, episode=1024 reward=0.7906576 (503.04 it/sec) -training >> step=6139100, episode=1024 reward=0.7786384 (486.44 it/sec) -training >> step=6139200, episode=1024 reward=0.7837465 (495.55 it/sec) -training >> step=6139300, episode=1024 reward=0.7693707 (501.81 it/sec) -training >> step=6139400, episode=1024 reward=0.7677404 (520.71 it/sec) -training >> step=6139500, episode=1024 reward=0.7871435 (497.36 it/sec) -training >> step=6139600, episode=1024 reward=0.7622195 (492.84 it/sec) -training >> step=6139700, episode=1024 reward=0.7599216 (468.81 it/sec) -training >> step=6139800, episode=1024 reward=0.7850354 (522.45 it/sec) -training >> step=6139900, episode=1024 reward=0.8020379 (504.33 it/sec) -training >> step=6140000, episode=1024 reward=0.8016797 (500.33 it/sec) -training >> step=6140100, episode=1024 reward=0.7607873 (515.65 it/sec) -training >> step=6140200, episode=1024 reward=0.7895427 (527.15 it/sec) -training >> step=6140300, episode=1024 reward=0.7668316 (518.60 it/sec) -training >> step=6140400, episode=1024 reward=0.7687148 (497.92 it/sec) -training >> step=6140500, episode=1024 reward=0.7964536 (512.52 it/sec) -training >> step=6140600, episode=1024 reward=0.7927344 (483.19 it/sec) -training >> step=6140700, episode=1024 reward=0.8058372 (499.11 it/sec) -training >> step=6140800, episode=1024 reward=0.7824462 (515.09 it/sec) -training >> step=6140900, episode=1024 reward=0.791941 (506.66 it/sec) -training >> step=6141000, episode=1024 reward=0.7668174 (499.47 it/sec) -training >> step=6141100, episode=1024 reward=0.7812321 (474.52 it/sec) -training >> step=6141200, episode=1024 reward=0.7700253 (489.51 it/sec) -training >> step=6141300, episode=1024 reward=0.7833346 (486.17 it/sec) -training >> step=6141400, episode=1024 reward=0.7898441 (496.53 it/sec) -training >> step=6141500, episode=1024 reward=0.8008698 (501.94 it/sec) -training >> step=6141600, episode=1024 reward=0.7864448 (532.37 it/sec) -training >> step=6141700, episode=1024 reward=0.802716 (486.47 it/sec) -training >> step=6141800, episode=1024 reward=0.7845926 (478.05 it/sec) -training >> step=6141900, episode=1024 reward=0.7709954 (472.42 it/sec) -training >> step=6142000, episode=1024 reward=0.7839289 (494.58 it/sec) -training >> step=6142100, episode=1024 reward=0.7918251 (389.35 it/sec) -training >> step=6142200, episode=1024 reward=0.7740015 (533.33 it/sec) -training >> step=6142300, episode=1024 reward=0.7865762 (534.73 it/sec) -training >> step=6142400, episode=1024 reward=0.7949145 (471.48 it/sec) -training >> step=6142500, episode=1024 reward=0.7924554 (516.00 it/sec) -training >> step=6142600, episode=1024 reward=0.8073083 (504.04 it/sec) -training >> step=6142700, episode=1024 reward=0.7892709 (489.38 it/sec) -training >> step=6142800, episode=1024 reward=0.7992211 (508.67 it/sec) -training >> step=6142900, episode=1024 reward=0.7724215 (438.07 it/sec) -training >> step=6143000, episode=1024 reward=0.8005643 (490.64 it/sec) -training >> step=6143100, episode=1024 reward=0.7665789 (562.62 it/sec) -training >> step=6143200, episode=1024 reward=0.7867995 (502.21 it/sec) -training >> step=6143300, episode=1025 reward=0.7764978 (119.85 it/sec) -training >> step=6143400, episode=1025 reward=0.7787572 (505.88 it/sec) -training >> step=6143500, episode=1025 reward=0.7789228 (506.26 it/sec) -training >> step=6143600, episode=1025 reward=0.7760681 (492.59 it/sec) -training >> step=6143700, episode=1025 reward=0.7679427 (495.08 it/sec) -training >> step=6143800, episode=1025 reward=0.7992795 (511.43 it/sec) -training >> step=6143900, episode=1025 reward=0.79438 (491.98 it/sec) -training >> step=6144000, episode=1025 reward=0.7654525 (502.71 it/sec) -training >> step=6144100, episode=1025 reward=0.7828528 (461.00 it/sec) -training >> step=6144200, episode=1025 reward=0.7909365 (512.81 it/sec) -training >> step=6144300, episode=1025 reward=0.779632 (504.87 it/sec) -training >> step=6144400, episode=1025 reward=0.7773796 (461.13 it/sec) -training >> step=6144500, episode=1025 reward=0.8033759 (504.77 it/sec) -training >> step=6144600, episode=1025 reward=0.7750926 (502.29 it/sec) -training >> step=6144700, episode=1025 reward=0.7862867 (523.80 it/sec) -training >> step=6144800, episode=1025 reward=0.7754225 (504.60 it/sec) -training >> step=6144900, episode=1025 reward=0.7861992 (465.81 it/sec) -training >> step=6145000, episode=1025 reward=0.7685618 (544.87 it/sec) -training >> step=6145100, episode=1025 reward=0.7751936 (477.69 it/sec) -training >> step=6145200, episode=1025 reward=0.7975645 (512.22 it/sec) -training >> step=6145300, episode=1025 reward=0.7935768 (507.27 it/sec) -training >> step=6145400, episode=1025 reward=0.7806342 (495.65 it/sec) -training >> step=6145500, episode=1025 reward=0.7865928 (505.83 it/sec) -training >> step=6145600, episode=1025 reward=0.7972332 (518.52 it/sec) -training >> step=6145700, episode=1025 reward=0.7849833 (492.17 it/sec) -training >> step=6145800, episode=1025 reward=0.7786143 (500.60 it/sec) -training >> step=6145900, episode=1025 reward=0.7943632 (478.08 it/sec) -training >> step=6146000, episode=1025 reward=0.7719016 (507.56 it/sec) -training >> step=6146100, episode=1025 reward=0.7894732 (531.09 it/sec) -training >> step=6146200, episode=1025 reward=0.7973432 (484.09 it/sec) -training >> step=6146300, episode=1025 reward=0.7742592 (475.47 it/sec) -training >> step=6146400, episode=1025 reward=0.7973505 (500.76 it/sec) -training >> step=6146500, episode=1025 reward=0.7989043 (558.37 it/sec) -training >> step=6146600, episode=1025 reward=0.8001501 (498.73 it/sec) -training >> step=6146700, episode=1025 reward=0.7877766 (479.36 it/sec) -training >> step=6146800, episode=1025 reward=0.7774212 (506.20 it/sec) -training >> step=6146900, episode=1025 reward=0.7937102 (554.86 it/sec) -training >> step=6147000, episode=1025 reward=0.7858384 (528.90 it/sec) -training >> step=6147100, episode=1025 reward=0.7887368 (551.90 it/sec) -training >> step=6147200, episode=1025 reward=0.7899676 (537.69 it/sec) -training >> step=6147300, episode=1025 reward=0.7813224 (513.01 it/sec) -training >> step=6147400, episode=1025 reward=0.7952335 (497.96 it/sec) -training >> step=6147500, episode=1025 reward=0.7704427 (507.25 it/sec) -training >> step=6147600, episode=1025 reward=0.7850936 (566.25 it/sec) -training >> step=6147700, episode=1025 reward=0.7986732 (533.89 it/sec) -training >> step=6147800, episode=1025 reward=0.7832054 (532.65 it/sec) -training >> step=6147900, episode=1025 reward=0.8007236 (518.21 it/sec) -training >> step=6148000, episode=1025 reward=0.7938469 (532.54 it/sec) -training >> step=6148100, episode=1025 reward=0.787 (369.21 it/sec) -training >> step=6148200, episode=1025 reward=0.7771571 (535.56 it/sec) -training >> step=6148300, episode=1025 reward=0.7684563 (549.95 it/sec) -training >> step=6148400, episode=1025 reward=0.8093376 (490.14 it/sec) -training >> step=6148500, episode=1025 reward=0.7851425 (459.85 it/sec) -training >> step=6148600, episode=1025 reward=0.7761033 (501.80 it/sec) -training >> step=6148700, episode=1025 reward=0.8002499 (556.64 it/sec) -training >> step=6148800, episode=1025 reward=0.7668313 (521.90 it/sec) -training >> step=6148900, episode=1025 reward=0.7969406 (503.89 it/sec) -training >> step=6149000, episode=1025 reward=0.7694511 (531.28 it/sec) -training >> step=6149100, episode=1025 reward=0.7815882 (508.30 it/sec) -training >> step=6149200, episode=1025 reward=0.7638654 (513.12 it/sec) -training >> step=6149300, episode=1026 reward=0.7793186 (139.38 it/sec) -training >> step=6149400, episode=1026 reward=0.7740805 (525.88 it/sec) -training >> step=6149500, episode=1026 reward=0.7952946 (517.19 it/sec) -training >> step=6149600, episode=1026 reward=0.7699072 (530.37 it/sec) -training >> step=6149700, episode=1026 reward=0.7805873 (514.56 it/sec) -training >> step=6149800, episode=1026 reward=0.8003344 (551.53 it/sec) -training >> step=6149900, episode=1026 reward=0.7979307 (492.74 it/sec) -training >> step=6150000, episode=1026 reward=0.7933304 (528.16 it/sec) -training >> step=6150100, episode=1026 reward=0.7758795 (525.91 it/sec) -training >> step=6150200, episode=1026 reward=0.7842982 (512.46 it/sec) -training >> step=6150300, episode=1026 reward=0.7847067 (512.01 it/sec) -training >> step=6150400, episode=1026 reward=0.7750379 (500.49 it/sec) -training >> step=6150500, episode=1026 reward=0.7722155 (552.22 it/sec) -training >> step=6150600, episode=1026 reward=0.7872002 (512.47 it/sec) -training >> step=6150700, episode=1026 reward=0.7676821 (508.46 it/sec) -training >> step=6150800, episode=1026 reward=0.7914352 (527.62 it/sec) -training >> step=6150900, episode=1026 reward=0.7733014 (552.05 it/sec) -training >> step=6151000, episode=1026 reward=0.7938069 (436.19 it/sec) -training >> step=6151100, episode=1026 reward=0.7965239 (526.68 it/sec) -training >> step=6151200, episode=1026 reward=0.7956334 (532.99 it/sec) -training >> step=6151300, episode=1026 reward=0.771939 (517.25 it/sec) -training >> step=6151400, episode=1026 reward=0.7967697 (515.47 it/sec) -training >> step=6151500, episode=1026 reward=0.783641 (480.53 it/sec) -training >> step=6151600, episode=1026 reward=0.7855774 (505.55 it/sec) -training >> step=6151700, episode=1026 reward=0.7812742 (507.47 it/sec) -training >> step=6151800, episode=1026 reward=0.7636309 (517.11 it/sec) -training >> step=6151900, episode=1026 reward=0.771548 (567.13 it/sec) -training >> step=6152000, episode=1026 reward=0.7940378 (508.62 it/sec) -training >> step=6152100, episode=1026 reward=0.7853813 (522.32 it/sec) -training >> step=6152200, episode=1026 reward=0.8025566 (506.30 it/sec) -training >> step=6152300, episode=1026 reward=0.7724525 (518.58 it/sec) -training >> step=6152400, episode=1026 reward=0.7915245 (503.26 it/sec) -training >> step=6152500, episode=1026 reward=0.8006497 (518.01 it/sec) -training >> step=6152600, episode=1026 reward=0.7985858 (495.09 it/sec) -training >> step=6152700, episode=1026 reward=0.7677676 (550.61 it/sec) -training >> step=6152800, episode=1026 reward=0.7807738 (538.17 it/sec) -training >> step=6152900, episode=1026 reward=0.7832231 (483.76 it/sec) -training >> step=6153000, episode=1026 reward=0.7918775 (539.46 it/sec) -training >> step=6153100, episode=1026 reward=0.7793202 (479.62 it/sec) -training >> step=6153200, episode=1026 reward=0.7883011 (547.96 it/sec) -training >> step=6153300, episode=1026 reward=0.7858855 (519.33 it/sec) -training >> step=6153400, episode=1026 reward=0.80684 (538.60 it/sec) -training >> step=6153500, episode=1026 reward=0.7987051 (507.97 it/sec) -training >> step=6153600, episode=1026 reward=0.8014877 (517.09 it/sec) -training >> step=6153700, episode=1026 reward=0.7824258 (535.71 it/sec) -training >> step=6153800, episode=1026 reward=0.7820206 (548.61 it/sec) -training >> step=6153900, episode=1026 reward=0.7865655 (540.36 it/sec) -training >> step=6154000, episode=1026 reward=0.8019698 (501.00 it/sec) -training >> step=6154100, episode=1026 reward=0.7610845 (540.43 it/sec) -training >> step=6154200, episode=1026 reward=0.7773604 (368.80 it/sec) -training >> step=6154300, episode=1026 reward=0.7935559 (528.14 it/sec) -training >> step=6154400, episode=1026 reward=0.7761733 (529.59 it/sec) -training >> step=6154500, episode=1026 reward=0.7732798 (532.87 it/sec) -training >> step=6154600, episode=1026 reward=0.7645937 (526.13 it/sec) -training >> step=6154700, episode=1026 reward=0.7698797 (501.33 it/sec) -training >> step=6154800, episode=1026 reward=0.7820593 (546.99 it/sec) -training >> step=6154900, episode=1026 reward=0.7816284 (533.46 it/sec) -training >> step=6155000, episode=1026 reward=0.7590351 (511.05 it/sec) -training >> step=6155100, episode=1026 reward=0.7808077 (535.57 it/sec) -training >> step=6155200, episode=1026 reward=0.7788135 (531.37 it/sec) -training >> step=6155300, episode=1027 reward=0.7970259 (129.37 it/sec) -training >> step=6155400, episode=1027 reward=0.7797484 (498.67 it/sec) -training >> step=6155500, episode=1027 reward=0.7823361 (518.35 it/sec) -training >> step=6155600, episode=1027 reward=0.7687482 (513.91 it/sec) -training >> step=6155700, episode=1027 reward=0.7906055 (522.24 it/sec) -training >> step=6155800, episode=1027 reward=0.7854514 (516.03 it/sec) -training >> step=6155900, episode=1027 reward=0.7852192 (530.55 it/sec) -training >> step=6156000, episode=1027 reward=0.7725536 (470.99 it/sec) -training >> step=6156100, episode=1027 reward=0.7914909 (520.79 it/sec) -training >> step=6156200, episode=1027 reward=0.7875771 (508.68 it/sec) -training >> step=6156300, episode=1027 reward=0.7724176 (530.69 it/sec) -training >> step=6156400, episode=1027 reward=0.762694 (487.94 it/sec) -training >> step=6156500, episode=1027 reward=0.7987643 (458.93 it/sec) -training >> step=6156600, episode=1027 reward=0.7770547 (549.97 it/sec) -training >> step=6156700, episode=1027 reward=0.7788664 (519.40 it/sec) -training >> step=6156800, episode=1027 reward=0.7587257 (516.44 it/sec) -training >> step=6156900, episode=1027 reward=0.7594573 (544.47 it/sec) -training >> step=6157000, episode=1027 reward=0.7765236 (522.25 it/sec) -training >> step=6157100, episode=1027 reward=0.7843425 (505.87 it/sec) -training >> step=6157200, episode=1027 reward=0.8071355 (520.28 it/sec) -training >> step=6157300, episode=1027 reward=0.7739922 (516.10 it/sec) -training >> step=6157400, episode=1027 reward=0.788375 (554.20 it/sec) -training >> step=6157500, episode=1027 reward=0.7830788 (510.99 it/sec) -training >> step=6157600, episode=1027 reward=0.7971579 (456.68 it/sec) -training >> step=6157700, episode=1027 reward=0.7905663 (498.87 it/sec) -training >> step=6157800, episode=1027 reward=0.7962943 (491.22 it/sec) -training >> step=6157900, episode=1027 reward=0.7914982 (499.44 it/sec) -training >> step=6158000, episode=1027 reward=0.7869273 (479.96 it/sec) -training >> step=6158100, episode=1027 reward=0.7717546 (514.57 it/sec) -training >> step=6158200, episode=1027 reward=0.7944779 (472.17 it/sec) -training >> step=6158300, episode=1027 reward=0.7956456 (486.65 it/sec) -training >> step=6158400, episode=1027 reward=0.7741245 (483.14 it/sec) -training >> step=6158500, episode=1027 reward=0.7745916 (527.85 it/sec) -training >> step=6158600, episode=1027 reward=0.7925628 (469.27 it/sec) -training >> step=6158700, episode=1027 reward=0.7785418 (522.29 it/sec) -training >> step=6158800, episode=1027 reward=0.7755274 (498.09 it/sec) -training >> step=6158900, episode=1027 reward=0.7845829 (497.65 it/sec) -training >> step=6159000, episode=1027 reward=0.7760071 (522.66 it/sec) -training >> step=6159100, episode=1027 reward=0.7668811 (481.99 it/sec) -training >> step=6159200, episode=1027 reward=0.8032039 (526.22 it/sec) -training >> step=6159300, episode=1027 reward=0.7675748 (485.96 it/sec) -training >> step=6159400, episode=1027 reward=0.7920266 (491.48 it/sec) -training >> step=6159500, episode=1027 reward=0.7877662 (488.98 it/sec) -training >> step=6159600, episode=1027 reward=0.7938268 (466.35 it/sec) -training >> step=6159700, episode=1027 reward=0.7847751 (520.02 it/sec) -training >> step=6159800, episode=1027 reward=0.7825665 (519.55 it/sec) -training >> step=6159900, episode=1027 reward=0.7847292 (456.33 it/sec) -training >> step=6160000, episode=1027 reward=0.7951296 (497.44 it/sec) -training >> step=6160100, episode=1027 reward=0.805205 (489.42 it/sec) -training >> step=6160200, episode=1027 reward=0.7911075 (340.27 it/sec) -training >> step=6160300, episode=1027 reward=0.7976591 (469.50 it/sec) -training >> step=6160400, episode=1027 reward=0.7949322 (488.26 it/sec) -training >> step=6160500, episode=1027 reward=0.7927919 (476.45 it/sec) -training >> step=6160600, episode=1027 reward=0.7907259 (441.31 it/sec) -training >> step=6160700, episode=1027 reward=0.7699715 (468.93 it/sec) -training >> step=6160800, episode=1027 reward=0.7887576 (468.58 it/sec) -training >> step=6160900, episode=1027 reward=0.7836517 (510.33 it/sec) -training >> step=6161000, episode=1027 reward=0.8081968 (521.88 it/sec) -training >> step=6161100, episode=1027 reward=0.7905015 (484.11 it/sec) -training >> step=6161200, episode=1027 reward=0.7892094 (467.95 it/sec) -training >> step=6161300, episode=1028 reward=0.7889934 (98.27 it/sec) -training >> step=6161400, episode=1028 reward=0.7972916 (384.80 it/sec) -training >> step=6161500, episode=1028 reward=0.7935103 (499.25 it/sec) -training >> step=6161600, episode=1028 reward=0.7806756 (434.18 it/sec) -training >> step=6161700, episode=1028 reward=0.7912133 (441.18 it/sec) -training >> step=6161800, episode=1028 reward=0.7885216 (485.02 it/sec) -training >> step=6161900, episode=1028 reward=0.7923509 (489.91 it/sec) -training >> step=6162000, episode=1028 reward=0.7765804 (528.58 it/sec) -training >> step=6162100, episode=1028 reward=0.8153198 (469.25 it/sec) -training >> step=6162200, episode=1028 reward=0.7782034 (490.39 it/sec) -training >> step=6162300, episode=1028 reward=0.8086352 (518.77 it/sec) -training >> step=6162400, episode=1028 reward=0.7851785 (464.12 it/sec) -training >> step=6162500, episode=1028 reward=0.7853659 (455.02 it/sec) -training >> step=6162600, episode=1028 reward=0.789882 (480.88 it/sec) -training >> step=6162700, episode=1028 reward=0.788461 (501.27 it/sec) -training >> step=6162800, episode=1028 reward=0.7832365 (465.01 it/sec) -training >> step=6162900, episode=1028 reward=0.7869881 (502.24 it/sec) -training >> step=6163000, episode=1028 reward=0.7718316 (489.68 it/sec) -training >> step=6163100, episode=1028 reward=0.7957076 (484.88 it/sec) -training >> step=6163200, episode=1028 reward=0.7894971 (495.11 it/sec) -training >> step=6163300, episode=1028 reward=0.7896926 (445.77 it/sec) -training >> step=6163400, episode=1028 reward=0.7773238 (508.10 it/sec) -training >> step=6163500, episode=1028 reward=0.7639037 (431.45 it/sec) -training >> step=6163600, episode=1028 reward=0.7957056 (462.74 it/sec) -training >> step=6163700, episode=1028 reward=0.7803951 (491.69 it/sec) -training >> step=6163800, episode=1028 reward=0.7681524 (513.81 it/sec) -training >> step=6163900, episode=1028 reward=0.7788455 (462.88 it/sec) -training >> step=6164000, episode=1028 reward=0.7949423 (505.04 it/sec) -training >> step=6164100, episode=1028 reward=0.7799979 (430.36 it/sec) -training >> step=6164200, episode=1028 reward=0.789776 (533.50 it/sec) -training >> step=6164300, episode=1028 reward=0.7820094 (497.11 it/sec) -training >> step=6164400, episode=1028 reward=0.7695609 (497.53 it/sec) -training >> step=6164500, episode=1028 reward=0.7761197 (482.38 it/sec) -training >> step=6164600, episode=1028 reward=0.7899697 (483.07 it/sec) -training >> step=6164700, episode=1028 reward=0.7829674 (506.40 it/sec) -training >> step=6164800, episode=1028 reward=0.7786506 (517.64 it/sec) -training >> step=6164900, episode=1028 reward=0.7862955 (525.89 it/sec) -training >> step=6165000, episode=1028 reward=0.7858844 (506.23 it/sec) -training >> step=6165100, episode=1028 reward=0.7970731 (464.29 it/sec) -training >> step=6165200, episode=1028 reward=0.7837446 (528.81 it/sec) -training >> step=6165300, episode=1028 reward=0.7932521 (484.94 it/sec) -training >> step=6165400, episode=1028 reward=0.7777711 (484.72 it/sec) -training >> step=6165500, episode=1028 reward=0.7721878 (500.33 it/sec) -training >> step=6165600, episode=1028 reward=0.7828812 (473.36 it/sec) -training >> step=6165700, episode=1028 reward=0.7720958 (486.28 it/sec) -training >> step=6165800, episode=1028 reward=0.7923452 (477.43 it/sec) -training >> step=6165900, episode=1028 reward=0.7653551 (489.71 it/sec) -training >> step=6166000, episode=1028 reward=0.7845945 (535.57 it/sec) -training >> step=6166100, episode=1028 reward=0.7640767 (473.93 it/sec) -training >> step=6166200, episode=1028 reward=0.7542032 (466.70 it/sec) -training >> step=6166300, episode=1028 reward=0.7526021 (519.37 it/sec) -training >> step=6166400, episode=1028 reward=0.7826613 (501.27 it/sec) -training >> step=6166500, episode=1028 reward=0.7900625 (382.28 it/sec) -training >> step=6166600, episode=1028 reward=0.7758456 (476.54 it/sec) -training >> step=6166700, episode=1028 reward=0.7750848 (495.67 it/sec) -training >> step=6166800, episode=1028 reward=0.7758583 (494.29 it/sec) -training >> step=6166900, episode=1028 reward=0.7867343 (483.64 it/sec) -training >> step=6167000, episode=1028 reward=0.7928303 (523.63 it/sec) -training >> step=6167100, episode=1028 reward=0.7852303 (488.69 it/sec) -training >> step=6167200, episode=1028 reward=0.7716027 (506.24 it/sec) -training >> step=6167300, episode=1029 reward=0.7760861 (130.06 it/sec) -training >> step=6167400, episode=1029 reward=0.7677253 (503.59 it/sec) -training >> step=6167500, episode=1029 reward=0.7885169 (411.51 it/sec) -training >> step=6167600, episode=1029 reward=0.79054 (501.08 it/sec) -training >> step=6167700, episode=1029 reward=0.8064221 (487.44 it/sec) -training >> step=6167800, episode=1029 reward=0.7784095 (521.66 it/sec) -training >> step=6167900, episode=1029 reward=0.775849 (508.19 it/sec) -training >> step=6168000, episode=1029 reward=0.7843118 (494.78 it/sec) -training >> step=6168100, episode=1029 reward=0.7992362 (431.38 it/sec) -training >> step=6168200, episode=1029 reward=0.7986147 (484.03 it/sec) -training >> step=6168300, episode=1029 reward=0.7884636 (463.47 it/sec) -training >> step=6168400, episode=1029 reward=0.7560303 (479.61 it/sec) -training >> step=6168500, episode=1029 reward=0.7795207 (540.05 it/sec) -training >> step=6168600, episode=1029 reward=0.7977077 (488.01 it/sec) -training >> step=6168700, episode=1029 reward=0.785848 (465.52 it/sec) -training >> step=6168800, episode=1029 reward=0.775425 (480.45 it/sec) -training >> step=6168900, episode=1029 reward=0.7689715 (515.92 it/sec) -training >> step=6169000, episode=1029 reward=0.787869 (471.01 it/sec) -training >> step=6169100, episode=1029 reward=0.7753742 (488.75 it/sec) -training >> step=6169200, episode=1029 reward=0.7689441 (517.50 it/sec) -training >> step=6169300, episode=1029 reward=0.7788603 (451.28 it/sec) -training >> step=6169400, episode=1029 reward=0.7753667 (469.49 it/sec) -training >> step=6169500, episode=1029 reward=0.7776541 (491.26 it/sec) -training >> step=6169600, episode=1029 reward=0.7754824 (488.62 it/sec) -training >> step=6169700, episode=1029 reward=0.7871009 (482.69 it/sec) -training >> step=6169800, episode=1029 reward=0.7818164 (492.26 it/sec) -training >> step=6169900, episode=1029 reward=0.7900047 (455.35 it/sec) -training >> step=6170000, episode=1029 reward=0.7845636 (484.26 it/sec) -training >> step=6170100, episode=1029 reward=0.7868971 (483.58 it/sec) -training >> step=6170200, episode=1029 reward=0.7779083 (461.33 it/sec) -training >> step=6170300, episode=1029 reward=0.7857752 (480.19 it/sec) -training >> step=6170400, episode=1029 reward=0.7705695 (387.07 it/sec) -training >> step=6170500, episode=1029 reward=0.7900546 (456.76 it/sec) -training >> step=6170600, episode=1029 reward=0.7758841 (499.23 it/sec) -training >> step=6170700, episode=1029 reward=0.7950519 (543.73 it/sec) -training >> step=6170800, episode=1029 reward=0.7632763 (494.34 it/sec) -training >> step=6170900, episode=1029 reward=0.780785 (504.76 it/sec) -training >> step=6171000, episode=1029 reward=0.782899 (500.14 it/sec) -training >> step=6171100, episode=1029 reward=0.767326 (478.77 it/sec) -training >> step=6171200, episode=1029 reward=0.7855327 (519.83 it/sec) -training >> step=6171300, episode=1029 reward=0.7915528 (493.46 it/sec) -training >> step=6171400, episode=1029 reward=0.7998158 (509.18 it/sec) -training >> step=6171500, episode=1029 reward=0.7778243 (488.43 it/sec) -training >> step=6171600, episode=1029 reward=0.788525 (480.98 it/sec) -training >> step=6171700, episode=1029 reward=0.7780463 (526.95 it/sec) -training >> step=6171800, episode=1029 reward=0.7917746 (524.31 it/sec) -training >> step=6171900, episode=1029 reward=0.7843741 (493.32 it/sec) -training >> step=6172000, episode=1029 reward=0.7680165 (501.69 it/sec) -training >> step=6172100, episode=1029 reward=0.7728075 (470.22 it/sec) -training >> step=6172200, episode=1029 reward=0.8001559 (502.43 it/sec) -training >> step=6172300, episode=1029 reward=0.7765785 (472.45 it/sec) -training >> step=6172400, episode=1029 reward=0.776504 (518.06 it/sec) -training >> step=6172500, episode=1029 reward=0.7911697 (504.84 it/sec) -training >> step=6172600, episode=1029 reward=0.7617676 (359.63 it/sec) -training >> step=6172700, episode=1029 reward=0.7811693 (490.36 it/sec) -training >> step=6172800, episode=1029 reward=0.7680132 (488.47 it/sec) -training >> step=6172900, episode=1029 reward=0.7902083 (497.58 it/sec) -training >> step=6173000, episode=1029 reward=0.7898911 (462.78 it/sec) -training >> step=6173100, episode=1029 reward=0.7815545 (449.76 it/sec) -training >> step=6173200, episode=1029 reward=0.7898362 (450.60 it/sec) -training >> step=6173300, episode=1030 reward=0.7827879 (157.74 it/sec) -training >> step=6173400, episode=1030 reward=0.7782924 (458.65 it/sec) -training >> step=6173500, episode=1030 reward=0.7891643 (475.24 it/sec) -training >> step=6173600, episode=1030 reward=0.8004231 (445.74 it/sec) -training >> step=6173700, episode=1030 reward=0.7762788 (498.63 it/sec) -training >> step=6173800, episode=1030 reward=0.7862326 (448.71 it/sec) -training >> step=6173900, episode=1030 reward=0.7642254 (530.17 it/sec) -training >> step=6174000, episode=1030 reward=0.7999782 (475.97 it/sec) -training >> step=6174100, episode=1030 reward=0.7719316 (451.76 it/sec) -training >> step=6174200, episode=1030 reward=0.7830737 (490.09 it/sec) -training >> step=6174300, episode=1030 reward=0.7943923 (454.30 it/sec) -training >> step=6174400, episode=1030 reward=0.7825362 (473.68 it/sec) -training >> step=6174500, episode=1030 reward=0.7841159 (502.68 it/sec) -training >> step=6174600, episode=1030 reward=0.7746513 (517.66 it/sec) -training >> step=6174700, episode=1030 reward=0.7796831 (465.23 it/sec) -training >> step=6174800, episode=1030 reward=0.7852207 (470.82 it/sec) -training >> step=6174900, episode=1030 reward=0.7717332 (488.48 it/sec) -training >> step=6175000, episode=1030 reward=0.7815209 (501.89 it/sec) -training >> step=6175100, episode=1030 reward=0.8052595 (463.92 it/sec) -training >> step=6175200, episode=1030 reward=0.7737482 (469.45 it/sec) -training >> step=6175300, episode=1030 reward=0.7800058 (479.98 it/sec) -training >> step=6175400, episode=1030 reward=0.7756475 (491.44 it/sec) -training >> step=6175500, episode=1030 reward=0.7814777 (487.48 it/sec) -training >> step=6175600, episode=1030 reward=0.8003545 (479.04 it/sec) -training >> step=6175700, episode=1030 reward=0.7709132 (468.02 it/sec) -training >> step=6175800, episode=1030 reward=0.7836772 (474.12 it/sec) -training >> step=6175900, episode=1030 reward=0.7993472 (482.52 it/sec) -training >> step=6176000, episode=1030 reward=0.7885551 (487.09 it/sec) -training >> step=6176100, episode=1030 reward=0.8001774 (530.14 it/sec) -training >> step=6176200, episode=1030 reward=0.7852523 (503.49 it/sec) -training >> step=6176300, episode=1030 reward=0.7744454 (436.33 it/sec) -training >> step=6176400, episode=1030 reward=0.7982107 (491.10 it/sec) -training >> step=6176500, episode=1030 reward=0.7989138 (465.81 it/sec) -training >> step=6176600, episode=1030 reward=0.763841 (487.14 it/sec) -training >> step=6176700, episode=1030 reward=0.7873169 (483.74 it/sec) -training >> step=6176800, episode=1030 reward=0.8022954 (495.28 it/sec) -training >> step=6176900, episode=1030 reward=0.7810405 (477.19 it/sec) -training >> step=6177000, episode=1030 reward=0.814647 (475.13 it/sec) -training >> step=6177100, episode=1030 reward=0.7817031 (453.07 it/sec) -training >> step=6177200, episode=1030 reward=0.8077471 (518.46 it/sec) -training >> step=6177300, episode=1030 reward=0.7847727 (487.32 it/sec) -training >> step=6177400, episode=1030 reward=0.7858691 (447.10 it/sec) -training >> step=6177500, episode=1030 reward=0.7907207 (498.84 it/sec) -training >> step=6177600, episode=1030 reward=0.766062 (468.34 it/sec) -training >> step=6177700, episode=1030 reward=0.7840319 (516.61 it/sec) -training >> step=6177800, episode=1030 reward=0.7822359 (485.02 it/sec) -training >> step=6177900, episode=1030 reward=0.8068865 (523.37 it/sec) -training >> step=6178000, episode=1030 reward=0.7815934 (454.14 it/sec) -training >> step=6178100, episode=1030 reward=0.8021297 (479.33 it/sec) -training >> step=6178200, episode=1030 reward=0.7860844 (438.73 it/sec) -training >> step=6178300, episode=1030 reward=0.7872486 (508.29 it/sec) -training >> step=6178400, episode=1030 reward=0.7835643 (466.87 it/sec) -training >> step=6178500, episode=1030 reward=0.7852381 (472.86 it/sec) -training >> step=6178600, episode=1030 reward=0.7731584 (495.56 it/sec) -training >> step=6178700, episode=1030 reward=0.7834451 (412.81 it/sec) -training >> step=6178800, episode=1030 reward=0.7971658 (479.93 it/sec) -training >> step=6178900, episode=1030 reward=0.7870002 (414.99 it/sec) -training >> step=6179000, episode=1030 reward=0.7854034 (504.50 it/sec) -training >> step=6179100, episode=1030 reward=0.7703188 (484.93 it/sec) -training >> step=6179200, episode=1030 reward=0.7622072 (445.03 it/sec) -training >> step=6179300, episode=1031 reward=0.796831 (161.11 it/sec) -training >> step=6179400, episode=1031 reward=0.7996454 (504.62 it/sec) -training >> step=6179500, episode=1031 reward=0.7985464 (454.91 it/sec) -training >> step=6179600, episode=1031 reward=0.7706202 (446.14 it/sec) -training >> step=6179700, episode=1031 reward=0.7763098 (479.04 it/sec) -training >> step=6179800, episode=1031 reward=0.7744378 (469.87 it/sec) -training >> step=6179900, episode=1031 reward=0.794476 (491.16 it/sec) -training >> step=6180000, episode=1031 reward=0.7834734 (485.97 it/sec) -training >> step=6180100, episode=1031 reward=0.7808242 (526.18 it/sec) -training >> step=6180200, episode=1031 reward=0.7759275 (495.15 it/sec) -training >> step=6180300, episode=1031 reward=0.7829649 (495.22 it/sec) -training >> step=6180400, episode=1031 reward=0.7997675 (494.44 it/sec) -training >> step=6180500, episode=1031 reward=0.7788284 (455.06 it/sec) -training >> step=6180600, episode=1031 reward=0.7695285 (501.97 it/sec) -training >> step=6180700, episode=1031 reward=0.7736579 (479.27 it/sec) -training >> step=6180800, episode=1031 reward=0.7836885 (481.62 it/sec) -training >> step=6180900, episode=1031 reward=0.7978618 (483.55 it/sec) -training >> step=6181000, episode=1031 reward=0.7920933 (454.83 it/sec) -training >> step=6181100, episode=1031 reward=0.7682888 (479.24 it/sec) -training >> step=6181200, episode=1031 reward=0.7982216 (512.00 it/sec) -training >> step=6181300, episode=1031 reward=0.8053541 (502.15 it/sec) -training >> step=6181400, episode=1031 reward=0.7932799 (485.91 it/sec) -training >> step=6181500, episode=1031 reward=0.7945007 (499.67 it/sec) -training >> step=6181600, episode=1031 reward=0.7774615 (462.24 it/sec) -training >> step=6181700, episode=1031 reward=0.787692 (456.51 it/sec) -training >> step=6181800, episode=1031 reward=0.7896496 (494.53 it/sec) -training >> step=6181900, episode=1031 reward=0.8063253 (531.85 it/sec) -training >> step=6182000, episode=1031 reward=0.772876 (485.37 it/sec) -training >> step=6182100, episode=1031 reward=0.7861301 (449.28 it/sec) -training >> step=6182200, episode=1031 reward=0.7836964 (499.23 it/sec) -training >> step=6182300, episode=1031 reward=0.7695458 (467.86 it/sec) -training >> step=6182400, episode=1031 reward=0.7805207 (465.08 it/sec) -training >> step=6182500, episode=1031 reward=0.7726403 (472.86 it/sec) -training >> step=6182600, episode=1031 reward=0.7773108 (509.98 it/sec) -training >> step=6182700, episode=1031 reward=0.7825438 (522.83 it/sec) -training >> step=6182800, episode=1031 reward=0.7881937 (451.81 it/sec) -training >> step=6182900, episode=1031 reward=0.7827706 (490.33 it/sec) -training >> step=6183000, episode=1031 reward=0.7861569 (457.27 it/sec) -training >> step=6183100, episode=1031 reward=0.7873301 (497.23 it/sec) -training >> step=6183200, episode=1031 reward=0.760892 (461.32 it/sec) -training >> step=6183300, episode=1031 reward=0.7632897 (476.93 it/sec) -training >> step=6183400, episode=1031 reward=0.7873406 (522.87 it/sec) -training >> step=6183500, episode=1031 reward=0.8086318 (451.16 it/sec) -training >> step=6183600, episode=1031 reward=0.8063701 (485.34 it/sec) -training >> step=6183700, episode=1031 reward=0.8086206 (471.17 it/sec) -training >> step=6183800, episode=1031 reward=0.7763188 (483.90 it/sec) -training >> step=6183900, episode=1031 reward=0.7825883 (498.48 it/sec) -training >> step=6184000, episode=1031 reward=0.8018028 (442.45 it/sec) -training >> step=6184100, episode=1031 reward=0.801308 (498.51 it/sec) -training >> step=6184200, episode=1031 reward=0.7897655 (445.71 it/sec) -training >> step=6184300, episode=1031 reward=0.7927043 (501.69 it/sec) -training >> step=6184400, episode=1031 reward=0.7871829 (512.56 it/sec) -training >> step=6184500, episode=1031 reward=0.7830794 (471.98 it/sec) -training >> step=6184600, episode=1031 reward=0.7694074 (416.32 it/sec) -training >> step=6184700, episode=1031 reward=0.7718506 (518.37 it/sec) -training >> step=6184800, episode=1031 reward=0.7826805 (510.90 it/sec) -training >> step=6184900, episode=1031 reward=0.7838603 (493.22 it/sec) -training >> step=6185000, episode=1031 reward=0.7682648 (489.42 it/sec) -training >> step=6185100, episode=1031 reward=0.7872651 (371.68 it/sec) -training >> step=6185200, episode=1031 reward=0.7942769 (523.15 it/sec) -training >> step=6185300, episode=1032 reward=0.7578679 (133.81 it/sec) -training >> step=6185400, episode=1032 reward=0.8001002 (512.06 it/sec) -training >> step=6185500, episode=1032 reward=0.801725 (490.41 it/sec) -training >> step=6185600, episode=1032 reward=0.7861283 (438.39 it/sec) -training >> step=6185700, episode=1032 reward=0.7786978 (453.52 it/sec) -training >> step=6185800, episode=1032 reward=0.7923756 (498.27 it/sec) -training >> step=6185900, episode=1032 reward=0.7833263 (452.85 it/sec) -training >> step=6186000, episode=1032 reward=0.7948529 (475.29 it/sec) -training >> step=6186100, episode=1032 reward=0.7958905 (492.19 it/sec) -training >> step=6186200, episode=1032 reward=0.7936255 (482.30 it/sec) -training >> step=6186300, episode=1032 reward=0.7770137 (468.90 it/sec) -training >> step=6186400, episode=1032 reward=0.8008788 (491.46 it/sec) -training >> step=6186500, episode=1032 reward=0.7794194 (412.85 it/sec) -training >> step=6186600, episode=1032 reward=0.7843312 (490.41 it/sec) -training >> step=6186700, episode=1032 reward=0.7685798 (491.40 it/sec) -training >> step=6186800, episode=1032 reward=0.7745285 (477.02 it/sec) -training >> step=6186900, episode=1032 reward=0.7735756 (521.72 it/sec) -training >> step=6187000, episode=1032 reward=0.7837599 (496.98 it/sec) -training >> step=6187100, episode=1032 reward=0.7845954 (515.44 it/sec) -training >> step=6187200, episode=1032 reward=0.790195 (464.76 it/sec) -training >> step=6187300, episode=1032 reward=0.7918617 (486.74 it/sec) -training >> step=6187400, episode=1032 reward=0.7661905 (503.36 it/sec) -training >> step=6187500, episode=1032 reward=0.7692208 (504.39 it/sec) -training >> step=6187600, episode=1032 reward=0.7849863 (500.68 it/sec) -training >> step=6187700, episode=1032 reward=0.8098758 (470.06 it/sec) -training >> step=6187800, episode=1032 reward=0.7844042 (494.51 it/sec) -training >> step=6187900, episode=1032 reward=0.7915725 (496.54 it/sec) -training >> step=6188000, episode=1032 reward=0.7864127 (489.53 it/sec) -training >> step=6188100, episode=1032 reward=0.7884579 (472.47 it/sec) -training >> step=6188200, episode=1032 reward=0.7891033 (484.05 it/sec) -training >> step=6188300, episode=1032 reward=0.7798838 (471.75 it/sec) -training >> step=6188400, episode=1032 reward=0.7941015 (496.78 it/sec) -training >> step=6188500, episode=1032 reward=0.7757257 (468.88 it/sec) -training >> step=6188600, episode=1032 reward=0.8010297 (492.61 it/sec) -training >> step=6188700, episode=1032 reward=0.7849269 (511.62 it/sec) -training >> step=6188800, episode=1032 reward=0.7880508 (498.79 it/sec) -training >> step=6188900, episode=1032 reward=0.7847493 (491.15 it/sec) -training >> step=6189000, episode=1032 reward=0.7803725 (485.57 it/sec) -training >> step=6189100, episode=1032 reward=0.7901102 (455.25 it/sec) -training >> step=6189200, episode=1032 reward=0.7821795 (451.18 it/sec) -training >> step=6189300, episode=1032 reward=0.7944456 (491.74 it/sec) -training >> step=6189400, episode=1032 reward=0.7957847 (493.13 it/sec) -training >> step=6189500, episode=1032 reward=0.7966539 (476.91 it/sec) -training >> step=6189600, episode=1032 reward=0.7952949 (504.75 it/sec) -training >> step=6189700, episode=1032 reward=0.7816616 (480.07 it/sec) -training >> step=6189800, episode=1032 reward=0.7997247 (519.16 it/sec) -training >> step=6189900, episode=1032 reward=0.7681261 (490.69 it/sec) -training >> step=6190000, episode=1032 reward=0.7816578 (472.30 it/sec) -training >> step=6190100, episode=1032 reward=0.7878074 (530.74 it/sec) -training >> step=6190200, episode=1032 reward=0.7784361 (491.40 it/sec) -training >> step=6190300, episode=1032 reward=0.7942628 (493.65 it/sec) -training >> step=6190400, episode=1032 reward=0.7946889 (474.93 it/sec) -training >> step=6190500, episode=1032 reward=0.8065466 (498.37 it/sec) -training >> step=6190600, episode=1032 reward=0.7520851 (489.48 it/sec) -training >> step=6190700, episode=1032 reward=0.7719685 (484.84 it/sec) -training >> step=6190800, episode=1032 reward=0.7616307 (486.79 it/sec) -training >> step=6190900, episode=1032 reward=0.7975708 (483.94 it/sec) -training >> step=6191000, episode=1032 reward=0.7896974 (480.17 it/sec) -training >> step=6191100, episode=1032 reward=0.8046699 (509.22 it/sec) -training >> step=6191200, episode=1032 reward=0.7783169 (366.40 it/sec) -training >> step=6191300, episode=1033 reward=0.7822492 (101.87 it/sec) -training >> step=6191400, episode=1033 reward=0.7981325 (467.14 it/sec) -training >> step=6191500, episode=1033 reward=0.7898818 (499.23 it/sec) -training >> step=6191600, episode=1033 reward=0.7967393 (492.39 it/sec) -training >> step=6191700, episode=1033 reward=0.7810333 (505.89 it/sec) -training >> step=6191800, episode=1033 reward=0.7928209 (497.44 it/sec) -training >> step=6191900, episode=1033 reward=0.7853994 (460.39 it/sec) -training >> step=6192000, episode=1033 reward=0.7896414 (512.30 it/sec) -training >> step=6192100, episode=1033 reward=0.791599 (515.16 it/sec) -training >> step=6192200, episode=1033 reward=0.7841801 (509.53 it/sec) -training >> step=6192300, episode=1033 reward=0.7975225 (469.55 it/sec) -training >> step=6192400, episode=1033 reward=0.7799028 (461.70 it/sec) -training >> step=6192500, episode=1033 reward=0.7927043 (526.23 it/sec) -training >> step=6192600, episode=1033 reward=0.7836407 (503.24 it/sec) -training >> step=6192700, episode=1033 reward=0.7850184 (465.43 it/sec) -training >> step=6192800, episode=1033 reward=0.7798507 (501.22 it/sec) -training >> step=6192900, episode=1033 reward=0.792783 (473.81 it/sec) -training >> step=6193000, episode=1033 reward=0.7861923 (469.87 it/sec) -training >> step=6193100, episode=1033 reward=0.7956505 (476.88 it/sec) -training >> step=6193200, episode=1033 reward=0.7900755 (475.72 it/sec) -training >> step=6193300, episode=1033 reward=0.8004736 (478.86 it/sec) -training >> step=6193400, episode=1033 reward=0.7922362 (504.81 it/sec) -training >> step=6193500, episode=1033 reward=0.7687252 (433.98 it/sec) -training >> step=6193600, episode=1033 reward=0.771565 (442.47 it/sec) -training >> step=6193700, episode=1033 reward=0.7734818 (451.23 it/sec) -training >> step=6193800, episode=1033 reward=0.8144163 (458.31 it/sec) -training >> step=6193900, episode=1033 reward=0.8045785 (501.67 it/sec) -training >> step=6194000, episode=1033 reward=0.7933353 (517.53 it/sec) -training >> step=6194100, episode=1033 reward=0.8030459 (473.96 it/sec) -training >> step=6194200, episode=1033 reward=0.8051046 (482.00 it/sec) -training >> step=6194300, episode=1033 reward=0.777498 (493.39 it/sec) -training >> step=6194400, episode=1033 reward=0.7851266 (509.30 it/sec) -training >> step=6194500, episode=1033 reward=0.7887135 (473.55 it/sec) -training >> step=6194600, episode=1033 reward=0.7949318 (458.65 it/sec) -training >> step=6194700, episode=1033 reward=0.7809869 (489.74 it/sec) -training >> step=6194800, episode=1033 reward=0.7918864 (441.12 it/sec) -training >> step=6194900, episode=1033 reward=0.7938004 (526.16 it/sec) -training >> step=6195000, episode=1033 reward=0.8004026 (487.85 it/sec) -training >> step=6195100, episode=1033 reward=0.786379 (520.20 it/sec) -training >> step=6195200, episode=1033 reward=0.7874523 (494.82 it/sec) -training >> step=6195300, episode=1033 reward=0.8039607 (461.99 it/sec) -training >> step=6195400, episode=1033 reward=0.7743813 (519.82 it/sec) -training >> step=6195500, episode=1033 reward=0.7918087 (509.05 it/sec) -training >> step=6195600, episode=1033 reward=0.7808399 (512.23 it/sec) -training >> step=6195700, episode=1033 reward=0.7873095 (491.63 it/sec) -training >> step=6195800, episode=1033 reward=0.7629276 (470.81 it/sec) -training >> step=6195900, episode=1033 reward=0.7708228 (452.78 it/sec) -training >> step=6196000, episode=1033 reward=0.7973644 (433.58 it/sec) -training >> step=6196100, episode=1033 reward=0.764124 (476.81 it/sec) -training >> step=6196200, episode=1033 reward=0.771909 (498.17 it/sec) -training >> step=6196300, episode=1033 reward=0.7673295 (445.43 it/sec) -training >> step=6196400, episode=1033 reward=0.7735574 (495.22 it/sec) -training >> step=6196500, episode=1033 reward=0.7763492 (518.69 it/sec) -training >> step=6196600, episode=1033 reward=0.7762367 (472.22 it/sec) -training >> step=6196700, episode=1033 reward=0.7803311 (508.36 it/sec) -training >> step=6196800, episode=1033 reward=0.7733853 (481.22 it/sec) -training >> step=6196900, episode=1033 reward=0.7658208 (501.29 it/sec) -training >> step=6197000, episode=1033 reward=0.7933823 (501.32 it/sec) -training >> step=6197100, episode=1033 reward=0.80158 (503.95 it/sec) -training >> step=6197200, episode=1033 reward=0.7515573 (498.81 it/sec) -training >> step=6197300, episode=1034 reward=0.76299 (99.75 it/sec) -training >> step=6197400, episode=1034 reward=0.7941514 (483.22 it/sec) -training >> step=6197500, episode=1034 reward=0.7638223 (476.07 it/sec) -training >> step=6197600, episode=1034 reward=0.7909675 (510.94 it/sec) -training >> step=6197700, episode=1034 reward=0.7917582 (457.83 it/sec) -training >> step=6197800, episode=1034 reward=0.7742326 (473.24 it/sec) -training >> step=6197900, episode=1034 reward=0.7885846 (531.52 it/sec) -training >> step=6198000, episode=1034 reward=0.7986414 (443.44 it/sec) -training >> step=6198100, episode=1034 reward=0.7870128 (465.88 it/sec) -training >> step=6198200, episode=1034 reward=0.786643 (511.43 it/sec) -training >> step=6198300, episode=1034 reward=0.7740948 (514.73 it/sec) -training >> step=6198400, episode=1034 reward=0.8061619 (495.93 it/sec) -training >> step=6198500, episode=1034 reward=0.7767287 (462.29 it/sec) -training >> step=6198600, episode=1034 reward=0.7942815 (515.80 it/sec) -training >> step=6198700, episode=1034 reward=0.7844678 (479.83 it/sec) -training >> step=6198800, episode=1034 reward=0.7842878 (502.41 it/sec) -training >> step=6198900, episode=1034 reward=0.8012049 (509.83 it/sec) -training >> step=6199000, episode=1034 reward=0.7951303 (484.30 it/sec) -training >> step=6199100, episode=1034 reward=0.7925506 (488.46 it/sec) -training >> step=6199200, episode=1034 reward=0.8015314 (476.73 it/sec) -training >> step=6199300, episode=1034 reward=0.7827687 (498.66 it/sec) -training >> step=6199400, episode=1034 reward=0.7785229 (498.10 it/sec) -training >> step=6199500, episode=1034 reward=0.777519 (472.74 it/sec) -training >> step=6199600, episode=1034 reward=0.8021957 (543.92 it/sec) -training >> step=6199700, episode=1034 reward=0.7589241 (483.78 it/sec) -training >> step=6199800, episode=1034 reward=0.778855 (502.31 it/sec) -training >> step=6199900, episode=1034 reward=0.782452 (481.33 it/sec) -training >> step=6200000, episode=1034 reward=0.7728657 (493.99 it/sec) -training >> step=6200100, episode=1034 reward=0.78444 (528.81 it/sec) -training >> step=6200200, episode=1034 reward=0.8032016 (394.29 it/sec) -training >> step=6200300, episode=1034 reward=0.8060561 (513.54 it/sec) -training >> step=6200400, episode=1034 reward=0.7816319 (494.82 it/sec) -training >> step=6200500, episode=1034 reward=0.7744949 (478.62 it/sec) -training >> step=6200600, episode=1034 reward=0.769706 (511.99 it/sec) -training >> step=6200700, episode=1034 reward=0.789068 (476.27 it/sec) -training >> step=6200800, episode=1034 reward=0.7917879 (495.49 it/sec) -training >> step=6200900, episode=1034 reward=0.7753989 (491.70 it/sec) -training >> step=6201000, episode=1034 reward=0.7630008 (471.34 it/sec) -training >> step=6201100, episode=1034 reward=0.7851245 (519.04 it/sec) -training >> step=6201200, episode=1034 reward=0.7770607 (486.79 it/sec) -training >> step=6201300, episode=1034 reward=0.7925391 (469.12 it/sec) -training >> step=6201400, episode=1034 reward=0.7684016 (487.36 it/sec) -training >> step=6201500, episode=1034 reward=0.7803346 (459.95 it/sec) -training >> step=6201600, episode=1034 reward=0.7886605 (491.27 it/sec) -training >> step=6201700, episode=1034 reward=0.7918321 (461.59 it/sec) -training >> step=6201800, episode=1034 reward=0.7866076 (493.76 it/sec) -training >> step=6201900, episode=1034 reward=0.7998129 (520.94 it/sec) -training >> step=6202000, episode=1034 reward=0.7857693 (442.10 it/sec) -training >> step=6202100, episode=1034 reward=0.7769138 (497.82 it/sec) -training >> step=6202200, episode=1034 reward=0.7739945 (503.66 it/sec) -training >> step=6202300, episode=1034 reward=0.7746396 (502.73 it/sec) -training >> step=6202400, episode=1034 reward=0.7865953 (498.82 it/sec) -training >> step=6202500, episode=1034 reward=0.7859303 (477.90 it/sec) -training >> step=6202600, episode=1034 reward=0.7704692 (514.66 it/sec) -training >> step=6202700, episode=1034 reward=0.7740681 (479.25 it/sec) -training >> step=6202800, episode=1034 reward=0.7848173 (499.97 it/sec) -training >> step=6202900, episode=1034 reward=0.7915732 (487.09 it/sec) -training >> step=6203000, episode=1034 reward=0.7841669 (439.12 it/sec) -training >> step=6203100, episode=1034 reward=0.7806919 (511.14 it/sec) -training >> step=6203200, episode=1034 reward=0.8044617 (472.47 it/sec) -training >> step=6203300, episode=1035 reward=0.8007281 (101.61 it/sec) -training >> step=6203400, episode=1035 reward=0.7983427 (494.59 it/sec) -training >> step=6203500, episode=1035 reward=0.7718481 (436.44 it/sec) -training >> step=6203600, episode=1035 reward=0.7832592 (513.37 it/sec) -training >> step=6203700, episode=1035 reward=0.7783261 (505.51 it/sec) -training >> step=6203800, episode=1035 reward=0.7653991 (450.75 it/sec) -training >> step=6203900, episode=1035 reward=0.7856436 (481.29 it/sec) -training >> step=6204000, episode=1035 reward=0.7752976 (484.93 it/sec) -training >> step=6204100, episode=1035 reward=0.782739 (489.08 it/sec) -training >> step=6204200, episode=1035 reward=0.7785296 (486.47 it/sec) -training >> step=6204300, episode=1035 reward=0.7857804 (469.97 it/sec) -training >> step=6204400, episode=1035 reward=0.7762901 (479.83 it/sec) -training >> step=6204500, episode=1035 reward=0.7799641 (522.76 it/sec) -training >> step=6204600, episode=1035 reward=0.7943761 (509.11 it/sec) -training >> step=6204700, episode=1035 reward=0.7654014 (497.80 it/sec) -training >> step=6204800, episode=1035 reward=0.8065711 (480.49 it/sec) -training >> step=6204900, episode=1035 reward=0.7923845 (472.66 it/sec) -training >> step=6205000, episode=1035 reward=0.7741866 (477.62 it/sec) -training >> step=6205100, episode=1035 reward=0.7873144 (521.52 it/sec) -training >> step=6205200, episode=1035 reward=0.7877284 (505.02 it/sec) -training >> step=6205300, episode=1035 reward=0.787497 (503.40 it/sec) -training >> step=6205400, episode=1035 reward=0.808919 (477.40 it/sec) -training >> step=6205500, episode=1035 reward=0.7833337 (488.75 it/sec) -training >> step=6205600, episode=1035 reward=0.784789 (529.77 it/sec) -training >> step=6205700, episode=1035 reward=0.7909614 (491.76 it/sec) -training >> step=6205800, episode=1035 reward=0.8216017 (486.99 it/sec) -training >> step=6205900, episode=1035 reward=0.7925534 (503.83 it/sec) -training >> step=6206000, episode=1035 reward=0.7887855 (509.72 it/sec) -training >> step=6206100, episode=1035 reward=0.7792794 (488.79 it/sec) -training >> step=6206200, episode=1035 reward=0.7683044 (504.29 it/sec) -training >> step=6206300, episode=1035 reward=0.7883783 (481.40 it/sec) -training >> step=6206400, episode=1035 reward=0.8034178 (483.80 it/sec) -training >> step=6206500, episode=1035 reward=0.7714133 (462.83 it/sec) -training >> step=6206600, episode=1035 reward=0.7879236 (460.40 it/sec) -training >> step=6206700, episode=1035 reward=0.7696365 (516.32 it/sec) -training >> step=6206800, episode=1035 reward=0.7923407 (461.29 it/sec) -training >> step=6206900, episode=1035 reward=0.7731784 (468.30 it/sec) -training >> step=6207000, episode=1035 reward=0.8012806 (482.06 it/sec) -training >> step=6207100, episode=1035 reward=0.7712764 (517.48 it/sec) -training >> step=6207200, episode=1035 reward=0.7939917 (498.45 it/sec) -training >> step=6207300, episode=1035 reward=0.764679 (497.44 it/sec) -training >> step=6207400, episode=1035 reward=0.7836436 (463.31 it/sec) -training >> step=6207500, episode=1035 reward=0.792407 (458.54 it/sec) -training >> step=6207600, episode=1035 reward=0.7640461 (487.68 it/sec) -training >> step=6207700, episode=1035 reward=0.7877941 (491.98 it/sec) -training >> step=6207800, episode=1035 reward=0.7911697 (439.48 it/sec) -training >> step=6207900, episode=1035 reward=0.7790973 (480.19 it/sec) -training >> step=6208000, episode=1035 reward=0.7770641 (414.26 it/sec) -training >> step=6208100, episode=1035 reward=0.8021582 (480.54 it/sec) -training >> step=6208200, episode=1035 reward=0.785381 (416.83 it/sec) -training >> step=6208300, episode=1035 reward=0.7725489 (418.70 it/sec) -training >> step=6208400, episode=1035 reward=0.8033669 (446.00 it/sec) -training >> step=6208500, episode=1035 reward=0.7952806 (497.60 it/sec) -training >> step=6208600, episode=1035 reward=0.7897657 (489.15 it/sec) -training >> step=6208700, episode=1035 reward=0.7785963 (503.86 it/sec) -training >> step=6208800, episode=1035 reward=0.781186 (464.73 it/sec) -training >> step=6208900, episode=1035 reward=0.7977539 (515.76 it/sec) -training >> step=6209000, episode=1035 reward=0.7626069 (494.50 it/sec) -training >> step=6209100, episode=1035 reward=0.7755886 (483.29 it/sec) -training >> step=6209200, episode=1035 reward=0.799886 (502.05 it/sec) -training >> step=6209300, episode=1036 reward=0.7864277 (75.17 it/sec) -training >> step=6209400, episode=1036 reward=0.783336 (370.18 it/sec) -training >> step=6209500, episode=1036 reward=0.7760917 (408.97 it/sec) -training >> step=6209600, episode=1036 reward=0.7788206 (472.20 it/sec) -training >> step=6209700, episode=1036 reward=0.7670244 (445.77 it/sec) -training >> step=6209800, episode=1036 reward=0.7727942 (488.94 it/sec) -training >> step=6209900, episode=1036 reward=0.7927454 (482.17 it/sec) -training >> step=6210000, episode=1036 reward=0.7747815 (500.16 it/sec) -training >> step=6210100, episode=1036 reward=0.7891896 (451.87 it/sec) -training >> step=6210200, episode=1036 reward=0.8048316 (500.51 it/sec) -training >> step=6210300, episode=1036 reward=0.7862665 (481.86 it/sec) -training >> step=6210400, episode=1036 reward=0.7816784 (529.99 it/sec) -training >> step=6210500, episode=1036 reward=0.7814511 (516.32 it/sec) -training >> step=6210600, episode=1036 reward=0.7992393 (512.28 it/sec) -training >> step=6210700, episode=1036 reward=0.7813085 (515.64 it/sec) -training >> step=6210800, episode=1036 reward=0.791066 (528.11 it/sec) -training >> step=6210900, episode=1036 reward=0.7584329 (515.91 it/sec) -training >> step=6211000, episode=1036 reward=0.7863265 (510.50 it/sec) -training >> step=6211100, episode=1036 reward=0.7864347 (474.09 it/sec) -training >> step=6211200, episode=1036 reward=0.7921515 (507.04 it/sec) -training >> step=6211300, episode=1036 reward=0.7780834 (521.66 it/sec) -training >> step=6211400, episode=1036 reward=0.7887771 (429.57 it/sec) -training >> step=6211500, episode=1036 reward=0.7800892 (371.50 it/sec) -training >> step=6211600, episode=1036 reward=0.7891942 (464.25 it/sec) -training >> step=6211700, episode=1036 reward=0.8045145 (387.75 it/sec) -training >> step=6211800, episode=1036 reward=0.7991694 (372.68 it/sec) -training >> step=6211900, episode=1036 reward=0.7660452 (454.38 it/sec) -training >> step=6212000, episode=1036 reward=0.808376 (368.70 it/sec) -training >> step=6212100, episode=1036 reward=0.7753273 (422.28 it/sec) -training >> step=6212200, episode=1036 reward=0.8135875 (482.35 it/sec) -training >> step=6212300, episode=1036 reward=0.7881538 (464.54 it/sec) -training >> step=6212400, episode=1036 reward=0.790457 (360.89 it/sec) -training >> step=6212500, episode=1036 reward=0.7959091 (405.66 it/sec) -training >> step=6212600, episode=1036 reward=0.7853391 (461.41 it/sec) -training >> step=6212700, episode=1036 reward=0.8079498 (389.63 it/sec) -training >> step=6212800, episode=1036 reward=0.7825882 (403.19 it/sec) -training >> step=6212900, episode=1036 reward=0.7939796 (398.68 it/sec) -training >> step=6213000, episode=1036 reward=0.8009361 (451.42 it/sec) -training >> step=6213100, episode=1036 reward=0.7900284 (478.66 it/sec) -training >> step=6213200, episode=1036 reward=0.7988676 (418.04 it/sec) -training >> step=6213300, episode=1036 reward=0.7975795 (507.19 it/sec) -training >> step=6213400, episode=1036 reward=0.7772222 (435.35 it/sec) -training >> step=6213500, episode=1036 reward=0.7922538 (372.03 it/sec) -training >> step=6213600, episode=1036 reward=0.7778952 (464.94 it/sec) -training >> step=6213700, episode=1036 reward=0.8012719 (460.54 it/sec) -training >> step=6213800, episode=1036 reward=0.7564374 (453.26 it/sec) -training >> step=6213900, episode=1036 reward=0.7942014 (474.35 it/sec) -training >> step=6214000, episode=1036 reward=0.7710164 (486.78 it/sec) -training >> step=6214100, episode=1036 reward=0.7974254 (474.86 it/sec) -training >> step=6214200, episode=1036 reward=0.7687862 (500.46 it/sec) -training >> step=6214300, episode=1036 reward=0.7843967 (454.10 it/sec) -training >> step=6214400, episode=1036 reward=0.7902227 (499.92 it/sec) -training >> step=6214500, episode=1036 reward=0.7915128 (490.25 it/sec) -training >> step=6214600, episode=1036 reward=0.7909367 (496.82 it/sec) -training >> step=6214700, episode=1036 reward=0.7696145 (470.88 it/sec) -training >> step=6214800, episode=1036 reward=0.7833753 (474.56 it/sec) -training >> step=6214900, episode=1036 reward=0.7903234 (491.95 it/sec) -training >> step=6215000, episode=1036 reward=0.7688818 (462.56 it/sec) -training >> step=6215100, episode=1036 reward=0.7647796 (487.05 it/sec) -training >> step=6215200, episode=1036 reward=0.7921946 (501.84 it/sec) -training >> step=6215300, episode=1037 reward=0.7540816 (87.43 it/sec) -training >> step=6215400, episode=1037 reward=0.7728415 (477.37 it/sec) -training >> step=6215500, episode=1037 reward=0.7807704 (422.26 it/sec) -training >> step=6215600, episode=1037 reward=0.7923639 (482.44 it/sec) -training >> step=6215700, episode=1037 reward=0.7826068 (503.73 it/sec) -training >> step=6215800, episode=1037 reward=0.7863649 (462.65 it/sec) -training >> step=6215900, episode=1037 reward=0.7775522 (505.50 it/sec) -training >> step=6216000, episode=1037 reward=0.8122052 (403.11 it/sec) -training >> step=6216100, episode=1037 reward=0.7939367 (433.17 it/sec) -training >> step=6216200, episode=1037 reward=0.7862182 (497.84 it/sec) -training >> step=6216300, episode=1037 reward=0.7792022 (456.54 it/sec) -training >> step=6216400, episode=1037 reward=0.7640712 (514.20 it/sec) -training >> step=6216500, episode=1037 reward=0.7769398 (467.74 it/sec) -training >> step=6216600, episode=1037 reward=0.7888009 (472.48 it/sec) -training >> step=6216700, episode=1037 reward=0.7653083 (488.38 it/sec) -training >> step=6216800, episode=1037 reward=0.801583 (508.73 it/sec) -training >> step=6216900, episode=1037 reward=0.7976434 (498.15 it/sec) -training >> step=6217000, episode=1037 reward=0.7798687 (463.52 it/sec) -training >> step=6217100, episode=1037 reward=0.7932745 (500.20 it/sec) -training >> step=6217200, episode=1037 reward=0.8006371 (501.63 it/sec) -training >> step=6217300, episode=1037 reward=0.7711573 (504.20 it/sec) -training >> step=6217400, episode=1037 reward=0.7953625 (504.96 it/sec) -training >> step=6217500, episode=1037 reward=0.7782667 (529.74 it/sec) -training >> step=6217600, episode=1037 reward=0.8158113 (479.91 it/sec) -training >> step=6217700, episode=1037 reward=0.7933822 (491.22 it/sec) -training >> step=6217800, episode=1037 reward=0.7994971 (453.64 it/sec) -training >> step=6217900, episode=1037 reward=0.7856827 (503.51 it/sec) -training >> step=6218000, episode=1037 reward=0.7867053 (430.82 it/sec) -training >> step=6218100, episode=1037 reward=0.790251 (474.92 it/sec) -training >> step=6218200, episode=1037 reward=0.7918714 (507.23 it/sec) -training >> step=6218300, episode=1037 reward=0.7847432 (508.96 it/sec) -training >> step=6218400, episode=1037 reward=0.7916098 (442.46 it/sec) -training >> step=6218500, episode=1037 reward=0.781906 (442.57 it/sec) -training >> step=6218600, episode=1037 reward=0.7780899 (485.53 it/sec) -training >> step=6218700, episode=1037 reward=0.7842942 (461.84 it/sec) -training >> step=6218800, episode=1037 reward=0.7782097 (420.40 it/sec) -training >> step=6218900, episode=1037 reward=0.7991807 (441.68 it/sec) -training >> step=6219000, episode=1037 reward=0.7502913 (435.86 it/sec) -training >> step=6219100, episode=1037 reward=0.7870963 (452.91 it/sec) -training >> step=6219200, episode=1037 reward=0.7857355 (437.02 it/sec) -training >> step=6219300, episode=1037 reward=0.7988935 (460.65 it/sec) -training >> step=6219400, episode=1037 reward=0.7972927 (457.94 it/sec) -training >> step=6219500, episode=1037 reward=0.7870745 (413.84 it/sec) -training >> step=6219600, episode=1037 reward=0.7839453 (469.36 it/sec) -training >> step=6219700, episode=1037 reward=0.7726696 (477.89 it/sec) -training >> step=6219800, episode=1037 reward=0.7893978 (448.87 it/sec) -training >> step=6219900, episode=1037 reward=0.7827871 (474.66 it/sec) -training >> step=6220000, episode=1037 reward=0.7987041 (505.56 it/sec) -training >> step=6220100, episode=1037 reward=0.7737479 (477.34 it/sec) -training >> step=6220200, episode=1037 reward=0.7875147 (492.83 it/sec) -training >> step=6220300, episode=1037 reward=0.7921132 (480.19 it/sec) -training >> step=6220400, episode=1037 reward=0.7864326 (516.39 it/sec) -training >> step=6220500, episode=1037 reward=0.7788641 (480.02 it/sec) -training >> step=6220600, episode=1037 reward=0.782103 (466.41 it/sec) -training >> step=6220700, episode=1037 reward=0.7756235 (487.49 it/sec) -training >> step=6220800, episode=1037 reward=0.7910451 (495.23 it/sec) -training >> step=6220900, episode=1037 reward=0.7887084 (490.05 it/sec) -training >> step=6221000, episode=1037 reward=0.7704461 (494.26 it/sec) -training >> step=6221100, episode=1037 reward=0.7748557 (530.95 it/sec) -training >> step=6221200, episode=1037 reward=0.7848931 (480.63 it/sec) -training >> step=6221300, episode=1038 reward=0.7518454 (98.09 it/sec) -training >> step=6221400, episode=1038 reward=0.7797315 (534.02 it/sec) -training >> step=6221500, episode=1038 reward=0.760836 (510.59 it/sec) -training >> step=6221600, episode=1038 reward=0.7798138 (530.95 it/sec) -training >> step=6221700, episode=1038 reward=0.7844971 (527.53 it/sec) -training >> step=6221800, episode=1038 reward=0.7802089 (537.51 it/sec) -training >> step=6221900, episode=1038 reward=0.7740279 (484.82 it/sec) -training >> step=6222000, episode=1038 reward=0.7698326 (501.19 it/sec) -training >> step=6222100, episode=1038 reward=0.7800339 (517.38 it/sec) -training >> step=6222200, episode=1038 reward=0.7702012 (508.75 it/sec) -training >> step=6222300, episode=1038 reward=0.7768503 (502.97 it/sec) -training >> step=6222400, episode=1038 reward=0.7879174 (507.59 it/sec) -training >> step=6222500, episode=1038 reward=0.7832869 (472.61 it/sec) -training >> step=6222600, episode=1038 reward=0.767951 (518.67 it/sec) -training >> step=6222700, episode=1038 reward=0.7951429 (542.95 it/sec) -training >> step=6222800, episode=1038 reward=0.7672516 (549.28 it/sec) -training >> step=6222900, episode=1038 reward=0.7737487 (468.81 it/sec) -training >> step=6223000, episode=1038 reward=0.7718866 (469.97 it/sec) -training >> step=6223100, episode=1038 reward=0.7770732 (452.57 it/sec) -training >> step=6223200, episode=1038 reward=0.7744532 (454.46 it/sec) -training >> step=6223300, episode=1038 reward=0.7763528 (464.61 it/sec) -training >> step=6223400, episode=1038 reward=0.7817279 (456.70 it/sec) -training >> step=6223500, episode=1038 reward=0.7957566 (521.73 it/sec) -training >> step=6223600, episode=1038 reward=0.7919798 (495.49 it/sec) -training >> step=6223700, episode=1038 reward=0.7896858 (492.47 it/sec) -training >> step=6223800, episode=1038 reward=0.7994745 (531.63 it/sec) -training >> step=6223900, episode=1038 reward=0.7925099 (497.96 it/sec) -training >> step=6224000, episode=1038 reward=0.7814826 (491.27 it/sec) -training >> step=6224100, episode=1038 reward=0.7810085 (481.43 it/sec) -training >> step=6224200, episode=1038 reward=0.7941259 (532.68 it/sec) -training >> step=6224300, episode=1038 reward=0.7973679 (488.05 it/sec) -training >> step=6224400, episode=1038 reward=0.7980887 (493.76 it/sec) -training >> step=6224500, episode=1038 reward=0.7882736 (507.45 it/sec) -training >> step=6224600, episode=1038 reward=0.7761804 (531.26 it/sec) -training >> step=6224700, episode=1038 reward=0.7917134 (501.93 it/sec) -training >> step=6224800, episode=1038 reward=0.79204 (485.19 it/sec) -training >> step=6224900, episode=1038 reward=0.7885402 (439.48 it/sec) -training >> step=6225000, episode=1038 reward=0.7683238 (493.72 it/sec) -training >> step=6225100, episode=1038 reward=0.7966791 (459.82 it/sec) -training >> step=6225200, episode=1038 reward=0.7890254 (447.75 it/sec) -training >> step=6225300, episode=1038 reward=0.7686188 (434.25 it/sec) -training >> step=6225400, episode=1038 reward=0.7891309 (468.86 it/sec) -training >> step=6225500, episode=1038 reward=0.7826425 (474.68 it/sec) -training >> step=6225600, episode=1038 reward=0.7789133 (495.73 it/sec) -training >> step=6225700, episode=1038 reward=0.7614109 (514.61 it/sec) -training >> step=6225800, episode=1038 reward=0.7713684 (509.59 it/sec) -training >> step=6225900, episode=1038 reward=0.7885653 (465.19 it/sec) -training >> step=6226000, episode=1038 reward=0.786802 (515.37 it/sec) -training >> step=6226100, episode=1038 reward=0.786082 (484.99 it/sec) -training >> step=6226200, episode=1038 reward=0.7549359 (518.04 it/sec) -training >> step=6226300, episode=1038 reward=0.7844005 (522.31 it/sec) -training >> step=6226400, episode=1038 reward=0.8132505 (504.59 it/sec) -training >> step=6226500, episode=1038 reward=0.7924246 (507.20 it/sec) -training >> step=6226600, episode=1038 reward=0.7899736 (478.43 it/sec) -training >> step=6226700, episode=1038 reward=0.7844271 (522.19 it/sec) -training >> step=6226800, episode=1038 reward=0.7905611 (518.93 it/sec) -training >> step=6226900, episode=1038 reward=0.7858632 (490.69 it/sec) -training >> step=6227000, episode=1038 reward=0.7707039 (501.28 it/sec) -training >> step=6227100, episode=1038 reward=0.7816316 (532.80 it/sec) -training >> step=6227200, episode=1038 reward=0.7786544 (483.57 it/sec) -training >> step=6227300, episode=1039 reward=0.7904729 (112.40 it/sec) -training >> step=6227400, episode=1039 reward=0.7786622 (546.87 it/sec) -training >> step=6227500, episode=1039 reward=0.7683799 (549.95 it/sec) -training >> step=6227600, episode=1039 reward=0.8057571 (514.61 it/sec) -training >> step=6227700, episode=1039 reward=0.8030146 (488.66 it/sec) -training >> step=6227800, episode=1039 reward=0.7725573 (527.92 it/sec) -training >> step=6227900, episode=1039 reward=0.7777005 (499.71 it/sec) -training >> step=6228000, episode=1039 reward=0.7677077 (523.00 it/sec) -training >> step=6228100, episode=1039 reward=0.7640228 (525.37 it/sec) -training >> step=6228200, episode=1039 reward=0.7582881 (497.72 it/sec) -training >> step=6228300, episode=1039 reward=0.7795026 (525.95 it/sec) -training >> step=6228400, episode=1039 reward=0.7721627 (509.50 it/sec) -training >> step=6228500, episode=1039 reward=0.7773948 (548.72 it/sec) -training >> step=6228600, episode=1039 reward=0.7976411 (523.05 it/sec) -training >> step=6228700, episode=1039 reward=0.7736853 (490.33 it/sec) -training >> step=6228800, episode=1039 reward=0.792323 (519.19 it/sec) -training >> step=6228900, episode=1039 reward=0.7865841 (543.52 it/sec) -training >> step=6229000, episode=1039 reward=0.7903089 (496.70 it/sec) -training >> step=6229100, episode=1039 reward=0.782298 (535.18 it/sec) -training >> step=6229200, episode=1039 reward=0.784628 (505.88 it/sec) -training >> step=6229300, episode=1039 reward=0.78589 (488.54 it/sec) -training >> step=6229400, episode=1039 reward=0.7760455 (508.77 it/sec) -training >> step=6229500, episode=1039 reward=0.7983077 (520.23 it/sec) -training >> step=6229600, episode=1039 reward=0.7931278 (534.80 it/sec) -training >> step=6229700, episode=1039 reward=0.803079 (521.22 it/sec) -training >> step=6229800, episode=1039 reward=0.7859596 (476.19 it/sec) -training >> step=6229900, episode=1039 reward=0.7663029 (483.04 it/sec) -training >> step=6230000, episode=1039 reward=0.7766241 (536.43 it/sec) -training >> step=6230100, episode=1039 reward=0.7935513 (516.50 it/sec) -training >> step=6230200, episode=1039 reward=0.771133 (510.86 it/sec) -training >> step=6230300, episode=1039 reward=0.8025836 (516.82 it/sec) -training >> step=6230400, episode=1039 reward=0.7764979 (508.75 it/sec) -training >> step=6230500, episode=1039 reward=0.7877083 (517.70 it/sec) -training >> step=6230600, episode=1039 reward=0.7860907 (528.16 it/sec) -training >> step=6230700, episode=1039 reward=0.7851006 (552.51 it/sec) -training >> step=6230800, episode=1039 reward=0.7984039 (464.37 it/sec) -training >> step=6230900, episode=1039 reward=0.7898556 (527.09 it/sec) -training >> step=6231000, episode=1039 reward=0.7858979 (524.71 it/sec) -training >> step=6231100, episode=1039 reward=0.7768632 (553.12 it/sec) -training >> step=6231200, episode=1039 reward=0.7666762 (530.59 it/sec) -training >> step=6231300, episode=1039 reward=0.7808864 (480.24 it/sec) -training >> step=6231400, episode=1039 reward=0.7772809 (472.92 it/sec) -training >> step=6231500, episode=1039 reward=0.7824974 (520.01 it/sec) -training >> step=6231600, episode=1039 reward=0.7828931 (540.22 it/sec) -training >> step=6231700, episode=1039 reward=0.7953799 (530.17 it/sec) -training >> step=6231800, episode=1039 reward=0.7859703 (540.84 it/sec) -training >> step=6231900, episode=1039 reward=0.7930358 (483.56 it/sec) -training >> step=6232000, episode=1039 reward=0.7900236 (534.57 it/sec) -training >> step=6232100, episode=1039 reward=0.7762373 (523.60 it/sec) -training >> step=6232200, episode=1039 reward=0.7742285 (552.77 it/sec) -training >> step=6232300, episode=1039 reward=0.7831473 (525.62 it/sec) -training >> step=6232400, episode=1039 reward=0.7913418 (500.67 it/sec) -training >> step=6232500, episode=1039 reward=0.8022416 (523.95 it/sec) -training >> step=6232600, episode=1039 reward=0.8038712 (494.75 it/sec) -training >> step=6232700, episode=1039 reward=0.8041332 (539.34 it/sec) -training >> step=6232800, episode=1039 reward=0.790237 (533.96 it/sec) -training >> step=6232900, episode=1039 reward=0.7804972 (518.45 it/sec) -training >> step=6233000, episode=1039 reward=0.7863335 (448.51 it/sec) -training >> step=6233100, episode=1039 reward=0.7804056 (531.63 it/sec) -training >> step=6233200, episode=1039 reward=0.7897317 (535.28 it/sec) -training >> step=6233300, episode=1040 reward=0.7779123 (115.64 it/sec) -training >> step=6233400, episode=1040 reward=0.7841035 (523.19 it/sec) -training >> step=6233500, episode=1040 reward=0.7835752 (520.26 it/sec) -training >> step=6233600, episode=1040 reward=0.7578874 (555.32 it/sec) -training >> step=6233700, episode=1040 reward=0.7723432 (457.12 it/sec) -training >> step=6233800, episode=1040 reward=0.7883596 (489.67 it/sec) -training >> step=6233900, episode=1040 reward=0.7969962 (551.90 it/sec) -training >> step=6234000, episode=1040 reward=0.7816998 (520.01 it/sec) -training >> step=6234100, episode=1040 reward=0.7897146 (520.62 it/sec) -training >> step=6234200, episode=1040 reward=0.7817572 (506.09 it/sec) -training >> step=6234300, episode=1040 reward=0.8021081 (528.86 it/sec) -training >> step=6234400, episode=1040 reward=0.7933545 (502.23 it/sec) -training >> step=6234500, episode=1040 reward=0.7775224 (531.81 it/sec) -training >> step=6234600, episode=1040 reward=0.8002459 (516.12 it/sec) -training >> step=6234700, episode=1040 reward=0.7700807 (473.81 it/sec) -training >> step=6234800, episode=1040 reward=0.7878904 (500.27 it/sec) -training >> step=6234900, episode=1040 reward=0.7879238 (528.98 it/sec) -training >> step=6235000, episode=1040 reward=0.7943566 (522.72 it/sec) -training >> step=6235100, episode=1040 reward=0.7764438 (522.46 it/sec) -training >> step=6235200, episode=1040 reward=0.7890826 (499.13 it/sec) -training >> step=6235300, episode=1040 reward=0.7891961 (510.49 it/sec) -training >> step=6235400, episode=1040 reward=0.7775726 (524.61 it/sec) -training >> step=6235500, episode=1040 reward=0.7817227 (526.98 it/sec) -training >> step=6235600, episode=1040 reward=0.7857281 (518.56 it/sec) -training >> step=6235700, episode=1040 reward=0.7710192 (521.68 it/sec) -training >> step=6235800, episode=1040 reward=0.7923989 (508.01 it/sec) -training >> step=6235900, episode=1040 reward=0.7622856 (499.49 it/sec) -training >> step=6236000, episode=1040 reward=0.7820452 (512.22 it/sec) -training >> step=6236100, episode=1040 reward=0.7915022 (547.93 it/sec) -training >> step=6236200, episode=1040 reward=0.7826679 (492.71 it/sec) -training >> step=6236300, episode=1040 reward=0.7617906 (497.94 it/sec) -training >> step=6236400, episode=1040 reward=0.7544244 (508.66 it/sec) -training >> step=6236500, episode=1040 reward=0.7771834 (524.33 it/sec) -training >> step=6236600, episode=1040 reward=0.764561 (536.44 it/sec) -training >> step=6236700, episode=1040 reward=0.7830336 (546.49 it/sec) -training >> step=6236800, episode=1040 reward=0.7855074 (497.67 it/sec) -training >> step=6236900, episode=1040 reward=0.7833337 (495.98 it/sec) -training >> step=6237000, episode=1040 reward=0.7993802 (517.69 it/sec) -training >> step=6237100, episode=1040 reward=0.7952563 (505.47 it/sec) -training >> step=6237200, episode=1040 reward=0.793172 (531.70 it/sec) -training >> step=6237300, episode=1040 reward=0.7787271 (509.44 it/sec) -training >> step=6237400, episode=1040 reward=0.7778602 (500.44 it/sec) -training >> step=6237500, episode=1040 reward=0.800191 (559.00 it/sec) -training >> step=6237600, episode=1040 reward=0.7722127 (520.53 it/sec) -training >> step=6237700, episode=1040 reward=0.7843723 (534.04 it/sec) -training >> step=6237800, episode=1040 reward=0.7806053 (543.25 it/sec) -training >> step=6237900, episode=1040 reward=0.8019783 (519.57 it/sec) -training >> step=6238000, episode=1040 reward=0.7957532 (496.59 it/sec) -training >> step=6238100, episode=1040 reward=0.7663129 (515.62 it/sec) -training >> step=6238200, episode=1040 reward=0.7970573 (529.10 it/sec) -training >> step=6238300, episode=1040 reward=0.804727 (531.75 it/sec) -training >> step=6238400, episode=1040 reward=0.7697893 (524.78 it/sec) -training >> step=6238500, episode=1040 reward=0.7836437 (492.96 it/sec) -training >> step=6238600, episode=1040 reward=0.7902378 (547.22 it/sec) -training >> step=6238700, episode=1040 reward=0.7826234 (542.31 it/sec) -training >> step=6238800, episode=1040 reward=0.7927946 (516.76 it/sec) -training >> step=6238900, episode=1040 reward=0.7860255 (510.08 it/sec) -training >> step=6239000, episode=1040 reward=0.7951941 (520.21 it/sec) -training >> step=6239100, episode=1040 reward=0.7756497 (514.47 it/sec) -training >> step=6239200, episode=1040 reward=0.7662472 (518.39 it/sec) -training >> step=6239300, episode=1041 reward=0.7777477 (112.70 it/sec) -training >> step=6239400, episode=1041 reward=0.7625804 (550.58 it/sec) -training >> step=6239500, episode=1041 reward=0.7762484 (478.19 it/sec) -training >> step=6239600, episode=1041 reward=0.7848731 (487.07 it/sec) -training >> step=6239700, episode=1041 reward=0.7668938 (495.37 it/sec) -training >> step=6239800, episode=1041 reward=0.7987146 (539.47 it/sec) -training >> step=6239900, episode=1041 reward=0.7813995 (514.47 it/sec) -training >> step=6240000, episode=1041 reward=0.7829825 (519.76 it/sec) -training >> step=6240100, episode=1041 reward=0.7795628 (533.40 it/sec) -training >> step=6240200, episode=1041 reward=0.8041608 (497.15 it/sec) -training >> step=6240300, episode=1041 reward=0.7705797 (511.33 it/sec) -training >> step=6240400, episode=1041 reward=0.7820958 (530.19 it/sec) -training >> step=6240500, episode=1041 reward=0.7769006 (533.94 it/sec) -training >> step=6240600, episode=1041 reward=0.7903886 (520.37 it/sec) -training >> step=6240700, episode=1041 reward=0.771215 (517.41 it/sec) -training >> step=6240800, episode=1041 reward=0.7957422 (501.59 it/sec) -training >> step=6240900, episode=1041 reward=0.7919675 (507.79 it/sec) -training >> step=6241000, episode=1041 reward=0.78407 (501.71 it/sec) -training >> step=6241100, episode=1041 reward=0.7627519 (461.35 it/sec) -training >> step=6241200, episode=1041 reward=0.7748724 (557.21 it/sec) -training >> step=6241300, episode=1041 reward=0.800831 (471.02 it/sec) -training >> step=6241400, episode=1041 reward=0.7812859 (521.73 it/sec) -training >> step=6241500, episode=1041 reward=0.7996262 (540.61 it/sec) -training >> step=6241600, episode=1041 reward=0.7980092 (497.98 it/sec) -training >> step=6241700, episode=1041 reward=0.7835188 (509.57 it/sec) -training >> step=6241800, episode=1041 reward=0.7898785 (499.23 it/sec) -training >> step=6241900, episode=1041 reward=0.7927686 (536.77 it/sec) -training >> step=6242000, episode=1041 reward=0.7829 (514.67 it/sec) -training >> step=6242100, episode=1041 reward=0.776094 (522.59 it/sec) -training >> step=6242200, episode=1041 reward=0.781777 (507.80 it/sec) -training >> step=6242300, episode=1041 reward=0.8148419 (514.86 it/sec) -training >> step=6242400, episode=1041 reward=0.7733586 (496.97 it/sec) -training >> step=6242500, episode=1041 reward=0.7655169 (531.86 it/sec) -training >> step=6242600, episode=1041 reward=0.77469 (540.60 it/sec) -training >> step=6242700, episode=1041 reward=0.796108 (502.00 it/sec) -training >> step=6242800, episode=1041 reward=0.7920223 (513.69 it/sec) -training >> step=6242900, episode=1041 reward=0.7758663 (509.12 it/sec) -training >> step=6243000, episode=1041 reward=0.7871916 (508.58 it/sec) -training >> step=6243100, episode=1041 reward=0.8047418 (524.33 it/sec) -training >> step=6243200, episode=1041 reward=0.7731798 (509.27 it/sec) -training >> step=6243300, episode=1041 reward=0.7811779 (544.45 it/sec) -training >> step=6243400, episode=1041 reward=0.8000429 (463.27 it/sec) -training >> step=6243500, episode=1041 reward=0.7910751 (506.24 it/sec) -training >> step=6243600, episode=1041 reward=0.7836631 (535.14 it/sec) -training >> step=6243700, episode=1041 reward=0.8072378 (559.91 it/sec) -training >> step=6243800, episode=1041 reward=0.7799217 (505.38 it/sec) -training >> step=6243900, episode=1041 reward=0.7953391 (520.06 it/sec) -training >> step=6244000, episode=1041 reward=0.7766618 (513.74 it/sec) -training >> step=6244100, episode=1041 reward=0.778391 (513.55 it/sec) -training >> step=6244200, episode=1041 reward=0.7809031 (518.02 it/sec) -training >> step=6244300, episode=1041 reward=0.7878088 (520.45 it/sec) -training >> step=6244400, episode=1041 reward=0.7967651 (550.55 it/sec) -training >> step=6244500, episode=1041 reward=0.7676592 (468.71 it/sec) -training >> step=6244600, episode=1041 reward=0.7766142 (508.56 it/sec) -training >> step=6244700, episode=1041 reward=0.7828991 (467.32 it/sec) -training >> step=6244800, episode=1041 reward=0.7856607 (518.69 it/sec) -training >> step=6244900, episode=1041 reward=0.7626221 (515.39 it/sec) -training >> step=6245000, episode=1041 reward=0.7837458 (494.38 it/sec) -training >> step=6245100, episode=1041 reward=0.7738579 (543.55 it/sec) -training >> step=6245200, episode=1041 reward=0.7859224 (508.81 it/sec) -training >> step=6245300, episode=1042 reward=0.789344 (111.35 it/sec) -training >> step=6245400, episode=1042 reward=0.7692527 (528.45 it/sec) -training >> step=6245500, episode=1042 reward=0.7878641 (509.98 it/sec) -training >> step=6245600, episode=1042 reward=0.7892576 (517.54 it/sec) -training >> step=6245700, episode=1042 reward=0.7868143 (488.52 it/sec) -training >> step=6245800, episode=1042 reward=0.7827489 (539.86 it/sec) -training >> step=6245900, episode=1042 reward=0.7828044 (453.27 it/sec) -training >> step=6246000, episode=1042 reward=0.78127 (514.80 it/sec) -training >> step=6246100, episode=1042 reward=0.7958056 (531.66 it/sec) -training >> step=6246200, episode=1042 reward=0.7873571 (470.54 it/sec) -training >> step=6246300, episode=1042 reward=0.7864941 (550.01 it/sec) -training >> step=6246400, episode=1042 reward=0.8053916 (523.11 it/sec) -training >> step=6246500, episode=1042 reward=0.7849882 (511.94 it/sec) -training >> step=6246600, episode=1042 reward=0.8025647 (502.83 it/sec) -training >> step=6246700, episode=1042 reward=0.7999032 (512.63 it/sec) -training >> step=6246800, episode=1042 reward=0.7991037 (517.22 it/sec) -training >> step=6246900, episode=1042 reward=0.7765042 (553.03 it/sec) -training >> step=6247000, episode=1042 reward=0.7880941 (552.21 it/sec) -training >> step=6247100, episode=1042 reward=0.7725921 (485.52 it/sec) -training >> step=6247200, episode=1042 reward=0.783021 (556.62 it/sec) -training >> step=6247300, episode=1042 reward=0.7844708 (498.39 it/sec) -training >> step=6247400, episode=1042 reward=0.7800909 (521.85 it/sec) -training >> step=6247500, episode=1042 reward=0.7866283 (517.80 it/sec) -training >> step=6247600, episode=1042 reward=0.7791341 (542.69 it/sec) -training >> step=6247700, episode=1042 reward=0.7676386 (506.40 it/sec) -training >> step=6247800, episode=1042 reward=0.7820204 (501.74 it/sec) -training >> step=6247900, episode=1042 reward=0.7881613 (512.96 it/sec) -training >> step=6248000, episode=1042 reward=0.7777761 (545.09 it/sec) -training >> step=6248100, episode=1042 reward=0.776949 (528.73 it/sec) -training >> step=6248200, episode=1042 reward=0.7851218 (516.00 it/sec) -training >> step=6248300, episode=1042 reward=0.7831398 (514.85 it/sec) -training >> step=6248400, episode=1042 reward=0.7670634 (509.30 it/sec) -training >> step=6248500, episode=1042 reward=0.7906351 (542.86 it/sec) -training >> step=6248600, episode=1042 reward=0.78405 (538.31 it/sec) -training >> step=6248700, episode=1042 reward=0.786908 (520.66 it/sec) -training >> step=6248800, episode=1042 reward=0.7913049 (533.41 it/sec) -training >> step=6248900, episode=1042 reward=0.7841315 (469.71 it/sec) -training >> step=6249000, episode=1042 reward=0.7977605 (539.07 it/sec) -training >> step=6249100, episode=1042 reward=0.7835122 (548.71 it/sec) -training >> step=6249200, episode=1042 reward=0.7627031 (522.38 it/sec) -training >> step=6249300, episode=1042 reward=0.7689093 (530.63 it/sec) -training >> step=6249400, episode=1042 reward=0.7964317 (520.32 it/sec) -training >> step=6249500, episode=1042 reward=0.7740363 (472.65 it/sec) -training >> step=6249600, episode=1042 reward=0.7747115 (523.03 it/sec) -training >> step=6249700, episode=1042 reward=0.7796223 (506.10 it/sec) -training >> step=6249800, episode=1042 reward=0.7777582 (528.67 it/sec) -training >> step=6249900, episode=1042 reward=0.7788259 (530.37 it/sec) -training >> step=6250000, episode=1042 reward=0.8003185 (507.59 it/sec) -training >> step=6250100, episode=1042 reward=0.7701682 (519.55 it/sec) -training >> step=6250200, episode=1042 reward=0.7762155 (541.37 it/sec) -training >> step=6250300, episode=1042 reward=0.7710429 (516.05 it/sec) -training >> step=6250400, episode=1042 reward=0.7886163 (533.80 it/sec) -training >> step=6250500, episode=1042 reward=0.7959936 (515.05 it/sec) -training >> step=6250600, episode=1042 reward=0.7562211 (550.23 it/sec) -training >> step=6250700, episode=1042 reward=0.794798 (517.31 it/sec) -training >> step=6250800, episode=1042 reward=0.7821171 (540.50 it/sec) -training >> step=6250900, episode=1042 reward=0.7867914 (529.54 it/sec) -training >> step=6251000, episode=1042 reward=0.7839299 (486.57 it/sec) -training >> step=6251100, episode=1042 reward=0.771222 (490.82 it/sec) -training >> step=6251200, episode=1042 reward=0.7623217 (556.03 it/sec) -training >> step=6251300, episode=1043 reward=0.7945052 (91.04 it/sec) -training >> step=6251400, episode=1043 reward=0.7631558 (520.76 it/sec) -training >> step=6251500, episode=1043 reward=0.7783768 (527.20 it/sec) -training >> step=6251600, episode=1043 reward=0.7745024 (494.14 it/sec) -training >> step=6251700, episode=1043 reward=0.7841414 (450.37 it/sec) -training >> step=6251800, episode=1043 reward=0.8057756 (508.27 it/sec) -training >> step=6251900, episode=1043 reward=0.8048449 (547.22 it/sec) -training >> step=6252000, episode=1043 reward=0.7856542 (544.34 it/sec) -training >> step=6252100, episode=1043 reward=0.7918791 (523.13 it/sec) -training >> step=6252200, episode=1043 reward=0.7974744 (490.94 it/sec) -training >> step=6252300, episode=1043 reward=0.7852722 (567.33 it/sec) -training >> step=6252400, episode=1043 reward=0.7794866 (526.05 it/sec) -training >> step=6252500, episode=1043 reward=0.784705 (511.25 it/sec) -training >> step=6252600, episode=1043 reward=0.7962421 (551.67 it/sec) -training >> step=6252700, episode=1043 reward=0.7824222 (507.89 it/sec) -training >> step=6252800, episode=1043 reward=0.7795509 (527.34 it/sec) -training >> step=6252900, episode=1043 reward=0.7795072 (517.11 it/sec) -training >> step=6253000, episode=1043 reward=0.7820509 (533.42 it/sec) -training >> step=6253100, episode=1043 reward=0.792483 (518.83 it/sec) -training >> step=6253200, episode=1043 reward=0.7944531 (495.94 it/sec) -training >> step=6253300, episode=1043 reward=0.8032511 (503.39 it/sec) -training >> step=6253400, episode=1043 reward=0.7773051 (555.17 it/sec) -training >> step=6253500, episode=1043 reward=0.776479 (546.58 it/sec) -training >> step=6253600, episode=1043 reward=0.7817425 (501.48 it/sec) -training >> step=6253700, episode=1043 reward=0.7904652 (550.66 it/sec) -training >> step=6253800, episode=1043 reward=0.7629466 (488.65 it/sec) -training >> step=6253900, episode=1043 reward=0.7794801 (532.89 it/sec) -training >> step=6254000, episode=1043 reward=0.7877259 (528.98 it/sec) -training >> step=6254100, episode=1043 reward=0.8020701 (548.90 it/sec) -training >> step=6254200, episode=1043 reward=0.7846584 (508.79 it/sec) -training >> step=6254300, episode=1043 reward=0.7970896 (426.42 it/sec) -training >> step=6254400, episode=1043 reward=0.7876431 (536.85 it/sec) -training >> step=6254500, episode=1043 reward=0.7796519 (523.90 it/sec) -training >> step=6254600, episode=1043 reward=0.786572 (495.60 it/sec) -training >> step=6254700, episode=1043 reward=0.8117521 (511.52 it/sec) -training >> step=6254800, episode=1043 reward=0.780578 (525.48 it/sec) -training >> step=6254900, episode=1043 reward=0.7988285 (456.43 it/sec) -training >> step=6255000, episode=1043 reward=0.7703542 (494.82 it/sec) -training >> step=6255100, episode=1043 reward=0.7771319 (513.79 it/sec) -training >> step=6255200, episode=1043 reward=0.789396 (551.44 it/sec) -training >> step=6255300, episode=1043 reward=0.8046348 (523.43 it/sec) -training >> step=6255400, episode=1043 reward=0.7927203 (443.63 it/sec) -training >> step=6255500, episode=1043 reward=0.7981293 (542.65 it/sec) -training >> step=6255600, episode=1043 reward=0.7774381 (490.96 it/sec) -training >> step=6255700, episode=1043 reward=0.775187 (508.29 it/sec) -training >> step=6255800, episode=1043 reward=0.7777811 (530.30 it/sec) -training >> step=6255900, episode=1043 reward=0.7842923 (493.13 it/sec) -training >> step=6256000, episode=1043 reward=0.7714592 (541.08 it/sec) -training >> step=6256100, episode=1043 reward=0.7891035 (494.30 it/sec) -training >> step=6256200, episode=1043 reward=0.7753076 (422.45 it/sec) -training >> step=6256300, episode=1043 reward=0.776069 (498.85 it/sec) -training >> step=6256400, episode=1043 reward=0.7855109 (465.52 it/sec) -training >> step=6256500, episode=1043 reward=0.7941533 (479.19 it/sec) -training >> step=6256600, episode=1043 reward=0.7920347 (505.17 it/sec) -training >> step=6256700, episode=1043 reward=0.8010187 (456.51 it/sec) -training >> step=6256800, episode=1043 reward=0.780857 (501.80 it/sec) -training >> step=6256900, episode=1043 reward=0.7739827 (451.26 it/sec) -training >> step=6257000, episode=1043 reward=0.7715716 (557.94 it/sec) -training >> step=6257100, episode=1043 reward=0.7654248 (542.31 it/sec) -training >> step=6257200, episode=1043 reward=0.7559839 (507.04 it/sec) -training >> step=6257300, episode=1044 reward=0.775802 (132.89 it/sec) -training >> step=6257400, episode=1044 reward=0.78657 (511.60 it/sec) -training >> step=6257500, episode=1044 reward=0.7905189 (538.69 it/sec) -training >> step=6257600, episode=1044 reward=0.7742361 (529.17 it/sec) -training >> step=6257700, episode=1044 reward=0.7917003 (511.46 it/sec) -training >> step=6257800, episode=1044 reward=0.7881423 (532.06 it/sec) -training >> step=6257900, episode=1044 reward=0.7869186 (526.74 it/sec) -training >> step=6258000, episode=1044 reward=0.7673788 (526.61 it/sec) -training >> step=6258100, episode=1044 reward=0.7997519 (514.37 it/sec) -training >> step=6258200, episode=1044 reward=0.7900403 (485.15 it/sec) -training >> step=6258300, episode=1044 reward=0.7764317 (522.41 it/sec) -training >> step=6258400, episode=1044 reward=0.770668 (544.87 it/sec) -training >> step=6258500, episode=1044 reward=0.779501 (515.14 it/sec) -training >> step=6258600, episode=1044 reward=0.7968267 (505.61 it/sec) -training >> step=6258700, episode=1044 reward=0.7762245 (539.65 it/sec) -training >> step=6258800, episode=1044 reward=0.7841148 (488.57 it/sec) -training >> step=6258900, episode=1044 reward=0.7953148 (533.33 it/sec) -training >> step=6259000, episode=1044 reward=0.7747522 (509.10 it/sec) -training >> step=6259100, episode=1044 reward=0.781453 (503.92 it/sec) -training >> step=6259200, episode=1044 reward=0.7911017 (476.04 it/sec) -training >> step=6259300, episode=1044 reward=0.7763414 (490.20 it/sec) -training >> step=6259400, episode=1044 reward=0.7629521 (535.95 it/sec) -training >> step=6259500, episode=1044 reward=0.792037 (541.12 it/sec) -training >> step=6259600, episode=1044 reward=0.79011 (505.17 it/sec) -training >> step=6259700, episode=1044 reward=0.7907959 (495.16 it/sec) -training >> step=6259800, episode=1044 reward=0.7792443 (490.00 it/sec) -training >> step=6259900, episode=1044 reward=0.7700189 (545.08 it/sec) -training >> step=6260000, episode=1044 reward=0.7567065 (524.97 it/sec) -training >> step=6260100, episode=1044 reward=0.8238106 (516.30 it/sec) -training >> step=6260200, episode=1044 reward=0.8139625 (538.87 it/sec) -training >> step=6260300, episode=1044 reward=0.7634281 (526.88 it/sec) -training >> step=6260400, episode=1044 reward=0.7775471 (468.60 it/sec) -training >> step=6260500, episode=1044 reward=0.7920697 (558.22 it/sec) -training >> step=6260600, episode=1044 reward=0.7907676 (530.56 it/sec) -training >> step=6260700, episode=1044 reward=0.8131259 (549.79 it/sec) -training >> step=6260800, episode=1044 reward=0.8092071 (537.59 it/sec) -training >> step=6260900, episode=1044 reward=0.8012646 (509.15 it/sec) -training >> step=6261000, episode=1044 reward=0.7947325 (524.67 it/sec) -training >> step=6261100, episode=1044 reward=0.7890046 (529.63 it/sec) -training >> step=6261200, episode=1044 reward=0.7777743 (526.36 it/sec) -training >> step=6261300, episode=1044 reward=0.7692289 (529.69 it/sec) -training >> step=6261400, episode=1044 reward=0.7679531 (529.82 it/sec) -training >> step=6261500, episode=1044 reward=0.7821168 (480.18 it/sec) -training >> step=6261600, episode=1044 reward=0.7795427 (525.52 it/sec) -training >> step=6261700, episode=1044 reward=0.8059951 (472.38 it/sec) -training >> step=6261800, episode=1044 reward=0.7868131 (450.41 it/sec) -training >> step=6261900, episode=1044 reward=0.7870149 (522.28 it/sec) -training >> step=6262000, episode=1044 reward=0.7744814 (510.11 it/sec) -training >> step=6262100, episode=1044 reward=0.7809303 (526.34 it/sec) -training >> step=6262200, episode=1044 reward=0.7799612 (516.00 it/sec) -training >> step=6262300, episode=1044 reward=0.7763283 (529.82 it/sec) -training >> step=6262400, episode=1044 reward=0.7870633 (523.70 it/sec) -training >> step=6262500, episode=1044 reward=0.7756793 (479.01 it/sec) -training >> step=6262600, episode=1044 reward=0.787103 (539.04 it/sec) -training >> step=6262700, episode=1044 reward=0.7720476 (538.90 it/sec) -training >> step=6262800, episode=1044 reward=0.7544242 (534.96 it/sec) -training >> step=6262900, episode=1044 reward=0.7835711 (505.98 it/sec) -training >> step=6263000, episode=1044 reward=0.7663917 (510.37 it/sec) -training >> step=6263100, episode=1044 reward=0.7723241 (543.97 it/sec) -training >> step=6263200, episode=1044 reward=0.7705379 (517.82 it/sec) -training >> step=6263300, episode=1045 reward=0.7881322 (124.38 it/sec) -training >> step=6263400, episode=1045 reward=0.7900872 (528.77 it/sec) -training >> step=6263500, episode=1045 reward=0.7881261 (507.21 it/sec) -training >> step=6263600, episode=1045 reward=0.7887965 (509.94 it/sec) -training >> step=6263700, episode=1045 reward=0.7796472 (553.98 it/sec) -training >> step=6263800, episode=1045 reward=0.7765195 (451.03 it/sec) -training >> step=6263900, episode=1045 reward=0.7858877 (495.24 it/sec) -training >> step=6264000, episode=1045 reward=0.7805074 (463.17 it/sec) -training >> step=6264100, episode=1045 reward=0.804601 (513.85 it/sec) -training >> step=6264200, episode=1045 reward=0.7809356 (533.91 it/sec) -training >> step=6264300, episode=1045 reward=0.7953597 (522.94 it/sec) -training >> step=6264400, episode=1045 reward=0.7871071 (528.49 it/sec) -training >> step=6264500, episode=1045 reward=0.7968423 (501.91 it/sec) -training >> step=6264600, episode=1045 reward=0.7755945 (552.22 it/sec) -training >> step=6264700, episode=1045 reward=0.7890897 (502.43 it/sec) -training >> step=6264800, episode=1045 reward=0.7814553 (523.99 it/sec) -training >> step=6264900, episode=1045 reward=0.7805299 (527.01 it/sec) -training >> step=6265000, episode=1045 reward=0.7920468 (554.44 it/sec) -training >> step=6265100, episode=1045 reward=0.7598869 (516.63 it/sec) -training >> step=6265200, episode=1045 reward=0.7877066 (530.02 it/sec) -training >> step=6265300, episode=1045 reward=0.7744964 (530.28 it/sec) -training >> step=6265400, episode=1045 reward=0.7715415 (510.91 it/sec) -training >> step=6265500, episode=1045 reward=0.7623851 (537.11 it/sec) -training >> step=6265600, episode=1045 reward=0.7866995 (520.10 it/sec) -training >> step=6265700, episode=1045 reward=0.7912121 (512.50 it/sec) -training >> step=6265800, episode=1045 reward=0.7764578 (527.05 it/sec) -training >> step=6265900, episode=1045 reward=0.7984979 (414.58 it/sec) -training >> step=6266000, episode=1045 reward=0.788234 (507.25 it/sec) -training >> step=6266100, episode=1045 reward=0.8068294 (539.85 it/sec) -training >> step=6266200, episode=1045 reward=0.7966457 (490.12 it/sec) -training >> step=6266300, episode=1045 reward=0.7604489 (533.04 it/sec) -training >> step=6266400, episode=1045 reward=0.7720444 (508.80 it/sec) -training >> step=6266500, episode=1045 reward=0.7737576 (535.63 it/sec) -training >> step=6266600, episode=1045 reward=0.7990314 (525.95 it/sec) -training >> step=6266700, episode=1045 reward=0.7718263 (503.89 it/sec) -training >> step=6266800, episode=1045 reward=0.7682623 (541.83 it/sec) -training >> step=6266900, episode=1045 reward=0.8028552 (542.05 it/sec) -training >> step=6267000, episode=1045 reward=0.7840335 (483.57 it/sec) -training >> step=6267100, episode=1045 reward=0.7779711 (542.76 it/sec) -training >> step=6267200, episode=1045 reward=0.7788041 (543.22 it/sec) -training >> step=6267300, episode=1045 reward=0.7829463 (494.16 it/sec) -training >> step=6267400, episode=1045 reward=0.7761091 (486.34 it/sec) -training >> step=6267500, episode=1045 reward=0.7865248 (470.04 it/sec) -training >> step=6267600, episode=1045 reward=0.7928347 (539.96 it/sec) -training >> step=6267700, episode=1045 reward=0.7737589 (536.97 it/sec) -training >> step=6267800, episode=1045 reward=0.7752718 (500.19 it/sec) -training >> step=6267900, episode=1045 reward=0.7780802 (541.35 it/sec) -training >> step=6268000, episode=1045 reward=0.7759345 (503.45 it/sec) -training >> step=6268100, episode=1045 reward=0.7799866 (539.74 it/sec) -training >> step=6268200, episode=1045 reward=0.7560384 (532.67 it/sec) -training >> step=6268300, episode=1045 reward=0.782936 (527.68 it/sec) -training >> step=6268400, episode=1045 reward=0.776787 (529.99 it/sec) -training >> step=6268500, episode=1045 reward=0.7976788 (520.63 it/sec) -training >> step=6268600, episode=1045 reward=0.7775314 (526.11 it/sec) -training >> step=6268700, episode=1045 reward=0.7881723 (535.48 it/sec) -training >> step=6268800, episode=1045 reward=0.8034964 (523.94 it/sec) -training >> step=6268900, episode=1045 reward=0.7933484 (521.23 it/sec) -training >> step=6269000, episode=1045 reward=0.781937 (544.59 it/sec) -training >> step=6269100, episode=1045 reward=0.7825251 (452.87 it/sec) -training >> step=6269200, episode=1045 reward=0.7790115 (480.27 it/sec) -training >> step=6269300, episode=1046 reward=0.7874178 (124.24 it/sec) -training >> step=6269400, episode=1046 reward=0.7719385 (532.78 it/sec) -training >> step=6269500, episode=1046 reward=0.7806062 (519.62 it/sec) -training >> step=6269600, episode=1046 reward=0.7923877 (512.68 it/sec) -training >> step=6269700, episode=1046 reward=0.7810792 (527.84 it/sec) -training >> step=6269800, episode=1046 reward=0.8095004 (527.38 it/sec) -training >> step=6269900, episode=1046 reward=0.7961136 (504.30 it/sec) -training >> step=6270000, episode=1046 reward=0.7848949 (544.80 it/sec) -training >> step=6270100, episode=1046 reward=0.7825079 (519.25 it/sec) -training >> step=6270200, episode=1046 reward=0.7835156 (498.31 it/sec) -training >> step=6270300, episode=1046 reward=0.795541 (532.08 it/sec) -training >> step=6270400, episode=1046 reward=0.7964736 (479.96 it/sec) -training >> step=6270500, episode=1046 reward=0.7810827 (536.87 it/sec) -training >> step=6270600, episode=1046 reward=0.7929108 (492.05 it/sec) -training >> step=6270700, episode=1046 reward=0.7856747 (499.05 it/sec) -training >> step=6270800, episode=1046 reward=0.7993678 (550.54 it/sec) -training >> step=6270900, episode=1046 reward=0.7910959 (484.65 it/sec) -training >> step=6271000, episode=1046 reward=0.7693275 (525.06 it/sec) -training >> step=6271100, episode=1046 reward=0.7852233 (503.93 it/sec) -training >> step=6271200, episode=1046 reward=0.7724755 (533.45 it/sec) -training >> step=6271300, episode=1046 reward=0.7715939 (499.27 it/sec) -training >> step=6271400, episode=1046 reward=0.8031502 (511.48 it/sec) -training >> step=6271500, episode=1046 reward=0.7927798 (501.06 it/sec) -training >> step=6271600, episode=1046 reward=0.7946303 (550.51 it/sec) -training >> step=6271700, episode=1046 reward=0.7893566 (479.53 it/sec) -training >> step=6271800, episode=1046 reward=0.7954911 (477.26 it/sec) -training >> step=6271900, episode=1046 reward=0.8010811 (545.70 it/sec) -training >> step=6272000, episode=1046 reward=0.8039255 (476.34 it/sec) -training >> step=6272100, episode=1046 reward=0.7879241 (517.06 it/sec) -training >> step=6272200, episode=1046 reward=0.7956102 (519.79 it/sec) -training >> step=6272300, episode=1046 reward=0.7863678 (506.35 it/sec) -training >> step=6272400, episode=1046 reward=0.7780954 (485.98 it/sec) -training >> step=6272500, episode=1046 reward=0.793625 (468.54 it/sec) -training >> step=6272600, episode=1046 reward=0.786464 (516.98 it/sec) -training >> step=6272700, episode=1046 reward=0.7975807 (520.25 it/sec) -training >> step=6272800, episode=1046 reward=0.7841006 (498.94 it/sec) -training >> step=6272900, episode=1046 reward=0.785549 (518.45 it/sec) -training >> step=6273000, episode=1046 reward=0.7752023 (485.46 it/sec) -training >> step=6273100, episode=1046 reward=0.8042684 (517.68 it/sec) -training >> step=6273200, episode=1046 reward=0.7894973 (521.12 it/sec) -training >> step=6273300, episode=1046 reward=0.7816787 (536.32 it/sec) -training >> step=6273400, episode=1046 reward=0.8075724 (563.48 it/sec) -training >> step=6273500, episode=1046 reward=0.7980576 (462.39 it/sec) -training >> step=6273600, episode=1046 reward=0.7975022 (525.29 it/sec) -training >> step=6273700, episode=1046 reward=0.7824308 (506.28 it/sec) -training >> step=6273800, episode=1046 reward=0.7946791 (545.27 it/sec) -training >> step=6273900, episode=1046 reward=0.7945443 (503.93 it/sec) -training >> step=6274000, episode=1046 reward=0.7908459 (402.55 it/sec) -training >> step=6274100, episode=1046 reward=0.8092692 (533.82 it/sec) -training >> step=6274200, episode=1046 reward=0.7938028 (509.67 it/sec) -training >> step=6274300, episode=1046 reward=0.7825069 (460.68 it/sec) -training >> step=6274400, episode=1046 reward=0.7718424 (521.85 it/sec) -training >> step=6274500, episode=1046 reward=0.7720362 (515.85 it/sec) -training >> step=6274600, episode=1046 reward=0.7706417 (464.17 it/sec) -training >> step=6274700, episode=1046 reward=0.7771804 (524.09 it/sec) -training >> step=6274800, episode=1046 reward=0.7914143 (482.73 it/sec) -training >> step=6274900, episode=1046 reward=0.7856045 (535.42 it/sec) -training >> step=6275000, episode=1046 reward=0.7868845 (523.97 it/sec) -training >> step=6275100, episode=1046 reward=0.7731656 (474.29 it/sec) -training >> step=6275200, episode=1046 reward=0.8105173 (544.98 it/sec) -training >> step=6275300, episode=1047 reward=0.7839026 (112.92 it/sec) -training >> step=6275400, episode=1047 reward=0.7729254 (511.83 it/sec) -training >> step=6275500, episode=1047 reward=0.789573 (449.85 it/sec) -training >> step=6275600, episode=1047 reward=0.7711033 (506.71 it/sec) -training >> step=6275700, episode=1047 reward=0.7891366 (544.52 it/sec) -training >> step=6275800, episode=1047 reward=0.7889034 (516.30 it/sec) -training >> step=6275900, episode=1047 reward=0.7982256 (526.83 it/sec) -training >> step=6276000, episode=1047 reward=0.803406 (533.84 it/sec) -training >> step=6276100, episode=1047 reward=0.778315 (505.33 it/sec) -training >> step=6276200, episode=1047 reward=0.7711561 (527.94 it/sec) -training >> step=6276300, episode=1047 reward=0.7924884 (502.09 it/sec) -training >> step=6276400, episode=1047 reward=0.7704588 (525.39 it/sec) -training >> step=6276500, episode=1047 reward=0.7875142 (506.76 it/sec) -training >> step=6276600, episode=1047 reward=0.7988425 (493.04 it/sec) -training >> step=6276700, episode=1047 reward=0.7723474 (522.93 it/sec) -training >> step=6276800, episode=1047 reward=0.7822728 (529.62 it/sec) -training >> step=6276900, episode=1047 reward=0.7806888 (518.24 it/sec) -training >> step=6277000, episode=1047 reward=0.7779642 (516.43 it/sec) -training >> step=6277100, episode=1047 reward=0.7753289 (531.84 it/sec) -training >> step=6277200, episode=1047 reward=0.7914522 (509.44 it/sec) -training >> step=6277300, episode=1047 reward=0.7976322 (531.17 it/sec) -training >> step=6277400, episode=1047 reward=0.7911945 (487.42 it/sec) -training >> step=6277500, episode=1047 reward=0.7755511 (517.12 it/sec) -training >> step=6277600, episode=1047 reward=0.7807619 (502.04 it/sec) -training >> step=6277700, episode=1047 reward=0.8012896 (501.02 it/sec) -training >> step=6277800, episode=1047 reward=0.8029962 (510.15 it/sec) -training >> step=6277900, episode=1047 reward=0.7879899 (485.09 it/sec) -training >> step=6278000, episode=1047 reward=0.7958112 (520.61 it/sec) -training >> step=6278100, episode=1047 reward=0.7900976 (487.11 it/sec) -training >> step=6278200, episode=1047 reward=0.8035665 (541.90 it/sec) -training >> step=6278300, episode=1047 reward=0.7853189 (482.68 it/sec) -training >> step=6278400, episode=1047 reward=0.7675158 (469.41 it/sec) -training >> step=6278500, episode=1047 reward=0.7908096 (522.68 it/sec) -training >> step=6278600, episode=1047 reward=0.7907306 (555.99 it/sec) -training >> step=6278700, episode=1047 reward=0.7808084 (465.71 it/sec) -training >> step=6278800, episode=1047 reward=0.7689396 (424.09 it/sec) -training >> step=6278900, episode=1047 reward=0.8032423 (463.33 it/sec) -training >> step=6279000, episode=1047 reward=0.7847797 (534.35 it/sec) -training >> step=6279100, episode=1047 reward=0.7730598 (510.22 it/sec) -training >> step=6279200, episode=1047 reward=0.7855132 (493.46 it/sec) -training >> step=6279300, episode=1047 reward=0.7867316 (548.11 it/sec) -training >> step=6279400, episode=1047 reward=0.776234 (482.97 it/sec) -training >> step=6279500, episode=1047 reward=0.7946416 (496.08 it/sec) -training >> step=6279600, episode=1047 reward=0.7864845 (532.41 it/sec) -training >> step=6279700, episode=1047 reward=0.7741123 (514.05 it/sec) -training >> step=6279800, episode=1047 reward=0.7742718 (557.31 it/sec) -training >> step=6279900, episode=1047 reward=0.7655973 (504.96 it/sec) -training >> step=6280000, episode=1047 reward=0.8062868 (510.20 it/sec) -training >> step=6280100, episode=1047 reward=0.7764997 (485.10 it/sec) -training >> step=6280200, episode=1047 reward=0.7978351 (499.15 it/sec) -training >> step=6280300, episode=1047 reward=0.800241 (480.78 it/sec) -training >> step=6280400, episode=1047 reward=0.7851257 (533.62 it/sec) -training >> step=6280500, episode=1047 reward=0.7776756 (512.15 it/sec) -training >> step=6280600, episode=1047 reward=0.7610466 (499.49 it/sec) -training >> step=6280700, episode=1047 reward=0.7776617 (509.49 it/sec) -training >> step=6280800, episode=1047 reward=0.7841434 (536.53 it/sec) -training >> step=6280900, episode=1047 reward=0.7839271 (517.62 it/sec) -training >> step=6281000, episode=1047 reward=0.7715134 (476.87 it/sec) -training >> step=6281100, episode=1047 reward=0.7902294 (540.11 it/sec) -training >> step=6281200, episode=1047 reward=0.7758884 (525.52 it/sec) -training >> step=6281300, episode=1048 reward=0.7710573 (98.11 it/sec) -training >> step=6281400, episode=1048 reward=0.7679348 (525.13 it/sec) -training >> step=6281500, episode=1048 reward=0.7938637 (516.05 it/sec) -training >> step=6281600, episode=1048 reward=0.7937657 (452.71 it/sec) -training >> step=6281700, episode=1048 reward=0.7955149 (496.56 it/sec) -training >> step=6281800, episode=1048 reward=0.7723992 (502.42 it/sec) -training >> step=6281900, episode=1048 reward=0.8072006 (479.07 it/sec) -training >> step=6282000, episode=1048 reward=0.7904448 (497.96 it/sec) -training >> step=6282100, episode=1048 reward=0.7939665 (520.71 it/sec) -training >> step=6282200, episode=1048 reward=0.7883539 (516.09 it/sec) -training >> step=6282300, episode=1048 reward=0.7863715 (516.02 it/sec) -training >> step=6282400, episode=1048 reward=0.778312 (524.39 it/sec) -training >> step=6282500, episode=1048 reward=0.779419 (527.84 it/sec) -training >> step=6282600, episode=1048 reward=0.7785108 (495.51 it/sec) -training >> step=6282700, episode=1048 reward=0.7835495 (506.37 it/sec) -training >> step=6282800, episode=1048 reward=0.7770237 (535.20 it/sec) -training >> step=6282900, episode=1048 reward=0.7921929 (503.78 it/sec) -training >> step=6283000, episode=1048 reward=0.7863058 (540.50 it/sec) -training >> step=6283100, episode=1048 reward=0.7939119 (545.50 it/sec) -training >> step=6283200, episode=1048 reward=0.7800807 (472.32 it/sec) -training >> step=6283300, episode=1048 reward=0.7803451 (495.91 it/sec) -training >> step=6283400, episode=1048 reward=0.7930954 (482.65 it/sec) -training >> step=6283500, episode=1048 reward=0.7876421 (524.13 it/sec) -training >> step=6283600, episode=1048 reward=0.785439 (520.54 it/sec) -training >> step=6283700, episode=1048 reward=0.7687247 (481.61 it/sec) -training >> step=6283800, episode=1048 reward=0.7882664 (547.66 it/sec) -training >> step=6283900, episode=1048 reward=0.7860711 (521.08 it/sec) -training >> step=6284000, episode=1048 reward=0.780626 (538.31 it/sec) -training >> step=6284100, episode=1048 reward=0.7846965 (537.54 it/sec) -training >> step=6284200, episode=1048 reward=0.7877998 (475.58 it/sec) -training >> step=6284300, episode=1048 reward=0.7907079 (498.55 it/sec) -training >> step=6284400, episode=1048 reward=0.7865197 (478.26 it/sec) -training >> step=6284500, episode=1048 reward=0.7788376 (495.84 it/sec) -training >> step=6284600, episode=1048 reward=0.7778229 (444.59 it/sec) -training >> step=6284700, episode=1048 reward=0.7911754 (446.66 it/sec) -training >> step=6284800, episode=1048 reward=0.7943709 (484.08 it/sec) -training >> step=6284900, episode=1048 reward=0.7920654 (512.33 it/sec) -training >> step=6285000, episode=1048 reward=0.7842507 (534.76 it/sec) -training >> step=6285100, episode=1048 reward=0.7885575 (525.96 it/sec) -training >> step=6285200, episode=1048 reward=0.7945951 (501.09 it/sec) -training >> step=6285300, episode=1048 reward=0.7805212 (518.49 it/sec) -training >> step=6285400, episode=1048 reward=0.7905694 (512.26 it/sec) -training >> step=6285500, episode=1048 reward=0.7910703 (482.55 it/sec) -training >> step=6285600, episode=1048 reward=0.763719 (539.44 it/sec) -training >> step=6285700, episode=1048 reward=0.7966121 (475.79 it/sec) -training >> step=6285800, episode=1048 reward=0.7774452 (503.45 it/sec) -training >> step=6285900, episode=1048 reward=0.7864108 (514.21 it/sec) -training >> step=6286000, episode=1048 reward=0.784703 (540.42 it/sec) -training >> step=6286100, episode=1048 reward=0.7755924 (506.31 it/sec) -training >> step=6286200, episode=1048 reward=0.7958327 (534.37 it/sec) -training >> step=6286300, episode=1048 reward=0.7919106 (481.82 it/sec) -training >> step=6286400, episode=1048 reward=0.7756767 (506.14 it/sec) -training >> step=6286500, episode=1048 reward=0.8000738 (514.92 it/sec) -training >> step=6286600, episode=1048 reward=0.7768755 (519.31 it/sec) -training >> step=6286700, episode=1048 reward=0.7645221 (535.86 it/sec) -training >> step=6286800, episode=1048 reward=0.7841451 (493.90 it/sec) -training >> step=6286900, episode=1048 reward=0.7936357 (535.72 it/sec) -training >> step=6287000, episode=1048 reward=0.7875708 (514.79 it/sec) -training >> step=6287100, episode=1048 reward=0.7853925 (509.30 it/sec) -training >> step=6287200, episode=1048 reward=0.7806319 (512.20 it/sec) -training >> step=6287300, episode=1049 reward=0.784183 (106.78 it/sec) -training >> step=6287400, episode=1049 reward=0.7700995 (530.85 it/sec) -training >> step=6287500, episode=1049 reward=0.7837197 (480.88 it/sec) -training >> step=6287600, episode=1049 reward=0.7901974 (506.56 it/sec) -training >> step=6287700, episode=1049 reward=0.7718535 (495.70 it/sec) -training >> step=6287800, episode=1049 reward=0.7915492 (543.84 it/sec) -training >> step=6287900, episode=1049 reward=0.7943194 (531.62 it/sec) -training >> step=6288000, episode=1049 reward=0.7643697 (475.84 it/sec) -training >> step=6288100, episode=1049 reward=0.7901142 (491.23 it/sec) -training >> step=6288200, episode=1049 reward=0.7802751 (494.36 it/sec) -training >> step=6288300, episode=1049 reward=0.7983755 (509.21 it/sec) -training >> step=6288400, episode=1049 reward=0.8096358 (510.56 it/sec) -training >> step=6288500, episode=1049 reward=0.7977326 (521.32 it/sec) -training >> step=6288600, episode=1049 reward=0.8014424 (487.33 it/sec) -training >> step=6288700, episode=1049 reward=0.778356 (499.49 it/sec) -training >> step=6288800, episode=1049 reward=0.7859191 (514.97 it/sec) -training >> step=6288900, episode=1049 reward=0.794975 (525.37 it/sec) -training >> step=6289000, episode=1049 reward=0.7873603 (492.13 it/sec) -training >> step=6289100, episode=1049 reward=0.7791567 (504.65 it/sec) -training >> step=6289200, episode=1049 reward=0.7805015 (504.50 it/sec) -training >> step=6289300, episode=1049 reward=0.806846 (443.48 it/sec) -training >> step=6289400, episode=1049 reward=0.7860888 (525.48 it/sec) -training >> step=6289500, episode=1049 reward=0.7882628 (497.02 it/sec) -training >> step=6289600, episode=1049 reward=0.7958383 (471.74 it/sec) -training >> step=6289700, episode=1049 reward=0.7796284 (507.94 it/sec) -training >> step=6289800, episode=1049 reward=0.7650232 (489.68 it/sec) -training >> step=6289900, episode=1049 reward=0.8077703 (516.69 it/sec) -training >> step=6290000, episode=1049 reward=0.7676 (526.28 it/sec) -training >> step=6290100, episode=1049 reward=0.7833399 (480.74 it/sec) -training >> step=6290200, episode=1049 reward=0.7861049 (503.25 it/sec) -training >> step=6290300, episode=1049 reward=0.780954 (510.06 it/sec) -training >> step=6290400, episode=1049 reward=0.8035584 (466.46 it/sec) -training >> step=6290500, episode=1049 reward=0.7753418 (532.06 it/sec) -training >> step=6290600, episode=1049 reward=0.7978293 (497.17 it/sec) -training >> step=6290700, episode=1049 reward=0.7921408 (513.23 it/sec) -training >> step=6290800, episode=1049 reward=0.7781256 (483.52 it/sec) -training >> step=6290900, episode=1049 reward=0.8015964 (523.37 it/sec) -training >> step=6291000, episode=1049 reward=0.7811213 (517.06 it/sec) -training >> step=6291100, episode=1049 reward=0.7962906 (477.51 it/sec) -training >> step=6291200, episode=1049 reward=0.7809882 (525.89 it/sec) -training >> step=6291300, episode=1049 reward=0.789453 (473.33 it/sec) -training >> step=6291400, episode=1049 reward=0.8083541 (557.25 it/sec) -training >> step=6291500, episode=1049 reward=0.7727931 (510.94 it/sec) -training >> step=6291600, episode=1049 reward=0.789503 (477.70 it/sec) -training >> step=6291700, episode=1049 reward=0.7793742 (518.58 it/sec) -training >> step=6291800, episode=1049 reward=0.7769585 (497.61 it/sec) -training >> step=6291900, episode=1049 reward=0.7903577 (510.66 it/sec) -training >> step=6292000, episode=1049 reward=0.8158007 (502.22 it/sec) -training >> step=6292100, episode=1049 reward=0.7784186 (485.24 it/sec) -training >> step=6292200, episode=1049 reward=0.7679266 (479.37 it/sec) -training >> step=6292300, episode=1049 reward=0.7905658 (480.77 it/sec) -training >> step=6292400, episode=1049 reward=0.7796074 (527.42 it/sec) -training >> step=6292500, episode=1049 reward=0.7957154 (545.00 it/sec) -training >> step=6292600, episode=1049 reward=0.7913738 (500.18 it/sec) -training >> step=6292700, episode=1049 reward=0.78412 (500.02 it/sec) -training >> step=6292800, episode=1049 reward=0.7721465 (511.34 it/sec) -training >> step=6292900, episode=1049 reward=0.7749486 (392.16 it/sec) -training >> step=6293000, episode=1049 reward=0.7905842 (523.68 it/sec) -training >> step=6293100, episode=1049 reward=0.7870825 (465.09 it/sec) -training >> step=6293200, episode=1049 reward=0.7789525 (528.50 it/sec) -training >> step=6293300, episode=1050 reward=0.7747798 (92.49 it/sec) -training >> step=6293400, episode=1050 reward=0.781168 (488.45 it/sec) -training >> step=6293500, episode=1050 reward=0.7861359 (524.10 it/sec) -training >> step=6293600, episode=1050 reward=0.7927336 (509.63 it/sec) -training >> step=6293700, episode=1050 reward=0.7731807 (508.25 it/sec) -training >> step=6293800, episode=1050 reward=0.7827967 (513.21 it/sec) -training >> step=6293900, episode=1050 reward=0.7816361 (533.27 it/sec) -training >> step=6294000, episode=1050 reward=0.7810205 (501.89 it/sec) -training >> step=6294100, episode=1050 reward=0.7821415 (512.14 it/sec) -training >> step=6294200, episode=1050 reward=0.754235 (505.87 it/sec) -training >> step=6294300, episode=1050 reward=0.7701814 (500.75 it/sec) -training >> step=6294400, episode=1050 reward=0.7938381 (488.56 it/sec) -training >> step=6294500, episode=1050 reward=0.77365 (510.93 it/sec) -training >> step=6294600, episode=1050 reward=0.782016 (496.34 it/sec) -training >> step=6294700, episode=1050 reward=0.7759809 (480.86 it/sec) -training >> step=6294800, episode=1050 reward=0.7619553 (511.73 it/sec) -training >> step=6294900, episode=1050 reward=0.790199 (480.68 it/sec) -training >> step=6295000, episode=1050 reward=0.7913445 (534.60 it/sec) -training >> step=6295100, episode=1050 reward=0.7999936 (527.87 it/sec) -training >> step=6295200, episode=1050 reward=0.8079903 (496.90 it/sec) -training >> step=6295300, episode=1050 reward=0.7919968 (534.37 it/sec) -training >> step=6295400, episode=1050 reward=0.7723387 (516.54 it/sec) -training >> step=6295500, episode=1050 reward=0.7867504 (531.31 it/sec) -training >> step=6295600, episode=1050 reward=0.8006041 (513.35 it/sec) -training >> step=6295700, episode=1050 reward=0.7962678 (509.38 it/sec) -training >> step=6295800, episode=1050 reward=0.7942363 (515.96 it/sec) -training >> step=6295900, episode=1050 reward=0.7794286 (503.34 it/sec) -training >> step=6296000, episode=1050 reward=0.7797785 (482.13 it/sec) -training >> step=6296100, episode=1050 reward=0.7941711 (499.89 it/sec) -training >> step=6296200, episode=1050 reward=0.789031 (513.34 it/sec) -training >> step=6296300, episode=1050 reward=0.8016961 (492.63 it/sec) -training >> step=6296400, episode=1050 reward=0.7744457 (495.21 it/sec) -training >> step=6296500, episode=1050 reward=0.7971142 (529.88 it/sec) -training >> step=6296600, episode=1050 reward=0.7871226 (506.73 it/sec) -training >> step=6296700, episode=1050 reward=0.8019305 (495.64 it/sec) -training >> step=6296800, episode=1050 reward=0.7869583 (499.12 it/sec) -training >> step=6296900, episode=1050 reward=0.7751718 (496.08 it/sec) -training >> step=6297000, episode=1050 reward=0.7899578 (480.78 it/sec) -training >> step=6297100, episode=1050 reward=0.7920387 (500.64 it/sec) -training >> step=6297200, episode=1050 reward=0.7871236 (508.06 it/sec) -training >> step=6297300, episode=1050 reward=0.7859306 (499.68 it/sec) -training >> step=6297400, episode=1050 reward=0.7836167 (508.00 it/sec) -training >> step=6297500, episode=1050 reward=0.7993537 (541.31 it/sec) -training >> step=6297600, episode=1050 reward=0.7925653 (513.86 it/sec) -training >> step=6297700, episode=1050 reward=0.7806349 (502.52 it/sec) -training >> step=6297800, episode=1050 reward=0.766368 (489.33 it/sec) -training >> step=6297900, episode=1050 reward=0.7993031 (502.53 it/sec) -training >> step=6298000, episode=1050 reward=0.7797114 (510.58 it/sec) -training >> step=6298100, episode=1050 reward=0.7731913 (519.03 it/sec) -training >> step=6298200, episode=1050 reward=0.7895877 (519.54 it/sec) -training >> step=6298300, episode=1050 reward=0.7796077 (492.10 it/sec) -training >> step=6298400, episode=1050 reward=0.7674324 (481.09 it/sec) -training >> step=6298500, episode=1050 reward=0.7974457 (497.96 it/sec) -training >> step=6298600, episode=1050 reward=0.7916341 (555.01 it/sec) -training >> step=6298700, episode=1050 reward=0.7868664 (504.36 it/sec) -training >> step=6298800, episode=1050 reward=0.7766052 (464.47 it/sec) -training >> step=6298900, episode=1050 reward=0.7964788 (514.20 it/sec) -training >> step=6299000, episode=1050 reward=0.778947 (505.27 it/sec) -training >> step=6299100, episode=1050 reward=0.7848446 (513.03 it/sec) -training >> step=6299200, episode=1050 reward=0.7939913 (513.92 it/sec) -training >> step=6299300, episode=1051 reward=0.7799082 (96.94 it/sec) -training >> step=6299400, episode=1051 reward=0.7971796 (503.23 it/sec) -training >> step=6299500, episode=1051 reward=0.7744868 (494.58 it/sec) -training >> step=6299600, episode=1051 reward=0.7685805 (529.52 it/sec) -training >> step=6299700, episode=1051 reward=0.7713689 (477.71 it/sec) -training >> step=6299800, episode=1051 reward=0.7803376 (504.55 it/sec) -training >> step=6299900, episode=1051 reward=0.7800308 (540.59 it/sec) -training >> step=6300000, episode=1051 reward=0.7764331 (499.76 it/sec) -training >> step=6300100, episode=1051 reward=0.7839391 (492.42 it/sec) -training >> step=6300200, episode=1051 reward=0.7743599 (495.00 it/sec) -training >> step=6300300, episode=1051 reward=0.761658 (512.90 it/sec) -training >> step=6300400, episode=1051 reward=0.8062716 (512.27 it/sec) -training >> step=6300500, episode=1051 reward=0.7809837 (478.95 it/sec) -training >> step=6300600, episode=1051 reward=0.8066738 (511.90 it/sec) -training >> step=6300700, episode=1051 reward=0.7805701 (472.95 it/sec) -training >> step=6300800, episode=1051 reward=0.7915237 (495.87 it/sec) -training >> step=6300900, episode=1051 reward=0.7789341 (543.27 it/sec) -training >> step=6301000, episode=1051 reward=0.7893158 (538.94 it/sec) -training >> step=6301100, episode=1051 reward=0.7871592 (488.00 it/sec) -training >> step=6301200, episode=1051 reward=0.7755613 (514.00 it/sec) -training >> step=6301300, episode=1051 reward=0.7975259 (516.97 it/sec) -training >> step=6301400, episode=1051 reward=0.789727 (519.27 it/sec) -training >> step=6301500, episode=1051 reward=0.8001906 (540.61 it/sec) -training >> step=6301600, episode=1051 reward=0.80553 (489.69 it/sec) -training >> step=6301700, episode=1051 reward=0.7792683 (511.68 it/sec) -training >> step=6301800, episode=1051 reward=0.7866731 (531.49 it/sec) -training >> step=6301900, episode=1051 reward=0.7775562 (515.62 it/sec) -training >> step=6302000, episode=1051 reward=0.7838427 (511.98 it/sec) -training >> step=6302100, episode=1051 reward=0.7929305 (516.31 it/sec) -training >> step=6302200, episode=1051 reward=0.7842931 (509.91 it/sec) -training >> step=6302300, episode=1051 reward=0.7878356 (546.04 it/sec) -training >> step=6302400, episode=1051 reward=0.7894818 (535.84 it/sec) -training >> step=6302500, episode=1051 reward=0.7828457 (492.95 it/sec) -training >> step=6302600, episode=1051 reward=0.7815084 (471.09 it/sec) -training >> step=6302700, episode=1051 reward=0.7761368 (548.06 it/sec) -training >> step=6302800, episode=1051 reward=0.7794236 (533.64 it/sec) -training >> step=6302900, episode=1051 reward=0.7903928 (517.61 it/sec) -training >> step=6303000, episode=1051 reward=0.7865262 (518.96 it/sec) -training >> step=6303100, episode=1051 reward=0.7842507 (532.43 it/sec) -training >> step=6303200, episode=1051 reward=0.7857635 (492.72 it/sec) -training >> step=6303300, episode=1051 reward=0.7969701 (493.82 it/sec) -training >> step=6303400, episode=1051 reward=0.8066297 (523.36 it/sec) -training >> step=6303500, episode=1051 reward=0.7927029 (557.61 it/sec) -training >> step=6303600, episode=1051 reward=0.7945457 (517.19 it/sec) -training >> step=6303700, episode=1051 reward=0.7818248 (430.03 it/sec) -training >> step=6303800, episode=1051 reward=0.7795705 (458.33 it/sec) -training >> step=6303900, episode=1051 reward=0.7705835 (471.86 it/sec) -training >> step=6304000, episode=1051 reward=0.7854598 (479.46 it/sec) -training >> step=6304100, episode=1051 reward=0.7933788 (416.66 it/sec) -training >> step=6304200, episode=1051 reward=0.777261 (398.43 it/sec) -training >> step=6304300, episode=1051 reward=0.7615125 (407.99 it/sec) -training >> step=6304400, episode=1051 reward=0.7623975 (464.01 it/sec) -training >> step=6304500, episode=1051 reward=0.7637374 (479.03 it/sec) -training >> step=6304600, episode=1051 reward=0.7938305 (501.57 it/sec) -training >> step=6304700, episode=1051 reward=0.7765093 (457.83 it/sec) -training >> step=6304800, episode=1051 reward=0.7758248 (456.25 it/sec) -training >> step=6304900, episode=1051 reward=0.7923136 (498.37 it/sec) -training >> step=6305000, episode=1051 reward=0.7745605 (499.77 it/sec) -training >> step=6305100, episode=1051 reward=0.7691751 (481.58 it/sec) -training >> step=6305200, episode=1051 reward=0.8023821 (480.04 it/sec) -training >> step=6305300, episode=1052 reward=0.7896224 (136.57 it/sec) -training >> step=6305400, episode=1052 reward=0.793146 (487.53 it/sec) -training >> step=6305500, episode=1052 reward=0.7836455 (513.54 it/sec) -training >> step=6305600, episode=1052 reward=0.7734992 (550.52 it/sec) -training >> step=6305700, episode=1052 reward=0.7864708 (562.15 it/sec) -training >> step=6305800, episode=1052 reward=0.8117796 (536.90 it/sec) -training >> step=6305900, episode=1052 reward=0.7942191 (496.51 it/sec) -training >> step=6306000, episode=1052 reward=0.7669403 (496.23 it/sec) -training >> step=6306100, episode=1052 reward=0.7877257 (505.81 it/sec) -training >> step=6306200, episode=1052 reward=0.7958801 (515.98 it/sec) -training >> step=6306300, episode=1052 reward=0.792442 (516.95 it/sec) -training >> step=6306400, episode=1052 reward=0.7690977 (496.66 it/sec) -training >> step=6306500, episode=1052 reward=0.7809775 (499.90 it/sec) -training >> step=6306600, episode=1052 reward=0.8030614 (489.11 it/sec) -training >> step=6306700, episode=1052 reward=0.7880421 (466.14 it/sec) -training >> step=6306800, episode=1052 reward=0.7877606 (481.19 it/sec) -training >> step=6306900, episode=1052 reward=0.8082649 (449.10 it/sec) -training >> step=6307000, episode=1052 reward=0.7836212 (410.40 it/sec) -training >> step=6307100, episode=1052 reward=0.7688074 (362.32 it/sec) -training >> step=6307200, episode=1052 reward=0.7866489 (430.04 it/sec) -training >> step=6307300, episode=1052 reward=0.7896768 (465.26 it/sec) -training >> step=6307400, episode=1052 reward=0.7776278 (478.16 it/sec) -training >> step=6307500, episode=1052 reward=0.7728904 (507.24 it/sec) -training >> step=6307600, episode=1052 reward=0.8062919 (492.53 it/sec) -training >> step=6307700, episode=1052 reward=0.7854762 (494.24 it/sec) -training >> step=6307800, episode=1052 reward=0.7932942 (453.37 it/sec) -training >> step=6307900, episode=1052 reward=0.7729766 (358.88 it/sec) -training >> step=6308000, episode=1052 reward=0.7938721 (431.62 it/sec) -training >> step=6308100, episode=1052 reward=0.7910535 (475.43 it/sec) -training >> step=6308200, episode=1052 reward=0.7893946 (492.74 it/sec) -training >> step=6308300, episode=1052 reward=0.7809744 (463.48 it/sec) -training >> step=6308400, episode=1052 reward=0.7949746 (447.45 it/sec) -training >> step=6308500, episode=1052 reward=0.7840084 (467.43 it/sec) -training >> step=6308600, episode=1052 reward=0.805586 (519.00 it/sec) -training >> step=6308700, episode=1052 reward=0.798229 (482.00 it/sec) -training >> step=6308800, episode=1052 reward=0.799455 (503.99 it/sec) -training >> step=6308900, episode=1052 reward=0.7890176 (496.97 it/sec) -training >> step=6309000, episode=1052 reward=0.7753947 (479.09 it/sec) -training >> step=6309100, episode=1052 reward=0.7822461 (481.21 it/sec) -training >> step=6309200, episode=1052 reward=0.7724143 (519.59 it/sec) -training >> step=6309300, episode=1052 reward=0.793232 (518.92 it/sec) -training >> step=6309400, episode=1052 reward=0.7847112 (467.32 it/sec) -training >> step=6309500, episode=1052 reward=0.7796925 (507.50 it/sec) -training >> step=6309600, episode=1052 reward=0.7841437 (474.79 it/sec) -training >> step=6309700, episode=1052 reward=0.7768185 (499.31 it/sec) -training >> step=6309800, episode=1052 reward=0.7759463 (491.91 it/sec) -training >> step=6309900, episode=1052 reward=0.7788877 (486.13 it/sec) -training >> step=6310000, episode=1052 reward=0.7916245 (494.49 it/sec) -training >> step=6310100, episode=1052 reward=0.7791306 (531.25 it/sec) -training >> step=6310200, episode=1052 reward=0.7942876 (516.03 it/sec) -training >> step=6310300, episode=1052 reward=0.7847517 (467.09 it/sec) -training >> step=6310400, episode=1052 reward=0.7931758 (466.57 it/sec) -training >> step=6310500, episode=1052 reward=0.7817737 (499.47 it/sec) -training >> step=6310600, episode=1052 reward=0.7736862 (456.44 it/sec) -training >> step=6310700, episode=1052 reward=0.8152573 (483.80 it/sec) -training >> step=6310800, episode=1052 reward=0.776181 (461.13 it/sec) -training >> step=6310900, episode=1052 reward=0.7955883 (424.92 it/sec) -training >> step=6311000, episode=1052 reward=0.7776998 (472.82 it/sec) -training >> step=6311100, episode=1052 reward=0.7731115 (442.93 it/sec) -training >> step=6311200, episode=1052 reward=0.7928414 (387.64 it/sec) -training >> step=6311300, episode=1053 reward=0.8025797 (101.15 it/sec) -training >> step=6311400, episode=1053 reward=0.7607945 (451.49 it/sec) -training >> step=6311500, episode=1053 reward=0.7732238 (420.29 it/sec) -training >> step=6311600, episode=1053 reward=0.7827759 (468.06 it/sec) -training >> step=6311700, episode=1053 reward=0.773896 (487.75 it/sec) -training >> step=6311800, episode=1053 reward=0.8052358 (462.27 it/sec) -training >> step=6311900, episode=1053 reward=0.7782485 (463.65 it/sec) -training >> step=6312000, episode=1053 reward=0.7770167 (417.58 it/sec) -training >> step=6312100, episode=1053 reward=0.7819478 (444.98 it/sec) -training >> step=6312200, episode=1053 reward=0.783119 (489.83 it/sec) -training >> step=6312300, episode=1053 reward=0.780208 (433.67 it/sec) -training >> step=6312400, episode=1053 reward=0.7918368 (457.76 it/sec) -training >> step=6312500, episode=1053 reward=0.7808951 (480.33 it/sec) -training >> step=6312600, episode=1053 reward=0.7673837 (436.83 it/sec) -training >> step=6312700, episode=1053 reward=0.7880746 (435.49 it/sec) -training >> step=6312800, episode=1053 reward=0.8003634 (450.84 it/sec) -training >> step=6312900, episode=1053 reward=0.782068 (507.10 it/sec) -training >> step=6313000, episode=1053 reward=0.7812775 (424.82 it/sec) -training >> step=6313100, episode=1053 reward=0.7893004 (466.05 it/sec) -training >> step=6313200, episode=1053 reward=0.7899339 (444.11 it/sec) -training >> step=6313300, episode=1053 reward=0.7879565 (450.85 it/sec) -training >> step=6313400, episode=1053 reward=0.775506 (438.79 it/sec) -training >> step=6313500, episode=1053 reward=0.7868608 (461.15 it/sec) -training >> step=6313600, episode=1053 reward=0.7891842 (462.41 it/sec) -training >> step=6313700, episode=1053 reward=0.7839217 (407.31 it/sec) -training >> step=6313800, episode=1053 reward=0.7803646 (448.81 it/sec) -training >> step=6313900, episode=1053 reward=0.790418 (494.85 it/sec) -training >> step=6314000, episode=1053 reward=0.7794314 (468.60 it/sec) -training >> step=6314100, episode=1053 reward=0.8048311 (484.24 it/sec) -training >> step=6314200, episode=1053 reward=0.7990377 (445.13 it/sec) -training >> step=6314300, episode=1053 reward=0.7805439 (474.83 it/sec) -training >> step=6314400, episode=1053 reward=0.7979295 (436.97 it/sec) -training >> step=6314500, episode=1053 reward=0.8084846 (420.13 it/sec) -training >> step=6314600, episode=1053 reward=0.7889648 (489.07 it/sec) -training >> step=6314700, episode=1053 reward=0.7875799 (516.63 it/sec) -training >> step=6314800, episode=1053 reward=0.7632977 (505.83 it/sec) -training >> step=6314900, episode=1053 reward=0.7656348 (510.10 it/sec) -training >> step=6315000, episode=1053 reward=0.7878792 (517.65 it/sec) -training >> step=6315100, episode=1053 reward=0.7890534 (490.57 it/sec) -training >> step=6315200, episode=1053 reward=0.7989617 (487.05 it/sec) -training >> step=6315300, episode=1053 reward=0.7631029 (497.93 it/sec) -training >> step=6315400, episode=1053 reward=0.7845055 (529.31 it/sec) -training >> step=6315500, episode=1053 reward=0.7909617 (491.15 it/sec) -training >> step=6315600, episode=1053 reward=0.7850788 (518.26 it/sec) -training >> step=6315700, episode=1053 reward=0.7787065 (477.24 it/sec) -training >> step=6315800, episode=1053 reward=0.7815537 (489.16 it/sec) -training >> step=6315900, episode=1053 reward=0.7879681 (515.83 it/sec) -training >> step=6316000, episode=1053 reward=0.7733994 (506.58 it/sec) -training >> step=6316100, episode=1053 reward=0.7844074 (541.96 it/sec) -training >> step=6316200, episode=1053 reward=0.7811022 (470.86 it/sec) -training >> step=6316300, episode=1053 reward=0.7873952 (477.02 it/sec) -training >> step=6316400, episode=1053 reward=0.7803828 (535.06 it/sec) -training >> step=6316500, episode=1053 reward=0.7738784 (528.21 it/sec) -training >> step=6316600, episode=1053 reward=0.7736841 (511.29 it/sec) -training >> step=6316700, episode=1053 reward=0.7799929 (500.05 it/sec) -training >> step=6316800, episode=1053 reward=0.7978106 (515.42 it/sec) -training >> step=6316900, episode=1053 reward=0.7838697 (483.17 it/sec) -training >> step=6317000, episode=1053 reward=0.7919064 (506.90 it/sec) -training >> step=6317100, episode=1053 reward=0.7826052 (477.91 it/sec) -training >> step=6317200, episode=1053 reward=0.7968442 (506.40 it/sec) -training >> step=6317300, episode=1054 reward=0.7738471 (58.99 it/sec) -training >> step=6317400, episode=1054 reward=0.7870307 (521.39 it/sec) -training >> step=6317500, episode=1054 reward=0.7912644 (482.82 it/sec) -training >> step=6317600, episode=1054 reward=0.7959898 (456.61 it/sec) -training >> step=6317700, episode=1054 reward=0.800993 (554.36 it/sec) -training >> step=6317800, episode=1054 reward=0.7971981 (474.79 it/sec) -training >> step=6317900, episode=1054 reward=0.7832181 (501.72 it/sec) -training >> step=6318000, episode=1054 reward=0.7823609 (510.80 it/sec) -training >> step=6318100, episode=1054 reward=0.7852448 (469.42 it/sec) -training >> step=6318200, episode=1054 reward=0.7738312 (477.82 it/sec) -training >> step=6318300, episode=1054 reward=0.7838732 (464.79 it/sec) -training >> step=6318400, episode=1054 reward=0.8022597 (537.62 it/sec) -training >> step=6318500, episode=1054 reward=0.7897857 (491.42 it/sec) -training >> step=6318600, episode=1054 reward=0.7907555 (514.98 it/sec) -training >> step=6318700, episode=1054 reward=0.7752893 (546.95 it/sec) -training >> step=6318800, episode=1054 reward=0.7727904 (488.58 it/sec) -training >> step=6318900, episode=1054 reward=0.7812871 (529.79 it/sec) -training >> step=6319000, episode=1054 reward=0.7950343 (525.35 it/sec) -training >> step=6319100, episode=1054 reward=0.776893 (538.24 it/sec) -training >> step=6319200, episode=1054 reward=0.7880356 (518.93 it/sec) -training >> step=6319300, episode=1054 reward=0.7909096 (505.85 it/sec) -training >> step=6319400, episode=1054 reward=0.7971996 (520.48 it/sec) -training >> step=6319500, episode=1054 reward=0.7886299 (495.98 it/sec) -training >> step=6319600, episode=1054 reward=0.7832345 (470.20 it/sec) -training >> step=6319700, episode=1054 reward=0.810641 (517.19 it/sec) -training >> step=6319800, episode=1054 reward=0.7905596 (462.72 it/sec) -training >> step=6319900, episode=1054 reward=0.7763263 (474.76 it/sec) -training >> step=6320000, episode=1054 reward=0.8044643 (493.37 it/sec) -training >> step=6320100, episode=1054 reward=0.7728659 (490.25 it/sec) -training >> step=6320200, episode=1054 reward=0.7972124 (524.52 it/sec) -training >> step=6320300, episode=1054 reward=0.781483 (478.46 it/sec) -training >> step=6320400, episode=1054 reward=0.7761953 (470.70 it/sec) -training >> step=6320500, episode=1054 reward=0.7648074 (484.03 it/sec) -training >> step=6320600, episode=1054 reward=0.7839897 (475.38 it/sec) -training >> step=6320700, episode=1054 reward=0.7719302 (501.75 it/sec) -training >> step=6320800, episode=1054 reward=0.7842606 (498.53 it/sec) -training >> step=6320900, episode=1054 reward=0.780494 (482.66 it/sec) -training >> step=6321000, episode=1054 reward=0.7808929 (455.27 it/sec) -training >> step=6321100, episode=1054 reward=0.772721 (471.20 it/sec) -training >> step=6321200, episode=1054 reward=0.7601553 (503.18 it/sec) -training >> step=6321300, episode=1054 reward=0.7948906 (472.48 it/sec) -training >> step=6321400, episode=1054 reward=0.7617793 (521.97 it/sec) -training >> step=6321500, episode=1054 reward=0.7943094 (481.60 it/sec) -training >> step=6321600, episode=1054 reward=0.8004415 (510.85 it/sec) -training >> step=6321700, episode=1054 reward=0.7764823 (490.71 it/sec) -training >> step=6321800, episode=1054 reward=0.7861626 (480.23 it/sec) -training >> step=6321900, episode=1054 reward=0.7744136 (507.93 it/sec) -training >> step=6322000, episode=1054 reward=0.7816279 (517.20 it/sec) -training >> step=6322100, episode=1054 reward=0.7866853 (483.52 it/sec) -training >> step=6322200, episode=1054 reward=0.7929305 (466.24 it/sec) -training >> step=6322300, episode=1054 reward=0.7909622 (506.48 it/sec) -training >> step=6322400, episode=1054 reward=0.7781183 (525.69 it/sec) -training >> step=6322500, episode=1054 reward=0.7676536 (513.63 it/sec) -training >> step=6322600, episode=1054 reward=0.7819582 (487.44 it/sec) -training >> step=6322700, episode=1054 reward=0.7873772 (491.91 it/sec) -training >> step=6322800, episode=1054 reward=0.772914 (481.63 it/sec) -training >> step=6322900, episode=1054 reward=0.7868094 (524.54 it/sec) -training >> step=6323000, episode=1054 reward=0.8112397 (511.17 it/sec) -training >> step=6323100, episode=1054 reward=0.7852677 (468.74 it/sec) -training >> step=6323200, episode=1054 reward=0.7845699 (439.84 it/sec) -training >> step=6323300, episode=1055 reward=0.7822701 (48.70 it/sec) -training >> step=6323400, episode=1055 reward=0.7858427 (487.59 it/sec) -training >> step=6323500, episode=1055 reward=0.7792912 (516.27 it/sec) -training >> step=6323600, episode=1055 reward=0.7699802 (529.22 it/sec) -training >> step=6323700, episode=1055 reward=0.7710796 (476.71 it/sec) -training >> step=6323800, episode=1055 reward=0.7631207 (458.20 it/sec) -training >> step=6323900, episode=1055 reward=0.7691775 (460.26 it/sec) -training >> step=6324000, episode=1055 reward=0.7640108 (502.34 it/sec) -training >> step=6324100, episode=1055 reward=0.7672774 (491.25 it/sec) -training >> step=6324200, episode=1055 reward=0.8056626 (435.36 it/sec) -training >> step=6324300, episode=1055 reward=0.7811087 (476.70 it/sec) -training >> step=6324400, episode=1055 reward=0.789219 (485.47 it/sec) -training >> step=6324500, episode=1055 reward=0.7976692 (436.56 it/sec) -training >> step=6324600, episode=1055 reward=0.7687215 (485.36 it/sec) -training >> step=6324700, episode=1055 reward=0.7569063 (470.48 it/sec) -training >> step=6324800, episode=1055 reward=0.7844352 (511.70 it/sec) -training >> step=6324900, episode=1055 reward=0.7856851 (491.30 it/sec) -training >> step=6325000, episode=1055 reward=0.7870722 (479.94 it/sec) -training >> step=6325100, episode=1055 reward=0.7895845 (502.99 it/sec) -training >> step=6325200, episode=1055 reward=0.7700735 (480.97 it/sec) -training >> step=6325300, episode=1055 reward=0.8059607 (465.33 it/sec) -training >> step=6325400, episode=1055 reward=0.7842784 (495.52 it/sec) -training >> step=6325500, episode=1055 reward=0.7816707 (518.60 it/sec) -training >> step=6325600, episode=1055 reward=0.8085893 (478.85 it/sec) -training >> step=6325700, episode=1055 reward=0.800861 (413.91 it/sec) -training >> step=6325800, episode=1055 reward=0.7728045 (506.51 it/sec) -training >> step=6325900, episode=1055 reward=0.7716897 (478.43 it/sec) -training >> step=6326000, episode=1055 reward=0.7951292 (436.18 it/sec) -training >> step=6326100, episode=1055 reward=0.7902753 (486.01 it/sec) -training >> step=6326200, episode=1055 reward=0.7887795 (497.38 it/sec) -training >> step=6326300, episode=1055 reward=0.7894304 (496.03 it/sec) -training >> step=6326400, episode=1055 reward=0.788947 (502.67 it/sec) -training >> step=6326500, episode=1055 reward=0.7938387 (481.47 it/sec) -training >> step=6326600, episode=1055 reward=0.8030708 (502.86 it/sec) -training >> step=6326700, episode=1055 reward=0.7991801 (441.46 it/sec) -training >> step=6326800, episode=1055 reward=0.7904342 (448.74 it/sec) -training >> step=6326900, episode=1055 reward=0.7901604 (443.81 it/sec) -training >> step=6327000, episode=1055 reward=0.7940398 (479.03 it/sec) -training >> step=6327100, episode=1055 reward=0.783215 (474.64 it/sec) -training >> step=6327200, episode=1055 reward=0.7945645 (475.84 it/sec) -training >> step=6327300, episode=1055 reward=0.7959141 (475.59 it/sec) -training >> step=6327400, episode=1055 reward=0.7899873 (481.48 it/sec) -training >> step=6327500, episode=1055 reward=0.7921527 (497.06 it/sec) -training >> step=6327600, episode=1055 reward=0.7960885 (453.47 it/sec) -training >> step=6327700, episode=1055 reward=0.7693462 (486.68 it/sec) -training >> step=6327800, episode=1055 reward=0.7823963 (477.35 it/sec) -training >> step=6327900, episode=1055 reward=0.7810975 (401.93 it/sec) -training >> step=6328000, episode=1055 reward=0.7789959 (505.50 it/sec) -training >> step=6328100, episode=1055 reward=0.7837375 (474.67 it/sec) -training >> step=6328200, episode=1055 reward=0.7979052 (476.65 it/sec) -training >> step=6328300, episode=1055 reward=0.7747034 (476.22 it/sec) -training >> step=6328400, episode=1055 reward=0.786124 (481.59 it/sec) -training >> step=6328500, episode=1055 reward=0.7697377 (422.69 it/sec) -training >> step=6328600, episode=1055 reward=0.7917482 (410.57 it/sec) -training >> step=6328700, episode=1055 reward=0.7928151 (500.11 it/sec) -training >> step=6328800, episode=1055 reward=0.7870377 (479.55 it/sec) -training >> step=6328900, episode=1055 reward=0.7884567 (500.71 it/sec) -training >> step=6329000, episode=1055 reward=0.7742251 (487.74 it/sec) -training >> step=6329100, episode=1055 reward=0.7885244 (499.94 it/sec) -training >> step=6329200, episode=1055 reward=0.7860867 (489.34 it/sec) -training >> step=6329300, episode=1056 reward=0.7903179 (62.70 it/sec) -training >> step=6329400, episode=1056 reward=0.7727387 (513.63 it/sec) -training >> step=6329500, episode=1056 reward=0.7761266 (458.26 it/sec) -training >> step=6329600, episode=1056 reward=0.7826371 (516.67 it/sec) -training >> step=6329700, episode=1056 reward=0.7724002 (490.14 it/sec) -training >> step=6329800, episode=1056 reward=0.8058479 (488.55 it/sec) -training >> step=6329900, episode=1056 reward=0.7936826 (515.38 it/sec) -training >> step=6330000, episode=1056 reward=0.7860509 (506.73 it/sec) -training >> step=6330100, episode=1056 reward=0.7573652 (452.30 it/sec) -training >> step=6330200, episode=1056 reward=0.783292 (463.90 it/sec) -training >> step=6330300, episode=1056 reward=0.8010145 (491.14 it/sec) -training >> step=6330400, episode=1056 reward=0.8007534 (471.55 it/sec) -training >> step=6330500, episode=1056 reward=0.7891182 (465.78 it/sec) -training >> step=6330600, episode=1056 reward=0.797646 (519.52 it/sec) -training >> step=6330700, episode=1056 reward=0.7939466 (499.17 it/sec) -training >> step=6330800, episode=1056 reward=0.7789441 (474.43 it/sec) -training >> step=6330900, episode=1056 reward=0.79662 (532.84 it/sec) -training >> step=6331000, episode=1056 reward=0.7900919 (470.60 it/sec) -training >> step=6331100, episode=1056 reward=0.7740195 (468.45 it/sec) -training >> step=6331200, episode=1056 reward=0.7690164 (523.84 it/sec) -training >> step=6331300, episode=1056 reward=0.7803688 (518.22 it/sec) -training >> step=6331400, episode=1056 reward=0.7818942 (489.91 it/sec) -training >> step=6331500, episode=1056 reward=0.7836127 (471.06 it/sec) -training >> step=6331600, episode=1056 reward=0.7843636 (537.58 it/sec) -training >> step=6331700, episode=1056 reward=0.7768733 (496.26 it/sec) -training >> step=6331800, episode=1056 reward=0.790481 (501.52 it/sec) -training >> step=6331900, episode=1056 reward=0.791854 (477.32 it/sec) -training >> step=6332000, episode=1056 reward=0.7926014 (485.69 it/sec) -training >> step=6332100, episode=1056 reward=0.7915261 (503.88 it/sec) -training >> step=6332200, episode=1056 reward=0.7640579 (469.60 it/sec) -training >> step=6332300, episode=1056 reward=0.7955992 (505.31 it/sec) -training >> step=6332400, episode=1056 reward=0.8026797 (445.91 it/sec) -training >> step=6332500, episode=1056 reward=0.7990869 (498.82 it/sec) -training >> step=6332600, episode=1056 reward=0.8076453 (494.90 it/sec) -training >> step=6332700, episode=1056 reward=0.7872478 (518.89 it/sec) -training >> step=6332800, episode=1056 reward=0.784038 (499.56 it/sec) -training >> step=6332900, episode=1056 reward=0.778843 (512.16 it/sec) -training >> step=6333000, episode=1056 reward=0.7776279 (497.43 it/sec) -training >> step=6333100, episode=1056 reward=0.7953525 (525.34 it/sec) -training >> step=6333200, episode=1056 reward=0.7845341 (532.24 it/sec) -training >> step=6333300, episode=1056 reward=0.7910064 (498.82 it/sec) -training >> step=6333400, episode=1056 reward=0.7682175 (541.96 it/sec) -training >> step=6333500, episode=1056 reward=0.7847374 (411.59 it/sec) -training >> step=6333600, episode=1056 reward=0.7841823 (521.22 it/sec) -training >> step=6333700, episode=1056 reward=0.7965415 (506.93 it/sec) -training >> step=6333800, episode=1056 reward=0.789242 (563.76 it/sec) -training >> step=6333900, episode=1056 reward=0.7872666 (504.14 it/sec) -training >> step=6334000, episode=1056 reward=0.778843 (513.97 it/sec) -training >> step=6334100, episode=1056 reward=0.757633 (527.81 it/sec) -training >> step=6334200, episode=1056 reward=0.7816603 (480.45 it/sec) -training >> step=6334300, episode=1056 reward=0.7767865 (522.19 it/sec) -training >> step=6334400, episode=1056 reward=0.7792259 (502.81 it/sec) -training >> step=6334500, episode=1056 reward=0.7813933 (527.76 it/sec) -training >> step=6334600, episode=1056 reward=0.7828524 (518.54 it/sec) -training >> step=6334700, episode=1056 reward=0.7998509 (517.37 it/sec) -training >> step=6334800, episode=1056 reward=0.7815139 (515.49 it/sec) -training >> step=6334900, episode=1056 reward=0.8083684 (533.35 it/sec) -training >> step=6335000, episode=1056 reward=0.7790318 (531.97 it/sec) -training >> step=6335100, episode=1056 reward=0.7888995 (496.57 it/sec) -training >> step=6335200, episode=1056 reward=0.774545 (551.87 it/sec) -training >> step=6335300, episode=1057 reward=0.7841844 (106.41 it/sec) -training >> step=6335400, episode=1057 reward=0.7927045 (540.71 it/sec) -training >> step=6335500, episode=1057 reward=0.792335 (498.57 it/sec) -training >> step=6335600, episode=1057 reward=0.7971151 (534.18 it/sec) -training >> step=6335700, episode=1057 reward=0.7907125 (475.45 it/sec) -training >> step=6335800, episode=1057 reward=0.7894619 (479.28 it/sec) -training >> step=6335900, episode=1057 reward=0.7680248 (525.64 it/sec) -training >> step=6336000, episode=1057 reward=0.7820734 (518.04 it/sec) -training >> step=6336100, episode=1057 reward=0.7914648 (420.26 it/sec) -training >> step=6336200, episode=1057 reward=0.7984913 (519.18 it/sec) -training >> step=6336300, episode=1057 reward=0.7807513 (512.97 it/sec) -training >> step=6336400, episode=1057 reward=0.7925143 (515.19 it/sec) -training >> step=6336500, episode=1057 reward=0.7939745 (518.59 it/sec) -training >> step=6336600, episode=1057 reward=0.78302 (540.29 it/sec) -training >> step=6336700, episode=1057 reward=0.8127192 (513.43 it/sec) -training >> step=6336800, episode=1057 reward=0.7940911 (483.07 it/sec) -training >> step=6336900, episode=1057 reward=0.7885914 (503.56 it/sec) -training >> step=6337000, episode=1057 reward=0.8020712 (538.32 it/sec) -training >> step=6337100, episode=1057 reward=0.7964846 (477.46 it/sec) -training >> step=6337200, episode=1057 reward=0.7994198 (546.27 it/sec) -training >> step=6337300, episode=1057 reward=0.7887918 (476.59 it/sec) -training >> step=6337400, episode=1057 reward=0.7871165 (495.30 it/sec) -training >> step=6337500, episode=1057 reward=0.7722453 (482.56 it/sec) -training >> step=6337600, episode=1057 reward=0.7928627 (455.66 it/sec) -training >> step=6337700, episode=1057 reward=0.7863944 (522.17 it/sec) -training >> step=6337800, episode=1057 reward=0.8089845 (504.97 it/sec) -training >> step=6337900, episode=1057 reward=0.7969871 (478.67 it/sec) -training >> step=6338000, episode=1057 reward=0.7690527 (492.57 it/sec) -training >> step=6338100, episode=1057 reward=0.7893043 (527.37 it/sec) -training >> step=6338200, episode=1057 reward=0.7774454 (452.49 it/sec) -training >> step=6338300, episode=1057 reward=0.8028592 (482.61 it/sec) -training >> step=6338400, episode=1057 reward=0.7791043 (486.17 it/sec) -training >> step=6338500, episode=1057 reward=0.7817852 (538.86 it/sec) -training >> step=6338600, episode=1057 reward=0.8067335 (508.68 it/sec) -training >> step=6338700, episode=1057 reward=0.772794 (469.06 it/sec) -training >> step=6338800, episode=1057 reward=0.8039619 (521.76 it/sec) -training >> step=6338900, episode=1057 reward=0.7844378 (496.32 it/sec) -training >> step=6339000, episode=1057 reward=0.7950198 (503.68 it/sec) -training >> step=6339100, episode=1057 reward=0.7930642 (518.64 it/sec) -training >> step=6339200, episode=1057 reward=0.7655167 (526.51 it/sec) -training >> step=6339300, episode=1057 reward=0.7966934 (514.36 it/sec) -training >> step=6339400, episode=1057 reward=0.7766383 (483.51 it/sec) -training >> step=6339500, episode=1057 reward=0.7953468 (561.21 it/sec) -training >> step=6339600, episode=1057 reward=0.7646852 (535.62 it/sec) -training >> step=6339700, episode=1057 reward=0.7463081 (502.21 it/sec) -training >> step=6339800, episode=1057 reward=0.7878581 (485.78 it/sec) -training >> step=6339900, episode=1057 reward=0.783678 (476.76 it/sec) -training >> step=6340000, episode=1057 reward=0.7713664 (490.86 it/sec) -training >> step=6340100, episode=1057 reward=0.7779019 (536.14 it/sec) -training >> step=6340200, episode=1057 reward=0.7836434 (511.99 it/sec) -training >> step=6340300, episode=1057 reward=0.8039476 (553.02 it/sec) -training >> step=6340400, episode=1057 reward=0.7947492 (477.26 it/sec) -training >> step=6340500, episode=1057 reward=0.7744405 (506.44 it/sec) -training >> step=6340600, episode=1057 reward=0.7821938 (458.85 it/sec) -training >> step=6340700, episode=1057 reward=0.7761565 (511.77 it/sec) -training >> step=6340800, episode=1057 reward=0.7674015 (515.60 it/sec) -training >> step=6340900, episode=1057 reward=0.7881841 (495.25 it/sec) -training >> step=6341000, episode=1057 reward=0.7642518 (486.85 it/sec) -training >> step=6341100, episode=1057 reward=0.7844536 (533.03 it/sec) -training >> step=6341200, episode=1057 reward=0.7707795 (456.18 it/sec) -training >> step=6341300, episode=1058 reward=0.791458 (118.89 it/sec) -training >> step=6341400, episode=1058 reward=0.7853275 (501.48 it/sec) -training >> step=6341500, episode=1058 reward=0.7878914 (513.15 it/sec) -training >> step=6341600, episode=1058 reward=0.7965244 (507.28 it/sec) -training >> step=6341700, episode=1058 reward=0.7918699 (528.43 it/sec) -training >> step=6341800, episode=1058 reward=0.7914667 (506.78 it/sec) -training >> step=6341900, episode=1058 reward=0.7937302 (479.57 it/sec) -training >> step=6342000, episode=1058 reward=0.7938071 (503.47 it/sec) -training >> step=6342100, episode=1058 reward=0.7705777 (506.44 it/sec) -training >> step=6342200, episode=1058 reward=0.7935329 (472.81 it/sec) -training >> step=6342300, episode=1058 reward=0.7691673 (515.28 it/sec) -training >> step=6342400, episode=1058 reward=0.7761423 (538.58 it/sec) -training >> step=6342500, episode=1058 reward=0.7903008 (522.98 it/sec) -training >> step=6342600, episode=1058 reward=0.7628391 (545.34 it/sec) -training >> step=6342700, episode=1058 reward=0.7907824 (502.26 it/sec) -training >> step=6342800, episode=1058 reward=0.7814549 (542.31 it/sec) -training >> step=6342900, episode=1058 reward=0.7983169 (490.91 it/sec) -training >> step=6343000, episode=1058 reward=0.7799501 (477.64 it/sec) -training >> step=6343100, episode=1058 reward=0.7933869 (523.80 it/sec) -training >> step=6343200, episode=1058 reward=0.7881879 (505.20 it/sec) -training >> step=6343300, episode=1058 reward=0.8031777 (533.82 it/sec) -training >> step=6343400, episode=1058 reward=0.774708 (539.47 it/sec) -training >> step=6343500, episode=1058 reward=0.7857571 (474.79 it/sec) -training >> step=6343600, episode=1058 reward=0.7844552 (495.59 it/sec) -training >> step=6343700, episode=1058 reward=0.775497 (512.97 it/sec) -training >> step=6343800, episode=1058 reward=0.8123835 (469.96 it/sec) -training >> step=6343900, episode=1058 reward=0.7977443 (480.53 it/sec) -training >> step=6344000, episode=1058 reward=0.7915891 (483.55 it/sec) -training >> step=6344100, episode=1058 reward=0.7961603 (482.61 it/sec) -training >> step=6344200, episode=1058 reward=0.7907559 (514.34 it/sec) -training >> step=6344300, episode=1058 reward=0.772378 (543.14 it/sec) -training >> step=6344400, episode=1058 reward=0.7749721 (520.16 it/sec) -training >> step=6344500, episode=1058 reward=0.7923375 (475.87 it/sec) -training >> step=6344600, episode=1058 reward=0.7891915 (533.44 it/sec) -training >> step=6344700, episode=1058 reward=0.7793581 (514.40 it/sec) -training >> step=6344800, episode=1058 reward=0.7723786 (503.67 it/sec) -training >> step=6344900, episode=1058 reward=0.7765984 (527.51 it/sec) -training >> step=6345000, episode=1058 reward=0.7649705 (495.72 it/sec) -training >> step=6345100, episode=1058 reward=0.7623932 (473.54 it/sec) -training >> step=6345200, episode=1058 reward=0.7837073 (484.66 it/sec) -training >> step=6345300, episode=1058 reward=0.7857686 (502.12 it/sec) -training >> step=6345400, episode=1058 reward=0.7978395 (528.08 it/sec) -training >> step=6345500, episode=1058 reward=0.7812253 (514.85 it/sec) -training >> step=6345600, episode=1058 reward=0.7962433 (515.41 it/sec) -training >> step=6345700, episode=1058 reward=0.7767559 (530.41 it/sec) -training >> step=6345800, episode=1058 reward=0.7850675 (442.53 it/sec) -training >> step=6345900, episode=1058 reward=0.7803018 (529.82 it/sec) -training >> step=6346000, episode=1058 reward=0.7888288 (518.52 it/sec) -training >> step=6346100, episode=1058 reward=0.7731997 (471.35 it/sec) -training >> step=6346200, episode=1058 reward=0.7893535 (485.09 it/sec) -training >> step=6346300, episode=1058 reward=0.8061371 (461.80 it/sec) -training >> step=6346400, episode=1058 reward=0.7521648 (528.89 it/sec) -training >> step=6346500, episode=1058 reward=0.7624046 (517.84 it/sec) -training >> step=6346600, episode=1058 reward=0.7779114 (512.51 it/sec) -training >> step=6346700, episode=1058 reward=0.779225 (489.93 it/sec) -training >> step=6346800, episode=1058 reward=0.7808463 (502.61 it/sec) -training >> step=6346900, episode=1058 reward=0.7745083 (479.42 it/sec) -training >> step=6347000, episode=1058 reward=0.7802292 (528.77 it/sec) -training >> step=6347100, episode=1058 reward=0.7876485 (510.46 it/sec) -training >> step=6347200, episode=1058 reward=0.7794265 (549.94 it/sec) -training >> step=6347300, episode=1059 reward=0.7812387 (120.47 it/sec) -training >> step=6347400, episode=1059 reward=0.786747 (512.57 it/sec) -training >> step=6347500, episode=1059 reward=0.7845141 (500.87 it/sec) -training >> step=6347600, episode=1059 reward=0.7735424 (472.45 it/sec) -training >> step=6347700, episode=1059 reward=0.7902627 (495.29 it/sec) -training >> step=6347800, episode=1059 reward=0.7505491 (427.87 it/sec) -training >> step=6347900, episode=1059 reward=0.7747645 (516.92 it/sec) -training >> step=6348000, episode=1059 reward=0.7794173 (481.72 it/sec) -training >> step=6348100, episode=1059 reward=0.7934904 (469.95 it/sec) -training >> step=6348200, episode=1059 reward=0.7838075 (564.42 it/sec) -training >> step=6348300, episode=1059 reward=0.7991691 (505.85 it/sec) -training >> step=6348400, episode=1059 reward=0.7998918 (512.22 it/sec) -training >> step=6348500, episode=1059 reward=0.7940069 (519.78 it/sec) -training >> step=6348600, episode=1059 reward=0.8076006 (484.68 it/sec) -training >> step=6348700, episode=1059 reward=0.7780026 (505.84 it/sec) -training >> step=6348800, episode=1059 reward=0.7662787 (469.07 it/sec) -training >> step=6348900, episode=1059 reward=0.7908418 (533.48 it/sec) -training >> step=6349000, episode=1059 reward=0.7791798 (565.58 it/sec) -training >> step=6349100, episode=1059 reward=0.7847165 (467.92 it/sec) -training >> step=6349200, episode=1059 reward=0.7981871 (519.09 it/sec) -training >> step=6349300, episode=1059 reward=0.7831826 (518.53 it/sec) -training >> step=6349400, episode=1059 reward=0.7656898 (471.81 it/sec) -training >> step=6349500, episode=1059 reward=0.7880219 (506.34 it/sec) -training >> step=6349600, episode=1059 reward=0.7974393 (491.25 it/sec) -training >> step=6349700, episode=1059 reward=0.7839381 (508.27 it/sec) -training >> step=6349800, episode=1059 reward=0.7961904 (513.74 it/sec) -training >> step=6349900, episode=1059 reward=0.7625932 (499.87 it/sec) -training >> step=6350000, episode=1059 reward=0.782225 (522.68 it/sec) -training >> step=6350100, episode=1059 reward=0.7915086 (525.09 it/sec) -training >> step=6350200, episode=1059 reward=0.7962306 (495.85 it/sec) -training >> step=6350300, episode=1059 reward=0.77121 (512.27 it/sec) -training >> step=6350400, episode=1059 reward=0.7942204 (536.16 it/sec) -training >> step=6350500, episode=1059 reward=0.7952369 (474.92 it/sec) -training >> step=6350600, episode=1059 reward=0.8206534 (489.79 it/sec) -training >> step=6350700, episode=1059 reward=0.8167465 (489.57 it/sec) -training >> step=6350800, episode=1059 reward=0.7820045 (502.22 it/sec) -training >> step=6350900, episode=1059 reward=0.7924574 (505.41 it/sec) -training >> step=6351000, episode=1059 reward=0.7931734 (530.13 it/sec) -training >> step=6351100, episode=1059 reward=0.7966579 (528.23 it/sec) -training >> step=6351200, episode=1059 reward=0.783638 (476.99 it/sec) -training >> step=6351300, episode=1059 reward=0.7845446 (517.73 it/sec) -training >> step=6351400, episode=1059 reward=0.7849666 (506.87 it/sec) -training >> step=6351500, episode=1059 reward=0.787921 (530.25 it/sec) -training >> step=6351600, episode=1059 reward=0.770467 (511.67 it/sec) -training >> step=6351700, episode=1059 reward=0.7627426 (453.91 it/sec) -training >> step=6351800, episode=1059 reward=0.7945669 (528.23 it/sec) -training >> step=6351900, episode=1059 reward=0.7800919 (528.47 it/sec) -training >> step=6352000, episode=1059 reward=0.7948574 (509.72 it/sec) -training >> step=6352100, episode=1059 reward=0.7678066 (524.97 it/sec) -training >> step=6352200, episode=1059 reward=0.7718598 (504.20 it/sec) -training >> step=6352300, episode=1059 reward=0.7668486 (494.20 it/sec) -training >> step=6352400, episode=1059 reward=0.7803921 (517.46 it/sec) -training >> step=6352500, episode=1059 reward=0.7896347 (493.54 it/sec) -training >> step=6352600, episode=1059 reward=0.7853796 (535.78 it/sec) -training >> step=6352700, episode=1059 reward=0.7603046 (477.20 it/sec) -training >> step=6352800, episode=1059 reward=0.802304 (440.54 it/sec) -training >> step=6352900, episode=1059 reward=0.7877991 (497.79 it/sec) -training >> step=6353000, episode=1059 reward=0.7905511 (519.02 it/sec) -training >> step=6353100, episode=1059 reward=0.7981563 (511.26 it/sec) -training >> step=6353200, episode=1059 reward=0.8134316 (507.07 it/sec) -training >> step=6353300, episode=1060 reward=0.7815558 (129.99 it/sec) -training >> step=6353400, episode=1060 reward=0.784044 (516.88 it/sec) -training >> step=6353500, episode=1060 reward=0.7796433 (490.33 it/sec) -training >> step=6353600, episode=1060 reward=0.7667831 (473.02 it/sec) -training >> step=6353700, episode=1060 reward=0.797062 (527.04 it/sec) -training >> step=6353800, episode=1060 reward=0.7779523 (514.41 it/sec) -training >> step=6353900, episode=1060 reward=0.7986385 (494.55 it/sec) -training >> step=6354000, episode=1060 reward=0.7738153 (446.22 it/sec) -training >> step=6354100, episode=1060 reward=0.7946674 (534.00 it/sec) -training >> step=6354200, episode=1060 reward=0.7948444 (487.54 it/sec) -training >> step=6354300, episode=1060 reward=0.7784682 (499.59 it/sec) -training >> step=6354400, episode=1060 reward=0.7860336 (546.83 it/sec) -training >> step=6354500, episode=1060 reward=0.7975035 (476.82 it/sec) -training >> step=6354600, episode=1060 reward=0.7883477 (494.45 it/sec) -training >> step=6354700, episode=1060 reward=0.7755067 (549.62 it/sec) -training >> step=6354800, episode=1060 reward=0.7918826 (521.51 it/sec) -training >> step=6354900, episode=1060 reward=0.7877062 (512.34 it/sec) -training >> step=6355000, episode=1060 reward=0.7962362 (483.43 it/sec) -training >> step=6355100, episode=1060 reward=0.788422 (519.62 it/sec) -training >> step=6355200, episode=1060 reward=0.7758768 (502.19 it/sec) -training >> step=6355300, episode=1060 reward=0.7818559 (494.49 it/sec) -training >> step=6355400, episode=1060 reward=0.8034902 (493.26 it/sec) -training >> step=6355500, episode=1060 reward=0.7777082 (511.65 it/sec) -training >> step=6355600, episode=1060 reward=0.7947229 (457.22 it/sec) -training >> step=6355700, episode=1060 reward=0.7944255 (507.19 it/sec) -training >> step=6355800, episode=1060 reward=0.7714849 (478.90 it/sec) -training >> step=6355900, episode=1060 reward=0.7716809 (440.63 it/sec) -training >> step=6356000, episode=1060 reward=0.8000453 (426.96 it/sec) -training >> step=6356100, episode=1060 reward=0.7929589 (450.10 it/sec) -training >> step=6356200, episode=1060 reward=0.7900096 (419.11 it/sec) -training >> step=6356300, episode=1060 reward=0.7859501 (436.44 it/sec) -training >> step=6356400, episode=1060 reward=0.7908276 (452.19 it/sec) -training >> step=6356500, episode=1060 reward=0.7846492 (418.64 it/sec) -training >> step=6356600, episode=1060 reward=0.799596 (481.03 it/sec) -training >> step=6356700, episode=1060 reward=0.7951781 (500.88 it/sec) -training >> step=6356800, episode=1060 reward=0.8055825 (493.85 it/sec) -training >> step=6356900, episode=1060 reward=0.7777435 (522.41 it/sec) -training >> step=6357000, episode=1060 reward=0.7810084 (496.06 it/sec) -training >> step=6357100, episode=1060 reward=0.7961387 (493.06 it/sec) -training >> step=6357200, episode=1060 reward=0.7879762 (495.91 it/sec) -training >> step=6357300, episode=1060 reward=0.7830904 (486.67 it/sec) -training >> step=6357400, episode=1060 reward=0.7865538 (450.10 it/sec) -training >> step=6357500, episode=1060 reward=0.7878667 (453.86 it/sec) -training >> step=6357600, episode=1060 reward=0.782959 (536.15 it/sec) -training >> step=6357700, episode=1060 reward=0.7755902 (489.91 it/sec) -training >> step=6357800, episode=1060 reward=0.793496 (504.37 it/sec) -training >> step=6357900, episode=1060 reward=0.779964 (502.11 it/sec) -training >> step=6358000, episode=1060 reward=0.8172576 (487.69 it/sec) -training >> step=6358100, episode=1060 reward=0.7859722 (516.71 it/sec) -training >> step=6358200, episode=1060 reward=0.7773117 (531.84 it/sec) -training >> step=6358300, episode=1060 reward=0.7596069 (513.93 it/sec) -training >> step=6358400, episode=1060 reward=0.7787345 (515.10 it/sec) -training >> step=6358500, episode=1060 reward=0.7640431 (500.08 it/sec) -training >> step=6358600, episode=1060 reward=0.7728947 (486.60 it/sec) -training >> step=6358700, episode=1060 reward=0.8009343 (531.53 it/sec) -training >> step=6358800, episode=1060 reward=0.7672658 (493.43 it/sec) -training >> step=6358900, episode=1060 reward=0.7797332 (488.22 it/sec) -training >> step=6359000, episode=1060 reward=0.7880663 (497.40 it/sec) -training >> step=6359100, episode=1060 reward=0.7742656 (463.87 it/sec) -training >> step=6359200, episode=1060 reward=0.779417 (487.88 it/sec) -training >> step=6359300, episode=1061 reward=0.7727602 (103.77 it/sec) -training >> step=6359400, episode=1061 reward=0.7927643 (454.45 it/sec) -training >> step=6359500, episode=1061 reward=0.7767324 (481.09 it/sec) -training >> step=6359600, episode=1061 reward=0.7835988 (446.43 it/sec) -training >> step=6359700, episode=1061 reward=0.7738252 (464.42 it/sec) -training >> step=6359800, episode=1061 reward=0.7889244 (485.68 it/sec) -training >> step=6359900, episode=1061 reward=0.7940525 (493.31 it/sec) -training >> step=6360000, episode=1061 reward=0.7887381 (516.70 it/sec) -training >> step=6360100, episode=1061 reward=0.7739738 (480.33 it/sec) -training >> step=6360200, episode=1061 reward=0.7979667 (479.71 it/sec) -training >> step=6360300, episode=1061 reward=0.7789733 (485.70 it/sec) -training >> step=6360400, episode=1061 reward=0.7679052 (517.05 it/sec) -training >> step=6360500, episode=1061 reward=0.7958074 (435.97 it/sec) -training >> step=6360600, episode=1061 reward=0.7733818 (445.18 it/sec) -training >> step=6360700, episode=1061 reward=0.7769443 (474.63 it/sec) -training >> step=6360800, episode=1061 reward=0.7781098 (481.12 it/sec) -training >> step=6360900, episode=1061 reward=0.804525 (495.23 it/sec) -training >> step=6361000, episode=1061 reward=0.7912278 (503.26 it/sec) -training >> step=6361100, episode=1061 reward=0.7777799 (465.28 it/sec) -training >> step=6361200, episode=1061 reward=0.7821983 (456.15 it/sec) -training >> step=6361300, episode=1061 reward=0.7759882 (487.06 it/sec) -training >> step=6361400, episode=1061 reward=0.8203399 (491.29 it/sec) -training >> step=6361500, episode=1061 reward=0.7964833 (496.60 it/sec) -training >> step=6361600, episode=1061 reward=0.7896444 (483.61 it/sec) -training >> step=6361700, episode=1061 reward=0.7807249 (532.32 it/sec) -training >> step=6361800, episode=1061 reward=0.7863499 (488.51 it/sec) -training >> step=6361900, episode=1061 reward=0.794998 (408.55 it/sec) -training >> step=6362000, episode=1061 reward=0.8092791 (465.92 it/sec) -training >> step=6362100, episode=1061 reward=0.7883963 (470.94 it/sec) -training >> step=6362200, episode=1061 reward=0.791566 (471.77 it/sec) -training >> step=6362300, episode=1061 reward=0.7828746 (483.75 it/sec) -training >> step=6362400, episode=1061 reward=0.7834764 (490.12 it/sec) -training >> step=6362500, episode=1061 reward=0.7734383 (483.90 it/sec) -training >> step=6362600, episode=1061 reward=0.7922705 (487.52 it/sec) -training >> step=6362700, episode=1061 reward=0.7708418 (442.57 it/sec) -training >> step=6362800, episode=1061 reward=0.7905046 (537.22 it/sec) -training >> step=6362900, episode=1061 reward=0.7754505 (491.64 it/sec) -training >> step=6363000, episode=1061 reward=0.7835374 (497.33 it/sec) -training >> step=6363100, episode=1061 reward=0.7812761 (481.65 it/sec) -training >> step=6363200, episode=1061 reward=0.7955704 (509.08 it/sec) -training >> step=6363300, episode=1061 reward=0.7717306 (512.33 it/sec) -training >> step=6363400, episode=1061 reward=0.7777607 (463.94 it/sec) -training >> step=6363500, episode=1061 reward=0.782257 (518.86 it/sec) -training >> step=6363600, episode=1061 reward=0.7994057 (465.21 it/sec) -training >> step=6363700, episode=1061 reward=0.7832568 (479.15 it/sec) -training >> step=6363800, episode=1061 reward=0.7667047 (524.24 it/sec) -training >> step=6363900, episode=1061 reward=0.7679679 (509.35 it/sec) -training >> step=6364000, episode=1061 reward=0.7662022 (494.07 it/sec) -training >> step=6364100, episode=1061 reward=0.7814007 (491.27 it/sec) -training >> step=6364200, episode=1061 reward=0.7852593 (489.70 it/sec) -training >> step=6364300, episode=1061 reward=0.7771447 (447.55 it/sec) -training >> step=6364400, episode=1061 reward=0.778768 (465.76 it/sec) -training >> step=6364500, episode=1061 reward=0.7867478 (501.17 it/sec) -training >> step=6364600, episode=1061 reward=0.7711959 (474.30 it/sec) -training >> step=6364700, episode=1061 reward=0.7995616 (477.02 it/sec) -training >> step=6364800, episode=1061 reward=0.7883546 (499.51 it/sec) -training >> step=6364900, episode=1061 reward=0.7578759 (454.93 it/sec) -training >> step=6365000, episode=1061 reward=0.7909779 (509.47 it/sec) -training >> step=6365100, episode=1061 reward=0.7890586 (457.66 it/sec) -training >> step=6365200, episode=1061 reward=0.7738045 (494.80 it/sec) -training >> step=6365300, episode=1062 reward=0.7975302 (104.70 it/sec) -training >> step=6365400, episode=1062 reward=0.7949933 (489.51 it/sec) -training >> step=6365500, episode=1062 reward=0.7854149 (422.03 it/sec) -training >> step=6365600, episode=1062 reward=0.7868255 (385.48 it/sec) -training >> step=6365700, episode=1062 reward=0.7837683 (484.36 it/sec) -training >> step=6365800, episode=1062 reward=0.7727672 (482.45 it/sec) -training >> step=6365900, episode=1062 reward=0.7816809 (480.89 it/sec) -training >> step=6366000, episode=1062 reward=0.7798842 (475.32 it/sec) -training >> step=6366100, episode=1062 reward=0.7931867 (471.69 it/sec) -training >> step=6366200, episode=1062 reward=0.7746698 (429.04 it/sec) -training >> step=6366300, episode=1062 reward=0.785793 (456.54 it/sec) -training >> step=6366400, episode=1062 reward=0.7713239 (399.77 it/sec) -training >> step=6366500, episode=1062 reward=0.7946023 (435.52 it/sec) -training >> step=6366600, episode=1062 reward=0.803762 (452.52 it/sec) -training >> step=6366700, episode=1062 reward=0.7940058 (426.57 it/sec) -training >> step=6366800, episode=1062 reward=0.7885637 (419.31 it/sec) -training >> step=6366900, episode=1062 reward=0.7919949 (423.65 it/sec) -training >> step=6367000, episode=1062 reward=0.7896545 (377.10 it/sec) -training >> step=6367100, episode=1062 reward=0.7843693 (398.21 it/sec) -training >> step=6367200, episode=1062 reward=0.7920932 (398.97 it/sec) -training >> step=6367300, episode=1062 reward=0.7930267 (424.89 it/sec) -training >> step=6367400, episode=1062 reward=0.810347 (502.68 it/sec) -training >> step=6367500, episode=1062 reward=0.7917043 (487.72 it/sec) -training >> step=6367600, episode=1062 reward=0.7914287 (525.86 it/sec) -training >> step=6367700, episode=1062 reward=0.8067585 (511.23 it/sec) -training >> step=6367800, episode=1062 reward=0.7808147 (521.01 it/sec) -training >> step=6367900, episode=1062 reward=0.8116363 (504.29 it/sec) -training >> step=6368000, episode=1062 reward=0.7940794 (495.82 it/sec) -training >> step=6368100, episode=1062 reward=0.7762693 (534.01 it/sec) -training >> step=6368200, episode=1062 reward=0.7857407 (511.56 it/sec) -training >> step=6368300, episode=1062 reward=0.7839157 (519.18 it/sec) -training >> step=6368400, episode=1062 reward=0.7863222 (534.07 it/sec) -training >> step=6368500, episode=1062 reward=0.810774 (520.49 it/sec) -training >> step=6368600, episode=1062 reward=0.7858409 (446.13 it/sec) -training >> step=6368700, episode=1062 reward=0.7844744 (524.20 it/sec) -training >> step=6368800, episode=1062 reward=0.7952222 (491.23 it/sec) -training >> step=6368900, episode=1062 reward=0.7726672 (550.96 it/sec) -training >> step=6369000, episode=1062 reward=0.7966946 (523.37 it/sec) -training >> step=6369100, episode=1062 reward=0.7849144 (473.34 it/sec) -training >> step=6369200, episode=1062 reward=0.7960677 (553.18 it/sec) -training >> step=6369300, episode=1062 reward=0.7639281 (506.03 it/sec) -training >> step=6369400, episode=1062 reward=0.7987489 (525.17 it/sec) -training >> step=6369500, episode=1062 reward=0.7822244 (520.68 it/sec) -training >> step=6369600, episode=1062 reward=0.781462 (506.19 it/sec) -training >> step=6369700, episode=1062 reward=0.7818579 (464.80 it/sec) -training >> step=6369800, episode=1062 reward=0.7806989 (515.39 it/sec) -training >> step=6369900, episode=1062 reward=0.7713438 (478.60 it/sec) -training >> step=6370000, episode=1062 reward=0.7675927 (533.89 it/sec) -training >> step=6370100, episode=1062 reward=0.7713112 (484.78 it/sec) -training >> step=6370200, episode=1062 reward=0.7690151 (521.56 it/sec) -training >> step=6370300, episode=1062 reward=0.7590919 (525.77 it/sec) -training >> step=6370400, episode=1062 reward=0.7900012 (501.52 it/sec) -training >> step=6370500, episode=1062 reward=0.7907099 (516.13 it/sec) -training >> step=6370600, episode=1062 reward=0.7872963 (510.54 it/sec) -training >> step=6370700, episode=1062 reward=0.7878097 (537.61 it/sec) -training >> step=6370800, episode=1062 reward=0.7875837 (485.01 it/sec) -training >> step=6370900, episode=1062 reward=0.7783973 (483.64 it/sec) -training >> step=6371000, episode=1062 reward=0.7883765 (507.46 it/sec) -training >> step=6371100, episode=1062 reward=0.7871433 (544.46 it/sec) -training >> step=6371200, episode=1062 reward=0.776866 (496.58 it/sec) -training >> step=6371300, episode=1063 reward=0.7947217 (115.08 it/sec) -training >> step=6371400, episode=1063 reward=0.7627496 (411.39 it/sec) -training >> step=6371500, episode=1063 reward=0.7980201 (476.80 it/sec) -training >> step=6371600, episode=1063 reward=0.7777837 (432.62 it/sec) -training >> step=6371700, episode=1063 reward=0.7854479 (425.20 it/sec) -training >> step=6371800, episode=1063 reward=0.7709742 (464.81 it/sec) -training >> step=6371900, episode=1063 reward=0.787262 (523.93 it/sec) -training >> step=6372000, episode=1063 reward=0.7796851 (471.03 it/sec) -training >> step=6372100, episode=1063 reward=0.8015062 (450.46 it/sec) -training >> step=6372200, episode=1063 reward=0.7826389 (548.18 it/sec) -training >> step=6372300, episode=1063 reward=0.7865306 (492.28 it/sec) -training >> step=6372400, episode=1063 reward=0.775897 (494.76 it/sec) -training >> step=6372500, episode=1063 reward=0.7692388 (486.67 it/sec) -training >> step=6372600, episode=1063 reward=0.7923105 (476.26 it/sec) -training >> step=6372700, episode=1063 reward=0.7714556 (443.82 it/sec) -training >> step=6372800, episode=1063 reward=0.7936233 (417.62 it/sec) -training >> step=6372900, episode=1063 reward=0.7886465 (486.93 it/sec) -training >> step=6373000, episode=1063 reward=0.7875082 (441.02 it/sec) -training >> step=6373100, episode=1063 reward=0.7784461 (472.80 it/sec) -training >> step=6373200, episode=1063 reward=0.7922567 (469.71 it/sec) -training >> step=6373300, episode=1063 reward=0.8012167 (469.36 it/sec) -training >> step=6373400, episode=1063 reward=0.7871501 (469.20 it/sec) -training >> step=6373500, episode=1063 reward=0.7793464 (438.34 it/sec) -training >> step=6373600, episode=1063 reward=0.8079181 (415.38 it/sec) -training >> step=6373700, episode=1063 reward=0.7863815 (420.21 it/sec) -training >> step=6373800, episode=1063 reward=0.7902136 (452.94 it/sec) -training >> step=6373900, episode=1063 reward=0.788313 (407.47 it/sec) -training >> step=6374000, episode=1063 reward=0.7780443 (457.01 it/sec) -training >> step=6374100, episode=1063 reward=0.7732373 (475.34 it/sec) -training >> step=6374200, episode=1063 reward=0.7772252 (429.78 it/sec) -training >> step=6374300, episode=1063 reward=0.7814837 (476.75 it/sec) -training >> step=6374400, episode=1063 reward=0.7870347 (498.53 it/sec) -training >> step=6374500, episode=1063 reward=0.7862154 (500.22 it/sec) -training >> step=6374600, episode=1063 reward=0.8066456 (500.07 it/sec) -training >> step=6374700, episode=1063 reward=0.7870282 (526.12 it/sec) -training >> step=6374800, episode=1063 reward=0.8010446 (524.89 it/sec) -training >> step=6374900, episode=1063 reward=0.7833238 (464.97 it/sec) -training >> step=6375000, episode=1063 reward=0.771432 (558.43 it/sec) -training >> step=6375100, episode=1063 reward=0.7798181 (547.31 it/sec) -training >> step=6375200, episode=1063 reward=0.7971507 (495.90 it/sec) -training >> step=6375300, episode=1063 reward=0.7675513 (416.60 it/sec) -training >> step=6375400, episode=1063 reward=0.7867352 (451.04 it/sec) -training >> step=6375500, episode=1063 reward=0.8116422 (432.17 it/sec) -training >> step=6375600, episode=1063 reward=0.7749031 (464.59 it/sec) -training >> step=6375700, episode=1063 reward=0.764715 (464.80 it/sec) -training >> step=6375800, episode=1063 reward=0.7821138 (558.17 it/sec) -training >> step=6375900, episode=1063 reward=0.7843754 (514.89 it/sec) -training >> step=6376000, episode=1063 reward=0.7874681 (483.88 it/sec) -training >> step=6376100, episode=1063 reward=0.7794805 (523.97 it/sec) -training >> step=6376200, episode=1063 reward=0.7801787 (459.20 it/sec) -training >> step=6376300, episode=1063 reward=0.7894222 (531.19 it/sec) -training >> step=6376400, episode=1063 reward=0.7737504 (500.94 it/sec) -training >> step=6376500, episode=1063 reward=0.7710236 (492.73 it/sec) -training >> step=6376600, episode=1063 reward=0.786722 (406.00 it/sec) -training >> step=6376700, episode=1063 reward=0.7681292 (427.48 it/sec) -training >> step=6376800, episode=1063 reward=0.7937448 (408.91 it/sec) -training >> step=6376900, episode=1063 reward=0.7932764 (518.46 it/sec) -training >> step=6377000, episode=1063 reward=0.7818938 (501.86 it/sec) -training >> step=6377100, episode=1063 reward=0.7791958 (505.80 it/sec) -training >> step=6377200, episode=1063 reward=0.7605702 (503.48 it/sec) -training >> step=6377300, episode=1064 reward=0.7930945 (91.22 it/sec) -training >> step=6377400, episode=1064 reward=0.7821639 (494.04 it/sec) -training >> step=6377500, episode=1064 reward=0.773895 (519.45 it/sec) -training >> step=6377600, episode=1064 reward=0.7921702 (350.86 it/sec) -training >> step=6377700, episode=1064 reward=0.7863668 (406.15 it/sec) -training >> step=6377800, episode=1064 reward=0.7926074 (459.50 it/sec) -training >> step=6377900, episode=1064 reward=0.7988107 (470.30 it/sec) -training >> step=6378000, episode=1064 reward=0.801398 (499.40 it/sec) -training >> step=6378100, episode=1064 reward=0.8003716 (420.31 it/sec) -training >> step=6378200, episode=1064 reward=0.7693844 (497.39 it/sec) -training >> step=6378300, episode=1064 reward=0.7703124 (485.80 it/sec) -training >> step=6378400, episode=1064 reward=0.7801444 (459.25 it/sec) -training >> step=6378500, episode=1064 reward=0.7772581 (486.03 it/sec) -training >> step=6378600, episode=1064 reward=0.7924685 (482.51 it/sec) -training >> step=6378700, episode=1064 reward=0.8047527 (455.81 it/sec) -training >> step=6378800, episode=1064 reward=0.783076 (458.78 it/sec) -training >> step=6378900, episode=1064 reward=0.7757838 (483.62 it/sec) -training >> step=6379000, episode=1064 reward=0.780008 (471.09 it/sec) -training >> step=6379100, episode=1064 reward=0.783125 (444.07 it/sec) -training >> step=6379200, episode=1064 reward=0.7859332 (440.08 it/sec) -training >> step=6379300, episode=1064 reward=0.7802776 (463.75 it/sec) -training >> step=6379400, episode=1064 reward=0.7681695 (456.91 it/sec) -training >> step=6379500, episode=1064 reward=0.7838089 (463.64 it/sec) -training >> step=6379600, episode=1064 reward=0.7877691 (489.15 it/sec) -training >> step=6379700, episode=1064 reward=0.7877194 (446.74 it/sec) -training >> step=6379800, episode=1064 reward=0.7760144 (499.49 it/sec) -training >> step=6379900, episode=1064 reward=0.8025335 (461.17 it/sec) -training >> step=6380000, episode=1064 reward=0.7761786 (425.61 it/sec) -training >> step=6380100, episode=1064 reward=0.7715725 (403.68 it/sec) -training >> step=6380200, episode=1064 reward=0.7980112 (429.74 it/sec) -training >> step=6380300, episode=1064 reward=0.7903491 (438.60 it/sec) -training >> step=6380400, episode=1064 reward=0.7977337 (392.63 it/sec) -training >> step=6380500, episode=1064 reward=0.7979642 (363.79 it/sec) -training >> step=6380600, episode=1064 reward=0.7959093 (382.40 it/sec) -training >> step=6380700, episode=1064 reward=0.7885221 (408.93 it/sec) -training >> step=6380800, episode=1064 reward=0.817638 (417.48 it/sec) -training >> step=6380900, episode=1064 reward=0.787706 (350.52 it/sec) -training >> step=6381000, episode=1064 reward=0.794233 (414.30 it/sec) -training >> step=6381100, episode=1064 reward=0.8015628 (477.10 it/sec) -training >> step=6381200, episode=1064 reward=0.7806801 (462.85 it/sec) -training >> step=6381300, episode=1064 reward=0.7886323 (427.75 it/sec) -training >> step=6381400, episode=1064 reward=0.7770354 (437.48 it/sec) -training >> step=6381500, episode=1064 reward=0.7936302 (423.75 it/sec) -training >> step=6381600, episode=1064 reward=0.7898438 (487.74 it/sec) -training >> step=6381700, episode=1064 reward=0.7622599 (497.07 it/sec) -training >> step=6381800, episode=1064 reward=0.7719278 (507.26 it/sec) -training >> step=6381900, episode=1064 reward=0.7797317 (504.93 it/sec) -training >> step=6382000, episode=1064 reward=0.7911026 (465.34 it/sec) -training >> step=6382100, episode=1064 reward=0.7902137 (468.19 it/sec) -training >> step=6382200, episode=1064 reward=0.7690532 (520.02 it/sec) -training >> step=6382300, episode=1064 reward=0.7659636 (473.28 it/sec) -training >> step=6382400, episode=1064 reward=0.8052107 (483.25 it/sec) -training >> step=6382500, episode=1064 reward=0.7827075 (497.99 it/sec) -training >> step=6382600, episode=1064 reward=0.8042642 (494.37 it/sec) -training >> step=6382700, episode=1064 reward=0.8067836 (470.84 it/sec) -training >> step=6382800, episode=1064 reward=0.765696 (435.06 it/sec) -training >> step=6382900, episode=1064 reward=0.7794615 (467.29 it/sec) -training >> step=6383000, episode=1064 reward=0.7812963 (482.95 it/sec) -training >> step=6383100, episode=1064 reward=0.7920917 (445.26 it/sec) -training >> step=6383200, episode=1064 reward=0.8047289 (471.89 it/sec) -training >> step=6383300, episode=1065 reward=0.7580948 (89.71 it/sec) -training >> step=6383400, episode=1065 reward=0.8039379 (437.92 it/sec) -training >> step=6383500, episode=1065 reward=0.7842624 (446.56 it/sec) -training >> step=6383600, episode=1065 reward=0.7688707 (470.44 it/sec) -training >> step=6383700, episode=1065 reward=0.7860692 (437.44 it/sec) -training >> step=6383800, episode=1065 reward=0.7870899 (343.01 it/sec) -training >> step=6383900, episode=1065 reward=0.7914942 (449.62 it/sec) -training >> step=6384000, episode=1065 reward=0.7821445 (389.15 it/sec) -training >> step=6384100, episode=1065 reward=0.7794665 (432.17 it/sec) -training >> step=6384200, episode=1065 reward=0.8076443 (470.69 it/sec) -training >> step=6384300, episode=1065 reward=0.7838069 (478.34 it/sec) -training >> step=6384400, episode=1065 reward=0.7988819 (478.80 it/sec) -training >> step=6384500, episode=1065 reward=0.7822327 (471.74 it/sec) -training >> step=6384600, episode=1065 reward=0.7975843 (509.65 it/sec) -training >> step=6384700, episode=1065 reward=0.7760552 (397.48 it/sec) -training >> step=6384800, episode=1065 reward=0.7911769 (408.71 it/sec) -training >> step=6384900, episode=1065 reward=0.7751077 (483.07 it/sec) -training >> step=6385000, episode=1065 reward=0.7891701 (406.26 it/sec) -training >> step=6385100, episode=1065 reward=0.7802163 (463.38 it/sec) -training >> step=6385200, episode=1065 reward=0.8098642 (505.84 it/sec) -training >> step=6385300, episode=1065 reward=0.7929087 (477.61 it/sec) -training >> step=6385400, episode=1065 reward=0.7990154 (532.16 it/sec) -training >> step=6385500, episode=1065 reward=0.7813432 (491.29 it/sec) -training >> step=6385600, episode=1065 reward=0.8046945 (453.47 it/sec) -training >> step=6385700, episode=1065 reward=0.7783233 (480.43 it/sec) -training >> step=6385800, episode=1065 reward=0.791056 (450.36 it/sec) -training >> step=6385900, episode=1065 reward=0.7870282 (483.35 it/sec) -training >> step=6386000, episode=1065 reward=0.7875819 (477.42 it/sec) -training >> step=6386100, episode=1065 reward=0.7975041 (445.27 it/sec) -training >> step=6386200, episode=1065 reward=0.7707484 (443.91 it/sec) -training >> step=6386300, episode=1065 reward=0.802275 (461.25 it/sec) -training >> step=6386400, episode=1065 reward=0.7956673 (446.11 it/sec) -training >> step=6386500, episode=1065 reward=0.7756854 (441.13 it/sec) -training >> step=6386600, episode=1065 reward=0.7809737 (442.30 it/sec) -training >> step=6386700, episode=1065 reward=0.786582 (469.14 it/sec) -training >> step=6386800, episode=1065 reward=0.7936012 (488.98 it/sec) -training >> step=6386900, episode=1065 reward=0.7941825 (487.06 it/sec) -training >> step=6387000, episode=1065 reward=0.7934517 (503.59 it/sec) -training >> step=6387100, episode=1065 reward=0.8013427 (453.78 it/sec) -training >> step=6387200, episode=1065 reward=0.7531137 (510.01 it/sec) -training >> step=6387300, episode=1065 reward=0.796039 (468.14 it/sec) -training >> step=6387400, episode=1065 reward=0.8003026 (464.43 it/sec) -training >> step=6387500, episode=1065 reward=0.7963314 (500.87 it/sec) -training >> step=6387600, episode=1065 reward=0.7756751 (501.10 it/sec) -training >> step=6387700, episode=1065 reward=0.7867951 (476.81 it/sec) -training >> step=6387800, episode=1065 reward=0.7676658 (459.84 it/sec) -training >> step=6387900, episode=1065 reward=0.7635745 (495.27 it/sec) -training >> step=6388000, episode=1065 reward=0.7757944 (454.14 it/sec) -training >> step=6388100, episode=1065 reward=0.7486519 (476.88 it/sec) -training >> step=6388200, episode=1065 reward=0.7871812 (471.14 it/sec) -training >> step=6388300, episode=1065 reward=0.7825627 (492.15 it/sec) -training >> step=6388400, episode=1065 reward=0.7717137 (500.65 it/sec) -training >> step=6388500, episode=1065 reward=0.7469779 (452.22 it/sec) -training >> step=6388600, episode=1065 reward=0.7925345 (488.43 it/sec) -training >> step=6388700, episode=1065 reward=0.7750219 (468.93 it/sec) -training >> step=6388800, episode=1065 reward=0.7861062 (480.70 it/sec) -training >> step=6388900, episode=1065 reward=0.7778244 (482.59 it/sec) -training >> step=6389000, episode=1065 reward=0.7853735 (500.77 it/sec) -training >> step=6389100, episode=1065 reward=0.7840175 (499.22 it/sec) -training >> step=6389200, episode=1065 reward=0.7789665 (478.97 it/sec) -training >> step=6389300, episode=1066 reward=0.7872797 (75.60 it/sec) -training >> step=6389400, episode=1066 reward=0.7883518 (444.51 it/sec) -training >> step=6389500, episode=1066 reward=0.794767 (479.18 it/sec) -training >> step=6389600, episode=1066 reward=0.7814372 (511.79 it/sec) -training >> step=6389700, episode=1066 reward=0.7956675 (476.86 it/sec) -training >> step=6389800, episode=1066 reward=0.7691979 (477.96 it/sec) -training >> step=6389900, episode=1066 reward=0.7794188 (518.68 it/sec) -training >> step=6390000, episode=1066 reward=0.7818304 (360.32 it/sec) -training >> step=6390100, episode=1066 reward=0.7987452 (493.96 it/sec) -training >> step=6390200, episode=1066 reward=0.8000467 (530.96 it/sec) -training >> step=6390300, episode=1066 reward=0.8022562 (458.30 it/sec) -training >> step=6390400, episode=1066 reward=0.7646078 (485.16 it/sec) -training >> step=6390500, episode=1066 reward=0.7899348 (494.18 it/sec) -training >> step=6390600, episode=1066 reward=0.7738981 (529.35 it/sec) -training >> step=6390700, episode=1066 reward=0.8033641 (469.06 it/sec) -training >> step=6390800, episode=1066 reward=0.7960172 (434.38 it/sec) -training >> step=6390900, episode=1066 reward=0.795216 (422.50 it/sec) -training >> step=6391000, episode=1066 reward=0.7857563 (386.65 it/sec) -training >> step=6391100, episode=1066 reward=0.783109 (481.07 it/sec) -training >> step=6391200, episode=1066 reward=0.7778057 (476.90 it/sec) -training >> step=6391300, episode=1066 reward=0.8008396 (513.96 it/sec) -training >> step=6391400, episode=1066 reward=0.7923778 (399.47 it/sec) -training >> step=6391500, episode=1066 reward=0.7924597 (458.64 it/sec) -training >> step=6391600, episode=1066 reward=0.7911721 (536.13 it/sec) -training >> step=6391700, episode=1066 reward=0.7933107 (482.95 it/sec) -training >> step=6391800, episode=1066 reward=0.7819141 (482.97 it/sec) -training >> step=6391900, episode=1066 reward=0.7699803 (489.63 it/sec) -training >> step=6392000, episode=1066 reward=0.7627165 (466.82 it/sec) -training >> step=6392100, episode=1066 reward=0.7860752 (493.76 it/sec) -training >> step=6392200, episode=1066 reward=0.7798464 (476.88 it/sec) -training >> step=6392300, episode=1066 reward=0.7919941 (449.56 it/sec) -training >> step=6392400, episode=1066 reward=0.7916294 (472.46 it/sec) -training >> step=6392500, episode=1066 reward=0.7773129 (438.70 it/sec) -training >> step=6392600, episode=1066 reward=0.7864792 (517.21 it/sec) -training >> step=6392700, episode=1066 reward=0.7944036 (511.99 it/sec) -training >> step=6392800, episode=1066 reward=0.7805145 (469.05 it/sec) -training >> step=6392900, episode=1066 reward=0.7859794 (443.83 it/sec) -training >> step=6393000, episode=1066 reward=0.7691157 (466.62 it/sec) -training >> step=6393100, episode=1066 reward=0.7822948 (514.66 it/sec) -training >> step=6393200, episode=1066 reward=0.782236 (496.63 it/sec) -training >> step=6393300, episode=1066 reward=0.8013453 (448.80 it/sec) -training >> step=6393400, episode=1066 reward=0.7817661 (419.83 it/sec) -training >> step=6393500, episode=1066 reward=0.7868586 (398.65 it/sec) -training >> step=6393600, episode=1066 reward=0.771304 (356.56 it/sec) -training >> step=6393700, episode=1066 reward=0.7743747 (411.34 it/sec) -training >> step=6393800, episode=1066 reward=0.8028703 (433.95 it/sec) -training >> step=6393900, episode=1066 reward=0.7984246 (438.29 it/sec) -training >> step=6394000, episode=1066 reward=0.7683752 (378.42 it/sec) -training >> step=6394100, episode=1066 reward=0.782506 (394.25 it/sec) -training >> step=6394200, episode=1066 reward=0.8027428 (405.07 it/sec) -training >> step=6394300, episode=1066 reward=0.774096 (461.29 it/sec) -training >> step=6394400, episode=1066 reward=0.7911162 (454.15 it/sec) -training >> step=6394500, episode=1066 reward=0.776969 (497.12 it/sec) -training >> step=6394600, episode=1066 reward=0.7780721 (439.40 it/sec) -training >> step=6394700, episode=1066 reward=0.7671706 (492.52 it/sec) -training >> step=6394800, episode=1066 reward=0.7876886 (469.62 it/sec) -training >> step=6394900, episode=1066 reward=0.7822846 (453.65 it/sec) -training >> step=6395000, episode=1066 reward=0.7740951 (464.63 it/sec) -training >> step=6395100, episode=1066 reward=0.8053467 (482.46 it/sec) -training >> step=6395200, episode=1066 reward=0.7706244 (463.87 it/sec) -training >> step=6395300, episode=1067 reward=0.7790162 (86.58 it/sec) -training >> step=6395400, episode=1067 reward=0.7679791 (466.13 it/sec) -training >> step=6395500, episode=1067 reward=0.806195 (452.37 it/sec) -training >> step=6395600, episode=1067 reward=0.7958481 (511.83 it/sec) -training >> step=6395700, episode=1067 reward=0.7993786 (457.02 it/sec) -training >> step=6395800, episode=1067 reward=0.7838736 (486.91 it/sec) -training >> step=6395900, episode=1067 reward=0.8022406 (489.47 it/sec) -training >> step=6396000, episode=1067 reward=0.7990121 (479.15 it/sec) -training >> step=6396100, episode=1067 reward=0.7971916 (448.29 it/sec) -training >> step=6396200, episode=1067 reward=0.7840089 (349.25 it/sec) -training >> step=6396300, episode=1067 reward=0.791137 (457.83 it/sec) -training >> step=6396400, episode=1067 reward=0.7756936 (445.82 it/sec) -training >> step=6396500, episode=1067 reward=0.7865241 (464.38 it/sec) -training >> step=6396600, episode=1067 reward=0.7799634 (501.70 it/sec) -training >> step=6396700, episode=1067 reward=0.7733262 (509.45 it/sec) -training >> step=6396800, episode=1067 reward=0.7866691 (467.84 it/sec) -training >> step=6396900, episode=1067 reward=0.7961884 (493.57 it/sec) -training >> step=6397000, episode=1067 reward=0.7970294 (484.47 it/sec) -training >> step=6397100, episode=1067 reward=0.7766405 (435.16 it/sec) -training >> step=6397200, episode=1067 reward=0.7827429 (388.92 it/sec) -training >> step=6397300, episode=1067 reward=0.7575222 (419.88 it/sec) -training >> step=6397400, episode=1067 reward=0.7718573 (378.63 it/sec) -training >> step=6397500, episode=1067 reward=0.7763037 (408.56 it/sec) -training >> step=6397600, episode=1067 reward=0.7817841 (500.65 it/sec) -training >> step=6397700, episode=1067 reward=0.7773397 (501.85 it/sec) -training >> step=6397800, episode=1067 reward=0.7817876 (519.90 it/sec) -training >> step=6397900, episode=1067 reward=0.7677401 (510.13 it/sec) -training >> step=6398000, episode=1067 reward=0.7995466 (505.67 it/sec) -training >> step=6398100, episode=1067 reward=0.7822583 (507.17 it/sec) -training >> step=6398200, episode=1067 reward=0.7734963 (473.37 it/sec) -training >> step=6398300, episode=1067 reward=0.7814364 (488.09 it/sec) -training >> step=6398400, episode=1067 reward=0.8037951 (527.18 it/sec) -training >> step=6398500, episode=1067 reward=0.772175 (479.25 it/sec) -training >> step=6398600, episode=1067 reward=0.8049785 (462.21 it/sec) -training >> step=6398700, episode=1067 reward=0.774348 (486.20 it/sec) -training >> step=6398800, episode=1067 reward=0.7842289 (505.04 it/sec) -training >> step=6398900, episode=1067 reward=0.781833 (523.17 it/sec) -training >> step=6399000, episode=1067 reward=0.8050588 (463.31 it/sec) -training >> step=6399100, episode=1067 reward=0.7924199 (508.79 it/sec) -training >> step=6399200, episode=1067 reward=0.790179 (490.18 it/sec) -training >> step=6399300, episode=1067 reward=0.7878183 (468.18 it/sec) -training >> step=6399400, episode=1067 reward=0.7871395 (494.81 it/sec) -training >> step=6399500, episode=1067 reward=0.7900107 (488.83 it/sec) -training >> step=6399600, episode=1067 reward=0.7915257 (466.93 it/sec) -training >> step=6399700, episode=1067 reward=0.765009 (460.38 it/sec) -training >> step=6399800, episode=1067 reward=0.7633641 (477.83 it/sec) -training >> step=6399900, episode=1067 reward=0.780827 (496.39 it/sec) -training >> step=6400000, episode=1067 reward=0.7736362 (483.67 it/sec) -training >> step=6400100, episode=1067 reward=0.7726284 (468.18 it/sec) -training >> step=6400200, episode=1067 reward=0.7788551 (516.24 it/sec) -training >> step=6400300, episode=1067 reward=0.7677513 (495.57 it/sec) -training >> step=6400400, episode=1067 reward=0.7661839 (453.24 it/sec) -training >> step=6400500, episode=1067 reward=0.7816918 (489.45 it/sec) -training >> step=6400600, episode=1067 reward=0.7849736 (495.88 it/sec) -training >> step=6400700, episode=1067 reward=0.7858686 (498.33 it/sec) -training >> step=6400800, episode=1067 reward=0.7706398 (489.14 it/sec) -training >> step=6400900, episode=1067 reward=0.7770533 (534.38 it/sec) -training >> step=6401000, episode=1067 reward=0.7771767 (450.75 it/sec) -training >> step=6401100, episode=1067 reward=0.7742764 (493.69 it/sec) -training >> step=6401200, episode=1067 reward=0.7726025 (520.53 it/sec) -training >> step=6401300, episode=1068 reward=0.7712949 (113.28 it/sec) -training >> step=6401400, episode=1068 reward=0.7934163 (473.74 it/sec) -training >> step=6401500, episode=1068 reward=0.7925053 (477.77 it/sec) -training >> step=6401600, episode=1068 reward=0.7573623 (448.10 it/sec) -training >> step=6401700, episode=1068 reward=0.7897096 (452.02 it/sec) -training >> step=6401800, episode=1068 reward=0.7748353 (468.59 it/sec) -training >> step=6401900, episode=1068 reward=0.7790033 (495.30 it/sec) -training >> step=6402000, episode=1068 reward=0.7907916 (499.97 it/sec) -training >> step=6402100, episode=1068 reward=0.795847 (465.99 it/sec) -training >> step=6402200, episode=1068 reward=0.7989524 (462.14 it/sec) -training >> step=6402300, episode=1068 reward=0.797763 (450.41 it/sec) -training >> step=6402400, episode=1068 reward=0.8018332 (378.07 it/sec) -training >> step=6402500, episode=1068 reward=0.802978 (484.34 it/sec) -training >> step=6402600, episode=1068 reward=0.7771854 (489.15 it/sec) -training >> step=6402700, episode=1068 reward=0.7748213 (525.09 it/sec) -training >> step=6402800, episode=1068 reward=0.8071421 (472.08 it/sec) -training >> step=6402900, episode=1068 reward=0.7904309 (500.53 it/sec) -training >> step=6403000, episode=1068 reward=0.7788104 (523.84 it/sec) -training >> step=6403100, episode=1068 reward=0.7851163 (466.94 it/sec) -training >> step=6403200, episode=1068 reward=0.8127207 (434.69 it/sec) -training >> step=6403300, episode=1068 reward=0.7628649 (511.71 it/sec) -training >> step=6403400, episode=1068 reward=0.7585571 (520.63 it/sec) -training >> step=6403500, episode=1068 reward=0.7937564 (477.30 it/sec) -training >> step=6403600, episode=1068 reward=0.7841403 (487.78 it/sec) -training >> step=6403700, episode=1068 reward=0.7805927 (411.21 it/sec) -training >> step=6403800, episode=1068 reward=0.78253 (521.46 it/sec) -training >> step=6403900, episode=1068 reward=0.7843903 (512.85 it/sec) -training >> step=6404000, episode=1068 reward=0.7775256 (546.21 it/sec) -training >> step=6404100, episode=1068 reward=0.7907988 (499.66 it/sec) -training >> step=6404200, episode=1068 reward=0.7830333 (498.98 it/sec) -training >> step=6404300, episode=1068 reward=0.7850954 (526.45 it/sec) -training >> step=6404400, episode=1068 reward=0.7860422 (517.56 it/sec) -training >> step=6404500, episode=1068 reward=0.80257 (518.03 it/sec) -training >> step=6404600, episode=1068 reward=0.7916 (483.57 it/sec) -training >> step=6404700, episode=1068 reward=0.7771562 (537.98 it/sec) -training >> step=6404800, episode=1068 reward=0.7753231 (530.19 it/sec) -training >> step=6404900, episode=1068 reward=0.7795013 (524.93 it/sec) -training >> step=6405000, episode=1068 reward=0.7998478 (499.60 it/sec) -training >> step=6405100, episode=1068 reward=0.7706065 (550.55 it/sec) -training >> step=6405200, episode=1068 reward=0.7961318 (460.76 it/sec) -training >> step=6405300, episode=1068 reward=0.7983505 (454.68 it/sec) -training >> step=6405400, episode=1068 reward=0.784474 (529.39 it/sec) -training >> step=6405500, episode=1068 reward=0.7967538 (580.37 it/sec) -training >> step=6405600, episode=1068 reward=0.7748218 (531.07 it/sec) -training >> step=6405700, episode=1068 reward=0.7613696 (486.22 it/sec) -training >> step=6405800, episode=1068 reward=0.7631381 (536.11 it/sec) -training >> step=6405900, episode=1068 reward=0.7808164 (527.20 it/sec) -training >> step=6406000, episode=1068 reward=0.7947124 (524.65 it/sec) -training >> step=6406100, episode=1068 reward=0.7746453 (547.30 it/sec) -training >> step=6406200, episode=1068 reward=0.7818934 (507.72 it/sec) -training >> step=6406300, episode=1068 reward=0.7932211 (448.82 it/sec) -training >> step=6406400, episode=1068 reward=0.7748926 (492.41 it/sec) -training >> step=6406500, episode=1068 reward=0.7950592 (494.11 it/sec) -training >> step=6406600, episode=1068 reward=0.7751436 (505.11 it/sec) -training >> step=6406700, episode=1068 reward=0.7921188 (473.27 it/sec) -training >> step=6406800, episode=1068 reward=0.7660763 (436.85 it/sec) -training >> step=6406900, episode=1068 reward=0.7708635 (350.25 it/sec) -training >> step=6407000, episode=1068 reward=0.7794144 (428.07 it/sec) -training >> step=6407100, episode=1068 reward=0.7661718 (480.40 it/sec) -training >> step=6407200, episode=1068 reward=0.7775596 (354.15 it/sec) -training >> step=6407300, episode=1069 reward=0.7565815 (69.25 it/sec) -training >> step=6407400, episode=1069 reward=0.7940499 (473.26 it/sec) -training >> step=6407500, episode=1069 reward=0.7989178 (424.68 it/sec) -training >> step=6407600, episode=1069 reward=0.7956925 (478.63 it/sec) -training >> step=6407700, episode=1069 reward=0.7752314 (507.84 it/sec) -training >> step=6407800, episode=1069 reward=0.8090265 (469.85 it/sec) -training >> step=6407900, episode=1069 reward=0.7853838 (519.50 it/sec) -training >> step=6408000, episode=1069 reward=0.7903354 (438.91 it/sec) -training >> step=6408100, episode=1069 reward=0.7738734 (490.85 it/sec) -training >> step=6408200, episode=1069 reward=0.7975543 (484.37 it/sec) -training >> step=6408300, episode=1069 reward=0.7968737 (513.67 it/sec) -training >> step=6408400, episode=1069 reward=0.7691291 (413.28 it/sec) -training >> step=6408500, episode=1069 reward=0.7997131 (466.97 it/sec) -training >> step=6408600, episode=1069 reward=0.7892758 (467.85 it/sec) -training >> step=6408700, episode=1069 reward=0.8017429 (476.19 it/sec) -training >> step=6408800, episode=1069 reward=0.7895123 (503.80 it/sec) -training >> step=6408900, episode=1069 reward=0.7934147 (451.81 it/sec) -training >> step=6409000, episode=1069 reward=0.8082756 (439.71 it/sec) -training >> step=6409100, episode=1069 reward=0.7813526 (437.20 it/sec) -training >> step=6409200, episode=1069 reward=0.8080637 (541.53 it/sec) -training >> step=6409300, episode=1069 reward=0.800235 (489.13 it/sec) -training >> step=6409400, episode=1069 reward=0.774751 (478.95 it/sec) -training >> step=6409500, episode=1069 reward=0.7778431 (499.17 it/sec) -training >> step=6409600, episode=1069 reward=0.7831203 (490.25 it/sec) -training >> step=6409700, episode=1069 reward=0.8108185 (504.01 it/sec) -training >> step=6409800, episode=1069 reward=0.769546 (472.47 it/sec) -training >> step=6409900, episode=1069 reward=0.8011401 (505.54 it/sec) -training >> step=6410000, episode=1069 reward=0.7919806 (471.31 it/sec) -training >> step=6410100, episode=1069 reward=0.7851758 (498.65 it/sec) -training >> step=6410200, episode=1069 reward=0.7760797 (507.17 it/sec) -training >> step=6410300, episode=1069 reward=0.8136315 (504.51 it/sec) -training >> step=6410400, episode=1069 reward=0.8048578 (498.84 it/sec) -training >> step=6410500, episode=1069 reward=0.79237 (498.70 it/sec) -training >> step=6410600, episode=1069 reward=0.7704839 (483.74 it/sec) -training >> step=6410700, episode=1069 reward=0.789911 (511.62 it/sec) -training >> step=6410800, episode=1069 reward=0.7837867 (518.21 it/sec) -training >> step=6410900, episode=1069 reward=0.7909074 (486.54 it/sec) -training >> step=6411000, episode=1069 reward=0.7983397 (484.97 it/sec) -training >> step=6411100, episode=1069 reward=0.7803501 (458.82 it/sec) -training >> step=6411200, episode=1069 reward=0.7690131 (503.13 it/sec) -training >> step=6411300, episode=1069 reward=0.7917747 (471.29 it/sec) -training >> step=6411400, episode=1069 reward=0.780512 (542.38 it/sec) -training >> step=6411500, episode=1069 reward=0.7928603 (457.84 it/sec) -training >> step=6411600, episode=1069 reward=0.7838834 (475.94 it/sec) -training >> step=6411700, episode=1069 reward=0.7739542 (528.34 it/sec) -training >> step=6411800, episode=1069 reward=0.790316 (522.23 it/sec) -training >> step=6411900, episode=1069 reward=0.7670268 (502.90 it/sec) -training >> step=6412000, episode=1069 reward=0.7823676 (470.43 it/sec) -training >> step=6412100, episode=1069 reward=0.7818897 (503.15 it/sec) -training >> step=6412200, episode=1069 reward=0.7694362 (472.50 it/sec) -training >> step=6412300, episode=1069 reward=0.7608131 (517.20 it/sec) -training >> step=6412400, episode=1069 reward=0.7923464 (482.84 it/sec) -training >> step=6412500, episode=1069 reward=0.7753933 (493.72 it/sec) -training >> step=6412600, episode=1069 reward=0.7557311 (503.77 it/sec) -training >> step=6412700, episode=1069 reward=0.7633181 (468.83 it/sec) -training >> step=6412800, episode=1069 reward=0.7640163 (476.93 it/sec) -training >> step=6412900, episode=1069 reward=0.7665958 (485.20 it/sec) -training >> step=6413000, episode=1069 reward=0.7839902 (487.51 it/sec) -training >> step=6413100, episode=1069 reward=0.7715355 (484.79 it/sec) -training >> step=6413200, episode=1069 reward=0.7850029 (499.66 it/sec) -training >> step=6413300, episode=1070 reward=0.7781537 (129.77 it/sec) -training >> step=6413400, episode=1070 reward=0.7958606 (484.47 it/sec) -training >> step=6413500, episode=1070 reward=0.7966903 (476.83 it/sec) -training >> step=6413600, episode=1070 reward=0.7864642 (501.30 it/sec) -training >> step=6413700, episode=1070 reward=0.768641 (462.87 it/sec) -training >> step=6413800, episode=1070 reward=0.7876327 (502.90 it/sec) -training >> step=6413900, episode=1070 reward=0.7771924 (454.62 it/sec) -training >> step=6414000, episode=1070 reward=0.7803304 (500.50 it/sec) -training >> step=6414100, episode=1070 reward=0.7874519 (516.26 it/sec) -training >> step=6414200, episode=1070 reward=0.7960846 (503.55 it/sec) -training >> step=6414300, episode=1070 reward=0.7949622 (457.97 it/sec) -training >> step=6414400, episode=1070 reward=0.7595122 (414.10 it/sec) -training >> step=6414500, episode=1070 reward=0.8053666 (498.29 it/sec) -training >> step=6414600, episode=1070 reward=0.785247 (498.22 it/sec) -training >> step=6414700, episode=1070 reward=0.7655309 (329.31 it/sec) -training >> step=6414800, episode=1070 reward=0.7797074 (497.77 it/sec) -training >> step=6414900, episode=1070 reward=0.7890851 (499.38 it/sec) -training >> step=6415000, episode=1070 reward=0.7697648 (520.65 it/sec) -training >> step=6415100, episode=1070 reward=0.7880282 (453.62 it/sec) -training >> step=6415200, episode=1070 reward=0.7850335 (431.83 it/sec) -training >> step=6415300, episode=1070 reward=0.7934508 (454.29 it/sec) -training >> step=6415400, episode=1070 reward=0.7635333 (517.40 it/sec) -training >> step=6415500, episode=1070 reward=0.7639138 (458.67 it/sec) -training >> step=6415600, episode=1070 reward=0.7838707 (450.71 it/sec) -training >> step=6415700, episode=1070 reward=0.804616 (447.87 it/sec) -training >> step=6415800, episode=1070 reward=0.7976567 (476.67 it/sec) -training >> step=6415900, episode=1070 reward=0.7807865 (456.88 it/sec) -training >> step=6416000, episode=1070 reward=0.793247 (509.85 it/sec) -training >> step=6416100, episode=1070 reward=0.7876214 (502.28 it/sec) -training >> step=6416200, episode=1070 reward=0.7790654 (464.98 it/sec) -training >> step=6416300, episode=1070 reward=0.7859756 (477.29 it/sec) -training >> step=6416400, episode=1070 reward=0.7924094 (468.17 it/sec) -training >> step=6416500, episode=1070 reward=0.790458 (521.66 it/sec) -training >> step=6416600, episode=1070 reward=0.7891818 (481.16 it/sec) -training >> step=6416700, episode=1070 reward=0.7890067 (481.52 it/sec) -training >> step=6416800, episode=1070 reward=0.7690681 (517.70 it/sec) -training >> step=6416900, episode=1070 reward=0.7933891 (514.38 it/sec) -training >> step=6417000, episode=1070 reward=0.7970355 (505.58 it/sec) -training >> step=6417100, episode=1070 reward=0.8016602 (456.59 it/sec) -training >> step=6417200, episode=1070 reward=0.7823789 (514.67 it/sec) -training >> step=6417300, episode=1070 reward=0.7881871 (497.51 it/sec) -training >> step=6417400, episode=1070 reward=0.7697035 (495.17 it/sec) -training >> step=6417500, episode=1070 reward=0.785004 (521.88 it/sec) -training >> step=6417600, episode=1070 reward=0.8059818 (471.60 it/sec) -training >> step=6417700, episode=1070 reward=0.780553 (494.48 it/sec) -training >> step=6417800, episode=1070 reward=0.7749947 (476.12 it/sec) -training >> step=6417900, episode=1070 reward=0.7820185 (525.45 it/sec) -training >> step=6418000, episode=1070 reward=0.7635607 (491.99 it/sec) -training >> step=6418100, episode=1070 reward=0.7683411 (496.39 it/sec) -training >> step=6418200, episode=1070 reward=0.8002141 (499.45 it/sec) -training >> step=6418300, episode=1070 reward=0.7886834 (522.58 it/sec) -training >> step=6418400, episode=1070 reward=0.7706676 (506.12 it/sec) -training >> step=6418500, episode=1070 reward=0.7772883 (529.81 it/sec) -training >> step=6418600, episode=1070 reward=0.7679437 (506.23 it/sec) -training >> step=6418700, episode=1070 reward=0.7596962 (493.69 it/sec) -training >> step=6418800, episode=1070 reward=0.8027614 (496.21 it/sec) -training >> step=6418900, episode=1070 reward=0.762495 (487.72 it/sec) -training >> step=6419000, episode=1070 reward=0.7705818 (495.17 it/sec) -training >> step=6419100, episode=1070 reward=0.7772765 (507.42 it/sec) -training >> step=6419200, episode=1070 reward=0.7610656 (473.70 it/sec) -training >> step=6419300, episode=1071 reward=0.7808815 (112.90 it/sec) -training >> step=6419400, episode=1071 reward=0.7880235 (498.09 it/sec) -training >> step=6419500, episode=1071 reward=0.7818478 (496.28 it/sec) -training >> step=6419600, episode=1071 reward=0.7678233 (509.48 it/sec) -training >> step=6419700, episode=1071 reward=0.7799569 (533.21 it/sec) -training >> step=6419800, episode=1071 reward=0.7902433 (453.42 it/sec) -training >> step=6419900, episode=1071 reward=0.778941 (467.56 it/sec) -training >> step=6420000, episode=1071 reward=0.7861104 (499.20 it/sec) -training >> step=6420100, episode=1071 reward=0.7851745 (543.40 it/sec) -training >> step=6420200, episode=1071 reward=0.7911679 (523.00 it/sec) -training >> step=6420300, episode=1071 reward=0.7748764 (507.79 it/sec) -training >> step=6420400, episode=1071 reward=0.7904679 (465.01 it/sec) -training >> step=6420500, episode=1071 reward=0.7865396 (485.86 it/sec) -training >> step=6420600, episode=1071 reward=0.7919614 (495.87 it/sec) -training >> step=6420700, episode=1071 reward=0.7883927 (507.18 it/sec) -training >> step=6420800, episode=1071 reward=0.7769142 (364.48 it/sec) -training >> step=6420900, episode=1071 reward=0.8009906 (494.46 it/sec) -training >> step=6421000, episode=1071 reward=0.7875953 (459.30 it/sec) -training >> step=6421100, episode=1071 reward=0.7780613 (491.67 it/sec) -training >> step=6421200, episode=1071 reward=0.7779431 (485.92 it/sec) -training >> step=6421300, episode=1071 reward=0.7856108 (470.80 it/sec) -training >> step=6421400, episode=1071 reward=0.7888958 (486.36 it/sec) -training >> step=6421500, episode=1071 reward=0.7778603 (530.30 it/sec) -training >> step=6421600, episode=1071 reward=0.825489 (469.99 it/sec) -training >> step=6421700, episode=1071 reward=0.8062577 (492.59 it/sec) -training >> step=6421800, episode=1071 reward=0.7922763 (450.04 it/sec) -training >> step=6421900, episode=1071 reward=0.7825347 (386.98 it/sec) -training >> step=6422000, episode=1071 reward=0.7701389 (384.47 it/sec) -training >> step=6422100, episode=1071 reward=0.7785346 (409.49 it/sec) -training >> step=6422200, episode=1071 reward=0.7996966 (475.00 it/sec) -training >> step=6422300, episode=1071 reward=0.799691 (476.16 it/sec) -training >> step=6422400, episode=1071 reward=0.787396 (432.17 it/sec) -training >> step=6422500, episode=1071 reward=0.7982188 (427.97 it/sec) -training >> step=6422600, episode=1071 reward=0.7835105 (441.56 it/sec) -training >> step=6422700, episode=1071 reward=0.768472 (471.15 it/sec) -training >> step=6422800, episode=1071 reward=0.7759417 (465.78 it/sec) -training >> step=6422900, episode=1071 reward=0.7896279 (504.38 it/sec) -training >> step=6423000, episode=1071 reward=0.7702717 (502.89 it/sec) -training >> step=6423100, episode=1071 reward=0.7843195 (493.52 it/sec) -training >> step=6423200, episode=1071 reward=0.7839808 (495.19 it/sec) -training >> step=6423300, episode=1071 reward=0.7588088 (470.33 it/sec) -training >> step=6423400, episode=1071 reward=0.7811128 (495.08 it/sec) -training >> step=6423500, episode=1071 reward=0.7995558 (520.14 it/sec) -training >> step=6423600, episode=1071 reward=0.7727656 (506.59 it/sec) -training >> step=6423700, episode=1071 reward=0.7892455 (520.49 it/sec) -training >> step=6423800, episode=1071 reward=0.7883974 (479.51 it/sec) -training >> step=6423900, episode=1071 reward=0.7801283 (478.00 it/sec) -training >> step=6424000, episode=1071 reward=0.7820889 (517.01 it/sec) -training >> step=6424100, episode=1071 reward=0.7921117 (509.93 it/sec) -training >> step=6424200, episode=1071 reward=0.7697856 (505.49 it/sec) -training >> step=6424300, episode=1071 reward=0.7911763 (418.58 it/sec) -training >> step=6424400, episode=1071 reward=0.7747606 (470.59 it/sec) -training >> step=6424500, episode=1071 reward=0.7646827 (455.48 it/sec) -training >> step=6424600, episode=1071 reward=0.7927645 (441.36 it/sec) -training >> step=6424700, episode=1071 reward=0.7686821 (505.04 it/sec) -training >> step=6424800, episode=1071 reward=0.7615914 (470.47 it/sec) -training >> step=6424900, episode=1071 reward=0.7841126 (476.67 it/sec) -training >> step=6425000, episode=1071 reward=0.8203679 (481.66 it/sec) -training >> step=6425100, episode=1071 reward=0.7969016 (526.99 it/sec) -training >> step=6425200, episode=1071 reward=0.7884801 (488.90 it/sec) -training >> step=6425300, episode=1072 reward=0.7681521 (121.82 it/sec) -training >> step=6425400, episode=1072 reward=0.789964 (518.92 it/sec) -training >> step=6425500, episode=1072 reward=0.7720428 (487.06 it/sec) -training >> step=6425600, episode=1072 reward=0.7971206 (474.43 it/sec) -training >> step=6425700, episode=1072 reward=0.773847 (463.34 it/sec) -training >> step=6425800, episode=1072 reward=0.8035232 (504.14 it/sec) -training >> step=6425900, episode=1072 reward=0.7956063 (465.68 it/sec) -training >> step=6426000, episode=1072 reward=0.7769192 (502.71 it/sec) -training >> step=6426100, episode=1072 reward=0.773011 (487.89 it/sec) -training >> step=6426200, episode=1072 reward=0.7759051 (498.71 it/sec) -training >> step=6426300, episode=1072 reward=0.7741987 (508.00 it/sec) -training >> step=6426400, episode=1072 reward=0.7744403 (437.74 it/sec) -training >> step=6426500, episode=1072 reward=0.801445 (451.86 it/sec) -training >> step=6426600, episode=1072 reward=0.7788537 (466.82 it/sec) -training >> step=6426700, episode=1072 reward=0.7665933 (499.68 it/sec) -training >> step=6426800, episode=1072 reward=0.8011981 (455.99 it/sec) -training >> step=6426900, episode=1072 reward=0.7959243 (518.98 it/sec) -training >> step=6427000, episode=1072 reward=0.8016154 (484.72 it/sec) -training >> step=6427100, episode=1072 reward=0.7894388 (334.32 it/sec) -training >> step=6427200, episode=1072 reward=0.7966374 (502.00 it/sec) -training >> step=6427300, episode=1072 reward=0.7897368 (435.93 it/sec) -training >> step=6427400, episode=1072 reward=0.8189972 (471.71 it/sec) -training >> step=6427500, episode=1072 reward=0.7964684 (455.70 it/sec) -training >> step=6427600, episode=1072 reward=0.7662324 (467.44 it/sec) -training >> step=6427700, episode=1072 reward=0.7932239 (437.61 it/sec) -training >> step=6427800, episode=1072 reward=0.7904548 (471.13 it/sec) -training >> step=6427900, episode=1072 reward=0.7903054 (461.90 it/sec) -training >> step=6428000, episode=1072 reward=0.7595262 (514.76 it/sec) -training >> step=6428100, episode=1072 reward=0.7868663 (397.00 it/sec) -training >> step=6428200, episode=1072 reward=0.7911947 (346.06 it/sec) -training >> step=6428300, episode=1072 reward=0.774987 (446.69 it/sec) -training >> step=6428400, episode=1072 reward=0.8128352 (485.36 it/sec) -training >> step=6428500, episode=1072 reward=0.7874706 (515.01 it/sec) -training >> step=6428600, episode=1072 reward=0.7824732 (519.22 it/sec) -training >> step=6428700, episode=1072 reward=0.7834672 (530.18 it/sec) -training >> step=6428800, episode=1072 reward=0.7727231 (450.43 it/sec) -training >> step=6428900, episode=1072 reward=0.7902144 (408.38 it/sec) -training >> step=6429000, episode=1072 reward=0.7719221 (467.64 it/sec) -training >> step=6429100, episode=1072 reward=0.784849 (471.76 it/sec) -training >> step=6429200, episode=1072 reward=0.790093 (493.08 it/sec) -training >> step=6429300, episode=1072 reward=0.7725495 (445.08 it/sec) -training >> step=6429400, episode=1072 reward=0.7809092 (508.92 it/sec) -training >> step=6429500, episode=1072 reward=0.7801127 (488.14 it/sec) -training >> step=6429600, episode=1072 reward=0.7797086 (515.07 it/sec) -training >> step=6429700, episode=1072 reward=0.7642682 (478.37 it/sec) -training >> step=6429800, episode=1072 reward=0.7855524 (516.53 it/sec) -training >> step=6429900, episode=1072 reward=0.7575051 (510.65 it/sec) -training >> step=6430000, episode=1072 reward=0.7971338 (521.14 it/sec) -training >> step=6430100, episode=1072 reward=0.7850012 (506.57 it/sec) -training >> step=6430200, episode=1072 reward=0.7748488 (524.63 it/sec) -training >> step=6430300, episode=1072 reward=0.7700894 (502.61 it/sec) -training >> step=6430400, episode=1072 reward=0.7879232 (503.07 it/sec) -training >> step=6430500, episode=1072 reward=0.7921701 (557.34 it/sec) -training >> step=6430600, episode=1072 reward=0.7850394 (530.23 it/sec) -training >> step=6430700, episode=1072 reward=0.7553377 (543.01 it/sec) -training >> step=6430800, episode=1072 reward=0.7734665 (512.91 it/sec) -training >> step=6430900, episode=1072 reward=0.7701742 (486.69 it/sec) -training >> step=6431000, episode=1072 reward=0.7673485 (530.69 it/sec) -training >> step=6431100, episode=1072 reward=0.7769917 (531.48 it/sec) -training >> step=6431200, episode=1072 reward=0.7782142 (506.28 it/sec) -training >> step=6431300, episode=1073 reward=0.7917328 (131.21 it/sec) -training >> step=6431400, episode=1073 reward=0.7925991 (491.29 it/sec) -training >> step=6431500, episode=1073 reward=0.7738225 (518.20 it/sec) -training >> step=6431600, episode=1073 reward=0.7620074 (522.70 it/sec) -training >> step=6431700, episode=1073 reward=0.7786452 (483.47 it/sec) -training >> step=6431800, episode=1073 reward=0.7787192 (492.03 it/sec) -training >> step=6431900, episode=1073 reward=0.7858469 (509.32 it/sec) -training >> step=6432000, episode=1073 reward=0.7785724 (500.39 it/sec) -training >> step=6432100, episode=1073 reward=0.7960417 (504.88 it/sec) -training >> step=6432200, episode=1073 reward=0.7960569 (505.87 it/sec) -training >> step=6432300, episode=1073 reward=0.8052488 (517.52 it/sec) -training >> step=6432400, episode=1073 reward=0.7995344 (486.43 it/sec) -training >> step=6432500, episode=1073 reward=0.793039 (501.37 it/sec) -training >> step=6432600, episode=1073 reward=0.7906435 (502.39 it/sec) -training >> step=6432700, episode=1073 reward=0.7839573 (496.66 it/sec) -training >> step=6432800, episode=1073 reward=0.7740142 (516.04 it/sec) -training >> step=6432900, episode=1073 reward=0.7839302 (513.00 it/sec) -training >> step=6433000, episode=1073 reward=0.7933131 (496.69 it/sec) -training >> step=6433100, episode=1073 reward=0.8089557 (487.59 it/sec) -training >> step=6433200, episode=1073 reward=0.8004908 (479.93 it/sec) -training >> step=6433300, episode=1073 reward=0.7801306 (359.53 it/sec) -training >> step=6433400, episode=1073 reward=0.7846327 (485.47 it/sec) -training >> step=6433500, episode=1073 reward=0.792034 (495.63 it/sec) -training >> step=6433600, episode=1073 reward=0.7905009 (447.29 it/sec) -training >> step=6433700, episode=1073 reward=0.794521 (467.78 it/sec) -training >> step=6433800, episode=1073 reward=0.7807595 (481.60 it/sec) -training >> step=6433900, episode=1073 reward=0.7804343 (498.56 it/sec) -training >> step=6434000, episode=1073 reward=0.7793894 (472.83 it/sec) -training >> step=6434100, episode=1073 reward=0.7678002 (491.06 it/sec) -training >> step=6434200, episode=1073 reward=0.7697675 (478.67 it/sec) -training >> step=6434300, episode=1073 reward=0.7674448 (500.07 it/sec) -training >> step=6434400, episode=1073 reward=0.7766755 (505.62 it/sec) -training >> step=6434500, episode=1073 reward=0.8091611 (505.91 it/sec) -training >> step=6434600, episode=1073 reward=0.7693026 (498.97 it/sec) -training >> step=6434700, episode=1073 reward=0.7695013 (466.13 it/sec) -training >> step=6434800, episode=1073 reward=0.7972495 (508.56 it/sec) -training >> step=6434900, episode=1073 reward=0.796451 (482.03 it/sec) -training >> step=6435000, episode=1073 reward=0.7674981 (473.56 it/sec) -training >> step=6435100, episode=1073 reward=0.7786517 (520.56 it/sec) -training >> step=6435200, episode=1073 reward=0.7998994 (526.79 it/sec) -training >> step=6435300, episode=1073 reward=0.7974702 (522.70 it/sec) -training >> step=6435400, episode=1073 reward=0.7880457 (486.74 it/sec) -training >> step=6435500, episode=1073 reward=0.7849566 (507.48 it/sec) -training >> step=6435600, episode=1073 reward=0.7925404 (496.00 it/sec) -training >> step=6435700, episode=1073 reward=0.7928516 (495.06 it/sec) -training >> step=6435800, episode=1073 reward=0.7609499 (482.37 it/sec) -training >> step=6435900, episode=1073 reward=0.7800399 (532.72 it/sec) -training >> step=6436000, episode=1073 reward=0.7758856 (499.90 it/sec) -training >> step=6436100, episode=1073 reward=0.7724923 (467.77 it/sec) -training >> step=6436200, episode=1073 reward=0.784633 (487.08 it/sec) -training >> step=6436300, episode=1073 reward=0.7921836 (502.94 it/sec) -training >> step=6436400, episode=1073 reward=0.7818611 (505.70 it/sec) -training >> step=6436500, episode=1073 reward=0.7745642 (459.42 it/sec) -training >> step=6436600, episode=1073 reward=0.7729422 (500.86 it/sec) -training >> step=6436700, episode=1073 reward=0.7771112 (456.46 it/sec) -training >> step=6436800, episode=1073 reward=0.7662884 (430.96 it/sec) -training >> step=6436900, episode=1073 reward=0.7896998 (489.03 it/sec) -training >> step=6437000, episode=1073 reward=0.7878588 (526.01 it/sec) -training >> step=6437100, episode=1073 reward=0.7840118 (457.30 it/sec) -training >> step=6437200, episode=1073 reward=0.7810401 (491.30 it/sec) -training >> step=6437300, episode=1074 reward=0.7781838 (68.72 it/sec) -training >> step=6437400, episode=1074 reward=0.7718326 (469.79 it/sec) -training >> step=6437500, episode=1074 reward=0.7806248 (393.28 it/sec) -training >> step=6437600, episode=1074 reward=0.7780272 (456.16 it/sec) -training >> step=6437700, episode=1074 reward=0.7978606 (447.72 it/sec) -training >> step=6437800, episode=1074 reward=0.7926135 (522.89 it/sec) -training >> step=6437900, episode=1074 reward=0.7894914 (570.20 it/sec) -training >> step=6438000, episode=1074 reward=0.7724141 (471.25 it/sec) -training >> step=6438100, episode=1074 reward=0.7805265 (467.17 it/sec) -training >> step=6438200, episode=1074 reward=0.7712169 (516.48 it/sec) -training >> step=6438300, episode=1074 reward=0.7949086 (537.08 it/sec) -training >> step=6438400, episode=1074 reward=0.7942536 (472.55 it/sec) -training >> step=6438500, episode=1074 reward=0.7850439 (536.66 it/sec) -training >> step=6438600, episode=1074 reward=0.7634459 (476.93 it/sec) -training >> step=6438700, episode=1074 reward=0.7825841 (541.27 it/sec) -training >> step=6438800, episode=1074 reward=0.8005085 (507.88 it/sec) -training >> step=6438900, episode=1074 reward=0.7640097 (523.49 it/sec) -training >> step=6439000, episode=1074 reward=0.7854252 (531.56 it/sec) -training >> step=6439100, episode=1074 reward=0.7842365 (463.68 it/sec) -training >> step=6439200, episode=1074 reward=0.78262 (465.12 it/sec) -training >> step=6439300, episode=1074 reward=0.7768125 (504.61 it/sec) -training >> step=6439400, episode=1074 reward=0.7798664 (525.24 it/sec) -training >> step=6439500, episode=1074 reward=0.7888215 (559.16 it/sec) -training >> step=6439600, episode=1074 reward=0.7942516 (341.73 it/sec) -training >> step=6439700, episode=1074 reward=0.7868721 (550.45 it/sec) -training >> step=6439800, episode=1074 reward=0.7911297 (471.79 it/sec) -training >> step=6439900, episode=1074 reward=0.8015237 (542.93 it/sec) -training >> step=6440000, episode=1074 reward=0.791583 (532.44 it/sec) -training >> step=6440100, episode=1074 reward=0.7879996 (484.49 it/sec) -training >> step=6440200, episode=1074 reward=0.7887611 (550.72 it/sec) -training >> step=6440300, episode=1074 reward=0.7913587 (491.97 it/sec) -training >> step=6440400, episode=1074 reward=0.7646819 (547.90 it/sec) -training >> step=6440500, episode=1074 reward=0.7912413 (508.74 it/sec) -training >> step=6440600, episode=1074 reward=0.7927011 (536.10 it/sec) -training >> step=6440700, episode=1074 reward=0.7763484 (484.57 it/sec) -training >> step=6440800, episode=1074 reward=0.8000451 (570.25 it/sec) -training >> step=6440900, episode=1074 reward=0.7895969 (507.47 it/sec) -training >> step=6441000, episode=1074 reward=0.783388 (548.90 it/sec) -training >> step=6441100, episode=1074 reward=0.7934891 (517.87 it/sec) -training >> step=6441200, episode=1074 reward=0.8025676 (476.99 it/sec) -training >> step=6441300, episode=1074 reward=0.7883901 (532.91 it/sec) -training >> step=6441400, episode=1074 reward=0.7526851 (462.85 it/sec) -training >> step=6441500, episode=1074 reward=0.7696856 (521.63 it/sec) -training >> step=6441600, episode=1074 reward=0.8008804 (548.65 it/sec) -training >> step=6441700, episode=1074 reward=0.7726294 (534.30 it/sec) -training >> step=6441800, episode=1074 reward=0.7799707 (517.43 it/sec) -training >> step=6441900, episode=1074 reward=0.791813 (507.90 it/sec) -training >> step=6442000, episode=1074 reward=0.8075156 (542.91 it/sec) -training >> step=6442100, episode=1074 reward=0.7783743 (533.44 it/sec) -training >> step=6442200, episode=1074 reward=0.7717692 (464.93 it/sec) -training >> step=6442300, episode=1074 reward=0.7864294 (466.12 it/sec) -training >> step=6442400, episode=1074 reward=0.795495 (546.75 it/sec) -training >> step=6442500, episode=1074 reward=0.7876181 (498.14 it/sec) -training >> step=6442600, episode=1074 reward=0.7986501 (529.47 it/sec) -training >> step=6442700, episode=1074 reward=0.7711856 (536.57 it/sec) -training >> step=6442800, episode=1074 reward=0.7830763 (511.10 it/sec) -training >> step=6442900, episode=1074 reward=0.7682375 (543.44 it/sec) -training >> step=6443000, episode=1074 reward=0.7918693 (506.19 it/sec) -training >> step=6443100, episode=1074 reward=0.7788824 (504.62 it/sec) -training >> step=6443200, episode=1074 reward=0.7664497 (489.25 it/sec) -training >> step=6443300, episode=1075 reward=0.7924537 (101.84 it/sec) -training >> step=6443400, episode=1075 reward=0.7804024 (396.70 it/sec) -training >> step=6443500, episode=1075 reward=0.7739142 (473.82 it/sec) -training >> step=6443600, episode=1075 reward=0.7896352 (512.05 it/sec) -training >> step=6443700, episode=1075 reward=0.7880339 (469.42 it/sec) -training >> step=6443800, episode=1075 reward=0.7800696 (528.52 it/sec) -training >> step=6443900, episode=1075 reward=0.7877285 (495.22 it/sec) -training >> step=6444000, episode=1075 reward=0.7670272 (500.00 it/sec) -training >> step=6444100, episode=1075 reward=0.7814068 (480.39 it/sec) -training >> step=6444200, episode=1075 reward=0.7864089 (471.81 it/sec) -training >> step=6444300, episode=1075 reward=0.7668771 (534.72 it/sec) -training >> step=6444400, episode=1075 reward=0.781277 (474.90 it/sec) -training >> step=6444500, episode=1075 reward=0.8025387 (515.16 it/sec) -training >> step=6444600, episode=1075 reward=0.7940727 (535.56 it/sec) -training >> step=6444700, episode=1075 reward=0.8059039 (497.09 it/sec) -training >> step=6444800, episode=1075 reward=0.7724423 (506.55 it/sec) -training >> step=6444900, episode=1075 reward=0.7908322 (509.71 it/sec) -training >> step=6445000, episode=1075 reward=0.7840164 (520.78 it/sec) -training >> step=6445100, episode=1075 reward=0.7972223 (523.76 it/sec) -training >> step=6445200, episode=1075 reward=0.7782211 (523.47 it/sec) -training >> step=6445300, episode=1075 reward=0.7795763 (517.37 it/sec) -training >> step=6445400, episode=1075 reward=0.7845793 (473.98 it/sec) -training >> step=6445500, episode=1075 reward=0.8038832 (427.05 it/sec) -training >> step=6445600, episode=1075 reward=0.7810034 (359.70 it/sec) -training >> step=6445700, episode=1075 reward=0.7930707 (522.77 it/sec) -training >> step=6445800, episode=1075 reward=0.7919359 (514.35 it/sec) -training >> step=6445900, episode=1075 reward=0.7882249 (513.96 it/sec) -training >> step=6446000, episode=1075 reward=0.7925121 (548.66 it/sec) -training >> step=6446100, episode=1075 reward=0.7763209 (541.78 it/sec) -training >> step=6446200, episode=1075 reward=0.7831515 (547.88 it/sec) -training >> step=6446300, episode=1075 reward=0.7981393 (531.75 it/sec) -training >> step=6446400, episode=1075 reward=0.7837206 (501.23 it/sec) -training >> step=6446500, episode=1075 reward=0.7924947 (495.77 it/sec) -training >> step=6446600, episode=1075 reward=0.7843289 (457.79 it/sec) -training >> step=6446700, episode=1075 reward=0.7944558 (554.07 it/sec) -training >> step=6446800, episode=1075 reward=0.7695964 (498.62 it/sec) -training >> step=6446900, episode=1075 reward=0.7903524 (516.65 it/sec) -training >> step=6447000, episode=1075 reward=0.7895474 (489.93 it/sec) -training >> step=6447100, episode=1075 reward=0.7856786 (562.25 it/sec) -training >> step=6447200, episode=1075 reward=0.7810302 (509.94 it/sec) -training >> step=6447300, episode=1075 reward=0.8008595 (520.61 it/sec) -training >> step=6447400, episode=1075 reward=0.7937483 (545.90 it/sec) -training >> step=6447500, episode=1075 reward=0.7735479 (523.13 it/sec) -training >> step=6447600, episode=1075 reward=0.7650472 (493.05 it/sec) -training >> step=6447700, episode=1075 reward=0.7749928 (485.64 it/sec) -training >> step=6447800, episode=1075 reward=0.7929227 (486.75 it/sec) -training >> step=6447900, episode=1075 reward=0.7632498 (485.12 it/sec) -training >> step=6448000, episode=1075 reward=0.7853118 (490.01 it/sec) -training >> step=6448100, episode=1075 reward=0.7805607 (438.09 it/sec) -training >> step=6448200, episode=1075 reward=0.8023903 (495.48 it/sec) -training >> step=6448300, episode=1075 reward=0.7752007 (443.87 it/sec) -training >> step=6448400, episode=1075 reward=0.7716766 (463.97 it/sec) -training >> step=6448500, episode=1075 reward=0.7659842 (464.23 it/sec) -training >> step=6448600, episode=1075 reward=0.776056 (489.84 it/sec) -training >> step=6448700, episode=1075 reward=0.7889146 (453.34 it/sec) -training >> step=6448800, episode=1075 reward=0.8034953 (472.80 it/sec) -training >> step=6448900, episode=1075 reward=0.7753307 (480.97 it/sec) -training >> step=6449000, episode=1075 reward=0.7909241 (450.24 it/sec) -training >> step=6449100, episode=1075 reward=0.785767 (465.45 it/sec) -training >> step=6449200, episode=1075 reward=0.7747425 (499.81 it/sec) -training >> step=6449300, episode=1076 reward=0.7874494 (108.41 it/sec) -training >> step=6449400, episode=1076 reward=0.7946642 (343.42 it/sec) -training >> step=6449500, episode=1076 reward=0.7854994 (417.48 it/sec) -training >> step=6449600, episode=1076 reward=0.775388 (483.54 it/sec) -training >> step=6449700, episode=1076 reward=0.7874796 (471.40 it/sec) -training >> step=6449800, episode=1076 reward=0.7820832 (454.12 it/sec) -training >> step=6449900, episode=1076 reward=0.7920341 (464.80 it/sec) -training >> step=6450000, episode=1076 reward=0.790867 (495.07 it/sec) -training >> step=6450100, episode=1076 reward=0.7975025 (478.88 it/sec) -training >> step=6450200, episode=1076 reward=0.7880536 (469.97 it/sec) -training >> step=6450300, episode=1076 reward=0.7968578 (498.42 it/sec) -training >> step=6450400, episode=1076 reward=0.7630398 (456.54 it/sec) -training >> step=6450500, episode=1076 reward=0.7927739 (421.61 it/sec) -training >> step=6450600, episode=1076 reward=0.7585499 (484.28 it/sec) -training >> step=6450700, episode=1076 reward=0.7811626 (492.93 it/sec) -training >> step=6450800, episode=1076 reward=0.7852379 (480.19 it/sec) -training >> step=6450900, episode=1076 reward=0.783118 (498.40 it/sec) -training >> step=6451000, episode=1076 reward=0.7630486 (426.01 it/sec) -training >> step=6451100, episode=1076 reward=0.7806817 (479.47 it/sec) -training >> step=6451200, episode=1076 reward=0.7815599 (476.16 it/sec) -training >> step=6451300, episode=1076 reward=0.7769494 (494.46 it/sec) -training >> step=6451400, episode=1076 reward=0.7834646 (497.47 it/sec) -training >> step=6451500, episode=1076 reward=0.7930672 (439.37 it/sec) -training >> step=6451600, episode=1076 reward=0.7931017 (497.71 it/sec) -training >> step=6451700, episode=1076 reward=0.7886952 (362.84 it/sec) -training >> step=6451800, episode=1076 reward=0.7941256 (523.50 it/sec) -training >> step=6451900, episode=1076 reward=0.772094 (513.38 it/sec) -training >> step=6452000, episode=1076 reward=0.786659 (514.47 it/sec) -training >> step=6452100, episode=1076 reward=0.779712 (501.42 it/sec) -training >> step=6452200, episode=1076 reward=0.7936594 (527.07 it/sec) -training >> step=6452300, episode=1076 reward=0.7719765 (490.05 it/sec) -training >> step=6452400, episode=1076 reward=0.7763726 (451.89 it/sec) -training >> step=6452500, episode=1076 reward=0.8009812 (460.20 it/sec) -training >> step=6452600, episode=1076 reward=0.8036262 (506.08 it/sec) -training >> step=6452700, episode=1076 reward=0.7829843 (482.12 it/sec) -training >> step=6452800, episode=1076 reward=0.7781091 (525.49 it/sec) -training >> step=6452900, episode=1076 reward=0.7716357 (542.40 it/sec) -training >> step=6453000, episode=1076 reward=0.7744479 (522.91 it/sec) -training >> step=6453100, episode=1076 reward=0.7954144 (492.99 it/sec) -training >> step=6453200, episode=1076 reward=0.7736547 (531.33 it/sec) -training >> step=6453300, episode=1076 reward=0.7827541 (536.41 it/sec) -training >> step=6453400, episode=1076 reward=0.7781929 (523.44 it/sec) -training >> step=6453500, episode=1076 reward=0.7691054 (509.90 it/sec) -training >> step=6453600, episode=1076 reward=0.7928197 (528.79 it/sec) -training >> step=6453700, episode=1076 reward=0.8034104 (526.19 it/sec) -training >> step=6453800, episode=1076 reward=0.7749056 (526.25 it/sec) -training >> step=6453900, episode=1076 reward=0.7730196 (508.97 it/sec) -training >> step=6454000, episode=1076 reward=0.7723785 (519.92 it/sec) -training >> step=6454100, episode=1076 reward=0.7829621 (494.56 it/sec) -training >> step=6454200, episode=1076 reward=0.7944912 (547.96 it/sec) -training >> step=6454300, episode=1076 reward=0.7607436 (525.84 it/sec) -training >> step=6454400, episode=1076 reward=0.7660457 (509.71 it/sec) -training >> step=6454500, episode=1076 reward=0.7966308 (522.95 it/sec) -training >> step=6454600, episode=1076 reward=0.7824954 (484.61 it/sec) -training >> step=6454700, episode=1076 reward=0.7593322 (504.11 it/sec) -training >> step=6454800, episode=1076 reward=0.7777625 (507.62 it/sec) -training >> step=6454900, episode=1076 reward=0.7749731 (547.47 it/sec) -training >> step=6455000, episode=1076 reward=0.7855877 (511.67 it/sec) -training >> step=6455100, episode=1076 reward=0.7807429 (493.44 it/sec) -training >> step=6455200, episode=1076 reward=0.7936028 (516.31 it/sec) -training >> step=6455300, episode=1077 reward=0.7920746 (128.55 it/sec) -training >> step=6455400, episode=1077 reward=0.7785532 (500.51 it/sec) -training >> step=6455500, episode=1077 reward=0.7734345 (498.65 it/sec) -training >> step=6455600, episode=1077 reward=0.770751 (514.85 it/sec) -training >> step=6455700, episode=1077 reward=0.7852381 (502.25 it/sec) -training >> step=6455800, episode=1077 reward=0.7841056 (528.82 it/sec) -training >> step=6455900, episode=1077 reward=0.7899719 (518.43 it/sec) -training >> step=6456000, episode=1077 reward=0.7942401 (503.69 it/sec) -training >> step=6456100, episode=1077 reward=0.7718516 (508.55 it/sec) -training >> step=6456200, episode=1077 reward=0.7667297 (512.78 it/sec) -training >> step=6456300, episode=1077 reward=0.7873427 (515.84 it/sec) -training >> step=6456400, episode=1077 reward=0.7846435 (501.74 it/sec) -training >> step=6456500, episode=1077 reward=0.7722218 (535.56 it/sec) -training >> step=6456600, episode=1077 reward=0.7935513 (526.04 it/sec) -training >> step=6456700, episode=1077 reward=0.7791523 (476.16 it/sec) -training >> step=6456800, episode=1077 reward=0.7887807 (544.71 it/sec) -training >> step=6456900, episode=1077 reward=0.7822993 (504.75 it/sec) -training >> step=6457000, episode=1077 reward=0.7780096 (467.90 it/sec) -training >> step=6457100, episode=1077 reward=0.7970354 (509.72 it/sec) -training >> step=6457200, episode=1077 reward=0.7890738 (550.54 it/sec) -training >> step=6457300, episode=1077 reward=0.787005 (476.48 it/sec) -training >> step=6457400, episode=1077 reward=0.8004756 (534.13 it/sec) -training >> step=6457500, episode=1077 reward=0.7826559 (463.09 it/sec) -training >> step=6457600, episode=1077 reward=0.7761848 (502.64 it/sec) -training >> step=6457700, episode=1077 reward=0.7786622 (533.23 it/sec) -training >> step=6457800, episode=1077 reward=0.7769191 (507.19 it/sec) -training >> step=6457900, episode=1077 reward=0.7807137 (367.75 it/sec) -training >> step=6458000, episode=1077 reward=0.7938825 (521.19 it/sec) -training >> step=6458100, episode=1077 reward=0.7665441 (522.59 it/sec) -training >> step=6458200, episode=1077 reward=0.7847549 (461.62 it/sec) -training >> step=6458300, episode=1077 reward=0.791175 (511.69 it/sec) -training >> step=6458400, episode=1077 reward=0.7985189 (474.76 it/sec) -training >> step=6458500, episode=1077 reward=0.7787995 (494.83 it/sec) -training >> step=6458600, episode=1077 reward=0.7737802 (522.19 it/sec) -training >> step=6458700, episode=1077 reward=0.782075 (535.50 it/sec) -training >> step=6458800, episode=1077 reward=0.7918088 (537.44 it/sec) -training >> step=6458900, episode=1077 reward=0.7823772 (497.02 it/sec) -training >> step=6459000, episode=1077 reward=0.7827433 (516.04 it/sec) -training >> step=6459100, episode=1077 reward=0.8044141 (510.92 it/sec) -training >> step=6459200, episode=1077 reward=0.7693428 (531.49 it/sec) -training >> step=6459300, episode=1077 reward=0.7649621 (506.89 it/sec) -training >> step=6459400, episode=1077 reward=0.7948946 (492.32 it/sec) -training >> step=6459500, episode=1077 reward=0.7819303 (471.57 it/sec) -training >> step=6459600, episode=1077 reward=0.7828347 (500.40 it/sec) -training >> step=6459700, episode=1077 reward=0.7729522 (498.11 it/sec) -training >> step=6459800, episode=1077 reward=0.7929034 (527.20 it/sec) -training >> step=6459900, episode=1077 reward=0.7824219 (476.64 it/sec) -training >> step=6460000, episode=1077 reward=0.805508 (504.47 it/sec) -training >> step=6460100, episode=1077 reward=0.7998158 (525.59 it/sec) -training >> step=6460200, episode=1077 reward=0.7953354 (531.60 it/sec) -training >> step=6460300, episode=1077 reward=0.7923943 (511.54 it/sec) -training >> step=6460400, episode=1077 reward=0.7821351 (499.45 it/sec) -training >> step=6460500, episode=1077 reward=0.7762073 (522.57 it/sec) -training >> step=6460600, episode=1077 reward=0.7813733 (516.32 it/sec) -training >> step=6460700, episode=1077 reward=0.78285 (492.14 it/sec) -training >> step=6460800, episode=1077 reward=0.7652003 (561.65 it/sec) -training >> step=6460900, episode=1077 reward=0.768194 (533.49 it/sec) -training >> step=6461000, episode=1077 reward=0.7857951 (489.47 it/sec) -training >> step=6461100, episode=1077 reward=0.7780513 (502.94 it/sec) -training >> step=6461200, episode=1077 reward=0.7602866 (511.38 it/sec) -training >> step=6461300, episode=1078 reward=0.7645419 (129.57 it/sec) -training >> step=6461400, episode=1078 reward=0.7753182 (496.15 it/sec) -training >> step=6461500, episode=1078 reward=0.7881016 (490.75 it/sec) -training >> step=6461600, episode=1078 reward=0.791868 (522.39 it/sec) -training >> step=6461700, episode=1078 reward=0.7819957 (479.21 it/sec) -training >> step=6461800, episode=1078 reward=0.7816686 (459.05 it/sec) -training >> step=6461900, episode=1078 reward=0.7552478 (501.96 it/sec) -training >> step=6462000, episode=1078 reward=0.7691296 (520.58 it/sec) -training >> step=6462100, episode=1078 reward=0.7998145 (484.11 it/sec) -training >> step=6462200, episode=1078 reward=0.7991616 (456.05 it/sec) -training >> step=6462300, episode=1078 reward=0.7971709 (466.55 it/sec) -training >> step=6462400, episode=1078 reward=0.7761536 (465.27 it/sec) -training >> step=6462500, episode=1078 reward=0.7981154 (486.47 it/sec) -training >> step=6462600, episode=1078 reward=0.8023973 (500.05 it/sec) -training >> step=6462700, episode=1078 reward=0.7812433 (447.19 it/sec) -training >> step=6462800, episode=1078 reward=0.7912236 (319.61 it/sec) -training >> step=6462900, episode=1078 reward=0.7715353 (441.14 it/sec) -training >> step=6463000, episode=1078 reward=0.7853655 (472.77 it/sec) -training >> step=6463100, episode=1078 reward=0.8225407 (423.04 it/sec) -training >> step=6463200, episode=1078 reward=0.7855884 (466.90 it/sec) -training >> step=6463300, episode=1078 reward=0.7782147 (460.20 it/sec) -training >> step=6463400, episode=1078 reward=0.7803571 (432.63 it/sec) -training >> step=6463500, episode=1078 reward=0.7932574 (406.14 it/sec) -training >> step=6463600, episode=1078 reward=0.78863 (423.79 it/sec) -training >> step=6463700, episode=1078 reward=0.7821992 (431.33 it/sec) -training >> step=6463800, episode=1078 reward=0.7799489 (401.22 it/sec) -training >> step=6463900, episode=1078 reward=0.783417 (424.28 it/sec) -training >> step=6464000, episode=1078 reward=0.8027011 (438.64 it/sec) -training >> step=6464100, episode=1078 reward=0.7957745 (341.40 it/sec) -training >> step=6464200, episode=1078 reward=0.8046845 (441.46 it/sec) -training >> step=6464300, episode=1078 reward=0.7820677 (445.98 it/sec) -training >> step=6464400, episode=1078 reward=0.785825 (448.33 it/sec) -training >> step=6464500, episode=1078 reward=0.7826694 (450.66 it/sec) -training >> step=6464600, episode=1078 reward=0.7926622 (446.03 it/sec) -training >> step=6464700, episode=1078 reward=0.7778108 (439.11 it/sec) -training >> step=6464800, episode=1078 reward=0.7909768 (476.62 it/sec) -training >> step=6464900, episode=1078 reward=0.785583 (413.32 it/sec) -training >> step=6465000, episode=1078 reward=0.7885548 (469.63 it/sec) -training >> step=6465100, episode=1078 reward=0.7843317 (462.74 it/sec) -training >> step=6465200, episode=1078 reward=0.7972496 (451.43 it/sec) -training >> step=6465300, episode=1078 reward=0.7899112 (449.09 it/sec) -training >> step=6465400, episode=1078 reward=0.7733844 (421.67 it/sec) -training >> step=6465500, episode=1078 reward=0.7725024 (474.89 it/sec) -training >> step=6465600, episode=1078 reward=0.7727957 (484.08 it/sec) -training >> step=6465700, episode=1078 reward=0.7742424 (473.42 it/sec) -training >> step=6465800, episode=1078 reward=0.7908527 (455.61 it/sec) -training >> step=6465900, episode=1078 reward=0.7887578 (495.00 it/sec) -training >> step=6466000, episode=1078 reward=0.7929394 (473.59 it/sec) -training >> step=6466100, episode=1078 reward=0.7712855 (431.03 it/sec) -training >> step=6466200, episode=1078 reward=0.7840999 (478.46 it/sec) -training >> step=6466300, episode=1078 reward=0.7772537 (466.42 it/sec) -training >> step=6466400, episode=1078 reward=0.7577433 (478.47 it/sec) -training >> step=6466500, episode=1078 reward=0.7762263 (484.75 it/sec) -training >> step=6466600, episode=1078 reward=0.7531171 (479.22 it/sec) -training >> step=6466700, episode=1078 reward=0.8035009 (460.44 it/sec) -training >> step=6466800, episode=1078 reward=0.7922244 (448.76 it/sec) -training >> step=6466900, episode=1078 reward=0.7927894 (466.57 it/sec) -training >> step=6467000, episode=1078 reward=0.7785714 (506.09 it/sec) -training >> step=6467100, episode=1078 reward=0.7614717 (474.40 it/sec) -training >> step=6467200, episode=1078 reward=0.7907228 (505.46 it/sec) -training >> step=6467300, episode=1079 reward=0.7846155 (95.06 it/sec) -training >> step=6467400, episode=1079 reward=0.7787195 (466.10 it/sec) -training >> step=6467500, episode=1079 reward=0.774927 (473.26 it/sec) -training >> step=6467600, episode=1079 reward=0.7938806 (495.11 it/sec) -training >> step=6467700, episode=1079 reward=0.7749662 (516.87 it/sec) -training >> step=6467800, episode=1079 reward=0.7864755 (479.92 it/sec) -training >> step=6467900, episode=1079 reward=0.7724296 (483.52 it/sec) -training >> step=6468000, episode=1079 reward=0.7985712 (453.78 it/sec) -training >> step=6468100, episode=1079 reward=0.7888411 (513.46 it/sec) -training >> step=6468200, episode=1079 reward=0.7823252 (563.76 it/sec) -training >> step=6468300, episode=1079 reward=0.7916803 (520.02 it/sec) -training >> step=6468400, episode=1079 reward=0.7968864 (461.45 it/sec) -training >> step=6468500, episode=1079 reward=0.7873028 (478.07 it/sec) -training >> step=6468600, episode=1079 reward=0.7764774 (499.62 it/sec) -training >> step=6468700, episode=1079 reward=0.7727805 (527.60 it/sec) -training >> step=6468800, episode=1079 reward=0.7850028 (518.37 it/sec) -training >> step=6468900, episode=1079 reward=0.7600554 (498.65 it/sec) -training >> step=6469000, episode=1079 reward=0.8044835 (406.45 it/sec) -training >> step=6469100, episode=1079 reward=0.7835085 (491.53 it/sec) -training >> step=6469200, episode=1079 reward=0.7715293 (459.05 it/sec) -training >> step=6469300, episode=1079 reward=0.7807709 (494.57 it/sec) -training >> step=6469400, episode=1079 reward=0.8044986 (472.39 it/sec) -training >> step=6469500, episode=1079 reward=0.7954477 (505.69 it/sec) -training >> step=6469600, episode=1079 reward=0.7853971 (469.93 it/sec) -training >> step=6469700, episode=1079 reward=0.7759022 (446.95 it/sec) -training >> step=6469800, episode=1079 reward=0.7882118 (491.78 it/sec) -training >> step=6469900, episode=1079 reward=0.7995164 (506.65 it/sec) -training >> step=6470000, episode=1079 reward=0.7737586 (479.30 it/sec) -training >> step=6470100, episode=1079 reward=0.8020037 (497.66 it/sec) -training >> step=6470200, episode=1079 reward=0.7886549 (412.46 it/sec) -training >> step=6470300, episode=1079 reward=0.7787293 (471.84 it/sec) -training >> step=6470400, episode=1079 reward=0.787196 (463.78 it/sec) -training >> step=6470500, episode=1079 reward=0.7760594 (447.89 it/sec) -training >> step=6470600, episode=1079 reward=0.7702155 (482.16 it/sec) -training >> step=6470700, episode=1079 reward=0.7818266 (486.76 it/sec) -training >> step=6470800, episode=1079 reward=0.7787236 (448.91 it/sec) -training >> step=6470900, episode=1079 reward=0.7830106 (481.38 it/sec) -training >> step=6471000, episode=1079 reward=0.7918671 (525.45 it/sec) -training >> step=6471100, episode=1079 reward=0.7871501 (505.49 it/sec) -training >> step=6471200, episode=1079 reward=0.7723507 (492.82 it/sec) -training >> step=6471300, episode=1079 reward=0.7818471 (469.38 it/sec) -training >> step=6471400, episode=1079 reward=0.8013046 (523.63 it/sec) -training >> step=6471500, episode=1079 reward=0.7998229 (473.45 it/sec) -training >> step=6471600, episode=1079 reward=0.778791 (500.92 it/sec) -training >> step=6471700, episode=1079 reward=0.7833168 (508.84 it/sec) -training >> step=6471800, episode=1079 reward=0.7838954 (486.87 it/sec) -training >> step=6471900, episode=1079 reward=0.7912284 (444.24 it/sec) -training >> step=6472000, episode=1079 reward=0.8067845 (454.79 it/sec) -training >> step=6472100, episode=1079 reward=0.7869225 (488.78 it/sec) -training >> step=6472200, episode=1079 reward=0.7648668 (475.34 it/sec) -training >> step=6472300, episode=1079 reward=0.7946046 (470.44 it/sec) -training >> step=6472400, episode=1079 reward=0.7896421 (452.69 it/sec) -training >> step=6472500, episode=1079 reward=0.7774675 (461.76 it/sec) -training >> step=6472600, episode=1079 reward=0.788214 (498.52 it/sec) -training >> step=6472700, episode=1079 reward=0.7756263 (491.72 it/sec) -training >> step=6472800, episode=1079 reward=0.7826002 (466.16 it/sec) -training >> step=6472900, episode=1079 reward=0.7713505 (473.15 it/sec) -training >> step=6473000, episode=1079 reward=0.7793718 (450.04 it/sec) -training >> step=6473100, episode=1079 reward=0.7925551 (418.34 it/sec) -training >> step=6473200, episode=1079 reward=0.7873318 (494.53 it/sec) -training >> step=6473300, episode=1080 reward=0.7897589 (93.66 it/sec) -training >> step=6473400, episode=1080 reward=0.7824262 (450.51 it/sec) -training >> step=6473500, episode=1080 reward=0.7817869 (472.72 it/sec) -training >> step=6473600, episode=1080 reward=0.8044124 (439.29 it/sec) -training >> step=6473700, episode=1080 reward=0.7945055 (432.19 it/sec) -training >> step=6473800, episode=1080 reward=0.7758872 (459.64 it/sec) -training >> step=6473900, episode=1080 reward=0.7821359 (451.30 it/sec) -training >> step=6474000, episode=1080 reward=0.7946016 (445.73 it/sec) -training >> step=6474100, episode=1080 reward=0.7800581 (461.97 it/sec) -training >> step=6474200, episode=1080 reward=0.7805778 (464.30 it/sec) -training >> step=6474300, episode=1080 reward=0.8081121 (457.42 it/sec) -training >> step=6474400, episode=1080 reward=0.7804128 (447.74 it/sec) -training >> step=6474500, episode=1080 reward=0.7817599 (447.21 it/sec) -training >> step=6474600, episode=1080 reward=0.8081244 (444.24 it/sec) -training >> step=6474700, episode=1080 reward=0.7699713 (451.00 it/sec) -training >> step=6474800, episode=1080 reward=0.7749624 (423.50 it/sec) -training >> step=6474900, episode=1080 reward=0.7982887 (480.29 it/sec) -training >> step=6475000, episode=1080 reward=0.7738606 (409.96 it/sec) -training >> step=6475100, episode=1080 reward=0.7881479 (416.41 it/sec) -training >> step=6475200, episode=1080 reward=0.7821332 (465.53 it/sec) -training >> step=6475300, episode=1080 reward=0.7961575 (449.02 it/sec) -training >> step=6475400, episode=1080 reward=0.7739428 (452.04 it/sec) -training >> step=6475500, episode=1080 reward=0.7879007 (405.89 it/sec) -training >> step=6475600, episode=1080 reward=0.7942687 (432.63 it/sec) -training >> step=6475700, episode=1080 reward=0.7973557 (399.39 it/sec) -training >> step=6475800, episode=1080 reward=0.7926914 (435.52 it/sec) -training >> step=6475900, episode=1080 reward=0.7738243 (474.71 it/sec) -training >> step=6476000, episode=1080 reward=0.7817038 (513.89 it/sec) -training >> step=6476100, episode=1080 reward=0.7805659 (438.26 it/sec) -training >> step=6476200, episode=1080 reward=0.7876961 (404.71 it/sec) -training >> step=6476300, episode=1080 reward=0.7951567 (301.38 it/sec) -training >> step=6476400, episode=1080 reward=0.7997991 (470.74 it/sec) -training >> step=6476500, episode=1080 reward=0.7876444 (422.33 it/sec) -training >> step=6476600, episode=1080 reward=0.7964696 (461.74 it/sec) -training >> step=6476700, episode=1080 reward=0.7808946 (467.53 it/sec) -training >> step=6476800, episode=1080 reward=0.7864488 (471.87 it/sec) -training >> step=6476900, episode=1080 reward=0.7850192 (513.30 it/sec) -training >> step=6477000, episode=1080 reward=0.7633827 (487.02 it/sec) -training >> step=6477100, episode=1080 reward=0.7735628 (514.27 it/sec) -training >> step=6477200, episode=1080 reward=0.7938139 (479.54 it/sec) -training >> step=6477300, episode=1080 reward=0.8010396 (414.10 it/sec) -training >> step=6477400, episode=1080 reward=0.775997 (441.75 it/sec) -training >> step=6477500, episode=1080 reward=0.7701117 (393.46 it/sec) -training >> step=6477600, episode=1080 reward=0.7771566 (505.74 it/sec) -training >> step=6477700, episode=1080 reward=0.7895959 (424.57 it/sec) -training >> step=6477800, episode=1080 reward=0.7853837 (468.89 it/sec) -training >> step=6477900, episode=1080 reward=0.8070493 (415.05 it/sec) -training >> step=6478000, episode=1080 reward=0.7712656 (438.65 it/sec) -training >> step=6478100, episode=1080 reward=0.795256 (520.61 it/sec) -training >> step=6478200, episode=1080 reward=0.7732891 (467.00 it/sec) -training >> step=6478300, episode=1080 reward=0.8069744 (463.20 it/sec) -training >> step=6478400, episode=1080 reward=0.7913177 (469.35 it/sec) -training >> step=6478500, episode=1080 reward=0.782908 (461.57 it/sec) -training >> step=6478600, episode=1080 reward=0.7888647 (462.34 it/sec) -training >> step=6478700, episode=1080 reward=0.7675619 (413.60 it/sec) -training >> step=6478800, episode=1080 reward=0.7697828 (442.45 it/sec) -training >> step=6478900, episode=1080 reward=0.7778633 (475.47 it/sec) -training >> step=6479000, episode=1080 reward=0.7531832 (455.79 it/sec) -training >> step=6479100, episode=1080 reward=0.794646 (456.57 it/sec) -training >> step=6479200, episode=1080 reward=0.7824793 (403.17 it/sec) -training >> step=6479300, episode=1081 reward=0.7831911 (68.35 it/sec) -training >> step=6479400, episode=1081 reward=0.7813437 (454.22 it/sec) -training >> step=6479500, episode=1081 reward=0.7803517 (474.34 it/sec) -training >> step=6479600, episode=1081 reward=0.7843661 (379.32 it/sec) -training >> step=6479700, episode=1081 reward=0.7757181 (369.82 it/sec) -training >> step=6479800, episode=1081 reward=0.7682045 (459.06 it/sec) -training >> step=6479900, episode=1081 reward=0.7831335 (490.96 it/sec) -training >> step=6480000, episode=1081 reward=0.7979999 (456.10 it/sec) -training >> step=6480100, episode=1081 reward=0.7831228 (454.47 it/sec) -training >> step=6480200, episode=1081 reward=0.8000656 (410.40 it/sec) -training >> step=6480300, episode=1081 reward=0.7982802 (461.73 it/sec) -training >> step=6480400, episode=1081 reward=0.7875794 (454.23 it/sec) -training >> step=6480500, episode=1081 reward=0.7793972 (468.89 it/sec) -training >> step=6480600, episode=1081 reward=0.7960407 (461.77 it/sec) -training >> step=6480700, episode=1081 reward=0.777813 (483.89 it/sec) -training >> step=6480800, episode=1081 reward=0.7850463 (480.39 it/sec) -training >> step=6480900, episode=1081 reward=0.7959039 (468.68 it/sec) -training >> step=6481000, episode=1081 reward=0.7817899 (458.30 it/sec) -training >> step=6481100, episode=1081 reward=0.804358 (450.59 it/sec) -training >> step=6481200, episode=1081 reward=0.7801713 (422.49 it/sec) -training >> step=6481300, episode=1081 reward=0.7885305 (461.48 it/sec) -training >> step=6481400, episode=1081 reward=0.7983423 (466.66 it/sec) -training >> step=6481500, episode=1081 reward=0.7944081 (444.43 it/sec) -training >> step=6481600, episode=1081 reward=0.7888703 (451.92 it/sec) -training >> step=6481700, episode=1081 reward=0.7625886 (438.45 it/sec) -training >> step=6481800, episode=1081 reward=0.7838771 (491.07 it/sec) -training >> step=6481900, episode=1081 reward=0.7687615 (456.18 it/sec) -training >> step=6482000, episode=1081 reward=0.7959208 (467.99 it/sec) -training >> step=6482100, episode=1081 reward=0.7895586 (458.01 it/sec) -training >> step=6482200, episode=1081 reward=0.779344 (438.63 it/sec) -training >> step=6482300, episode=1081 reward=0.8062544 (450.23 it/sec) -training >> step=6482400, episode=1081 reward=0.7936836 (378.45 it/sec) -training >> step=6482500, episode=1081 reward=0.7884154 (434.38 it/sec) -training >> step=6482600, episode=1081 reward=0.781522 (457.49 it/sec) -training >> step=6482700, episode=1081 reward=0.78631 (421.26 it/sec) -training >> step=6482800, episode=1081 reward=0.7828127 (446.83 it/sec) -training >> step=6482900, episode=1081 reward=0.7916347 (468.79 it/sec) -training >> step=6483000, episode=1081 reward=0.7803791 (430.38 it/sec) -training >> step=6483100, episode=1081 reward=0.7863318 (431.91 it/sec) -training >> step=6483200, episode=1081 reward=0.7663025 (478.44 it/sec) -training >> step=6483300, episode=1081 reward=0.7883673 (390.47 it/sec) -training >> step=6483400, episode=1081 reward=0.8014786 (420.37 it/sec) -training >> step=6483500, episode=1081 reward=0.7863699 (452.33 it/sec) -training >> step=6483600, episode=1081 reward=0.7808282 (444.74 it/sec) -training >> step=6483700, episode=1081 reward=0.7530928 (449.42 it/sec) -training >> step=6483800, episode=1081 reward=0.8028821 (452.45 it/sec) -training >> step=6483900, episode=1081 reward=0.7647422 (470.20 it/sec) -training >> step=6484000, episode=1081 reward=0.7776495 (463.24 it/sec) -training >> step=6484100, episode=1081 reward=0.7770319 (473.58 it/sec) -training >> step=6484200, episode=1081 reward=0.8011489 (472.91 it/sec) -training >> step=6484300, episode=1081 reward=0.7861335 (456.01 it/sec) -training >> step=6484400, episode=1081 reward=0.7838328 (452.23 it/sec) -training >> step=6484500, episode=1081 reward=0.7872541 (446.10 it/sec) -training >> step=6484600, episode=1081 reward=0.7803075 (495.27 it/sec) -training >> step=6484700, episode=1081 reward=0.7690105 (458.47 it/sec) -training >> step=6484800, episode=1081 reward=0.7593907 (474.73 it/sec) -training >> step=6484900, episode=1081 reward=0.7808011 (409.55 it/sec) -training >> step=6485000, episode=1081 reward=0.7640159 (456.88 it/sec) -training >> step=6485100, episode=1081 reward=0.7820829 (441.38 it/sec) -training >> step=6485200, episode=1081 reward=0.7970205 (455.33 it/sec) -training >> step=6485300, episode=1082 reward=0.7798253 (93.53 it/sec) -training >> step=6485400, episode=1082 reward=0.7742194 (443.36 it/sec) -training >> step=6485500, episode=1082 reward=0.787546 (450.08 it/sec) -training >> step=6485600, episode=1082 reward=0.772154 (448.74 it/sec) -training >> step=6485700, episode=1082 reward=0.7763391 (492.68 it/sec) -training >> step=6485800, episode=1082 reward=0.7749715 (453.33 it/sec) -training >> step=6485900, episode=1082 reward=0.7970241 (442.83 it/sec) -training >> step=6486000, episode=1082 reward=0.7845915 (418.07 it/sec) -training >> step=6486100, episode=1082 reward=0.7949175 (388.68 it/sec) -training >> step=6486200, episode=1082 reward=0.7970093 (450.36 it/sec) -training >> step=6486300, episode=1082 reward=0.8044709 (426.34 it/sec) -training >> step=6486400, episode=1082 reward=0.7821099 (370.35 it/sec) -training >> step=6486500, episode=1082 reward=0.7955989 (473.37 it/sec) -training >> step=6486600, episode=1082 reward=0.7810737 (467.44 it/sec) -training >> step=6486700, episode=1082 reward=0.7784566 (475.02 it/sec) -training >> step=6486800, episode=1082 reward=0.7802994 (492.13 it/sec) -training >> step=6486900, episode=1082 reward=0.7861615 (469.86 it/sec) -training >> step=6487000, episode=1082 reward=0.7869838 (458.68 it/sec) -training >> step=6487100, episode=1082 reward=0.7777411 (515.29 it/sec) -training >> step=6487200, episode=1082 reward=0.8077984 (466.49 it/sec) -training >> step=6487300, episode=1082 reward=0.7905341 (462.61 it/sec) -training >> step=6487400, episode=1082 reward=0.7886695 (470.52 it/sec) -training >> step=6487500, episode=1082 reward=0.7764903 (477.11 it/sec) -training >> step=6487600, episode=1082 reward=0.7914578 (449.27 it/sec) -training >> step=6487700, episode=1082 reward=0.7929609 (465.57 it/sec) -training >> step=6487800, episode=1082 reward=0.7856461 (446.39 it/sec) -training >> step=6487900, episode=1082 reward=0.7842286 (494.65 it/sec) -training >> step=6488000, episode=1082 reward=0.8015418 (492.26 it/sec) -training >> step=6488100, episode=1082 reward=0.7908576 (472.90 it/sec) -training >> step=6488200, episode=1082 reward=0.786487 (496.52 it/sec) -training >> step=6488300, episode=1082 reward=0.7684014 (425.91 it/sec) -training >> step=6488400, episode=1082 reward=0.7948686 (521.09 it/sec) -training >> step=6488500, episode=1082 reward=0.7777509 (375.86 it/sec) -training >> step=6488600, episode=1082 reward=0.7893086 (576.51 it/sec) -training >> step=6488700, episode=1082 reward=0.7992166 (501.67 it/sec) -training >> step=6488800, episode=1082 reward=0.7859074 (430.67 it/sec) -training >> step=6488900, episode=1082 reward=0.7860656 (480.75 it/sec) -training >> step=6489000, episode=1082 reward=0.7931942 (524.00 it/sec) -training >> step=6489100, episode=1082 reward=0.7816144 (468.15 it/sec) -training >> step=6489200, episode=1082 reward=0.7773509 (481.31 it/sec) -training >> step=6489300, episode=1082 reward=0.7796957 (505.15 it/sec) -training >> step=6489400, episode=1082 reward=0.7873528 (501.70 it/sec) -training >> step=6489500, episode=1082 reward=0.7653534 (496.02 it/sec) -training >> step=6489600, episode=1082 reward=0.7678345 (512.71 it/sec) -training >> step=6489700, episode=1082 reward=0.7931772 (494.98 it/sec) -training >> step=6489800, episode=1082 reward=0.7707931 (491.82 it/sec) -training >> step=6489900, episode=1082 reward=0.7786911 (470.71 it/sec) -training >> step=6490000, episode=1082 reward=0.7811252 (490.83 it/sec) -training >> step=6490100, episode=1082 reward=0.7787616 (475.89 it/sec) -training >> step=6490200, episode=1082 reward=0.7626432 (454.15 it/sec) -training >> step=6490300, episode=1082 reward=0.7798252 (448.27 it/sec) -training >> step=6490400, episode=1082 reward=0.7981858 (487.59 it/sec) -training >> step=6490500, episode=1082 reward=0.7849377 (486.48 it/sec) -training >> step=6490600, episode=1082 reward=0.7816247 (471.53 it/sec) -training >> step=6490700, episode=1082 reward=0.7846533 (505.21 it/sec) -training >> step=6490800, episode=1082 reward=0.7680184 (430.07 it/sec) -training >> step=6490900, episode=1082 reward=0.7766656 (490.63 it/sec) -training >> step=6491000, episode=1082 reward=0.7905526 (517.28 it/sec) -training >> step=6491100, episode=1082 reward=0.7666737 (507.98 it/sec) -training >> step=6491200, episode=1082 reward=0.7913905 (458.36 it/sec) -training >> step=6491300, episode=1083 reward=0.7851893 (101.14 it/sec) -training >> step=6491400, episode=1083 reward=0.7701944 (453.03 it/sec) -training >> step=6491500, episode=1083 reward=0.7604248 (470.86 it/sec) -training >> step=6491600, episode=1083 reward=0.8104424 (485.10 it/sec) -training >> step=6491700, episode=1083 reward=0.7607352 (465.20 it/sec) -training >> step=6491800, episode=1083 reward=0.7907372 (484.39 it/sec) -training >> step=6491900, episode=1083 reward=0.783313 (477.18 it/sec) -training >> step=6492000, episode=1083 reward=0.7974836 (495.08 it/sec) -training >> step=6492100, episode=1083 reward=0.7694919 (490.63 it/sec) -training >> step=6492200, episode=1083 reward=0.7819914 (545.95 it/sec) -training >> step=6492300, episode=1083 reward=0.7988131 (521.56 it/sec) -training >> step=6492400, episode=1083 reward=0.7773817 (478.70 it/sec) -training >> step=6492500, episode=1083 reward=0.7670874 (498.53 it/sec) -training >> step=6492600, episode=1083 reward=0.7964647 (536.01 it/sec) -training >> step=6492700, episode=1083 reward=0.7939726 (502.84 it/sec) -training >> step=6492800, episode=1083 reward=0.7937412 (438.75 it/sec) -training >> step=6492900, episode=1083 reward=0.7879081 (463.66 it/sec) -training >> step=6493000, episode=1083 reward=0.7796324 (486.50 it/sec) -training >> step=6493100, episode=1083 reward=0.7838337 (524.30 it/sec) -training >> step=6493200, episode=1083 reward=0.8074734 (513.49 it/sec) -training >> step=6493300, episode=1083 reward=0.8005232 (526.20 it/sec) -training >> step=6493400, episode=1083 reward=0.7923002 (462.75 it/sec) -training >> step=6493500, episode=1083 reward=0.7702714 (479.21 it/sec) -training >> step=6493600, episode=1083 reward=0.791269 (483.51 it/sec) -training >> step=6493700, episode=1083 reward=0.7885872 (507.40 it/sec) -training >> step=6493800, episode=1083 reward=0.7964288 (508.03 it/sec) -training >> step=6493900, episode=1083 reward=0.781873 (450.10 it/sec) -training >> step=6494000, episode=1083 reward=0.7782031 (512.92 it/sec) -training >> step=6494100, episode=1083 reward=0.7770176 (525.56 it/sec) -training >> step=6494200, episode=1083 reward=0.7937891 (513.78 it/sec) -training >> step=6494300, episode=1083 reward=0.7888052 (472.65 it/sec) -training >> step=6494400, episode=1083 reward=0.7908705 (487.39 it/sec) -training >> step=6494500, episode=1083 reward=0.7845835 (427.40 it/sec) -training >> step=6494600, episode=1083 reward=0.7704924 (385.27 it/sec) -training >> step=6494700, episode=1083 reward=0.7916923 (415.57 it/sec) -training >> step=6494800, episode=1083 reward=0.7890344 (484.70 it/sec) -training >> step=6494900, episode=1083 reward=0.7858204 (429.85 it/sec) -training >> step=6495000, episode=1083 reward=0.7863796 (426.30 it/sec) -training >> step=6495100, episode=1083 reward=0.8000886 (479.84 it/sec) -training >> step=6495200, episode=1083 reward=0.7852686 (473.39 it/sec) -training >> step=6495300, episode=1083 reward=0.7891204 (487.87 it/sec) -training >> step=6495400, episode=1083 reward=0.7774988 (473.43 it/sec) -training >> step=6495500, episode=1083 reward=0.7737699 (535.12 it/sec) -training >> step=6495600, episode=1083 reward=0.7696598 (515.02 it/sec) -training >> step=6495700, episode=1083 reward=0.7677871 (513.66 it/sec) -training >> step=6495800, episode=1083 reward=0.7862768 (512.09 it/sec) -training >> step=6495900, episode=1083 reward=0.785069 (474.44 it/sec) -training >> step=6496000, episode=1083 reward=0.803847 (471.45 it/sec) -training >> step=6496100, episode=1083 reward=0.7656116 (473.15 it/sec) -training >> step=6496200, episode=1083 reward=0.7817362 (463.77 it/sec) -training >> step=6496300, episode=1083 reward=0.8047649 (437.09 it/sec) -training >> step=6496400, episode=1083 reward=0.8122426 (422.76 it/sec) -training >> step=6496500, episode=1083 reward=0.7774138 (469.96 it/sec) -training >> step=6496600, episode=1083 reward=0.7697447 (490.62 it/sec) -training >> step=6496700, episode=1083 reward=0.7891971 (469.86 it/sec) -training >> step=6496800, episode=1083 reward=0.7737826 (427.18 it/sec) -training >> step=6496900, episode=1083 reward=0.7741377 (464.08 it/sec) -training >> step=6497000, episode=1083 reward=0.790652 (477.10 it/sec) -training >> step=6497100, episode=1083 reward=0.7856667 (456.95 it/sec) -training >> step=6497200, episode=1083 reward=0.7856162 (453.26 it/sec) -training >> step=6497300, episode=1084 reward=0.7763212 (96.07 it/sec) -training >> step=6497400, episode=1084 reward=0.7861104 (457.04 it/sec) -training >> step=6497500, episode=1084 reward=0.7915251 (428.40 it/sec) -training >> step=6497600, episode=1084 reward=0.7906267 (477.97 it/sec) -training >> step=6497700, episode=1084 reward=0.7875149 (487.33 it/sec) -training >> step=6497800, episode=1084 reward=0.7802009 (445.45 it/sec) -training >> step=6497900, episode=1084 reward=0.7839534 (473.66 it/sec) -training >> step=6498000, episode=1084 reward=0.7815043 (443.19 it/sec) -training >> step=6498100, episode=1084 reward=0.7842437 (478.10 it/sec) -training >> step=6498200, episode=1084 reward=0.7885762 (469.32 it/sec) -training >> step=6498300, episode=1084 reward=0.7805261 (474.75 it/sec) -training >> step=6498400, episode=1084 reward=0.7844862 (418.04 it/sec) -training >> step=6498500, episode=1084 reward=0.7828456 (443.83 it/sec) -training >> step=6498600, episode=1084 reward=0.7808191 (494.44 it/sec) -training >> step=6498700, episode=1084 reward=0.8041637 (445.72 it/sec) -training >> step=6498800, episode=1084 reward=0.7879016 (474.94 it/sec) -training >> step=6498900, episode=1084 reward=0.7806993 (422.04 it/sec) -training >> step=6499000, episode=1084 reward=0.7984363 (480.15 it/sec) -training >> step=6499100, episode=1084 reward=0.7917504 (422.16 it/sec) -training >> step=6499200, episode=1084 reward=0.7850273 (415.97 it/sec) -training >> step=6499300, episode=1084 reward=0.7669464 (490.62 it/sec) -training >> step=6499400, episode=1084 reward=0.7943246 (473.44 it/sec) -training >> step=6499500, episode=1084 reward=0.7846624 (447.57 it/sec) -training >> step=6499600, episode=1084 reward=0.7783579 (419.70 it/sec) -training >> step=6499700, episode=1084 reward=0.7706833 (483.67 it/sec) -training >> step=6499800, episode=1084 reward=0.78704 (427.78 it/sec) -training >> step=6499900, episode=1084 reward=0.7838702 (489.47 it/sec) -training >> step=6500000, episode=1084 reward=0.801536 (479.94 it/sec) -training >> step=6500100, episode=1084 reward=0.7873921 (449.58 it/sec) -training >> step=6500200, episode=1084 reward=0.8112085 (423.38 it/sec) -training >> step=6500300, episode=1084 reward=0.7827887 (422.29 it/sec) -training >> step=6500400, episode=1084 reward=0.8101895 (447.42 it/sec) -training >> step=6500500, episode=1084 reward=0.7991976 (495.40 it/sec) -training >> step=6500600, episode=1084 reward=0.7927328 (371.42 it/sec) -training >> step=6500700, episode=1084 reward=0.7891313 (489.94 it/sec) -training >> step=6500800, episode=1084 reward=0.7784901 (478.51 it/sec) -training >> step=6500900, episode=1084 reward=0.7812266 (478.42 it/sec) -training >> step=6501000, episode=1084 reward=0.7770141 (424.69 it/sec) -training >> step=6501100, episode=1084 reward=0.7985677 (436.41 it/sec) -training >> step=6501200, episode=1084 reward=0.7545503 (481.57 it/sec) -training >> step=6501300, episode=1084 reward=0.7775987 (464.51 it/sec) -training >> step=6501400, episode=1084 reward=0.8035967 (489.10 it/sec) -training >> step=6501500, episode=1084 reward=0.7942486 (489.80 it/sec) -training >> step=6501600, episode=1084 reward=0.7845286 (494.16 it/sec) -training >> step=6501700, episode=1084 reward=0.7846605 (492.35 it/sec) -training >> step=6501800, episode=1084 reward=0.7855849 (445.05 it/sec) -training >> step=6501900, episode=1084 reward=0.7821276 (511.17 it/sec) -training >> step=6502000, episode=1084 reward=0.7823324 (472.60 it/sec) -training >> step=6502100, episode=1084 reward=0.7729588 (485.69 it/sec) -training >> step=6502200, episode=1084 reward=0.7835773 (497.05 it/sec) -training >> step=6502300, episode=1084 reward=0.7990982 (485.56 it/sec) -training >> step=6502400, episode=1084 reward=0.7776964 (479.78 it/sec) -training >> step=6502500, episode=1084 reward=0.7686623 (458.32 it/sec) -training >> step=6502600, episode=1084 reward=0.7649268 (491.08 it/sec) -training >> step=6502700, episode=1084 reward=0.7847381 (512.50 it/sec) -training >> step=6502800, episode=1084 reward=0.7886663 (458.10 it/sec) -training >> step=6502900, episode=1084 reward=0.7782441 (445.23 it/sec) -training >> step=6503000, episode=1084 reward=0.7793852 (436.20 it/sec) -training >> step=6503100, episode=1084 reward=0.78262 (461.26 it/sec) -training >> step=6503200, episode=1084 reward=0.7772267 (454.42 it/sec) -training >> step=6503300, episode=1085 reward=0.7818162 (78.43 it/sec) -training >> step=6503400, episode=1085 reward=0.768544 (462.86 it/sec) -training >> step=6503500, episode=1085 reward=0.7635298 (433.17 it/sec) -training >> step=6503600, episode=1085 reward=0.7666788 (471.36 it/sec) -training >> step=6503700, episode=1085 reward=0.7821191 (463.81 it/sec) -training >> step=6503800, episode=1085 reward=0.7687662 (466.69 it/sec) -training >> step=6503900, episode=1085 reward=0.7990177 (502.32 it/sec) -training >> step=6504000, episode=1085 reward=0.792994 (449.40 it/sec) -training >> step=6504100, episode=1085 reward=0.7861717 (441.39 it/sec) -training >> step=6504200, episode=1085 reward=0.7631875 (428.62 it/sec) -training >> step=6504300, episode=1085 reward=0.7839344 (448.39 it/sec) -training >> step=6504400, episode=1085 reward=0.801779 (473.38 it/sec) -training >> step=6504500, episode=1085 reward=0.7889079 (470.30 it/sec) -training >> step=6504600, episode=1085 reward=0.7779562 (492.59 it/sec) -training >> step=6504700, episode=1085 reward=0.8101026 (428.72 it/sec) -training >> step=6504800, episode=1085 reward=0.776987 (459.99 it/sec) -training >> step=6504900, episode=1085 reward=0.7808849 (457.49 it/sec) -training >> step=6505000, episode=1085 reward=0.7877563 (523.52 it/sec) -training >> step=6505100, episode=1085 reward=0.7728741 (480.19 it/sec) -training >> step=6505200, episode=1085 reward=0.7809293 (439.61 it/sec) -training >> step=6505300, episode=1085 reward=0.7614874 (463.92 it/sec) -training >> step=6505400, episode=1085 reward=0.8144408 (486.53 it/sec) -training >> step=6505500, episode=1085 reward=0.7883575 (445.72 it/sec) -training >> step=6505600, episode=1085 reward=0.7864525 (468.50 it/sec) -training >> step=6505700, episode=1085 reward=0.7783862 (480.53 it/sec) -training >> step=6505800, episode=1085 reward=0.7828084 (489.46 it/sec) -training >> step=6505900, episode=1085 reward=0.7862183 (455.36 it/sec) -training >> step=6506000, episode=1085 reward=0.781559 (469.67 it/sec) -training >> step=6506100, episode=1085 reward=0.8135284 (470.94 it/sec) -training >> step=6506200, episode=1085 reward=0.8053349 (485.01 it/sec) -training >> step=6506300, episode=1085 reward=0.783509 (469.06 it/sec) -training >> step=6506400, episode=1085 reward=0.7912863 (466.65 it/sec) -training >> step=6506500, episode=1085 reward=0.789114 (479.53 it/sec) -training >> step=6506600, episode=1085 reward=0.7989069 (341.50 it/sec) -training >> step=6506700, episode=1085 reward=0.7948 (472.53 it/sec) -training >> step=6506800, episode=1085 reward=0.7839915 (486.25 it/sec) -training >> step=6506900, episode=1085 reward=0.7829525 (495.84 it/sec) -training >> step=6507000, episode=1085 reward=0.8035994 (477.60 it/sec) -training >> step=6507100, episode=1085 reward=0.7817675 (462.63 it/sec) -training >> step=6507200, episode=1085 reward=0.7869647 (497.43 it/sec) -training >> step=6507300, episode=1085 reward=0.8050466 (444.02 it/sec) -training >> step=6507400, episode=1085 reward=0.7815769 (484.69 it/sec) -training >> step=6507500, episode=1085 reward=0.7782244 (477.78 it/sec) -training >> step=6507600, episode=1085 reward=0.7924231 (476.42 it/sec) -training >> step=6507700, episode=1085 reward=0.7965419 (433.73 it/sec) -training >> step=6507800, episode=1085 reward=0.7942992 (474.35 it/sec) -training >> step=6507900, episode=1085 reward=0.7646071 (514.31 it/sec) -training >> step=6508000, episode=1085 reward=0.7960031 (485.52 it/sec) -training >> step=6508100, episode=1085 reward=0.7767959 (463.20 it/sec) -training >> step=6508200, episode=1085 reward=0.7699295 (430.53 it/sec) -training >> step=6508300, episode=1085 reward=0.7988067 (465.94 it/sec) -training >> step=6508400, episode=1085 reward=0.7923017 (502.58 it/sec) -training >> step=6508500, episode=1085 reward=0.779259 (451.97 it/sec) -training >> step=6508600, episode=1085 reward=0.7919338 (479.91 it/sec) -training >> step=6508700, episode=1085 reward=0.7955948 (466.32 it/sec) -training >> step=6508800, episode=1085 reward=0.7999728 (477.18 it/sec) -training >> step=6508900, episode=1085 reward=0.7880068 (426.55 it/sec) -training >> step=6509000, episode=1085 reward=0.7831129 (448.94 it/sec) -training >> step=6509100, episode=1085 reward=0.7749982 (424.84 it/sec) -training >> step=6509200, episode=1085 reward=0.7953115 (449.02 it/sec) -training >> step=6509300, episode=1086 reward=0.7858198 (97.05 it/sec) -training >> step=6509400, episode=1086 reward=0.7640083 (369.91 it/sec) -training >> step=6509500, episode=1086 reward=0.7711316 (435.39 it/sec) -training >> step=6509600, episode=1086 reward=0.8035844 (389.57 it/sec) -training >> step=6509700, episode=1086 reward=0.7907229 (387.06 it/sec) -training >> step=6509800, episode=1086 reward=0.7796357 (480.36 it/sec) -training >> step=6509900, episode=1086 reward=0.7965457 (471.96 it/sec) -training >> step=6510000, episode=1086 reward=0.7596076 (439.56 it/sec) -training >> step=6510100, episode=1086 reward=0.7846972 (460.45 it/sec) -training >> step=6510200, episode=1086 reward=0.7865135 (425.90 it/sec) -training >> step=6510300, episode=1086 reward=0.8061676 (434.81 it/sec) -training >> step=6510400, episode=1086 reward=0.8050972 (483.06 it/sec) -training >> step=6510500, episode=1086 reward=0.7802451 (470.88 it/sec) -training >> step=6510600, episode=1086 reward=0.776446 (453.85 it/sec) -training >> step=6510700, episode=1086 reward=0.7846631 (448.77 it/sec) -training >> step=6510800, episode=1086 reward=0.7870755 (499.90 it/sec) -training >> step=6510900, episode=1086 reward=0.7934566 (425.41 it/sec) -training >> step=6511000, episode=1086 reward=0.7648728 (448.19 it/sec) -training >> step=6511100, episode=1086 reward=0.7637379 (413.32 it/sec) -training >> step=6511200, episode=1086 reward=0.8083076 (497.54 it/sec) -training >> step=6511300, episode=1086 reward=0.7837374 (479.44 it/sec) -training >> step=6511400, episode=1086 reward=0.7753136 (459.20 it/sec) -training >> step=6511500, episode=1086 reward=0.7815088 (484.80 it/sec) -training >> step=6511600, episode=1086 reward=0.7810731 (469.49 it/sec) -training >> step=6511700, episode=1086 reward=0.7993693 (474.75 it/sec) -training >> step=6511800, episode=1086 reward=0.7946569 (469.01 it/sec) -training >> step=6511900, episode=1086 reward=0.789921 (445.93 it/sec) -training >> step=6512000, episode=1086 reward=0.782695 (471.60 it/sec) -training >> step=6512100, episode=1086 reward=0.784169 (419.81 it/sec) -training >> step=6512200, episode=1086 reward=0.7741374 (504.54 it/sec) -training >> step=6512300, episode=1086 reward=0.7810706 (475.79 it/sec) -training >> step=6512400, episode=1086 reward=0.7840685 (392.06 it/sec) -training >> step=6512500, episode=1086 reward=0.7993304 (435.91 it/sec) -training >> step=6512600, episode=1086 reward=0.7783717 (451.67 it/sec) -training >> step=6512700, episode=1086 reward=0.7725998 (435.11 it/sec) -training >> step=6512800, episode=1086 reward=0.799932 (424.73 it/sec) -training >> step=6512900, episode=1086 reward=0.7941518 (304.88 it/sec) -training >> step=6513000, episode=1086 reward=0.7937096 (470.04 it/sec) -training >> step=6513100, episode=1086 reward=0.8003916 (410.66 it/sec) -training >> step=6513200, episode=1086 reward=0.7855651 (455.90 it/sec) -training >> step=6513300, episode=1086 reward=0.7783985 (456.07 it/sec) -training >> step=6513400, episode=1086 reward=0.7774596 (467.98 it/sec) -training >> step=6513500, episode=1086 reward=0.8032744 (464.52 it/sec) -training >> step=6513600, episode=1086 reward=0.8143735 (486.55 it/sec) -training >> step=6513700, episode=1086 reward=0.7709821 (490.02 it/sec) -training >> step=6513800, episode=1086 reward=0.7728671 (457.01 it/sec) -training >> step=6513900, episode=1086 reward=0.7858785 (462.96 it/sec) -training >> step=6514000, episode=1086 reward=0.7680939 (441.82 it/sec) -training >> step=6514100, episode=1086 reward=0.8051013 (478.01 it/sec) -training >> step=6514200, episode=1086 reward=0.768487 (479.14 it/sec) -training >> step=6514300, episode=1086 reward=0.7956538 (478.32 it/sec) -training >> step=6514400, episode=1086 reward=0.7644677 (465.50 it/sec) -training >> step=6514500, episode=1086 reward=0.8009411 (449.21 it/sec) -training >> step=6514600, episode=1086 reward=0.7865031 (457.39 it/sec) -training >> step=6514700, episode=1086 reward=0.7650495 (481.67 it/sec) -training >> step=6514800, episode=1086 reward=0.7959541 (482.56 it/sec) -training >> step=6514900, episode=1086 reward=0.7875453 (466.11 it/sec) -training >> step=6515000, episode=1086 reward=0.7783449 (488.09 it/sec) -training >> step=6515100, episode=1086 reward=0.7779831 (468.47 it/sec) -training >> step=6515200, episode=1086 reward=0.766623 (470.48 it/sec) -training >> step=6515300, episode=1087 reward=0.7573639 (57.10 it/sec) -training >> step=6515400, episode=1087 reward=0.7743405 (458.86 it/sec) -training >> step=6515500, episode=1087 reward=0.7729162 (464.26 it/sec) -training >> step=6515600, episode=1087 reward=0.7747815 (466.97 it/sec) -training >> step=6515700, episode=1087 reward=0.8006343 (469.35 it/sec) -training >> step=6515800, episode=1087 reward=0.7715884 (446.81 it/sec) -training >> step=6515900, episode=1087 reward=0.7662046 (463.54 it/sec) -training >> step=6516000, episode=1087 reward=0.7936323 (498.06 it/sec) -training >> step=6516100, episode=1087 reward=0.7750406 (496.24 it/sec) -training >> step=6516200, episode=1087 reward=0.7973828 (400.33 it/sec) -training >> step=6516300, episode=1087 reward=0.8081226 (466.48 it/sec) -training >> step=6516400, episode=1087 reward=0.7936636 (516.29 it/sec) -training >> step=6516500, episode=1087 reward=0.7888229 (406.93 it/sec) -training >> step=6516600, episode=1087 reward=0.7798797 (465.02 it/sec) -training >> step=6516700, episode=1087 reward=0.7928687 (512.16 it/sec) -training >> step=6516800, episode=1087 reward=0.8035188 (463.26 it/sec) -training >> step=6516900, episode=1087 reward=0.7840391 (483.98 it/sec) -training >> step=6517000, episode=1087 reward=0.7883112 (480.30 it/sec) -training >> step=6517100, episode=1087 reward=0.7818583 (519.88 it/sec) -training >> step=6517200, episode=1087 reward=0.7939892 (470.73 it/sec) -training >> step=6517300, episode=1087 reward=0.7997286 (453.83 it/sec) -training >> step=6517400, episode=1087 reward=0.7862803 (503.47 it/sec) -training >> step=6517500, episode=1087 reward=0.780402 (449.73 it/sec) -training >> step=6517600, episode=1087 reward=0.7812214 (503.59 it/sec) -training >> step=6517700, episode=1087 reward=0.7982194 (460.43 it/sec) -training >> step=6517800, episode=1087 reward=0.7890088 (488.18 it/sec) -training >> step=6517900, episode=1087 reward=0.7903343 (444.66 it/sec) -training >> step=6518000, episode=1087 reward=0.7732658 (414.21 it/sec) -training >> step=6518100, episode=1087 reward=0.7867308 (466.20 it/sec) -training >> step=6518200, episode=1087 reward=0.7768447 (519.51 it/sec) -training >> step=6518300, episode=1087 reward=0.7823368 (441.71 it/sec) -training >> step=6518400, episode=1087 reward=0.7846197 (462.16 it/sec) -training >> step=6518500, episode=1087 reward=0.7817244 (490.79 it/sec) -training >> step=6518600, episode=1087 reward=0.7921972 (504.72 it/sec) -training >> step=6518700, episode=1087 reward=0.8006123 (487.93 it/sec) -training >> step=6518800, episode=1087 reward=0.8163251 (435.05 it/sec) -training >> step=6518900, episode=1087 reward=0.787365 (455.65 it/sec) -training >> step=6519000, episode=1087 reward=0.7962972 (449.08 it/sec) -training >> step=6519100, episode=1087 reward=0.7811875 (485.55 it/sec) -training >> step=6519200, episode=1087 reward=0.7891502 (336.75 it/sec) -training >> step=6519300, episode=1087 reward=0.7966142 (489.73 it/sec) -training >> step=6519400, episode=1087 reward=0.774222 (449.78 it/sec) -training >> step=6519500, episode=1087 reward=0.8007265 (459.38 it/sec) -training >> step=6519600, episode=1087 reward=0.7858679 (530.67 it/sec) -training >> step=6519700, episode=1087 reward=0.7813852 (475.68 it/sec) -training >> step=6519800, episode=1087 reward=0.7686063 (449.22 it/sec) -training >> step=6519900, episode=1087 reward=0.7932182 (465.59 it/sec) -training >> step=6520000, episode=1087 reward=0.7950273 (479.16 it/sec) -training >> step=6520100, episode=1087 reward=0.787197 (457.88 it/sec) -training >> step=6520200, episode=1087 reward=0.7816315 (450.88 it/sec) -training >> step=6520300, episode=1087 reward=0.7774093 (448.27 it/sec) -training >> step=6520400, episode=1087 reward=0.7583476 (482.05 it/sec) -training >> step=6520500, episode=1087 reward=0.7958567 (475.05 it/sec) -training >> step=6520600, episode=1087 reward=0.8071062 (481.93 it/sec) -training >> step=6520700, episode=1087 reward=0.7932047 (493.72 it/sec) -training >> step=6520800, episode=1087 reward=0.7961012 (450.21 it/sec) -training >> step=6520900, episode=1087 reward=0.762036 (468.89 it/sec) -training >> step=6521000, episode=1087 reward=0.79346 (449.98 it/sec) -training >> step=6521100, episode=1087 reward=0.7788714 (466.86 it/sec) -training >> step=6521200, episode=1087 reward=0.7948529 (451.91 it/sec) -training >> step=6521300, episode=1088 reward=0.7788984 (39.93 it/sec) -training >> step=6521400, episode=1088 reward=0.77964 (475.61 it/sec) -training >> step=6521500, episode=1088 reward=0.7847296 (457.69 it/sec) -training >> step=6521600, episode=1088 reward=0.7878707 (487.44 it/sec) -training >> step=6521700, episode=1088 reward=0.7894691 (475.43 it/sec) -training >> step=6521800, episode=1088 reward=0.7669525 (441.63 it/sec) -training >> step=6521900, episode=1088 reward=0.7704276 (513.98 it/sec) -training >> step=6522000, episode=1088 reward=0.7604551 (456.38 it/sec) -training >> step=6522100, episode=1088 reward=0.794444 (501.50 it/sec) -training >> step=6522200, episode=1088 reward=0.7892836 (492.00 it/sec) -training >> step=6522300, episode=1088 reward=0.7873487 (530.03 it/sec) -training >> step=6522400, episode=1088 reward=0.8018987 (442.81 it/sec) -training >> step=6522500, episode=1088 reward=0.777411 (486.17 it/sec) -training >> step=6522600, episode=1088 reward=0.7919747 (515.89 it/sec) -training >> step=6522700, episode=1088 reward=0.794807 (484.24 it/sec) -training >> step=6522800, episode=1088 reward=0.7789887 (495.03 it/sec) -training >> step=6522900, episode=1088 reward=0.7952268 (441.95 it/sec) -training >> step=6523000, episode=1088 reward=0.7844314 (463.08 it/sec) -training >> step=6523100, episode=1088 reward=0.7954524 (509.01 it/sec) -training >> step=6523200, episode=1088 reward=0.7811401 (465.00 it/sec) -training >> step=6523300, episode=1088 reward=0.8014839 (475.08 it/sec) -training >> step=6523400, episode=1088 reward=0.7842019 (485.21 it/sec) -training >> step=6523500, episode=1088 reward=0.7997864 (485.40 it/sec) -training >> step=6523600, episode=1088 reward=0.7947974 (494.43 it/sec) -training >> step=6523700, episode=1088 reward=0.8078135 (475.93 it/sec) -training >> step=6523800, episode=1088 reward=0.7837325 (479.20 it/sec) -training >> step=6523900, episode=1088 reward=0.7860129 (434.76 it/sec) -training >> step=6524000, episode=1088 reward=0.7634959 (472.20 it/sec) -training >> step=6524100, episode=1088 reward=0.786657 (498.33 it/sec) -training >> step=6524200, episode=1088 reward=0.7771236 (463.90 it/sec) -training >> step=6524300, episode=1088 reward=0.7929534 (479.32 it/sec) -training >> step=6524400, episode=1088 reward=0.7809426 (497.83 it/sec) -training >> step=6524500, episode=1088 reward=0.7882462 (465.60 it/sec) -training >> step=6524600, episode=1088 reward=0.7998621 (488.93 it/sec) -training >> step=6524700, episode=1088 reward=0.793948 (438.30 it/sec) -training >> step=6524800, episode=1088 reward=0.7909443 (474.57 it/sec) -training >> step=6524900, episode=1088 reward=0.7908357 (438.49 it/sec) -training >> step=6525000, episode=1088 reward=0.7782622 (428.20 it/sec) -training >> step=6525100, episode=1088 reward=0.781172 (509.50 it/sec) -training >> step=6525200, episode=1088 reward=0.7886534 (500.09 it/sec) -training >> step=6525300, episode=1088 reward=0.7961897 (495.71 it/sec) -training >> step=6525400, episode=1088 reward=0.7996129 (348.25 it/sec) -training >> step=6525500, episode=1088 reward=0.7870478 (471.30 it/sec) -training >> step=6525600, episode=1088 reward=0.7835585 (492.57 it/sec) -training >> step=6525700, episode=1088 reward=0.7857476 (471.86 it/sec) -training >> step=6525800, episode=1088 reward=0.7818561 (491.24 it/sec) -training >> step=6525900, episode=1088 reward=0.7961448 (453.55 it/sec) -training >> step=6526000, episode=1088 reward=0.7908517 (484.20 it/sec) -training >> step=6526100, episode=1088 reward=0.7978085 (480.61 it/sec) -training >> step=6526200, episode=1088 reward=0.7779737 (488.29 it/sec) -training >> step=6526300, episode=1088 reward=0.8001182 (448.49 it/sec) -training >> step=6526400, episode=1088 reward=0.7865463 (440.17 it/sec) -training >> step=6526500, episode=1088 reward=0.7880269 (509.90 it/sec) -training >> step=6526600, episode=1088 reward=0.7923653 (515.58 it/sec) -training >> step=6526700, episode=1088 reward=0.7889674 (483.17 it/sec) -training >> step=6526800, episode=1088 reward=0.7777647 (506.29 it/sec) -training >> step=6526900, episode=1088 reward=0.7895944 (484.39 it/sec) -training >> step=6527000, episode=1088 reward=0.7746768 (503.53 it/sec) -training >> step=6527100, episode=1088 reward=0.7815886 (513.87 it/sec) -training >> step=6527200, episode=1088 reward=0.7799376 (535.20 it/sec) -training >> step=6527300, episode=1089 reward=0.8014221 (48.98 it/sec) -training >> step=6527400, episode=1089 reward=0.7844817 (467.21 it/sec) -training >> step=6527500, episode=1089 reward=0.7759526 (470.38 it/sec) -training >> step=6527600, episode=1089 reward=0.7731297 (503.68 it/sec) -training >> step=6527700, episode=1089 reward=0.7923515 (495.77 it/sec) -training >> step=6527800, episode=1089 reward=0.7712308 (501.05 it/sec) -training >> step=6527900, episode=1089 reward=0.7579175 (484.85 it/sec) -training >> step=6528000, episode=1089 reward=0.780999 (464.57 it/sec) -training >> step=6528100, episode=1089 reward=0.7897065 (508.09 it/sec) -training >> step=6528200, episode=1089 reward=0.7794856 (493.11 it/sec) -training >> step=6528300, episode=1089 reward=0.7768868 (489.92 it/sec) -training >> step=6528400, episode=1089 reward=0.7886709 (476.00 it/sec) -training >> step=6528500, episode=1089 reward=0.8014036 (469.14 it/sec) -training >> step=6528600, episode=1089 reward=0.7921457 (471.79 it/sec) -training >> step=6528700, episode=1089 reward=0.779614 (498.20 it/sec) -training >> step=6528800, episode=1089 reward=0.7603328 (499.36 it/sec) -training >> step=6528900, episode=1089 reward=0.7759876 (444.55 it/sec) -training >> step=6529000, episode=1089 reward=0.7873868 (492.47 it/sec) -training >> step=6529100, episode=1089 reward=0.7673355 (476.29 it/sec) -training >> step=6529200, episode=1089 reward=0.7692516 (488.56 it/sec) -training >> step=6529300, episode=1089 reward=0.8042262 (460.84 it/sec) -training >> step=6529400, episode=1089 reward=0.8029538 (499.32 it/sec) -training >> step=6529500, episode=1089 reward=0.7950974 (470.75 it/sec) -training >> step=6529600, episode=1089 reward=0.7858165 (458.10 it/sec) -training >> step=6529700, episode=1089 reward=0.7997959 (483.03 it/sec) -training >> step=6529800, episode=1089 reward=0.7933657 (493.02 it/sec) -training >> step=6529900, episode=1089 reward=0.7928387 (458.82 it/sec) -training >> step=6530000, episode=1089 reward=0.7968145 (429.76 it/sec) -training >> step=6530100, episode=1089 reward=0.791488 (499.06 it/sec) -training >> step=6530200, episode=1089 reward=0.7924336 (474.44 it/sec) -training >> step=6530300, episode=1089 reward=0.7748219 (459.36 it/sec) -training >> step=6530400, episode=1089 reward=0.7757233 (467.62 it/sec) -training >> step=6530500, episode=1089 reward=0.7761713 (498.20 it/sec) -training >> step=6530600, episode=1089 reward=0.8060212 (499.39 it/sec) -training >> step=6530700, episode=1089 reward=0.794021 (465.07 it/sec) -training >> step=6530800, episode=1089 reward=0.7769262 (484.29 it/sec) -training >> step=6530900, episode=1089 reward=0.7965127 (475.35 it/sec) -training >> step=6531000, episode=1089 reward=0.7685902 (482.88 it/sec) -training >> step=6531100, episode=1089 reward=0.7842194 (502.15 it/sec) -training >> step=6531200, episode=1089 reward=0.8081308 (418.60 it/sec) -training >> step=6531300, episode=1089 reward=0.8063248 (472.42 it/sec) -training >> step=6531400, episode=1089 reward=0.7729002 (469.81 it/sec) -training >> step=6531500, episode=1089 reward=0.7782779 (487.26 it/sec) -training >> step=6531600, episode=1089 reward=0.7734491 (453.95 it/sec) -training >> step=6531700, episode=1089 reward=0.7839229 (343.66 it/sec) -training >> step=6531800, episode=1089 reward=0.7904952 (435.84 it/sec) -training >> step=6531900, episode=1089 reward=0.7792538 (487.92 it/sec) -training >> step=6532000, episode=1089 reward=0.8059052 (483.92 it/sec) -training >> step=6532100, episode=1089 reward=0.7653851 (426.07 it/sec) -training >> step=6532200, episode=1089 reward=0.7701029 (458.87 it/sec) -training >> step=6532300, episode=1089 reward=0.7649693 (431.49 it/sec) -training >> step=6532400, episode=1089 reward=0.7713045 (480.90 it/sec) -training >> step=6532500, episode=1089 reward=0.7967272 (514.12 it/sec) -training >> step=6532600, episode=1089 reward=0.7884061 (471.25 it/sec) -training >> step=6532700, episode=1089 reward=0.7599148 (477.12 it/sec) -training >> step=6532800, episode=1089 reward=0.7814165 (475.10 it/sec) -training >> step=6532900, episode=1089 reward=0.7905759 (490.26 it/sec) -training >> step=6533000, episode=1089 reward=0.7779711 (472.20 it/sec) -training >> step=6533100, episode=1089 reward=0.7788818 (486.24 it/sec) -training >> step=6533200, episode=1089 reward=0.785082 (476.74 it/sec) -training >> step=6533300, episode=1090 reward=0.7771136 (54.07 it/sec) -training >> step=6533400, episode=1090 reward=0.7749049 (503.27 it/sec) -training >> step=6533500, episode=1090 reward=0.7655244 (487.10 it/sec) -training >> step=6533600, episode=1090 reward=0.7946832 (449.96 it/sec) -training >> step=6533700, episode=1090 reward=0.809641 (505.27 it/sec) -training >> step=6533800, episode=1090 reward=0.7896406 (501.29 it/sec) -training >> step=6533900, episode=1090 reward=0.7913808 (471.10 it/sec) -training >> step=6534000, episode=1090 reward=0.772099 (463.98 it/sec) -training >> step=6534100, episode=1090 reward=0.7891694 (458.93 it/sec) -training >> step=6534200, episode=1090 reward=0.7883762 (455.86 it/sec) -training >> step=6534300, episode=1090 reward=0.7804434 (414.94 it/sec) -training >> step=6534400, episode=1090 reward=0.7967376 (421.41 it/sec) -training >> step=6534500, episode=1090 reward=0.7889683 (508.92 it/sec) -training >> step=6534600, episode=1090 reward=0.7951929 (495.76 it/sec) -training >> step=6534700, episode=1090 reward=0.7852492 (491.63 it/sec) -training >> step=6534800, episode=1090 reward=0.7962151 (495.98 it/sec) -training >> step=6534900, episode=1090 reward=0.7826898 (463.18 it/sec) -training >> step=6535000, episode=1090 reward=0.7715136 (479.62 it/sec) -training >> step=6535100, episode=1090 reward=0.7903217 (488.53 it/sec) -training >> step=6535200, episode=1090 reward=0.7706618 (481.45 it/sec) -training >> step=6535300, episode=1090 reward=0.7837629 (484.00 it/sec) -training >> step=6535400, episode=1090 reward=0.8024032 (474.96 it/sec) -training >> step=6535500, episode=1090 reward=0.7953517 (454.28 it/sec) -training >> step=6535600, episode=1090 reward=0.7937667 (498.00 it/sec) -training >> step=6535700, episode=1090 reward=0.7944905 (475.37 it/sec) -training >> step=6535800, episode=1090 reward=0.7836754 (462.68 it/sec) -training >> step=6535900, episode=1090 reward=0.7993783 (469.88 it/sec) -training >> step=6536000, episode=1090 reward=0.7889943 (493.06 it/sec) -training >> step=6536100, episode=1090 reward=0.8064224 (426.05 it/sec) -training >> step=6536200, episode=1090 reward=0.7940351 (411.15 it/sec) -training >> step=6536300, episode=1090 reward=0.7872221 (482.07 it/sec) -training >> step=6536400, episode=1090 reward=0.7838703 (480.16 it/sec) -training >> step=6536500, episode=1090 reward=0.7865125 (488.41 it/sec) -training >> step=6536600, episode=1090 reward=0.7826631 (475.06 it/sec) -training >> step=6536700, episode=1090 reward=0.8055818 (490.95 it/sec) -training >> step=6536800, episode=1090 reward=0.8139261 (491.37 it/sec) -training >> step=6536900, episode=1090 reward=0.7968501 (458.50 it/sec) -training >> step=6537000, episode=1090 reward=0.7942008 (438.67 it/sec) -training >> step=6537100, episode=1090 reward=0.8054385 (450.87 it/sec) -training >> step=6537200, episode=1090 reward=0.7791857 (462.48 it/sec) -training >> step=6537300, episode=1090 reward=0.7743989 (433.20 it/sec) -training >> step=6537400, episode=1090 reward=0.7867954 (493.13 it/sec) -training >> step=6537500, episode=1090 reward=0.7853695 (499.76 it/sec) -training >> step=6537600, episode=1090 reward=0.7882131 (468.86 it/sec) -training >> step=6537700, episode=1090 reward=0.793807 (473.58 it/sec) -training >> step=6537800, episode=1090 reward=0.7873797 (471.87 it/sec) -training >> step=6537900, episode=1090 reward=0.7915226 (459.37 it/sec) -training >> step=6538000, episode=1090 reward=0.7959186 (361.28 it/sec) -training >> step=6538100, episode=1090 reward=0.7953821 (437.20 it/sec) -training >> step=6538200, episode=1090 reward=0.7862803 (500.44 it/sec) -training >> step=6538300, episode=1090 reward=0.7823007 (456.92 it/sec) -training >> step=6538400, episode=1090 reward=0.7767673 (478.45 it/sec) -training >> step=6538500, episode=1090 reward=0.7881925 (475.95 it/sec) -training >> step=6538600, episode=1090 reward=0.7808526 (474.14 it/sec) -training >> step=6538700, episode=1090 reward=0.7855433 (468.43 it/sec) -training >> step=6538800, episode=1090 reward=0.7867718 (462.13 it/sec) -training >> step=6538900, episode=1090 reward=0.772934 (505.07 it/sec) -training >> step=6539000, episode=1090 reward=0.7733438 (442.35 it/sec) -training >> step=6539100, episode=1090 reward=0.7827949 (451.24 it/sec) -training >> step=6539200, episode=1090 reward=0.7884248 (495.85 it/sec) -training >> step=6539300, episode=1091 reward=0.7939386 (54.80 it/sec) -training >> step=6539400, episode=1091 reward=0.7761568 (454.17 it/sec) -training >> step=6539500, episode=1091 reward=0.7832298 (463.37 it/sec) -training >> step=6539600, episode=1091 reward=0.7825214 (459.83 it/sec) -training >> step=6539700, episode=1091 reward=0.7668428 (456.69 it/sec) -training >> step=6539800, episode=1091 reward=0.794639 (448.99 it/sec) -training >> step=6539900, episode=1091 reward=0.7663686 (448.37 it/sec) -training >> step=6540000, episode=1091 reward=0.7807508 (445.77 it/sec) -training >> step=6540100, episode=1091 reward=0.7711257 (440.10 it/sec) -training >> step=6540200, episode=1091 reward=0.7741792 (426.88 it/sec) -training >> step=6540300, episode=1091 reward=0.7990873 (444.77 it/sec) -training >> step=6540400, episode=1091 reward=0.788957 (434.03 it/sec) -training >> step=6540500, episode=1091 reward=0.7903264 (496.06 it/sec) -training >> step=6540600, episode=1091 reward=0.7745417 (497.46 it/sec) -training >> step=6540700, episode=1091 reward=0.7796609 (432.02 it/sec) -training >> step=6540800, episode=1091 reward=0.7753835 (450.76 it/sec) -training >> step=6540900, episode=1091 reward=0.7999765 (443.69 it/sec) -training >> step=6541000, episode=1091 reward=0.7879282 (482.97 it/sec) -training >> step=6541100, episode=1091 reward=0.7826356 (455.06 it/sec) -training >> step=6541200, episode=1091 reward=0.7691121 (393.77 it/sec) -training >> step=6541300, episode=1091 reward=0.7733359 (413.71 it/sec) -training >> step=6541400, episode=1091 reward=0.7883819 (451.00 it/sec) -training >> step=6541500, episode=1091 reward=0.7969241 (396.71 it/sec) -training >> step=6541600, episode=1091 reward=0.7946612 (408.98 it/sec) -training >> step=6541700, episode=1091 reward=0.7820976 (451.14 it/sec) -training >> step=6541800, episode=1091 reward=0.8188142 (369.67 it/sec) -training >> step=6541900, episode=1091 reward=0.7962398 (467.24 it/sec) -training >> step=6542000, episode=1091 reward=0.8097537 (429.52 it/sec) -training >> step=6542100, episode=1091 reward=0.7766963 (478.20 it/sec) -training >> step=6542200, episode=1091 reward=0.8060618 (491.13 it/sec) -training >> step=6542300, episode=1091 reward=0.7935584 (505.30 it/sec) -training >> step=6542400, episode=1091 reward=0.8123423 (509.35 it/sec) -training >> step=6542500, episode=1091 reward=0.7952944 (439.95 it/sec) -training >> step=6542600, episode=1091 reward=0.7828749 (460.80 it/sec) -training >> step=6542700, episode=1091 reward=0.7862381 (454.48 it/sec) -training >> step=6542800, episode=1091 reward=0.792254 (453.55 it/sec) -training >> step=6542900, episode=1091 reward=0.7924358 (478.27 it/sec) -training >> step=6543000, episode=1091 reward=0.7784352 (488.49 it/sec) -training >> step=6543100, episode=1091 reward=0.7936522 (496.43 it/sec) -training >> step=6543200, episode=1091 reward=0.7880973 (467.73 it/sec) -training >> step=6543300, episode=1091 reward=0.7488114 (502.08 it/sec) -training >> step=6543400, episode=1091 reward=0.7766996 (481.99 it/sec) -training >> step=6543500, episode=1091 reward=0.7816514 (499.70 it/sec) -training >> step=6543600, episode=1091 reward=0.7807632 (476.83 it/sec) -training >> step=6543700, episode=1091 reward=0.8040675 (492.82 it/sec) -training >> step=6543800, episode=1091 reward=0.7711008 (509.68 it/sec) -training >> step=6543900, episode=1091 reward=0.7763688 (485.65 it/sec) -training >> step=6544000, episode=1091 reward=0.7759521 (474.36 it/sec) -training >> step=6544100, episode=1091 reward=0.768979 (494.40 it/sec) -training >> step=6544200, episode=1091 reward=0.7658138 (333.92 it/sec) -training >> step=6544300, episode=1091 reward=0.8015483 (490.20 it/sec) -training >> step=6544400, episode=1091 reward=0.7915541 (502.39 it/sec) -training >> step=6544500, episode=1091 reward=0.7755914 (497.65 it/sec) -training >> step=6544600, episode=1091 reward=0.8107744 (493.66 it/sec) -training >> step=6544700, episode=1091 reward=0.7779552 (464.71 it/sec) -training >> step=6544800, episode=1091 reward=0.7908601 (453.91 it/sec) -training >> step=6544900, episode=1091 reward=0.7852677 (472.49 it/sec) -training >> step=6545000, episode=1091 reward=0.787692 (454.77 it/sec) -training >> step=6545100, episode=1091 reward=0.7935408 (429.12 it/sec) -training >> step=6545200, episode=1091 reward=0.7714868 (483.47 it/sec) -training >> step=6545300, episode=1092 reward=0.7679322 (68.27 it/sec) -training >> step=6545400, episode=1092 reward=0.7849685 (424.40 it/sec) -training >> step=6545500, episode=1092 reward=0.779312 (466.97 it/sec) -training >> step=6545600, episode=1092 reward=0.777124 (477.39 it/sec) -training >> step=6545700, episode=1092 reward=0.7780899 (413.98 it/sec) -training >> step=6545800, episode=1092 reward=0.7719326 (497.90 it/sec) -training >> step=6545900, episode=1092 reward=0.8020766 (437.27 it/sec) -training >> step=6546000, episode=1092 reward=0.7841927 (460.38 it/sec) -training >> step=6546100, episode=1092 reward=0.7993799 (487.66 it/sec) -training >> step=6546200, episode=1092 reward=0.7821063 (499.06 it/sec) -training >> step=6546300, episode=1092 reward=0.7884626 (431.74 it/sec) -training >> step=6546400, episode=1092 reward=0.7854532 (384.45 it/sec) -training >> step=6546500, episode=1092 reward=0.7819188 (455.20 it/sec) -training >> step=6546600, episode=1092 reward=0.7786679 (428.29 it/sec) -training >> step=6546700, episode=1092 reward=0.7983899 (469.69 it/sec) -training >> step=6546800, episode=1092 reward=0.7806705 (450.29 it/sec) -training >> step=6546900, episode=1092 reward=0.7959111 (429.74 it/sec) -training >> step=6547000, episode=1092 reward=0.7991983 (440.15 it/sec) -training >> step=6547100, episode=1092 reward=0.7738721 (495.71 it/sec) -training >> step=6547200, episode=1092 reward=0.795701 (476.83 it/sec) -training >> step=6547300, episode=1092 reward=0.7907184 (461.95 it/sec) -training >> step=6547400, episode=1092 reward=0.7751911 (441.45 it/sec) -training >> step=6547500, episode=1092 reward=0.7717167 (490.79 it/sec) -training >> step=6547600, episode=1092 reward=0.8073509 (433.81 it/sec) -training >> step=6547700, episode=1092 reward=0.8014566 (442.95 it/sec) -training >> step=6547800, episode=1092 reward=0.7906578 (488.97 it/sec) -training >> step=6547900, episode=1092 reward=0.7893426 (501.96 it/sec) -training >> step=6548000, episode=1092 reward=0.7831482 (480.35 it/sec) -training >> step=6548100, episode=1092 reward=0.7900508 (457.48 it/sec) -training >> step=6548200, episode=1092 reward=0.8119762 (510.05 it/sec) -training >> step=6548300, episode=1092 reward=0.7916005 (447.63 it/sec) -training >> step=6548400, episode=1092 reward=0.793496 (503.05 it/sec) -training >> step=6548500, episode=1092 reward=0.8048659 (469.02 it/sec) -training >> step=6548600, episode=1092 reward=0.7746149 (454.52 it/sec) -training >> step=6548700, episode=1092 reward=0.8107693 (466.08 it/sec) -training >> step=6548800, episode=1092 reward=0.8105245 (455.45 it/sec) -training >> step=6548900, episode=1092 reward=0.8013092 (468.48 it/sec) -training >> step=6549000, episode=1092 reward=0.7870374 (486.54 it/sec) -training >> step=6549100, episode=1092 reward=0.7827899 (472.66 it/sec) -training >> step=6549200, episode=1092 reward=0.7814007 (467.98 it/sec) -training >> step=6549300, episode=1092 reward=0.7777718 (477.40 it/sec) -training >> step=6549400, episode=1092 reward=0.801017 (459.07 it/sec) -training >> step=6549500, episode=1092 reward=0.7967768 (469.92 it/sec) -training >> step=6549600, episode=1092 reward=0.7898023 (493.77 it/sec) -training >> step=6549700, episode=1092 reward=0.7828035 (493.52 it/sec) -training >> step=6549800, episode=1092 reward=0.7694653 (471.62 it/sec) -training >> step=6549900, episode=1092 reward=0.7954988 (415.93 it/sec) -training >> step=6550000, episode=1092 reward=0.7937087 (484.68 it/sec) -training >> step=6550100, episode=1092 reward=0.7975925 (427.98 it/sec) -training >> step=6550200, episode=1092 reward=0.7578365 (461.98 it/sec) -training >> step=6550300, episode=1092 reward=0.7770512 (480.15 it/sec) -training >> step=6550400, episode=1092 reward=0.7883133 (494.77 it/sec) -training >> step=6550500, episode=1092 reward=0.7716396 (312.74 it/sec) -training >> step=6550600, episode=1092 reward=0.7738571 (430.36 it/sec) -training >> step=6550700, episode=1092 reward=0.7793135 (479.93 it/sec) -training >> step=6550800, episode=1092 reward=0.7883007 (481.93 it/sec) -training >> step=6550900, episode=1092 reward=0.7827904 (443.31 it/sec) -training >> step=6551000, episode=1092 reward=0.7681963 (454.67 it/sec) -training >> step=6551100, episode=1092 reward=0.7612547 (470.94 it/sec) -training >> step=6551200, episode=1092 reward=0.7672366 (441.64 it/sec) -training >> step=6551300, episode=1093 reward=0.7956508 (73.83 it/sec) -training >> step=6551400, episode=1093 reward=0.7707077 (477.36 it/sec) -training >> step=6551500, episode=1093 reward=0.7801178 (423.07 it/sec) -training >> step=6551600, episode=1093 reward=0.7569571 (464.24 it/sec) -training >> step=6551700, episode=1093 reward=0.797791 (503.84 it/sec) -training >> step=6551800, episode=1093 reward=0.7799489 (462.70 it/sec) -training >> step=6551900, episode=1093 reward=0.7797861 (455.33 it/sec) -training >> step=6552000, episode=1093 reward=0.7849119 (426.19 it/sec) -training >> step=6552100, episode=1093 reward=0.8043684 (460.47 it/sec) -training >> step=6552200, episode=1093 reward=0.7677305 (471.25 it/sec) -training >> step=6552300, episode=1093 reward=0.7802817 (427.30 it/sec) -training >> step=6552400, episode=1093 reward=0.7838703 (507.63 it/sec) -training >> step=6552500, episode=1093 reward=0.8047901 (436.66 it/sec) -training >> step=6552600, episode=1093 reward=0.8030787 (488.85 it/sec) -training >> step=6552700, episode=1093 reward=0.7669968 (450.69 it/sec) -training >> step=6552800, episode=1093 reward=0.7841467 (471.80 it/sec) -training >> step=6552900, episode=1093 reward=0.7739722 (492.22 it/sec) -training >> step=6553000, episode=1093 reward=0.7692617 (454.92 it/sec) -training >> step=6553100, episode=1093 reward=0.7731649 (474.52 it/sec) -training >> step=6553200, episode=1093 reward=0.7822385 (425.95 it/sec) -training >> step=6553300, episode=1093 reward=0.8000678 (457.21 it/sec) -training >> step=6553400, episode=1093 reward=0.7896515 (491.70 it/sec) -training >> step=6553500, episode=1093 reward=0.7995355 (474.95 it/sec) -training >> step=6553600, episode=1093 reward=0.8092708 (497.84 it/sec) -training >> step=6553700, episode=1093 reward=0.7975117 (453.78 it/sec) -training >> step=6553800, episode=1093 reward=0.7928352 (440.82 it/sec) -training >> step=6553900, episode=1093 reward=0.776739 (444.90 it/sec) -training >> step=6554000, episode=1093 reward=0.8002076 (480.90 it/sec) -training >> step=6554100, episode=1093 reward=0.7885139 (520.21 it/sec) -training >> step=6554200, episode=1093 reward=0.7823761 (460.61 it/sec) -training >> step=6554300, episode=1093 reward=0.798393 (427.74 it/sec) -training >> step=6554400, episode=1093 reward=0.7848757 (469.64 it/sec) -training >> step=6554500, episode=1093 reward=0.7844539 (461.78 it/sec) -training >> step=6554600, episode=1093 reward=0.791033 (452.90 it/sec) -training >> step=6554700, episode=1093 reward=0.8072766 (466.93 it/sec) -training >> step=6554800, episode=1093 reward=0.8120405 (488.84 it/sec) -training >> step=6554900, episode=1093 reward=0.8008281 (442.72 it/sec) -training >> step=6555000, episode=1093 reward=0.7948394 (457.39 it/sec) -training >> step=6555100, episode=1093 reward=0.7895827 (473.87 it/sec) -training >> step=6555200, episode=1093 reward=0.7787162 (488.87 it/sec) -training >> step=6555300, episode=1093 reward=0.7723067 (475.15 it/sec) -training >> step=6555400, episode=1093 reward=0.7901168 (441.71 it/sec) -training >> step=6555500, episode=1093 reward=0.7994564 (505.97 it/sec) -training >> step=6555600, episode=1093 reward=0.7582729 (427.01 it/sec) -training >> step=6555700, episode=1093 reward=0.7966309 (453.09 it/sec) -training >> step=6555800, episode=1093 reward=0.7925823 (454.11 it/sec) -training >> step=6555900, episode=1093 reward=0.7819457 (460.29 it/sec) -training >> step=6556000, episode=1093 reward=0.7893288 (475.67 it/sec) -training >> step=6556100, episode=1093 reward=0.7763554 (464.03 it/sec) -training >> step=6556200, episode=1093 reward=0.7704114 (467.39 it/sec) -training >> step=6556300, episode=1093 reward=0.7935264 (458.90 it/sec) -training >> step=6556400, episode=1093 reward=0.7863657 (467.39 it/sec) -training >> step=6556500, episode=1093 reward=0.7690398 (476.20 it/sec) -training >> step=6556600, episode=1093 reward=0.7721582 (331.90 it/sec) -training >> step=6556700, episode=1093 reward=0.7857366 (448.42 it/sec) -training >> step=6556800, episode=1093 reward=0.7903441 (471.79 it/sec) -training >> step=6556900, episode=1093 reward=0.7807356 (527.94 it/sec) -training >> step=6557000, episode=1093 reward=0.7712279 (461.50 it/sec) -training >> step=6557100, episode=1093 reward=0.784778 (461.35 it/sec) -training >> step=6557200, episode=1093 reward=0.7792999 (427.12 it/sec) -training >> step=6557300, episode=1094 reward=0.7750549 (90.89 it/sec) -training >> step=6557400, episode=1094 reward=0.7808867 (456.48 it/sec) -training >> step=6557500, episode=1094 reward=0.7895536 (389.08 it/sec) -training >> step=6557600, episode=1094 reward=0.7805201 (389.99 it/sec) -training >> step=6557700, episode=1094 reward=0.7680154 (437.22 it/sec) -training >> step=6557800, episode=1094 reward=0.7715974 (463.27 it/sec) -training >> step=6557900, episode=1094 reward=0.7777369 (467.63 it/sec) -training >> step=6558000, episode=1094 reward=0.7917401 (428.27 it/sec) -training >> step=6558100, episode=1094 reward=0.7883493 (468.61 it/sec) -training >> step=6558200, episode=1094 reward=0.783136 (468.71 it/sec) -training >> step=6558300, episode=1094 reward=0.7781984 (420.49 it/sec) -training >> step=6558400, episode=1094 reward=0.7923406 (420.69 it/sec) -training >> step=6558500, episode=1094 reward=0.7875058 (452.92 it/sec) -training >> step=6558600, episode=1094 reward=0.7689012 (447.85 it/sec) -training >> step=6558700, episode=1094 reward=0.7722957 (479.52 it/sec) -training >> step=6558800, episode=1094 reward=0.783301 (453.59 it/sec) -training >> step=6558900, episode=1094 reward=0.767482 (428.21 it/sec) -training >> step=6559000, episode=1094 reward=0.7775496 (460.37 it/sec) -training >> step=6559100, episode=1094 reward=0.7871059 (433.14 it/sec) -training >> step=6559200, episode=1094 reward=0.7868721 (465.99 it/sec) -training >> step=6559300, episode=1094 reward=0.7946018 (421.12 it/sec) -training >> step=6559400, episode=1094 reward=0.7862109 (403.00 it/sec) -training >> step=6559500, episode=1094 reward=0.7837458 (433.96 it/sec) -training >> step=6559600, episode=1094 reward=0.8075104 (434.52 it/sec) -training >> step=6559700, episode=1094 reward=0.7847347 (412.16 it/sec) -training >> step=6559800, episode=1094 reward=0.7900006 (423.37 it/sec) -training >> step=6559900, episode=1094 reward=0.8102571 (448.28 it/sec) -training >> step=6560000, episode=1094 reward=0.8014995 (444.49 it/sec) -training >> step=6560100, episode=1094 reward=0.7858479 (480.10 it/sec) -training >> step=6560200, episode=1094 reward=0.7706959 (425.50 it/sec) -training >> step=6560300, episode=1094 reward=0.7977618 (455.31 it/sec) -training >> step=6560400, episode=1094 reward=0.7915344 (408.14 it/sec) -training >> step=6560500, episode=1094 reward=0.7830565 (492.62 it/sec) -training >> step=6560600, episode=1094 reward=0.7889014 (443.75 it/sec) -training >> step=6560700, episode=1094 reward=0.7986713 (413.72 it/sec) -training >> step=6560800, episode=1094 reward=0.7813803 (433.38 it/sec) -training >> step=6560900, episode=1094 reward=0.774236 (453.42 it/sec) -training >> step=6561000, episode=1094 reward=0.7836377 (468.54 it/sec) -training >> step=6561100, episode=1094 reward=0.7832678 (460.05 it/sec) -training >> step=6561200, episode=1094 reward=0.7802896 (415.81 it/sec) -training >> step=6561300, episode=1094 reward=0.7938948 (408.03 it/sec) -training >> step=6561400, episode=1094 reward=0.7654952 (452.96 it/sec) -training >> step=6561500, episode=1094 reward=0.7936559 (465.44 it/sec) -training >> step=6561600, episode=1094 reward=0.7734341 (475.88 it/sec) -training >> step=6561700, episode=1094 reward=0.7993191 (459.48 it/sec) -training >> step=6561800, episode=1094 reward=0.7690501 (444.70 it/sec) -training >> step=6561900, episode=1094 reward=0.7947209 (477.08 it/sec) -training >> step=6562000, episode=1094 reward=0.7825583 (447.94 it/sec) -training >> step=6562100, episode=1094 reward=0.7898926 (410.30 it/sec) -training >> step=6562200, episode=1094 reward=0.7791099 (461.20 it/sec) -training >> step=6562300, episode=1094 reward=0.7969934 (456.40 it/sec) -training >> step=6562400, episode=1094 reward=0.7862071 (455.18 it/sec) -training >> step=6562500, episode=1094 reward=0.8019996 (436.86 it/sec) -training >> step=6562600, episode=1094 reward=0.7899811 (460.85 it/sec) -training >> step=6562700, episode=1094 reward=0.7822772 (441.04 it/sec) -training >> step=6562800, episode=1094 reward=0.7838068 (438.21 it/sec) -training >> step=6562900, episode=1094 reward=0.7937086 (354.23 it/sec) -training >> step=6563000, episode=1094 reward=0.7838429 (451.15 it/sec) -training >> step=6563100, episode=1094 reward=0.7836537 (428.90 it/sec) -training >> step=6563200, episode=1094 reward=0.7865603 (452.61 it/sec) -training >> step=6563300, episode=1095 reward=0.7796276 (74.02 it/sec) -training >> step=6563400, episode=1095 reward=0.7898576 (432.21 it/sec) -training >> step=6563500, episode=1095 reward=0.7912567 (462.36 it/sec) -training >> step=6563600, episode=1095 reward=0.779986 (461.55 it/sec) -training >> step=6563700, episode=1095 reward=0.7761186 (440.61 it/sec) -training >> step=6563800, episode=1095 reward=0.788028 (469.32 it/sec) -training >> step=6563900, episode=1095 reward=0.7759883 (466.28 it/sec) -training >> step=6564000, episode=1095 reward=0.7850401 (483.69 it/sec) -training >> step=6564100, episode=1095 reward=0.778871 (448.09 it/sec) -training >> step=6564200, episode=1095 reward=0.7764071 (503.97 it/sec) -training >> step=6564300, episode=1095 reward=0.7790138 (486.46 it/sec) -training >> step=6564400, episode=1095 reward=0.7864029 (466.17 it/sec) -training >> step=6564500, episode=1095 reward=0.7917913 (485.59 it/sec) -training >> step=6564600, episode=1095 reward=0.7956661 (527.80 it/sec) -training >> step=6564700, episode=1095 reward=0.7854778 (492.34 it/sec) -training >> step=6564800, episode=1095 reward=0.7784147 (486.40 it/sec) -training >> step=6564900, episode=1095 reward=0.7977093 (514.37 it/sec) -training >> step=6565000, episode=1095 reward=0.7874655 (490.25 it/sec) -training >> step=6565100, episode=1095 reward=0.7989501 (487.42 it/sec) -training >> step=6565200, episode=1095 reward=0.78361 (473.58 it/sec) -training >> step=6565300, episode=1095 reward=0.76126 (507.93 it/sec) -training >> step=6565400, episode=1095 reward=0.7881318 (471.50 it/sec) -training >> step=6565500, episode=1095 reward=0.8046447 (484.60 it/sec) -training >> step=6565600, episode=1095 reward=0.8051651 (465.20 it/sec) -training >> step=6565700, episode=1095 reward=0.8018321 (513.18 it/sec) -training >> step=6565800, episode=1095 reward=0.79877 (486.98 it/sec) -training >> step=6565900, episode=1095 reward=0.7993948 (467.08 it/sec) -training >> step=6566000, episode=1095 reward=0.774807 (422.91 it/sec) -training >> step=6566100, episode=1095 reward=0.7977054 (486.01 it/sec) -training >> step=6566200, episode=1095 reward=0.7920853 (483.68 it/sec) -training >> step=6566300, episode=1095 reward=0.7800631 (459.77 it/sec) -training >> step=6566400, episode=1095 reward=0.7766084 (494.16 it/sec) -training >> step=6566500, episode=1095 reward=0.7792217 (406.37 it/sec) -training >> step=6566600, episode=1095 reward=0.8025621 (498.83 it/sec) -training >> step=6566700, episode=1095 reward=0.7816959 (521.37 it/sec) -training >> step=6566800, episode=1095 reward=0.7765418 (442.92 it/sec) -training >> step=6566900, episode=1095 reward=0.7799917 (488.64 it/sec) -training >> step=6567000, episode=1095 reward=0.772679 (436.00 it/sec) -training >> step=6567100, episode=1095 reward=0.7893487 (503.17 it/sec) -training >> step=6567200, episode=1095 reward=0.7971977 (478.04 it/sec) -training >> step=6567300, episode=1095 reward=0.779171 (423.57 it/sec) -training >> step=6567400, episode=1095 reward=0.8044433 (502.78 it/sec) -training >> step=6567500, episode=1095 reward=0.7772693 (479.97 it/sec) -training >> step=6567600, episode=1095 reward=0.77825 (508.18 it/sec) -training >> step=6567700, episode=1095 reward=0.7807317 (466.58 it/sec) -training >> step=6567800, episode=1095 reward=0.782579 (487.64 it/sec) -training >> step=6567900, episode=1095 reward=0.7758497 (428.57 it/sec) -training >> step=6568000, episode=1095 reward=0.7706932 (452.04 it/sec) -training >> step=6568100, episode=1095 reward=0.7559369 (502.45 it/sec) -training >> step=6568200, episode=1095 reward=0.7639315 (494.76 it/sec) -training >> step=6568300, episode=1095 reward=0.780692 (442.31 it/sec) -training >> step=6568400, episode=1095 reward=0.7930172 (433.18 it/sec) -training >> step=6568500, episode=1095 reward=0.7861301 (458.97 it/sec) -training >> step=6568600, episode=1095 reward=0.7895599 (494.42 it/sec) -training >> step=6568700, episode=1095 reward=0.7670145 (423.52 it/sec) -training >> step=6568800, episode=1095 reward=0.7815181 (480.78 it/sec) -training >> step=6568900, episode=1095 reward=0.7638853 (501.15 it/sec) -training >> step=6569000, episode=1095 reward=0.7777507 (481.31 it/sec) -training >> step=6569100, episode=1095 reward=0.7787436 (376.99 it/sec) -training >> step=6569200, episode=1095 reward=0.7849032 (459.39 it/sec) -training >> step=6569300, episode=1096 reward=0.7814987 (88.54 it/sec) -training >> step=6569400, episode=1096 reward=0.7963923 (470.13 it/sec) -training >> step=6569500, episode=1096 reward=0.7977694 (404.75 it/sec) -training >> step=6569600, episode=1096 reward=0.8090982 (453.14 it/sec) -training >> step=6569700, episode=1096 reward=0.7863571 (486.62 it/sec) -training >> step=6569800, episode=1096 reward=0.770744 (489.35 it/sec) -training >> step=6569900, episode=1096 reward=0.7778521 (466.36 it/sec) -training >> step=6570000, episode=1096 reward=0.7908328 (465.60 it/sec) -training >> step=6570100, episode=1096 reward=0.7662898 (452.76 it/sec) -training >> step=6570200, episode=1096 reward=0.7938932 (493.05 it/sec) -training >> step=6570300, episode=1096 reward=0.7816995 (463.27 it/sec) -training >> step=6570400, episode=1096 reward=0.7919274 (469.65 it/sec) -training >> step=6570500, episode=1096 reward=0.7792704 (470.21 it/sec) -training >> step=6570600, episode=1096 reward=0.7829494 (506.82 it/sec) -training >> step=6570700, episode=1096 reward=0.7974065 (449.20 it/sec) -training >> step=6570800, episode=1096 reward=0.7769963 (473.03 it/sec) -training >> step=6570900, episode=1096 reward=0.7893586 (472.20 it/sec) -training >> step=6571000, episode=1096 reward=0.7859207 (478.47 it/sec) -training >> step=6571100, episode=1096 reward=0.7777148 (489.50 it/sec) -training >> step=6571200, episode=1096 reward=0.7855476 (440.38 it/sec) -training >> step=6571300, episode=1096 reward=0.7683212 (508.91 it/sec) -training >> step=6571400, episode=1096 reward=0.7599431 (484.96 it/sec) -training >> step=6571500, episode=1096 reward=0.7767326 (482.55 it/sec) -training >> step=6571600, episode=1096 reward=0.7945259 (485.82 it/sec) -training >> step=6571700, episode=1096 reward=0.7847711 (465.80 it/sec) -training >> step=6571800, episode=1096 reward=0.7785072 (489.64 it/sec) -training >> step=6571900, episode=1096 reward=0.7944567 (462.69 it/sec) -training >> step=6572000, episode=1096 reward=0.8160375 (498.63 it/sec) -training >> step=6572100, episode=1096 reward=0.7945714 (445.60 it/sec) -training >> step=6572200, episode=1096 reward=0.7927218 (435.47 it/sec) -training >> step=6572300, episode=1096 reward=0.7936842 (478.75 it/sec) -training >> step=6572400, episode=1096 reward=0.7990084 (484.68 it/sec) -training >> step=6572500, episode=1096 reward=0.8037435 (478.64 it/sec) -training >> step=6572600, episode=1096 reward=0.777355 (454.31 it/sec) -training >> step=6572700, episode=1096 reward=0.7988752 (429.23 it/sec) -training >> step=6572800, episode=1096 reward=0.7870377 (438.47 it/sec) -training >> step=6572900, episode=1096 reward=0.7674598 (441.30 it/sec) -training >> step=6573000, episode=1096 reward=0.7795972 (428.28 it/sec) -training >> step=6573100, episode=1096 reward=0.7895473 (437.69 it/sec) -training >> step=6573200, episode=1096 reward=0.7777356 (470.45 it/sec) -training >> step=6573300, episode=1096 reward=0.8045343 (482.65 it/sec) -training >> step=6573400, episode=1096 reward=0.8011109 (451.93 it/sec) -training >> step=6573500, episode=1096 reward=0.8003092 (461.83 it/sec) -training >> step=6573600, episode=1096 reward=0.8064995 (479.91 it/sec) -training >> step=6573700, episode=1096 reward=0.7826428 (469.49 it/sec) -training >> step=6573800, episode=1096 reward=0.7865666 (482.37 it/sec) -training >> step=6573900, episode=1096 reward=0.7962941 (464.01 it/sec) -training >> step=6574000, episode=1096 reward=0.7908852 (458.04 it/sec) -training >> step=6574100, episode=1096 reward=0.7935138 (483.53 it/sec) -training >> step=6574200, episode=1096 reward=0.7788633 (451.53 it/sec) -training >> step=6574300, episode=1096 reward=0.799386 (465.58 it/sec) -training >> step=6574400, episode=1096 reward=0.7737755 (481.15 it/sec) -training >> step=6574500, episode=1096 reward=0.7881891 (452.35 it/sec) -training >> step=6574600, episode=1096 reward=0.7710508 (427.84 it/sec) -training >> step=6574700, episode=1096 reward=0.7887213 (464.49 it/sec) -training >> step=6574800, episode=1096 reward=0.7771851 (483.98 it/sec) -training >> step=6574900, episode=1096 reward=0.793696 (433.82 it/sec) -training >> step=6575000, episode=1096 reward=0.7831876 (443.51 it/sec) -training >> step=6575100, episode=1096 reward=0.7728128 (437.29 it/sec) -training >> step=6575200, episode=1096 reward=0.7875228 (471.65 it/sec) -training >> step=6575300, episode=1097 reward=0.7729182 (43.72 it/sec) -training >> step=6575400, episode=1097 reward=0.7823501 (428.96 it/sec) -training >> step=6575500, episode=1097 reward=0.7986391 (460.10 it/sec) -training >> step=6575600, episode=1097 reward=0.8141101 (453.45 it/sec) -training >> step=6575700, episode=1097 reward=0.7903454 (406.20 it/sec) -training >> step=6575800, episode=1097 reward=0.7851725 (483.13 it/sec) -training >> step=6575900, episode=1097 reward=0.8157571 (430.80 it/sec) -training >> step=6576000, episode=1097 reward=0.7888929 (447.67 it/sec) -training >> step=6576100, episode=1097 reward=0.7639148 (496.09 it/sec) -training >> step=6576200, episode=1097 reward=0.7763288 (437.00 it/sec) -training >> step=6576300, episode=1097 reward=0.788096 (463.89 it/sec) -training >> step=6576400, episode=1097 reward=0.7848681 (434.50 it/sec) -training >> step=6576500, episode=1097 reward=0.7882604 (473.16 it/sec) -training >> step=6576600, episode=1097 reward=0.7989501 (494.52 it/sec) -training >> step=6576700, episode=1097 reward=0.7776833 (532.75 it/sec) -training >> step=6576800, episode=1097 reward=0.7836376 (482.65 it/sec) -training >> step=6576900, episode=1097 reward=0.799334 (438.48 it/sec) -training >> step=6577000, episode=1097 reward=0.7977244 (504.35 it/sec) -training >> step=6577100, episode=1097 reward=0.7711882 (469.70 it/sec) -training >> step=6577200, episode=1097 reward=0.7801431 (483.56 it/sec) -training >> step=6577300, episode=1097 reward=0.7672312 (507.38 it/sec) -training >> step=6577400, episode=1097 reward=0.8061838 (478.75 it/sec) -training >> step=6577500, episode=1097 reward=0.8063685 (484.24 it/sec) -training >> step=6577600, episode=1097 reward=0.7930983 (458.97 it/sec) -training >> step=6577700, episode=1097 reward=0.8025917 (491.03 it/sec) -training >> step=6577800, episode=1097 reward=0.804148 (523.11 it/sec) -training >> step=6577900, episode=1097 reward=0.7718753 (447.12 it/sec) -training >> step=6578000, episode=1097 reward=0.7775741 (479.83 it/sec) -training >> step=6578100, episode=1097 reward=0.7951701 (466.39 it/sec) -training >> step=6578200, episode=1097 reward=0.8004652 (510.42 it/sec) -training >> step=6578300, episode=1097 reward=0.7571676 (453.43 it/sec) -training >> step=6578400, episode=1097 reward=0.7917948 (448.08 it/sec) -training >> step=6578500, episode=1097 reward=0.7961574 (491.68 it/sec) -training >> step=6578600, episode=1097 reward=0.7716809 (451.25 it/sec) -training >> step=6578700, episode=1097 reward=0.7930596 (466.83 it/sec) -training >> step=6578800, episode=1097 reward=0.7813575 (474.96 it/sec) -training >> step=6578900, episode=1097 reward=0.7734674 (480.20 it/sec) -training >> step=6579000, episode=1097 reward=0.7952884 (499.99 it/sec) -training >> step=6579100, episode=1097 reward=0.7977725 (473.89 it/sec) -training >> step=6579200, episode=1097 reward=0.7695037 (491.21 it/sec) -training >> step=6579300, episode=1097 reward=0.773552 (444.63 it/sec) -training >> step=6579400, episode=1097 reward=0.7803854 (487.33 it/sec) -training >> step=6579500, episode=1097 reward=0.7794526 (394.77 it/sec) -training >> step=6579600, episode=1097 reward=0.7723196 (503.33 it/sec) -training >> step=6579700, episode=1097 reward=0.7817482 (428.55 it/sec) -training >> step=6579800, episode=1097 reward=0.7868862 (423.95 it/sec) -training >> step=6579900, episode=1097 reward=0.7874622 (428.90 it/sec) -training >> step=6580000, episode=1097 reward=0.7840269 (471.15 it/sec) -training >> step=6580100, episode=1097 reward=0.7754115 (514.85 it/sec) -training >> step=6580200, episode=1097 reward=0.7836823 (464.56 it/sec) -training >> step=6580300, episode=1097 reward=0.7876812 (468.16 it/sec) -training >> step=6580400, episode=1097 reward=0.7511142 (485.61 it/sec) -training >> step=6580500, episode=1097 reward=0.7637467 (469.68 it/sec) -training >> step=6580600, episode=1097 reward=0.7716722 (463.45 it/sec) -training >> step=6580700, episode=1097 reward=0.7819934 (470.61 it/sec) -training >> step=6580800, episode=1097 reward=0.7950211 (464.85 it/sec) -training >> step=6580900, episode=1097 reward=0.7854772 (484.15 it/sec) -training >> step=6581000, episode=1097 reward=0.7835597 (522.00 it/sec) -training >> step=6581100, episode=1097 reward=0.7608606 (501.75 it/sec) -training >> step=6581200, episode=1097 reward=0.7945424 (490.01 it/sec) -training >> step=6581300, episode=1098 reward=0.7847506 (52.79 it/sec) -training >> step=6581400, episode=1098 reward=0.8024921 (509.44 it/sec) -training >> step=6581500, episode=1098 reward=0.7712015 (457.70 it/sec) -training >> step=6581600, episode=1098 reward=0.7762145 (509.77 it/sec) -training >> step=6581700, episode=1098 reward=0.7764822 (492.79 it/sec) -training >> step=6581800, episode=1098 reward=0.7747071 (501.88 it/sec) -training >> step=6581900, episode=1098 reward=0.8039252 (495.59 it/sec) -training >> step=6582000, episode=1098 reward=0.7856977 (495.72 it/sec) -training >> step=6582100, episode=1098 reward=0.7980117 (489.65 it/sec) -training >> step=6582200, episode=1098 reward=0.7684582 (460.31 it/sec) -training >> step=6582300, episode=1098 reward=0.7964883 (539.75 it/sec) -training >> step=6582400, episode=1098 reward=0.7969576 (513.11 it/sec) -training >> step=6582500, episode=1098 reward=0.7771031 (504.75 it/sec) -training >> step=6582600, episode=1098 reward=0.7979692 (502.01 it/sec) -training >> step=6582700, episode=1098 reward=0.7892436 (509.89 it/sec) -training >> step=6582800, episode=1098 reward=0.7902762 (481.81 it/sec) -training >> step=6582900, episode=1098 reward=0.7718955 (534.79 it/sec) -training >> step=6583000, episode=1098 reward=0.7720328 (504.82 it/sec) -training >> step=6583100, episode=1098 reward=0.7825852 (501.05 it/sec) -training >> step=6583200, episode=1098 reward=0.7947226 (519.22 it/sec) -training >> step=6583300, episode=1098 reward=0.7794918 (498.28 it/sec) -training >> step=6583400, episode=1098 reward=0.7796191 (521.97 it/sec) -training >> step=6583500, episode=1098 reward=0.7842051 (498.21 it/sec) -training >> step=6583600, episode=1098 reward=0.7867898 (504.80 it/sec) -training >> step=6583700, episode=1098 reward=0.7808022 (534.47 it/sec) -training >> step=6583800, episode=1098 reward=0.803582 (493.26 it/sec) -training >> step=6583900, episode=1098 reward=0.7840224 (505.60 it/sec) -training >> step=6584000, episode=1098 reward=0.7840276 (508.61 it/sec) -training >> step=6584100, episode=1098 reward=0.7808928 (490.91 it/sec) -training >> step=6584200, episode=1098 reward=0.7875615 (497.25 it/sec) -training >> step=6584300, episode=1098 reward=0.780471 (511.58 it/sec) -training >> step=6584400, episode=1098 reward=0.7797795 (466.20 it/sec) -training >> step=6584500, episode=1098 reward=0.7849013 (487.79 it/sec) -training >> step=6584600, episode=1098 reward=0.7931753 (463.13 it/sec) -training >> step=6584700, episode=1098 reward=0.8059313 (506.95 it/sec) -training >> step=6584800, episode=1098 reward=0.7776507 (476.57 it/sec) -training >> step=6584900, episode=1098 reward=0.7789761 (472.63 it/sec) -training >> step=6585000, episode=1098 reward=0.7869973 (479.17 it/sec) -training >> step=6585100, episode=1098 reward=0.7790796 (408.93 it/sec) -training >> step=6585200, episode=1098 reward=0.7949739 (410.75 it/sec) -training >> step=6585300, episode=1098 reward=0.7715627 (478.80 it/sec) -training >> step=6585400, episode=1098 reward=0.7732434 (488.86 it/sec) -training >> step=6585500, episode=1098 reward=0.776256 (521.05 it/sec) -training >> step=6585600, episode=1098 reward=0.7751755 (534.13 it/sec) -training >> step=6585700, episode=1098 reward=0.7953048 (524.03 it/sec) -training >> step=6585800, episode=1098 reward=0.7674085 (544.94 it/sec) -training >> step=6585900, episode=1098 reward=0.7925148 (510.26 it/sec) -training >> step=6586000, episode=1098 reward=0.7638528 (531.48 it/sec) -training >> step=6586100, episode=1098 reward=0.7926921 (493.16 it/sec) -training >> step=6586200, episode=1098 reward=0.7558805 (510.06 it/sec) -training >> step=6586300, episode=1098 reward=0.7574975 (534.24 it/sec) -training >> step=6586400, episode=1098 reward=0.7788411 (519.26 it/sec) -training >> step=6586500, episode=1098 reward=0.781648 (506.59 it/sec) -training >> step=6586600, episode=1098 reward=0.7858129 (495.06 it/sec) -training >> step=6586700, episode=1098 reward=0.7866009 (492.66 it/sec) -training >> step=6586800, episode=1098 reward=0.7770046 (523.70 it/sec) -training >> step=6586900, episode=1098 reward=0.8016181 (507.89 it/sec) -training >> step=6587000, episode=1098 reward=0.7737826 (520.46 it/sec) -training >> step=6587100, episode=1098 reward=0.7950152 (494.58 it/sec) -training >> step=6587200, episode=1098 reward=0.7644189 (496.16 it/sec) -training >> step=6587300, episode=1099 reward=0.785641 (56.24 it/sec) -training >> step=6587400, episode=1099 reward=0.7803017 (456.63 it/sec) -training >> step=6587500, episode=1099 reward=0.7588267 (466.92 it/sec) -training >> step=6587600, episode=1099 reward=0.7809379 (440.49 it/sec) -training >> step=6587700, episode=1099 reward=0.7743589 (478.55 it/sec) -training >> step=6587800, episode=1099 reward=0.8004974 (449.82 it/sec) -training >> step=6587900, episode=1099 reward=0.7840567 (453.43 it/sec) -training >> step=6588000, episode=1099 reward=0.7882602 (452.91 it/sec) -training >> step=6588100, episode=1099 reward=0.7764118 (516.71 it/sec) -training >> step=6588200, episode=1099 reward=0.7801601 (467.11 it/sec) -training >> step=6588300, episode=1099 reward=0.7873582 (465.85 it/sec) -training >> step=6588400, episode=1099 reward=0.776298 (481.19 it/sec) -training >> step=6588500, episode=1099 reward=0.7838005 (501.19 it/sec) -training >> step=6588600, episode=1099 reward=0.7858813 (476.13 it/sec) -training >> step=6588700, episode=1099 reward=0.7929612 (511.26 it/sec) -training >> step=6588800, episode=1099 reward=0.7927672 (460.57 it/sec) -training >> step=6588900, episode=1099 reward=0.8106496 (469.28 it/sec) -training >> step=6589000, episode=1099 reward=0.7884622 (460.28 it/sec) -training >> step=6589100, episode=1099 reward=0.7949486 (474.08 it/sec) -training >> step=6589200, episode=1099 reward=0.7899094 (491.21 it/sec) -training >> step=6589300, episode=1099 reward=0.796227 (487.49 it/sec) -training >> step=6589400, episode=1099 reward=0.7811108 (412.93 it/sec) -training >> step=6589500, episode=1099 reward=0.8007193 (479.18 it/sec) -training >> step=6589600, episode=1099 reward=0.7900919 (469.09 it/sec) -training >> step=6589700, episode=1099 reward=0.7805937 (457.83 it/sec) -training >> step=6589800, episode=1099 reward=0.7956701 (443.62 it/sec) -training >> step=6589900, episode=1099 reward=0.8042706 (494.18 it/sec) -training >> step=6590000, episode=1099 reward=0.7759967 (482.34 it/sec) -training >> step=6590100, episode=1099 reward=0.7803457 (508.67 it/sec) -training >> step=6590200, episode=1099 reward=0.8186446 (505.03 it/sec) -training >> step=6590300, episode=1099 reward=0.7952511 (499.33 it/sec) -training >> step=6590400, episode=1099 reward=0.7878804 (497.14 it/sec) -training >> step=6590500, episode=1099 reward=0.7742888 (410.46 it/sec) -training >> step=6590600, episode=1099 reward=0.7970676 (380.47 it/sec) -training >> step=6590700, episode=1099 reward=0.7782067 (391.58 it/sec) -training >> step=6590800, episode=1099 reward=0.7707315 (376.40 it/sec) -training >> step=6590900, episode=1099 reward=0.7813386 (333.86 it/sec) -training >> step=6591000, episode=1099 reward=0.7969064 (482.62 it/sec) -training >> step=6591100, episode=1099 reward=0.7804022 (459.76 it/sec) -training >> step=6591200, episode=1099 reward=0.7898555 (493.77 it/sec) -training >> step=6591300, episode=1099 reward=0.8006573 (520.16 it/sec) -training >> step=6591400, episode=1099 reward=0.7822496 (481.05 it/sec) -training >> step=6591500, episode=1099 reward=0.7858514 (514.12 it/sec) -training >> step=6591600, episode=1099 reward=0.7981756 (503.13 it/sec) -training >> step=6591700, episode=1099 reward=0.7857456 (494.25 it/sec) -training >> step=6591800, episode=1099 reward=0.7813935 (492.27 it/sec) -training >> step=6591900, episode=1099 reward=0.7732812 (500.50 it/sec) -training >> step=6592000, episode=1099 reward=0.757638 (502.19 it/sec) -training >> step=6592100, episode=1099 reward=0.7731531 (522.22 it/sec) -training >> step=6592200, episode=1099 reward=0.8011208 (503.36 it/sec) -training >> step=6592300, episode=1099 reward=0.7732237 (533.60 it/sec) -training >> step=6592400, episode=1099 reward=0.7872318 (513.39 it/sec) -training >> step=6592500, episode=1099 reward=0.7921249 (493.34 it/sec) -training >> step=6592600, episode=1099 reward=0.7673655 (500.05 it/sec) -training >> step=6592700, episode=1099 reward=0.7509078 (474.91 it/sec) -training >> step=6592800, episode=1099 reward=0.7675156 (533.20 it/sec) -training >> step=6592900, episode=1099 reward=0.7857755 (472.69 it/sec) -training >> step=6593000, episode=1099 reward=0.7732902 (503.23 it/sec) -training >> step=6593100, episode=1099 reward=0.782653 (521.56 it/sec) -training >> step=6593200, episode=1099 reward=0.7481103 (506.29 it/sec) -training >> step=6593300, episode=1100 reward=0.7633543 (98.31 it/sec) -training >> step=6593400, episode=1100 reward=0.7750655 (467.46 it/sec) -training >> step=6593500, episode=1100 reward=0.7805339 (513.14 it/sec) -training >> step=6593600, episode=1100 reward=0.7853045 (463.43 it/sec) -training >> step=6593700, episode=1100 reward=0.7798885 (484.21 it/sec) -training >> step=6593800, episode=1100 reward=0.771602 (462.27 it/sec) -training >> step=6593900, episode=1100 reward=0.7905182 (549.32 it/sec) -training >> step=6594000, episode=1100 reward=0.7850562 (515.07 it/sec) -training >> step=6594100, episode=1100 reward=0.790291 (507.07 it/sec) -training >> step=6594200, episode=1100 reward=0.7963504 (512.81 it/sec) -training >> step=6594300, episode=1100 reward=0.8003517 (461.87 it/sec) -training >> step=6594400, episode=1100 reward=0.7833299 (514.36 it/sec) -training >> step=6594500, episode=1100 reward=0.8062756 (500.09 it/sec) -training >> step=6594600, episode=1100 reward=0.7942596 (521.01 it/sec) -training >> step=6594700, episode=1100 reward=0.7771444 (473.81 it/sec) -training >> step=6594800, episode=1100 reward=0.7715496 (496.68 it/sec) -training >> step=6594900, episode=1100 reward=0.7859305 (507.71 it/sec) -training >> step=6595000, episode=1100 reward=0.7979236 (522.81 it/sec) -training >> step=6595100, episode=1100 reward=0.7906938 (505.29 it/sec) -training >> step=6595200, episode=1100 reward=0.7802358 (505.83 it/sec) -training >> step=6595300, episode=1100 reward=0.7844384 (491.81 it/sec) -training >> step=6595400, episode=1100 reward=0.779915 (484.12 it/sec) -training >> step=6595500, episode=1100 reward=0.7751839 (472.67 it/sec) -training >> step=6595600, episode=1100 reward=0.789416 (478.78 it/sec) -training >> step=6595700, episode=1100 reward=0.7876706 (532.61 it/sec) -training >> step=6595800, episode=1100 reward=0.7867518 (508.75 it/sec) -training >> step=6595900, episode=1100 reward=0.7936608 (451.31 it/sec) -training >> step=6596000, episode=1100 reward=0.790825 (526.98 it/sec) -training >> step=6596100, episode=1100 reward=0.783864 (499.84 it/sec) -training >> step=6596200, episode=1100 reward=0.7990065 (510.13 it/sec) -training >> step=6596300, episode=1100 reward=0.7776148 (498.32 it/sec) -training >> step=6596400, episode=1100 reward=0.7952552 (462.55 it/sec) -training >> step=6596500, episode=1100 reward=0.7891563 (476.06 it/sec) -training >> step=6596600, episode=1100 reward=0.7823581 (499.85 it/sec) -training >> step=6596700, episode=1100 reward=0.7781772 (530.36 it/sec) -training >> step=6596800, episode=1100 reward=0.7884959 (525.31 it/sec) -training >> step=6596900, episode=1100 reward=0.7799954 (482.44 it/sec) -training >> step=6597000, episode=1100 reward=0.8030328 (493.59 it/sec) -training >> step=6597100, episode=1100 reward=0.7970456 (535.17 it/sec) -training >> step=6597200, episode=1100 reward=0.7888388 (523.87 it/sec) -training >> step=6597300, episode=1100 reward=0.7726605 (517.33 it/sec) -training >> step=6597400, episode=1100 reward=0.780018 (507.97 it/sec) -training >> step=6597500, episode=1100 reward=0.7805497 (518.96 it/sec) -training >> step=6597600, episode=1100 reward=0.8020428 (489.35 it/sec) -training >> step=6597700, episode=1100 reward=0.7813981 (444.88 it/sec) -training >> step=6597800, episode=1100 reward=0.7635017 (527.00 it/sec) -training >> step=6597900, episode=1100 reward=0.7778556 (484.66 it/sec) -training >> step=6598000, episode=1100 reward=0.7981272 (480.22 it/sec) -training >> step=6598100, episode=1100 reward=0.7864744 (461.88 it/sec) -training >> step=6598200, episode=1100 reward=0.775682 (503.29 it/sec) -training >> step=6598300, episode=1100 reward=0.7678317 (472.18 it/sec) -training >> step=6598400, episode=1100 reward=0.8005103 (469.22 it/sec) -training >> step=6598500, episode=1100 reward=0.7786775 (492.97 it/sec) -training >> step=6598600, episode=1100 reward=0.7840365 (511.33 it/sec) -training >> step=6598700, episode=1100 reward=0.7753555 (468.83 it/sec) -training >> step=6598800, episode=1100 reward=0.7868071 (454.02 it/sec) -training >> step=6598900, episode=1100 reward=0.7674915 (484.17 it/sec) -training >> step=6599000, episode=1100 reward=0.7659261 (511.41 it/sec) -training >> step=6599100, episode=1100 reward=0.77605 (520.66 it/sec) -training >> step=6599200, episode=1100 reward=0.7831681 (496.82 it/sec) -training >> step=6599300, episode=1101 reward=0.7778768 (93.73 it/sec) -training >> step=6599400, episode=1101 reward=0.7522325 (501.50 it/sec) -training >> step=6599500, episode=1101 reward=0.8022266 (459.44 it/sec) -training >> step=6599600, episode=1101 reward=0.7768998 (500.82 it/sec) -training >> step=6599700, episode=1101 reward=0.7970163 (499.68 it/sec) -training >> step=6599800, episode=1101 reward=0.7837124 (491.80 it/sec) -training >> step=6599900, episode=1101 reward=0.7893003 (481.92 it/sec) -training >> step=6600000, episode=1101 reward=0.7687148 (474.65 it/sec) -training >> step=6600100, episode=1101 reward=0.7834196 (511.98 it/sec) -training >> step=6600200, episode=1101 reward=0.7774021 (410.78 it/sec) -training >> step=6600300, episode=1101 reward=0.781828 (468.87 it/sec) -training >> step=6600400, episode=1101 reward=0.7753348 (474.10 it/sec) -training >> step=6600500, episode=1101 reward=0.7751545 (427.39 it/sec) -training >> step=6600600, episode=1101 reward=0.7902924 (505.57 it/sec) -training >> step=6600700, episode=1101 reward=0.7764551 (469.31 it/sec) -training >> step=6600800, episode=1101 reward=0.8072549 (484.99 it/sec) -training >> step=6600900, episode=1101 reward=0.7865931 (500.07 it/sec) -training >> step=6601000, episode=1101 reward=0.7948933 (469.54 it/sec) -training >> step=6601100, episode=1101 reward=0.7859966 (482.20 it/sec) -training >> step=6601200, episode=1101 reward=0.7889763 (482.16 it/sec) -training >> step=6601300, episode=1101 reward=0.7695616 (497.67 it/sec) -training >> step=6601400, episode=1101 reward=0.7626157 (471.07 it/sec) -training >> step=6601500, episode=1101 reward=0.7929116 (475.83 it/sec) -training >> step=6601600, episode=1101 reward=0.8038484 (480.28 it/sec) -training >> step=6601700, episode=1101 reward=0.785427 (480.82 it/sec) -training >> step=6601800, episode=1101 reward=0.7735571 (496.45 it/sec) -training >> step=6601900, episode=1101 reward=0.7888138 (488.07 it/sec) -training >> step=6602000, episode=1101 reward=0.7855896 (459.11 it/sec) -training >> step=6602100, episode=1101 reward=0.7950487 (442.56 it/sec) -training >> step=6602200, episode=1101 reward=0.7968812 (507.78 it/sec) -training >> step=6602300, episode=1101 reward=0.7929697 (465.47 it/sec) -training >> step=6602400, episode=1101 reward=0.7986706 (436.13 it/sec) -training >> step=6602500, episode=1101 reward=0.781531 (443.00 it/sec) -training >> step=6602600, episode=1101 reward=0.7885082 (497.79 it/sec) -training >> step=6602700, episode=1101 reward=0.771254 (445.12 it/sec) -training >> step=6602800, episode=1101 reward=0.7901558 (461.92 it/sec) -training >> step=6602900, episode=1101 reward=0.7789108 (484.23 it/sec) -training >> step=6603000, episode=1101 reward=0.7810898 (467.54 it/sec) -training >> step=6603100, episode=1101 reward=0.7898327 (444.30 it/sec) -training >> step=6603200, episode=1101 reward=0.7893665 (405.48 it/sec) -training >> step=6603300, episode=1101 reward=0.7846661 (413.20 it/sec) -training >> step=6603400, episode=1101 reward=0.7603201 (445.70 it/sec) -training >> step=6603500, episode=1101 reward=0.7749013 (409.02 it/sec) -training >> step=6603600, episode=1101 reward=0.800218 (415.69 it/sec) -training >> step=6603700, episode=1101 reward=0.7791188 (439.74 it/sec) -training >> step=6603800, episode=1101 reward=0.7823554 (371.30 it/sec) -training >> step=6603900, episode=1101 reward=0.7801473 (461.83 it/sec) -training >> step=6604000, episode=1101 reward=0.7752085 (428.63 it/sec) -training >> step=6604100, episode=1101 reward=0.7620325 (418.62 it/sec) -training >> step=6604200, episode=1101 reward=0.794328 (447.60 it/sec) -training >> step=6604300, episode=1101 reward=0.8047316 (457.99 it/sec) -training >> step=6604400, episode=1101 reward=0.7665467 (492.95 it/sec) -training >> step=6604500, episode=1101 reward=0.7813997 (458.93 it/sec) -training >> step=6604600, episode=1101 reward=0.7921714 (411.76 it/sec) -training >> step=6604700, episode=1101 reward=0.7960558 (480.84 it/sec) -training >> step=6604800, episode=1101 reward=0.7866753 (432.01 it/sec) -training >> step=6604900, episode=1101 reward=0.7772856 (468.57 it/sec) -training >> step=6605000, episode=1101 reward=0.7724872 (448.85 it/sec) -training >> step=6605100, episode=1101 reward=0.7849838 (515.88 it/sec) -training >> step=6605200, episode=1101 reward=0.7991536 (466.45 it/sec) -training >> step=6605300, episode=1102 reward=0.784521 (98.26 it/sec) -training >> step=6605400, episode=1102 reward=0.7889735 (452.63 it/sec) -training >> step=6605500, episode=1102 reward=0.7882891 (411.25 it/sec) -training >> step=6605600, episode=1102 reward=0.7842455 (431.42 it/sec) -training >> step=6605700, episode=1102 reward=0.7779464 (384.07 it/sec) -training >> step=6605800, episode=1102 reward=0.7925679 (405.46 it/sec) -training >> step=6605900, episode=1102 reward=0.7705826 (484.03 it/sec) -training >> step=6606000, episode=1102 reward=0.7798546 (453.61 it/sec) -training >> step=6606100, episode=1102 reward=0.7838686 (483.65 it/sec) -training >> step=6606200, episode=1102 reward=0.7868495 (394.16 it/sec) -training >> step=6606300, episode=1102 reward=0.7853174 (432.49 it/sec) -training >> step=6606400, episode=1102 reward=0.7860467 (437.10 it/sec) -training >> step=6606500, episode=1102 reward=0.7985421 (455.24 it/sec) -training >> step=6606600, episode=1102 reward=0.7755166 (467.64 it/sec) -training >> step=6606700, episode=1102 reward=0.778497 (440.15 it/sec) -training >> step=6606800, episode=1102 reward=0.8088836 (473.06 it/sec) -training >> step=6606900, episode=1102 reward=0.8022543 (478.58 it/sec) -training >> step=6607000, episode=1102 reward=0.7838026 (433.38 it/sec) -training >> step=6607100, episode=1102 reward=0.7951663 (449.86 it/sec) -training >> step=6607200, episode=1102 reward=0.7839865 (466.66 it/sec) -training >> step=6607300, episode=1102 reward=0.7929035 (472.15 it/sec) -training >> step=6607400, episode=1102 reward=0.7465443 (469.59 it/sec) -training >> step=6607500, episode=1102 reward=0.7932436 (497.20 it/sec) -training >> step=6607600, episode=1102 reward=0.7891815 (453.00 it/sec) -training >> step=6607700, episode=1102 reward=0.7691485 (463.56 it/sec) -training >> step=6607800, episode=1102 reward=0.7711494 (439.77 it/sec) -training >> step=6607900, episode=1102 reward=0.7799606 (405.64 it/sec) -training >> step=6608000, episode=1102 reward=0.7828474 (444.11 it/sec) -training >> step=6608100, episode=1102 reward=0.8099037 (419.52 it/sec) -training >> step=6608200, episode=1102 reward=0.8074521 (469.33 it/sec) -training >> step=6608300, episode=1102 reward=0.783348 (479.76 it/sec) -training >> step=6608400, episode=1102 reward=0.7945818 (433.14 it/sec) -training >> step=6608500, episode=1102 reward=0.7800549 (481.61 it/sec) -training >> step=6608600, episode=1102 reward=0.7588446 (457.78 it/sec) -training >> step=6608700, episode=1102 reward=0.7960754 (463.73 it/sec) -training >> step=6608800, episode=1102 reward=0.798627 (396.82 it/sec) -training >> step=6608900, episode=1102 reward=0.7831836 (461.85 it/sec) -training >> step=6609000, episode=1102 reward=0.7917094 (451.23 it/sec) -training >> step=6609100, episode=1102 reward=0.7878385 (407.40 it/sec) -training >> step=6609200, episode=1102 reward=0.7560163 (444.04 it/sec) -training >> step=6609300, episode=1102 reward=0.7949305 (428.96 it/sec) -training >> step=6609400, episode=1102 reward=0.7922409 (448.55 it/sec) -training >> step=6609500, episode=1102 reward=0.7878102 (470.42 it/sec) -training >> step=6609600, episode=1102 reward=0.7899625 (456.87 it/sec) -training >> step=6609700, episode=1102 reward=0.7956313 (472.40 it/sec) -training >> step=6609800, episode=1102 reward=0.7760536 (482.29 it/sec) -training >> step=6609900, episode=1102 reward=0.7777362 (462.47 it/sec) -training >> step=6610000, episode=1102 reward=0.7704629 (416.92 it/sec) -training >> step=6610100, episode=1102 reward=0.7831177 (476.16 it/sec) -training >> step=6610200, episode=1102 reward=0.7777945 (466.95 it/sec) -training >> step=6610300, episode=1102 reward=0.7779989 (475.02 it/sec) -training >> step=6610400, episode=1102 reward=0.7915933 (461.35 it/sec) -training >> step=6610500, episode=1102 reward=0.7828916 (383.45 it/sec) -training >> step=6610600, episode=1102 reward=0.7983316 (389.02 it/sec) -training >> step=6610700, episode=1102 reward=0.7741736 (386.38 it/sec) -training >> step=6610800, episode=1102 reward=0.7595672 (456.69 it/sec) -training >> step=6610900, episode=1102 reward=0.7717503 (498.83 it/sec) -training >> step=6611000, episode=1102 reward=0.779372 (524.62 it/sec) -training >> step=6611100, episode=1102 reward=0.7899665 (506.16 it/sec) -training >> step=6611200, episode=1102 reward=0.7827107 (507.87 it/sec) -training >> step=6611300, episode=1103 reward=0.7844511 (111.18 it/sec) -training >> step=6611400, episode=1103 reward=0.769373 (503.35 it/sec) -training >> step=6611500, episode=1103 reward=0.7840455 (452.19 it/sec) -training >> step=6611600, episode=1103 reward=0.7846809 (473.02 it/sec) -training >> step=6611700, episode=1103 reward=0.7933708 (500.82 it/sec) -training >> step=6611800, episode=1103 reward=0.7810667 (500.57 it/sec) -training >> step=6611900, episode=1103 reward=0.8126956 (467.29 it/sec) -training >> step=6612000, episode=1103 reward=0.777411 (515.72 it/sec) -training >> step=6612100, episode=1103 reward=0.8089229 (505.50 it/sec) -training >> step=6612200, episode=1103 reward=0.7770162 (542.40 it/sec) -training >> step=6612300, episode=1103 reward=0.7940828 (518.92 it/sec) -training >> step=6612400, episode=1103 reward=0.7703793 (522.48 it/sec) -training >> step=6612500, episode=1103 reward=0.7807015 (494.73 it/sec) -training >> step=6612600, episode=1103 reward=0.7889559 (510.74 it/sec) -training >> step=6612700, episode=1103 reward=0.7822313 (515.98 it/sec) -training >> step=6612800, episode=1103 reward=0.7923428 (521.96 it/sec) -training >> step=6612900, episode=1103 reward=0.7852708 (544.71 it/sec) -training >> step=6613000, episode=1103 reward=0.7900683 (488.29 it/sec) -training >> step=6613100, episode=1103 reward=0.8024443 (475.23 it/sec) -training >> step=6613200, episode=1103 reward=0.7897372 (524.10 it/sec) -training >> step=6613300, episode=1103 reward=0.7758916 (478.55 it/sec) -training >> step=6613400, episode=1103 reward=0.7935916 (461.55 it/sec) -training >> step=6613500, episode=1103 reward=0.7888672 (503.23 it/sec) -training >> step=6613600, episode=1103 reward=0.767505 (527.69 it/sec) -training >> step=6613700, episode=1103 reward=0.785038 (452.07 it/sec) -training >> step=6613800, episode=1103 reward=0.7959442 (521.17 it/sec) -training >> step=6613900, episode=1103 reward=0.7883645 (515.14 it/sec) -training >> step=6614000, episode=1103 reward=0.8123481 (563.35 it/sec) -training >> step=6614100, episode=1103 reward=0.7946475 (493.49 it/sec) -training >> step=6614200, episode=1103 reward=0.7816524 (472.38 it/sec) -training >> step=6614300, episode=1103 reward=0.7827516 (529.51 it/sec) -training >> step=6614400, episode=1103 reward=0.7932888 (519.70 it/sec) -training >> step=6614500, episode=1103 reward=0.767905 (486.56 it/sec) -training >> step=6614600, episode=1103 reward=0.7873842 (471.33 it/sec) -training >> step=6614700, episode=1103 reward=0.7927325 (512.13 it/sec) -training >> step=6614800, episode=1103 reward=0.7776448 (508.56 it/sec) -training >> step=6614900, episode=1103 reward=0.7806051 (508.40 it/sec) -training >> step=6615000, episode=1103 reward=0.7771647 (519.88 it/sec) -training >> step=6615100, episode=1103 reward=0.790853 (509.44 it/sec) -training >> step=6615200, episode=1103 reward=0.7954426 (526.55 it/sec) -training >> step=6615300, episode=1103 reward=0.789988 (510.46 it/sec) -training >> step=6615400, episode=1103 reward=0.7897281 (539.70 it/sec) -training >> step=6615500, episode=1103 reward=0.8018727 (494.54 it/sec) -training >> step=6615600, episode=1103 reward=0.7922704 (506.06 it/sec) -training >> step=6615700, episode=1103 reward=0.7848141 (489.19 it/sec) -training >> step=6615800, episode=1103 reward=0.7872327 (522.52 it/sec) -training >> step=6615900, episode=1103 reward=0.7967649 (519.03 it/sec) -training >> step=6616000, episode=1103 reward=0.7667099 (543.32 it/sec) -training >> step=6616100, episode=1103 reward=0.7782892 (494.69 it/sec) -training >> step=6616200, episode=1103 reward=0.7894596 (503.74 it/sec) -training >> step=6616300, episode=1103 reward=0.7894285 (518.31 it/sec) -training >> step=6616400, episode=1103 reward=0.7963756 (520.04 it/sec) -training >> step=6616500, episode=1103 reward=0.7855611 (498.71 it/sec) -training >> step=6616600, episode=1103 reward=0.8076273 (479.11 it/sec) -training >> step=6616700, episode=1103 reward=0.7869121 (487.75 it/sec) -training >> step=6616800, episode=1103 reward=0.7796021 (513.05 it/sec) -training >> step=6616900, episode=1103 reward=0.7831023 (533.23 it/sec) -training >> step=6617000, episode=1103 reward=0.762996 (536.86 it/sec) -training >> step=6617100, episode=1103 reward=0.7722982 (533.36 it/sec) -training >> step=6617200, episode=1103 reward=0.7976849 (516.08 it/sec) -training >> step=6617300, episode=1104 reward=0.8081165 (112.18 it/sec) -training >> step=6617400, episode=1104 reward=0.789925 (434.98 it/sec) -training >> step=6617500, episode=1104 reward=0.7783495 (491.64 it/sec) -training >> step=6617600, episode=1104 reward=0.7824434 (499.67 it/sec) -training >> step=6617700, episode=1104 reward=0.7907562 (507.20 it/sec) -training >> step=6617800, episode=1104 reward=0.789417 (501.33 it/sec) -training >> step=6617900, episode=1104 reward=0.7930578 (516.06 it/sec) -training >> step=6618000, episode=1104 reward=0.78811 (513.19 it/sec) -training >> step=6618100, episode=1104 reward=0.7842235 (512.04 it/sec) -training >> step=6618200, episode=1104 reward=0.7838305 (494.58 it/sec) -training >> step=6618300, episode=1104 reward=0.7911509 (548.09 it/sec) -training >> step=6618400, episode=1104 reward=0.7666579 (454.84 it/sec) -training >> step=6618500, episode=1104 reward=0.7955077 (521.59 it/sec) -training >> step=6618600, episode=1104 reward=0.7827832 (542.91 it/sec) -training >> step=6618700, episode=1104 reward=0.7911308 (545.26 it/sec) -training >> step=6618800, episode=1104 reward=0.8037475 (530.94 it/sec) -training >> step=6618900, episode=1104 reward=0.7920564 (504.43 it/sec) -training >> step=6619000, episode=1104 reward=0.7965409 (498.99 it/sec) -training >> step=6619100, episode=1104 reward=0.7651506 (502.33 it/sec) -training >> step=6619200, episode=1104 reward=0.8030615 (534.87 it/sec) -training >> step=6619300, episode=1104 reward=0.7844419 (529.92 it/sec) -training >> step=6619400, episode=1104 reward=0.7868226 (548.38 it/sec) -training >> step=6619500, episode=1104 reward=0.7819735 (473.63 it/sec) -training >> step=6619600, episode=1104 reward=0.7909206 (497.72 it/sec) -training >> step=6619700, episode=1104 reward=0.7720029 (509.03 it/sec) -training >> step=6619800, episode=1104 reward=0.7973459 (568.83 it/sec) -training >> step=6619900, episode=1104 reward=0.8194186 (527.78 it/sec) -training >> step=6620000, episode=1104 reward=0.7858202 (484.30 it/sec) -training >> step=6620100, episode=1104 reward=0.7784297 (552.62 it/sec) -training >> step=6620200, episode=1104 reward=0.7818437 (512.83 it/sec) -training >> step=6620300, episode=1104 reward=0.7974696 (535.79 it/sec) -training >> step=6620400, episode=1104 reward=0.8006365 (461.62 it/sec) -training >> step=6620500, episode=1104 reward=0.7983773 (478.40 it/sec) -training >> step=6620600, episode=1104 reward=0.7832235 (449.76 it/sec) -training >> step=6620700, episode=1104 reward=0.8060847 (436.61 it/sec) -training >> step=6620800, episode=1104 reward=0.7803782 (490.84 it/sec) -training >> step=6620900, episode=1104 reward=0.8036336 (505.96 it/sec) -training >> step=6621000, episode=1104 reward=0.8160427 (470.79 it/sec) -training >> step=6621100, episode=1104 reward=0.7973285 (493.11 it/sec) -training >> step=6621200, episode=1104 reward=0.7894538 (444.47 it/sec) -training >> step=6621300, episode=1104 reward=0.7969557 (449.76 it/sec) -training >> step=6621400, episode=1104 reward=0.8028589 (457.61 it/sec) -training >> step=6621500, episode=1104 reward=0.7681849 (458.23 it/sec) -training >> step=6621600, episode=1104 reward=0.7955481 (464.87 it/sec) -training >> step=6621700, episode=1104 reward=0.8021102 (471.73 it/sec) -training >> step=6621800, episode=1104 reward=0.7806247 (476.37 it/sec) -training >> step=6621900, episode=1104 reward=0.7717353 (459.27 it/sec) -training >> step=6622000, episode=1104 reward=0.7750865 (471.52 it/sec) -training >> step=6622100, episode=1104 reward=0.7900115 (494.20 it/sec) -training >> step=6622200, episode=1104 reward=0.7790636 (487.34 it/sec) -training >> step=6622300, episode=1104 reward=0.7954746 (514.12 it/sec) -training >> step=6622400, episode=1104 reward=0.782478 (471.75 it/sec) -training >> step=6622500, episode=1104 reward=0.760627 (425.45 it/sec) -training >> step=6622600, episode=1104 reward=0.7865772 (497.28 it/sec) -training >> step=6622700, episode=1104 reward=0.8078756 (511.06 it/sec) -training >> step=6622800, episode=1104 reward=0.7675834 (474.45 it/sec) -training >> step=6622900, episode=1104 reward=0.7699488 (455.63 it/sec) -training >> step=6623000, episode=1104 reward=0.7860734 (490.14 it/sec) -training >> step=6623100, episode=1104 reward=0.7989127 (458.22 it/sec) -training >> step=6623200, episode=1104 reward=0.7756537 (507.47 it/sec) -training >> step=6623300, episode=1105 reward=0.7867978 (108.11 it/sec) -training >> step=6623400, episode=1105 reward=0.7805618 (360.75 it/sec) -training >> step=6623500, episode=1105 reward=0.7845225 (440.91 it/sec) -training >> step=6623600, episode=1105 reward=0.7687485 (462.20 it/sec) -training >> step=6623700, episode=1105 reward=0.7838193 (485.45 it/sec) -training >> step=6623800, episode=1105 reward=0.7541395 (458.96 it/sec) -training >> step=6623900, episode=1105 reward=0.7829915 (466.13 it/sec) -training >> step=6624000, episode=1105 reward=0.7939501 (501.40 it/sec) -training >> step=6624100, episode=1105 reward=0.7871379 (462.46 it/sec) -training >> step=6624200, episode=1105 reward=0.7836827 (462.73 it/sec) -training >> step=6624300, episode=1105 reward=0.7955419 (526.26 it/sec) -training >> step=6624400, episode=1105 reward=0.7872753 (493.54 it/sec) -training >> step=6624500, episode=1105 reward=0.7924857 (483.03 it/sec) -training >> step=6624600, episode=1105 reward=0.7907271 (482.72 it/sec) -training >> step=6624700, episode=1105 reward=0.7710536 (493.62 it/sec) -training >> step=6624800, episode=1105 reward=0.7829913 (454.83 it/sec) -training >> step=6624900, episode=1105 reward=0.7709804 (494.37 it/sec) -training >> step=6625000, episode=1105 reward=0.7849079 (409.72 it/sec) -training >> step=6625100, episode=1105 reward=0.7766486 (467.38 it/sec) -training >> step=6625200, episode=1105 reward=0.7800705 (458.74 it/sec) -training >> step=6625300, episode=1105 reward=0.7766356 (469.21 it/sec) -training >> step=6625400, episode=1105 reward=0.7838439 (466.82 it/sec) -training >> step=6625500, episode=1105 reward=0.8041699 (424.89 it/sec) -training >> step=6625600, episode=1105 reward=0.7827916 (418.91 it/sec) -training >> step=6625700, episode=1105 reward=0.7793248 (466.72 it/sec) -training >> step=6625800, episode=1105 reward=0.8026484 (490.35 it/sec) -training >> step=6625900, episode=1105 reward=0.787767 (469.68 it/sec) -training >> step=6626000, episode=1105 reward=0.8022313 (481.03 it/sec) -training >> step=6626100, episode=1105 reward=0.8068383 (482.95 it/sec) -training >> step=6626200, episode=1105 reward=0.7767261 (457.17 it/sec) -training >> step=6626300, episode=1105 reward=0.7957417 (442.33 it/sec) -training >> step=6626400, episode=1105 reward=0.7876661 (456.95 it/sec) -training >> step=6626500, episode=1105 reward=0.7922096 (506.26 it/sec) -training >> step=6626600, episode=1105 reward=0.7955803 (478.52 it/sec) -training >> step=6626700, episode=1105 reward=0.7987443 (448.08 it/sec) -training >> step=6626800, episode=1105 reward=0.7845082 (514.37 it/sec) -training >> step=6626900, episode=1105 reward=0.7722981 (480.23 it/sec) -training >> step=6627000, episode=1105 reward=0.7939121 (492.09 it/sec) -training >> step=6627100, episode=1105 reward=0.7938375 (465.38 it/sec) -training >> step=6627200, episode=1105 reward=0.7600374 (511.43 it/sec) -training >> step=6627300, episode=1105 reward=0.7691911 (455.04 it/sec) -training >> step=6627400, episode=1105 reward=0.7901019 (477.24 it/sec) -training >> step=6627500, episode=1105 reward=0.7892606 (489.02 it/sec) -training >> step=6627600, episode=1105 reward=0.7712254 (483.35 it/sec) -training >> step=6627700, episode=1105 reward=0.7864624 (476.71 it/sec) -training >> step=6627800, episode=1105 reward=0.7927806 (443.46 it/sec) -training >> step=6627900, episode=1105 reward=0.7870478 (498.64 it/sec) -training >> step=6628000, episode=1105 reward=0.7727517 (470.75 it/sec) -training >> step=6628100, episode=1105 reward=0.7953824 (486.01 it/sec) -training >> step=6628200, episode=1105 reward=0.7952959 (425.10 it/sec) -training >> step=6628300, episode=1105 reward=0.7843892 (495.02 it/sec) -training >> step=6628400, episode=1105 reward=0.7837207 (474.00 it/sec) -training >> step=6628500, episode=1105 reward=0.7849686 (466.85 it/sec) -training >> step=6628600, episode=1105 reward=0.8013461 (457.96 it/sec) -training >> step=6628700, episode=1105 reward=0.7923759 (461.03 it/sec) -training >> step=6628800, episode=1105 reward=0.7967202 (482.58 it/sec) -training >> step=6628900, episode=1105 reward=0.7751821 (463.74 it/sec) -training >> step=6629000, episode=1105 reward=0.7847908 (491.76 it/sec) -training >> step=6629100, episode=1105 reward=0.769976 (506.42 it/sec) -training >> step=6629200, episode=1105 reward=0.7764941 (435.41 it/sec) -training >> step=6629300, episode=1106 reward=0.7648891 (103.14 it/sec) -training >> step=6629400, episode=1106 reward=0.796064 (479.81 it/sec) -training >> step=6629500, episode=1106 reward=0.8004727 (375.36 it/sec) -training >> step=6629600, episode=1106 reward=0.7825555 (438.22 it/sec) -training >> step=6629700, episode=1106 reward=0.7772716 (499.85 it/sec) -training >> step=6629800, episode=1106 reward=0.7899713 (459.86 it/sec) -training >> step=6629900, episode=1106 reward=0.7719691 (467.34 it/sec) -training >> step=6630000, episode=1106 reward=0.7765565 (472.09 it/sec) -training >> step=6630100, episode=1106 reward=0.8132613 (474.28 it/sec) -training >> step=6630200, episode=1106 reward=0.7952123 (487.26 it/sec) -training >> step=6630300, episode=1106 reward=0.7841177 (489.08 it/sec) -training >> step=6630400, episode=1106 reward=0.8151389 (460.94 it/sec) -training >> step=6630500, episode=1106 reward=0.7927132 (446.49 it/sec) -training >> step=6630600, episode=1106 reward=0.7897227 (452.99 it/sec) -training >> step=6630700, episode=1106 reward=0.7897561 (465.40 it/sec) -training >> step=6630800, episode=1106 reward=0.788303 (482.87 it/sec) -training >> step=6630900, episode=1106 reward=0.7754125 (477.58 it/sec) -training >> step=6631000, episode=1106 reward=0.7778499 (460.92 it/sec) -training >> step=6631100, episode=1106 reward=0.782182 (476.09 it/sec) -training >> step=6631200, episode=1106 reward=0.7980278 (482.56 it/sec) -training >> step=6631300, episode=1106 reward=0.7863884 (453.80 it/sec) -training >> step=6631400, episode=1106 reward=0.7804453 (451.83 it/sec) -training >> step=6631500, episode=1106 reward=0.7850543 (441.45 it/sec) -training >> step=6631600, episode=1106 reward=0.7951907 (489.00 it/sec) -training >> step=6631700, episode=1106 reward=0.7749233 (463.44 it/sec) -training >> step=6631800, episode=1106 reward=0.7895122 (475.30 it/sec) -training >> step=6631900, episode=1106 reward=0.780091 (465.23 it/sec) -training >> step=6632000, episode=1106 reward=0.7886341 (446.02 it/sec) -training >> step=6632100, episode=1106 reward=0.7935699 (501.50 it/sec) -training >> step=6632200, episode=1106 reward=0.802194 (459.83 it/sec) -training >> step=6632300, episode=1106 reward=0.7885982 (408.99 it/sec) -training >> step=6632400, episode=1106 reward=0.8000186 (433.34 it/sec) -training >> step=6632500, episode=1106 reward=0.7939742 (458.72 it/sec) -training >> step=6632600, episode=1106 reward=0.7861135 (481.28 it/sec) -training >> step=6632700, episode=1106 reward=0.7863556 (476.87 it/sec) -training >> step=6632800, episode=1106 reward=0.7915492 (494.74 it/sec) -training >> step=6632900, episode=1106 reward=0.7872127 (464.65 it/sec) -training >> step=6633000, episode=1106 reward=0.8101554 (493.51 it/sec) -training >> step=6633100, episode=1106 reward=0.7982166 (475.95 it/sec) -training >> step=6633200, episode=1106 reward=0.7873193 (502.11 it/sec) -training >> step=6633300, episode=1106 reward=0.7879652 (472.32 it/sec) -training >> step=6633400, episode=1106 reward=0.7725031 (445.97 it/sec) -training >> step=6633500, episode=1106 reward=0.7742849 (473.17 it/sec) -training >> step=6633600, episode=1106 reward=0.7794773 (489.94 it/sec) -training >> step=6633700, episode=1106 reward=0.7854074 (496.50 it/sec) -training >> step=6633800, episode=1106 reward=0.7906973 (452.60 it/sec) -training >> step=6633900, episode=1106 reward=0.7604839 (467.78 it/sec) -training >> step=6634000, episode=1106 reward=0.7581916 (478.16 it/sec) -training >> step=6634100, episode=1106 reward=0.7599738 (461.02 it/sec) -training >> step=6634200, episode=1106 reward=0.7789791 (457.82 it/sec) -training >> step=6634300, episode=1106 reward=0.7750239 (495.88 it/sec) -training >> step=6634400, episode=1106 reward=0.7816487 (489.10 it/sec) -training >> step=6634500, episode=1106 reward=0.7857683 (509.21 it/sec) -training >> step=6634600, episode=1106 reward=0.8114917 (539.18 it/sec) -training >> step=6634700, episode=1106 reward=0.7739241 (466.36 it/sec) -training >> step=6634800, episode=1106 reward=0.7952144 (495.44 it/sec) -training >> step=6634900, episode=1106 reward=0.7933003 (489.85 it/sec) -training >> step=6635000, episode=1106 reward=0.781881 (540.42 it/sec) -training >> step=6635100, episode=1106 reward=0.7687842 (530.72 it/sec) -training >> step=6635200, episode=1106 reward=0.7998461 (482.39 it/sec) -training >> step=6635300, episode=1107 reward=0.7968653 (125.04 it/sec) -training >> step=6635400, episode=1107 reward=0.7823495 (496.94 it/sec) -training >> step=6635500, episode=1107 reward=0.7971528 (490.79 it/sec) -training >> step=6635600, episode=1107 reward=0.7875524 (504.79 it/sec) -training >> step=6635700, episode=1107 reward=0.7949451 (481.84 it/sec) -training >> step=6635800, episode=1107 reward=0.7811547 (520.19 it/sec) -training >> step=6635900, episode=1107 reward=0.7862549 (508.10 it/sec) -training >> step=6636000, episode=1107 reward=0.7740074 (508.54 it/sec) -training >> step=6636100, episode=1107 reward=0.8006132 (536.31 it/sec) -training >> step=6636200, episode=1107 reward=0.7850537 (504.71 it/sec) -training >> step=6636300, episode=1107 reward=0.7973323 (509.20 it/sec) -training >> step=6636400, episode=1107 reward=0.7844211 (516.84 it/sec) -training >> step=6636500, episode=1107 reward=0.7864739 (491.92 it/sec) -training >> step=6636600, episode=1107 reward=0.7957598 (496.82 it/sec) -training >> step=6636700, episode=1107 reward=0.7941276 (522.17 it/sec) -training >> step=6636800, episode=1107 reward=0.7688293 (546.26 it/sec) -training >> step=6636900, episode=1107 reward=0.8041027 (483.58 it/sec) -training >> step=6637000, episode=1107 reward=0.7727504 (419.33 it/sec) -training >> step=6637100, episode=1107 reward=0.7946936 (486.74 it/sec) -training >> step=6637200, episode=1107 reward=0.7839941 (495.36 it/sec) -training >> step=6637300, episode=1107 reward=0.7834538 (511.48 it/sec) -training >> step=6637400, episode=1107 reward=0.783529 (495.68 it/sec) -training >> step=6637500, episode=1107 reward=0.7753689 (524.35 it/sec) -training >> step=6637600, episode=1107 reward=0.7650115 (511.81 it/sec) -training >> step=6637700, episode=1107 reward=0.7862517 (503.66 it/sec) -training >> step=6637800, episode=1107 reward=0.7821642 (502.31 it/sec) -training >> step=6637900, episode=1107 reward=0.8033361 (508.32 it/sec) -training >> step=6638000, episode=1107 reward=0.7916359 (543.45 it/sec) -training >> step=6638100, episode=1107 reward=0.8147962 (492.27 it/sec) -training >> step=6638200, episode=1107 reward=0.7915879 (491.67 it/sec) -training >> step=6638300, episode=1107 reward=0.8021038 (479.57 it/sec) -training >> step=6638400, episode=1107 reward=0.7839192 (503.36 it/sec) -training >> step=6638500, episode=1107 reward=0.7889084 (523.05 it/sec) -training >> step=6638600, episode=1107 reward=0.7986275 (523.91 it/sec) -training >> step=6638700, episode=1107 reward=0.778458 (480.20 it/sec) -training >> step=6638800, episode=1107 reward=0.7834409 (480.42 it/sec) -training >> step=6638900, episode=1107 reward=0.8048226 (543.87 it/sec) -training >> step=6639000, episode=1107 reward=0.7922562 (510.07 it/sec) -training >> step=6639100, episode=1107 reward=0.7854582 (495.81 it/sec) -training >> step=6639200, episode=1107 reward=0.7960392 (485.80 it/sec) -training >> step=6639300, episode=1107 reward=0.7665539 (497.77 it/sec) -training >> step=6639400, episode=1107 reward=0.7948177 (501.53 it/sec) -training >> step=6639500, episode=1107 reward=0.7912214 (470.42 it/sec) -training >> step=6639600, episode=1107 reward=0.7718436 (491.73 it/sec) -training >> step=6639700, episode=1107 reward=0.7801248 (516.21 it/sec) -training >> step=6639800, episode=1107 reward=0.7932333 (442.59 it/sec) -training >> step=6639900, episode=1107 reward=0.7637087 (522.38 it/sec) -training >> step=6640000, episode=1107 reward=0.7724015 (511.95 it/sec) -training >> step=6640100, episode=1107 reward=0.7694713 (519.62 it/sec) -training >> step=6640200, episode=1107 reward=0.7885882 (490.59 it/sec) -training >> step=6640300, episode=1107 reward=0.8016768 (477.26 it/sec) -training >> step=6640400, episode=1107 reward=0.7889371 (520.99 it/sec) -training >> step=6640500, episode=1107 reward=0.782178 (500.67 it/sec) -training >> step=6640600, episode=1107 reward=0.7815621 (477.08 it/sec) -training >> step=6640700, episode=1107 reward=0.7898111 (510.44 it/sec) -training >> step=6640800, episode=1107 reward=0.7889398 (493.06 it/sec) -training >> step=6640900, episode=1107 reward=0.785279 (520.48 it/sec) -training >> step=6641000, episode=1107 reward=0.7706884 (503.93 it/sec) -training >> step=6641100, episode=1107 reward=0.7822176 (526.31 it/sec) -training >> step=6641200, episode=1107 reward=0.7683143 (486.32 it/sec) -training >> step=6641300, episode=1108 reward=0.7987252 (121.27 it/sec) -training >> step=6641400, episode=1108 reward=0.7733914 (469.14 it/sec) -training >> step=6641500, episode=1108 reward=0.7998387 (485.28 it/sec) -training >> step=6641600, episode=1108 reward=0.7887584 (509.35 it/sec) -training >> step=6641700, episode=1108 reward=0.7806363 (502.83 it/sec) -training >> step=6641800, episode=1108 reward=0.7744603 (520.87 it/sec) -training >> step=6641900, episode=1108 reward=0.8000021 (490.21 it/sec) -training >> step=6642000, episode=1108 reward=0.7753723 (496.14 it/sec) -training >> step=6642100, episode=1108 reward=0.7858192 (485.87 it/sec) -training >> step=6642200, episode=1108 reward=0.7882714 (534.27 it/sec) -training >> step=6642300, episode=1108 reward=0.7884916 (511.88 it/sec) -training >> step=6642400, episode=1108 reward=0.7961228 (461.35 it/sec) -training >> step=6642500, episode=1108 reward=0.7777532 (513.25 it/sec) -training >> step=6642600, episode=1108 reward=0.7838296 (530.46 it/sec) -training >> step=6642700, episode=1108 reward=0.7990593 (493.86 it/sec) -training >> step=6642800, episode=1108 reward=0.7847281 (534.97 it/sec) -training >> step=6642900, episode=1108 reward=0.8055477 (528.51 it/sec) -training >> step=6643000, episode=1108 reward=0.7743706 (459.98 it/sec) -training >> step=6643100, episode=1108 reward=0.7772991 (468.93 it/sec) -training >> step=6643200, episode=1108 reward=0.79541 (515.60 it/sec) -training >> step=6643300, episode=1108 reward=0.7957268 (511.33 it/sec) -training >> step=6643400, episode=1108 reward=0.7776238 (514.67 it/sec) -training >> step=6643500, episode=1108 reward=0.786386 (462.02 it/sec) -training >> step=6643600, episode=1108 reward=0.7765113 (491.03 it/sec) -training >> step=6643700, episode=1108 reward=0.7930738 (523.43 it/sec) -training >> step=6643800, episode=1108 reward=0.8185669 (489.09 it/sec) -training >> step=6643900, episode=1108 reward=0.7841311 (488.79 it/sec) -training >> step=6644000, episode=1108 reward=0.7796224 (477.69 it/sec) -training >> step=6644100, episode=1108 reward=0.7877887 (528.54 it/sec) -training >> step=6644200, episode=1108 reward=0.7959691 (426.83 it/sec) -training >> step=6644300, episode=1108 reward=0.782797 (494.11 it/sec) -training >> step=6644400, episode=1108 reward=0.7893112 (490.01 it/sec) -training >> step=6644500, episode=1108 reward=0.7953756 (443.34 it/sec) -training >> step=6644600, episode=1108 reward=0.7829873 (511.18 it/sec) -training >> step=6644700, episode=1108 reward=0.7954202 (534.46 it/sec) -training >> step=6644800, episode=1108 reward=0.7976373 (536.94 it/sec) -training >> step=6644900, episode=1108 reward=0.7924097 (502.15 it/sec) -training >> step=6645000, episode=1108 reward=0.7600422 (490.20 it/sec) -training >> step=6645100, episode=1108 reward=0.7963192 (525.03 it/sec) -training >> step=6645200, episode=1108 reward=0.8071689 (540.51 it/sec) -training >> step=6645300, episode=1108 reward=0.7721272 (523.85 it/sec) -training >> step=6645400, episode=1108 reward=0.7782502 (538.93 it/sec) -training >> step=6645500, episode=1108 reward=0.7805344 (479.14 it/sec) -training >> step=6645600, episode=1108 reward=0.795299 (492.39 it/sec) -training >> step=6645700, episode=1108 reward=0.7971549 (490.03 it/sec) -training >> step=6645800, episode=1108 reward=0.7989075 (537.17 it/sec) -training >> step=6645900, episode=1108 reward=0.7896809 (519.01 it/sec) -training >> step=6646000, episode=1108 reward=0.7823158 (478.42 it/sec) -training >> step=6646100, episode=1108 reward=0.7833231 (477.15 it/sec) -training >> step=6646200, episode=1108 reward=0.7818778 (500.09 it/sec) -training >> step=6646300, episode=1108 reward=0.7795907 (528.07 it/sec) -training >> step=6646400, episode=1108 reward=0.7913997 (526.87 it/sec) -training >> step=6646500, episode=1108 reward=0.8046641 (479.22 it/sec) -training >> step=6646600, episode=1108 reward=0.7876814 (494.13 it/sec) -training >> step=6646700, episode=1108 reward=0.7893685 (453.23 it/sec) -training >> step=6646800, episode=1108 reward=0.7692157 (482.06 it/sec) -training >> step=6646900, episode=1108 reward=0.7882221 (546.47 it/sec) -training >> step=6647000, episode=1108 reward=0.7756925 (498.55 it/sec) -training >> step=6647100, episode=1108 reward=0.7714754 (491.91 it/sec) -training >> step=6647200, episode=1108 reward=0.7902919 (527.96 it/sec) -training >> step=6647300, episode=1109 reward=0.79765 (121.25 it/sec) -training >> step=6647400, episode=1109 reward=0.7698828 (511.64 it/sec) -training >> step=6647500, episode=1109 reward=0.7772043 (473.46 it/sec) -training >> step=6647600, episode=1109 reward=0.78924 (530.16 it/sec) -training >> step=6647700, episode=1109 reward=0.7917004 (475.69 it/sec) -training >> step=6647800, episode=1109 reward=0.7959536 (489.49 it/sec) -training >> step=6647900, episode=1109 reward=0.780463 (496.81 it/sec) -training >> step=6648000, episode=1109 reward=0.7921003 (482.05 it/sec) -training >> step=6648100, episode=1109 reward=0.8053077 (520.85 it/sec) -training >> step=6648200, episode=1109 reward=0.7752932 (492.43 it/sec) -training >> step=6648300, episode=1109 reward=0.8001423 (476.34 it/sec) -training >> step=6648400, episode=1109 reward=0.7816387 (485.30 it/sec) -training >> step=6648500, episode=1109 reward=0.7927027 (417.20 it/sec) -training >> step=6648600, episode=1109 reward=0.7967218 (458.52 it/sec) -training >> step=6648700, episode=1109 reward=0.7909888 (447.35 it/sec) -training >> step=6648800, episode=1109 reward=0.7821239 (442.03 it/sec) -training >> step=6648900, episode=1109 reward=0.7904525 (468.14 it/sec) -training >> step=6649000, episode=1109 reward=0.80304 (527.75 it/sec) -training >> step=6649100, episode=1109 reward=0.7946938 (489.29 it/sec) -training >> step=6649200, episode=1109 reward=0.7867242 (480.06 it/sec) -training >> step=6649300, episode=1109 reward=0.7926954 (473.74 it/sec) -training >> step=6649400, episode=1109 reward=0.8044274 (437.60 it/sec) -training >> step=6649500, episode=1109 reward=0.7892864 (491.23 it/sec) -training >> step=6649600, episode=1109 reward=0.7814926 (510.10 it/sec) -training >> step=6649700, episode=1109 reward=0.7900623 (427.36 it/sec) -training >> step=6649800, episode=1109 reward=0.7961344 (422.67 it/sec) -training >> step=6649900, episode=1109 reward=0.7861978 (500.28 it/sec) -training >> step=6650000, episode=1109 reward=0.780256 (492.11 it/sec) -training >> step=6650100, episode=1109 reward=0.7829674 (516.64 it/sec) -training >> step=6650200, episode=1109 reward=0.811766 (466.51 it/sec) -training >> step=6650300, episode=1109 reward=0.7961549 (487.08 it/sec) -training >> step=6650400, episode=1109 reward=0.7794604 (505.10 it/sec) -training >> step=6650500, episode=1109 reward=0.785086 (511.91 it/sec) -training >> step=6650600, episode=1109 reward=0.7673376 (478.40 it/sec) -training >> step=6650700, episode=1109 reward=0.7789562 (465.51 it/sec) -training >> step=6650800, episode=1109 reward=0.7800068 (495.83 it/sec) -training >> step=6650900, episode=1109 reward=0.8030193 (508.18 it/sec) -training >> step=6651000, episode=1109 reward=0.7932903 (498.40 it/sec) -training >> step=6651100, episode=1109 reward=0.7911897 (510.92 it/sec) -training >> step=6651200, episode=1109 reward=0.8024848 (492.82 it/sec) -training >> step=6651300, episode=1109 reward=0.763496 (464.91 it/sec) -training >> step=6651400, episode=1109 reward=0.8086608 (419.77 it/sec) -training >> step=6651500, episode=1109 reward=0.7947146 (480.79 it/sec) -training >> step=6651600, episode=1109 reward=0.798903 (470.07 it/sec) -training >> step=6651700, episode=1109 reward=0.7754393 (442.30 it/sec) -training >> step=6651800, episode=1109 reward=0.7843599 (429.24 it/sec) -training >> step=6651900, episode=1109 reward=0.7818505 (478.26 it/sec) -training >> step=6652000, episode=1109 reward=0.7834074 (472.02 it/sec) -training >> step=6652100, episode=1109 reward=0.7990695 (475.20 it/sec) -training >> step=6652200, episode=1109 reward=0.7668667 (430.84 it/sec) -training >> step=6652300, episode=1109 reward=0.7863321 (492.84 it/sec) -training >> step=6652400, episode=1109 reward=0.7773802 (431.19 it/sec) -training >> step=6652500, episode=1109 reward=0.7789015 (449.71 it/sec) -training >> step=6652600, episode=1109 reward=0.7969801 (466.67 it/sec) -training >> step=6652700, episode=1109 reward=0.7878538 (478.23 it/sec) -training >> step=6652800, episode=1109 reward=0.7915566 (451.09 it/sec) -training >> step=6652900, episode=1109 reward=0.7931542 (443.44 it/sec) -training >> step=6653000, episode=1109 reward=0.7613565 (478.36 it/sec) -training >> step=6653100, episode=1109 reward=0.8021553 (442.67 it/sec) -training >> step=6653200, episode=1109 reward=0.7934287 (412.84 it/sec) -training >> step=6653300, episode=1110 reward=0.7691564 (96.23 it/sec) -training >> step=6653400, episode=1110 reward=0.7807366 (453.40 it/sec) -training >> step=6653500, episode=1110 reward=0.7931956 (462.49 it/sec) -training >> step=6653600, episode=1110 reward=0.7709154 (433.56 it/sec) -training >> step=6653700, episode=1110 reward=0.7759318 (445.37 it/sec) -training >> step=6653800, episode=1110 reward=0.7851032 (463.89 it/sec) -training >> step=6653900, episode=1110 reward=0.776099 (408.96 it/sec) -training >> step=6654000, episode=1110 reward=0.7778542 (459.32 it/sec) -training >> step=6654100, episode=1110 reward=0.7907723 (461.85 it/sec) -training >> step=6654200, episode=1110 reward=0.7826403 (452.04 it/sec) -training >> step=6654300, episode=1110 reward=0.7716796 (481.06 it/sec) -training >> step=6654400, episode=1110 reward=0.7896486 (497.03 it/sec) -training >> step=6654500, episode=1110 reward=0.7969686 (496.65 it/sec) -training >> step=6654600, episode=1110 reward=0.7930406 (445.83 it/sec) -training >> step=6654700, episode=1110 reward=0.8006884 (428.19 it/sec) -training >> step=6654800, episode=1110 reward=0.7706364 (458.55 it/sec) -training >> step=6654900, episode=1110 reward=0.7867312 (470.98 it/sec) -training >> step=6655000, episode=1110 reward=0.7853245 (450.53 it/sec) -training >> step=6655100, episode=1110 reward=0.797959 (475.86 it/sec) -training >> step=6655200, episode=1110 reward=0.7760602 (470.18 it/sec) -training >> step=6655300, episode=1110 reward=0.7676572 (494.40 it/sec) -training >> step=6655400, episode=1110 reward=0.789353 (471.56 it/sec) -training >> step=6655500, episode=1110 reward=0.7901332 (459.21 it/sec) -training >> step=6655600, episode=1110 reward=0.7942268 (473.71 it/sec) -training >> step=6655700, episode=1110 reward=0.811029 (467.17 it/sec) -training >> step=6655800, episode=1110 reward=0.7987335 (432.69 it/sec) -training >> step=6655900, episode=1110 reward=0.786055 (482.57 it/sec) -training >> step=6656000, episode=1110 reward=0.7854534 (489.84 it/sec) -training >> step=6656100, episode=1110 reward=0.7804931 (464.13 it/sec) -training >> step=6656200, episode=1110 reward=0.7988632 (516.20 it/sec) -training >> step=6656300, episode=1110 reward=0.780731 (465.74 it/sec) -training >> step=6656400, episode=1110 reward=0.7828338 (467.38 it/sec) -training >> step=6656500, episode=1110 reward=0.8087354 (483.06 it/sec) -training >> step=6656600, episode=1110 reward=0.7863179 (452.36 it/sec) -training >> step=6656700, episode=1110 reward=0.7738999 (475.50 it/sec) -training >> step=6656800, episode=1110 reward=0.7841848 (470.83 it/sec) -training >> step=6656900, episode=1110 reward=0.800771 (514.77 it/sec) -training >> step=6657000, episode=1110 reward=0.7942214 (485.77 it/sec) -training >> step=6657100, episode=1110 reward=0.7733171 (486.65 it/sec) -training >> step=6657200, episode=1110 reward=0.8103824 (486.14 it/sec) -training >> step=6657300, episode=1110 reward=0.7734845 (485.65 it/sec) -training >> step=6657400, episode=1110 reward=0.7877245 (509.56 it/sec) -training >> step=6657500, episode=1110 reward=0.7973938 (426.86 it/sec) -training >> step=6657600, episode=1110 reward=0.7953832 (485.56 it/sec) -training >> step=6657700, episode=1110 reward=0.7964104 (461.91 it/sec) -training >> step=6657800, episode=1110 reward=0.787042 (452.11 it/sec) -training >> step=6657900, episode=1110 reward=0.8028148 (463.44 it/sec) -training >> step=6658000, episode=1110 reward=0.7789159 (483.30 it/sec) -training >> step=6658100, episode=1110 reward=0.8056034 (402.04 it/sec) -training >> step=6658200, episode=1110 reward=0.7697064 (453.10 it/sec) -training >> step=6658300, episode=1110 reward=0.776281 (502.46 it/sec) -training >> step=6658400, episode=1110 reward=0.7870158 (480.65 it/sec) -training >> step=6658500, episode=1110 reward=0.7886417 (487.88 it/sec) -training >> step=6658600, episode=1110 reward=0.7847559 (447.29 it/sec) -training >> step=6658700, episode=1110 reward=0.7962905 (534.52 it/sec) -training >> step=6658800, episode=1110 reward=0.7865691 (510.21 it/sec) -training >> step=6658900, episode=1110 reward=0.7668584 (496.04 it/sec) -training >> step=6659000, episode=1110 reward=0.795424 (477.53 it/sec) -training >> step=6659100, episode=1110 reward=0.78361 (521.78 it/sec) -training >> step=6659200, episode=1110 reward=0.7723615 (493.82 it/sec) -training >> step=6659300, episode=1111 reward=0.7759085 (115.08 it/sec) -training >> step=6659400, episode=1111 reward=0.7928217 (463.06 it/sec) -training >> step=6659500, episode=1111 reward=0.7832681 (515.26 it/sec) -training >> step=6659600, episode=1111 reward=0.7727191 (472.86 it/sec) -training >> step=6659700, episode=1111 reward=0.7970418 (429.96 it/sec) -training >> step=6659800, episode=1111 reward=0.782415 (508.06 it/sec) -training >> step=6659900, episode=1111 reward=0.7944393 (463.35 it/sec) -training >> step=6660000, episode=1111 reward=0.7884613 (504.14 it/sec) -training >> step=6660100, episode=1111 reward=0.7865123 (488.81 it/sec) -training >> step=6660200, episode=1111 reward=0.7849091 (492.83 it/sec) -training >> step=6660300, episode=1111 reward=0.8227543 (510.85 it/sec) -training >> step=6660400, episode=1111 reward=0.7786413 (471.47 it/sec) -training >> step=6660500, episode=1111 reward=0.8002344 (493.03 it/sec) -training >> step=6660600, episode=1111 reward=0.7949567 (454.61 it/sec) -training >> step=6660700, episode=1111 reward=0.8001649 (474.86 it/sec) -training >> step=6660800, episode=1111 reward=0.7955792 (494.02 it/sec) -training >> step=6660900, episode=1111 reward=0.8028257 (481.23 it/sec) -training >> step=6661000, episode=1111 reward=0.7812146 (463.88 it/sec) -training >> step=6661100, episode=1111 reward=0.7791977 (464.43 it/sec) -training >> step=6661200, episode=1111 reward=0.7829483 (497.35 it/sec) -training >> step=6661300, episode=1111 reward=0.7919757 (538.65 it/sec) -training >> step=6661400, episode=1111 reward=0.7970988 (520.12 it/sec) -training >> step=6661500, episode=1111 reward=0.7773905 (561.91 it/sec) -training >> step=6661600, episode=1111 reward=0.7987247 (542.78 it/sec) -training >> step=6661700, episode=1111 reward=0.7646127 (527.99 it/sec) -training >> step=6661800, episode=1111 reward=0.8077187 (493.82 it/sec) -training >> step=6661900, episode=1111 reward=0.7946104 (535.20 it/sec) -training >> step=6662000, episode=1111 reward=0.7774272 (553.96 it/sec) -training >> step=6662100, episode=1111 reward=0.7853216 (534.68 it/sec) -training >> step=6662200, episode=1111 reward=0.778416 (509.15 it/sec) -training >> step=6662300, episode=1111 reward=0.7786934 (531.93 it/sec) -training >> step=6662400, episode=1111 reward=0.7997653 (502.91 it/sec) -training >> step=6662500, episode=1111 reward=0.8076388 (500.38 it/sec) -training >> step=6662600, episode=1111 reward=0.7849609 (494.95 it/sec) -training >> step=6662700, episode=1111 reward=0.795036 (526.83 it/sec) -training >> step=6662800, episode=1111 reward=0.7994628 (495.81 it/sec) -training >> step=6662900, episode=1111 reward=0.7948634 (570.16 it/sec) -training >> step=6663000, episode=1111 reward=0.7998772 (480.79 it/sec) -training >> step=6663100, episode=1111 reward=0.7976272 (502.34 it/sec) -training >> step=6663200, episode=1111 reward=0.7953148 (490.72 it/sec) -training >> step=6663300, episode=1111 reward=0.7816312 (416.16 it/sec) -training >> step=6663400, episode=1111 reward=0.810749 (546.77 it/sec) -training >> step=6663500, episode=1111 reward=0.7875481 (522.47 it/sec) -training >> step=6663600, episode=1111 reward=0.7910233 (525.73 it/sec) -training >> step=6663700, episode=1111 reward=0.7901704 (539.42 it/sec) -training >> step=6663800, episode=1111 reward=0.7717896 (495.39 it/sec) -training >> step=6663900, episode=1111 reward=0.7774217 (529.75 it/sec) -training >> step=6664000, episode=1111 reward=0.7879802 (515.58 it/sec) -training >> step=6664100, episode=1111 reward=0.7807388 (543.16 it/sec) -training >> step=6664200, episode=1111 reward=0.7765236 (465.08 it/sec) -training >> step=6664300, episode=1111 reward=0.7681682 (537.25 it/sec) -training >> step=6664400, episode=1111 reward=0.766243 (487.03 it/sec) -training >> step=6664500, episode=1111 reward=0.7700511 (462.59 it/sec) -training >> step=6664600, episode=1111 reward=0.7752235 (461.55 it/sec) -training >> step=6664700, episode=1111 reward=0.7877897 (507.50 it/sec) -training >> step=6664800, episode=1111 reward=0.7846572 (509.71 it/sec) -training >> step=6664900, episode=1111 reward=0.7834474 (494.12 it/sec) -training >> step=6665000, episode=1111 reward=0.8179533 (506.05 it/sec) -training >> step=6665100, episode=1111 reward=0.7729393 (540.06 it/sec) -training >> step=6665200, episode=1111 reward=0.7958859 (550.77 it/sec) -training >> step=6665300, episode=1112 reward=0.7765874 (110.65 it/sec) -training >> step=6665400, episode=1112 reward=0.771533 (504.09 it/sec) -training >> step=6665500, episode=1112 reward=0.776186 (440.86 it/sec) -training >> step=6665600, episode=1112 reward=0.7793497 (526.99 it/sec) -training >> step=6665700, episode=1112 reward=0.7926694 (522.06 it/sec) -training >> step=6665800, episode=1112 reward=0.7784673 (523.10 it/sec) -training >> step=6665900, episode=1112 reward=0.7742675 (523.75 it/sec) -training >> step=6666000, episode=1112 reward=0.7910865 (490.22 it/sec) -training >> step=6666100, episode=1112 reward=0.8025013 (505.26 it/sec) -training >> step=6666200, episode=1112 reward=0.7702756 (513.15 it/sec) -training >> step=6666300, episode=1112 reward=0.7888282 (553.58 it/sec) -training >> step=6666400, episode=1112 reward=0.7962165 (438.48 it/sec) -training >> step=6666500, episode=1112 reward=0.785853 (488.81 it/sec) -training >> step=6666600, episode=1112 reward=0.7939587 (491.22 it/sec) -training >> step=6666700, episode=1112 reward=0.7782088 (468.62 it/sec) -training >> step=6666800, episode=1112 reward=0.7863843 (492.70 it/sec) -training >> step=6666900, episode=1112 reward=0.7862853 (497.87 it/sec) -training >> step=6667000, episode=1112 reward=0.7806299 (526.56 it/sec) -training >> step=6667100, episode=1112 reward=0.7765285 (449.02 it/sec) -training >> step=6667200, episode=1112 reward=0.7839127 (432.56 it/sec) -training >> step=6667300, episode=1112 reward=0.7864427 (515.85 it/sec) -training >> step=6667400, episode=1112 reward=0.7653669 (507.67 it/sec) -training >> step=6667500, episode=1112 reward=0.77106 (459.11 it/sec) -training >> step=6667600, episode=1112 reward=0.7795777 (473.19 it/sec) -training >> step=6667700, episode=1112 reward=0.7773038 (528.46 it/sec) -training >> step=6667800, episode=1112 reward=0.7786254 (463.40 it/sec) -training >> step=6667900, episode=1112 reward=0.7811725 (473.56 it/sec) -training >> step=6668000, episode=1112 reward=0.8025434 (492.22 it/sec) -training >> step=6668100, episode=1112 reward=0.7914666 (510.23 it/sec) -training >> step=6668200, episode=1112 reward=0.7875958 (460.48 it/sec) -training >> step=6668300, episode=1112 reward=0.8069297 (451.73 it/sec) -training >> step=6668400, episode=1112 reward=0.7822687 (499.13 it/sec) -training >> step=6668500, episode=1112 reward=0.7839921 (447.93 it/sec) -training >> step=6668600, episode=1112 reward=0.7893572 (482.32 it/sec) -training >> step=6668700, episode=1112 reward=0.79725 (485.30 it/sec) -training >> step=6668800, episode=1112 reward=0.805118 (489.71 it/sec) -training >> step=6668900, episode=1112 reward=0.7735589 (480.45 it/sec) -training >> step=6669000, episode=1112 reward=0.7904754 (466.85 it/sec) -training >> step=6669100, episode=1112 reward=0.764829 (518.71 it/sec) -training >> step=6669200, episode=1112 reward=0.787339 (436.51 it/sec) -training >> step=6669300, episode=1112 reward=0.7814573 (464.02 it/sec) -training >> step=6669400, episode=1112 reward=0.7601913 (508.14 it/sec) -training >> step=6669500, episode=1112 reward=0.7898539 (479.90 it/sec) -training >> step=6669600, episode=1112 reward=0.7816468 (430.37 it/sec) -training >> step=6669700, episode=1112 reward=0.7812731 (470.68 it/sec) -training >> step=6669800, episode=1112 reward=0.7937951 (508.01 it/sec) -training >> step=6669900, episode=1112 reward=0.8040347 (514.27 it/sec) -training >> step=6670000, episode=1112 reward=0.7754649 (447.61 it/sec) -training >> step=6670100, episode=1112 reward=0.7920999 (486.67 it/sec) -training >> step=6670200, episode=1112 reward=0.7886569 (488.54 it/sec) -training >> step=6670300, episode=1112 reward=0.7719969 (468.99 it/sec) -training >> step=6670400, episode=1112 reward=0.7895425 (488.53 it/sec) -training >> step=6670500, episode=1112 reward=0.7854195 (477.84 it/sec) -training >> step=6670600, episode=1112 reward=0.7757622 (523.27 it/sec) -training >> step=6670700, episode=1112 reward=0.7901064 (485.44 it/sec) -training >> step=6670800, episode=1112 reward=0.7890191 (497.49 it/sec) -training >> step=6670900, episode=1112 reward=0.7820692 (481.72 it/sec) -training >> step=6671000, episode=1112 reward=0.7856193 (479.27 it/sec) -training >> step=6671100, episode=1112 reward=0.7915434 (478.97 it/sec) -training >> step=6671200, episode=1112 reward=0.784414 (497.97 it/sec) -training >> step=6671300, episode=1113 reward=0.7964671 (100.91 it/sec) -training >> step=6671400, episode=1113 reward=0.798238 (445.04 it/sec) -training >> step=6671500, episode=1113 reward=0.7781184 (494.53 it/sec) -training >> step=6671600, episode=1113 reward=0.8014777 (440.89 it/sec) -training >> step=6671700, episode=1113 reward=0.7643664 (468.94 it/sec) -training >> step=6671800, episode=1113 reward=0.8143418 (497.27 it/sec) -training >> step=6671900, episode=1113 reward=0.7991344 (506.46 it/sec) -training >> step=6672000, episode=1113 reward=0.801627 (447.80 it/sec) -training >> step=6672100, episode=1113 reward=0.7846323 (478.82 it/sec) -training >> step=6672200, episode=1113 reward=0.7863094 (417.41 it/sec) -training >> step=6672300, episode=1113 reward=0.7989634 (490.22 it/sec) -training >> step=6672400, episode=1113 reward=0.7815472 (504.69 it/sec) -training >> step=6672500, episode=1113 reward=0.7983844 (447.04 it/sec) -training >> step=6672600, episode=1113 reward=0.7829513 (470.33 it/sec) -training >> step=6672700, episode=1113 reward=0.7769699 (490.97 it/sec) -training >> step=6672800, episode=1113 reward=0.7963067 (466.65 it/sec) -training >> step=6672900, episode=1113 reward=0.7664444 (483.25 it/sec) -training >> step=6673000, episode=1113 reward=0.790683 (525.90 it/sec) -training >> step=6673100, episode=1113 reward=0.7825987 (455.58 it/sec) -training >> step=6673200, episode=1113 reward=0.7753223 (452.81 it/sec) -training >> step=6673300, episode=1113 reward=0.7848399 (482.80 it/sec) -training >> step=6673400, episode=1113 reward=0.784409 (491.36 it/sec) -training >> step=6673500, episode=1113 reward=0.7669218 (483.11 it/sec) -training >> step=6673600, episode=1113 reward=0.783199 (488.97 it/sec) -training >> step=6673700, episode=1113 reward=0.7785587 (515.82 it/sec) -training >> step=6673800, episode=1113 reward=0.8045206 (462.39 it/sec) -training >> step=6673900, episode=1113 reward=0.7845439 (474.67 it/sec) -training >> step=6674000, episode=1113 reward=0.8016452 (462.11 it/sec) -training >> step=6674100, episode=1113 reward=0.7762174 (508.82 it/sec) -training >> step=6674200, episode=1113 reward=0.7862109 (478.94 it/sec) -training >> step=6674300, episode=1113 reward=0.8000739 (437.49 it/sec) -training >> step=6674400, episode=1113 reward=0.774873 (464.73 it/sec) -training >> step=6674500, episode=1113 reward=0.7785752 (482.85 it/sec) -training >> step=6674600, episode=1113 reward=0.7977786 (485.96 it/sec) -training >> step=6674700, episode=1113 reward=0.7902018 (441.97 it/sec) -training >> step=6674800, episode=1113 reward=0.7865607 (526.15 it/sec) -training >> step=6674900, episode=1113 reward=0.7995353 (483.46 it/sec) -training >> step=6675000, episode=1113 reward=0.7738107 (511.25 it/sec) -training >> step=6675100, episode=1113 reward=0.772245 (498.06 it/sec) -training >> step=6675200, episode=1113 reward=0.7853897 (466.13 it/sec) -training >> step=6675300, episode=1113 reward=0.769644 (535.67 it/sec) -training >> step=6675400, episode=1113 reward=0.7803676 (508.80 it/sec) -training >> step=6675500, episode=1113 reward=0.7996069 (496.59 it/sec) -training >> step=6675600, episode=1113 reward=0.7977141 (482.68 it/sec) -training >> step=6675700, episode=1113 reward=0.7767435 (449.63 it/sec) -training >> step=6675800, episode=1113 reward=0.7553154 (514.74 it/sec) -training >> step=6675900, episode=1113 reward=0.7954336 (531.97 it/sec) -training >> step=6676000, episode=1113 reward=0.7606658 (477.94 it/sec) -training >> step=6676100, episode=1113 reward=0.7715568 (478.75 it/sec) -training >> step=6676200, episode=1113 reward=0.778181 (494.15 it/sec) -training >> step=6676300, episode=1113 reward=0.7691091 (479.28 it/sec) -training >> step=6676400, episode=1113 reward=0.778548 (488.34 it/sec) -training >> step=6676500, episode=1113 reward=0.7373238 (469.97 it/sec) -training >> step=6676600, episode=1113 reward=0.7877823 (513.67 it/sec) -training >> step=6676700, episode=1113 reward=0.7925071 (473.36 it/sec) -training >> step=6676800, episode=1113 reward=0.7888543 (522.66 it/sec) -training >> step=6676900, episode=1113 reward=0.7817015 (504.44 it/sec) -training >> step=6677000, episode=1113 reward=0.7743236 (486.43 it/sec) -training >> step=6677100, episode=1113 reward=0.7977785 (481.24 it/sec) -training >> step=6677200, episode=1113 reward=0.7654577 (528.47 it/sec) -training >> step=6677300, episode=1114 reward=0.7761669 (122.27 it/sec) -training >> step=6677400, episode=1114 reward=0.7768964 (456.75 it/sec) -training >> step=6677500, episode=1114 reward=0.7710133 (522.89 it/sec) -training >> step=6677600, episode=1114 reward=0.7726604 (512.80 it/sec) -training >> step=6677700, episode=1114 reward=0.7648215 (537.12 it/sec) -training >> step=6677800, episode=1114 reward=0.7960958 (471.08 it/sec) -training >> step=6677900, episode=1114 reward=0.7901558 (494.37 it/sec) -training >> step=6678000, episode=1114 reward=0.7935777 (491.35 it/sec) -training >> step=6678100, episode=1114 reward=0.7702969 (503.42 it/sec) -training >> step=6678200, episode=1114 reward=0.7704472 (519.75 it/sec) -training >> step=6678300, episode=1114 reward=0.7857972 (472.11 it/sec) -training >> step=6678400, episode=1114 reward=0.7809765 (503.14 it/sec) -training >> step=6678500, episode=1114 reward=0.7833724 (500.29 it/sec) -training >> step=6678600, episode=1114 reward=0.809386 (532.70 it/sec) -training >> step=6678700, episode=1114 reward=0.7748975 (498.97 it/sec) -training >> step=6678800, episode=1114 reward=0.7942483 (489.05 it/sec) -training >> step=6678900, episode=1114 reward=0.8212458 (501.97 it/sec) -training >> step=6679000, episode=1114 reward=0.8028429 (458.03 it/sec) -training >> step=6679100, episode=1114 reward=0.7814774 (526.05 it/sec) -training >> step=6679200, episode=1114 reward=0.784017 (490.00 it/sec) -training >> step=6679300, episode=1114 reward=0.7725834 (523.97 it/sec) -training >> step=6679400, episode=1114 reward=0.7850863 (497.79 it/sec) -training >> step=6679500, episode=1114 reward=0.7726521 (544.91 it/sec) -training >> step=6679600, episode=1114 reward=0.7931754 (511.59 it/sec) -training >> step=6679700, episode=1114 reward=0.7699324 (477.15 it/sec) -training >> step=6679800, episode=1114 reward=0.7697259 (531.22 it/sec) -training >> step=6679900, episode=1114 reward=0.7729158 (520.12 it/sec) -training >> step=6680000, episode=1114 reward=0.7929759 (446.21 it/sec) -training >> step=6680100, episode=1114 reward=0.7973331 (486.56 it/sec) -training >> step=6680200, episode=1114 reward=0.7909218 (541.67 it/sec) -training >> step=6680300, episode=1114 reward=0.7793674 (488.66 it/sec) -training >> step=6680400, episode=1114 reward=0.790787 (487.91 it/sec) -training >> step=6680500, episode=1114 reward=0.7851076 (509.05 it/sec) -training >> step=6680600, episode=1114 reward=0.8006107 (466.58 it/sec) -training >> step=6680700, episode=1114 reward=0.7759365 (482.51 it/sec) -training >> step=6680800, episode=1114 reward=0.7817989 (488.33 it/sec) -training >> step=6680900, episode=1114 reward=0.777863 (507.48 it/sec) -training >> step=6681000, episode=1114 reward=0.7775438 (505.39 it/sec) -training >> step=6681100, episode=1114 reward=0.8104998 (441.31 it/sec) -training >> step=6681200, episode=1114 reward=0.7809097 (500.90 it/sec) -training >> step=6681300, episode=1114 reward=0.7778001 (508.81 it/sec) -training >> step=6681400, episode=1114 reward=0.7895881 (486.66 it/sec) -training >> step=6681500, episode=1114 reward=0.7728295 (537.03 it/sec) -training >> step=6681600, episode=1114 reward=0.7886553 (533.15 it/sec) -training >> step=6681700, episode=1114 reward=0.8021209 (512.25 it/sec) -training >> step=6681800, episode=1114 reward=0.7617453 (511.74 it/sec) -training >> step=6681900, episode=1114 reward=0.7704419 (441.34 it/sec) -training >> step=6682000, episode=1114 reward=0.7617339 (539.77 it/sec) -training >> step=6682100, episode=1114 reward=0.7835327 (541.24 it/sec) -training >> step=6682200, episode=1114 reward=0.7980807 (524.48 it/sec) -training >> step=6682300, episode=1114 reward=0.7627858 (456.69 it/sec) -training >> step=6682400, episode=1114 reward=0.7762801 (452.67 it/sec) -training >> step=6682500, episode=1114 reward=0.786891 (489.10 it/sec) -training >> step=6682600, episode=1114 reward=0.7976075 (496.10 it/sec) -training >> step=6682700, episode=1114 reward=0.7802973 (471.50 it/sec) -training >> step=6682800, episode=1114 reward=0.778861 (503.47 it/sec) -training >> step=6682900, episode=1114 reward=0.7779939 (488.02 it/sec) -training >> step=6683000, episode=1114 reward=0.7978147 (484.51 it/sec) -training >> step=6683100, episode=1114 reward=0.7649338 (535.87 it/sec) -training >> step=6683200, episode=1114 reward=0.7669697 (509.86 it/sec) -training >> step=6683300, episode=1115 reward=0.7699603 (49.65 it/sec) -training >> step=6683400, episode=1115 reward=0.7752084 (516.95 it/sec) -training >> step=6683500, episode=1115 reward=0.7601017 (506.53 it/sec) -training >> step=6683600, episode=1115 reward=0.7797645 (445.78 it/sec) -training >> step=6683700, episode=1115 reward=0.7783887 (546.62 it/sec) -training >> step=6683800, episode=1115 reward=0.7750946 (513.33 it/sec) -training >> step=6683900, episode=1115 reward=0.787394 (503.73 it/sec) -training >> step=6684000, episode=1115 reward=0.7922439 (485.26 it/sec) -training >> step=6684100, episode=1115 reward=0.781926 (493.42 it/sec) -training >> step=6684200, episode=1115 reward=0.7905072 (515.82 it/sec) -training >> step=6684300, episode=1115 reward=0.7906146 (477.57 it/sec) -training >> step=6684400, episode=1115 reward=0.7698294 (541.49 it/sec) -training >> step=6684500, episode=1115 reward=0.7803735 (509.69 it/sec) -training >> step=6684600, episode=1115 reward=0.7984006 (488.19 it/sec) -training >> step=6684700, episode=1115 reward=0.8040612 (489.71 it/sec) -training >> step=6684800, episode=1115 reward=0.789457 (511.88 it/sec) -training >> step=6684900, episode=1115 reward=0.7730865 (547.26 it/sec) -training >> step=6685000, episode=1115 reward=0.7905443 (540.54 it/sec) -training >> step=6685100, episode=1115 reward=0.8102461 (484.57 it/sec) -training >> step=6685200, episode=1115 reward=0.7888985 (494.81 it/sec) -training >> step=6685300, episode=1115 reward=0.7858738 (484.43 it/sec) -training >> step=6685400, episode=1115 reward=0.7919973 (507.02 it/sec) -training >> step=6685500, episode=1115 reward=0.7842783 (527.30 it/sec) -training >> step=6685600, episode=1115 reward=0.7974666 (486.81 it/sec) -training >> step=6685700, episode=1115 reward=0.7895406 (547.01 it/sec) -training >> step=6685800, episode=1115 reward=0.7723927 (495.77 it/sec) -training >> step=6685900, episode=1115 reward=0.7825536 (521.54 it/sec) -training >> step=6686000, episode=1115 reward=0.7871371 (500.83 it/sec) -training >> step=6686100, episode=1115 reward=0.7783032 (523.41 it/sec) -training >> step=6686200, episode=1115 reward=0.7847382 (451.50 it/sec) -training >> step=6686300, episode=1115 reward=0.8001972 (475.62 it/sec) -training >> step=6686400, episode=1115 reward=0.7927296 (484.92 it/sec) -training >> step=6686500, episode=1115 reward=0.7956428 (494.41 it/sec) -training >> step=6686600, episode=1115 reward=0.777949 (495.10 it/sec) -training >> step=6686700, episode=1115 reward=0.7784297 (487.75 it/sec) -training >> step=6686800, episode=1115 reward=0.7664328 (491.53 it/sec) -training >> step=6686900, episode=1115 reward=0.7792453 (499.63 it/sec) -training >> step=6687000, episode=1115 reward=0.7912063 (507.62 it/sec) -training >> step=6687100, episode=1115 reward=0.7773653 (501.39 it/sec) -training >> step=6687200, episode=1115 reward=0.7808391 (466.03 it/sec) -training >> step=6687300, episode=1115 reward=0.7827122 (496.47 it/sec) -training >> step=6687400, episode=1115 reward=0.7887685 (519.66 it/sec) -training >> step=6687500, episode=1115 reward=0.7856266 (481.70 it/sec) -training >> step=6687600, episode=1115 reward=0.7813082 (516.12 it/sec) -training >> step=6687700, episode=1115 reward=0.7908599 (463.92 it/sec) -training >> step=6687800, episode=1115 reward=0.7958142 (475.07 it/sec) -training >> step=6687900, episode=1115 reward=0.7807124 (533.79 it/sec) -training >> step=6688000, episode=1115 reward=0.7855042 (460.84 it/sec) -training >> step=6688100, episode=1115 reward=0.7920911 (482.35 it/sec) -training >> step=6688200, episode=1115 reward=0.7657387 (510.98 it/sec) -training >> step=6688300, episode=1115 reward=0.795405 (512.34 it/sec) -training >> step=6688400, episode=1115 reward=0.8018708 (503.83 it/sec) -training >> step=6688500, episode=1115 reward=0.7991853 (515.70 it/sec) -training >> step=6688600, episode=1115 reward=0.7922789 (528.78 it/sec) -training >> step=6688700, episode=1115 reward=0.7652514 (524.63 it/sec) -training >> step=6688800, episode=1115 reward=0.7903342 (463.83 it/sec) -training >> step=6688900, episode=1115 reward=0.7844205 (473.05 it/sec) -training >> step=6689000, episode=1115 reward=0.7886107 (508.27 it/sec) -training >> step=6689100, episode=1115 reward=0.7791864 (497.97 it/sec) -training >> step=6689200, episode=1115 reward=0.769508 (469.31 it/sec) -training >> step=6689300, episode=1116 reward=0.7832807 (42.30 it/sec) -training >> step=6689400, episode=1116 reward=0.7747353 (498.00 it/sec) -training >> step=6689500, episode=1116 reward=0.7705489 (439.33 it/sec) -training >> step=6689600, episode=1116 reward=0.7721303 (475.75 it/sec) -training >> step=6689700, episode=1116 reward=0.7810523 (466.38 it/sec) -training >> step=6689800, episode=1116 reward=0.7823738 (500.98 it/sec) -training >> step=6689900, episode=1116 reward=0.7731418 (489.16 it/sec) -training >> step=6690000, episode=1116 reward=0.778173 (494.72 it/sec) -training >> step=6690100, episode=1116 reward=0.778142 (454.31 it/sec) -training >> step=6690200, episode=1116 reward=0.7877103 (487.56 it/sec) -training >> step=6690300, episode=1116 reward=0.7749863 (524.34 it/sec) -training >> step=6690400, episode=1116 reward=0.7907267 (482.90 it/sec) -training >> step=6690500, episode=1116 reward=0.8014153 (469.76 it/sec) -training >> step=6690600, episode=1116 reward=0.7868708 (428.48 it/sec) -training >> step=6690700, episode=1116 reward=0.794839 (464.48 it/sec) -training >> step=6690800, episode=1116 reward=0.7838423 (495.83 it/sec) -training >> step=6690900, episode=1116 reward=0.8028866 (486.30 it/sec) -training >> step=6691000, episode=1116 reward=0.7850479 (488.60 it/sec) -training >> step=6691100, episode=1116 reward=0.8021981 (466.41 it/sec) -training >> step=6691200, episode=1116 reward=0.792687 (501.05 it/sec) -training >> step=6691300, episode=1116 reward=0.7975867 (509.69 it/sec) -training >> step=6691400, episode=1116 reward=0.7717414 (497.46 it/sec) -training >> step=6691500, episode=1116 reward=0.7670295 (529.97 it/sec) -training >> step=6691600, episode=1116 reward=0.7802088 (471.50 it/sec) -training >> step=6691700, episode=1116 reward=0.7958875 (408.19 it/sec) -training >> step=6691800, episode=1116 reward=0.8011559 (470.53 it/sec) -training >> step=6691900, episode=1116 reward=0.7940624 (475.03 it/sec) -training >> step=6692000, episode=1116 reward=0.7718019 (529.24 it/sec) -training >> step=6692100, episode=1116 reward=0.7800882 (504.83 it/sec) -training >> step=6692200, episode=1116 reward=0.8042716 (462.77 it/sec) -training >> step=6692300, episode=1116 reward=0.78541 (445.86 it/sec) -training >> step=6692400, episode=1116 reward=0.7720008 (467.55 it/sec) -training >> step=6692500, episode=1116 reward=0.7881585 (466.55 it/sec) -training >> step=6692600, episode=1116 reward=0.774925 (465.59 it/sec) -training >> step=6692700, episode=1116 reward=0.7831529 (463.93 it/sec) -training >> step=6692800, episode=1116 reward=0.7998359 (542.84 it/sec) -training >> step=6692900, episode=1116 reward=0.7917053 (498.58 it/sec) -training >> step=6693000, episode=1116 reward=0.7837854 (500.51 it/sec) -training >> step=6693100, episode=1116 reward=0.7807001 (463.04 it/sec) -training >> step=6693200, episode=1116 reward=0.7767707 (474.85 it/sec) -training >> step=6693300, episode=1116 reward=0.807578 (468.59 it/sec) -training >> step=6693400, episode=1116 reward=0.7827384 (507.15 it/sec) -training >> step=6693500, episode=1116 reward=0.7865838 (501.13 it/sec) -training >> step=6693600, episode=1116 reward=0.7922011 (491.27 it/sec) -training >> step=6693700, episode=1116 reward=0.7867927 (474.05 it/sec) -training >> step=6693800, episode=1116 reward=0.7845994 (500.82 it/sec) -training >> step=6693900, episode=1116 reward=0.7715995 (499.73 it/sec) -training >> step=6694000, episode=1116 reward=0.7877772 (481.90 it/sec) -training >> step=6694100, episode=1116 reward=0.7682725 (444.09 it/sec) -training >> step=6694200, episode=1116 reward=0.7718933 (521.42 it/sec) -training >> step=6694300, episode=1116 reward=0.7927369 (499.25 it/sec) -training >> step=6694400, episode=1116 reward=0.7807965 (490.94 it/sec) -training >> step=6694500, episode=1116 reward=0.7794688 (483.80 it/sec) -training >> step=6694600, episode=1116 reward=0.7853165 (492.30 it/sec) -training >> step=6694700, episode=1116 reward=0.788895 (505.61 it/sec) -training >> step=6694800, episode=1116 reward=0.7987961 (503.52 it/sec) -training >> step=6694900, episode=1116 reward=0.790374 (494.38 it/sec) -training >> step=6695000, episode=1116 reward=0.7582883 (510.81 it/sec) -training >> step=6695100, episode=1116 reward=0.7687396 (489.00 it/sec) -training >> step=6695200, episode=1116 reward=0.7818139 (487.92 it/sec) -training >> step=6695300, episode=1117 reward=0.7979788 (93.93 it/sec) -training >> step=6695400, episode=1117 reward=0.7761375 (473.09 it/sec) -training >> step=6695500, episode=1117 reward=0.8068853 (478.03 it/sec) -training >> step=6695600, episode=1117 reward=0.7627518 (479.63 it/sec) -training >> step=6695700, episode=1117 reward=0.8072937 (511.56 it/sec) -training >> step=6695800, episode=1117 reward=0.7842463 (493.52 it/sec) -training >> step=6695900, episode=1117 reward=0.8027525 (457.82 it/sec) -training >> step=6696000, episode=1117 reward=0.7751893 (481.58 it/sec) -training >> step=6696100, episode=1117 reward=0.7713466 (459.50 it/sec) -training >> step=6696200, episode=1117 reward=0.7868693 (462.28 it/sec) -training >> step=6696300, episode=1117 reward=0.7977517 (507.04 it/sec) -training >> step=6696400, episode=1117 reward=0.7836403 (484.27 it/sec) -training >> step=6696500, episode=1117 reward=0.7736034 (479.87 it/sec) -training >> step=6696600, episode=1117 reward=0.7559311 (454.01 it/sec) -training >> step=6696700, episode=1117 reward=0.7814295 (511.27 it/sec) -training >> step=6696800, episode=1117 reward=0.7885207 (526.48 it/sec) -training >> step=6696900, episode=1117 reward=0.7818466 (443.36 it/sec) -training >> step=6697000, episode=1117 reward=0.7747695 (513.36 it/sec) -training >> step=6697100, episode=1117 reward=0.7823094 (472.93 it/sec) -training >> step=6697200, episode=1117 reward=0.7861234 (477.74 it/sec) -training >> step=6697300, episode=1117 reward=0.8065105 (481.55 it/sec) -training >> step=6697400, episode=1117 reward=0.7737933 (512.75 it/sec) -training >> step=6697500, episode=1117 reward=0.7873557 (497.09 it/sec) -training >> step=6697600, episode=1117 reward=0.7997702 (470.25 it/sec) -training >> step=6697700, episode=1117 reward=0.8110569 (502.01 it/sec) -training >> step=6697800, episode=1117 reward=0.7819242 (550.58 it/sec) -training >> step=6697900, episode=1117 reward=0.7885958 (504.45 it/sec) -training >> step=6698000, episode=1117 reward=0.7999968 (509.85 it/sec) -training >> step=6698100, episode=1117 reward=0.7602565 (503.12 it/sec) -training >> step=6698200, episode=1117 reward=0.7952069 (455.04 it/sec) -training >> step=6698300, episode=1117 reward=0.7850508 (487.16 it/sec) -training >> step=6698400, episode=1117 reward=0.7884299 (471.89 it/sec) -training >> step=6698500, episode=1117 reward=0.770798 (502.86 it/sec) -training >> step=6698600, episode=1117 reward=0.7944007 (412.66 it/sec) -training >> step=6698700, episode=1117 reward=0.797109 (430.65 it/sec) -training >> step=6698800, episode=1117 reward=0.7904773 (495.72 it/sec) -training >> step=6698900, episode=1117 reward=0.8227889 (505.71 it/sec) -training >> step=6699000, episode=1117 reward=0.7761209 (469.24 it/sec) -training >> step=6699100, episode=1117 reward=0.7829399 (483.86 it/sec) -training >> step=6699200, episode=1117 reward=0.7770795 (531.11 it/sec) -training >> step=6699300, episode=1117 reward=0.7812187 (510.35 it/sec) -training >> step=6699400, episode=1117 reward=0.7829677 (489.66 it/sec) -training >> step=6699500, episode=1117 reward=0.7874027 (480.05 it/sec) -training >> step=6699600, episode=1117 reward=0.7696316 (496.64 it/sec) -training >> step=6699700, episode=1117 reward=0.7905312 (470.71 it/sec) -training >> step=6699800, episode=1117 reward=0.7776709 (444.53 it/sec) -training >> step=6699900, episode=1117 reward=0.7609963 (468.63 it/sec) -training >> step=6700000, episode=1117 reward=0.8012764 (473.89 it/sec) -training >> step=6700100, episode=1117 reward=0.7872576 (429.47 it/sec) -training >> step=6700200, episode=1117 reward=0.777309 (454.31 it/sec) -training >> step=6700300, episode=1117 reward=0.7801642 (525.50 it/sec) -training >> step=6700400, episode=1117 reward=0.7772754 (456.47 it/sec) -training >> step=6700500, episode=1117 reward=0.7818908 (468.72 it/sec) -training >> step=6700600, episode=1117 reward=0.7776468 (457.27 it/sec) -training >> step=6700700, episode=1117 reward=0.7712613 (484.16 it/sec) -training >> step=6700800, episode=1117 reward=0.7874551 (465.91 it/sec) -training >> step=6700900, episode=1117 reward=0.7677934 (485.08 it/sec) -training >> step=6701000, episode=1117 reward=0.7999874 (495.35 it/sec) -training >> step=6701100, episode=1117 reward=0.7985022 (455.04 it/sec) -training >> step=6701200, episode=1117 reward=0.7883373 (463.18 it/sec) -training >> step=6701300, episode=1118 reward=0.7962018 (99.92 it/sec) -training >> step=6701400, episode=1118 reward=0.7539165 (497.95 it/sec) -training >> step=6701500, episode=1118 reward=0.7966059 (498.62 it/sec) -training >> step=6701600, episode=1118 reward=0.7861323 (503.86 it/sec) -training >> step=6701700, episode=1118 reward=0.8131666 (430.14 it/sec) -training >> step=6701800, episode=1118 reward=0.7941087 (460.28 it/sec) -training >> step=6701900, episode=1118 reward=0.7997843 (487.64 it/sec) -training >> step=6702000, episode=1118 reward=0.7744642 (516.20 it/sec) -training >> step=6702100, episode=1118 reward=0.7870874 (536.20 it/sec) -training >> step=6702200, episode=1118 reward=0.8002303 (385.86 it/sec) -training >> step=6702300, episode=1118 reward=0.7901447 (419.82 it/sec) -training >> step=6702400, episode=1118 reward=0.7780967 (466.66 it/sec) -training >> step=6702500, episode=1118 reward=0.8001248 (424.05 it/sec) -training >> step=6702600, episode=1118 reward=0.8159372 (455.04 it/sec) -training >> step=6702700, episode=1118 reward=0.7870226 (488.81 it/sec) -training >> step=6702800, episode=1118 reward=0.7704147 (478.36 it/sec) -training >> step=6702900, episode=1118 reward=0.79838 (424.77 it/sec) -training >> step=6703000, episode=1118 reward=0.7779253 (434.60 it/sec) -training >> step=6703100, episode=1118 reward=0.7872138 (479.34 it/sec) -training >> step=6703200, episode=1118 reward=0.7855565 (440.69 it/sec) -training >> step=6703300, episode=1118 reward=0.7886803 (467.21 it/sec) -training >> step=6703400, episode=1118 reward=0.7768501 (492.39 it/sec) -training >> step=6703500, episode=1118 reward=0.7881199 (469.74 it/sec) -training >> step=6703600, episode=1118 reward=0.7735155 (405.51 it/sec) -training >> step=6703700, episode=1118 reward=0.7980714 (475.38 it/sec) -training >> step=6703800, episode=1118 reward=0.7906049 (495.08 it/sec) -training >> step=6703900, episode=1118 reward=0.7904328 (468.38 it/sec) -training >> step=6704000, episode=1118 reward=0.7815945 (466.37 it/sec) -training >> step=6704100, episode=1118 reward=0.7857385 (477.81 it/sec) -training >> step=6704200, episode=1118 reward=0.788928 (465.29 it/sec) -training >> step=6704300, episode=1118 reward=0.8006174 (484.85 it/sec) -training >> step=6704400, episode=1118 reward=0.8047229 (482.33 it/sec) -training >> step=6704500, episode=1118 reward=0.7862406 (438.11 it/sec) -training >> step=6704600, episode=1118 reward=0.774426 (437.43 it/sec) -training >> step=6704700, episode=1118 reward=0.7728025 (486.01 it/sec) -training >> step=6704800, episode=1118 reward=0.7790445 (463.36 it/sec) -training >> step=6704900, episode=1118 reward=0.8020098 (486.02 it/sec) -training >> step=6705000, episode=1118 reward=0.7835423 (470.96 it/sec) -training >> step=6705100, episode=1118 reward=0.7865992 (431.53 it/sec) -training >> step=6705200, episode=1118 reward=0.7737405 (495.87 it/sec) -training >> step=6705300, episode=1118 reward=0.7794718 (489.16 it/sec) -training >> step=6705400, episode=1118 reward=0.7861423 (482.09 it/sec) -training >> step=6705500, episode=1118 reward=0.7865767 (458.55 it/sec) -training >> step=6705600, episode=1118 reward=0.7851202 (499.08 it/sec) -training >> step=6705700, episode=1118 reward=0.7860711 (426.00 it/sec) -training >> step=6705800, episode=1118 reward=0.7869599 (452.07 it/sec) -training >> step=6705900, episode=1118 reward=0.8026831 (499.01 it/sec) -training >> step=6706000, episode=1118 reward=0.8078305 (477.57 it/sec) -training >> step=6706100, episode=1118 reward=0.7913584 (485.71 it/sec) -training >> step=6706200, episode=1118 reward=0.7906027 (466.09 it/sec) -training >> step=6706300, episode=1118 reward=0.7993934 (505.33 it/sec) -training >> step=6706400, episode=1118 reward=0.7750454 (461.33 it/sec) -training >> step=6706500, episode=1118 reward=0.7834278 (454.30 it/sec) -training >> step=6706600, episode=1118 reward=0.786727 (487.76 it/sec) -training >> step=6706700, episode=1118 reward=0.7881678 (498.29 it/sec) -training >> step=6706800, episode=1118 reward=0.7650967 (480.71 it/sec) -training >> step=6706900, episode=1118 reward=0.7956845 (454.90 it/sec) -training >> step=6707000, episode=1118 reward=0.7529508 (483.33 it/sec) -training >> step=6707100, episode=1118 reward=0.7828456 (480.22 it/sec) -training >> step=6707200, episode=1118 reward=0.7961915 (491.58 it/sec) -training >> step=6707300, episode=1119 reward=0.7740889 (86.28 it/sec) -training >> step=6707400, episode=1119 reward=0.7933098 (474.94 it/sec) -training >> step=6707500, episode=1119 reward=0.7712469 (427.47 it/sec) -training >> step=6707600, episode=1119 reward=0.7692682 (472.01 it/sec) -training >> step=6707700, episode=1119 reward=0.7959942 (498.26 it/sec) -training >> step=6707800, episode=1119 reward=0.7653442 (474.79 it/sec) -training >> step=6707900, episode=1119 reward=0.7858632 (492.26 it/sec) -training >> step=6708000, episode=1119 reward=0.7937648 (488.05 it/sec) -training >> step=6708100, episode=1119 reward=0.8143299 (462.65 it/sec) -training >> step=6708200, episode=1119 reward=0.7873912 (468.59 it/sec) -training >> step=6708300, episode=1119 reward=0.7942207 (470.69 it/sec) -training >> step=6708400, episode=1119 reward=0.7769871 (471.59 it/sec) -training >> step=6708500, episode=1119 reward=0.7960104 (448.39 it/sec) -training >> step=6708600, episode=1119 reward=0.7913135 (446.89 it/sec) -training >> step=6708700, episode=1119 reward=0.7999158 (490.40 it/sec) -training >> step=6708800, episode=1119 reward=0.7890788 (478.51 it/sec) -training >> step=6708900, episode=1119 reward=0.7791497 (429.94 it/sec) -training >> step=6709000, episode=1119 reward=0.7857316 (497.87 it/sec) -training >> step=6709100, episode=1119 reward=0.807929 (494.94 it/sec) -training >> step=6709200, episode=1119 reward=0.7829217 (469.61 it/sec) -training >> step=6709300, episode=1119 reward=0.7881665 (454.64 it/sec) -training >> step=6709400, episode=1119 reward=0.7832173 (447.01 it/sec) -training >> step=6709500, episode=1119 reward=0.7880484 (469.81 it/sec) -training >> step=6709600, episode=1119 reward=0.7830329 (499.56 it/sec) -training >> step=6709700, episode=1119 reward=0.8076352 (478.21 it/sec) -training >> step=6709800, episode=1119 reward=0.8050986 (492.06 it/sec) -training >> step=6709900, episode=1119 reward=0.8018963 (442.16 it/sec) -training >> step=6710000, episode=1119 reward=0.7859365 (444.99 it/sec) -training >> step=6710100, episode=1119 reward=0.784465 (483.03 it/sec) -training >> step=6710200, episode=1119 reward=0.7719013 (467.99 it/sec) -training >> step=6710300, episode=1119 reward=0.7833842 (478.65 it/sec) -training >> step=6710400, episode=1119 reward=0.8153226 (401.32 it/sec) -training >> step=6710500, episode=1119 reward=0.7693611 (433.40 it/sec) -training >> step=6710600, episode=1119 reward=0.7819455 (439.24 it/sec) -training >> step=6710700, episode=1119 reward=0.7992364 (496.12 it/sec) -training >> step=6710800, episode=1119 reward=0.7807854 (446.62 it/sec) -training >> step=6710900, episode=1119 reward=0.7760486 (440.72 it/sec) -training >> step=6711000, episode=1119 reward=0.7918164 (467.27 it/sec) -training >> step=6711100, episode=1119 reward=0.8108619 (451.75 it/sec) -training >> step=6711200, episode=1119 reward=0.791163 (500.97 it/sec) -training >> step=6711300, episode=1119 reward=0.8001735 (460.57 it/sec) -training >> step=6711400, episode=1119 reward=0.806522 (400.07 it/sec) -training >> step=6711500, episode=1119 reward=0.7816502 (460.71 it/sec) -training >> step=6711600, episode=1119 reward=0.8033563 (479.34 it/sec) -training >> step=6711700, episode=1119 reward=0.7894131 (423.37 it/sec) -training >> step=6711800, episode=1119 reward=0.809822 (403.78 it/sec) -training >> step=6711900, episode=1119 reward=0.7974073 (477.13 it/sec) -training >> step=6712000, episode=1119 reward=0.7837924 (494.82 it/sec) -training >> step=6712100, episode=1119 reward=0.7754812 (482.24 it/sec) -training >> step=6712200, episode=1119 reward=0.780636 (457.60 it/sec) -training >> step=6712300, episode=1119 reward=0.8028426 (521.16 it/sec) -training >> step=6712400, episode=1119 reward=0.785126 (521.46 it/sec) -training >> step=6712500, episode=1119 reward=0.7880806 (507.67 it/sec) -training >> step=6712600, episode=1119 reward=0.7868689 (509.14 it/sec) -training >> step=6712700, episode=1119 reward=0.7777601 (517.77 it/sec) -training >> step=6712800, episode=1119 reward=0.7735653 (476.74 it/sec) -training >> step=6712900, episode=1119 reward=0.787859 (495.71 it/sec) -training >> step=6713000, episode=1119 reward=0.7858053 (510.16 it/sec) -training >> step=6713100, episode=1119 reward=0.7819306 (503.72 it/sec) -training >> step=6713200, episode=1119 reward=0.7728621 (504.60 it/sec) -training >> step=6713300, episode=1120 reward=0.7879028 (109.91 it/sec) -training >> step=6713400, episode=1120 reward=0.797299 (516.12 it/sec) -training >> step=6713500, episode=1120 reward=0.7692317 (442.12 it/sec) -training >> step=6713600, episode=1120 reward=0.7915957 (439.46 it/sec) -training >> step=6713700, episode=1120 reward=0.7814951 (490.45 it/sec) -training >> step=6713800, episode=1120 reward=0.7870265 (450.85 it/sec) -training >> step=6713900, episode=1120 reward=0.7842159 (477.59 it/sec) -training >> step=6714000, episode=1120 reward=0.7696757 (486.05 it/sec) -training >> step=6714100, episode=1120 reward=0.7917262 (503.55 it/sec) -training >> step=6714200, episode=1120 reward=0.7892874 (538.28 it/sec) -training >> step=6714300, episode=1120 reward=0.8013815 (439.13 it/sec) -training >> step=6714400, episode=1120 reward=0.7743675 (421.27 it/sec) -training >> step=6714500, episode=1120 reward=0.7920581 (474.17 it/sec) -training >> step=6714600, episode=1120 reward=0.7933792 (508.36 it/sec) -training >> step=6714700, episode=1120 reward=0.784265 (461.41 it/sec) -training >> step=6714800, episode=1120 reward=0.7848515 (485.17 it/sec) -training >> step=6714900, episode=1120 reward=0.7876651 (502.58 it/sec) -training >> step=6715000, episode=1120 reward=0.790005 (486.25 it/sec) -training >> step=6715100, episode=1120 reward=0.7872671 (471.79 it/sec) -training >> step=6715200, episode=1120 reward=0.7718741 (526.26 it/sec) -training >> step=6715300, episode=1120 reward=0.7852405 (519.25 it/sec) -training >> step=6715400, episode=1120 reward=0.7894135 (506.79 it/sec) -training >> step=6715500, episode=1120 reward=0.7733859 (493.70 it/sec) -training >> step=6715600, episode=1120 reward=0.784457 (519.43 it/sec) -training >> step=6715700, episode=1120 reward=0.7841064 (482.07 it/sec) -training >> step=6715800, episode=1120 reward=0.7923756 (528.06 it/sec) -training >> step=6715900, episode=1120 reward=0.7705524 (509.45 it/sec) -training >> step=6716000, episode=1120 reward=0.7929319 (515.72 it/sec) -training >> step=6716100, episode=1120 reward=0.7787428 (424.99 it/sec) -training >> step=6716200, episode=1120 reward=0.7982679 (506.68 it/sec) -training >> step=6716300, episode=1120 reward=0.7828651 (502.67 it/sec) -training >> step=6716400, episode=1120 reward=0.7832488 (457.39 it/sec) -training >> step=6716500, episode=1120 reward=0.8026995 (452.94 it/sec) -training >> step=6716600, episode=1120 reward=0.7877601 (510.71 it/sec) -training >> step=6716700, episode=1120 reward=0.7834557 (502.72 it/sec) -training >> step=6716800, episode=1120 reward=0.7657654 (513.34 it/sec) -training >> step=6716900, episode=1120 reward=0.7917265 (489.62 it/sec) -training >> step=6717000, episode=1120 reward=0.7992514 (471.28 it/sec) -training >> step=6717100, episode=1120 reward=0.7681519 (523.37 it/sec) -training >> step=6717200, episode=1120 reward=0.7902765 (497.12 it/sec) -training >> step=6717300, episode=1120 reward=0.8035509 (512.53 it/sec) -training >> step=6717400, episode=1120 reward=0.7911229 (520.09 it/sec) -training >> step=6717500, episode=1120 reward=0.7901397 (461.34 it/sec) -training >> step=6717600, episode=1120 reward=0.7900148 (486.07 it/sec) -training >> step=6717700, episode=1120 reward=0.7692049 (431.42 it/sec) -training >> step=6717800, episode=1120 reward=0.7845985 (494.13 it/sec) -training >> step=6717900, episode=1120 reward=0.7906328 (444.90 it/sec) -training >> step=6718000, episode=1120 reward=0.7756767 (497.18 it/sec) -training >> step=6718100, episode=1120 reward=0.7923539 (501.31 it/sec) -training >> step=6718200, episode=1120 reward=0.7984062 (488.14 it/sec) -training >> step=6718300, episode=1120 reward=0.7828146 (499.82 it/sec) -training >> step=6718400, episode=1120 reward=0.7881914 (487.91 it/sec) -training >> step=6718500, episode=1120 reward=0.7638351 (551.76 it/sec) -training >> step=6718600, episode=1120 reward=0.7944782 (452.28 it/sec) -training >> step=6718700, episode=1120 reward=0.8031253 (514.25 it/sec) -training >> step=6718800, episode=1120 reward=0.7919309 (564.46 it/sec) -training >> step=6718900, episode=1120 reward=0.7846125 (571.73 it/sec) -training >> step=6719000, episode=1120 reward=0.772596 (498.83 it/sec) -training >> step=6719100, episode=1120 reward=0.7798998 (521.14 it/sec) -training >> step=6719200, episode=1120 reward=0.782873 (570.72 it/sec) -training >> step=6719300, episode=1121 reward=0.7739118 (112.09 it/sec) -training >> step=6719400, episode=1121 reward=0.7812988 (509.85 it/sec) -training >> step=6719500, episode=1121 reward=0.7805434 (504.70 it/sec) -training >> step=6719600, episode=1121 reward=0.8064114 (559.17 it/sec) -training >> step=6719700, episode=1121 reward=0.7535679 (485.52 it/sec) -training >> step=6719800, episode=1121 reward=0.7741323 (514.22 it/sec) -training >> step=6719900, episode=1121 reward=0.7700799 (546.23 it/sec) -training >> step=6720000, episode=1121 reward=0.7788986 (511.20 it/sec) -training >> step=6720100, episode=1121 reward=0.7821334 (499.01 it/sec) -training >> step=6720200, episode=1121 reward=0.8034883 (475.40 it/sec) -training >> step=6720300, episode=1121 reward=0.779218 (507.66 it/sec) -training >> step=6720400, episode=1121 reward=0.8089903 (507.22 it/sec) -training >> step=6720500, episode=1121 reward=0.7942253 (483.47 it/sec) -training >> step=6720600, episode=1121 reward=0.7815441 (514.58 it/sec) -training >> step=6720700, episode=1121 reward=0.7852188 (479.22 it/sec) -training >> step=6720800, episode=1121 reward=0.7746969 (398.64 it/sec) -training >> step=6720900, episode=1121 reward=0.7840958 (484.10 it/sec) -training >> step=6721000, episode=1121 reward=0.7809191 (558.12 it/sec) -training >> step=6721100, episode=1121 reward=0.7825751 (439.24 it/sec) -training >> step=6721200, episode=1121 reward=0.7888236 (425.67 it/sec) -training >> step=6721300, episode=1121 reward=0.7830103 (451.64 it/sec) -training >> step=6721400, episode=1121 reward=0.7900184 (529.96 it/sec) -training >> step=6721500, episode=1121 reward=0.7914906 (532.54 it/sec) -training >> step=6721600, episode=1121 reward=0.8005978 (540.62 it/sec) -training >> step=6721700, episode=1121 reward=0.7917196 (528.21 it/sec) -training >> step=6721800, episode=1121 reward=0.7737147 (505.82 it/sec) -training >> step=6721900, episode=1121 reward=0.790092 (526.51 it/sec) -training >> step=6722000, episode=1121 reward=0.7777502 (528.07 it/sec) -training >> step=6722100, episode=1121 reward=0.7900468 (555.97 it/sec) -training >> step=6722200, episode=1121 reward=0.7961609 (506.38 it/sec) -training >> step=6722300, episode=1121 reward=0.7904012 (480.45 it/sec) -training >> step=6722400, episode=1121 reward=0.7784189 (500.67 it/sec) -training >> step=6722500, episode=1121 reward=0.7771416 (518.06 it/sec) -training >> step=6722600, episode=1121 reward=0.792661 (529.32 it/sec) -training >> step=6722700, episode=1121 reward=0.7832758 (523.53 it/sec) -training >> step=6722800, episode=1121 reward=0.7669337 (491.99 it/sec) -training >> step=6722900, episode=1121 reward=0.8098439 (499.79 it/sec) -training >> step=6723000, episode=1121 reward=0.7804022 (525.32 it/sec) -training >> step=6723100, episode=1121 reward=0.7986608 (480.56 it/sec) -training >> step=6723200, episode=1121 reward=0.7864226 (533.27 it/sec) -training >> step=6723300, episode=1121 reward=0.8074002 (493.99 it/sec) -training >> step=6723400, episode=1121 reward=0.7853584 (429.02 it/sec) -training >> step=6723500, episode=1121 reward=0.7687436 (528.81 it/sec) -training >> step=6723600, episode=1121 reward=0.77523 (508.95 it/sec) -training >> step=6723700, episode=1121 reward=0.7950275 (505.72 it/sec) -training >> step=6723800, episode=1121 reward=0.78927 (497.83 it/sec) -training >> step=6723900, episode=1121 reward=0.7772003 (412.41 it/sec) -training >> step=6724000, episode=1121 reward=0.7933727 (474.16 it/sec) -training >> step=6724100, episode=1121 reward=0.7888548 (505.24 it/sec) -training >> step=6724200, episode=1121 reward=0.7832333 (504.30 it/sec) -training >> step=6724300, episode=1121 reward=0.7921457 (530.82 it/sec) -training >> step=6724400, episode=1121 reward=0.7726491 (452.54 it/sec) -training >> step=6724500, episode=1121 reward=0.7763361 (464.49 it/sec) -training >> step=6724600, episode=1121 reward=0.7945982 (517.51 it/sec) -training >> step=6724700, episode=1121 reward=0.78865 (515.47 it/sec) -training >> step=6724800, episode=1121 reward=0.7826381 (479.58 it/sec) -training >> step=6724900, episode=1121 reward=0.787925 (430.86 it/sec) -training >> step=6725000, episode=1121 reward=0.7874777 (454.39 it/sec) -training >> step=6725100, episode=1121 reward=0.796865 (467.87 it/sec) -training >> step=6725200, episode=1121 reward=0.7821778 (479.36 it/sec) -training >> step=6725300, episode=1122 reward=0.7830042 (74.34 it/sec) -training >> step=6725400, episode=1122 reward=0.7700501 (432.06 it/sec) -training >> step=6725500, episode=1122 reward=0.7658218 (416.16 it/sec) -training >> step=6725600, episode=1122 reward=0.7835342 (487.70 it/sec) -training >> step=6725700, episode=1122 reward=0.8039604 (509.21 it/sec) -training >> step=6725800, episode=1122 reward=0.7696159 (494.63 it/sec) -training >> step=6725900, episode=1122 reward=0.7849736 (411.80 it/sec) -training >> step=6726000, episode=1122 reward=0.7627981 (525.03 it/sec) -training >> step=6726100, episode=1122 reward=0.7992964 (522.52 it/sec) -training >> step=6726200, episode=1122 reward=0.796153 (502.50 it/sec) -training >> step=6726300, episode=1122 reward=0.769931 (501.07 it/sec) -training >> step=6726400, episode=1122 reward=0.8036095 (483.23 it/sec) -training >> step=6726500, episode=1122 reward=0.777715 (490.15 it/sec) -training >> step=6726600, episode=1122 reward=0.7869561 (551.03 it/sec) -training >> step=6726700, episode=1122 reward=0.7901717 (481.31 it/sec) -training >> step=6726800, episode=1122 reward=0.7792617 (481.51 it/sec) -training >> step=6726900, episode=1122 reward=0.7712302 (484.86 it/sec) -training >> step=6727000, episode=1122 reward=0.8005471 (489.63 it/sec) -training >> step=6727100, episode=1122 reward=0.7831722 (493.21 it/sec) -training >> step=6727200, episode=1122 reward=0.7812136 (498.61 it/sec) -training >> step=6727300, episode=1122 reward=0.7877877 (523.72 it/sec) -training >> step=6727400, episode=1122 reward=0.7831869 (458.50 it/sec) -training >> step=6727500, episode=1122 reward=0.7737854 (507.41 it/sec) -training >> step=6727600, episode=1122 reward=0.7917627 (537.39 it/sec) -training >> step=6727700, episode=1122 reward=0.7822473 (547.17 it/sec) -training >> step=6727800, episode=1122 reward=0.7910648 (530.48 it/sec) -training >> step=6727900, episode=1122 reward=0.7930171 (525.04 it/sec) -training >> step=6728000, episode=1122 reward=0.7691113 (459.26 it/sec) -training >> step=6728100, episode=1122 reward=0.7908098 (540.82 it/sec) -training >> step=6728200, episode=1122 reward=0.7812835 (550.14 it/sec) -training >> step=6728300, episode=1122 reward=0.8021809 (537.01 it/sec) -training >> step=6728400, episode=1122 reward=0.7916638 (530.94 it/sec) -training >> step=6728500, episode=1122 reward=0.7945573 (496.70 it/sec) -training >> step=6728600, episode=1122 reward=0.7895135 (517.92 it/sec) -training >> step=6728700, episode=1122 reward=0.7839139 (520.02 it/sec) -training >> step=6728800, episode=1122 reward=0.7800101 (524.77 it/sec) -training >> step=6728900, episode=1122 reward=0.7857193 (525.75 it/sec) -training >> step=6729000, episode=1122 reward=0.7922797 (515.58 it/sec) -training >> step=6729100, episode=1122 reward=0.790871 (521.38 it/sec) -training >> step=6729200, episode=1122 reward=0.78385 (527.76 it/sec) -training >> step=6729300, episode=1122 reward=0.805762 (516.24 it/sec) -training >> step=6729400, episode=1122 reward=0.764423 (497.66 it/sec) -training >> step=6729500, episode=1122 reward=0.7779106 (506.46 it/sec) -training >> step=6729600, episode=1122 reward=0.7851711 (473.99 it/sec) -training >> step=6729700, episode=1122 reward=0.7860058 (514.02 it/sec) -training >> step=6729800, episode=1122 reward=0.7934365 (543.15 it/sec) -training >> step=6729900, episode=1122 reward=0.7782798 (496.41 it/sec) -training >> step=6730000, episode=1122 reward=0.7837119 (503.11 it/sec) -training >> step=6730100, episode=1122 reward=0.7908394 (527.49 it/sec) -training >> step=6730200, episode=1122 reward=0.7770857 (532.37 it/sec) -training >> step=6730300, episode=1122 reward=0.7679034 (492.56 it/sec) -training >> step=6730400, episode=1122 reward=0.7608845 (492.71 it/sec) -training >> step=6730500, episode=1122 reward=0.7716163 (498.84 it/sec) -training >> step=6730600, episode=1122 reward=0.7880991 (440.27 it/sec) -training >> step=6730700, episode=1122 reward=0.7811011 (459.24 it/sec) -training >> step=6730800, episode=1122 reward=0.7970635 (514.96 it/sec) -training >> step=6730900, episode=1122 reward=0.8025149 (520.80 it/sec) -training >> step=6731000, episode=1122 reward=0.7812517 (540.45 it/sec) -training >> step=6731100, episode=1122 reward=0.7861202 (464.82 it/sec) -training >> step=6731200, episode=1122 reward=0.8045937 (530.12 it/sec) -training >> step=6731300, episode=1123 reward=0.8009287 (45.52 it/sec) -training >> step=6731400, episode=1123 reward=0.7764711 (514.03 it/sec) -training >> step=6731500, episode=1123 reward=0.7925789 (443.63 it/sec) -training >> step=6731600, episode=1123 reward=0.7832513 (482.92 it/sec) -training >> step=6731700, episode=1123 reward=0.7892037 (451.57 it/sec) -training >> step=6731800, episode=1123 reward=0.7905583 (535.89 it/sec) -training >> step=6731900, episode=1123 reward=0.7675049 (499.93 it/sec) -training >> step=6732000, episode=1123 reward=0.7688931 (502.30 it/sec) -training >> step=6732100, episode=1123 reward=0.7760584 (525.62 it/sec) -training >> step=6732200, episode=1123 reward=0.7995679 (502.64 it/sec) -training >> step=6732300, episode=1123 reward=0.784985 (504.87 it/sec) -training >> step=6732400, episode=1123 reward=0.7954594 (517.32 it/sec) -training >> step=6732500, episode=1123 reward=0.7959645 (500.71 it/sec) -training >> step=6732600, episode=1123 reward=0.7995384 (532.70 it/sec) -training >> step=6732700, episode=1123 reward=0.7809358 (472.84 it/sec) -training >> step=6732800, episode=1123 reward=0.7698426 (495.24 it/sec) -training >> step=6732900, episode=1123 reward=0.77912 (456.65 it/sec) -training >> step=6733000, episode=1123 reward=0.7877064 (465.21 it/sec) -training >> step=6733100, episode=1123 reward=0.7832257 (503.15 it/sec) -training >> step=6733200, episode=1123 reward=0.7919998 (488.76 it/sec) -training >> step=6733300, episode=1123 reward=0.7916043 (509.83 it/sec) -training >> step=6733400, episode=1123 reward=0.7789384 (478.14 it/sec) -training >> step=6733500, episode=1123 reward=0.7759853 (521.25 it/sec) -training >> step=6733600, episode=1123 reward=0.7933744 (456.74 it/sec) -training >> step=6733700, episode=1123 reward=0.8015552 (470.54 it/sec) -training >> step=6733800, episode=1123 reward=0.7828613 (490.50 it/sec) -training >> step=6733900, episode=1123 reward=0.7846183 (520.91 it/sec) -training >> step=6734000, episode=1123 reward=0.7768398 (486.23 it/sec) -training >> step=6734100, episode=1123 reward=0.7994912 (474.05 it/sec) -training >> step=6734200, episode=1123 reward=0.7681674 (456.99 it/sec) -training >> step=6734300, episode=1123 reward=0.8099995 (499.53 it/sec) -training >> step=6734400, episode=1123 reward=0.7966793 (405.63 it/sec) -training >> step=6734500, episode=1123 reward=0.7987855 (456.71 it/sec) -training >> step=6734600, episode=1123 reward=0.7893748 (505.17 it/sec) -training >> step=6734700, episode=1123 reward=0.7965041 (459.75 it/sec) -training >> step=6734800, episode=1123 reward=0.8048688 (499.82 it/sec) -training >> step=6734900, episode=1123 reward=0.7722848 (493.24 it/sec) -training >> step=6735000, episode=1123 reward=0.8019775 (437.77 it/sec) -training >> step=6735100, episode=1123 reward=0.7906851 (457.49 it/sec) -training >> step=6735200, episode=1123 reward=0.7805663 (460.10 it/sec) -training >> step=6735300, episode=1123 reward=0.7945629 (561.68 it/sec) -training >> step=6735400, episode=1123 reward=0.8079034 (551.54 it/sec) -training >> step=6735500, episode=1123 reward=0.8035035 (513.89 it/sec) -training >> step=6735600, episode=1123 reward=0.7786972 (482.08 it/sec) -training >> step=6735700, episode=1123 reward=0.769751 (517.46 it/sec) -training >> step=6735800, episode=1123 reward=0.7979308 (507.42 it/sec) -training >> step=6735900, episode=1123 reward=0.7855371 (521.01 it/sec) -training >> step=6736000, episode=1123 reward=0.783989 (505.76 it/sec) -training >> step=6736100, episode=1123 reward=0.7860031 (515.30 it/sec) -training >> step=6736200, episode=1123 reward=0.7902864 (455.94 it/sec) -training >> step=6736300, episode=1123 reward=0.7848278 (501.55 it/sec) -training >> step=6736400, episode=1123 reward=0.7720041 (523.94 it/sec) -training >> step=6736500, episode=1123 reward=0.7886516 (504.72 it/sec) -training >> step=6736600, episode=1123 reward=0.8014774 (511.70 it/sec) -training >> step=6736700, episode=1123 reward=0.7744787 (497.22 it/sec) -training >> step=6736800, episode=1123 reward=0.8148261 (544.90 it/sec) -training >> step=6736900, episode=1123 reward=0.7916808 (506.85 it/sec) -training >> step=6737000, episode=1123 reward=0.7761915 (506.71 it/sec) -training >> step=6737100, episode=1123 reward=0.7659001 (506.32 it/sec) -training >> step=6737200, episode=1123 reward=0.8144368 (517.08 it/sec) -training >> step=6737300, episode=1124 reward=0.7892725 (44.32 it/sec) -training >> step=6737400, episode=1124 reward=0.767293 (533.22 it/sec) -training >> step=6737500, episode=1124 reward=0.7823825 (487.60 it/sec) -training >> step=6737600, episode=1124 reward=0.8148335 (492.25 it/sec) -training >> step=6737700, episode=1124 reward=0.7648478 (513.34 it/sec) -training >> step=6737800, episode=1124 reward=0.7873594 (530.83 it/sec) -training >> step=6737900, episode=1124 reward=0.7904303 (517.90 it/sec) -training >> step=6738000, episode=1124 reward=0.7903304 (538.47 it/sec) -training >> step=6738100, episode=1124 reward=0.8018922 (479.76 it/sec) -training >> step=6738200, episode=1124 reward=0.7779668 (515.86 it/sec) -training >> step=6738300, episode=1124 reward=0.7943404 (443.82 it/sec) -training >> step=6738400, episode=1124 reward=0.7738832 (501.74 it/sec) -training >> step=6738500, episode=1124 reward=0.7874829 (476.53 it/sec) -training >> step=6738600, episode=1124 reward=0.7623982 (463.11 it/sec) -training >> step=6738700, episode=1124 reward=0.7931661 (511.92 it/sec) -training >> step=6738800, episode=1124 reward=0.7940218 (456.71 it/sec) -training >> step=6738900, episode=1124 reward=0.7801992 (505.64 it/sec) -training >> step=6739000, episode=1124 reward=0.7909551 (501.42 it/sec) -training >> step=6739100, episode=1124 reward=0.7762665 (478.06 it/sec) -training >> step=6739200, episode=1124 reward=0.7660348 (517.25 it/sec) -training >> step=6739300, episode=1124 reward=0.7966768 (456.39 it/sec) -training >> step=6739400, episode=1124 reward=0.7996234 (473.98 it/sec) -training >> step=6739500, episode=1124 reward=0.7780426 (510.95 it/sec) -training >> step=6739600, episode=1124 reward=0.7866731 (491.19 it/sec) -training >> step=6739700, episode=1124 reward=0.7845927 (471.02 it/sec) -training >> step=6739800, episode=1124 reward=0.7912496 (490.15 it/sec) -training >> step=6739900, episode=1124 reward=0.7981316 (501.17 it/sec) -training >> step=6740000, episode=1124 reward=0.7692338 (514.77 it/sec) -training >> step=6740100, episode=1124 reward=0.7702186 (483.06 it/sec) -training >> step=6740200, episode=1124 reward=0.7759185 (454.87 it/sec) -training >> step=6740300, episode=1124 reward=0.7904282 (418.38 it/sec) -training >> step=6740400, episode=1124 reward=0.7772593 (397.08 it/sec) -training >> step=6740500, episode=1124 reward=0.7889243 (384.30 it/sec) -training >> step=6740600, episode=1124 reward=0.790263 (464.78 it/sec) -training >> step=6740700, episode=1124 reward=0.7684338 (452.22 it/sec) -training >> step=6740800, episode=1124 reward=0.7868873 (439.86 it/sec) -training >> step=6740900, episode=1124 reward=0.7916611 (425.11 it/sec) -training >> step=6741000, episode=1124 reward=0.7998235 (435.72 it/sec) -training >> step=6741100, episode=1124 reward=0.798856 (379.19 it/sec) -training >> step=6741200, episode=1124 reward=0.7934696 (403.54 it/sec) -training >> step=6741300, episode=1124 reward=0.7766463 (430.73 it/sec) -training >> step=6741400, episode=1124 reward=0.7807086 (436.72 it/sec) -training >> step=6741500, episode=1124 reward=0.7836787 (411.57 it/sec) -training >> step=6741600, episode=1124 reward=0.7496375 (498.98 it/sec) -training >> step=6741700, episode=1124 reward=0.7638619 (517.24 it/sec) -training >> step=6741800, episode=1124 reward=0.7908913 (507.99 it/sec) -training >> step=6741900, episode=1124 reward=0.7895899 (521.18 it/sec) -training >> step=6742000, episode=1124 reward=0.7671531 (533.81 it/sec) -training >> step=6742100, episode=1124 reward=0.7833686 (521.22 it/sec) -training >> step=6742200, episode=1124 reward=0.7831337 (505.35 it/sec) -training >> step=6742300, episode=1124 reward=0.8051416 (514.75 it/sec) -training >> step=6742400, episode=1124 reward=0.7965239 (528.30 it/sec) -training >> step=6742500, episode=1124 reward=0.7887472 (511.01 it/sec) -training >> step=6742600, episode=1124 reward=0.7826686 (474.50 it/sec) -training >> step=6742700, episode=1124 reward=0.7964503 (503.14 it/sec) -training >> step=6742800, episode=1124 reward=0.7937771 (510.97 it/sec) -training >> step=6742900, episode=1124 reward=0.7917242 (491.39 it/sec) -training >> step=6743000, episode=1124 reward=0.7907137 (485.21 it/sec) -training >> step=6743100, episode=1124 reward=0.7948472 (462.29 it/sec) -training >> step=6743200, episode=1124 reward=0.7849801 (497.47 it/sec) -training >> step=6743300, episode=1125 reward=0.781642 (56.38 it/sec) -training >> step=6743400, episode=1125 reward=0.7867393 (390.64 it/sec) -training >> step=6743500, episode=1125 reward=0.7950614 (473.13 it/sec) -training >> step=6743600, episode=1125 reward=0.7873006 (435.93 it/sec) -training >> step=6743700, episode=1125 reward=0.7786989 (511.54 it/sec) -training >> step=6743800, episode=1125 reward=0.7877391 (490.72 it/sec) -training >> step=6743900, episode=1125 reward=0.7914945 (467.30 it/sec) -training >> step=6744000, episode=1125 reward=0.7890373 (456.61 it/sec) -training >> step=6744100, episode=1125 reward=0.7874054 (502.10 it/sec) -training >> step=6744200, episode=1125 reward=0.7825862 (460.70 it/sec) -training >> step=6744300, episode=1125 reward=0.7831787 (481.21 it/sec) -training >> step=6744400, episode=1125 reward=0.7937734 (509.41 it/sec) -training >> step=6744500, episode=1125 reward=0.7907053 (463.21 it/sec) -training >> step=6744600, episode=1125 reward=0.7734815 (504.69 it/sec) -training >> step=6744700, episode=1125 reward=0.7719523 (465.77 it/sec) -training >> step=6744800, episode=1125 reward=0.7847174 (451.55 it/sec) -training >> step=6744900, episode=1125 reward=0.7931973 (461.95 it/sec) -training >> step=6745000, episode=1125 reward=0.7918514 (416.93 it/sec) -training >> step=6745100, episode=1125 reward=0.779337 (456.55 it/sec) -training >> step=6745200, episode=1125 reward=0.7679383 (495.13 it/sec) -training >> step=6745300, episode=1125 reward=0.7821691 (431.77 it/sec) -training >> step=6745400, episode=1125 reward=0.8004565 (525.54 it/sec) -training >> step=6745500, episode=1125 reward=0.7850223 (473.36 it/sec) -training >> step=6745600, episode=1125 reward=0.7805561 (448.42 it/sec) -training >> step=6745700, episode=1125 reward=0.7978569 (502.92 it/sec) -training >> step=6745800, episode=1125 reward=0.7879986 (497.97 it/sec) -training >> step=6745900, episode=1125 reward=0.7739664 (524.02 it/sec) -training >> step=6746000, episode=1125 reward=0.7755545 (519.16 it/sec) -training >> step=6746100, episode=1125 reward=0.7984541 (488.46 it/sec) -training >> step=6746200, episode=1125 reward=0.7946485 (512.33 it/sec) -training >> step=6746300, episode=1125 reward=0.7878541 (464.63 it/sec) -training >> step=6746400, episode=1125 reward=0.8060796 (495.26 it/sec) -training >> step=6746500, episode=1125 reward=0.799305 (546.07 it/sec) -training >> step=6746600, episode=1125 reward=0.7927566 (526.17 it/sec) -training >> step=6746700, episode=1125 reward=0.7957091 (436.52 it/sec) -training >> step=6746800, episode=1125 reward=0.7834089 (394.47 it/sec) -training >> step=6746900, episode=1125 reward=0.7961529 (537.60 it/sec) -training >> step=6747000, episode=1125 reward=0.7971779 (448.25 it/sec) -training >> step=6747100, episode=1125 reward=0.7905463 (447.48 it/sec) -training >> step=6747200, episode=1125 reward=0.7930564 (497.16 it/sec) -training >> step=6747300, episode=1125 reward=0.815605 (441.14 it/sec) -training >> step=6747400, episode=1125 reward=0.7689514 (471.17 it/sec) -training >> step=6747500, episode=1125 reward=0.7896191 (421.47 it/sec) -training >> step=6747600, episode=1125 reward=0.7995555 (481.04 it/sec) -training >> step=6747700, episode=1125 reward=0.7918302 (500.91 it/sec) -training >> step=6747800, episode=1125 reward=0.7773548 (503.29 it/sec) -training >> step=6747900, episode=1125 reward=0.7796257 (544.54 it/sec) -training >> step=6748000, episode=1125 reward=0.7655508 (501.08 it/sec) -training >> step=6748100, episode=1125 reward=0.7850393 (502.42 it/sec) -training >> step=6748200, episode=1125 reward=0.7852145 (478.98 it/sec) -training >> step=6748300, episode=1125 reward=0.7980465 (492.34 it/sec) -training >> step=6748400, episode=1125 reward=0.7809522 (489.35 it/sec) -training >> step=6748500, episode=1125 reward=0.7709056 (526.69 it/sec) -training >> step=6748600, episode=1125 reward=0.8032347 (511.20 it/sec) -training >> step=6748700, episode=1125 reward=0.7947899 (524.86 it/sec) -training >> step=6748800, episode=1125 reward=0.7601951 (513.83 it/sec) -training >> step=6748900, episode=1125 reward=0.7724159 (535.59 it/sec) -training >> step=6749000, episode=1125 reward=0.7843788 (537.30 it/sec) -training >> step=6749100, episode=1125 reward=0.7794042 (504.71 it/sec) -training >> step=6749200, episode=1125 reward=0.7890456 (515.97 it/sec) -training >> step=6749300, episode=1126 reward=0.7844279 (131.13 it/sec) -training >> step=6749400, episode=1126 reward=0.7920579 (495.37 it/sec) -training >> step=6749500, episode=1126 reward=0.7831144 (346.78 it/sec) -training >> step=6749600, episode=1126 reward=0.7641944 (499.31 it/sec) -training >> step=6749700, episode=1126 reward=0.7786621 (515.95 it/sec) -training >> step=6749800, episode=1126 reward=0.7780752 (518.11 it/sec) -training >> step=6749900, episode=1126 reward=0.7905428 (507.26 it/sec) -training >> step=6750000, episode=1126 reward=0.7883732 (479.18 it/sec) -training >> step=6750100, episode=1126 reward=0.7820956 (453.06 it/sec) -training >> step=6750200, episode=1126 reward=0.7882433 (506.81 it/sec) -training >> step=6750300, episode=1126 reward=0.770941 (487.00 it/sec) -training >> step=6750400, episode=1126 reward=0.7796391 (493.99 it/sec) -training >> step=6750500, episode=1126 reward=0.7857602 (447.75 it/sec) -training >> step=6750600, episode=1126 reward=0.7614921 (526.85 it/sec) -training >> step=6750700, episode=1126 reward=0.791768 (534.23 it/sec) -training >> step=6750800, episode=1126 reward=0.7935581 (447.74 it/sec) -training >> step=6750900, episode=1126 reward=0.7835531 (461.50 it/sec) -training >> step=6751000, episode=1126 reward=0.7824337 (475.97 it/sec) -training >> step=6751100, episode=1126 reward=0.802168 (537.62 it/sec) -training >> step=6751200, episode=1126 reward=0.7891434 (541.57 it/sec) -training >> step=6751300, episode=1126 reward=0.794947 (522.07 it/sec) -training >> step=6751400, episode=1126 reward=0.7851521 (462.28 it/sec) -training >> step=6751500, episode=1126 reward=0.7892852 (488.35 it/sec) -training >> step=6751600, episode=1126 reward=0.8029196 (560.08 it/sec) -training >> step=6751700, episode=1126 reward=0.7915931 (521.64 it/sec) -training >> step=6751800, episode=1126 reward=0.7796113 (503.98 it/sec) -training >> step=6751900, episode=1126 reward=0.7726491 (458.15 it/sec) -training >> step=6752000, episode=1126 reward=0.8058649 (448.92 it/sec) -training >> step=6752100, episode=1126 reward=0.7984514 (494.31 it/sec) -training >> step=6752200, episode=1126 reward=0.7845512 (497.23 it/sec) -training >> step=6752300, episode=1126 reward=0.7958944 (491.79 it/sec) -training >> step=6752400, episode=1126 reward=0.7804869 (509.43 it/sec) -training >> step=6752500, episode=1126 reward=0.7950093 (472.25 it/sec) -training >> step=6752600, episode=1126 reward=0.7716562 (486.07 it/sec) -training >> step=6752700, episode=1126 reward=0.7755705 (516.57 it/sec) -training >> step=6752800, episode=1126 reward=0.7962375 (488.31 it/sec) -training >> step=6752900, episode=1126 reward=0.8054011 (495.27 it/sec) -training >> step=6753000, episode=1126 reward=0.7949925 (533.37 it/sec) -training >> step=6753100, episode=1126 reward=0.7992581 (512.75 it/sec) -training >> step=6753200, episode=1126 reward=0.7719507 (494.61 it/sec) -training >> step=6753300, episode=1126 reward=0.7655872 (495.23 it/sec) -training >> step=6753400, episode=1126 reward=0.7763891 (497.99 it/sec) -training >> step=6753500, episode=1126 reward=0.805604 (496.35 it/sec) -training >> step=6753600, episode=1126 reward=0.7676474 (488.12 it/sec) -training >> step=6753700, episode=1126 reward=0.7919776 (519.81 it/sec) -training >> step=6753800, episode=1126 reward=0.7881994 (481.08 it/sec) -training >> step=6753900, episode=1126 reward=0.7985554 (469.58 it/sec) -training >> step=6754000, episode=1126 reward=0.7835223 (495.91 it/sec) -training >> step=6754100, episode=1126 reward=0.7955198 (530.64 it/sec) -training >> step=6754200, episode=1126 reward=0.7632965 (523.04 it/sec) -training >> step=6754300, episode=1126 reward=0.7811993 (476.82 it/sec) -training >> step=6754400, episode=1126 reward=0.7863811 (497.24 it/sec) -training >> step=6754500, episode=1126 reward=0.79844 (484.48 it/sec) -training >> step=6754600, episode=1126 reward=0.8006798 (456.60 it/sec) -training >> step=6754700, episode=1126 reward=0.8010811 (532.95 it/sec) -training >> step=6754800, episode=1126 reward=0.7938711 (490.23 it/sec) -training >> step=6754900, episode=1126 reward=0.7955915 (455.42 it/sec) -training >> step=6755000, episode=1126 reward=0.7775465 (487.80 it/sec) -training >> step=6755100, episode=1126 reward=0.7722659 (508.23 it/sec) -training >> step=6755200, episode=1126 reward=0.7889515 (531.96 it/sec) -training >> step=6755300, episode=1127 reward=0.7842566 (109.40 it/sec) -training >> step=6755400, episode=1127 reward=0.7813148 (504.07 it/sec) -training >> step=6755500, episode=1127 reward=0.797603 (495.64 it/sec) -training >> step=6755600, episode=1127 reward=0.7842922 (510.56 it/sec) -training >> step=6755700, episode=1127 reward=0.7694627 (334.19 it/sec) -training >> step=6755800, episode=1127 reward=0.7656978 (507.72 it/sec) -training >> step=6755900, episode=1127 reward=0.7639285 (506.55 it/sec) -training >> step=6756000, episode=1127 reward=0.7818639 (516.06 it/sec) -training >> step=6756100, episode=1127 reward=0.7944065 (498.25 it/sec) -training >> step=6756200, episode=1127 reward=0.7849897 (465.02 it/sec) -training >> step=6756300, episode=1127 reward=0.7962088 (497.71 it/sec) -training >> step=6756400, episode=1127 reward=0.7729404 (518.70 it/sec) -training >> step=6756500, episode=1127 reward=0.790857 (460.11 it/sec) -training >> step=6756600, episode=1127 reward=0.7852036 (488.29 it/sec) -training >> step=6756700, episode=1127 reward=0.8050326 (481.58 it/sec) -training >> step=6756800, episode=1127 reward=0.7977887 (501.86 it/sec) -training >> step=6756900, episode=1127 reward=0.7905435 (439.48 it/sec) -training >> step=6757000, episode=1127 reward=0.788304 (525.32 it/sec) -training >> step=6757100, episode=1127 reward=0.7791354 (523.16 it/sec) -training >> step=6757200, episode=1127 reward=0.7926381 (462.85 it/sec) -training >> step=6757300, episode=1127 reward=0.7923806 (504.00 it/sec) -training >> step=6757400, episode=1127 reward=0.7776066 (534.33 it/sec) -training >> step=6757500, episode=1127 reward=0.7878725 (472.53 it/sec) -training >> step=6757600, episode=1127 reward=0.7869146 (516.47 it/sec) -training >> step=6757700, episode=1127 reward=0.8026298 (455.18 it/sec) -training >> step=6757800, episode=1127 reward=0.7970281 (509.09 it/sec) -training >> step=6757900, episode=1127 reward=0.792587 (505.21 it/sec) -training >> step=6758000, episode=1127 reward=0.7908863 (503.21 it/sec) -training >> step=6758100, episode=1127 reward=0.7776385 (566.51 it/sec) -training >> step=6758200, episode=1127 reward=0.7845379 (486.44 it/sec) -training >> step=6758300, episode=1127 reward=0.7799845 (503.33 it/sec) -training >> step=6758400, episode=1127 reward=0.7964551 (513.85 it/sec) -training >> step=6758500, episode=1127 reward=0.7888346 (473.85 it/sec) -training >> step=6758600, episode=1127 reward=0.7664502 (506.18 it/sec) -training >> step=6758700, episode=1127 reward=0.7790975 (501.33 it/sec) -training >> step=6758800, episode=1127 reward=0.7892786 (503.75 it/sec) -training >> step=6758900, episode=1127 reward=0.7865636 (500.49 it/sec) -training >> step=6759000, episode=1127 reward=0.8022096 (505.08 it/sec) -training >> step=6759100, episode=1127 reward=0.7812992 (472.48 it/sec) -training >> step=6759200, episode=1127 reward=0.781978 (537.87 it/sec) -training >> step=6759300, episode=1127 reward=0.7786058 (457.17 it/sec) -training >> step=6759400, episode=1127 reward=0.787212 (510.56 it/sec) -training >> step=6759500, episode=1127 reward=0.796988 (516.42 it/sec) -training >> step=6759600, episode=1127 reward=0.7940668 (515.49 it/sec) -training >> step=6759700, episode=1127 reward=0.7826858 (522.37 it/sec) -training >> step=6759800, episode=1127 reward=0.7768489 (414.24 it/sec) -training >> step=6759900, episode=1127 reward=0.7846414 (549.71 it/sec) -training >> step=6760000, episode=1127 reward=0.7929198 (481.03 it/sec) -training >> step=6760100, episode=1127 reward=0.7767413 (486.64 it/sec) -training >> step=6760200, episode=1127 reward=0.7706162 (502.68 it/sec) -training >> step=6760300, episode=1127 reward=0.7896745 (528.18 it/sec) -training >> step=6760400, episode=1127 reward=0.8007355 (529.67 it/sec) -training >> step=6760500, episode=1127 reward=0.7673537 (508.27 it/sec) -training >> step=6760600, episode=1127 reward=0.7676625 (526.93 it/sec) -training >> step=6760700, episode=1127 reward=0.7963089 (532.97 it/sec) -training >> step=6760800, episode=1127 reward=0.7782354 (520.38 it/sec) -training >> step=6760900, episode=1127 reward=0.7889959 (502.59 it/sec) -training >> step=6761000, episode=1127 reward=0.7952515 (534.90 it/sec) -training >> step=6761100, episode=1127 reward=0.7764279 (490.94 it/sec) -training >> step=6761200, episode=1127 reward=0.779642 (542.59 it/sec) -training >> step=6761300, episode=1128 reward=0.7847747 (122.76 it/sec) -training >> step=6761400, episode=1128 reward=0.7806528 (533.30 it/sec) -training >> step=6761500, episode=1128 reward=0.7960587 (481.08 it/sec) -training >> step=6761600, episode=1128 reward=0.7670621 (429.38 it/sec) -training >> step=6761700, episode=1128 reward=0.7847107 (516.92 it/sec) -training >> step=6761800, episode=1128 reward=0.8067105 (523.67 it/sec) -training >> step=6761900, episode=1128 reward=0.7905227 (370.13 it/sec) -training >> step=6762000, episode=1128 reward=0.7863175 (505.18 it/sec) -training >> step=6762100, episode=1128 reward=0.7921641 (501.85 it/sec) -training >> step=6762200, episode=1128 reward=0.785646 (495.50 it/sec) -training >> step=6762300, episode=1128 reward=0.7942865 (507.09 it/sec) -training >> step=6762400, episode=1128 reward=0.7905905 (500.22 it/sec) -training >> step=6762500, episode=1128 reward=0.8089436 (499.34 it/sec) -training >> step=6762600, episode=1128 reward=0.7768406 (494.87 it/sec) -training >> step=6762700, episode=1128 reward=0.7895219 (497.80 it/sec) -training >> step=6762800, episode=1128 reward=0.777665 (512.01 it/sec) -training >> step=6762900, episode=1128 reward=0.7668146 (507.71 it/sec) -training >> step=6763000, episode=1128 reward=0.7834989 (505.45 it/sec) -training >> step=6763100, episode=1128 reward=0.7836323 (448.64 it/sec) -training >> step=6763200, episode=1128 reward=0.7949589 (486.78 it/sec) -training >> step=6763300, episode=1128 reward=0.7849694 (497.49 it/sec) -training >> step=6763400, episode=1128 reward=0.8094788 (489.46 it/sec) -training >> step=6763500, episode=1128 reward=0.7863087 (503.08 it/sec) -training >> step=6763600, episode=1128 reward=0.7918859 (495.02 it/sec) -training >> step=6763700, episode=1128 reward=0.808052 (520.23 it/sec) -training >> step=6763800, episode=1128 reward=0.7889114 (486.25 it/sec) -training >> step=6763900, episode=1128 reward=0.7959774 (534.33 it/sec) -training >> step=6764000, episode=1128 reward=0.7939065 (482.52 it/sec) -training >> step=6764100, episode=1128 reward=0.7588477 (469.44 it/sec) -training >> step=6764200, episode=1128 reward=0.7959393 (443.07 it/sec) -training >> step=6764300, episode=1128 reward=0.8103208 (508.43 it/sec) -training >> step=6764400, episode=1128 reward=0.7941061 (521.78 it/sec) -training >> step=6764500, episode=1128 reward=0.7791517 (505.80 it/sec) -training >> step=6764600, episode=1128 reward=0.7863881 (481.11 it/sec) -training >> step=6764700, episode=1128 reward=0.7713777 (523.06 it/sec) -training >> step=6764800, episode=1128 reward=0.7784023 (506.00 it/sec) -training >> step=6764900, episode=1128 reward=0.782418 (515.78 it/sec) -training >> step=6765000, episode=1128 reward=0.7951651 (454.91 it/sec) -training >> step=6765100, episode=1128 reward=0.7775228 (417.62 it/sec) -training >> step=6765200, episode=1128 reward=0.7763036 (376.12 it/sec) -training >> step=6765300, episode=1128 reward=0.7855624 (400.45 it/sec) -training >> step=6765400, episode=1128 reward=0.7840896 (415.81 it/sec) -training >> step=6765500, episode=1128 reward=0.7935218 (378.21 it/sec) -training >> step=6765600, episode=1128 reward=0.7906235 (491.31 it/sec) -training >> step=6765700, episode=1128 reward=0.768463 (505.59 it/sec) -training >> step=6765800, episode=1128 reward=0.7933398 (418.70 it/sec) -training >> step=6765900, episode=1128 reward=0.787138 (421.69 it/sec) -training >> step=6766000, episode=1128 reward=0.7854792 (441.53 it/sec) -training >> step=6766100, episode=1128 reward=0.7616637 (463.09 it/sec) -training >> step=6766200, episode=1128 reward=0.7695175 (498.36 it/sec) -training >> step=6766300, episode=1128 reward=0.788783 (491.92 it/sec) -training >> step=6766400, episode=1128 reward=0.789957 (503.90 it/sec) -training >> step=6766500, episode=1128 reward=0.7937707 (478.99 it/sec) -training >> step=6766600, episode=1128 reward=0.7863297 (533.58 it/sec) -training >> step=6766700, episode=1128 reward=0.8124435 (516.31 it/sec) -training >> step=6766800, episode=1128 reward=0.7915545 (514.52 it/sec) -training >> step=6766900, episode=1128 reward=0.7712433 (416.92 it/sec) -training >> step=6767000, episode=1128 reward=0.8072973 (443.46 it/sec) -training >> step=6767100, episode=1128 reward=0.7978589 (494.38 it/sec) -training >> step=6767200, episode=1128 reward=0.769426 (502.68 it/sec) -training >> step=6767300, episode=1129 reward=0.793501 (133.94 it/sec) -training >> step=6767400, episode=1129 reward=0.7833456 (484.63 it/sec) -training >> step=6767500, episode=1129 reward=0.7750462 (496.60 it/sec) -training >> step=6767600, episode=1129 reward=0.7709827 (506.04 it/sec) -training >> step=6767700, episode=1129 reward=0.7981458 (460.25 it/sec) -training >> step=6767800, episode=1129 reward=0.7942306 (488.88 it/sec) -training >> step=6767900, episode=1129 reward=0.7920045 (487.24 it/sec) -training >> step=6768000, episode=1129 reward=0.8013358 (500.35 it/sec) -training >> step=6768100, episode=1129 reward=0.7858405 (365.11 it/sec) -training >> step=6768200, episode=1129 reward=0.7807775 (448.39 it/sec) -training >> step=6768300, episode=1129 reward=0.7839184 (493.21 it/sec) -training >> step=6768400, episode=1129 reward=0.7819033 (475.12 it/sec) -training >> step=6768500, episode=1129 reward=0.7831765 (506.64 it/sec) -training >> step=6768600, episode=1129 reward=0.7951444 (525.28 it/sec) -training >> step=6768700, episode=1129 reward=0.777081 (479.06 it/sec) -training >> step=6768800, episode=1129 reward=0.785346 (453.35 it/sec) -training >> step=6768900, episode=1129 reward=0.7756593 (502.92 it/sec) -training >> step=6769000, episode=1129 reward=0.790375 (527.43 it/sec) -training >> step=6769100, episode=1129 reward=0.7898833 (486.42 it/sec) -training >> step=6769200, episode=1129 reward=0.8128759 (479.42 it/sec) -training >> step=6769300, episode=1129 reward=0.7961901 (509.36 it/sec) -training >> step=6769400, episode=1129 reward=0.7726505 (486.74 it/sec) -training >> step=6769500, episode=1129 reward=0.7875344 (492.84 it/sec) -training >> step=6769600, episode=1129 reward=0.7774411 (510.78 it/sec) -training >> step=6769700, episode=1129 reward=0.8064225 (476.14 it/sec) -training >> step=6769800, episode=1129 reward=0.7672997 (507.12 it/sec) -training >> step=6769900, episode=1129 reward=0.8029887 (477.95 it/sec) -training >> step=6770000, episode=1129 reward=0.7847237 (464.27 it/sec) -training >> step=6770100, episode=1129 reward=0.8086297 (512.65 it/sec) -training >> step=6770200, episode=1129 reward=0.7942809 (470.93 it/sec) -training >> step=6770300, episode=1129 reward=0.7831084 (462.32 it/sec) -training >> step=6770400, episode=1129 reward=0.7876687 (512.52 it/sec) -training >> step=6770500, episode=1129 reward=0.7997011 (478.87 it/sec) -training >> step=6770600, episode=1129 reward=0.78327 (467.47 it/sec) -training >> step=6770700, episode=1129 reward=0.799221 (474.82 it/sec) -training >> step=6770800, episode=1129 reward=0.7878178 (475.89 it/sec) -training >> step=6770900, episode=1129 reward=0.8099867 (498.38 it/sec) -training >> step=6771000, episode=1129 reward=0.7976574 (456.75 it/sec) -training >> step=6771100, episode=1129 reward=0.7944615 (558.42 it/sec) -training >> step=6771200, episode=1129 reward=0.8004007 (433.34 it/sec) -training >> step=6771300, episode=1129 reward=0.7978652 (496.80 it/sec) -training >> step=6771400, episode=1129 reward=0.8034706 (492.22 it/sec) -training >> step=6771500, episode=1129 reward=0.7815979 (525.18 it/sec) -training >> step=6771600, episode=1129 reward=0.781772 (526.75 it/sec) -training >> step=6771700, episode=1129 reward=0.7951913 (496.80 it/sec) -training >> step=6771800, episode=1129 reward=0.7794014 (483.66 it/sec) -training >> step=6771900, episode=1129 reward=0.7888837 (499.91 it/sec) -training >> step=6772000, episode=1129 reward=0.78477 (495.77 it/sec) -training >> step=6772100, episode=1129 reward=0.7945205 (507.44 it/sec) -training >> step=6772200, episode=1129 reward=0.7881448 (501.49 it/sec) -training >> step=6772300, episode=1129 reward=0.7694099 (484.61 it/sec) -training >> step=6772400, episode=1129 reward=0.8094813 (482.96 it/sec) -training >> step=6772500, episode=1129 reward=0.7660596 (503.65 it/sec) -training >> step=6772600, episode=1129 reward=0.7827292 (509.58 it/sec) -training >> step=6772700, episode=1129 reward=0.7948967 (503.46 it/sec) -training >> step=6772800, episode=1129 reward=0.7855195 (449.38 it/sec) -training >> step=6772900, episode=1129 reward=0.7992666 (486.11 it/sec) -training >> step=6773000, episode=1129 reward=0.7790601 (409.21 it/sec) -training >> step=6773100, episode=1129 reward=0.7888775 (411.01 it/sec) -training >> step=6773200, episode=1129 reward=0.8067111 (408.67 it/sec) -training >> step=6773300, episode=1130 reward=0.773028 (120.31 it/sec) -training >> step=6773400, episode=1130 reward=0.780768 (480.23 it/sec) -training >> step=6773500, episode=1130 reward=0.7689449 (481.28 it/sec) -training >> step=6773600, episode=1130 reward=0.768939 (461.60 it/sec) -training >> step=6773700, episode=1130 reward=0.7782542 (505.99 it/sec) -training >> step=6773800, episode=1130 reward=0.7995512 (484.34 it/sec) -training >> step=6773900, episode=1130 reward=0.7703922 (491.98 it/sec) -training >> step=6774000, episode=1130 reward=0.7845626 (498.45 it/sec) -training >> step=6774100, episode=1130 reward=0.7954941 (486.55 it/sec) -training >> step=6774200, episode=1130 reward=0.7653462 (487.36 it/sec) -training >> step=6774300, episode=1130 reward=0.7830878 (417.38 it/sec) -training >> step=6774400, episode=1130 reward=0.7786576 (512.66 it/sec) -training >> step=6774500, episode=1130 reward=0.7693017 (486.30 it/sec) -training >> step=6774600, episode=1130 reward=0.7795213 (502.35 it/sec) -training >> step=6774700, episode=1130 reward=0.7836772 (535.00 it/sec) -training >> step=6774800, episode=1130 reward=0.7745239 (486.17 it/sec) -training >> step=6774900, episode=1130 reward=0.7838444 (466.08 it/sec) -training >> step=6775000, episode=1130 reward=0.8079057 (517.98 it/sec) -training >> step=6775100, episode=1130 reward=0.78771 (545.39 it/sec) -training >> step=6775200, episode=1130 reward=0.7899659 (447.42 it/sec) -training >> step=6775300, episode=1130 reward=0.7962565 (428.84 it/sec) -training >> step=6775400, episode=1130 reward=0.7836801 (513.20 it/sec) -training >> step=6775500, episode=1130 reward=0.7861103 (488.52 it/sec) -training >> step=6775600, episode=1130 reward=0.7851143 (481.83 it/sec) -training >> step=6775700, episode=1130 reward=0.7876852 (445.78 it/sec) -training >> step=6775800, episode=1130 reward=0.7560038 (516.09 it/sec) -training >> step=6775900, episode=1130 reward=0.7853866 (480.45 it/sec) -training >> step=6776000, episode=1130 reward=0.7714655 (479.59 it/sec) -training >> step=6776100, episode=1130 reward=0.7956511 (468.36 it/sec) -training >> step=6776200, episode=1130 reward=0.795399 (501.10 it/sec) -training >> step=6776300, episode=1130 reward=0.8033786 (460.59 it/sec) -training >> step=6776400, episode=1130 reward=0.7883076 (440.46 it/sec) -training >> step=6776500, episode=1130 reward=0.7811483 (532.34 it/sec) -training >> step=6776600, episode=1130 reward=0.7987755 (480.77 it/sec) -training >> step=6776700, episode=1130 reward=0.7893366 (506.43 it/sec) -training >> step=6776800, episode=1130 reward=0.7672163 (492.68 it/sec) -training >> step=6776900, episode=1130 reward=0.7966655 (495.63 it/sec) -training >> step=6777000, episode=1130 reward=0.7913776 (466.35 it/sec) -training >> step=6777100, episode=1130 reward=0.7683567 (509.60 it/sec) -training >> step=6777200, episode=1130 reward=0.8113843 (521.75 it/sec) -training >> step=6777300, episode=1130 reward=0.8003858 (504.38 it/sec) -training >> step=6777400, episode=1130 reward=0.7925102 (507.72 it/sec) -training >> step=6777500, episode=1130 reward=0.7937037 (513.09 it/sec) -training >> step=6777600, episode=1130 reward=0.7653857 (521.05 it/sec) -training >> step=6777700, episode=1130 reward=0.7885647 (466.98 it/sec) -training >> step=6777800, episode=1130 reward=0.7796548 (482.01 it/sec) -training >> step=6777900, episode=1130 reward=0.7970601 (485.09 it/sec) -training >> step=6778000, episode=1130 reward=0.795281 (543.12 it/sec) -training >> step=6778100, episode=1130 reward=0.7925485 (482.93 it/sec) -training >> step=6778200, episode=1130 reward=0.7573698 (507.45 it/sec) -training >> step=6778300, episode=1130 reward=0.7791757 (521.48 it/sec) -training >> step=6778400, episode=1130 reward=0.7894716 (492.39 it/sec) -training >> step=6778500, episode=1130 reward=0.7990521 (479.12 it/sec) -training >> step=6778600, episode=1130 reward=0.7980071 (497.39 it/sec) -training >> step=6778700, episode=1130 reward=0.7831901 (524.11 it/sec) -training >> step=6778800, episode=1130 reward=0.7673822 (486.38 it/sec) -training >> step=6778900, episode=1130 reward=0.7843727 (469.26 it/sec) -training >> step=6779000, episode=1130 reward=0.7835456 (495.73 it/sec) -training >> step=6779100, episode=1130 reward=0.7761378 (528.65 it/sec) -training >> step=6779200, episode=1130 reward=0.7708465 (507.42 it/sec) -training >> step=6779300, episode=1131 reward=0.7687401 (124.48 it/sec) -training >> step=6779400, episode=1131 reward=0.7830555 (521.13 it/sec) -training >> step=6779500, episode=1131 reward=0.776405 (507.90 it/sec) -training >> step=6779600, episode=1131 reward=0.7992942 (480.45 it/sec) -training >> step=6779700, episode=1131 reward=0.7927076 (499.87 it/sec) -training >> step=6779800, episode=1131 reward=0.7918854 (534.12 it/sec) -training >> step=6779900, episode=1131 reward=0.7933694 (487.45 it/sec) -training >> step=6780000, episode=1131 reward=0.7972209 (487.25 it/sec) -training >> step=6780100, episode=1131 reward=0.7981908 (489.23 it/sec) -training >> step=6780200, episode=1131 reward=0.7908636 (473.20 it/sec) -training >> step=6780300, episode=1131 reward=0.7869943 (482.86 it/sec) -training >> step=6780400, episode=1131 reward=0.7993847 (367.67 it/sec) -training >> step=6780500, episode=1131 reward=0.798867 (538.33 it/sec) -training >> step=6780600, episode=1131 reward=0.7901803 (446.92 it/sec) -training >> step=6780700, episode=1131 reward=0.7847568 (468.98 it/sec) -training >> step=6780800, episode=1131 reward=0.7759778 (506.07 it/sec) -training >> step=6780900, episode=1131 reward=0.7915987 (511.12 it/sec) -training >> step=6781000, episode=1131 reward=0.7862037 (482.31 it/sec) -training >> step=6781100, episode=1131 reward=0.8047343 (450.71 it/sec) -training >> step=6781200, episode=1131 reward=0.7803653 (432.97 it/sec) -training >> step=6781300, episode=1131 reward=0.7932433 (484.61 it/sec) -training >> step=6781400, episode=1131 reward=0.7852909 (500.38 it/sec) -training >> step=6781500, episode=1131 reward=0.7903121 (480.60 it/sec) -training >> step=6781600, episode=1131 reward=0.8213012 (470.49 it/sec) -training >> step=6781700, episode=1131 reward=0.7811835 (450.90 it/sec) -training >> step=6781800, episode=1131 reward=0.7878088 (477.56 it/sec) -training >> step=6781900, episode=1131 reward=0.7760807 (516.20 it/sec) -training >> step=6782000, episode=1131 reward=0.7863013 (522.18 it/sec) -training >> step=6782100, episode=1131 reward=0.7995754 (465.38 it/sec) -training >> step=6782200, episode=1131 reward=0.788065 (485.80 it/sec) -training >> step=6782300, episode=1131 reward=0.7990919 (462.48 it/sec) -training >> step=6782400, episode=1131 reward=0.8045307 (483.17 it/sec) -training >> step=6782500, episode=1131 reward=0.7931414 (492.56 it/sec) -training >> step=6782600, episode=1131 reward=0.7900787 (461.76 it/sec) -training >> step=6782700, episode=1131 reward=0.7838129 (485.94 it/sec) -training >> step=6782800, episode=1131 reward=0.785941 (477.78 it/sec) -training >> step=6782900, episode=1131 reward=0.7906759 (472.24 it/sec) -training >> step=6783000, episode=1131 reward=0.7730181 (453.93 it/sec) -training >> step=6783100, episode=1131 reward=0.7841783 (446.21 it/sec) -training >> step=6783200, episode=1131 reward=0.7673156 (503.40 it/sec) -training >> step=6783300, episode=1131 reward=0.7916237 (508.96 it/sec) -training >> step=6783400, episode=1131 reward=0.7740757 (536.01 it/sec) -training >> step=6783500, episode=1131 reward=0.7926078 (479.28 it/sec) -training >> step=6783600, episode=1131 reward=0.7776251 (494.84 it/sec) -training >> step=6783700, episode=1131 reward=0.780409 (495.25 it/sec) -training >> step=6783800, episode=1131 reward=0.7743371 (503.66 it/sec) -training >> step=6783900, episode=1131 reward=0.766472 (521.49 it/sec) -training >> step=6784000, episode=1131 reward=0.7887328 (518.09 it/sec) -training >> step=6784100, episode=1131 reward=0.7848804 (504.74 it/sec) -training >> step=6784200, episode=1131 reward=0.7873821 (481.75 it/sec) -training >> step=6784300, episode=1131 reward=0.796466 (519.75 it/sec) -training >> step=6784400, episode=1131 reward=0.7922264 (500.37 it/sec) -training >> step=6784500, episode=1131 reward=0.7888388 (550.42 it/sec) -training >> step=6784600, episode=1131 reward=0.7810112 (507.81 it/sec) -training >> step=6784700, episode=1131 reward=0.7786981 (460.04 it/sec) -training >> step=6784800, episode=1131 reward=0.7781507 (479.81 it/sec) -training >> step=6784900, episode=1131 reward=0.7784819 (498.04 it/sec) -training >> step=6785000, episode=1131 reward=0.7900091 (497.49 it/sec) -training >> step=6785100, episode=1131 reward=0.7795377 (478.33 it/sec) -training >> step=6785200, episode=1131 reward=0.7957339 (517.37 it/sec) -training >> step=6785300, episode=1132 reward=0.7683843 (135.95 it/sec) -training >> step=6785400, episode=1132 reward=0.7873331 (506.27 it/sec) -training >> step=6785500, episode=1132 reward=0.7919564 (479.63 it/sec) -training >> step=6785600, episode=1132 reward=0.7834231 (491.94 it/sec) -training >> step=6785700, episode=1132 reward=0.7865109 (520.43 it/sec) -training >> step=6785800, episode=1132 reward=0.7664329 (506.84 it/sec) -training >> step=6785900, episode=1132 reward=0.7889318 (492.39 it/sec) -training >> step=6786000, episode=1132 reward=0.7878129 (501.44 it/sec) -training >> step=6786100, episode=1132 reward=0.78482 (533.76 it/sec) -training >> step=6786200, episode=1132 reward=0.7880405 (514.15 it/sec) -training >> step=6786300, episode=1132 reward=0.769074 (516.40 it/sec) -training >> step=6786400, episode=1132 reward=0.7912503 (485.85 it/sec) -training >> step=6786500, episode=1132 reward=0.8032769 (494.01 it/sec) -training >> step=6786600, episode=1132 reward=0.800661 (519.51 it/sec) -training >> step=6786700, episode=1132 reward=0.7933094 (497.03 it/sec) -training >> step=6786800, episode=1132 reward=0.7915666 (354.97 it/sec) -training >> step=6786900, episode=1132 reward=0.7740926 (474.63 it/sec) -training >> step=6787000, episode=1132 reward=0.7890455 (528.18 it/sec) -training >> step=6787100, episode=1132 reward=0.7982847 (487.28 it/sec) -training >> step=6787200, episode=1132 reward=0.7658008 (477.86 it/sec) -training >> step=6787300, episode=1132 reward=0.7973961 (501.27 it/sec) -training >> step=6787400, episode=1132 reward=0.7862484 (521.18 it/sec) -training >> step=6787500, episode=1132 reward=0.8129136 (487.29 it/sec) -training >> step=6787600, episode=1132 reward=0.7784886 (500.36 it/sec) -training >> step=6787700, episode=1132 reward=0.7904252 (534.93 it/sec) -training >> step=6787800, episode=1132 reward=0.7811226 (509.54 it/sec) -training >> step=6787900, episode=1132 reward=0.7982512 (486.67 it/sec) -training >> step=6788000, episode=1132 reward=0.8091893 (498.40 it/sec) -training >> step=6788100, episode=1132 reward=0.8008704 (540.89 it/sec) -training >> step=6788200, episode=1132 reward=0.7868492 (488.59 it/sec) -training >> step=6788300, episode=1132 reward=0.7936026 (529.40 it/sec) -training >> step=6788400, episode=1132 reward=0.7579523 (480.89 it/sec) -training >> step=6788500, episode=1132 reward=0.7892053 (520.27 it/sec) -training >> step=6788600, episode=1132 reward=0.7848869 (507.06 it/sec) -training >> step=6788700, episode=1132 reward=0.7685262 (481.91 it/sec) -training >> step=6788800, episode=1132 reward=0.8144653 (463.13 it/sec) -training >> step=6788900, episode=1132 reward=0.7901075 (513.32 it/sec) -training >> step=6789000, episode=1132 reward=0.7844962 (481.46 it/sec) -training >> step=6789100, episode=1132 reward=0.7789564 (507.69 it/sec) -training >> step=6789200, episode=1132 reward=0.7948671 (537.22 it/sec) -training >> step=6789300, episode=1132 reward=0.7757539 (501.89 it/sec) -training >> step=6789400, episode=1132 reward=0.7616499 (456.38 it/sec) -training >> step=6789500, episode=1132 reward=0.7671964 (484.21 it/sec) -training >> step=6789600, episode=1132 reward=0.792383 (497.28 it/sec) -training >> step=6789700, episode=1132 reward=0.7747954 (492.83 it/sec) -training >> step=6789800, episode=1132 reward=0.7878323 (498.73 it/sec) -training >> step=6789900, episode=1132 reward=0.7759811 (500.80 it/sec) -training >> step=6790000, episode=1132 reward=0.7885233 (486.07 it/sec) -training >> step=6790100, episode=1132 reward=0.7942342 (507.63 it/sec) -training >> step=6790200, episode=1132 reward=0.7788989 (489.61 it/sec) -training >> step=6790300, episode=1132 reward=0.7748967 (512.03 it/sec) -training >> step=6790400, episode=1132 reward=0.785551 (513.68 it/sec) -training >> step=6790500, episode=1132 reward=0.7936132 (493.91 it/sec) -training >> step=6790600, episode=1132 reward=0.7898584 (519.22 it/sec) -training >> step=6790700, episode=1132 reward=0.781476 (526.35 it/sec) -training >> step=6790800, episode=1132 reward=0.7899675 (522.14 it/sec) -training >> step=6790900, episode=1132 reward=0.7820123 (525.05 it/sec) -training >> step=6791000, episode=1132 reward=0.7791765 (518.89 it/sec) -training >> step=6791100, episode=1132 reward=0.7760475 (470.83 it/sec) -training >> step=6791200, episode=1132 reward=0.8028855 (514.31 it/sec) -training >> step=6791300, episode=1133 reward=0.798788 (118.00 it/sec) -training >> step=6791400, episode=1133 reward=0.806331 (537.03 it/sec) -training >> step=6791500, episode=1133 reward=0.791403 (524.27 it/sec) -training >> step=6791600, episode=1133 reward=0.7987675 (517.24 it/sec) -training >> step=6791700, episode=1133 reward=0.7826749 (493.19 it/sec) -training >> step=6791800, episode=1133 reward=0.7864837 (503.54 it/sec) -training >> step=6791900, episode=1133 reward=0.8018903 (475.08 it/sec) -training >> step=6792000, episode=1133 reward=0.7803257 (517.81 it/sec) -training >> step=6792100, episode=1133 reward=0.7768294 (537.08 it/sec) -training >> step=6792200, episode=1133 reward=0.7950813 (509.09 it/sec) -training >> step=6792300, episode=1133 reward=0.7782444 (484.46 it/sec) -training >> step=6792400, episode=1133 reward=0.7896511 (491.23 it/sec) -training >> step=6792500, episode=1133 reward=0.8089136 (534.39 it/sec) -training >> step=6792600, episode=1133 reward=0.7824183 (481.88 it/sec) -training >> step=6792700, episode=1133 reward=0.7976053 (507.82 it/sec) -training >> step=6792800, episode=1133 reward=0.7775024 (464.84 it/sec) -training >> step=6792900, episode=1133 reward=0.7699006 (355.82 it/sec) -training >> step=6793000, episode=1133 reward=0.7852356 (473.82 it/sec) -training >> step=6793100, episode=1133 reward=0.7762163 (488.65 it/sec) -training >> step=6793200, episode=1133 reward=0.7559766 (485.75 it/sec) -training >> step=6793300, episode=1133 reward=0.7860533 (478.90 it/sec) -training >> step=6793400, episode=1133 reward=0.7776054 (491.10 it/sec) -training >> step=6793500, episode=1133 reward=0.7762261 (531.17 it/sec) -training >> step=6793600, episode=1133 reward=0.7833191 (446.27 it/sec) -training >> step=6793700, episode=1133 reward=0.7780042 (497.68 it/sec) -training >> step=6793800, episode=1133 reward=0.7916095 (471.41 it/sec) -training >> step=6793900, episode=1133 reward=0.7829946 (523.91 it/sec) -training >> step=6794000, episode=1133 reward=0.7992769 (471.07 it/sec) -training >> step=6794100, episode=1133 reward=0.7850435 (450.31 it/sec) -training >> step=6794200, episode=1133 reward=0.7623289 (455.31 it/sec) -training >> step=6794300, episode=1133 reward=0.8050385 (465.20 it/sec) -training >> step=6794400, episode=1133 reward=0.7659181 (477.58 it/sec) -training >> step=6794500, episode=1133 reward=0.8051713 (499.23 it/sec) -training >> step=6794600, episode=1133 reward=0.7756481 (498.58 it/sec) -training >> step=6794700, episode=1133 reward=0.7926846 (480.55 it/sec) -training >> step=6794800, episode=1133 reward=0.799818 (453.13 it/sec) -training >> step=6794900, episode=1133 reward=0.7941315 (518.39 it/sec) -training >> step=6795000, episode=1133 reward=0.7827961 (497.18 it/sec) -training >> step=6795100, episode=1133 reward=0.781737 (483.14 it/sec) -training >> step=6795200, episode=1133 reward=0.7893618 (483.26 it/sec) -training >> step=6795300, episode=1133 reward=0.8029121 (482.27 it/sec) -training >> step=6795400, episode=1133 reward=0.7940241 (493.40 it/sec) -training >> step=6795500, episode=1133 reward=0.798417 (480.74 it/sec) -training >> step=6795600, episode=1133 reward=0.7735692 (489.61 it/sec) -training >> step=6795700, episode=1133 reward=0.7758522 (490.54 it/sec) -training >> step=6795800, episode=1133 reward=0.7970747 (486.60 it/sec) -training >> step=6795900, episode=1133 reward=0.7883987 (474.58 it/sec) -training >> step=6796000, episode=1133 reward=0.769786 (485.18 it/sec) -training >> step=6796100, episode=1133 reward=0.7730919 (451.66 it/sec) -training >> step=6796200, episode=1133 reward=0.7777732 (467.10 it/sec) -training >> step=6796300, episode=1133 reward=0.7828496 (467.15 it/sec) -training >> step=6796400, episode=1133 reward=0.786893 (494.82 it/sec) -training >> step=6796500, episode=1133 reward=0.7841328 (479.00 it/sec) -training >> step=6796600, episode=1133 reward=0.7754954 (521.12 it/sec) -training >> step=6796700, episode=1133 reward=0.8021208 (450.55 it/sec) -training >> step=6796800, episode=1133 reward=0.7829819 (503.28 it/sec) -training >> step=6796900, episode=1133 reward=0.796741 (493.45 it/sec) -training >> step=6797000, episode=1133 reward=0.7657984 (507.97 it/sec) -training >> step=6797100, episode=1133 reward=0.7795596 (511.14 it/sec) -training >> step=6797200, episode=1133 reward=0.7968546 (400.34 it/sec) -training >> step=6797300, episode=1134 reward=0.8062014 (96.40 it/sec) -training >> step=6797400, episode=1134 reward=0.7793722 (466.39 it/sec) -training >> step=6797500, episode=1134 reward=0.7691359 (507.75 it/sec) -training >> step=6797600, episode=1134 reward=0.780332 (472.76 it/sec) -training >> step=6797700, episode=1134 reward=0.7926449 (497.66 it/sec) -training >> step=6797800, episode=1134 reward=0.7805123 (478.03 it/sec) -training >> step=6797900, episode=1134 reward=0.8054768 (514.32 it/sec) -training >> step=6798000, episode=1134 reward=0.7697157 (487.46 it/sec) -training >> step=6798100, episode=1134 reward=0.7773688 (459.62 it/sec) -training >> step=6798200, episode=1134 reward=0.7740887 (468.23 it/sec) -training >> step=6798300, episode=1134 reward=0.7720987 (480.40 it/sec) -training >> step=6798400, episode=1134 reward=0.7763693 (517.61 it/sec) -training >> step=6798500, episode=1134 reward=0.774079 (466.93 it/sec) -training >> step=6798600, episode=1134 reward=0.7823909 (465.48 it/sec) -training >> step=6798700, episode=1134 reward=0.8086537 (469.80 it/sec) -training >> step=6798800, episode=1134 reward=0.8027916 (429.44 it/sec) -training >> step=6798900, episode=1134 reward=0.7911668 (444.70 it/sec) -training >> step=6799000, episode=1134 reward=0.7799805 (463.83 it/sec) -training >> step=6799100, episode=1134 reward=0.7950699 (468.81 it/sec) -training >> step=6799200, episode=1134 reward=0.7764152 (453.76 it/sec) -training >> step=6799300, episode=1134 reward=0.7932806 (372.73 it/sec) -training >> step=6799400, episode=1134 reward=0.7683604 (472.34 it/sec) -training >> step=6799500, episode=1134 reward=0.7841608 (478.84 it/sec) -training >> step=6799600, episode=1134 reward=0.7958382 (486.10 it/sec) -training >> step=6799700, episode=1134 reward=0.7823406 (481.50 it/sec) -training >> step=6799800, episode=1134 reward=0.7891461 (470.95 it/sec) -training >> step=6799900, episode=1134 reward=0.7685091 (478.83 it/sec) -training >> step=6800000, episode=1134 reward=0.7781466 (492.77 it/sec) -training >> step=6800100, episode=1134 reward=0.786736 (473.44 it/sec) -training >> step=6800200, episode=1134 reward=0.8025975 (472.62 it/sec) -training >> step=6800300, episode=1134 reward=0.7737622 (456.73 it/sec) -training >> step=6800400, episode=1134 reward=0.7860932 (495.24 it/sec) -training >> step=6800500, episode=1134 reward=0.7999758 (483.47 it/sec) -training >> step=6800600, episode=1134 reward=0.783105 (417.97 it/sec) -training >> step=6800700, episode=1134 reward=0.7825493 (465.19 it/sec) -training >> step=6800800, episode=1134 reward=0.7789802 (502.23 it/sec) -training >> step=6800900, episode=1134 reward=0.7775423 (505.31 it/sec) -training >> step=6801000, episode=1134 reward=0.7876354 (443.19 it/sec) -training >> step=6801100, episode=1134 reward=0.7813354 (441.40 it/sec) -training >> step=6801200, episode=1134 reward=0.7849056 (467.43 it/sec) -training >> step=6801300, episode=1134 reward=0.8067954 (423.89 it/sec) -training >> step=6801400, episode=1134 reward=0.8349172 (467.75 it/sec) -training >> step=6801500, episode=1134 reward=0.7901387 (463.99 it/sec) -training >> step=6801600, episode=1134 reward=0.8029547 (417.79 it/sec) -training >> step=6801700, episode=1134 reward=0.7913691 (483.95 it/sec) -training >> step=6801800, episode=1134 reward=0.8019843 (499.38 it/sec) -training >> step=6801900, episode=1134 reward=0.7759925 (478.95 it/sec) -training >> step=6802000, episode=1134 reward=0.7728785 (439.19 it/sec) -training >> step=6802100, episode=1134 reward=0.7808754 (444.02 it/sec) -training >> step=6802200, episode=1134 reward=0.7756383 (489.89 it/sec) -training >> step=6802300, episode=1134 reward=0.7738242 (514.88 it/sec) -training >> step=6802400, episode=1134 reward=0.7868901 (462.40 it/sec) -training >> step=6802500, episode=1134 reward=0.7975878 (464.64 it/sec) -training >> step=6802600, episode=1134 reward=0.7636218 (487.08 it/sec) -training >> step=6802700, episode=1134 reward=0.7793583 (484.88 it/sec) -training >> step=6802800, episode=1134 reward=0.7911518 (456.26 it/sec) -training >> step=6802900, episode=1134 reward=0.757591 (476.46 it/sec) -training >> step=6803000, episode=1134 reward=0.7874133 (489.61 it/sec) -training >> step=6803100, episode=1134 reward=0.7468928 (472.51 it/sec) -training >> step=6803200, episode=1134 reward=0.7848525 (462.54 it/sec) -training >> step=6803300, episode=1135 reward=0.7682837 (154.79 it/sec) -training >> step=6803400, episode=1135 reward=0.7820035 (509.11 it/sec) -training >> step=6803500, episode=1135 reward=0.7857975 (494.32 it/sec) -training >> step=6803600, episode=1135 reward=0.7874281 (484.01 it/sec) -training >> step=6803700, episode=1135 reward=0.7969918 (483.52 it/sec) -training >> step=6803800, episode=1135 reward=0.7756982 (475.30 it/sec) -training >> step=6803900, episode=1135 reward=0.7866065 (497.69 it/sec) -training >> step=6804000, episode=1135 reward=0.7705852 (444.22 it/sec) -training >> step=6804100, episode=1135 reward=0.7907566 (543.67 it/sec) -training >> step=6804200, episode=1135 reward=0.7627668 (496.15 it/sec) -training >> step=6804300, episode=1135 reward=0.7694923 (492.47 it/sec) -training >> step=6804400, episode=1135 reward=0.7715512 (469.07 it/sec) -training >> step=6804500, episode=1135 reward=0.7910784 (497.64 it/sec) -training >> step=6804600, episode=1135 reward=0.7798496 (513.46 it/sec) -training >> step=6804700, episode=1135 reward=0.7852328 (518.58 it/sec) -training >> step=6804800, episode=1135 reward=0.7818973 (499.44 it/sec) -training >> step=6804900, episode=1135 reward=0.7898086 (450.47 it/sec) -training >> step=6805000, episode=1135 reward=0.802325 (504.93 it/sec) -training >> step=6805100, episode=1135 reward=0.7817516 (523.85 it/sec) -training >> step=6805200, episode=1135 reward=0.7834682 (518.47 it/sec) -training >> step=6805300, episode=1135 reward=0.7749532 (475.08 it/sec) -training >> step=6805400, episode=1135 reward=0.7763075 (482.80 it/sec) -training >> step=6805500, episode=1135 reward=0.7971587 (374.96 it/sec) -training >> step=6805600, episode=1135 reward=0.7925916 (493.24 it/sec) -training >> step=6805700, episode=1135 reward=0.7877582 (487.50 it/sec) -training >> step=6805800, episode=1135 reward=0.7798488 (470.45 it/sec) -training >> step=6805900, episode=1135 reward=0.7987567 (534.19 it/sec) -training >> step=6806000, episode=1135 reward=0.7976897 (518.27 it/sec) -training >> step=6806100, episode=1135 reward=0.7950672 (506.63 it/sec) -training >> step=6806200, episode=1135 reward=0.7706985 (543.00 it/sec) -training >> step=6806300, episode=1135 reward=0.8001775 (495.63 it/sec) -training >> step=6806400, episode=1135 reward=0.7941976 (486.20 it/sec) -training >> step=6806500, episode=1135 reward=0.7886275 (521.07 it/sec) -training >> step=6806600, episode=1135 reward=0.7820758 (531.84 it/sec) -training >> step=6806700, episode=1135 reward=0.7874985 (522.56 it/sec) -training >> step=6806800, episode=1135 reward=0.7947209 (485.20 it/sec) -training >> step=6806900, episode=1135 reward=0.7893126 (497.84 it/sec) -training >> step=6807000, episode=1135 reward=0.7855042 (518.94 it/sec) -training >> step=6807100, episode=1135 reward=0.7826562 (528.52 it/sec) -training >> step=6807200, episode=1135 reward=0.7797711 (523.25 it/sec) -training >> step=6807300, episode=1135 reward=0.7971513 (519.91 it/sec) -training >> step=6807400, episode=1135 reward=0.7833781 (464.26 it/sec) -training >> step=6807500, episode=1135 reward=0.7941347 (492.30 it/sec) -training >> step=6807600, episode=1135 reward=0.7847178 (493.23 it/sec) -training >> step=6807700, episode=1135 reward=0.7986235 (540.48 it/sec) -training >> step=6807800, episode=1135 reward=0.7953725 (537.89 it/sec) -training >> step=6807900, episode=1135 reward=0.7617575 (469.53 it/sec) -training >> step=6808000, episode=1135 reward=0.7779855 (450.71 it/sec) -training >> step=6808100, episode=1135 reward=0.785287 (493.31 it/sec) -training >> step=6808200, episode=1135 reward=0.7796552 (501.10 it/sec) -training >> step=6808300, episode=1135 reward=0.7918914 (478.14 it/sec) -training >> step=6808400, episode=1135 reward=0.7691671 (507.97 it/sec) -training >> step=6808500, episode=1135 reward=0.8107424 (431.62 it/sec) -training >> step=6808600, episode=1135 reward=0.7872536 (515.50 it/sec) -training >> step=6808700, episode=1135 reward=0.8101848 (465.06 it/sec) -training >> step=6808800, episode=1135 reward=0.7789248 (524.16 it/sec) -training >> step=6808900, episode=1135 reward=0.8020153 (449.83 it/sec) -training >> step=6809000, episode=1135 reward=0.766201 (467.07 it/sec) -training >> step=6809100, episode=1135 reward=0.7807766 (517.99 it/sec) -training >> step=6809200, episode=1135 reward=0.7823143 (518.02 it/sec) -training >> step=6809300, episode=1136 reward=0.7755949 (126.45 it/sec) -training >> step=6809400, episode=1136 reward=0.794636 (498.26 it/sec) -training >> step=6809500, episode=1136 reward=0.7663258 (515.07 it/sec) -training >> step=6809600, episode=1136 reward=0.7947899 (454.23 it/sec) -training >> step=6809700, episode=1136 reward=0.7892802 (439.73 it/sec) -training >> step=6809800, episode=1136 reward=0.7859231 (440.42 it/sec) -training >> step=6809900, episode=1136 reward=0.7983813 (521.57 it/sec) -training >> step=6810000, episode=1136 reward=0.7969022 (459.95 it/sec) -training >> step=6810100, episode=1136 reward=0.7954306 (460.15 it/sec) -training >> step=6810200, episode=1136 reward=0.7915462 (476.67 it/sec) -training >> step=6810300, episode=1136 reward=0.7909861 (458.09 it/sec) -training >> step=6810400, episode=1136 reward=0.8000984 (502.08 it/sec) -training >> step=6810500, episode=1136 reward=0.7893492 (479.10 it/sec) -training >> step=6810600, episode=1136 reward=0.763081 (467.63 it/sec) -training >> step=6810700, episode=1136 reward=0.7941412 (500.36 it/sec) -training >> step=6810800, episode=1136 reward=0.7873387 (478.51 it/sec) -training >> step=6810900, episode=1136 reward=0.7995219 (470.18 it/sec) -training >> step=6811000, episode=1136 reward=0.7896473 (418.59 it/sec) -training >> step=6811100, episode=1136 reward=0.8126324 (479.12 it/sec) -training >> step=6811200, episode=1136 reward=0.7766486 (452.27 it/sec) -training >> step=6811300, episode=1136 reward=0.7975045 (513.47 it/sec) -training >> step=6811400, episode=1136 reward=0.7711145 (470.65 it/sec) -training >> step=6811500, episode=1136 reward=0.7846041 (480.97 it/sec) -training >> step=6811600, episode=1136 reward=0.7908477 (434.88 it/sec) -training >> step=6811700, episode=1136 reward=0.7876038 (390.76 it/sec) -training >> step=6811800, episode=1136 reward=0.7798967 (510.18 it/sec) -training >> step=6811900, episode=1136 reward=0.7864657 (426.55 it/sec) -training >> step=6812000, episode=1136 reward=0.8055248 (464.04 it/sec) -training >> step=6812100, episode=1136 reward=0.7729762 (478.10 it/sec) -training >> step=6812200, episode=1136 reward=0.7941602 (458.82 it/sec) -training >> step=6812300, episode=1136 reward=0.7989423 (483.80 it/sec) -training >> step=6812400, episode=1136 reward=0.7957683 (499.20 it/sec) -training >> step=6812500, episode=1136 reward=0.7925665 (432.11 it/sec) -training >> step=6812600, episode=1136 reward=0.7871733 (464.82 it/sec) -training >> step=6812700, episode=1136 reward=0.7806694 (497.60 it/sec) -training >> step=6812800, episode=1136 reward=0.7838227 (458.20 it/sec) -training >> step=6812900, episode=1136 reward=0.7973459 (476.86 it/sec) -training >> step=6813000, episode=1136 reward=0.8051179 (469.59 it/sec) -training >> step=6813100, episode=1136 reward=0.7694596 (499.45 it/sec) -training >> step=6813200, episode=1136 reward=0.7711867 (495.52 it/sec) -training >> step=6813300, episode=1136 reward=0.7807915 (460.77 it/sec) -training >> step=6813400, episode=1136 reward=0.8042872 (409.74 it/sec) -training >> step=6813500, episode=1136 reward=0.7916934 (462.68 it/sec) -training >> step=6813600, episode=1136 reward=0.7741318 (480.71 it/sec) -training >> step=6813700, episode=1136 reward=0.7709118 (488.67 it/sec) -training >> step=6813800, episode=1136 reward=0.7873548 (502.74 it/sec) -training >> step=6813900, episode=1136 reward=0.785157 (437.10 it/sec) -training >> step=6814000, episode=1136 reward=0.7991279 (475.00 it/sec) -training >> step=6814100, episode=1136 reward=0.8111209 (463.44 it/sec) -training >> step=6814200, episode=1136 reward=0.78954 (500.07 it/sec) -training >> step=6814300, episode=1136 reward=0.7838343 (478.76 it/sec) -training >> step=6814400, episode=1136 reward=0.7851843 (476.80 it/sec) -training >> step=6814500, episode=1136 reward=0.7714126 (488.61 it/sec) -training >> step=6814600, episode=1136 reward=0.7756968 (439.11 it/sec) -training >> step=6814700, episode=1136 reward=0.7673022 (513.17 it/sec) -training >> step=6814800, episode=1136 reward=0.7757937 (495.53 it/sec) -training >> step=6814900, episode=1136 reward=0.7831453 (470.82 it/sec) -training >> step=6815000, episode=1136 reward=0.7968697 (482.74 it/sec) -training >> step=6815100, episode=1136 reward=0.76542 (462.46 it/sec) -training >> step=6815200, episode=1136 reward=0.7884312 (497.35 it/sec) -training >> step=6815300, episode=1137 reward=0.7611745 (117.58 it/sec) -training >> step=6815400, episode=1137 reward=0.7931207 (452.38 it/sec) -training >> step=6815500, episode=1137 reward=0.7791193 (484.05 it/sec) -training >> step=6815600, episode=1137 reward=0.8022845 (476.91 it/sec) -training >> step=6815700, episode=1137 reward=0.7759163 (478.63 it/sec) -training >> step=6815800, episode=1137 reward=0.790261 (482.98 it/sec) -training >> step=6815900, episode=1137 reward=0.8030369 (516.22 it/sec) -training >> step=6816000, episode=1137 reward=0.805908 (455.40 it/sec) -training >> step=6816100, episode=1137 reward=0.7896079 (467.08 it/sec) -training >> step=6816200, episode=1137 reward=0.7767925 (448.85 it/sec) -training >> step=6816300, episode=1137 reward=0.7875108 (517.77 it/sec) -training >> step=6816400, episode=1137 reward=0.7769877 (481.77 it/sec) -training >> step=6816500, episode=1137 reward=0.7935423 (487.96 it/sec) -training >> step=6816600, episode=1137 reward=0.7740507 (494.34 it/sec) -training >> step=6816700, episode=1137 reward=0.7876089 (505.75 it/sec) -training >> step=6816800, episode=1137 reward=0.7871296 (530.66 it/sec) -training >> step=6816900, episode=1137 reward=0.8044783 (474.87 it/sec) -training >> step=6817000, episode=1137 reward=0.7729446 (504.24 it/sec) -training >> step=6817100, episode=1137 reward=0.788729 (456.10 it/sec) -training >> step=6817200, episode=1137 reward=0.8025064 (495.29 it/sec) -training >> step=6817300, episode=1137 reward=0.7848819 (524.45 it/sec) -training >> step=6817400, episode=1137 reward=0.8052233 (560.63 it/sec) -training >> step=6817500, episode=1137 reward=0.795146 (465.16 it/sec) -training >> step=6817600, episode=1137 reward=0.7835327 (507.63 it/sec) -training >> step=6817700, episode=1137 reward=0.7999818 (351.20 it/sec) -training >> step=6817800, episode=1137 reward=0.7946009 (499.10 it/sec) -training >> step=6817900, episode=1137 reward=0.7993602 (512.51 it/sec) -training >> step=6818000, episode=1137 reward=0.7830516 (509.75 it/sec) -training >> step=6818100, episode=1137 reward=0.8251387 (452.05 it/sec) -training >> step=6818200, episode=1137 reward=0.7777953 (496.85 it/sec) -training >> step=6818300, episode=1137 reward=0.7914879 (524.38 it/sec) -training >> step=6818400, episode=1137 reward=0.800514 (511.69 it/sec) -training >> step=6818500, episode=1137 reward=0.7794099 (529.17 it/sec) -training >> step=6818600, episode=1137 reward=0.7961935 (467.77 it/sec) -training >> step=6818700, episode=1137 reward=0.7738605 (490.38 it/sec) -training >> step=6818800, episode=1137 reward=0.7931552 (547.31 it/sec) -training >> step=6818900, episode=1137 reward=0.8086725 (525.62 it/sec) -training >> step=6819000, episode=1137 reward=0.7945295 (489.22 it/sec) -training >> step=6819100, episode=1137 reward=0.7801688 (460.79 it/sec) -training >> step=6819200, episode=1137 reward=0.7752576 (514.57 it/sec) -training >> step=6819300, episode=1137 reward=0.802236 (510.21 it/sec) -training >> step=6819400, episode=1137 reward=0.7881522 (520.67 it/sec) -training >> step=6819500, episode=1137 reward=0.789361 (476.87 it/sec) -training >> step=6819600, episode=1137 reward=0.7625293 (535.51 it/sec) -training >> step=6819700, episode=1137 reward=0.7957228 (467.06 it/sec) -training >> step=6819800, episode=1137 reward=0.7882238 (533.90 it/sec) -training >> step=6819900, episode=1137 reward=0.7773544 (543.21 it/sec) -training >> step=6820000, episode=1137 reward=0.8022804 (508.33 it/sec) -training >> step=6820100, episode=1137 reward=0.777755 (424.25 it/sec) -training >> step=6820200, episode=1137 reward=0.8018834 (472.92 it/sec) -training >> step=6820300, episode=1137 reward=0.7771354 (526.26 it/sec) -training >> step=6820400, episode=1137 reward=0.80471 (518.56 it/sec) -training >> step=6820500, episode=1137 reward=0.7786864 (489.13 it/sec) -training >> step=6820600, episode=1137 reward=0.8017159 (504.56 it/sec) -training >> step=6820700, episode=1137 reward=0.7992395 (499.30 it/sec) -training >> step=6820800, episode=1137 reward=0.8033231 (515.68 it/sec) -training >> step=6820900, episode=1137 reward=0.7685372 (510.76 it/sec) -training >> step=6821000, episode=1137 reward=0.7933092 (555.01 it/sec) -training >> step=6821100, episode=1137 reward=0.8061946 (484.58 it/sec) -training >> step=6821200, episode=1137 reward=0.7861025 (476.23 it/sec) -training >> step=6821300, episode=1138 reward=0.7628341 (74.17 it/sec) -training >> step=6821400, episode=1138 reward=0.7864848 (478.07 it/sec) -training >> step=6821500, episode=1138 reward=0.7809198 (481.67 it/sec) -training >> step=6821600, episode=1138 reward=0.8018699 (516.08 it/sec) -training >> step=6821700, episode=1138 reward=0.7598374 (464.46 it/sec) -training >> step=6821800, episode=1138 reward=0.7904499 (543.16 it/sec) -training >> step=6821900, episode=1138 reward=0.7850379 (555.21 it/sec) -training >> step=6822000, episode=1138 reward=0.7878072 (518.30 it/sec) -training >> step=6822100, episode=1138 reward=0.7820437 (498.70 it/sec) -training >> step=6822200, episode=1138 reward=0.7934901 (528.27 it/sec) -training >> step=6822300, episode=1138 reward=0.7821803 (511.89 it/sec) -training >> step=6822400, episode=1138 reward=0.8046695 (541.75 it/sec) -training >> step=6822500, episode=1138 reward=0.7930116 (523.97 it/sec) -training >> step=6822600, episode=1138 reward=0.7762998 (509.87 it/sec) -training >> step=6822700, episode=1138 reward=0.7781618 (503.56 it/sec) -training >> step=6822800, episode=1138 reward=0.7941147 (507.75 it/sec) -training >> step=6822900, episode=1138 reward=0.7800657 (541.37 it/sec) -training >> step=6823000, episode=1138 reward=0.7787653 (517.13 it/sec) -training >> step=6823100, episode=1138 reward=0.8076186 (534.26 it/sec) -training >> step=6823200, episode=1138 reward=0.8004493 (484.04 it/sec) -training >> step=6823300, episode=1138 reward=0.8127691 (485.12 it/sec) -training >> step=6823400, episode=1138 reward=0.788411 (528.84 it/sec) -training >> step=6823500, episode=1138 reward=0.7768834 (568.53 it/sec) -training >> step=6823600, episode=1138 reward=0.796373 (508.85 it/sec) -training >> step=6823700, episode=1138 reward=0.818371 (498.83 it/sec) -training >> step=6823800, episode=1138 reward=0.7891659 (355.52 it/sec) -training >> step=6823900, episode=1138 reward=0.7860584 (551.54 it/sec) -training >> step=6824000, episode=1138 reward=0.7814001 (539.66 it/sec) -training >> step=6824100, episode=1138 reward=0.7887695 (504.04 it/sec) -training >> step=6824200, episode=1138 reward=0.7838185 (527.37 it/sec) -training >> step=6824300, episode=1138 reward=0.7994515 (482.63 it/sec) -training >> step=6824400, episode=1138 reward=0.7817875 (524.30 it/sec) -training >> step=6824500, episode=1138 reward=0.7984461 (519.54 it/sec) -training >> step=6824600, episode=1138 reward=0.7752338 (533.86 it/sec) -training >> step=6824700, episode=1138 reward=0.7887911 (506.41 it/sec) -training >> step=6824800, episode=1138 reward=0.8135889 (467.05 it/sec) -training >> step=6824900, episode=1138 reward=0.7950489 (511.06 it/sec) -training >> step=6825000, episode=1138 reward=0.8011786 (529.64 it/sec) -training >> step=6825100, episode=1138 reward=0.7795622 (514.45 it/sec) -training >> step=6825200, episode=1138 reward=0.799585 (482.43 it/sec) -training >> step=6825300, episode=1138 reward=0.8110415 (496.22 it/sec) -training >> step=6825400, episode=1138 reward=0.7897288 (498.17 it/sec) -training >> step=6825500, episode=1138 reward=0.7890043 (530.26 it/sec) -training >> step=6825600, episode=1138 reward=0.7789516 (513.00 it/sec) -training >> step=6825700, episode=1138 reward=0.7816105 (497.04 it/sec) -training >> step=6825800, episode=1138 reward=0.8156337 (479.87 it/sec) -training >> step=6825900, episode=1138 reward=0.7854556 (505.02 it/sec) -training >> step=6826000, episode=1138 reward=0.782625 (543.02 it/sec) -training >> step=6826100, episode=1138 reward=0.7917891 (490.25 it/sec) -training >> step=6826200, episode=1138 reward=0.7690102 (461.59 it/sec) -training >> step=6826300, episode=1138 reward=0.8021696 (489.71 it/sec) -training >> step=6826400, episode=1138 reward=0.7611092 (537.34 it/sec) -training >> step=6826500, episode=1138 reward=0.7826077 (462.46 it/sec) -training >> step=6826600, episode=1138 reward=0.7742791 (475.46 it/sec) -training >> step=6826700, episode=1138 reward=0.7968701 (551.73 it/sec) -training >> step=6826800, episode=1138 reward=0.8082426 (475.16 it/sec) -training >> step=6826900, episode=1138 reward=0.7965574 (482.15 it/sec) -training >> step=6827000, episode=1138 reward=0.774821 (526.53 it/sec) -training >> step=6827100, episode=1138 reward=0.7896544 (571.21 it/sec) -training >> step=6827200, episode=1138 reward=0.7783037 (480.13 it/sec) -training >> step=6827300, episode=1139 reward=0.8089933 (135.30 it/sec) -training >> step=6827400, episode=1139 reward=0.7679629 (497.45 it/sec) -training >> step=6827500, episode=1139 reward=0.7905774 (519.86 it/sec) -training >> step=6827600, episode=1139 reward=0.8008008 (490.43 it/sec) -training >> step=6827700, episode=1139 reward=0.7759393 (499.49 it/sec) -training >> step=6827800, episode=1139 reward=0.7937277 (516.39 it/sec) -training >> step=6827900, episode=1139 reward=0.7851456 (486.69 it/sec) -training >> step=6828000, episode=1139 reward=0.7686817 (496.42 it/sec) -training >> step=6828100, episode=1139 reward=0.8048981 (469.53 it/sec) -training >> step=6828200, episode=1139 reward=0.7722839 (440.07 it/sec) -training >> step=6828300, episode=1139 reward=0.7859336 (513.99 it/sec) -training >> step=6828400, episode=1139 reward=0.7808133 (495.33 it/sec) -training >> step=6828500, episode=1139 reward=0.787841 (534.06 it/sec) -training >> step=6828600, episode=1139 reward=0.7846835 (489.74 it/sec) -training >> step=6828700, episode=1139 reward=0.7959678 (479.68 it/sec) -training >> step=6828800, episode=1139 reward=0.7903571 (524.52 it/sec) -training >> step=6828900, episode=1139 reward=0.7855411 (546.77 it/sec) -training >> step=6829000, episode=1139 reward=0.797617 (484.06 it/sec) -training >> step=6829100, episode=1139 reward=0.8081506 (495.51 it/sec) -training >> step=6829200, episode=1139 reward=0.805155 (512.64 it/sec) -training >> step=6829300, episode=1139 reward=0.7780618 (516.27 it/sec) -training >> step=6829400, episode=1139 reward=0.7847235 (478.62 it/sec) -training >> step=6829500, episode=1139 reward=0.7848223 (444.67 it/sec) -training >> step=6829600, episode=1139 reward=0.7915501 (517.83 it/sec) -training >> step=6829700, episode=1139 reward=0.7858867 (457.74 it/sec) -training >> step=6829800, episode=1139 reward=0.8097837 (380.93 it/sec) -training >> step=6829900, episode=1139 reward=0.7974179 (498.15 it/sec) -training >> step=6830000, episode=1139 reward=0.784732 (555.01 it/sec) -training >> step=6830100, episode=1139 reward=0.7899027 (498.62 it/sec) -training >> step=6830200, episode=1139 reward=0.7790763 (515.80 it/sec) -training >> step=6830300, episode=1139 reward=0.7835889 (521.08 it/sec) -training >> step=6830400, episode=1139 reward=0.7886126 (518.83 it/sec) -training >> step=6830500, episode=1139 reward=0.7750589 (492.40 it/sec) -training >> step=6830600, episode=1139 reward=0.7804638 (497.72 it/sec) -training >> step=6830700, episode=1139 reward=0.7880434 (502.37 it/sec) -training >> step=6830800, episode=1139 reward=0.7720754 (442.47 it/sec) -training >> step=6830900, episode=1139 reward=0.769876 (491.39 it/sec) -training >> step=6831000, episode=1139 reward=0.7872463 (505.99 it/sec) -training >> step=6831100, episode=1139 reward=0.7691396 (517.70 it/sec) -training >> step=6831200, episode=1139 reward=0.7690927 (451.71 it/sec) -training >> step=6831300, episode=1139 reward=0.7877993 (507.58 it/sec) -training >> step=6831400, episode=1139 reward=0.7833024 (475.49 it/sec) -training >> step=6831500, episode=1139 reward=0.8054313 (427.59 it/sec) -training >> step=6831600, episode=1139 reward=0.7840995 (416.27 it/sec) -training >> step=6831700, episode=1139 reward=0.7783272 (394.74 it/sec) -training >> step=6831800, episode=1139 reward=0.7984956 (394.47 it/sec) -training >> step=6831900, episode=1139 reward=0.7853961 (410.99 it/sec) -training >> step=6832000, episode=1139 reward=0.7983301 (483.35 it/sec) -training >> step=6832100, episode=1139 reward=0.790279 (490.34 it/sec) -training >> step=6832200, episode=1139 reward=0.7956597 (475.25 it/sec) -training >> step=6832300, episode=1139 reward=0.8110055 (478.28 it/sec) -training >> step=6832400, episode=1139 reward=0.7745169 (425.70 it/sec) -training >> step=6832500, episode=1139 reward=0.782191 (424.71 it/sec) -training >> step=6832600, episode=1139 reward=0.7802531 (417.13 it/sec) -training >> step=6832700, episode=1139 reward=0.7643491 (495.21 it/sec) -training >> step=6832800, episode=1139 reward=0.803975 (418.75 it/sec) -training >> step=6832900, episode=1139 reward=0.7774723 (503.48 it/sec) -training >> step=6833000, episode=1139 reward=0.7991034 (464.63 it/sec) -training >> step=6833100, episode=1139 reward=0.7855404 (483.35 it/sec) -training >> step=6833200, episode=1139 reward=0.7819954 (541.66 it/sec) -training >> step=6833300, episode=1140 reward=0.7731601 (56.30 it/sec) -training >> step=6833400, episode=1140 reward=0.7816398 (521.63 it/sec) -training >> step=6833500, episode=1140 reward=0.8050262 (521.56 it/sec) -training >> step=6833600, episode=1140 reward=0.7909018 (509.84 it/sec) -training >> step=6833700, episode=1140 reward=0.790645 (464.82 it/sec) -training >> step=6833800, episode=1140 reward=0.788407 (460.50 it/sec) -training >> step=6833900, episode=1140 reward=0.7939592 (501.64 it/sec) -training >> step=6834000, episode=1140 reward=0.7866687 (480.05 it/sec) -training >> step=6834100, episode=1140 reward=0.7775721 (486.13 it/sec) -training >> step=6834200, episode=1140 reward=0.7969233 (480.58 it/sec) -training >> step=6834300, episode=1140 reward=0.7839802 (413.45 it/sec) -training >> step=6834400, episode=1140 reward=0.7966861 (530.07 it/sec) -training >> step=6834500, episode=1140 reward=0.7728617 (493.39 it/sec) -training >> step=6834600, episode=1140 reward=0.7975971 (461.42 it/sec) -training >> step=6834700, episode=1140 reward=0.7736326 (451.32 it/sec) -training >> step=6834800, episode=1140 reward=0.7858853 (523.63 it/sec) -training >> step=6834900, episode=1140 reward=0.7744989 (480.11 it/sec) -training >> step=6835000, episode=1140 reward=0.7829891 (478.72 it/sec) -training >> step=6835100, episode=1140 reward=0.7822723 (452.81 it/sec) -training >> step=6835200, episode=1140 reward=0.7852213 (401.16 it/sec) -training >> step=6835300, episode=1140 reward=0.7700827 (502.86 it/sec) -training >> step=6835400, episode=1140 reward=0.7851478 (455.07 it/sec) -training >> step=6835500, episode=1140 reward=0.7923043 (500.18 it/sec) -training >> step=6835600, episode=1140 reward=0.8169625 (509.58 it/sec) -training >> step=6835700, episode=1140 reward=0.7722772 (448.10 it/sec) -training >> step=6835800, episode=1140 reward=0.7835444 (509.33 it/sec) -training >> step=6835900, episode=1140 reward=0.76298 (504.79 it/sec) -training >> step=6836000, episode=1140 reward=0.771894 (356.23 it/sec) -training >> step=6836100, episode=1140 reward=0.7917364 (504.03 it/sec) -training >> step=6836200, episode=1140 reward=0.7836888 (494.32 it/sec) -training >> step=6836300, episode=1140 reward=0.7824964 (481.76 it/sec) -training >> step=6836400, episode=1140 reward=0.7823218 (433.45 it/sec) -training >> step=6836500, episode=1140 reward=0.7864581 (434.80 it/sec) -training >> step=6836600, episode=1140 reward=0.7638855 (442.91 it/sec) -training >> step=6836700, episode=1140 reward=0.8075427 (450.50 it/sec) -training >> step=6836800, episode=1140 reward=0.7786449 (518.25 it/sec) -training >> step=6836900, episode=1140 reward=0.7870181 (479.40 it/sec) -training >> step=6837000, episode=1140 reward=0.7747083 (505.02 it/sec) -training >> step=6837100, episode=1140 reward=0.7979947 (479.24 it/sec) -training >> step=6837200, episode=1140 reward=0.7708976 (489.32 it/sec) -training >> step=6837300, episode=1140 reward=0.7917318 (520.94 it/sec) -training >> step=6837400, episode=1140 reward=0.748693 (499.44 it/sec) -training >> step=6837500, episode=1140 reward=0.7757263 (509.13 it/sec) -training >> step=6837600, episode=1140 reward=0.7971146 (499.67 it/sec) -training >> step=6837700, episode=1140 reward=0.775636 (467.27 it/sec) -training >> step=6837800, episode=1140 reward=0.7737131 (497.08 it/sec) -training >> step=6837900, episode=1140 reward=0.7907803 (475.34 it/sec) -training >> step=6838000, episode=1140 reward=0.782031 (489.36 it/sec) -training >> step=6838100, episode=1140 reward=0.7786476 (530.84 it/sec) -training >> step=6838200, episode=1140 reward=0.7774125 (490.53 it/sec) -training >> step=6838300, episode=1140 reward=0.7703816 (478.50 it/sec) -training >> step=6838400, episode=1140 reward=0.7794641 (522.36 it/sec) -training >> step=6838500, episode=1140 reward=0.779741 (503.44 it/sec) -training >> step=6838600, episode=1140 reward=0.7810925 (475.17 it/sec) -training >> step=6838700, episode=1140 reward=0.7793117 (459.45 it/sec) -training >> step=6838800, episode=1140 reward=0.8010181 (430.16 it/sec) -training >> step=6838900, episode=1140 reward=0.7969533 (426.48 it/sec) -training >> step=6839000, episode=1140 reward=0.7647675 (380.97 it/sec) -training >> step=6839100, episode=1140 reward=0.774547 (430.89 it/sec) -training >> step=6839200, episode=1140 reward=0.7921803 (453.55 it/sec) -training >> step=6839300, episode=1141 reward=0.7940767 (94.49 it/sec) -training >> step=6839400, episode=1141 reward=0.7765843 (457.36 it/sec) -training >> step=6839500, episode=1141 reward=0.7980159 (462.47 it/sec) -training >> step=6839600, episode=1141 reward=0.7847033 (395.40 it/sec) -training >> step=6839700, episode=1141 reward=0.805112 (429.67 it/sec) -training >> step=6839800, episode=1141 reward=0.7672627 (469.42 it/sec) -training >> step=6839900, episode=1141 reward=0.8052431 (457.30 it/sec) -training >> step=6840000, episode=1141 reward=0.7877201 (450.67 it/sec) -training >> step=6840100, episode=1141 reward=0.8061855 (493.81 it/sec) -training >> step=6840200, episode=1141 reward=0.7819793 (464.08 it/sec) -training >> step=6840300, episode=1141 reward=0.762471 (483.10 it/sec) -training >> step=6840400, episode=1141 reward=0.7727824 (470.01 it/sec) -training >> step=6840500, episode=1141 reward=0.7635493 (507.88 it/sec) -training >> step=6840600, episode=1141 reward=0.7853754 (452.95 it/sec) -training >> step=6840700, episode=1141 reward=0.8022668 (454.80 it/sec) -training >> step=6840800, episode=1141 reward=0.7812494 (504.68 it/sec) -training >> step=6840900, episode=1141 reward=0.7737139 (505.97 it/sec) -training >> step=6841000, episode=1141 reward=0.7774557 (461.91 it/sec) -training >> step=6841100, episode=1141 reward=0.7824736 (464.95 it/sec) -training >> step=6841200, episode=1141 reward=0.7944044 (473.85 it/sec) -training >> step=6841300, episode=1141 reward=0.7821457 (500.45 it/sec) -training >> step=6841400, episode=1141 reward=0.7883203 (456.96 it/sec) -training >> step=6841500, episode=1141 reward=0.7724859 (466.55 it/sec) -training >> step=6841600, episode=1141 reward=0.7930397 (471.38 it/sec) -training >> step=6841700, episode=1141 reward=0.7986832 (468.71 it/sec) -training >> step=6841800, episode=1141 reward=0.7930385 (474.39 it/sec) -training >> step=6841900, episode=1141 reward=0.7993848 (538.54 it/sec) -training >> step=6842000, episode=1141 reward=0.7989053 (496.77 it/sec) -training >> step=6842100, episode=1141 reward=0.7813736 (317.25 it/sec) -training >> step=6842200, episode=1141 reward=0.7957695 (498.01 it/sec) -training >> step=6842300, episode=1141 reward=0.7815163 (518.46 it/sec) -training >> step=6842400, episode=1141 reward=0.7904036 (497.13 it/sec) -training >> step=6842500, episode=1141 reward=0.812821 (464.44 it/sec) -training >> step=6842600, episode=1141 reward=0.7811118 (461.59 it/sec) -training >> step=6842700, episode=1141 reward=0.7813562 (463.98 it/sec) -training >> step=6842800, episode=1141 reward=0.7869701 (483.21 it/sec) -training >> step=6842900, episode=1141 reward=0.7968456 (448.07 it/sec) -training >> step=6843000, episode=1141 reward=0.7677897 (416.41 it/sec) -training >> step=6843100, episode=1141 reward=0.7725145 (493.26 it/sec) -training >> step=6843200, episode=1141 reward=0.790127 (479.16 it/sec) -training >> step=6843300, episode=1141 reward=0.7746843 (428.01 it/sec) -training >> step=6843400, episode=1141 reward=0.76759 (494.30 it/sec) -training >> step=6843500, episode=1141 reward=0.7732978 (455.00 it/sec) -training >> step=6843600, episode=1141 reward=0.7737972 (491.59 it/sec) -training >> step=6843700, episode=1141 reward=0.7834834 (479.23 it/sec) -training >> step=6843800, episode=1141 reward=0.7744203 (450.96 it/sec) -training >> step=6843900, episode=1141 reward=0.769537 (477.39 it/sec) -training >> step=6844000, episode=1141 reward=0.7896541 (431.92 it/sec) -training >> step=6844100, episode=1141 reward=0.7803968 (471.07 it/sec) -training >> step=6844200, episode=1141 reward=0.7815149 (428.86 it/sec) -training >> step=6844300, episode=1141 reward=0.7669294 (475.81 it/sec) -training >> step=6844400, episode=1141 reward=0.766966 (496.70 it/sec) -training >> step=6844500, episode=1141 reward=0.7840728 (488.35 it/sec) -training >> step=6844600, episode=1141 reward=0.7964907 (483.40 it/sec) -training >> step=6844700, episode=1141 reward=0.793013 (528.85 it/sec) -training >> step=6844800, episode=1141 reward=0.7957275 (491.64 it/sec) -training >> step=6844900, episode=1141 reward=0.777501 (422.15 it/sec) -training >> step=6845000, episode=1141 reward=0.7831305 (494.44 it/sec) -training >> step=6845100, episode=1141 reward=0.7842308 (496.72 it/sec) -training >> step=6845200, episode=1141 reward=0.7928568 (501.58 it/sec) -training >> step=6845300, episode=1142 reward=0.7572482 (87.45 it/sec) -training >> step=6845400, episode=1142 reward=0.7845031 (509.40 it/sec) -training >> step=6845500, episode=1142 reward=0.7877185 (485.29 it/sec) -training >> step=6845600, episode=1142 reward=0.7775912 (487.65 it/sec) -training >> step=6845700, episode=1142 reward=0.7680172 (480.73 it/sec) -training >> step=6845800, episode=1142 reward=0.7943897 (513.73 it/sec) -training >> step=6845900, episode=1142 reward=0.7755568 (483.43 it/sec) -training >> step=6846000, episode=1142 reward=0.788151 (474.10 it/sec) -training >> step=6846100, episode=1142 reward=0.803643 (522.90 it/sec) -training >> step=6846200, episode=1142 reward=0.7660289 (494.78 it/sec) -training >> step=6846300, episode=1142 reward=0.7696281 (491.70 it/sec) -training >> step=6846400, episode=1142 reward=0.7823254 (490.91 it/sec) -training >> step=6846500, episode=1142 reward=0.7769084 (484.05 it/sec) -training >> step=6846600, episode=1142 reward=0.7764834 (520.96 it/sec) -training >> step=6846700, episode=1142 reward=0.7870067 (466.00 it/sec) -training >> step=6846800, episode=1142 reward=0.7930141 (479.90 it/sec) -training >> step=6846900, episode=1142 reward=0.7762537 (502.05 it/sec) -training >> step=6847000, episode=1142 reward=0.7938679 (464.84 it/sec) -training >> step=6847100, episode=1142 reward=0.7677711 (485.97 it/sec) -training >> step=6847200, episode=1142 reward=0.784497 (509.30 it/sec) -training >> step=6847300, episode=1142 reward=0.791697 (480.00 it/sec) -training >> step=6847400, episode=1142 reward=0.7602212 (472.59 it/sec) -training >> step=6847500, episode=1142 reward=0.7815499 (469.01 it/sec) -training >> step=6847600, episode=1142 reward=0.7858704 (522.07 it/sec) -training >> step=6847700, episode=1142 reward=0.7988184 (473.30 it/sec) -training >> step=6847800, episode=1142 reward=0.7849328 (447.18 it/sec) -training >> step=6847900, episode=1142 reward=0.7796211 (520.85 it/sec) -training >> step=6848000, episode=1142 reward=0.8029535 (462.84 it/sec) -training >> step=6848100, episode=1142 reward=0.81423 (488.78 it/sec) -training >> step=6848200, episode=1142 reward=0.771881 (474.70 it/sec) -training >> step=6848300, episode=1142 reward=0.7935734 (394.31 it/sec) -training >> step=6848400, episode=1142 reward=0.7830529 (472.42 it/sec) -training >> step=6848500, episode=1142 reward=0.7886475 (461.26 it/sec) -training >> step=6848600, episode=1142 reward=0.8019751 (452.49 it/sec) -training >> step=6848700, episode=1142 reward=0.7863236 (491.20 it/sec) -training >> step=6848800, episode=1142 reward=0.7921713 (499.99 it/sec) -training >> step=6848900, episode=1142 reward=0.7751247 (492.67 it/sec) -training >> step=6849000, episode=1142 reward=0.7957675 (508.86 it/sec) -training >> step=6849100, episode=1142 reward=0.7944707 (479.90 it/sec) -training >> step=6849200, episode=1142 reward=0.8079048 (476.89 it/sec) -training >> step=6849300, episode=1142 reward=0.7862496 (455.97 it/sec) -training >> step=6849400, episode=1142 reward=0.7832809 (492.60 it/sec) -training >> step=6849500, episode=1142 reward=0.78224 (491.57 it/sec) -training >> step=6849600, episode=1142 reward=0.7886143 (505.30 it/sec) -training >> step=6849700, episode=1142 reward=0.7775324 (498.76 it/sec) -training >> step=6849800, episode=1142 reward=0.7774813 (509.08 it/sec) -training >> step=6849900, episode=1142 reward=0.8015695 (494.27 it/sec) -training >> step=6850000, episode=1142 reward=0.7846275 (495.73 it/sec) -training >> step=6850100, episode=1142 reward=0.7952051 (510.50 it/sec) -training >> step=6850200, episode=1142 reward=0.7617484 (483.16 it/sec) -training >> step=6850300, episode=1142 reward=0.763166 (493.87 it/sec) -training >> step=6850400, episode=1142 reward=0.7860469 (496.34 it/sec) -training >> step=6850500, episode=1142 reward=0.7743661 (437.05 it/sec) -training >> step=6850600, episode=1142 reward=0.7719954 (496.37 it/sec) -training >> step=6850700, episode=1142 reward=0.7871293 (490.74 it/sec) -training >> step=6850800, episode=1142 reward=0.7899616 (504.51 it/sec) -training >> step=6850900, episode=1142 reward=0.7822972 (473.96 it/sec) -training >> step=6851000, episode=1142 reward=0.7854639 (457.66 it/sec) -training >> step=6851100, episode=1142 reward=0.7919277 (466.98 it/sec) -training >> step=6851200, episode=1142 reward=0.7964348 (496.68 it/sec) -training >> step=6851300, episode=1143 reward=0.7574408 (95.01 it/sec) -training >> step=6851400, episode=1143 reward=0.789453 (490.67 it/sec) -training >> step=6851500, episode=1143 reward=0.7779871 (499.58 it/sec) -training >> step=6851600, episode=1143 reward=0.8058046 (492.55 it/sec) -training >> step=6851700, episode=1143 reward=0.7884976 (482.69 it/sec) -training >> step=6851800, episode=1143 reward=0.8005857 (473.63 it/sec) -training >> step=6851900, episode=1143 reward=0.7861452 (463.54 it/sec) -training >> step=6852000, episode=1143 reward=0.7616835 (447.77 it/sec) -training >> step=6852100, episode=1143 reward=0.8003111 (492.47 it/sec) -training >> step=6852200, episode=1143 reward=0.7961305 (516.46 it/sec) -training >> step=6852300, episode=1143 reward=0.8009565 (527.14 it/sec) -training >> step=6852400, episode=1143 reward=0.7812574 (514.42 it/sec) -training >> step=6852500, episode=1143 reward=0.8038772 (549.00 it/sec) -training >> step=6852600, episode=1143 reward=0.7985969 (480.29 it/sec) -training >> step=6852700, episode=1143 reward=0.7628855 (513.34 it/sec) -training >> step=6852800, episode=1143 reward=0.7837462 (519.88 it/sec) -training >> step=6852900, episode=1143 reward=0.7802271 (516.94 it/sec) -training >> step=6853000, episode=1143 reward=0.789533 (508.58 it/sec) -training >> step=6853100, episode=1143 reward=0.7878422 (459.64 it/sec) -training >> step=6853200, episode=1143 reward=0.7679366 (476.49 it/sec) -training >> step=6853300, episode=1143 reward=0.797867 (495.17 it/sec) -training >> step=6853400, episode=1143 reward=0.802051 (492.53 it/sec) -training >> step=6853500, episode=1143 reward=0.802238 (462.91 it/sec) -training >> step=6853600, episode=1143 reward=0.778685 (511.19 it/sec) -training >> step=6853700, episode=1143 reward=0.7816222 (476.46 it/sec) -training >> step=6853800, episode=1143 reward=0.8208424 (460.21 it/sec) -training >> step=6853900, episode=1143 reward=0.7822616 (478.34 it/sec) -training >> step=6854000, episode=1143 reward=0.797942 (498.35 it/sec) -training >> step=6854100, episode=1143 reward=0.7832373 (498.87 it/sec) -training >> step=6854200, episode=1143 reward=0.7991517 (480.11 it/sec) -training >> step=6854300, episode=1143 reward=0.8077241 (498.85 it/sec) -training >> step=6854400, episode=1143 reward=0.8057306 (365.95 it/sec) -training >> step=6854500, episode=1143 reward=0.7985843 (474.22 it/sec) -training >> step=6854600, episode=1143 reward=0.7623051 (491.39 it/sec) -training >> step=6854700, episode=1143 reward=0.8007483 (485.81 it/sec) -training >> step=6854800, episode=1143 reward=0.7957314 (519.94 it/sec) -training >> step=6854900, episode=1143 reward=0.7916647 (461.93 it/sec) -training >> step=6855000, episode=1143 reward=0.7954081 (473.86 it/sec) -training >> step=6855100, episode=1143 reward=0.8017182 (468.89 it/sec) -training >> step=6855200, episode=1143 reward=0.7813247 (480.06 it/sec) -training >> step=6855300, episode=1143 reward=0.773219 (466.93 it/sec) -training >> step=6855400, episode=1143 reward=0.7814276 (490.29 it/sec) -training >> step=6855500, episode=1143 reward=0.793866 (452.95 it/sec) -training >> step=6855600, episode=1143 reward=0.7893001 (441.20 it/sec) -training >> step=6855700, episode=1143 reward=0.7715018 (437.61 it/sec) -training >> step=6855800, episode=1143 reward=0.8010858 (443.21 it/sec) -training >> step=6855900, episode=1143 reward=0.7800288 (455.19 it/sec) -training >> step=6856000, episode=1143 reward=0.8108087 (441.34 it/sec) -training >> step=6856100, episode=1143 reward=0.7649025 (472.40 it/sec) -training >> step=6856200, episode=1143 reward=0.7809666 (424.03 it/sec) -training >> step=6856300, episode=1143 reward=0.7773696 (459.18 it/sec) -training >> step=6856400, episode=1143 reward=0.7640315 (405.74 it/sec) -training >> step=6856500, episode=1143 reward=0.787549 (488.79 it/sec) -training >> step=6856600, episode=1143 reward=0.7850813 (470.98 it/sec) -training >> step=6856700, episode=1143 reward=0.7649603 (460.37 it/sec) -training >> step=6856800, episode=1143 reward=0.7805881 (458.63 it/sec) -training >> step=6856900, episode=1143 reward=0.7866142 (444.62 it/sec) -training >> step=6857000, episode=1143 reward=0.7897821 (484.19 it/sec) -training >> step=6857100, episode=1143 reward=0.7783429 (450.80 it/sec) -training >> step=6857200, episode=1143 reward=0.8023676 (488.65 it/sec) -training >> step=6857300, episode=1144 reward=0.7944483 (94.17 it/sec) -training >> step=6857400, episode=1144 reward=0.7751114 (456.30 it/sec) -training >> step=6857500, episode=1144 reward=0.8002173 (441.92 it/sec) -training >> step=6857600, episode=1144 reward=0.7844198 (491.08 it/sec) -training >> step=6857700, episode=1144 reward=0.7840897 (492.11 it/sec) -training >> step=6857800, episode=1144 reward=0.7792255 (480.76 it/sec) -training >> step=6857900, episode=1144 reward=0.7900382 (482.44 it/sec) -training >> step=6858000, episode=1144 reward=0.7889337 (475.86 it/sec) -training >> step=6858100, episode=1144 reward=0.7811233 (500.67 it/sec) -training >> step=6858200, episode=1144 reward=0.7768672 (505.32 it/sec) -training >> step=6858300, episode=1144 reward=0.7723303 (486.92 it/sec) -training >> step=6858400, episode=1144 reward=0.7920048 (450.45 it/sec) -training >> step=6858500, episode=1144 reward=0.7911808 (532.26 it/sec) -training >> step=6858600, episode=1144 reward=0.7792835 (496.55 it/sec) -training >> step=6858700, episode=1144 reward=0.7804956 (496.99 it/sec) -training >> step=6858800, episode=1144 reward=0.7921863 (528.49 it/sec) -training >> step=6858900, episode=1144 reward=0.7801932 (514.94 it/sec) -training >> step=6859000, episode=1144 reward=0.7851504 (491.10 it/sec) -training >> step=6859100, episode=1144 reward=0.7801878 (455.14 it/sec) -training >> step=6859200, episode=1144 reward=0.7833221 (510.19 it/sec) -training >> step=6859300, episode=1144 reward=0.780717 (457.95 it/sec) -training >> step=6859400, episode=1144 reward=0.7975049 (477.86 it/sec) -training >> step=6859500, episode=1144 reward=0.7846039 (474.49 it/sec) -training >> step=6859600, episode=1144 reward=0.773088 (506.16 it/sec) -training >> step=6859700, episode=1144 reward=0.794674 (485.15 it/sec) -training >> step=6859800, episode=1144 reward=0.7786017 (512.00 it/sec) -training >> step=6859900, episode=1144 reward=0.8192716 (452.59 it/sec) -training >> step=6860000, episode=1144 reward=0.7824503 (506.73 it/sec) -training >> step=6860100, episode=1144 reward=0.788048 (487.78 it/sec) -training >> step=6860200, episode=1144 reward=0.8016715 (491.01 it/sec) -training >> step=6860300, episode=1144 reward=0.7797233 (489.24 it/sec) -training >> step=6860400, episode=1144 reward=0.780677 (453.12 it/sec) -training >> step=6860500, episode=1144 reward=0.7920916 (486.91 it/sec) -training >> step=6860600, episode=1144 reward=0.7831239 (375.65 it/sec) -training >> step=6860700, episode=1144 reward=0.7867187 (525.55 it/sec) -training >> step=6860800, episode=1144 reward=0.7908436 (521.00 it/sec) -training >> step=6860900, episode=1144 reward=0.785632 (470.80 it/sec) -training >> step=6861000, episode=1144 reward=0.7811347 (544.66 it/sec) -training >> step=6861100, episode=1144 reward=0.7948754 (505.19 it/sec) -training >> step=6861200, episode=1144 reward=0.7778027 (508.41 it/sec) -training >> step=6861300, episode=1144 reward=0.7742767 (523.34 it/sec) -training >> step=6861400, episode=1144 reward=0.7906525 (558.37 it/sec) -training >> step=6861500, episode=1144 reward=0.782679 (469.63 it/sec) -training >> step=6861600, episode=1144 reward=0.779407 (504.91 it/sec) -training >> step=6861700, episode=1144 reward=0.7732888 (507.83 it/sec) -training >> step=6861800, episode=1144 reward=0.7913261 (533.62 it/sec) -training >> step=6861900, episode=1144 reward=0.7745881 (492.86 it/sec) -training >> step=6862000, episode=1144 reward=0.7972655 (481.88 it/sec) -training >> step=6862100, episode=1144 reward=0.7933992 (507.46 it/sec) -training >> step=6862200, episode=1144 reward=0.755986 (505.53 it/sec) -training >> step=6862300, episode=1144 reward=0.7459986 (421.10 it/sec) -training >> step=6862400, episode=1144 reward=0.7785646 (486.01 it/sec) -training >> step=6862500, episode=1144 reward=0.7778528 (475.64 it/sec) -training >> step=6862600, episode=1144 reward=0.7984781 (481.26 it/sec) -training >> step=6862700, episode=1144 reward=0.7647818 (506.30 it/sec) -training >> step=6862800, episode=1144 reward=0.7597541 (511.14 it/sec) -training >> step=6862900, episode=1144 reward=0.7783766 (521.27 it/sec) -training >> step=6863000, episode=1144 reward=0.8042809 (492.86 it/sec) -training >> step=6863100, episode=1144 reward=0.7777927 (513.97 it/sec) -training >> step=6863200, episode=1144 reward=0.800269 (490.82 it/sec) -training >> step=6863300, episode=1145 reward=0.7919604 (98.10 it/sec) -training >> step=6863400, episode=1145 reward=0.775837 (435.71 it/sec) -training >> step=6863500, episode=1145 reward=0.7639975 (497.68 it/sec) -training >> step=6863600, episode=1145 reward=0.8006452 (500.06 it/sec) -training >> step=6863700, episode=1145 reward=0.7988821 (491.32 it/sec) -training >> step=6863800, episode=1145 reward=0.7831776 (511.97 it/sec) -training >> step=6863900, episode=1145 reward=0.7969616 (521.32 it/sec) -training >> step=6864000, episode=1145 reward=0.7855977 (510.58 it/sec) -training >> step=6864100, episode=1145 reward=0.7915545 (484.04 it/sec) -training >> step=6864200, episode=1145 reward=0.7936046 (475.68 it/sec) -training >> step=6864300, episode=1145 reward=0.7922773 (511.55 it/sec) -training >> step=6864400, episode=1145 reward=0.8005612 (477.70 it/sec) -training >> step=6864500, episode=1145 reward=0.7891521 (495.52 it/sec) -training >> step=6864600, episode=1145 reward=0.8176715 (455.78 it/sec) -training >> step=6864700, episode=1145 reward=0.7769787 (500.29 it/sec) -training >> step=6864800, episode=1145 reward=0.7859089 (500.83 it/sec) -training >> step=6864900, episode=1145 reward=0.7636741 (492.56 it/sec) -training >> step=6865000, episode=1145 reward=0.7834762 (546.23 it/sec) -training >> step=6865100, episode=1145 reward=0.789772 (506.97 it/sec) -training >> step=6865200, episode=1145 reward=0.7712402 (510.79 it/sec) -training >> step=6865300, episode=1145 reward=0.7899012 (503.60 it/sec) -training >> step=6865400, episode=1145 reward=0.7676971 (502.92 it/sec) -training >> step=6865500, episode=1145 reward=0.7736689 (503.34 it/sec) -training >> step=6865600, episode=1145 reward=0.8006358 (513.35 it/sec) -training >> step=6865700, episode=1145 reward=0.7972764 (507.87 it/sec) -training >> step=6865800, episode=1145 reward=0.7927886 (544.57 it/sec) -training >> step=6865900, episode=1145 reward=0.7919282 (508.84 it/sec) -training >> step=6866000, episode=1145 reward=0.7900704 (504.79 it/sec) -training >> step=6866100, episode=1145 reward=0.7765428 (510.99 it/sec) -training >> step=6866200, episode=1145 reward=0.8068511 (496.11 it/sec) -training >> step=6866300, episode=1145 reward=0.7887337 (505.84 it/sec) -training >> step=6866400, episode=1145 reward=0.7706209 (506.83 it/sec) -training >> step=6866500, episode=1145 reward=0.799648 (392.37 it/sec) -training >> step=6866600, episode=1145 reward=0.7883794 (470.73 it/sec) -training >> step=6866700, episode=1145 reward=0.7837366 (502.40 it/sec) -training >> step=6866800, episode=1145 reward=0.7884551 (487.77 it/sec) -training >> step=6866900, episode=1145 reward=0.790197 (478.19 it/sec) -training >> step=6867000, episode=1145 reward=0.767607 (495.59 it/sec) -training >> step=6867100, episode=1145 reward=0.7608046 (475.45 it/sec) -training >> step=6867200, episode=1145 reward=0.8009641 (489.44 it/sec) -training >> step=6867300, episode=1145 reward=0.7830018 (484.07 it/sec) -training >> step=6867400, episode=1145 reward=0.7808717 (481.95 it/sec) -training >> step=6867500, episode=1145 reward=0.7914899 (526.05 it/sec) -training >> step=6867600, episode=1145 reward=0.7878342 (507.88 it/sec) -training >> step=6867700, episode=1145 reward=0.781441 (487.46 it/sec) -training >> step=6867800, episode=1145 reward=0.7766597 (521.50 it/sec) -training >> step=6867900, episode=1145 reward=0.7758707 (527.03 it/sec) -training >> step=6868000, episode=1145 reward=0.7522378 (541.80 it/sec) -training >> step=6868100, episode=1145 reward=0.7754006 (502.46 it/sec) -training >> step=6868200, episode=1145 reward=0.7717304 (467.68 it/sec) -training >> step=6868300, episode=1145 reward=0.7682599 (508.78 it/sec) -training >> step=6868400, episode=1145 reward=0.7753335 (497.55 it/sec) -training >> step=6868500, episode=1145 reward=0.7932584 (520.60 it/sec) -training >> step=6868600, episode=1145 reward=0.7927012 (481.67 it/sec) -training >> step=6868700, episode=1145 reward=0.7958372 (524.47 it/sec) -training >> step=6868800, episode=1145 reward=0.782025 (487.48 it/sec) -training >> step=6868900, episode=1145 reward=0.7709348 (500.40 it/sec) -training >> step=6869000, episode=1145 reward=0.7951111 (530.80 it/sec) -training >> step=6869100, episode=1145 reward=0.7829586 (516.91 it/sec) -training >> step=6869200, episode=1145 reward=0.7627254 (483.08 it/sec) -training >> step=6869300, episode=1146 reward=0.7854226 (111.94 it/sec) -training >> step=6869400, episode=1146 reward=0.767677 (362.85 it/sec) -training >> step=6869500, episode=1146 reward=0.8043834 (490.04 it/sec) -training >> step=6869600, episode=1146 reward=0.7929505 (476.00 it/sec) -training >> step=6869700, episode=1146 reward=0.7690671 (491.09 it/sec) -training >> step=6869800, episode=1146 reward=0.7704318 (473.68 it/sec) -training >> step=6869900, episode=1146 reward=0.7724463 (466.21 it/sec) -training >> step=6870000, episode=1146 reward=0.7810985 (493.71 it/sec) -training >> step=6870100, episode=1146 reward=0.8054413 (548.45 it/sec) -training >> step=6870200, episode=1146 reward=0.7980212 (520.11 it/sec) -training >> step=6870300, episode=1146 reward=0.7815284 (517.59 it/sec) -training >> step=6870400, episode=1146 reward=0.7870995 (497.94 it/sec) -training >> step=6870500, episode=1146 reward=0.7799818 (517.04 it/sec) -training >> step=6870600, episode=1146 reward=0.7839887 (490.63 it/sec) -training >> step=6870700, episode=1146 reward=0.8050143 (496.55 it/sec) -training >> step=6870800, episode=1146 reward=0.7953206 (435.64 it/sec) -training >> step=6870900, episode=1146 reward=0.7823052 (499.79 it/sec) -training >> step=6871000, episode=1146 reward=0.7873849 (427.19 it/sec) -training >> step=6871100, episode=1146 reward=0.7939878 (456.47 it/sec) -training >> step=6871200, episode=1146 reward=0.7871369 (490.35 it/sec) -training >> step=6871300, episode=1146 reward=0.7894796 (502.45 it/sec) -training >> step=6871400, episode=1146 reward=0.8033265 (469.65 it/sec) -training >> step=6871500, episode=1146 reward=0.7922169 (474.31 it/sec) -training >> step=6871600, episode=1146 reward=0.7935762 (521.65 it/sec) -training >> step=6871700, episode=1146 reward=0.7767035 (478.95 it/sec) -training >> step=6871800, episode=1146 reward=0.8105708 (488.43 it/sec) -training >> step=6871900, episode=1146 reward=0.7484472 (483.32 it/sec) -training >> step=6872000, episode=1146 reward=0.7861447 (512.27 it/sec) -training >> step=6872100, episode=1146 reward=0.7957346 (471.25 it/sec) -training >> step=6872200, episode=1146 reward=0.7933446 (419.18 it/sec) -training >> step=6872300, episode=1146 reward=0.7840918 (487.09 it/sec) -training >> step=6872400, episode=1146 reward=0.7642153 (450.04 it/sec) -training >> step=6872500, episode=1146 reward=0.7702026 (471.28 it/sec) -training >> step=6872600, episode=1146 reward=0.8026146 (364.67 it/sec) -training >> step=6872700, episode=1146 reward=0.7933754 (517.84 it/sec) -training >> step=6872800, episode=1146 reward=0.7854591 (467.24 it/sec) -training >> step=6872900, episode=1146 reward=0.7884678 (467.50 it/sec) -training >> step=6873000, episode=1146 reward=0.7704013 (526.81 it/sec) -training >> step=6873100, episode=1146 reward=0.8039411 (504.73 it/sec) -training >> step=6873200, episode=1146 reward=0.7884248 (431.54 it/sec) -training >> step=6873300, episode=1146 reward=0.7939587 (464.18 it/sec) -training >> step=6873400, episode=1146 reward=0.7795872 (531.43 it/sec) -training >> step=6873500, episode=1146 reward=0.7886497 (488.98 it/sec) -training >> step=6873600, episode=1146 reward=0.7664591 (471.76 it/sec) -training >> step=6873700, episode=1146 reward=0.8001755 (485.10 it/sec) -training >> step=6873800, episode=1146 reward=0.7821386 (508.05 it/sec) -training >> step=6873900, episode=1146 reward=0.791267 (464.70 it/sec) -training >> step=6874000, episode=1146 reward=0.7744806 (473.30 it/sec) -training >> step=6874100, episode=1146 reward=0.7881537 (498.25 it/sec) -training >> step=6874200, episode=1146 reward=0.7804238 (499.15 it/sec) -training >> step=6874300, episode=1146 reward=0.7844248 (469.36 it/sec) -training >> step=6874400, episode=1146 reward=0.7589315 (434.25 it/sec) -training >> step=6874500, episode=1146 reward=0.7824355 (513.28 it/sec) -training >> step=6874600, episode=1146 reward=0.77382 (475.67 it/sec) -training >> step=6874700, episode=1146 reward=0.7649807 (445.06 it/sec) -training >> step=6874800, episode=1146 reward=0.7816684 (488.89 it/sec) -training >> step=6874900, episode=1146 reward=0.7832411 (501.56 it/sec) -training >> step=6875000, episode=1146 reward=0.7845525 (473.94 it/sec) -training >> step=6875100, episode=1146 reward=0.7800766 (490.02 it/sec) -training >> step=6875200, episode=1146 reward=0.7749186 (462.05 it/sec) -training >> step=6875300, episode=1147 reward=0.8067594 (89.90 it/sec) -training >> step=6875400, episode=1147 reward=0.7774336 (443.28 it/sec) -training >> step=6875500, episode=1147 reward=0.7946528 (482.04 it/sec) -training >> step=6875600, episode=1147 reward=0.7872625 (468.75 it/sec) -training >> step=6875700, episode=1147 reward=0.7855408 (504.13 it/sec) -training >> step=6875800, episode=1147 reward=0.8031005 (458.78 it/sec) -training >> step=6875900, episode=1147 reward=0.781648 (421.53 it/sec) -training >> step=6876000, episode=1147 reward=0.7867066 (509.30 it/sec) -training >> step=6876100, episode=1147 reward=0.7738377 (449.73 it/sec) -training >> step=6876200, episode=1147 reward=0.8023024 (485.18 it/sec) -training >> step=6876300, episode=1147 reward=0.7695698 (513.34 it/sec) -training >> step=6876400, episode=1147 reward=0.7954516 (459.34 it/sec) -training >> step=6876500, episode=1147 reward=0.7839416 (488.46 it/sec) -training >> step=6876600, episode=1147 reward=0.7920901 (480.46 it/sec) -training >> step=6876700, episode=1147 reward=0.7928782 (475.10 it/sec) -training >> step=6876800, episode=1147 reward=0.7893285 (467.39 it/sec) -training >> step=6876900, episode=1147 reward=0.7884038 (461.01 it/sec) -training >> step=6877000, episode=1147 reward=0.7860162 (479.51 it/sec) -training >> step=6877100, episode=1147 reward=0.7914315 (531.00 it/sec) -training >> step=6877200, episode=1147 reward=0.7882861 (437.40 it/sec) -training >> step=6877300, episode=1147 reward=0.7788438 (511.14 it/sec) -training >> step=6877400, episode=1147 reward=0.7966385 (506.01 it/sec) -training >> step=6877500, episode=1147 reward=0.7892745 (473.46 it/sec) -training >> step=6877600, episode=1147 reward=0.798998 (515.04 it/sec) -training >> step=6877700, episode=1147 reward=0.8028581 (503.31 it/sec) -training >> step=6877800, episode=1147 reward=0.7808228 (510.99 it/sec) -training >> step=6877900, episode=1147 reward=0.789079 (495.02 it/sec) -training >> step=6878000, episode=1147 reward=0.7858556 (524.01 it/sec) -training >> step=6878100, episode=1147 reward=0.7873639 (465.30 it/sec) -training >> step=6878200, episode=1147 reward=0.7866822 (456.38 it/sec) -training >> step=6878300, episode=1147 reward=0.7920691 (465.88 it/sec) -training >> step=6878400, episode=1147 reward=0.7835187 (443.90 it/sec) -training >> step=6878500, episode=1147 reward=0.7894492 (472.33 it/sec) -training >> step=6878600, episode=1147 reward=0.7868799 (524.64 it/sec) -training >> step=6878700, episode=1147 reward=0.7763063 (475.68 it/sec) -training >> step=6878800, episode=1147 reward=0.7933285 (362.76 it/sec) -training >> step=6878900, episode=1147 reward=0.7753243 (534.89 it/sec) -training >> step=6879000, episode=1147 reward=0.7806529 (522.71 it/sec) -training >> step=6879100, episode=1147 reward=0.7762861 (509.26 it/sec) -training >> step=6879200, episode=1147 reward=0.7862214 (491.06 it/sec) -training >> step=6879300, episode=1147 reward=0.7991617 (527.37 it/sec) -training >> step=6879400, episode=1147 reward=0.7898394 (458.85 it/sec) -training >> step=6879500, episode=1147 reward=0.7880387 (510.11 it/sec) -training >> step=6879600, episode=1147 reward=0.8019888 (508.02 it/sec) -training >> step=6879700, episode=1147 reward=0.7816946 (503.43 it/sec) -training >> step=6879800, episode=1147 reward=0.7882648 (507.58 it/sec) -training >> step=6879900, episode=1147 reward=0.8061752 (504.75 it/sec) -training >> step=6880000, episode=1147 reward=0.7799088 (487.17 it/sec) -training >> step=6880100, episode=1147 reward=0.7734043 (482.36 it/sec) -training >> step=6880200, episode=1147 reward=0.7864653 (452.42 it/sec) -training >> step=6880300, episode=1147 reward=0.7768415 (522.29 it/sec) -training >> step=6880400, episode=1147 reward=0.7978692 (494.72 it/sec) -training >> step=6880500, episode=1147 reward=0.7936891 (451.19 it/sec) -training >> step=6880600, episode=1147 reward=0.7582824 (442.60 it/sec) -training >> step=6880700, episode=1147 reward=0.7665924 (469.87 it/sec) -training >> step=6880800, episode=1147 reward=0.7804868 (509.95 it/sec) -training >> step=6880900, episode=1147 reward=0.7763639 (509.33 it/sec) -training >> step=6881000, episode=1147 reward=0.7640765 (534.66 it/sec) -training >> step=6881100, episode=1147 reward=0.7876033 (519.62 it/sec) -training >> step=6881200, episode=1147 reward=0.7610473 (506.81 it/sec) -training >> step=6881300, episode=1148 reward=0.769029 (97.21 it/sec) -training >> step=6881400, episode=1148 reward=0.7893003 (498.38 it/sec) -training >> step=6881500, episode=1148 reward=0.7886544 (531.39 it/sec) -training >> step=6881600, episode=1148 reward=0.786303 (523.04 it/sec) -training >> step=6881700, episode=1148 reward=0.7869992 (503.48 it/sec) -training >> step=6881800, episode=1148 reward=0.7780967 (533.86 it/sec) -training >> step=6881900, episode=1148 reward=0.7948114 (510.44 it/sec) -training >> step=6882000, episode=1148 reward=0.7855226 (522.02 it/sec) -training >> step=6882100, episode=1148 reward=0.8023878 (516.17 it/sec) -training >> step=6882200, episode=1148 reward=0.7939226 (528.72 it/sec) -training >> step=6882300, episode=1148 reward=0.7626119 (496.28 it/sec) -training >> step=6882400, episode=1148 reward=0.7952673 (520.60 it/sec) -training >> step=6882500, episode=1148 reward=0.7919163 (537.42 it/sec) -training >> step=6882600, episode=1148 reward=0.7955393 (479.13 it/sec) -training >> step=6882700, episode=1148 reward=0.7907587 (522.34 it/sec) -training >> step=6882800, episode=1148 reward=0.7893494 (539.63 it/sec) -training >> step=6882900, episode=1148 reward=0.7823095 (499.89 it/sec) -training >> step=6883000, episode=1148 reward=0.8108492 (515.13 it/sec) -training >> step=6883100, episode=1148 reward=0.7923096 (453.40 it/sec) -training >> step=6883200, episode=1148 reward=0.7810714 (495.07 it/sec) -training >> step=6883300, episode=1148 reward=0.7706704 (472.64 it/sec) -training >> step=6883400, episode=1148 reward=0.8141997 (504.06 it/sec) -training >> step=6883500, episode=1148 reward=0.7941514 (529.83 it/sec) -training >> step=6883600, episode=1148 reward=0.8010573 (481.05 it/sec) -training >> step=6883700, episode=1148 reward=0.7870816 (505.66 it/sec) -training >> step=6883800, episode=1148 reward=0.7843126 (489.99 it/sec) -training >> step=6883900, episode=1148 reward=0.7967954 (534.92 it/sec) -training >> step=6884000, episode=1148 reward=0.7880043 (533.10 it/sec) -training >> step=6884100, episode=1148 reward=0.7960536 (421.40 it/sec) -training >> step=6884200, episode=1148 reward=0.7773351 (513.07 it/sec) -training >> step=6884300, episode=1148 reward=0.7949504 (540.53 it/sec) -training >> step=6884400, episode=1148 reward=0.7905949 (466.99 it/sec) -training >> step=6884500, episode=1148 reward=0.7830715 (504.88 it/sec) -training >> step=6884600, episode=1148 reward=0.7795601 (493.65 it/sec) -training >> step=6884700, episode=1148 reward=0.7832786 (462.60 it/sec) -training >> step=6884800, episode=1148 reward=0.7595397 (543.05 it/sec) -training >> step=6884900, episode=1148 reward=0.7834835 (363.74 it/sec) -training >> step=6885000, episode=1148 reward=0.8040156 (525.89 it/sec) -training >> step=6885100, episode=1148 reward=0.8030866 (481.27 it/sec) -training >> step=6885200, episode=1148 reward=0.8082486 (497.22 it/sec) -training >> step=6885300, episode=1148 reward=0.7880267 (473.61 it/sec) -training >> step=6885400, episode=1148 reward=0.7642755 (500.11 it/sec) -training >> step=6885500, episode=1148 reward=0.7900037 (527.23 it/sec) -training >> step=6885600, episode=1148 reward=0.8120362 (512.79 it/sec) -training >> step=6885700, episode=1148 reward=0.7857155 (487.91 it/sec) -training >> step=6885800, episode=1148 reward=0.7911406 (503.44 it/sec) -training >> step=6885900, episode=1148 reward=0.803438 (468.42 it/sec) -training >> step=6886000, episode=1148 reward=0.7829505 (512.94 it/sec) -training >> step=6886100, episode=1148 reward=0.7958931 (507.63 it/sec) -training >> step=6886200, episode=1148 reward=0.7896897 (495.86 it/sec) -training >> step=6886300, episode=1148 reward=0.786625 (524.40 it/sec) -training >> step=6886400, episode=1148 reward=0.7785911 (528.03 it/sec) -training >> step=6886500, episode=1148 reward=0.7927903 (497.40 it/sec) -training >> step=6886600, episode=1148 reward=0.7793387 (478.62 it/sec) -training >> step=6886700, episode=1148 reward=0.7742301 (519.15 it/sec) -training >> step=6886800, episode=1148 reward=0.7749145 (555.20 it/sec) -training >> step=6886900, episode=1148 reward=0.7700832 (499.04 it/sec) -training >> step=6887000, episode=1148 reward=0.7723494 (499.93 it/sec) -training >> step=6887100, episode=1148 reward=0.794072 (488.41 it/sec) -training >> step=6887200, episode=1148 reward=0.7788072 (507.90 it/sec) -training >> step=6887300, episode=1149 reward=0.7960533 (61.11 it/sec) -training >> step=6887400, episode=1149 reward=0.7770612 (498.08 it/sec) -training >> step=6887500, episode=1149 reward=0.7780118 (449.06 it/sec) -training >> step=6887600, episode=1149 reward=0.7959451 (505.41 it/sec) -training >> step=6887700, episode=1149 reward=0.7957612 (479.65 it/sec) -training >> step=6887800, episode=1149 reward=0.7684205 (464.15 it/sec) -training >> step=6887900, episode=1149 reward=0.7836434 (457.01 it/sec) -training >> step=6888000, episode=1149 reward=0.7827657 (518.45 it/sec) -training >> step=6888100, episode=1149 reward=0.7877863 (484.10 it/sec) -training >> step=6888200, episode=1149 reward=0.7930318 (480.65 it/sec) -training >> step=6888300, episode=1149 reward=0.7947475 (445.64 it/sec) -training >> step=6888400, episode=1149 reward=0.7853408 (450.24 it/sec) -training >> step=6888500, episode=1149 reward=0.815021 (477.97 it/sec) -training >> step=6888600, episode=1149 reward=0.7949451 (521.83 it/sec) -training >> step=6888700, episode=1149 reward=0.7807698 (439.14 it/sec) -training >> step=6888800, episode=1149 reward=0.7749059 (500.05 it/sec) -training >> step=6888900, episode=1149 reward=0.7974494 (494.99 it/sec) -training >> step=6889000, episode=1149 reward=0.7979192 (502.69 it/sec) -training >> step=6889100, episode=1149 reward=0.780654 (507.69 it/sec) -training >> step=6889200, episode=1149 reward=0.7725032 (515.74 it/sec) -training >> step=6889300, episode=1149 reward=0.7976185 (502.42 it/sec) -training >> step=6889400, episode=1149 reward=0.7723163 (472.31 it/sec) -training >> step=6889500, episode=1149 reward=0.7762029 (490.08 it/sec) -training >> step=6889600, episode=1149 reward=0.7916169 (521.78 it/sec) -training >> step=6889700, episode=1149 reward=0.7817028 (531.34 it/sec) -training >> step=6889800, episode=1149 reward=0.7675743 (492.37 it/sec) -training >> step=6889900, episode=1149 reward=0.8118821 (499.95 it/sec) -training >> step=6890000, episode=1149 reward=0.7986296 (490.95 it/sec) -training >> step=6890100, episode=1149 reward=0.7879863 (497.46 it/sec) -training >> step=6890200, episode=1149 reward=0.7864472 (515.87 it/sec) -training >> step=6890300, episode=1149 reward=0.79731 (482.90 it/sec) -training >> step=6890400, episode=1149 reward=0.794374 (474.88 it/sec) -training >> step=6890500, episode=1149 reward=0.794645 (457.18 it/sec) -training >> step=6890600, episode=1149 reward=0.7804823 (507.91 it/sec) -training >> step=6890700, episode=1149 reward=0.7956254 (486.62 it/sec) -training >> step=6890800, episode=1149 reward=0.8028352 (479.18 it/sec) -training >> step=6890900, episode=1149 reward=0.7936972 (487.95 it/sec) -training >> step=6891000, episode=1149 reward=0.8011307 (368.19 it/sec) -training >> step=6891100, episode=1149 reward=0.7836781 (478.41 it/sec) -training >> step=6891200, episode=1149 reward=0.7638038 (514.10 it/sec) -training >> step=6891300, episode=1149 reward=0.7794038 (479.45 it/sec) -training >> step=6891400, episode=1149 reward=0.7671341 (482.26 it/sec) -training >> step=6891500, episode=1149 reward=0.7950272 (469.84 it/sec) -training >> step=6891600, episode=1149 reward=0.8016649 (481.28 it/sec) -training >> step=6891700, episode=1149 reward=0.7873616 (467.33 it/sec) -training >> step=6891800, episode=1149 reward=0.7740842 (481.62 it/sec) -training >> step=6891900, episode=1149 reward=0.7795677 (425.47 it/sec) -training >> step=6892000, episode=1149 reward=0.7703096 (429.01 it/sec) -training >> step=6892100, episode=1149 reward=0.7889178 (420.66 it/sec) -training >> step=6892200, episode=1149 reward=0.779197 (486.15 it/sec) -training >> step=6892300, episode=1149 reward=0.7738825 (409.69 it/sec) -training >> step=6892400, episode=1149 reward=0.7715445 (514.59 it/sec) -training >> step=6892500, episode=1149 reward=0.7911523 (460.11 it/sec) -training >> step=6892600, episode=1149 reward=0.7793933 (355.25 it/sec) -training >> step=6892700, episode=1149 reward=0.8007152 (373.49 it/sec) -training >> step=6892800, episode=1149 reward=0.7791896 (346.51 it/sec) -training >> step=6892900, episode=1149 reward=0.7970488 (381.27 it/sec) -training >> step=6893000, episode=1149 reward=0.7662535 (391.48 it/sec) -training >> step=6893100, episode=1149 reward=0.7716075 (394.69 it/sec) -training >> step=6893200, episode=1149 reward=0.7809287 (401.84 it/sec) -training >> step=6893300, episode=1150 reward=0.8080371 (66.00 it/sec) -training >> step=6893400, episode=1150 reward=0.7986177 (370.49 it/sec) -training >> step=6893500, episode=1150 reward=0.7970374 (371.40 it/sec) -training >> step=6893600, episode=1150 reward=0.7907053 (432.11 it/sec) -training >> step=6893700, episode=1150 reward=0.7834261 (435.07 it/sec) -training >> step=6893800, episode=1150 reward=0.778352 (482.41 it/sec) -training >> step=6893900, episode=1150 reward=0.7887952 (469.27 it/sec) -training >> step=6894000, episode=1150 reward=0.7630613 (466.61 it/sec) -training >> step=6894100, episode=1150 reward=0.7929426 (458.27 it/sec) -training >> step=6894200, episode=1150 reward=0.7854488 (472.12 it/sec) -training >> step=6894300, episode=1150 reward=0.8014605 (438.28 it/sec) -training >> step=6894400, episode=1150 reward=0.7940339 (427.85 it/sec) -training >> step=6894500, episode=1150 reward=0.804974 (452.89 it/sec) -training >> step=6894600, episode=1150 reward=0.80316 (453.98 it/sec) -training >> step=6894700, episode=1150 reward=0.788854 (417.80 it/sec) -training >> step=6894800, episode=1150 reward=0.7829845 (488.68 it/sec) -training >> step=6894900, episode=1150 reward=0.808086 (468.78 it/sec) -training >> step=6895000, episode=1150 reward=0.7856544 (487.68 it/sec) -training >> step=6895100, episode=1150 reward=0.8103706 (435.75 it/sec) -training >> step=6895200, episode=1150 reward=0.7837541 (391.65 it/sec) -training >> step=6895300, episode=1150 reward=0.7871401 (400.44 it/sec) -training >> step=6895400, episode=1150 reward=0.8039098 (421.76 it/sec) -training >> step=6895500, episode=1150 reward=0.8069379 (440.11 it/sec) -training >> step=6895600, episode=1150 reward=0.8067535 (419.75 it/sec) -training >> step=6895700, episode=1150 reward=0.8119414 (400.38 it/sec) -training >> step=6895800, episode=1150 reward=0.7994574 (373.88 it/sec) -training >> step=6895900, episode=1150 reward=0.7903085 (433.90 it/sec) -training >> step=6896000, episode=1150 reward=0.7759812 (364.29 it/sec) -training >> step=6896100, episode=1150 reward=0.8119296 (455.60 it/sec) -training >> step=6896200, episode=1150 reward=0.7803037 (443.16 it/sec) -training >> step=6896300, episode=1150 reward=0.7604238 (410.81 it/sec) -training >> step=6896400, episode=1150 reward=0.7768641 (374.83 it/sec) -training >> step=6896500, episode=1150 reward=0.7871898 (463.49 it/sec) -training >> step=6896600, episode=1150 reward=0.8062258 (469.75 it/sec) -training >> step=6896700, episode=1150 reward=0.7852379 (445.79 it/sec) -training >> step=6896800, episode=1150 reward=0.770925 (406.72 it/sec) -training >> step=6896900, episode=1150 reward=0.7944444 (439.60 it/sec) -training >> step=6897000, episode=1150 reward=0.7906502 (481.20 it/sec) -training >> step=6897100, episode=1150 reward=0.787365 (425.31 it/sec) -training >> step=6897200, episode=1150 reward=0.7788718 (399.50 it/sec) -training >> step=6897300, episode=1150 reward=0.7901046 (402.35 it/sec) -training >> step=6897400, episode=1150 reward=0.774114 (305.03 it/sec) -training >> step=6897500, episode=1150 reward=0.781758 (426.15 it/sec) -training >> step=6897600, episode=1150 reward=0.8027216 (418.95 it/sec) -training >> step=6897700, episode=1150 reward=0.7800857 (439.41 it/sec) -training >> step=6897800, episode=1150 reward=0.7917882 (423.42 it/sec) -training >> step=6897900, episode=1150 reward=0.7712602 (450.20 it/sec) -training >> step=6898000, episode=1150 reward=0.7797041 (443.45 it/sec) -training >> step=6898100, episode=1150 reward=0.7499257 (492.06 it/sec) -training >> step=6898200, episode=1150 reward=0.80739 (457.61 it/sec) -training >> step=6898300, episode=1150 reward=0.7776313 (498.93 it/sec) -training >> step=6898400, episode=1150 reward=0.7874942 (495.62 it/sec) -training >> step=6898500, episode=1150 reward=0.7791772 (499.63 it/sec) -training >> step=6898600, episode=1150 reward=0.7964238 (448.62 it/sec) -training >> step=6898700, episode=1150 reward=0.78571 (451.58 it/sec) -training >> step=6898800, episode=1150 reward=0.7859131 (439.06 it/sec) -training >> step=6898900, episode=1150 reward=0.7862007 (400.09 it/sec) -training >> step=6899000, episode=1150 reward=0.7730376 (449.98 it/sec) -training >> step=6899100, episode=1150 reward=0.7915587 (473.21 it/sec) -training >> step=6899200, episode=1150 reward=0.7818632 (450.60 it/sec) -training >> step=6899300, episode=1151 reward=0.779974 (60.73 it/sec) -training >> step=6899400, episode=1151 reward=0.7758061 (515.87 it/sec) -training >> step=6899500, episode=1151 reward=0.8091667 (466.47 it/sec) -training >> step=6899600, episode=1151 reward=0.771943 (455.08 it/sec) -training >> step=6899700, episode=1151 reward=0.7873116 (478.69 it/sec) -training >> step=6899800, episode=1151 reward=0.7949429 (484.16 it/sec) -training >> step=6899900, episode=1151 reward=0.7933658 (472.68 it/sec) -training >> step=6900000, episode=1151 reward=0.7883043 (494.11 it/sec) -training >> step=6900100, episode=1151 reward=0.7843817 (510.74 it/sec) -training >> step=6900200, episode=1151 reward=0.7954976 (450.93 it/sec) -training >> step=6900300, episode=1151 reward=0.7901303 (497.63 it/sec) -training >> step=6900400, episode=1151 reward=0.7776527 (465.55 it/sec) -training >> step=6900500, episode=1151 reward=0.7883894 (438.59 it/sec) -training >> step=6900600, episode=1151 reward=0.812318 (486.84 it/sec) -training >> step=6900700, episode=1151 reward=0.7887511 (469.60 it/sec) -training >> step=6900800, episode=1151 reward=0.7772632 (486.74 it/sec) -training >> step=6900900, episode=1151 reward=0.7992543 (438.20 it/sec) -training >> step=6901000, episode=1151 reward=0.7715052 (432.24 it/sec) -training >> step=6901100, episode=1151 reward=0.7966276 (508.88 it/sec) -training >> step=6901200, episode=1151 reward=0.7942232 (432.45 it/sec) -training >> step=6901300, episode=1151 reward=0.7924437 (423.18 it/sec) -training >> step=6901400, episode=1151 reward=0.7888176 (476.87 it/sec) -training >> step=6901500, episode=1151 reward=0.7697191 (472.26 it/sec) -training >> step=6901600, episode=1151 reward=0.7880546 (513.78 it/sec) -training >> step=6901700, episode=1151 reward=0.8056312 (455.65 it/sec) -training >> step=6901800, episode=1151 reward=0.7841563 (445.30 it/sec) -training >> step=6901900, episode=1151 reward=0.7954394 (426.42 it/sec) -training >> step=6902000, episode=1151 reward=0.7913263 (471.07 it/sec) -training >> step=6902100, episode=1151 reward=0.78737 (445.87 it/sec) -training >> step=6902200, episode=1151 reward=0.8052216 (474.89 it/sec) -training >> step=6902300, episode=1151 reward=0.794759 (438.93 it/sec) -training >> step=6902400, episode=1151 reward=0.8051417 (508.28 it/sec) -training >> step=6902500, episode=1151 reward=0.7807927 (492.79 it/sec) -training >> step=6902600, episode=1151 reward=0.7898927 (473.31 it/sec) -training >> step=6902700, episode=1151 reward=0.7834002 (495.31 it/sec) -training >> step=6902800, episode=1151 reward=0.7835338 (471.43 it/sec) -training >> step=6902900, episode=1151 reward=0.7851768 (386.99 it/sec) -training >> step=6903000, episode=1151 reward=0.7810837 (453.29 it/sec) -training >> step=6903100, episode=1151 reward=0.7984607 (460.78 it/sec) -training >> step=6903200, episode=1151 reward=0.7711478 (490.73 it/sec) -training >> step=6903300, episode=1151 reward=0.7933137 (456.46 it/sec) -training >> step=6903400, episode=1151 reward=0.7678548 (514.25 it/sec) -training >> step=6903500, episode=1151 reward=0.7847106 (360.18 it/sec) -training >> step=6903600, episode=1151 reward=0.7787774 (464.92 it/sec) -training >> step=6903700, episode=1151 reward=0.7791081 (468.43 it/sec) -training >> step=6903800, episode=1151 reward=0.7830865 (460.81 it/sec) -training >> step=6903900, episode=1151 reward=0.7763234 (516.15 it/sec) -training >> step=6904000, episode=1151 reward=0.767606 (503.75 it/sec) -training >> step=6904100, episode=1151 reward=0.7940627 (487.80 it/sec) -training >> step=6904200, episode=1151 reward=0.7891476 (484.26 it/sec) -training >> step=6904300, episode=1151 reward=0.7810147 (475.07 it/sec) -training >> step=6904400, episode=1151 reward=0.7976885 (487.95 it/sec) -training >> step=6904500, episode=1151 reward=0.7974542 (467.20 it/sec) -training >> step=6904600, episode=1151 reward=0.7903708 (427.38 it/sec) -training >> step=6904700, episode=1151 reward=0.7622333 (435.95 it/sec) -training >> step=6904800, episode=1151 reward=0.7817179 (470.11 it/sec) -training >> step=6904900, episode=1151 reward=0.8071208 (444.65 it/sec) -training >> step=6905000, episode=1151 reward=0.7787755 (377.32 it/sec) -training >> step=6905100, episode=1151 reward=0.7834879 (443.56 it/sec) -training >> step=6905200, episode=1151 reward=0.7870799 (452.11 it/sec) -training >> step=6905300, episode=1152 reward=0.7934767 (59.32 it/sec) -training >> step=6905400, episode=1152 reward=0.7702289 (414.54 it/sec) -training >> step=6905500, episode=1152 reward=0.7848067 (465.10 it/sec) -training >> step=6905600, episode=1152 reward=0.8134005 (474.52 it/sec) -training >> step=6905700, episode=1152 reward=0.7823353 (448.39 it/sec) -training >> step=6905800, episode=1152 reward=0.786085 (490.32 it/sec) -training >> step=6905900, episode=1152 reward=0.7771326 (455.21 it/sec) -training >> step=6906000, episode=1152 reward=0.7949042 (473.87 it/sec) -training >> step=6906100, episode=1152 reward=0.7958584 (448.81 it/sec) -training >> step=6906200, episode=1152 reward=0.7808115 (504.27 it/sec) -training >> step=6906300, episode=1152 reward=0.7959273 (480.32 it/sec) -training >> step=6906400, episode=1152 reward=0.8017397 (514.83 it/sec) -training >> step=6906500, episode=1152 reward=0.7848127 (474.78 it/sec) -training >> step=6906600, episode=1152 reward=0.7706324 (485.41 it/sec) -training >> step=6906700, episode=1152 reward=0.7948318 (476.20 it/sec) -training >> step=6906800, episode=1152 reward=0.7980214 (500.70 it/sec) -training >> step=6906900, episode=1152 reward=0.7940331 (462.63 it/sec) -training >> step=6907000, episode=1152 reward=0.7715411 (495.02 it/sec) -training >> step=6907100, episode=1152 reward=0.8117642 (475.98 it/sec) -training >> step=6907200, episode=1152 reward=0.7889382 (487.65 it/sec) -training >> step=6907300, episode=1152 reward=0.7868755 (489.52 it/sec) -training >> step=6907400, episode=1152 reward=0.7799494 (503.62 it/sec) -training >> step=6907500, episode=1152 reward=0.7815272 (490.04 it/sec) -training >> step=6907600, episode=1152 reward=0.7920896 (465.93 it/sec) -training >> step=6907700, episode=1152 reward=0.7918972 (483.61 it/sec) -training >> step=6907800, episode=1152 reward=0.7917258 (485.86 it/sec) -training >> step=6907900, episode=1152 reward=0.7829615 (498.82 it/sec) -training >> step=6908000, episode=1152 reward=0.7956487 (466.11 it/sec) -training >> step=6908100, episode=1152 reward=0.779232 (414.65 it/sec) -training >> step=6908200, episode=1152 reward=0.7826417 (478.79 it/sec) -training >> step=6908300, episode=1152 reward=0.7828192 (449.81 it/sec) -training >> step=6908400, episode=1152 reward=0.7960292 (463.98 it/sec) -training >> step=6908500, episode=1152 reward=0.809413 (450.40 it/sec) -training >> step=6908600, episode=1152 reward=0.7882444 (422.73 it/sec) -training >> step=6908700, episode=1152 reward=0.7732266 (501.31 it/sec) -training >> step=6908800, episode=1152 reward=0.7703731 (524.62 it/sec) -training >> step=6908900, episode=1152 reward=0.7814518 (496.20 it/sec) -training >> step=6909000, episode=1152 reward=0.7871137 (473.25 it/sec) -training >> step=6909100, episode=1152 reward=0.7972625 (502.44 it/sec) -training >> step=6909200, episode=1152 reward=0.779652 (501.29 it/sec) -training >> step=6909300, episode=1152 reward=0.7966377 (541.00 it/sec) -training >> step=6909400, episode=1152 reward=0.7881044 (514.70 it/sec) -training >> step=6909500, episode=1152 reward=0.7910525 (477.51 it/sec) -training >> step=6909600, episode=1152 reward=0.7707637 (516.36 it/sec) -training >> step=6909700, episode=1152 reward=0.8002009 (500.46 it/sec) -training >> step=6909800, episode=1152 reward=0.7742953 (403.10 it/sec) -training >> step=6909900, episode=1152 reward=0.8029506 (492.07 it/sec) -training >> step=6910000, episode=1152 reward=0.7736523 (473.00 it/sec) -training >> step=6910100, episode=1152 reward=0.7680624 (512.99 it/sec) -training >> step=6910200, episode=1152 reward=0.7735434 (501.97 it/sec) -training >> step=6910300, episode=1152 reward=0.7832744 (503.67 it/sec) -training >> step=6910400, episode=1152 reward=0.7808066 (518.37 it/sec) -training >> step=6910500, episode=1152 reward=0.7807115 (467.28 it/sec) -training >> step=6910600, episode=1152 reward=0.7716126 (464.77 it/sec) -training >> step=6910700, episode=1152 reward=0.7483153 (491.55 it/sec) -training >> step=6910800, episode=1152 reward=0.7800407 (536.26 it/sec) -training >> step=6910900, episode=1152 reward=0.8032707 (496.90 it/sec) -training >> step=6911000, episode=1152 reward=0.7947059 (432.52 it/sec) -training >> step=6911100, episode=1152 reward=0.7865154 (513.34 it/sec) -training >> step=6911200, episode=1152 reward=0.7880162 (522.31 it/sec) -training >> step=6911300, episode=1153 reward=0.7792757 (100.32 it/sec) -training >> step=6911400, episode=1153 reward=0.798632 (491.83 it/sec) -training >> step=6911500, episode=1153 reward=0.7905676 (524.18 it/sec) -training >> step=6911600, episode=1153 reward=0.7740876 (473.61 it/sec) -training >> step=6911700, episode=1153 reward=0.7833316 (503.25 it/sec) -training >> step=6911800, episode=1153 reward=0.7658301 (530.32 it/sec) -training >> step=6911900, episode=1153 reward=0.7844018 (481.72 it/sec) -training >> step=6912000, episode=1153 reward=0.8106508 (487.12 it/sec) -training >> step=6912100, episode=1153 reward=0.807794 (528.91 it/sec) -training >> step=6912200, episode=1153 reward=0.7886707 (501.24 it/sec) -training >> step=6912300, episode=1153 reward=0.7920013 (472.70 it/sec) -training >> step=6912400, episode=1153 reward=0.8036516 (497.02 it/sec) -training >> step=6912500, episode=1153 reward=0.7780267 (532.53 it/sec) -training >> step=6912600, episode=1153 reward=0.7838255 (510.01 it/sec) -training >> step=6912700, episode=1153 reward=0.801172 (488.75 it/sec) -training >> step=6912800, episode=1153 reward=0.7838935 (539.27 it/sec) -training >> step=6912900, episode=1153 reward=0.7982664 (526.99 it/sec) -training >> step=6913000, episode=1153 reward=0.771188 (492.46 it/sec) -training >> step=6913100, episode=1153 reward=0.7920063 (510.82 it/sec) -training >> step=6913200, episode=1153 reward=0.7769142 (520.84 it/sec) -training >> step=6913300, episode=1153 reward=0.7914212 (507.35 it/sec) -training >> step=6913400, episode=1153 reward=0.7889851 (491.85 it/sec) -training >> step=6913500, episode=1153 reward=0.7788578 (496.27 it/sec) -training >> step=6913600, episode=1153 reward=0.7691897 (513.44 it/sec) -training >> step=6913700, episode=1153 reward=0.7775036 (485.23 it/sec) -training >> step=6913800, episode=1153 reward=0.7822152 (467.63 it/sec) -training >> step=6913900, episode=1153 reward=0.7820121 (511.47 it/sec) -training >> step=6914000, episode=1153 reward=0.7981545 (493.32 it/sec) -training >> step=6914100, episode=1153 reward=0.7834362 (502.60 it/sec) -training >> step=6914200, episode=1153 reward=0.779906 (465.58 it/sec) -training >> step=6914300, episode=1153 reward=0.7796139 (498.55 it/sec) -training >> step=6914400, episode=1153 reward=0.7951347 (520.03 it/sec) -training >> step=6914500, episode=1153 reward=0.7656217 (476.11 it/sec) -training >> step=6914600, episode=1153 reward=0.7910336 (528.79 it/sec) -training >> step=6914700, episode=1153 reward=0.7800251 (494.27 it/sec) -training >> step=6914800, episode=1153 reward=0.7801402 (482.79 it/sec) -training >> step=6914900, episode=1153 reward=0.7922844 (500.86 it/sec) -training >> step=6915000, episode=1153 reward=0.7979018 (511.97 it/sec) -training >> step=6915100, episode=1153 reward=0.7909251 (449.57 it/sec) -training >> step=6915200, episode=1153 reward=0.8088124 (379.71 it/sec) -training >> step=6915300, episode=1153 reward=0.7763471 (489.74 it/sec) -training >> step=6915400, episode=1153 reward=0.783305 (541.59 it/sec) -training >> step=6915500, episode=1153 reward=0.7849388 (516.28 it/sec) -training >> step=6915600, episode=1153 reward=0.7935005 (464.48 it/sec) -training >> step=6915700, episode=1153 reward=0.8017325 (522.49 it/sec) -training >> step=6915800, episode=1153 reward=0.7961749 (493.56 it/sec) -training >> step=6915900, episode=1153 reward=0.7698934 (375.10 it/sec) -training >> step=6916000, episode=1153 reward=0.7878563 (518.58 it/sec) -training >> step=6916100, episode=1153 reward=0.7903148 (518.71 it/sec) -training >> step=6916200, episode=1153 reward=0.8006126 (469.40 it/sec) -training >> step=6916300, episode=1153 reward=0.7966035 (505.60 it/sec) -training >> step=6916400, episode=1153 reward=0.7909449 (473.93 it/sec) -training >> step=6916500, episode=1153 reward=0.7845898 (555.00 it/sec) -training >> step=6916600, episode=1153 reward=0.7849925 (507.33 it/sec) -training >> step=6916700, episode=1153 reward=0.7897187 (510.10 it/sec) -training >> step=6916800, episode=1153 reward=0.7814949 (511.05 it/sec) -training >> step=6916900, episode=1153 reward=0.7742426 (497.88 it/sec) -training >> step=6917000, episode=1153 reward=0.7672526 (499.06 it/sec) -training >> step=6917100, episode=1153 reward=0.7772267 (511.33 it/sec) -training >> step=6917200, episode=1153 reward=0.7499299 (528.55 it/sec) -training >> step=6917300, episode=1154 reward=0.7865897 (132.73 it/sec) -training >> step=6917400, episode=1154 reward=0.8085716 (472.78 it/sec) -training >> step=6917500, episode=1154 reward=0.7788482 (444.82 it/sec) -training >> step=6917600, episode=1154 reward=0.8065042 (488.58 it/sec) -training >> step=6917700, episode=1154 reward=0.8157541 (486.99 it/sec) -training >> step=6917800, episode=1154 reward=0.8044381 (458.86 it/sec) -training >> step=6917900, episode=1154 reward=0.7753472 (475.14 it/sec) -training >> step=6918000, episode=1154 reward=0.7775919 (405.88 it/sec) -training >> step=6918100, episode=1154 reward=0.7799708 (441.99 it/sec) -training >> step=6918200, episode=1154 reward=0.7820411 (458.12 it/sec) -training >> step=6918300, episode=1154 reward=0.7672968 (484.58 it/sec) -training >> step=6918400, episode=1154 reward=0.7923889 (440.60 it/sec) -training >> step=6918500, episode=1154 reward=0.8007495 (435.77 it/sec) -training >> step=6918600, episode=1154 reward=0.7846035 (489.72 it/sec) -training >> step=6918700, episode=1154 reward=0.7850033 (474.93 it/sec) -training >> step=6918800, episode=1154 reward=0.7881822 (525.77 it/sec) -training >> step=6918900, episode=1154 reward=0.7909598 (417.23 it/sec) -training >> step=6919000, episode=1154 reward=0.7912793 (456.25 it/sec) -training >> step=6919100, episode=1154 reward=0.7958369 (501.24 it/sec) -training >> step=6919200, episode=1154 reward=0.7897237 (506.31 it/sec) -training >> step=6919300, episode=1154 reward=0.7929345 (495.40 it/sec) -training >> step=6919400, episode=1154 reward=0.7852418 (511.89 it/sec) -training >> step=6919500, episode=1154 reward=0.7940474 (501.21 it/sec) -training >> step=6919600, episode=1154 reward=0.7956967 (520.60 it/sec) -training >> step=6919700, episode=1154 reward=0.7726919 (410.53 it/sec) -training >> step=6919800, episode=1154 reward=0.7684256 (457.08 it/sec) -training >> step=6919900, episode=1154 reward=0.7800382 (504.23 it/sec) -training >> step=6920000, episode=1154 reward=0.7994428 (510.63 it/sec) -training >> step=6920100, episode=1154 reward=0.7818511 (524.57 it/sec) -training >> step=6920200, episode=1154 reward=0.7769373 (518.37 it/sec) -training >> step=6920300, episode=1154 reward=0.8014063 (496.47 it/sec) -training >> step=6920400, episode=1154 reward=0.790831 (503.91 it/sec) -training >> step=6920500, episode=1154 reward=0.8080406 (479.15 it/sec) -training >> step=6920600, episode=1154 reward=0.7942739 (494.64 it/sec) -training >> step=6920700, episode=1154 reward=0.7995589 (509.69 it/sec) -training >> step=6920800, episode=1154 reward=0.7504126 (536.06 it/sec) -training >> step=6920900, episode=1154 reward=0.7977888 (443.78 it/sec) -training >> step=6921000, episode=1154 reward=0.7926277 (502.93 it/sec) -training >> step=6921100, episode=1154 reward=0.7858123 (520.98 it/sec) -training >> step=6921200, episode=1154 reward=0.7785969 (526.32 it/sec) -training >> step=6921300, episode=1154 reward=0.7901781 (528.37 it/sec) -training >> step=6921400, episode=1154 reward=0.7819663 (519.67 it/sec) -training >> step=6921500, episode=1154 reward=0.7666459 (503.29 it/sec) -training >> step=6921600, episode=1154 reward=0.7968697 (521.55 it/sec) -training >> step=6921700, episode=1154 reward=0.7821645 (493.36 it/sec) -training >> step=6921800, episode=1154 reward=0.7859172 (498.48 it/sec) -training >> step=6921900, episode=1154 reward=0.8007405 (525.11 it/sec) -training >> step=6922000, episode=1154 reward=0.770155 (474.06 it/sec) -training >> step=6922100, episode=1154 reward=0.7868061 (463.59 it/sec) -training >> step=6922200, episode=1154 reward=0.784642 (395.19 it/sec) -training >> step=6922300, episode=1154 reward=0.7756622 (504.79 it/sec) -training >> step=6922400, episode=1154 reward=0.7869849 (492.38 it/sec) -training >> step=6922500, episode=1154 reward=0.7773547 (499.27 it/sec) -training >> step=6922600, episode=1154 reward=0.7975323 (526.12 it/sec) -training >> step=6922700, episode=1154 reward=0.8017215 (500.56 it/sec) -training >> step=6922800, episode=1154 reward=0.7768351 (515.96 it/sec) -training >> step=6922900, episode=1154 reward=0.7830537 (505.79 it/sec) -training >> step=6923000, episode=1154 reward=0.7896639 (497.56 it/sec) -training >> step=6923100, episode=1154 reward=0.7718124 (520.76 it/sec) -training >> step=6923200, episode=1154 reward=0.7731879 (505.28 it/sec) -training >> step=6923300, episode=1155 reward=0.7962795 (118.94 it/sec) -training >> step=6923400, episode=1155 reward=0.7947353 (510.61 it/sec) -training >> step=6923500, episode=1155 reward=0.7815699 (454.00 it/sec) -training >> step=6923600, episode=1155 reward=0.7876426 (511.02 it/sec) -training >> step=6923700, episode=1155 reward=0.8057793 (530.03 it/sec) -training >> step=6923800, episode=1155 reward=0.78935 (460.27 it/sec) -training >> step=6923900, episode=1155 reward=0.8035133 (502.51 it/sec) -training >> step=6924000, episode=1155 reward=0.7828671 (522.75 it/sec) -training >> step=6924100, episode=1155 reward=0.7782139 (541.89 it/sec) -training >> step=6924200, episode=1155 reward=0.802716 (518.18 it/sec) -training >> step=6924300, episode=1155 reward=0.7853245 (512.73 it/sec) -training >> step=6924400, episode=1155 reward=0.8074414 (502.01 it/sec) -training >> step=6924500, episode=1155 reward=0.7821714 (479.27 it/sec) -training >> step=6924600, episode=1155 reward=0.7820618 (513.96 it/sec) -training >> step=6924700, episode=1155 reward=0.7844557 (522.55 it/sec) -training >> step=6924800, episode=1155 reward=0.7837929 (531.97 it/sec) -training >> step=6924900, episode=1155 reward=0.7776319 (485.11 it/sec) -training >> step=6925000, episode=1155 reward=0.7809973 (492.48 it/sec) -training >> step=6925100, episode=1155 reward=0.7868797 (549.82 it/sec) -training >> step=6925200, episode=1155 reward=0.7907376 (517.15 it/sec) -training >> step=6925300, episode=1155 reward=0.7855915 (512.20 it/sec) -training >> step=6925400, episode=1155 reward=0.7929359 (519.45 it/sec) -training >> step=6925500, episode=1155 reward=0.7841756 (555.19 it/sec) -training >> step=6925600, episode=1155 reward=0.7744092 (499.20 it/sec) -training >> step=6925700, episode=1155 reward=0.7873298 (467.28 it/sec) -training >> step=6925800, episode=1155 reward=0.8070576 (472.86 it/sec) -training >> step=6925900, episode=1155 reward=0.7871991 (523.33 it/sec) -training >> step=6926000, episode=1155 reward=0.7927607 (513.99 it/sec) -training >> step=6926100, episode=1155 reward=0.7985204 (519.21 it/sec) -training >> step=6926200, episode=1155 reward=0.7882438 (509.59 it/sec) -training >> step=6926300, episode=1155 reward=0.7797225 (439.20 it/sec) -training >> step=6926400, episode=1155 reward=0.7898569 (373.37 it/sec) -training >> step=6926500, episode=1155 reward=0.7802501 (386.03 it/sec) -training >> step=6926600, episode=1155 reward=0.7890191 (414.39 it/sec) -training >> step=6926700, episode=1155 reward=0.7635601 (344.51 it/sec) -training >> step=6926800, episode=1155 reward=0.7857349 (367.31 it/sec) -training >> step=6926900, episode=1155 reward=0.7934144 (416.34 it/sec) -training >> step=6927000, episode=1155 reward=0.8255866 (407.46 it/sec) -training >> step=6927100, episode=1155 reward=0.790148 (486.61 it/sec) -training >> step=6927200, episode=1155 reward=0.8039097 (510.03 it/sec) -training >> step=6927300, episode=1155 reward=0.7788665 (517.72 it/sec) -training >> step=6927400, episode=1155 reward=0.7855703 (536.15 it/sec) -training >> step=6927500, episode=1155 reward=0.7808892 (476.29 it/sec) -training >> step=6927600, episode=1155 reward=0.7708288 (528.24 it/sec) -training >> step=6927700, episode=1155 reward=0.7724568 (520.95 it/sec) -training >> step=6927800, episode=1155 reward=0.784519 (508.13 it/sec) -training >> step=6927900, episode=1155 reward=0.7845002 (505.13 it/sec) -training >> step=6928000, episode=1155 reward=0.780059 (504.24 it/sec) -training >> step=6928100, episode=1155 reward=0.7671269 (507.93 it/sec) -training >> step=6928200, episode=1155 reward=0.7625482 (528.52 it/sec) -training >> step=6928300, episode=1155 reward=0.7861058 (489.04 it/sec) -training >> step=6928400, episode=1155 reward=0.7772854 (398.23 it/sec) -training >> step=6928500, episode=1155 reward=0.7810242 (515.56 it/sec) -training >> step=6928600, episode=1155 reward=0.7903181 (520.74 it/sec) -training >> step=6928700, episode=1155 reward=0.7823358 (519.77 it/sec) -training >> step=6928800, episode=1155 reward=0.8015677 (522.57 it/sec) -training >> step=6928900, episode=1155 reward=0.7967201 (507.69 it/sec) -training >> step=6929000, episode=1155 reward=0.7938718 (496.69 it/sec) -training >> step=6929100, episode=1155 reward=0.7674309 (532.80 it/sec) -training >> step=6929200, episode=1155 reward=0.7742075 (497.20 it/sec) -training >> step=6929300, episode=1156 reward=0.780686 (110.21 it/sec) -training >> step=6929400, episode=1156 reward=0.7845062 (492.46 it/sec) -training >> step=6929500, episode=1156 reward=0.778381 (514.03 it/sec) -training >> step=6929600, episode=1156 reward=0.7971255 (523.73 it/sec) -training >> step=6929700, episode=1156 reward=0.7842343 (504.07 it/sec) -training >> step=6929800, episode=1156 reward=0.7887165 (505.01 it/sec) -training >> step=6929900, episode=1156 reward=0.800435 (529.97 it/sec) -training >> step=6930000, episode=1156 reward=0.7833616 (528.86 it/sec) -training >> step=6930100, episode=1156 reward=0.7668564 (496.74 it/sec) -training >> step=6930200, episode=1156 reward=0.7891418 (542.51 it/sec) -training >> step=6930300, episode=1156 reward=0.7904169 (530.54 it/sec) -training >> step=6930400, episode=1156 reward=0.7802767 (487.04 it/sec) -training >> step=6930500, episode=1156 reward=0.774375 (535.86 it/sec) -training >> step=6930600, episode=1156 reward=0.7764277 (527.77 it/sec) -training >> step=6930700, episode=1156 reward=0.7972814 (496.17 it/sec) -training >> step=6930800, episode=1156 reward=0.7815889 (519.17 it/sec) -training >> step=6930900, episode=1156 reward=0.7872464 (506.11 it/sec) -training >> step=6931000, episode=1156 reward=0.7828526 (508.54 it/sec) -training >> step=6931100, episode=1156 reward=0.7932827 (506.02 it/sec) -training >> step=6931200, episode=1156 reward=0.7799705 (509.46 it/sec) -training >> step=6931300, episode=1156 reward=0.7886741 (538.34 it/sec) -training >> step=6931400, episode=1156 reward=0.790134 (492.15 it/sec) -training >> step=6931500, episode=1156 reward=0.7848164 (518.39 it/sec) -training >> step=6931600, episode=1156 reward=0.7790954 (511.89 it/sec) -training >> step=6931700, episode=1156 reward=0.7730584 (506.89 it/sec) -training >> step=6931800, episode=1156 reward=0.7646868 (491.49 it/sec) -training >> step=6931900, episode=1156 reward=0.7786469 (483.10 it/sec) -training >> step=6932000, episode=1156 reward=0.802202 (498.11 it/sec) -training >> step=6932100, episode=1156 reward=0.7909321 (526.91 it/sec) -training >> step=6932200, episode=1156 reward=0.7911564 (501.92 it/sec) -training >> step=6932300, episode=1156 reward=0.7996422 (507.64 it/sec) -training >> step=6932400, episode=1156 reward=0.7954341 (547.63 it/sec) -training >> step=6932500, episode=1156 reward=0.7923682 (480.45 it/sec) -training >> step=6932600, episode=1156 reward=0.7856072 (488.69 it/sec) -training >> step=6932700, episode=1156 reward=0.7730341 (512.00 it/sec) -training >> step=6932800, episode=1156 reward=0.8004851 (547.72 it/sec) -training >> step=6932900, episode=1156 reward=0.7994213 (529.17 it/sec) -training >> step=6933000, episode=1156 reward=0.792246 (492.83 it/sec) -training >> step=6933100, episode=1156 reward=0.7806208 (525.40 it/sec) -training >> step=6933200, episode=1156 reward=0.7676908 (491.72 it/sec) -training >> step=6933300, episode=1156 reward=0.7871096 (508.11 it/sec) -training >> step=6933400, episode=1156 reward=0.7760628 (480.18 it/sec) -training >> step=6933500, episode=1156 reward=0.7771208 (514.23 it/sec) -training >> step=6933600, episode=1156 reward=0.793299 (490.16 it/sec) -training >> step=6933700, episode=1156 reward=0.7604274 (506.08 it/sec) -training >> step=6933800, episode=1156 reward=0.7749311 (542.57 it/sec) -training >> step=6933900, episode=1156 reward=0.7817215 (436.59 it/sec) -training >> step=6934000, episode=1156 reward=0.7656989 (407.74 it/sec) -training >> step=6934100, episode=1156 reward=0.7807702 (409.71 it/sec) -training >> step=6934200, episode=1156 reward=0.7985412 (429.71 it/sec) -training >> step=6934300, episode=1156 reward=0.7614762 (391.47 it/sec) -training >> step=6934400, episode=1156 reward=0.7751647 (393.57 it/sec) -training >> step=6934500, episode=1156 reward=0.7761688 (281.51 it/sec) -training >> step=6934600, episode=1156 reward=0.7755789 (463.97 it/sec) -training >> step=6934700, episode=1156 reward=0.7811875 (498.55 it/sec) -training >> step=6934800, episode=1156 reward=0.783506 (466.14 it/sec) -training >> step=6934900, episode=1156 reward=0.791432 (500.99 it/sec) -training >> step=6935000, episode=1156 reward=0.7751904 (538.71 it/sec) -training >> step=6935100, episode=1156 reward=0.7846746 (481.07 it/sec) -training >> step=6935200, episode=1156 reward=0.7732843 (477.67 it/sec) -training >> step=6935300, episode=1157 reward=0.7655683 (68.48 it/sec) -training >> step=6935400, episode=1157 reward=0.7838716 (490.03 it/sec) -training >> step=6935500, episode=1157 reward=0.8126206 (458.00 it/sec) -training >> step=6935600, episode=1157 reward=0.7843661 (431.05 it/sec) -training >> step=6935700, episode=1157 reward=0.8084108 (420.35 it/sec) -training >> step=6935800, episode=1157 reward=0.7930857 (449.96 it/sec) -training >> step=6935900, episode=1157 reward=0.7783968 (462.89 it/sec) -training >> step=6936000, episode=1157 reward=0.7789624 (412.77 it/sec) -training >> step=6936100, episode=1157 reward=0.7743621 (463.80 it/sec) -training >> step=6936200, episode=1157 reward=0.7895221 (469.82 it/sec) -training >> step=6936300, episode=1157 reward=0.7727837 (403.76 it/sec) -training >> step=6936400, episode=1157 reward=0.7709072 (417.89 it/sec) -training >> step=6936500, episode=1157 reward=0.7959118 (458.17 it/sec) -training >> step=6936600, episode=1157 reward=0.7857975 (431.81 it/sec) -training >> step=6936700, episode=1157 reward=0.7723239 (406.06 it/sec) -training >> step=6936800, episode=1157 reward=0.7931019 (419.02 it/sec) -training >> step=6936900, episode=1157 reward=0.8028668 (394.49 it/sec) -training >> step=6937000, episode=1157 reward=0.7830697 (507.54 it/sec) -training >> step=6937100, episode=1157 reward=0.7651207 (466.79 it/sec) -training >> step=6937200, episode=1157 reward=0.7986731 (426.85 it/sec) -training >> step=6937300, episode=1157 reward=0.7951962 (472.64 it/sec) -training >> step=6937400, episode=1157 reward=0.7820807 (415.81 it/sec) -training >> step=6937500, episode=1157 reward=0.7767311 (456.30 it/sec) -training >> step=6937600, episode=1157 reward=0.7785053 (491.89 it/sec) -training >> step=6937700, episode=1157 reward=0.8002757 (444.83 it/sec) -training >> step=6937800, episode=1157 reward=0.7810689 (496.70 it/sec) -training >> step=6937900, episode=1157 reward=0.7732719 (466.13 it/sec) -training >> step=6938000, episode=1157 reward=0.7749029 (468.28 it/sec) -training >> step=6938100, episode=1157 reward=0.7873703 (513.99 it/sec) -training >> step=6938200, episode=1157 reward=0.7904372 (501.90 it/sec) -training >> step=6938300, episode=1157 reward=0.7906752 (505.30 it/sec) -training >> step=6938400, episode=1157 reward=0.8023298 (500.86 it/sec) -training >> step=6938500, episode=1157 reward=0.7878421 (483.09 it/sec) -training >> step=6938600, episode=1157 reward=0.7853894 (505.05 it/sec) -training >> step=6938700, episode=1157 reward=0.7820432 (485.34 it/sec) -training >> step=6938800, episode=1157 reward=0.7859765 (455.54 it/sec) -training >> step=6938900, episode=1157 reward=0.7715814 (497.37 it/sec) -training >> step=6939000, episode=1157 reward=0.7885395 (467.10 it/sec) -training >> step=6939100, episode=1157 reward=0.8015661 (504.24 it/sec) -training >> step=6939200, episode=1157 reward=0.794128 (516.15 it/sec) -training >> step=6939300, episode=1157 reward=0.7996598 (521.94 it/sec) -training >> step=6939400, episode=1157 reward=0.788573 (538.58 it/sec) -training >> step=6939500, episode=1157 reward=0.7882105 (513.63 it/sec) -training >> step=6939600, episode=1157 reward=0.7834987 (505.09 it/sec) -training >> step=6939700, episode=1157 reward=0.7860163 (515.74 it/sec) -training >> step=6939800, episode=1157 reward=0.7781005 (509.98 it/sec) -training >> step=6939900, episode=1157 reward=0.7832502 (521.50 it/sec) -training >> step=6940000, episode=1157 reward=0.7835165 (485.45 it/sec) -training >> step=6940100, episode=1157 reward=0.7911764 (485.71 it/sec) -training >> step=6940200, episode=1157 reward=0.7739695 (501.06 it/sec) -training >> step=6940300, episode=1157 reward=0.764897 (506.55 it/sec) -training >> step=6940400, episode=1157 reward=0.7817842 (515.74 it/sec) -training >> step=6940500, episode=1157 reward=0.7704223 (537.92 it/sec) -training >> step=6940600, episode=1157 reward=0.7806331 (487.49 it/sec) -training >> step=6940700, episode=1157 reward=0.7947496 (408.55 it/sec) -training >> step=6940800, episode=1157 reward=0.8008528 (545.10 it/sec) -training >> step=6940900, episode=1157 reward=0.778671 (468.29 it/sec) -training >> step=6941000, episode=1157 reward=0.7862966 (487.54 it/sec) -training >> step=6941100, episode=1157 reward=0.7906318 (480.91 it/sec) -training >> step=6941200, episode=1157 reward=0.7931007 (531.37 it/sec) -training >> step=6941300, episode=1158 reward=0.779205 (71.92 it/sec) -training >> step=6941400, episode=1158 reward=0.7758781 (490.88 it/sec) -training >> step=6941500, episode=1158 reward=0.7828779 (518.91 it/sec) -training >> step=6941600, episode=1158 reward=0.7969303 (526.27 it/sec) -training >> step=6941700, episode=1158 reward=0.7916903 (505.51 it/sec) -training >> step=6941800, episode=1158 reward=0.7694971 (525.35 it/sec) -training >> step=6941900, episode=1158 reward=0.7666557 (485.93 it/sec) -training >> step=6942000, episode=1158 reward=0.7772464 (503.39 it/sec) -training >> step=6942100, episode=1158 reward=0.7709421 (497.09 it/sec) -training >> step=6942200, episode=1158 reward=0.773669 (502.10 it/sec) -training >> step=6942300, episode=1158 reward=0.7927146 (440.94 it/sec) -training >> step=6942400, episode=1158 reward=0.7856261 (466.08 it/sec) -training >> step=6942500, episode=1158 reward=0.7921009 (520.11 it/sec) -training >> step=6942600, episode=1158 reward=0.7869013 (511.35 it/sec) -training >> step=6942700, episode=1158 reward=0.7974088 (447.10 it/sec) -training >> step=6942800, episode=1158 reward=0.7876304 (496.27 it/sec) -training >> step=6942900, episode=1158 reward=0.7743858 (519.01 it/sec) -training >> step=6943000, episode=1158 reward=0.7964926 (489.07 it/sec) -training >> step=6943100, episode=1158 reward=0.7876126 (465.10 it/sec) -training >> step=6943200, episode=1158 reward=0.7849231 (463.93 it/sec) -training >> step=6943300, episode=1158 reward=0.7937901 (471.32 it/sec) -training >> step=6943400, episode=1158 reward=0.7801216 (497.92 it/sec) -training >> step=6943500, episode=1158 reward=0.7797804 (436.81 it/sec) -training >> step=6943600, episode=1158 reward=0.7969015 (534.42 it/sec) -training >> step=6943700, episode=1158 reward=0.784057 (444.96 it/sec) -training >> step=6943800, episode=1158 reward=0.8088627 (533.32 it/sec) -training >> step=6943900, episode=1158 reward=0.7747383 (496.66 it/sec) -training >> step=6944000, episode=1158 reward=0.8080204 (516.29 it/sec) -training >> step=6944100, episode=1158 reward=0.7969241 (405.35 it/sec) -training >> step=6944200, episode=1158 reward=0.8006392 (459.78 it/sec) -training >> step=6944300, episode=1158 reward=0.7800665 (470.00 it/sec) -training >> step=6944400, episode=1158 reward=0.7929453 (507.67 it/sec) -training >> step=6944500, episode=1158 reward=0.7887237 (432.14 it/sec) -training >> step=6944600, episode=1158 reward=0.7829136 (432.54 it/sec) -training >> step=6944700, episode=1158 reward=0.7843786 (504.68 it/sec) -training >> step=6944800, episode=1158 reward=0.7826678 (489.22 it/sec) -training >> step=6944900, episode=1158 reward=0.7844259 (493.81 it/sec) -training >> step=6945000, episode=1158 reward=0.7903541 (476.57 it/sec) -training >> step=6945100, episode=1158 reward=0.7769313 (542.44 it/sec) -training >> step=6945200, episode=1158 reward=0.7996858 (480.83 it/sec) -training >> step=6945300, episode=1158 reward=0.7775191 (484.05 it/sec) -training >> step=6945400, episode=1158 reward=0.7968487 (516.85 it/sec) -training >> step=6945500, episode=1158 reward=0.7740554 (520.86 it/sec) -training >> step=6945600, episode=1158 reward=0.7802948 (521.10 it/sec) -training >> step=6945700, episode=1158 reward=0.784447 (436.21 it/sec) -training >> step=6945800, episode=1158 reward=0.797756 (524.22 it/sec) -training >> step=6945900, episode=1158 reward=0.7814113 (510.16 it/sec) -training >> step=6946000, episode=1158 reward=0.77068 (479.25 it/sec) -training >> step=6946100, episode=1158 reward=0.7606103 (531.94 it/sec) -training >> step=6946200, episode=1158 reward=0.7929522 (532.97 it/sec) -training >> step=6946300, episode=1158 reward=0.7754281 (509.87 it/sec) -training >> step=6946400, episode=1158 reward=0.7646244 (493.25 it/sec) -training >> step=6946500, episode=1158 reward=0.7649253 (519.98 it/sec) -training >> step=6946600, episode=1158 reward=0.8059829 (543.58 it/sec) -training >> step=6946700, episode=1158 reward=0.7806662 (494.09 it/sec) -training >> step=6946800, episode=1158 reward=0.7934591 (509.11 it/sec) -training >> step=6946900, episode=1158 reward=0.7923402 (350.40 it/sec) -training >> step=6947000, episode=1158 reward=0.7773173 (454.13 it/sec) -training >> step=6947100, episode=1158 reward=0.7896786 (489.57 it/sec) -training >> step=6947200, episode=1158 reward=0.7912095 (472.67 it/sec) -training >> step=6947300, episode=1159 reward=0.7903293 (97.58 it/sec) -training >> step=6947400, episode=1159 reward=0.7963039 (508.82 it/sec) -training >> step=6947500, episode=1159 reward=0.7852245 (469.25 it/sec) -training >> step=6947600, episode=1159 reward=0.7946557 (494.44 it/sec) -training >> step=6947700, episode=1159 reward=0.7837999 (538.26 it/sec) -training >> step=6947800, episode=1159 reward=0.7819827 (515.09 it/sec) -training >> step=6947900, episode=1159 reward=0.7776145 (527.45 it/sec) -training >> step=6948000, episode=1159 reward=0.7841099 (463.72 it/sec) -training >> step=6948100, episode=1159 reward=0.7865136 (491.11 it/sec) -training >> step=6948200, episode=1159 reward=0.772068 (506.61 it/sec) -training >> step=6948300, episode=1159 reward=0.779193 (518.45 it/sec) -training >> step=6948400, episode=1159 reward=0.7824822 (511.67 it/sec) -training >> step=6948500, episode=1159 reward=0.7841887 (488.24 it/sec) -training >> step=6948600, episode=1159 reward=0.7816384 (532.50 it/sec) -training >> step=6948700, episode=1159 reward=0.7812985 (522.64 it/sec) -training >> step=6948800, episode=1159 reward=0.7923076 (506.46 it/sec) -training >> step=6948900, episode=1159 reward=0.7910888 (522.19 it/sec) -training >> step=6949000, episode=1159 reward=0.7795168 (535.00 it/sec) -training >> step=6949100, episode=1159 reward=0.7854312 (513.36 it/sec) -training >> step=6949200, episode=1159 reward=0.7713439 (497.95 it/sec) -training >> step=6949300, episode=1159 reward=0.7925766 (532.95 it/sec) -training >> step=6949400, episode=1159 reward=0.8065714 (498.15 it/sec) -training >> step=6949500, episode=1159 reward=0.8023877 (498.72 it/sec) -training >> step=6949600, episode=1159 reward=0.7754756 (510.65 it/sec) -training >> step=6949700, episode=1159 reward=0.7874853 (505.98 it/sec) -training >> step=6949800, episode=1159 reward=0.7903619 (520.01 it/sec) -training >> step=6949900, episode=1159 reward=0.7913219 (481.63 it/sec) -training >> step=6950000, episode=1159 reward=0.7953755 (516.84 it/sec) -training >> step=6950100, episode=1159 reward=0.7894637 (502.18 it/sec) -training >> step=6950200, episode=1159 reward=0.7951151 (437.99 it/sec) -training >> step=6950300, episode=1159 reward=0.7934297 (449.10 it/sec) -training >> step=6950400, episode=1159 reward=0.7901544 (511.47 it/sec) -training >> step=6950500, episode=1159 reward=0.7828887 (489.91 it/sec) -training >> step=6950600, episode=1159 reward=0.811223 (505.35 it/sec) -training >> step=6950700, episode=1159 reward=0.7952159 (487.83 it/sec) -training >> step=6950800, episode=1159 reward=0.7749892 (529.66 it/sec) -training >> step=6950900, episode=1159 reward=0.7928022 (477.66 it/sec) -training >> step=6951000, episode=1159 reward=0.7807042 (537.64 it/sec) -training >> step=6951100, episode=1159 reward=0.7931621 (482.19 it/sec) -training >> step=6951200, episode=1159 reward=0.7785918 (548.54 it/sec) -training >> step=6951300, episode=1159 reward=0.7927579 (505.45 it/sec) -training >> step=6951400, episode=1159 reward=0.7722605 (516.75 it/sec) -training >> step=6951500, episode=1159 reward=0.7817646 (518.00 it/sec) -training >> step=6951600, episode=1159 reward=0.7787806 (538.06 it/sec) -training >> step=6951700, episode=1159 reward=0.7713869 (512.83 it/sec) -training >> step=6951800, episode=1159 reward=0.7679825 (503.71 it/sec) -training >> step=6951900, episode=1159 reward=0.791061 (504.89 it/sec) -training >> step=6952000, episode=1159 reward=0.7782928 (493.87 it/sec) -training >> step=6952100, episode=1159 reward=0.7882671 (514.47 it/sec) -training >> step=6952200, episode=1159 reward=0.8019217 (511.15 it/sec) -training >> step=6952300, episode=1159 reward=0.7811873 (493.05 it/sec) -training >> step=6952400, episode=1159 reward=0.794512 (494.18 it/sec) -training >> step=6952500, episode=1159 reward=0.7762341 (499.82 it/sec) -training >> step=6952600, episode=1159 reward=0.7931203 (547.49 it/sec) -training >> step=6952700, episode=1159 reward=0.782309 (489.26 it/sec) -training >> step=6952800, episode=1159 reward=0.7998403 (489.07 it/sec) -training >> step=6952900, episode=1159 reward=0.7916743 (490.19 it/sec) -training >> step=6953000, episode=1159 reward=0.7885617 (495.82 it/sec) -training >> step=6953100, episode=1159 reward=0.7789308 (361.66 it/sec) -training >> step=6953200, episode=1159 reward=0.801191 (461.97 it/sec) -training >> step=6953300, episode=1160 reward=0.7806987 (97.68 it/sec) -training >> step=6953400, episode=1160 reward=0.7838355 (486.97 it/sec) -training >> step=6953500, episode=1160 reward=0.7770268 (485.15 it/sec) -training >> step=6953600, episode=1160 reward=0.7918376 (515.10 it/sec) -training >> step=6953700, episode=1160 reward=0.7881092 (530.64 it/sec) -training >> step=6953800, episode=1160 reward=0.7981395 (492.46 it/sec) -training >> step=6953900, episode=1160 reward=0.7800761 (513.40 it/sec) -training >> step=6954000, episode=1160 reward=0.7862943 (479.63 it/sec) -training >> step=6954100, episode=1160 reward=0.7802748 (502.56 it/sec) -training >> step=6954200, episode=1160 reward=0.8136337 (525.55 it/sec) -training >> step=6954300, episode=1160 reward=0.7918558 (515.19 it/sec) -training >> step=6954400, episode=1160 reward=0.7833318 (551.33 it/sec) -training >> step=6954500, episode=1160 reward=0.7886898 (495.77 it/sec) -training >> step=6954600, episode=1160 reward=0.8010875 (534.58 it/sec) -training >> step=6954700, episode=1160 reward=0.7956621 (501.81 it/sec) -training >> step=6954800, episode=1160 reward=0.771459 (538.44 it/sec) -training >> step=6954900, episode=1160 reward=0.7796501 (507.39 it/sec) -training >> step=6955000, episode=1160 reward=0.7624163 (508.52 it/sec) -training >> step=6955100, episode=1160 reward=0.7719302 (508.17 it/sec) -training >> step=6955200, episode=1160 reward=0.7952109 (522.13 it/sec) -training >> step=6955300, episode=1160 reward=0.8023953 (519.65 it/sec) -training >> step=6955400, episode=1160 reward=0.7770792 (509.50 it/sec) -training >> step=6955500, episode=1160 reward=0.8075494 (549.87 it/sec) -training >> step=6955600, episode=1160 reward=0.7897712 (498.68 it/sec) -training >> step=6955700, episode=1160 reward=0.7813689 (496.42 it/sec) -training >> step=6955800, episode=1160 reward=0.7975917 (539.44 it/sec) -training >> step=6955900, episode=1160 reward=0.7851427 (492.91 it/sec) -training >> step=6956000, episode=1160 reward=0.784973 (499.40 it/sec) -training >> step=6956100, episode=1160 reward=0.8099952 (497.59 it/sec) -training >> step=6956200, episode=1160 reward=0.7683905 (492.10 it/sec) -training >> step=6956300, episode=1160 reward=0.7845381 (502.51 it/sec) -training >> step=6956400, episode=1160 reward=0.7960742 (422.10 it/sec) -training >> step=6956500, episode=1160 reward=0.7800211 (469.19 it/sec) -training >> step=6956600, episode=1160 reward=0.8130159 (520.61 it/sec) -training >> step=6956700, episode=1160 reward=0.7831323 (526.77 it/sec) -training >> step=6956800, episode=1160 reward=0.7903166 (513.10 it/sec) -training >> step=6956900, episode=1160 reward=0.7705303 (464.83 it/sec) -training >> step=6957000, episode=1160 reward=0.7982233 (490.74 it/sec) -training >> step=6957100, episode=1160 reward=0.7746746 (517.75 it/sec) -training >> step=6957200, episode=1160 reward=0.7927307 (491.80 it/sec) -training >> step=6957300, episode=1160 reward=0.7834975 (530.37 it/sec) -training >> step=6957400, episode=1160 reward=0.7832518 (480.01 it/sec) -training >> step=6957500, episode=1160 reward=0.7971942 (465.28 it/sec) -training >> step=6957600, episode=1160 reward=0.7948344 (469.86 it/sec) -training >> step=6957700, episode=1160 reward=0.7869606 (449.73 it/sec) -training >> step=6957800, episode=1160 reward=0.7789527 (477.33 it/sec) -training >> step=6957900, episode=1160 reward=0.7751413 (446.93 it/sec) -training >> step=6958000, episode=1160 reward=0.7842975 (392.92 it/sec) -training >> step=6958100, episode=1160 reward=0.7835422 (434.74 it/sec) -training >> step=6958200, episode=1160 reward=0.7777941 (501.16 it/sec) -training >> step=6958300, episode=1160 reward=0.7956861 (467.03 it/sec) -training >> step=6958400, episode=1160 reward=0.7758945 (486.33 it/sec) -training >> step=6958500, episode=1160 reward=0.8023806 (403.74 it/sec) -training >> step=6958600, episode=1160 reward=0.7726715 (386.97 it/sec) -training >> step=6958700, episode=1160 reward=0.7910206 (467.55 it/sec) -training >> step=6958800, episode=1160 reward=0.7756408 (443.51 it/sec) -training >> step=6958900, episode=1160 reward=0.7763765 (468.08 it/sec) -training >> step=6959000, episode=1160 reward=0.7905481 (481.16 it/sec) -training >> step=6959100, episode=1160 reward=0.8110625 (472.10 it/sec) -training >> step=6959200, episode=1160 reward=0.7667166 (298.95 it/sec) -training >> step=6959300, episode=1161 reward=0.8071882 (58.29 it/sec) -training >> step=6959400, episode=1161 reward=0.7686492 (463.72 it/sec) -training >> step=6959500, episode=1161 reward=0.7918255 (398.75 it/sec) -training >> step=6959600, episode=1161 reward=0.7951316 (421.02 it/sec) -training >> step=6959700, episode=1161 reward=0.7777482 (468.56 it/sec) -training >> step=6959800, episode=1161 reward=0.7770953 (438.45 it/sec) -training >> step=6959900, episode=1161 reward=0.8129716 (416.35 it/sec) -training >> step=6960000, episode=1161 reward=0.7919244 (428.34 it/sec) -training >> step=6960100, episode=1161 reward=0.7665737 (456.02 it/sec) -training >> step=6960200, episode=1161 reward=0.7838383 (446.95 it/sec) -training >> step=6960300, episode=1161 reward=0.7905371 (480.56 it/sec) -training >> step=6960400, episode=1161 reward=0.7787962 (439.96 it/sec) -training >> step=6960500, episode=1161 reward=0.8022434 (473.38 it/sec) -training >> step=6960600, episode=1161 reward=0.7733096 (492.70 it/sec) -training >> step=6960700, episode=1161 reward=0.8002527 (471.28 it/sec) -training >> step=6960800, episode=1161 reward=0.7946697 (480.65 it/sec) -training >> step=6960900, episode=1161 reward=0.7999003 (496.89 it/sec) -training >> step=6961000, episode=1161 reward=0.7811653 (466.29 it/sec) -training >> step=6961100, episode=1161 reward=0.7800045 (482.76 it/sec) -training >> step=6961200, episode=1161 reward=0.7963985 (483.43 it/sec) -training >> step=6961300, episode=1161 reward=0.795144 (495.72 it/sec) -training >> step=6961400, episode=1161 reward=0.7946338 (440.69 it/sec) -training >> step=6961500, episode=1161 reward=0.7946025 (473.19 it/sec) -training >> step=6961600, episode=1161 reward=0.790857 (437.89 it/sec) -training >> step=6961700, episode=1161 reward=0.7858156 (480.27 it/sec) -training >> step=6961800, episode=1161 reward=0.8015795 (499.02 it/sec) -training >> step=6961900, episode=1161 reward=0.7875829 (416.12 it/sec) -training >> step=6962000, episode=1161 reward=0.804678 (494.23 it/sec) -training >> step=6962100, episode=1161 reward=0.7955782 (511.67 it/sec) -training >> step=6962200, episode=1161 reward=0.7696717 (478.36 it/sec) -training >> step=6962300, episode=1161 reward=0.7760817 (536.56 it/sec) -training >> step=6962400, episode=1161 reward=0.7966696 (494.02 it/sec) -training >> step=6962500, episode=1161 reward=0.7907147 (502.13 it/sec) -training >> step=6962600, episode=1161 reward=0.7895116 (478.26 it/sec) -training >> step=6962700, episode=1161 reward=0.8044101 (493.14 it/sec) -training >> step=6962800, episode=1161 reward=0.7938876 (465.71 it/sec) -training >> step=6962900, episode=1161 reward=0.788998 (448.46 it/sec) -training >> step=6963000, episode=1161 reward=0.7940467 (506.15 it/sec) -training >> step=6963100, episode=1161 reward=0.7651175 (526.40 it/sec) -training >> step=6963200, episode=1161 reward=0.783456 (504.10 it/sec) -training >> step=6963300, episode=1161 reward=0.7916858 (477.47 it/sec) -training >> step=6963400, episode=1161 reward=0.7832809 (518.55 it/sec) -training >> step=6963500, episode=1161 reward=0.7971436 (518.97 it/sec) -training >> step=6963600, episode=1161 reward=0.791199 (506.69 it/sec) -training >> step=6963700, episode=1161 reward=0.8007964 (514.47 it/sec) -training >> step=6963800, episode=1161 reward=0.7780071 (490.97 it/sec) -training >> step=6963900, episode=1161 reward=0.7734312 (502.71 it/sec) -training >> step=6964000, episode=1161 reward=0.7864217 (475.90 it/sec) -training >> step=6964100, episode=1161 reward=0.7732852 (501.21 it/sec) -training >> step=6964200, episode=1161 reward=0.7774001 (511.55 it/sec) -training >> step=6964300, episode=1161 reward=0.7959382 (504.24 it/sec) -training >> step=6964400, episode=1161 reward=0.7871556 (500.35 it/sec) -training >> step=6964500, episode=1161 reward=0.7734562 (512.45 it/sec) -training >> step=6964600, episode=1161 reward=0.7867231 (472.98 it/sec) -training >> step=6964700, episode=1161 reward=0.7899572 (479.74 it/sec) -training >> step=6964800, episode=1161 reward=0.7683334 (539.05 it/sec) -training >> step=6964900, episode=1161 reward=0.7904559 (497.54 it/sec) -training >> step=6965000, episode=1161 reward=0.7782941 (507.11 it/sec) -training >> step=6965100, episode=1161 reward=0.7649744 (471.38 it/sec) -training >> step=6965200, episode=1161 reward=0.8045013 (499.82 it/sec) -training >> step=6965300, episode=1162 reward=0.8047227 (44.91 it/sec) -training >> step=6965400, episode=1162 reward=0.7848746 (462.56 it/sec) -training >> step=6965500, episode=1162 reward=0.777572 (481.22 it/sec) -training >> step=6965600, episode=1162 reward=0.7829704 (455.80 it/sec) -training >> step=6965700, episode=1162 reward=0.7710926 (469.30 it/sec) -training >> step=6965800, episode=1162 reward=0.7954453 (551.40 it/sec) -training >> step=6965900, episode=1162 reward=0.7768193 (497.07 it/sec) -training >> step=6966000, episode=1162 reward=0.7817011 (464.76 it/sec) -training >> step=6966100, episode=1162 reward=0.8035644 (507.07 it/sec) -training >> step=6966200, episode=1162 reward=0.7897153 (508.73 it/sec) -training >> step=6966300, episode=1162 reward=0.793463 (441.01 it/sec) -training >> step=6966400, episode=1162 reward=0.8037652 (510.20 it/sec) -training >> step=6966500, episode=1162 reward=0.7593113 (496.59 it/sec) -training >> step=6966600, episode=1162 reward=0.7816442 (473.27 it/sec) -training >> step=6966700, episode=1162 reward=0.8028652 (486.37 it/sec) -training >> step=6966800, episode=1162 reward=0.7760224 (525.78 it/sec) -training >> step=6966900, episode=1162 reward=0.7930743 (457.49 it/sec) -training >> step=6967000, episode=1162 reward=0.7902211 (480.29 it/sec) -training >> step=6967100, episode=1162 reward=0.7975075 (504.79 it/sec) -training >> step=6967200, episode=1162 reward=0.7926452 (515.40 it/sec) -training >> step=6967300, episode=1162 reward=0.7938532 (478.24 it/sec) -training >> step=6967400, episode=1162 reward=0.7921917 (484.05 it/sec) -training >> step=6967500, episode=1162 reward=0.7843754 (472.47 it/sec) -training >> step=6967600, episode=1162 reward=0.802496 (477.45 it/sec) -training >> step=6967700, episode=1162 reward=0.7854878 (502.25 it/sec) -training >> step=6967800, episode=1162 reward=0.7671481 (477.35 it/sec) -training >> step=6967900, episode=1162 reward=0.7971963 (499.99 it/sec) -training >> step=6968000, episode=1162 reward=0.7975407 (473.27 it/sec) -training >> step=6968100, episode=1162 reward=0.7846748 (495.43 it/sec) -training >> step=6968200, episode=1162 reward=0.7734264 (511.07 it/sec) -training >> step=6968300, episode=1162 reward=0.7698406 (530.33 it/sec) -training >> step=6968400, episode=1162 reward=0.7949883 (466.56 it/sec) -training >> step=6968500, episode=1162 reward=0.808113 (506.38 it/sec) -training >> step=6968600, episode=1162 reward=0.7899325 (495.74 it/sec) -training >> step=6968700, episode=1162 reward=0.7872348 (497.35 it/sec) -training >> step=6968800, episode=1162 reward=0.7879406 (460.92 it/sec) -training >> step=6968900, episode=1162 reward=0.7869146 (444.62 it/sec) -training >> step=6969000, episode=1162 reward=0.7855166 (465.86 it/sec) -training >> step=6969100, episode=1162 reward=0.7940766 (454.62 it/sec) -training >> step=6969200, episode=1162 reward=0.8051934 (441.64 it/sec) -training >> step=6969300, episode=1162 reward=0.7929878 (468.63 it/sec) -training >> step=6969400, episode=1162 reward=0.801645 (436.66 it/sec) -training >> step=6969500, episode=1162 reward=0.7822059 (434.76 it/sec) -training >> step=6969600, episode=1162 reward=0.7869832 (508.11 it/sec) -training >> step=6969700, episode=1162 reward=0.7780762 (499.63 it/sec) -training >> step=6969800, episode=1162 reward=0.8052995 (461.56 it/sec) -training >> step=6969900, episode=1162 reward=0.7761132 (523.07 it/sec) -training >> step=6970000, episode=1162 reward=0.8036863 (493.58 it/sec) -training >> step=6970100, episode=1162 reward=0.7955492 (526.25 it/sec) -training >> step=6970200, episode=1162 reward=0.7735963 (488.85 it/sec) -training >> step=6970300, episode=1162 reward=0.7688537 (440.64 it/sec) -training >> step=6970400, episode=1162 reward=0.7808251 (512.06 it/sec) -training >> step=6970500, episode=1162 reward=0.7791362 (509.10 it/sec) -training >> step=6970600, episode=1162 reward=0.7928851 (490.46 it/sec) -training >> step=6970700, episode=1162 reward=0.7760939 (512.40 it/sec) -training >> step=6970800, episode=1162 reward=0.7744482 (515.38 it/sec) -training >> step=6970900, episode=1162 reward=0.7939208 (480.64 it/sec) -training >> step=6971000, episode=1162 reward=0.7906834 (464.94 it/sec) -training >> step=6971100, episode=1162 reward=0.787265 (525.04 it/sec) -training >> step=6971200, episode=1162 reward=0.7833433 (529.96 it/sec) -training >> step=6971300, episode=1163 reward=0.7820407 (59.73 it/sec) -training >> step=6971400, episode=1163 reward=0.799992 (499.93 it/sec) -training >> step=6971500, episode=1163 reward=0.7769266 (529.40 it/sec) -training >> step=6971600, episode=1163 reward=0.7927481 (511.33 it/sec) -training >> step=6971700, episode=1163 reward=0.7951246 (510.05 it/sec) -training >> step=6971800, episode=1163 reward=0.7740126 (483.99 it/sec) -training >> step=6971900, episode=1163 reward=0.7927416 (473.43 it/sec) -training >> step=6972000, episode=1163 reward=0.7841286 (468.14 it/sec) -training >> step=6972100, episode=1163 reward=0.7776418 (529.45 it/sec) -training >> step=6972200, episode=1163 reward=0.8042238 (460.48 it/sec) -training >> step=6972300, episode=1163 reward=0.7856665 (482.21 it/sec) -training >> step=6972400, episode=1163 reward=0.7923185 (442.03 it/sec) -training >> step=6972500, episode=1163 reward=0.7878756 (506.30 it/sec) -training >> step=6972600, episode=1163 reward=0.7959054 (492.41 it/sec) -training >> step=6972700, episode=1163 reward=0.7872981 (522.74 it/sec) -training >> step=6972800, episode=1163 reward=0.7887697 (463.06 it/sec) -training >> step=6972900, episode=1163 reward=0.7809565 (497.72 it/sec) -training >> step=6973000, episode=1163 reward=0.7756316 (508.09 it/sec) -training >> step=6973100, episode=1163 reward=0.7959048 (525.24 it/sec) -training >> step=6973200, episode=1163 reward=0.7911326 (470.25 it/sec) -training >> step=6973300, episode=1163 reward=0.7785239 (471.66 it/sec) -training >> step=6973400, episode=1163 reward=0.7799889 (484.37 it/sec) -training >> step=6973500, episode=1163 reward=0.7929118 (505.09 it/sec) -training >> step=6973600, episode=1163 reward=0.7894679 (499.71 it/sec) -training >> step=6973700, episode=1163 reward=0.797928 (484.65 it/sec) -training >> step=6973800, episode=1163 reward=0.7934705 (493.69 it/sec) -training >> step=6973900, episode=1163 reward=0.7824076 (504.79 it/sec) -training >> step=6974000, episode=1163 reward=0.8005833 (488.84 it/sec) -training >> step=6974100, episode=1163 reward=0.7998223 (520.90 it/sec) -training >> step=6974200, episode=1163 reward=0.7959605 (485.09 it/sec) -training >> step=6974300, episode=1163 reward=0.821663 (474.47 it/sec) -training >> step=6974400, episode=1163 reward=0.8106321 (420.68 it/sec) -training >> step=6974500, episode=1163 reward=0.766138 (520.57 it/sec) -training >> step=6974600, episode=1163 reward=0.8099111 (484.93 it/sec) -training >> step=6974700, episode=1163 reward=0.7702002 (479.31 it/sec) -training >> step=6974800, episode=1163 reward=0.7969388 (435.10 it/sec) -training >> step=6974900, episode=1163 reward=0.7762892 (503.36 it/sec) -training >> step=6975000, episode=1163 reward=0.790646 (503.87 it/sec) -training >> step=6975100, episode=1163 reward=0.8218228 (494.43 it/sec) -training >> step=6975200, episode=1163 reward=0.797227 (466.32 it/sec) -training >> step=6975300, episode=1163 reward=0.7953721 (486.89 it/sec) -training >> step=6975400, episode=1163 reward=0.7826568 (481.27 it/sec) -training >> step=6975500, episode=1163 reward=0.7903856 (459.11 it/sec) -training >> step=6975600, episode=1163 reward=0.7806001 (539.49 it/sec) -training >> step=6975700, episode=1163 reward=0.7723668 (445.48 it/sec) -training >> step=6975800, episode=1163 reward=0.7878945 (449.08 it/sec) -training >> step=6975900, episode=1163 reward=0.7684575 (494.29 it/sec) -training >> step=6976000, episode=1163 reward=0.7913506 (456.61 it/sec) -training >> step=6976100, episode=1163 reward=0.7876719 (494.71 it/sec) -training >> step=6976200, episode=1163 reward=0.7911596 (487.72 it/sec) -training >> step=6976300, episode=1163 reward=0.7855248 (498.64 it/sec) -training >> step=6976400, episode=1163 reward=0.7853996 (456.43 it/sec) -training >> step=6976500, episode=1163 reward=0.7724202 (492.26 it/sec) -training >> step=6976600, episode=1163 reward=0.7957487 (487.39 it/sec) -training >> step=6976700, episode=1163 reward=0.7958434 (478.09 it/sec) -training >> step=6976800, episode=1163 reward=0.8044237 (486.03 it/sec) -training >> step=6976900, episode=1163 reward=0.7883925 (459.16 it/sec) -training >> step=6977000, episode=1163 reward=0.7814549 (516.85 it/sec) -training >> step=6977100, episode=1163 reward=0.7947599 (501.32 it/sec) -training >> step=6977200, episode=1163 reward=0.7606729 (451.21 it/sec) -training >> step=6977300, episode=1164 reward=0.7581376 (93.24 it/sec) -training >> step=6977400, episode=1164 reward=0.7815459 (461.80 it/sec) -training >> step=6977500, episode=1164 reward=0.787688 (417.71 it/sec) -training >> step=6977600, episode=1164 reward=0.7847098 (488.03 it/sec) -training >> step=6977700, episode=1164 reward=0.7848995 (467.22 it/sec) -training >> step=6977800, episode=1164 reward=0.7918774 (447.33 it/sec) -training >> step=6977900, episode=1164 reward=0.7966912 (479.65 it/sec) -training >> step=6978000, episode=1164 reward=0.7813346 (540.94 it/sec) -training >> step=6978100, episode=1164 reward=0.8026786 (504.27 it/sec) -training >> step=6978200, episode=1164 reward=0.777929 (477.89 it/sec) -training >> step=6978300, episode=1164 reward=0.7988387 (499.65 it/sec) -training >> step=6978400, episode=1164 reward=0.779107 (459.77 it/sec) -training >> step=6978500, episode=1164 reward=0.7987739 (500.19 it/sec) -training >> step=6978600, episode=1164 reward=0.7616298 (471.21 it/sec) -training >> step=6978700, episode=1164 reward=0.7776813 (517.53 it/sec) -training >> step=6978800, episode=1164 reward=0.7919027 (489.89 it/sec) -training >> step=6978900, episode=1164 reward=0.7750381 (408.90 it/sec) -training >> step=6979000, episode=1164 reward=0.7847961 (504.68 it/sec) -training >> step=6979100, episode=1164 reward=0.7966284 (534.62 it/sec) -training >> step=6979200, episode=1164 reward=0.7884504 (455.75 it/sec) -training >> step=6979300, episode=1164 reward=0.7967219 (508.01 it/sec) -training >> step=6979400, episode=1164 reward=0.806039 (491.51 it/sec) -training >> step=6979500, episode=1164 reward=0.7829125 (516.51 it/sec) -training >> step=6979600, episode=1164 reward=0.7882716 (497.82 it/sec) -training >> step=6979700, episode=1164 reward=0.785847 (429.56 it/sec) -training >> step=6979800, episode=1164 reward=0.7847795 (468.35 it/sec) -training >> step=6979900, episode=1164 reward=0.7985917 (465.39 it/sec) -training >> step=6980000, episode=1164 reward=0.7746661 (489.30 it/sec) -training >> step=6980100, episode=1164 reward=0.789938 (470.74 it/sec) -training >> step=6980200, episode=1164 reward=0.7907529 (501.20 it/sec) -training >> step=6980300, episode=1164 reward=0.8024932 (482.63 it/sec) -training >> step=6980400, episode=1164 reward=0.8090675 (486.06 it/sec) -training >> step=6980500, episode=1164 reward=0.7920532 (528.65 it/sec) -training >> step=6980600, episode=1164 reward=0.7919657 (489.81 it/sec) -training >> step=6980700, episode=1164 reward=0.7851397 (480.61 it/sec) -training >> step=6980800, episode=1164 reward=0.8002418 (475.28 it/sec) -training >> step=6980900, episode=1164 reward=0.796428 (487.84 it/sec) -training >> step=6981000, episode=1164 reward=0.7645866 (507.23 it/sec) -training >> step=6981100, episode=1164 reward=0.8007654 (490.10 it/sec) -training >> step=6981200, episode=1164 reward=0.76213 (450.11 it/sec) -training >> step=6981300, episode=1164 reward=0.7808692 (456.32 it/sec) -training >> step=6981400, episode=1164 reward=0.782414 (479.42 it/sec) -training >> step=6981500, episode=1164 reward=0.7902881 (482.40 it/sec) -training >> step=6981600, episode=1164 reward=0.7971296 (497.99 it/sec) -training >> step=6981700, episode=1164 reward=0.784447 (420.16 it/sec) -training >> step=6981800, episode=1164 reward=0.7940513 (467.92 it/sec) -training >> step=6981900, episode=1164 reward=0.7781424 (471.02 it/sec) -training >> step=6982000, episode=1164 reward=0.7774261 (499.63 it/sec) -training >> step=6982100, episode=1164 reward=0.7856686 (486.86 it/sec) -training >> step=6982200, episode=1164 reward=0.7810071 (491.49 it/sec) -training >> step=6982300, episode=1164 reward=0.7777218 (432.72 it/sec) -training >> step=6982400, episode=1164 reward=0.790978 (505.81 it/sec) -training >> step=6982500, episode=1164 reward=0.7782964 (463.67 it/sec) -training >> step=6982600, episode=1164 reward=0.7970287 (449.21 it/sec) -training >> step=6982700, episode=1164 reward=0.8020943 (487.72 it/sec) -training >> step=6982800, episode=1164 reward=0.8073743 (477.73 it/sec) -training >> step=6982900, episode=1164 reward=0.7818646 (476.17 it/sec) -training >> step=6983000, episode=1164 reward=0.7876846 (470.10 it/sec) -training >> step=6983100, episode=1164 reward=0.7910522 (497.89 it/sec) -training >> step=6983200, episode=1164 reward=0.7816002 (474.25 it/sec) -training >> step=6983300, episode=1165 reward=0.787681 (87.08 it/sec) -training >> step=6983400, episode=1165 reward=0.7916579 (508.15 it/sec) -training >> step=6983500, episode=1165 reward=0.8079842 (479.69 it/sec) -training >> step=6983600, episode=1165 reward=0.8096707 (472.21 it/sec) -training >> step=6983700, episode=1165 reward=0.7852302 (470.76 it/sec) -training >> step=6983800, episode=1165 reward=0.7819816 (493.41 it/sec) -training >> step=6983900, episode=1165 reward=0.7901701 (485.29 it/sec) -training >> step=6984000, episode=1165 reward=0.7947845 (500.13 it/sec) -training >> step=6984100, episode=1165 reward=0.781316 (516.35 it/sec) -training >> step=6984200, episode=1165 reward=0.7884366 (468.63 it/sec) -training >> step=6984300, episode=1165 reward=0.7824497 (458.09 it/sec) -training >> step=6984400, episode=1165 reward=0.7753342 (486.37 it/sec) -training >> step=6984500, episode=1165 reward=0.7947458 (517.94 it/sec) -training >> step=6984600, episode=1165 reward=0.7994785 (442.26 it/sec) -training >> step=6984700, episode=1165 reward=0.7936487 (464.88 it/sec) -training >> step=6984800, episode=1165 reward=0.7797 (509.15 it/sec) -training >> step=6984900, episode=1165 reward=0.7938359 (496.95 it/sec) -training >> step=6985000, episode=1165 reward=0.8129218 (504.70 it/sec) -training >> step=6985100, episode=1165 reward=0.8033965 (479.75 it/sec) -training >> step=6985200, episode=1165 reward=0.7799132 (528.66 it/sec) -training >> step=6985300, episode=1165 reward=0.8102665 (473.47 it/sec) -training >> step=6985400, episode=1165 reward=0.7796736 (474.55 it/sec) -training >> step=6985500, episode=1165 reward=0.7950334 (486.08 it/sec) -training >> step=6985600, episode=1165 reward=0.7724269 (505.45 it/sec) -training >> step=6985700, episode=1165 reward=0.7800159 (486.76 it/sec) -training >> step=6985800, episode=1165 reward=0.7931712 (501.37 it/sec) -training >> step=6985900, episode=1165 reward=0.7845125 (471.65 it/sec) -training >> step=6986000, episode=1165 reward=0.7819739 (455.81 it/sec) -training >> step=6986100, episode=1165 reward=0.7776571 (499.13 it/sec) -training >> step=6986200, episode=1165 reward=0.7785208 (489.55 it/sec) -training >> step=6986300, episode=1165 reward=0.8016243 (476.42 it/sec) -training >> step=6986400, episode=1165 reward=0.7694667 (505.43 it/sec) -training >> step=6986500, episode=1165 reward=0.7817261 (446.71 it/sec) -training >> step=6986600, episode=1165 reward=0.7926422 (499.08 it/sec) -training >> step=6986700, episode=1165 reward=0.7871507 (487.33 it/sec) -training >> step=6986800, episode=1165 reward=0.7910557 (472.87 it/sec) -training >> step=6986900, episode=1165 reward=0.7817047 (444.49 it/sec) -training >> step=6987000, episode=1165 reward=0.7825775 (467.35 it/sec) -training >> step=6987100, episode=1165 reward=0.7826285 (429.35 it/sec) -training >> step=6987200, episode=1165 reward=0.7863274 (496.97 it/sec) -training >> step=6987300, episode=1165 reward=0.7842085 (491.48 it/sec) -training >> step=6987400, episode=1165 reward=0.7563082 (546.09 it/sec) -training >> step=6987500, episode=1165 reward=0.7864679 (543.24 it/sec) -training >> step=6987600, episode=1165 reward=0.7932792 (533.31 it/sec) -training >> step=6987700, episode=1165 reward=0.7772663 (503.77 it/sec) -training >> step=6987800, episode=1165 reward=0.7890314 (490.44 it/sec) -training >> step=6987900, episode=1165 reward=0.7785534 (474.36 it/sec) -training >> step=6988000, episode=1165 reward=0.7888666 (507.98 it/sec) -training >> step=6988100, episode=1165 reward=0.7776394 (544.21 it/sec) -training >> step=6988200, episode=1165 reward=0.7735623 (503.22 it/sec) -training >> step=6988300, episode=1165 reward=0.793682 (478.55 it/sec) -training >> step=6988400, episode=1165 reward=0.768221 (506.92 it/sec) -training >> step=6988500, episode=1165 reward=0.7978619 (480.29 it/sec) -training >> step=6988600, episode=1165 reward=0.7924195 (513.98 it/sec) -training >> step=6988700, episode=1165 reward=0.7800743 (486.60 it/sec) -training >> step=6988800, episode=1165 reward=0.7804462 (454.12 it/sec) -training >> step=6988900, episode=1165 reward=0.7821026 (487.07 it/sec) -training >> step=6989000, episode=1165 reward=0.7925531 (487.95 it/sec) -training >> step=6989100, episode=1165 reward=0.79576 (521.96 it/sec) -training >> step=6989200, episode=1165 reward=0.7875485 (523.21 it/sec) -training >> step=6989300, episode=1166 reward=0.7998515 (92.36 it/sec) -training >> step=6989400, episode=1166 reward=0.7849103 (468.57 it/sec) -training >> step=6989500, episode=1166 reward=0.7964333 (484.79 it/sec) -training >> step=6989600, episode=1166 reward=0.8050381 (458.17 it/sec) -training >> step=6989700, episode=1166 reward=0.7769731 (472.22 it/sec) -training >> step=6989800, episode=1166 reward=0.7815441 (477.58 it/sec) -training >> step=6989900, episode=1166 reward=0.7949383 (520.35 it/sec) -training >> step=6990000, episode=1166 reward=0.7771143 (478.78 it/sec) -training >> step=6990100, episode=1166 reward=0.7783588 (436.96 it/sec) -training >> step=6990200, episode=1166 reward=0.8022687 (513.90 it/sec) -training >> step=6990300, episode=1166 reward=0.7608227 (470.51 it/sec) -training >> step=6990400, episode=1166 reward=0.7894438 (469.31 it/sec) -training >> step=6990500, episode=1166 reward=0.7788219 (482.18 it/sec) -training >> step=6990600, episode=1166 reward=0.7818764 (503.50 it/sec) -training >> step=6990700, episode=1166 reward=0.7903839 (474.41 it/sec) -training >> step=6990800, episode=1166 reward=0.8034525 (480.38 it/sec) -training >> step=6990900, episode=1166 reward=0.7807971 (483.02 it/sec) -training >> step=6991000, episode=1166 reward=0.7800285 (493.94 it/sec) -training >> step=6991100, episode=1166 reward=0.7755464 (478.35 it/sec) -training >> step=6991200, episode=1166 reward=0.8065728 (457.24 it/sec) -training >> step=6991300, episode=1166 reward=0.7886263 (443.58 it/sec) -training >> step=6991400, episode=1166 reward=0.7880883 (443.25 it/sec) -training >> step=6991500, episode=1166 reward=0.7906021 (476.82 it/sec) -training >> step=6991600, episode=1166 reward=0.7862137 (468.97 it/sec) -training >> step=6991700, episode=1166 reward=0.7781138 (477.57 it/sec) -training >> step=6991800, episode=1166 reward=0.7876547 (467.49 it/sec) -training >> step=6991900, episode=1166 reward=0.7967715 (452.21 it/sec) -training >> step=6992000, episode=1166 reward=0.7838635 (474.17 it/sec) -training >> step=6992100, episode=1166 reward=0.7889717 (480.25 it/sec) -training >> step=6992200, episode=1166 reward=0.7866688 (465.77 it/sec) -training >> step=6992300, episode=1166 reward=0.8097074 (462.69 it/sec) -training >> step=6992400, episode=1166 reward=0.799481 (478.52 it/sec) -training >> step=6992500, episode=1166 reward=0.8055249 (474.78 it/sec) -training >> step=6992600, episode=1166 reward=0.7707603 (472.74 it/sec) -training >> step=6992700, episode=1166 reward=0.792273 (474.36 it/sec) -training >> step=6992800, episode=1166 reward=0.7869014 (475.53 it/sec) -training >> step=6992900, episode=1166 reward=0.7815123 (486.90 it/sec) -training >> step=6993000, episode=1166 reward=0.7679687 (407.94 it/sec) -training >> step=6993100, episode=1166 reward=0.7880043 (488.92 it/sec) -training >> step=6993200, episode=1166 reward=0.7773347 (446.86 it/sec) -training >> step=6993300, episode=1166 reward=0.7768694 (470.78 it/sec) -training >> step=6993400, episode=1166 reward=0.774223 (449.72 it/sec) -training >> step=6993500, episode=1166 reward=0.7645497 (460.60 it/sec) -training >> step=6993600, episode=1166 reward=0.7914007 (459.96 it/sec) -training >> step=6993700, episode=1166 reward=0.7670367 (423.04 it/sec) -training >> step=6993800, episode=1166 reward=0.7771791 (499.87 it/sec) -training >> step=6993900, episode=1166 reward=0.7880506 (462.33 it/sec) -training >> step=6994000, episode=1166 reward=0.7831114 (469.79 it/sec) -training >> step=6994100, episode=1166 reward=0.7910078 (480.88 it/sec) -training >> step=6994200, episode=1166 reward=0.7758641 (485.94 it/sec) -training >> step=6994300, episode=1166 reward=0.8019848 (451.27 it/sec) -training >> step=6994400, episode=1166 reward=0.7862719 (499.36 it/sec) -training >> step=6994500, episode=1166 reward=0.7863209 (485.69 it/sec) -training >> step=6994600, episode=1166 reward=0.7802206 (461.73 it/sec) -training >> step=6994700, episode=1166 reward=0.7776783 (455.27 it/sec) -training >> step=6994800, episode=1166 reward=0.7736053 (490.24 it/sec) -training >> step=6994900, episode=1166 reward=0.7569071 (500.86 it/sec) -training >> step=6995000, episode=1166 reward=0.7591491 (468.44 it/sec) -training >> step=6995100, episode=1166 reward=0.7762165 (489.70 it/sec) -training >> step=6995200, episode=1166 reward=0.7989038 (454.25 it/sec) -training >> step=6995300, episode=1167 reward=0.8099122 (116.21 it/sec) -training >> step=6995400, episode=1167 reward=0.7852125 (489.56 it/sec) -training >> step=6995500, episode=1167 reward=0.7832937 (456.63 it/sec) -training >> step=6995600, episode=1167 reward=0.7999817 (514.50 it/sec) -training >> step=6995700, episode=1167 reward=0.7864969 (523.79 it/sec) -training >> step=6995800, episode=1167 reward=0.7967177 (521.84 it/sec) -training >> step=6995900, episode=1167 reward=0.7930942 (489.63 it/sec) -training >> step=6996000, episode=1167 reward=0.7945799 (493.33 it/sec) -training >> step=6996100, episode=1167 reward=0.7952706 (522.92 it/sec) -training >> step=6996200, episode=1167 reward=0.7794434 (521.21 it/sec) -training >> step=6996300, episode=1167 reward=0.7983307 (510.56 it/sec) -training >> step=6996400, episode=1167 reward=0.7796737 (550.53 it/sec) -training >> step=6996500, episode=1167 reward=0.771134 (528.63 it/sec) -training >> step=6996600, episode=1167 reward=0.7916148 (478.08 it/sec) -training >> step=6996700, episode=1167 reward=0.7789246 (482.47 it/sec) -training >> step=6996800, episode=1167 reward=0.8030272 (441.91 it/sec) -training >> step=6996900, episode=1167 reward=0.8006257 (500.12 it/sec) -training >> step=6997000, episode=1167 reward=0.7988455 (491.57 it/sec) -training >> step=6997100, episode=1167 reward=0.7823544 (551.17 it/sec) -training >> step=6997200, episode=1167 reward=0.7873667 (478.50 it/sec) -training >> step=6997300, episode=1167 reward=0.7813076 (518.42 it/sec) -training >> step=6997400, episode=1167 reward=0.7872895 (495.61 it/sec) -training >> step=6997500, episode=1167 reward=0.7803648 (517.50 it/sec) -training >> step=6997600, episode=1167 reward=0.8003078 (551.92 it/sec) -training >> step=6997700, episode=1167 reward=0.7766518 (504.07 it/sec) -training >> step=6997800, episode=1167 reward=0.7905312 (440.75 it/sec) -training >> step=6997900, episode=1167 reward=0.8007857 (514.09 it/sec) -training >> step=6998000, episode=1167 reward=0.7847802 (444.99 it/sec) -training >> step=6998100, episode=1167 reward=0.7827165 (529.40 it/sec) -training >> step=6998200, episode=1167 reward=0.7944165 (526.80 it/sec) -training >> step=6998300, episode=1167 reward=0.7870841 (564.24 it/sec) -training >> step=6998400, episode=1167 reward=0.7837393 (486.27 it/sec) -training >> step=6998500, episode=1167 reward=0.7587978 (507.39 it/sec) -training >> step=6998600, episode=1167 reward=0.7883338 (512.79 it/sec) -training >> step=6998700, episode=1167 reward=0.7925193 (526.65 it/sec) -training >> step=6998800, episode=1167 reward=0.8066872 (517.09 it/sec) -training >> step=6998900, episode=1167 reward=0.7547261 (507.63 it/sec) -training >> step=6999000, episode=1167 reward=0.7688754 (551.41 it/sec) -training >> step=6999100, episode=1167 reward=0.7829701 (504.04 it/sec) -training >> step=6999200, episode=1167 reward=0.7833515 (450.73 it/sec) -training >> step=6999300, episode=1167 reward=0.8142126 (517.74 it/sec) -training >> step=6999400, episode=1167 reward=0.7857793 (527.79 it/sec) -training >> step=6999500, episode=1167 reward=0.8017834 (511.03 it/sec) -training >> step=6999600, episode=1167 reward=0.7805933 (494.35 it/sec) -training >> step=6999700, episode=1167 reward=0.7923425 (516.48 it/sec) -training >> step=6999800, episode=1167 reward=0.7776883 (506.13 it/sec) -training >> step=6999900, episode=1167 reward=0.7856357 (514.72 it/sec) -training >> step=7000000, episode=1167 reward=0.7645757 (477.18 it/sec) -training >> step=7000100, episode=1167 reward=0.7653399 (504.38 it/sec) -training >> step=7000200, episode=1167 reward=0.7575942 (514.43 it/sec) -training >> step=7000300, episode=1167 reward=0.8078786 (524.88 it/sec) -training >> step=7000400, episode=1167 reward=0.7666829 (484.68 it/sec) -training >> step=7000500, episode=1167 reward=0.7889376 (486.44 it/sec) -training >> step=7000600, episode=1167 reward=0.7721368 (478.12 it/sec) -training >> step=7000700, episode=1167 reward=0.7969341 (487.23 it/sec) -training >> step=7000800, episode=1167 reward=0.7926775 (537.72 it/sec) -training >> step=7000900, episode=1167 reward=0.7880747 (545.14 it/sec) -training >> step=7001000, episode=1167 reward=0.7875302 (434.54 it/sec) -training >> step=7001100, episode=1167 reward=0.7981574 (506.45 it/sec) -training >> step=7001200, episode=1167 reward=0.7703023 (530.26 it/sec) -training >> step=7001300, episode=1168 reward=0.7797515 (74.11 it/sec) -training >> step=7001400, episode=1168 reward=0.800798 (505.08 it/sec) -training >> step=7001500, episode=1168 reward=0.7589368 (491.61 it/sec) -training >> step=7001600, episode=1168 reward=0.7878606 (482.64 it/sec) -training >> step=7001700, episode=1168 reward=0.7872873 (517.65 it/sec) -training >> step=7001800, episode=1168 reward=0.7800437 (490.77 it/sec) -training >> step=7001900, episode=1168 reward=0.7697153 (458.68 it/sec) -training >> step=7002000, episode=1168 reward=0.8037259 (474.63 it/sec) -training >> step=7002100, episode=1168 reward=0.779492 (497.20 it/sec) -training >> step=7002200, episode=1168 reward=0.7831416 (478.86 it/sec) -training >> step=7002300, episode=1168 reward=0.780237 (490.31 it/sec) -training >> step=7002400, episode=1168 reward=0.7554844 (520.59 it/sec) -training >> step=7002500, episode=1168 reward=0.793549 (472.61 it/sec) -training >> step=7002600, episode=1168 reward=0.7981063 (479.28 it/sec) -training >> step=7002700, episode=1168 reward=0.8090594 (505.64 it/sec) -training >> step=7002800, episode=1168 reward=0.7986219 (461.80 it/sec) -training >> step=7002900, episode=1168 reward=0.794407 (490.85 it/sec) -training >> step=7003000, episode=1168 reward=0.7881997 (436.79 it/sec) -training >> step=7003100, episode=1168 reward=0.780629 (472.47 it/sec) -training >> step=7003200, episode=1168 reward=0.7959975 (494.56 it/sec) -training >> step=7003300, episode=1168 reward=0.7767022 (465.33 it/sec) -training >> step=7003400, episode=1168 reward=0.7715769 (486.58 it/sec) -training >> step=7003500, episode=1168 reward=0.8107547 (464.97 it/sec) -training >> step=7003600, episode=1168 reward=0.7906757 (470.75 it/sec) -training >> step=7003700, episode=1168 reward=0.7977231 (460.69 it/sec) -training >> step=7003800, episode=1168 reward=0.7928853 (490.06 it/sec) -training >> step=7003900, episode=1168 reward=0.7842987 (431.68 it/sec) -training >> step=7004000, episode=1168 reward=0.8025414 (474.06 it/sec) -training >> step=7004100, episode=1168 reward=0.8072162 (456.71 it/sec) -training >> step=7004200, episode=1168 reward=0.7764001 (498.13 it/sec) -training >> step=7004300, episode=1168 reward=0.7764636 (478.16 it/sec) -training >> step=7004400, episode=1168 reward=0.7788725 (465.53 it/sec) -training >> step=7004500, episode=1168 reward=0.7667307 (468.53 it/sec) -training >> step=7004600, episode=1168 reward=0.7713639 (453.09 it/sec) -training >> step=7004700, episode=1168 reward=0.7995445 (492.29 it/sec) -training >> step=7004800, episode=1168 reward=0.7852893 (493.88 it/sec) -training >> step=7004900, episode=1168 reward=0.7925954 (508.74 it/sec) -training >> step=7005000, episode=1168 reward=0.7893072 (464.71 it/sec) -training >> step=7005100, episode=1168 reward=0.8041445 (437.31 it/sec) -training >> step=7005200, episode=1168 reward=0.7588724 (491.17 it/sec) -training >> step=7005300, episode=1168 reward=0.7825541 (457.96 it/sec) -training >> step=7005400, episode=1168 reward=0.7879516 (463.40 it/sec) -training >> step=7005500, episode=1168 reward=0.7596008 (484.30 it/sec) -training >> step=7005600, episode=1168 reward=0.7866029 (480.99 it/sec) -training >> step=7005700, episode=1168 reward=0.7918353 (441.58 it/sec) -training >> step=7005800, episode=1168 reward=0.7946743 (493.07 it/sec) -training >> step=7005900, episode=1168 reward=0.7934327 (492.24 it/sec) -training >> step=7006000, episode=1168 reward=0.7710617 (480.79 it/sec) -training >> step=7006100, episode=1168 reward=0.7679994 (504.26 it/sec) -training >> step=7006200, episode=1168 reward=0.7774036 (476.52 it/sec) -training >> step=7006300, episode=1168 reward=0.7652574 (469.04 it/sec) -training >> step=7006400, episode=1168 reward=0.80424 (479.88 it/sec) -training >> step=7006500, episode=1168 reward=0.7852336 (457.88 it/sec) -training >> step=7006600, episode=1168 reward=0.7719824 (499.40 it/sec) -training >> step=7006700, episode=1168 reward=0.7894406 (504.88 it/sec) -training >> step=7006800, episode=1168 reward=0.7808535 (479.93 it/sec) -training >> step=7006900, episode=1168 reward=0.7838066 (425.87 it/sec) -training >> step=7007000, episode=1168 reward=0.7815561 (506.70 it/sec) -training >> step=7007100, episode=1168 reward=0.7871448 (480.47 it/sec) -training >> step=7007200, episode=1168 reward=0.7942799 (452.85 it/sec) -training >> step=7007300, episode=1169 reward=0.8085314 (140.17 it/sec) -training >> step=7007400, episode=1169 reward=0.7843381 (450.99 it/sec) -training >> step=7007500, episode=1169 reward=0.7785725 (472.09 it/sec) -training >> step=7007600, episode=1169 reward=0.7849343 (437.82 it/sec) -training >> step=7007700, episode=1169 reward=0.7904462 (480.68 it/sec) -training >> step=7007800, episode=1169 reward=0.8001642 (451.59 it/sec) -training >> step=7007900, episode=1169 reward=0.7875162 (462.71 it/sec) -training >> step=7008000, episode=1169 reward=0.7826685 (457.15 it/sec) -training >> step=7008100, episode=1169 reward=0.7918453 (471.49 it/sec) -training >> step=7008200, episode=1169 reward=0.7864331 (387.51 it/sec) -training >> step=7008300, episode=1169 reward=0.791307 (459.94 it/sec) -training >> step=7008400, episode=1169 reward=0.8043556 (484.13 it/sec) -training >> step=7008500, episode=1169 reward=0.7784029 (476.20 it/sec) -training >> step=7008600, episode=1169 reward=0.8139948 (424.82 it/sec) -training >> step=7008700, episode=1169 reward=0.7948557 (461.92 it/sec) -training >> step=7008800, episode=1169 reward=0.8293711 (454.09 it/sec) -training >> step=7008900, episode=1169 reward=0.8014891 (481.56 it/sec) -training >> step=7009000, episode=1169 reward=0.7707002 (424.71 it/sec) -training >> step=7009100, episode=1169 reward=0.7757959 (462.05 it/sec) -training >> step=7009200, episode=1169 reward=0.7899486 (489.60 it/sec) -training >> step=7009300, episode=1169 reward=0.7865778 (473.61 it/sec) -training >> step=7009400, episode=1169 reward=0.8012983 (458.91 it/sec) -training >> step=7009500, episode=1169 reward=0.7873089 (484.64 it/sec) -training >> step=7009600, episode=1169 reward=0.7971402 (438.08 it/sec) -training >> step=7009700, episode=1169 reward=0.774774 (434.99 it/sec) -training >> step=7009800, episode=1169 reward=0.8039314 (461.52 it/sec) -training >> step=7009900, episode=1169 reward=0.7853922 (493.43 it/sec) -training >> step=7010000, episode=1169 reward=0.8023832 (426.73 it/sec) -training >> step=7010100, episode=1169 reward=0.7998083 (444.44 it/sec) -training >> step=7010200, episode=1169 reward=0.7677932 (465.48 it/sec) -training >> step=7010300, episode=1169 reward=0.7964585 (484.84 it/sec) -training >> step=7010400, episode=1169 reward=0.7750297 (419.05 it/sec) -training >> step=7010500, episode=1169 reward=0.7794017 (467.56 it/sec) -training >> step=7010600, episode=1169 reward=0.8045578 (440.77 it/sec) -training >> step=7010700, episode=1169 reward=0.7835647 (430.10 it/sec) -training >> step=7010800, episode=1169 reward=0.7857085 (449.16 it/sec) -training >> step=7010900, episode=1169 reward=0.7837628 (445.42 it/sec) -training >> step=7011000, episode=1169 reward=0.7905306 (455.65 it/sec) -training >> step=7011100, episode=1169 reward=0.7611417 (469.28 it/sec) -training >> step=7011200, episode=1169 reward=0.7843841 (452.30 it/sec) -training >> step=7011300, episode=1169 reward=0.7511348 (508.42 it/sec) -training >> step=7011400, episode=1169 reward=0.7717093 (478.34 it/sec) -training >> step=7011500, episode=1169 reward=0.7760548 (483.41 it/sec) -training >> step=7011600, episode=1169 reward=0.7704309 (513.26 it/sec) -training >> step=7011700, episode=1169 reward=0.7771877 (529.83 it/sec) -training >> step=7011800, episode=1169 reward=0.7836462 (521.08 it/sec) -training >> step=7011900, episode=1169 reward=0.782829 (458.47 it/sec) -training >> step=7012000, episode=1169 reward=0.7691848 (502.57 it/sec) -training >> step=7012100, episode=1169 reward=0.7733521 (520.25 it/sec) -training >> step=7012200, episode=1169 reward=0.7634211 (503.07 it/sec) -training >> step=7012300, episode=1169 reward=0.7914377 (526.07 it/sec) -training >> step=7012400, episode=1169 reward=0.7669368 (525.25 it/sec) -training >> step=7012500, episode=1169 reward=0.7849149 (452.34 it/sec) -training >> step=7012600, episode=1169 reward=0.7799577 (496.70 it/sec) -training >> step=7012700, episode=1169 reward=0.7846036 (478.17 it/sec) -training >> step=7012800, episode=1169 reward=0.7878755 (519.77 it/sec) -training >> step=7012900, episode=1169 reward=0.7835503 (491.00 it/sec) -training >> step=7013000, episode=1169 reward=0.7923886 (484.65 it/sec) -training >> step=7013100, episode=1169 reward=0.7821977 (528.83 it/sec) -training >> step=7013200, episode=1169 reward=0.777487 (528.53 it/sec) -training >> step=7013300, episode=1170 reward=0.7768282 (90.52 it/sec) -training >> step=7013400, episode=1170 reward=0.7964697 (468.59 it/sec) -training >> step=7013500, episode=1170 reward=0.7890006 (493.06 it/sec) -training >> step=7013600, episode=1170 reward=0.7833313 (502.21 it/sec) -training >> step=7013700, episode=1170 reward=0.7649378 (493.68 it/sec) -training >> step=7013800, episode=1170 reward=0.8035821 (506.75 it/sec) -training >> step=7013900, episode=1170 reward=0.7898732 (486.12 it/sec) -training >> step=7014000, episode=1170 reward=0.8238962 (461.40 it/sec) -training >> step=7014100, episode=1170 reward=0.7771897 (451.76 it/sec) -training >> step=7014200, episode=1170 reward=0.7916348 (529.30 it/sec) -training >> step=7014300, episode=1170 reward=0.7719393 (459.55 it/sec) -training >> step=7014400, episode=1170 reward=0.803601 (476.86 it/sec) -training >> step=7014500, episode=1170 reward=0.7677316 (512.11 it/sec) -training >> step=7014600, episode=1170 reward=0.7812901 (464.87 it/sec) -training >> step=7014700, episode=1170 reward=0.7659106 (484.35 it/sec) -training >> step=7014800, episode=1170 reward=0.796829 (507.91 it/sec) -training >> step=7014900, episode=1170 reward=0.7987363 (473.54 it/sec) -training >> step=7015000, episode=1170 reward=0.7957518 (475.83 it/sec) -training >> step=7015100, episode=1170 reward=0.8244547 (441.68 it/sec) -training >> step=7015200, episode=1170 reward=0.7635598 (510.96 it/sec) -training >> step=7015300, episode=1170 reward=0.8006687 (471.64 it/sec) -training >> step=7015400, episode=1170 reward=0.7615316 (427.96 it/sec) -training >> step=7015500, episode=1170 reward=0.7975001 (505.50 it/sec) -training >> step=7015600, episode=1170 reward=0.8028803 (486.01 it/sec) -training >> step=7015700, episode=1170 reward=0.795235 (490.82 it/sec) -training >> step=7015800, episode=1170 reward=0.7870044 (427.58 it/sec) -training >> step=7015900, episode=1170 reward=0.784917 (514.45 it/sec) -training >> step=7016000, episode=1170 reward=0.7755576 (489.03 it/sec) -training >> step=7016100, episode=1170 reward=0.795357 (467.11 it/sec) -training >> step=7016200, episode=1170 reward=0.7822618 (447.31 it/sec) -training >> step=7016300, episode=1170 reward=0.7970621 (480.54 it/sec) -training >> step=7016400, episode=1170 reward=0.78522 (460.77 it/sec) -training >> step=7016500, episode=1170 reward=0.7706971 (463.37 it/sec) -training >> step=7016600, episode=1170 reward=0.769357 (524.71 it/sec) -training >> step=7016700, episode=1170 reward=0.7650524 (468.92 it/sec) -training >> step=7016800, episode=1170 reward=0.7944939 (492.38 it/sec) -training >> step=7016900, episode=1170 reward=0.7764929 (445.98 it/sec) -training >> step=7017000, episode=1170 reward=0.7812558 (492.61 it/sec) -training >> step=7017100, episode=1170 reward=0.7921306 (494.24 it/sec) -training >> step=7017200, episode=1170 reward=0.7633824 (468.96 it/sec) -training >> step=7017300, episode=1170 reward=0.7919599 (448.78 it/sec) -training >> step=7017400, episode=1170 reward=0.7754544 (491.37 it/sec) -training >> step=7017500, episode=1170 reward=0.7706472 (488.80 it/sec) -training >> step=7017600, episode=1170 reward=0.7842492 (506.34 it/sec) -training >> step=7017700, episode=1170 reward=0.7805716 (463.96 it/sec) -training >> step=7017800, episode=1170 reward=0.8007606 (471.94 it/sec) -training >> step=7017900, episode=1170 reward=0.7957541 (439.99 it/sec) -training >> step=7018000, episode=1170 reward=0.7834162 (495.50 it/sec) -training >> step=7018100, episode=1170 reward=0.7796229 (461.74 it/sec) -training >> step=7018200, episode=1170 reward=0.7749894 (487.19 it/sec) -training >> step=7018300, episode=1170 reward=0.768383 (458.49 it/sec) -training >> step=7018400, episode=1170 reward=0.7778848 (471.39 it/sec) -training >> step=7018500, episode=1170 reward=0.7743326 (442.13 it/sec) -training >> step=7018600, episode=1170 reward=0.7843533 (495.48 it/sec) -training >> step=7018700, episode=1170 reward=0.7717822 (454.95 it/sec) -training >> step=7018800, episode=1170 reward=0.7908793 (527.14 it/sec) -training >> step=7018900, episode=1170 reward=0.7922931 (488.40 it/sec) -training >> step=7019000, episode=1170 reward=0.780976 (469.99 it/sec) -training >> step=7019100, episode=1170 reward=0.7981175 (510.24 it/sec) -training >> step=7019200, episode=1170 reward=0.7678446 (521.13 it/sec) -training >> step=7019300, episode=1171 reward=0.7730564 (99.33 it/sec) -training >> step=7019400, episode=1171 reward=0.7835403 (445.40 it/sec) -training >> step=7019500, episode=1171 reward=0.7664759 (538.10 it/sec) -training >> step=7019600, episode=1171 reward=0.768369 (471.39 it/sec) -training >> step=7019700, episode=1171 reward=0.7800873 (514.86 it/sec) -training >> step=7019800, episode=1171 reward=0.7800716 (497.21 it/sec) -training >> step=7019900, episode=1171 reward=0.7717616 (487.22 it/sec) -training >> step=7020000, episode=1171 reward=0.7823591 (493.21 it/sec) -training >> step=7020100, episode=1171 reward=0.7876149 (512.74 it/sec) -training >> step=7020200, episode=1171 reward=0.8041274 (508.29 it/sec) -training >> step=7020300, episode=1171 reward=0.7783355 (487.89 it/sec) -training >> step=7020400, episode=1171 reward=0.7863806 (478.95 it/sec) -training >> step=7020500, episode=1171 reward=0.8005312 (526.84 it/sec) -training >> step=7020600, episode=1171 reward=0.7882342 (514.20 it/sec) -training >> step=7020700, episode=1171 reward=0.8029659 (465.23 it/sec) -training >> step=7020800, episode=1171 reward=0.7934233 (508.49 it/sec) -training >> step=7020900, episode=1171 reward=0.7926784 (519.88 it/sec) -training >> step=7021000, episode=1171 reward=0.7976477 (510.91 it/sec) -training >> step=7021100, episode=1171 reward=0.7883903 (484.01 it/sec) -training >> step=7021200, episode=1171 reward=0.7894143 (537.29 it/sec) -training >> step=7021300, episode=1171 reward=0.7874455 (476.69 it/sec) -training >> step=7021400, episode=1171 reward=0.7780215 (493.95 it/sec) -training >> step=7021500, episode=1171 reward=0.7804376 (529.76 it/sec) -training >> step=7021600, episode=1171 reward=0.7634217 (529.86 it/sec) -training >> step=7021700, episode=1171 reward=0.7936905 (507.87 it/sec) -training >> step=7021800, episode=1171 reward=0.7894329 (510.80 it/sec) -training >> step=7021900, episode=1171 reward=0.7912672 (538.89 it/sec) -training >> step=7022000, episode=1171 reward=0.7684979 (494.98 it/sec) -training >> step=7022100, episode=1171 reward=0.7671972 (529.93 it/sec) -training >> step=7022200, episode=1171 reward=0.7921847 (518.63 it/sec) -training >> step=7022300, episode=1171 reward=0.7984983 (537.90 it/sec) -training >> step=7022400, episode=1171 reward=0.7741091 (451.28 it/sec) -training >> step=7022500, episode=1171 reward=0.8003875 (480.52 it/sec) -training >> step=7022600, episode=1171 reward=0.7851839 (500.64 it/sec) -training >> step=7022700, episode=1171 reward=0.79677 (555.74 it/sec) -training >> step=7022800, episode=1171 reward=0.7861385 (530.77 it/sec) -training >> step=7022900, episode=1171 reward=0.8040993 (520.42 it/sec) -training >> step=7023000, episode=1171 reward=0.7758825 (515.76 it/sec) -training >> step=7023100, episode=1171 reward=0.7958391 (519.24 it/sec) -training >> step=7023200, episode=1171 reward=0.7881321 (482.53 it/sec) -training >> step=7023300, episode=1171 reward=0.7905502 (526.89 it/sec) -training >> step=7023400, episode=1171 reward=0.769442 (527.54 it/sec) -training >> step=7023500, episode=1171 reward=0.792639 (507.51 it/sec) -training >> step=7023600, episode=1171 reward=0.7675022 (439.96 it/sec) -training >> step=7023700, episode=1171 reward=0.786313 (501.67 it/sec) -training >> step=7023800, episode=1171 reward=0.783076 (540.87 it/sec) -training >> step=7023900, episode=1171 reward=0.7886783 (523.02 it/sec) -training >> step=7024000, episode=1171 reward=0.7780123 (523.42 it/sec) -training >> step=7024100, episode=1171 reward=0.7926829 (526.84 it/sec) -training >> step=7024200, episode=1171 reward=0.7738782 (473.22 it/sec) -training >> step=7024300, episode=1171 reward=0.7911819 (530.06 it/sec) -training >> step=7024400, episode=1171 reward=0.7791424 (531.49 it/sec) -training >> step=7024500, episode=1171 reward=0.7745135 (529.70 it/sec) -training >> step=7024600, episode=1171 reward=0.7615099 (519.95 it/sec) -training >> step=7024700, episode=1171 reward=0.7956149 (484.21 it/sec) -training >> step=7024800, episode=1171 reward=0.7624879 (499.56 it/sec) -training >> step=7024900, episode=1171 reward=0.8002213 (497.59 it/sec) -training >> step=7025000, episode=1171 reward=0.7952009 (471.25 it/sec) -training >> step=7025100, episode=1171 reward=0.7841006 (464.23 it/sec) -training >> step=7025200, episode=1171 reward=0.7840351 (523.73 it/sec) -training >> step=7025300, episode=1172 reward=0.8004364 (110.09 it/sec) -training >> step=7025400, episode=1172 reward=0.7968276 (489.15 it/sec) -training >> step=7025500, episode=1172 reward=0.7874092 (476.18 it/sec) -training >> step=7025600, episode=1172 reward=0.7770507 (501.48 it/sec) -training >> step=7025700, episode=1172 reward=0.7792439 (493.04 it/sec) -training >> step=7025800, episode=1172 reward=0.7770095 (442.70 it/sec) -training >> step=7025900, episode=1172 reward=0.7749069 (491.11 it/sec) -training >> step=7026000, episode=1172 reward=0.792578 (478.75 it/sec) -training >> step=7026100, episode=1172 reward=0.7769267 (489.46 it/sec) -training >> step=7026200, episode=1172 reward=0.7913254 (472.09 it/sec) -training >> step=7026300, episode=1172 reward=0.7841159 (506.65 it/sec) -training >> step=7026400, episode=1172 reward=0.7982449 (474.37 it/sec) -training >> step=7026500, episode=1172 reward=0.7892015 (478.73 it/sec) -training >> step=7026600, episode=1172 reward=0.7716863 (528.11 it/sec) -training >> step=7026700, episode=1172 reward=0.7847704 (473.11 it/sec) -training >> step=7026800, episode=1172 reward=0.7615563 (473.32 it/sec) -training >> step=7026900, episode=1172 reward=0.7961801 (458.83 it/sec) -training >> step=7027000, episode=1172 reward=0.7807748 (496.87 it/sec) -training >> step=7027100, episode=1172 reward=0.7935967 (480.60 it/sec) -training >> step=7027200, episode=1172 reward=0.792492 (489.56 it/sec) -training >> step=7027300, episode=1172 reward=0.775224 (506.20 it/sec) -training >> step=7027400, episode=1172 reward=0.7981178 (492.12 it/sec) -training >> step=7027500, episode=1172 reward=0.7663933 (469.23 it/sec) -training >> step=7027600, episode=1172 reward=0.7958447 (460.98 it/sec) -training >> step=7027700, episode=1172 reward=0.7895892 (505.41 it/sec) -training >> step=7027800, episode=1172 reward=0.7815924 (492.73 it/sec) -training >> step=7027900, episode=1172 reward=0.7725182 (480.24 it/sec) -training >> step=7028000, episode=1172 reward=0.8112969 (478.17 it/sec) -training >> step=7028100, episode=1172 reward=0.7925146 (480.79 it/sec) -training >> step=7028200, episode=1172 reward=0.777549 (413.19 it/sec) -training >> step=7028300, episode=1172 reward=0.8015971 (475.40 it/sec) -training >> step=7028400, episode=1172 reward=0.7920628 (455.97 it/sec) -training >> step=7028500, episode=1172 reward=0.7890633 (463.33 it/sec) -training >> step=7028600, episode=1172 reward=0.7867442 (432.04 it/sec) -training >> step=7028700, episode=1172 reward=0.7857535 (424.91 it/sec) -training >> step=7028800, episode=1172 reward=0.7976423 (498.16 it/sec) -training >> step=7028900, episode=1172 reward=0.8035958 (463.91 it/sec) -training >> step=7029000, episode=1172 reward=0.7746642 (472.13 it/sec) -training >> step=7029100, episode=1172 reward=0.7978343 (463.96 it/sec) -training >> step=7029200, episode=1172 reward=0.7763811 (473.73 it/sec) -training >> step=7029300, episode=1172 reward=0.7830244 (454.13 it/sec) -training >> step=7029400, episode=1172 reward=0.7819235 (427.96 it/sec) -training >> step=7029500, episode=1172 reward=0.7854491 (454.03 it/sec) -training >> step=7029600, episode=1172 reward=0.7764285 (473.71 it/sec) -training >> step=7029700, episode=1172 reward=0.7825719 (390.86 it/sec) -training >> step=7029800, episode=1172 reward=0.7910441 (435.83 it/sec) -training >> step=7029900, episode=1172 reward=0.7995274 (477.29 it/sec) -training >> step=7030000, episode=1172 reward=0.7916802 (467.56 it/sec) -training >> step=7030100, episode=1172 reward=0.7836695 (462.25 it/sec) -training >> step=7030200, episode=1172 reward=0.795453 (479.22 it/sec) -training >> step=7030300, episode=1172 reward=0.7919751 (481.25 it/sec) -training >> step=7030400, episode=1172 reward=0.7693144 (474.19 it/sec) -training >> step=7030500, episode=1172 reward=0.7695596 (474.96 it/sec) -training >> step=7030600, episode=1172 reward=0.7817647 (457.62 it/sec) -training >> step=7030700, episode=1172 reward=0.7883652 (434.30 it/sec) -training >> step=7030800, episode=1172 reward=0.8065739 (479.66 it/sec) -training >> step=7030900, episode=1172 reward=0.7887914 (455.09 it/sec) -training >> step=7031000, episode=1172 reward=0.775243 (477.63 it/sec) -training >> step=7031100, episode=1172 reward=0.7871892 (473.45 it/sec) -training >> step=7031200, episode=1172 reward=0.7860042 (462.45 it/sec) -training >> step=7031300, episode=1173 reward=0.7836261 (61.61 it/sec) -training >> step=7031400, episode=1173 reward=0.7862399 (431.00 it/sec) -training >> step=7031500, episode=1173 reward=0.7846806 (454.23 it/sec) -training >> step=7031600, episode=1173 reward=0.7969676 (478.26 it/sec) -training >> step=7031700, episode=1173 reward=0.7853774 (477.04 it/sec) -training >> step=7031800, episode=1173 reward=0.7838681 (472.91 it/sec) -training >> step=7031900, episode=1173 reward=0.7813225 (470.67 it/sec) -training >> step=7032000, episode=1173 reward=0.80565 (469.54 it/sec) -training >> step=7032100, episode=1173 reward=0.7902911 (454.84 it/sec) -training >> step=7032200, episode=1173 reward=0.7964897 (440.70 it/sec) -training >> step=7032300, episode=1173 reward=0.7991421 (451.83 it/sec) -training >> step=7032400, episode=1173 reward=0.7786718 (469.15 it/sec) -training >> step=7032500, episode=1173 reward=0.7902021 (424.76 it/sec) -training >> step=7032600, episode=1173 reward=0.8013248 (449.93 it/sec) -training >> step=7032700, episode=1173 reward=0.8122625 (452.29 it/sec) -training >> step=7032800, episode=1173 reward=0.7904196 (492.76 it/sec) -training >> step=7032900, episode=1173 reward=0.772934 (436.52 it/sec) -training >> step=7033000, episode=1173 reward=0.7891997 (482.08 it/sec) -training >> step=7033100, episode=1173 reward=0.7864731 (466.30 it/sec) -training >> step=7033200, episode=1173 reward=0.7814261 (511.58 it/sec) -training >> step=7033300, episode=1173 reward=0.7941163 (446.32 it/sec) -training >> step=7033400, episode=1173 reward=0.7723233 (432.35 it/sec) -training >> step=7033500, episode=1173 reward=0.7968596 (460.15 it/sec) -training >> step=7033600, episode=1173 reward=0.8068214 (422.80 it/sec) -training >> step=7033700, episode=1173 reward=0.7836751 (490.42 it/sec) -training >> step=7033800, episode=1173 reward=0.7975057 (473.74 it/sec) -training >> step=7033900, episode=1173 reward=0.7775899 (458.67 it/sec) -training >> step=7034000, episode=1173 reward=0.7743232 (484.37 it/sec) -training >> step=7034100, episode=1173 reward=0.7677557 (486.70 it/sec) -training >> step=7034200, episode=1173 reward=0.7970248 (477.76 it/sec) -training >> step=7034300, episode=1173 reward=0.7714131 (473.52 it/sec) -training >> step=7034400, episode=1173 reward=0.7971815 (482.16 it/sec) -training >> step=7034500, episode=1173 reward=0.7806411 (512.56 it/sec) -training >> step=7034600, episode=1173 reward=0.7977652 (476.01 it/sec) -training >> step=7034700, episode=1173 reward=0.7870126 (491.85 it/sec) -training >> step=7034800, episode=1173 reward=0.7952548 (523.38 it/sec) -training >> step=7034900, episode=1173 reward=0.7973909 (518.77 it/sec) -training >> step=7035000, episode=1173 reward=0.7943433 (492.40 it/sec) -training >> step=7035100, episode=1173 reward=0.7724938 (543.02 it/sec) -training >> step=7035200, episode=1173 reward=0.7976805 (481.00 it/sec) -training >> step=7035300, episode=1173 reward=0.7803218 (475.82 it/sec) -training >> step=7035400, episode=1173 reward=0.7829981 (472.57 it/sec) -training >> step=7035500, episode=1173 reward=0.7812853 (481.12 it/sec) -training >> step=7035600, episode=1173 reward=0.7825638 (520.53 it/sec) -training >> step=7035700, episode=1173 reward=0.7833418 (490.68 it/sec) -training >> step=7035800, episode=1173 reward=0.7759013 (491.09 it/sec) -training >> step=7035900, episode=1173 reward=0.7558613 (463.50 it/sec) -training >> step=7036000, episode=1173 reward=0.7712669 (499.41 it/sec) -training >> step=7036100, episode=1173 reward=0.7886328 (511.45 it/sec) -training >> step=7036200, episode=1173 reward=0.7730702 (491.44 it/sec) -training >> step=7036300, episode=1173 reward=0.7875304 (496.82 it/sec) -training >> step=7036400, episode=1173 reward=0.788036 (499.46 it/sec) -training >> step=7036500, episode=1173 reward=0.7868447 (501.39 it/sec) -training >> step=7036600, episode=1173 reward=0.7939227 (537.00 it/sec) -training >> step=7036700, episode=1173 reward=0.7606959 (524.22 it/sec) -training >> step=7036800, episode=1173 reward=0.7959243 (492.17 it/sec) -training >> step=7036900, episode=1173 reward=0.7885265 (476.87 it/sec) -training >> step=7037000, episode=1173 reward=0.7761148 (497.27 it/sec) -training >> step=7037100, episode=1173 reward=0.7670465 (490.86 it/sec) -training >> step=7037200, episode=1173 reward=0.7842866 (479.85 it/sec) -training >> step=7037300, episode=1174 reward=0.7825235 (94.27 it/sec) -training >> step=7037400, episode=1174 reward=0.7850505 (472.20 it/sec) -training >> step=7037500, episode=1174 reward=0.7973128 (446.22 it/sec) -training >> step=7037600, episode=1174 reward=0.7841499 (462.66 it/sec) -training >> step=7037700, episode=1174 reward=0.7943116 (510.23 it/sec) -training >> step=7037800, episode=1174 reward=0.7978928 (507.94 it/sec) -training >> step=7037900, episode=1174 reward=0.7922364 (506.65 it/sec) -training >> step=7038000, episode=1174 reward=0.7899437 (437.34 it/sec) -training >> step=7038100, episode=1174 reward=0.7874671 (473.49 it/sec) -training >> step=7038200, episode=1174 reward=0.7677301 (464.24 it/sec) -training >> step=7038300, episode=1174 reward=0.7877165 (471.11 it/sec) -training >> step=7038400, episode=1174 reward=0.7715698 (479.16 it/sec) -training >> step=7038500, episode=1174 reward=0.7845032 (471.59 it/sec) -training >> step=7038600, episode=1174 reward=0.7928944 (405.17 it/sec) -training >> step=7038700, episode=1174 reward=0.7810861 (466.43 it/sec) -training >> step=7038800, episode=1174 reward=0.781846 (486.08 it/sec) -training >> step=7038900, episode=1174 reward=0.792571 (468.67 it/sec) -training >> step=7039000, episode=1174 reward=0.7680679 (495.15 it/sec) -training >> step=7039100, episode=1174 reward=0.7951782 (464.73 it/sec) -training >> step=7039200, episode=1174 reward=0.7913278 (487.90 it/sec) -training >> step=7039300, episode=1174 reward=0.7800797 (482.07 it/sec) -training >> step=7039400, episode=1174 reward=0.7788979 (460.00 it/sec) -training >> step=7039500, episode=1174 reward=0.784668 (483.57 it/sec) -training >> step=7039600, episode=1174 reward=0.7910577 (499.68 it/sec) -training >> step=7039700, episode=1174 reward=0.7857412 (409.80 it/sec) -training >> step=7039800, episode=1174 reward=0.7668134 (460.57 it/sec) -training >> step=7039900, episode=1174 reward=0.7858705 (459.82 it/sec) -training >> step=7040000, episode=1174 reward=0.7806838 (453.30 it/sec) -training >> step=7040100, episode=1174 reward=0.7911595 (534.91 it/sec) -training >> step=7040200, episode=1174 reward=0.7872164 (497.92 it/sec) -training >> step=7040300, episode=1174 reward=0.7809613 (476.58 it/sec) -training >> step=7040400, episode=1174 reward=0.7857597 (468.65 it/sec) -training >> step=7040500, episode=1174 reward=0.7939319 (484.43 it/sec) -training >> step=7040600, episode=1174 reward=0.7853266 (515.84 it/sec) -training >> step=7040700, episode=1174 reward=0.77885 (507.47 it/sec) -training >> step=7040800, episode=1174 reward=0.8000291 (472.62 it/sec) -training >> step=7040900, episode=1174 reward=0.8054327 (463.95 it/sec) -training >> step=7041000, episode=1174 reward=0.7764431 (501.67 it/sec) -training >> step=7041100, episode=1174 reward=0.7778308 (497.65 it/sec) -training >> step=7041200, episode=1174 reward=0.7998304 (463.38 it/sec) -training >> step=7041300, episode=1174 reward=0.7681752 (526.04 it/sec) -training >> step=7041400, episode=1174 reward=0.7684815 (477.07 it/sec) -training >> step=7041500, episode=1174 reward=0.781693 (487.52 it/sec) -training >> step=7041600, episode=1174 reward=0.791414 (517.77 it/sec) -training >> step=7041700, episode=1174 reward=0.801744 (500.55 it/sec) -training >> step=7041800, episode=1174 reward=0.774515 (452.49 it/sec) -training >> step=7041900, episode=1174 reward=0.7879344 (495.83 it/sec) -training >> step=7042000, episode=1174 reward=0.7971599 (491.76 it/sec) -training >> step=7042100, episode=1174 reward=0.7844416 (510.04 it/sec) -training >> step=7042200, episode=1174 reward=0.7677209 (455.66 it/sec) -training >> step=7042300, episode=1174 reward=0.7934713 (498.87 it/sec) -training >> step=7042400, episode=1174 reward=0.7959192 (498.20 it/sec) -training >> step=7042500, episode=1174 reward=0.7891156 (483.18 it/sec) -training >> step=7042600, episode=1174 reward=0.7655685 (489.71 it/sec) -training >> step=7042700, episode=1174 reward=0.7702526 (510.68 it/sec) -training >> step=7042800, episode=1174 reward=0.7833953 (499.63 it/sec) -training >> step=7042900, episode=1174 reward=0.7702923 (505.28 it/sec) -training >> step=7043000, episode=1174 reward=0.7973416 (452.20 it/sec) -training >> step=7043100, episode=1174 reward=0.7900895 (478.96 it/sec) -training >> step=7043200, episode=1174 reward=0.7810696 (444.92 it/sec) -training >> step=7043300, episode=1175 reward=0.8060375 (84.86 it/sec) -training >> step=7043400, episode=1175 reward=0.7824112 (463.58 it/sec) -training >> step=7043500, episode=1175 reward=0.7908405 (463.09 it/sec) -training >> step=7043600, episode=1175 reward=0.7795404 (446.11 it/sec) -training >> step=7043700, episode=1175 reward=0.7802007 (450.57 it/sec) -training >> step=7043800, episode=1175 reward=0.7900361 (449.20 it/sec) -training >> step=7043900, episode=1175 reward=0.7831667 (499.29 it/sec) -training >> step=7044000, episode=1175 reward=0.7671258 (461.38 it/sec) -training >> step=7044100, episode=1175 reward=0.7832065 (466.31 it/sec) -training >> step=7044200, episode=1175 reward=0.809367 (453.34 it/sec) -training >> step=7044300, episode=1175 reward=0.775659 (467.88 it/sec) -training >> step=7044400, episode=1175 reward=0.8057185 (471.54 it/sec) -training >> step=7044500, episode=1175 reward=0.7979758 (465.49 it/sec) -training >> step=7044600, episode=1175 reward=0.7836812 (462.33 it/sec) -training >> step=7044700, episode=1175 reward=0.7993981 (462.17 it/sec) -training >> step=7044800, episode=1175 reward=0.8020369 (460.06 it/sec) -training >> step=7044900, episode=1175 reward=0.7780941 (452.98 it/sec) -training >> step=7045000, episode=1175 reward=0.7862636 (441.84 it/sec) -training >> step=7045100, episode=1175 reward=0.7917637 (444.49 it/sec) -training >> step=7045200, episode=1175 reward=0.7939851 (466.00 it/sec) -training >> step=7045300, episode=1175 reward=0.7811003 (466.55 it/sec) -training >> step=7045400, episode=1175 reward=0.7820477 (488.72 it/sec) -training >> step=7045500, episode=1175 reward=0.7783489 (480.79 it/sec) -training >> step=7045600, episode=1175 reward=0.7580504 (430.41 it/sec) -training >> step=7045700, episode=1175 reward=0.7970464 (445.15 it/sec) -training >> step=7045800, episode=1175 reward=0.78473 (436.50 it/sec) -training >> step=7045900, episode=1175 reward=0.7852027 (510.16 it/sec) -training >> step=7046000, episode=1175 reward=0.7915268 (453.38 it/sec) -training >> step=7046100, episode=1175 reward=0.8062239 (460.15 it/sec) -training >> step=7046200, episode=1175 reward=0.7882723 (473.41 it/sec) -training >> step=7046300, episode=1175 reward=0.7943589 (468.91 it/sec) -training >> step=7046400, episode=1175 reward=0.7962044 (509.12 it/sec) -training >> step=7046500, episode=1175 reward=0.7806645 (456.02 it/sec) -training >> step=7046600, episode=1175 reward=0.7974321 (464.52 it/sec) -training >> step=7046700, episode=1175 reward=0.8028501 (403.49 it/sec) -training >> step=7046800, episode=1175 reward=0.7922043 (463.57 it/sec) -training >> step=7046900, episode=1175 reward=0.7945898 (452.41 it/sec) -training >> step=7047000, episode=1175 reward=0.769569 (494.08 it/sec) -training >> step=7047100, episode=1175 reward=0.8063948 (473.93 it/sec) -training >> step=7047200, episode=1175 reward=0.7733085 (470.31 it/sec) -training >> step=7047300, episode=1175 reward=0.8016768 (493.80 it/sec) -training >> step=7047400, episode=1175 reward=0.7865405 (451.40 it/sec) -training >> step=7047500, episode=1175 reward=0.7885581 (467.77 it/sec) -training >> step=7047600, episode=1175 reward=0.7804238 (472.57 it/sec) -training >> step=7047700, episode=1175 reward=0.7915049 (486.79 it/sec) -training >> step=7047800, episode=1175 reward=0.7833954 (478.46 it/sec) -training >> step=7047900, episode=1175 reward=0.7793952 (459.03 it/sec) -training >> step=7048000, episode=1175 reward=0.7929915 (463.52 it/sec) -training >> step=7048100, episode=1175 reward=0.8127552 (480.15 it/sec) -training >> step=7048200, episode=1175 reward=0.7707242 (476.02 it/sec) -training >> step=7048300, episode=1175 reward=0.7763605 (501.09 it/sec) -training >> step=7048400, episode=1175 reward=0.7757905 (516.84 it/sec) -training >> step=7048500, episode=1175 reward=0.7794089 (471.91 it/sec) -training >> step=7048600, episode=1175 reward=0.7959495 (456.39 it/sec) -training >> step=7048700, episode=1175 reward=0.7926632 (493.01 it/sec) -training >> step=7048800, episode=1175 reward=0.812181 (460.16 it/sec) -training >> step=7048900, episode=1175 reward=0.7792514 (477.13 it/sec) -training >> step=7049000, episode=1175 reward=0.7854795 (502.50 it/sec) -training >> step=7049100, episode=1175 reward=0.7535491 (487.13 it/sec) -training >> step=7049200, episode=1175 reward=0.796777 (510.92 it/sec) -training >> step=7049300, episode=1176 reward=0.7698643 (64.32 it/sec) -training >> step=7049400, episode=1176 reward=0.7678738 (502.38 it/sec) -training >> step=7049500, episode=1176 reward=0.8045666 (448.06 it/sec) -training >> step=7049600, episode=1176 reward=0.796267 (444.48 it/sec) -training >> step=7049700, episode=1176 reward=0.7989665 (474.10 it/sec) -training >> step=7049800, episode=1176 reward=0.7941542 (445.99 it/sec) -training >> step=7049900, episode=1176 reward=0.7774218 (485.68 it/sec) -training >> step=7050000, episode=1176 reward=0.7678612 (463.94 it/sec) -training >> step=7050100, episode=1176 reward=0.8001611 (458.48 it/sec) -training >> step=7050200, episode=1176 reward=0.7756178 (431.01 it/sec) -training >> step=7050300, episode=1176 reward=0.7962901 (473.72 it/sec) -training >> step=7050400, episode=1176 reward=0.8015658 (540.72 it/sec) -training >> step=7050500, episode=1176 reward=0.8009878 (462.95 it/sec) -training >> step=7050600, episode=1176 reward=0.7942975 (466.37 it/sec) -training >> step=7050700, episode=1176 reward=0.8007382 (430.44 it/sec) -training >> step=7050800, episode=1176 reward=0.7901524 (477.82 it/sec) -training >> step=7050900, episode=1176 reward=0.7828388 (486.17 it/sec) -training >> step=7051000, episode=1176 reward=0.7920486 (489.67 it/sec) -training >> step=7051100, episode=1176 reward=0.7857266 (481.89 it/sec) -training >> step=7051200, episode=1176 reward=0.7744094 (417.21 it/sec) -training >> step=7051300, episode=1176 reward=0.7854961 (522.61 it/sec) -training >> step=7051400, episode=1176 reward=0.7741577 (507.91 it/sec) -training >> step=7051500, episode=1176 reward=0.7780735 (460.42 it/sec) -training >> step=7051600, episode=1176 reward=0.8098502 (456.03 it/sec) -training >> step=7051700, episode=1176 reward=0.7792566 (450.67 it/sec) -training >> step=7051800, episode=1176 reward=0.7937981 (509.18 it/sec) -training >> step=7051900, episode=1176 reward=0.7733652 (515.33 it/sec) -training >> step=7052000, episode=1176 reward=0.8087488 (475.74 it/sec) -training >> step=7052100, episode=1176 reward=0.7760718 (480.15 it/sec) -training >> step=7052200, episode=1176 reward=0.771058 (509.74 it/sec) -training >> step=7052300, episode=1176 reward=0.786876 (486.50 it/sec) -training >> step=7052400, episode=1176 reward=0.7929166 (429.50 it/sec) -training >> step=7052500, episode=1176 reward=0.7891576 (493.12 it/sec) -training >> step=7052600, episode=1176 reward=0.785122 (517.31 it/sec) -training >> step=7052700, episode=1176 reward=0.7732667 (465.99 it/sec) -training >> step=7052800, episode=1176 reward=0.7903334 (496.08 it/sec) -training >> step=7052900, episode=1176 reward=0.7859929 (510.44 it/sec) -training >> step=7053000, episode=1176 reward=0.7660567 (470.31 it/sec) -training >> step=7053100, episode=1176 reward=0.7798963 (499.26 it/sec) -training >> step=7053200, episode=1176 reward=0.8003297 (508.04 it/sec) -training >> step=7053300, episode=1176 reward=0.7949407 (528.81 it/sec) -training >> step=7053400, episode=1176 reward=0.8020661 (508.84 it/sec) -training >> step=7053500, episode=1176 reward=0.7739776 (494.14 it/sec) -training >> step=7053600, episode=1176 reward=0.7963853 (471.99 it/sec) -training >> step=7053700, episode=1176 reward=0.7939024 (494.89 it/sec) -training >> step=7053800, episode=1176 reward=0.7588536 (500.78 it/sec) -training >> step=7053900, episode=1176 reward=0.7965907 (517.95 it/sec) -training >> step=7054000, episode=1176 reward=0.7814265 (491.52 it/sec) -training >> step=7054100, episode=1176 reward=0.7806056 (465.06 it/sec) -training >> step=7054200, episode=1176 reward=0.7860171 (489.14 it/sec) -training >> step=7054300, episode=1176 reward=0.7860231 (470.82 it/sec) -training >> step=7054400, episode=1176 reward=0.7779533 (487.13 it/sec) -training >> step=7054500, episode=1176 reward=0.7892554 (472.43 it/sec) -training >> step=7054600, episode=1176 reward=0.761336 (480.42 it/sec) -training >> step=7054700, episode=1176 reward=0.7944135 (534.19 it/sec) -training >> step=7054800, episode=1176 reward=0.7926633 (487.58 it/sec) -training >> step=7054900, episode=1176 reward=0.7909728 (514.88 it/sec) -training >> step=7055000, episode=1176 reward=0.7971087 (483.80 it/sec) -training >> step=7055100, episode=1176 reward=0.7878218 (480.80 it/sec) -training >> step=7055200, episode=1176 reward=0.7956992 (473.08 it/sec) -training >> step=7055300, episode=1177 reward=0.7705664 (54.70 it/sec) -training >> step=7055400, episode=1177 reward=0.7847317 (425.34 it/sec) -training >> step=7055500, episode=1177 reward=0.7982782 (484.56 it/sec) -training >> step=7055600, episode=1177 reward=0.7896178 (489.40 it/sec) -training >> step=7055700, episode=1177 reward=0.7723892 (474.08 it/sec) -training >> step=7055800, episode=1177 reward=0.7713245 (488.32 it/sec) -training >> step=7055900, episode=1177 reward=0.776063 (476.63 it/sec) -training >> step=7056000, episode=1177 reward=0.7907032 (477.77 it/sec) -training >> step=7056100, episode=1177 reward=0.7798162 (453.81 it/sec) -training >> step=7056200, episode=1177 reward=0.7772332 (485.29 it/sec) -training >> step=7056300, episode=1177 reward=0.7850217 (522.07 it/sec) -training >> step=7056400, episode=1177 reward=0.7960966 (482.55 it/sec) -training >> step=7056500, episode=1177 reward=0.7901754 (492.25 it/sec) -training >> step=7056600, episode=1177 reward=0.7927756 (470.15 it/sec) -training >> step=7056700, episode=1177 reward=0.7955717 (484.11 it/sec) -training >> step=7056800, episode=1177 reward=0.7876326 (502.59 it/sec) -training >> step=7056900, episode=1177 reward=0.7715604 (447.91 it/sec) -training >> step=7057000, episode=1177 reward=0.7880578 (521.73 it/sec) -training >> step=7057100, episode=1177 reward=0.7709238 (452.40 it/sec) -training >> step=7057200, episode=1177 reward=0.7746743 (478.68 it/sec) -training >> step=7057300, episode=1177 reward=0.7821591 (468.38 it/sec) -training >> step=7057400, episode=1177 reward=0.773963 (468.97 it/sec) -training >> step=7057500, episode=1177 reward=0.7879853 (488.71 it/sec) -training >> step=7057600, episode=1177 reward=0.7921457 (473.81 it/sec) -training >> step=7057700, episode=1177 reward=0.7746844 (447.92 it/sec) -training >> step=7057800, episode=1177 reward=0.7978256 (439.36 it/sec) -training >> step=7057900, episode=1177 reward=0.7777646 (431.01 it/sec) -training >> step=7058000, episode=1177 reward=0.7727736 (471.65 it/sec) -training >> step=7058100, episode=1177 reward=0.7850596 (463.90 it/sec) -training >> step=7058200, episode=1177 reward=0.7789509 (461.97 it/sec) -training >> step=7058300, episode=1177 reward=0.7938395 (415.61 it/sec) -training >> step=7058400, episode=1177 reward=0.7930134 (472.93 it/sec) -training >> step=7058500, episode=1177 reward=0.7992277 (405.69 it/sec) -training >> step=7058600, episode=1177 reward=0.7849104 (414.80 it/sec) -training >> step=7058700, episode=1177 reward=0.7932281 (443.04 it/sec) -training >> step=7058800, episode=1177 reward=0.782159 (463.30 it/sec) -training >> step=7058900, episode=1177 reward=0.7878665 (455.34 it/sec) -training >> step=7059000, episode=1177 reward=0.7658725 (451.23 it/sec) -training >> step=7059100, episode=1177 reward=0.768128 (434.42 it/sec) -training >> step=7059200, episode=1177 reward=0.7962768 (468.05 it/sec) -training >> step=7059300, episode=1177 reward=0.7821063 (448.03 it/sec) -training >> step=7059400, episode=1177 reward=0.785717 (437.72 it/sec) -training >> step=7059500, episode=1177 reward=0.7761594 (415.68 it/sec) -training >> step=7059600, episode=1177 reward=0.7681323 (443.61 it/sec) -training >> step=7059700, episode=1177 reward=0.8109599 (446.20 it/sec) -training >> step=7059800, episode=1177 reward=0.7730892 (459.01 it/sec) -training >> step=7059900, episode=1177 reward=0.783063 (476.00 it/sec) -training >> step=7060000, episode=1177 reward=0.7778684 (486.62 it/sec) -training >> step=7060100, episode=1177 reward=0.7849862 (438.63 it/sec) -training >> step=7060200, episode=1177 reward=0.7786852 (496.74 it/sec) -training >> step=7060300, episode=1177 reward=0.7847234 (460.89 it/sec) -training >> step=7060400, episode=1177 reward=0.8016887 (427.86 it/sec) -training >> step=7060500, episode=1177 reward=0.7821692 (440.93 it/sec) -training >> step=7060600, episode=1177 reward=0.7828517 (462.71 it/sec) -training >> step=7060700, episode=1177 reward=0.7726755 (455.54 it/sec) -training >> step=7060800, episode=1177 reward=0.7986391 (479.70 it/sec) -training >> step=7060900, episode=1177 reward=0.8012565 (487.52 it/sec) -training >> step=7061000, episode=1177 reward=0.7719887 (521.19 it/sec) -training >> step=7061100, episode=1177 reward=0.7837834 (466.24 it/sec) -training >> step=7061200, episode=1177 reward=0.7720336 (450.33 it/sec) -training >> step=7061300, episode=1178 reward=0.7709348 (66.25 it/sec) -training >> step=7061400, episode=1178 reward=0.7504008 (482.62 it/sec) -training >> step=7061500, episode=1178 reward=0.7702997 (493.12 it/sec) -training >> step=7061600, episode=1178 reward=0.7835324 (530.84 it/sec) -training >> step=7061700, episode=1178 reward=0.8079854 (490.38 it/sec) -training >> step=7061800, episode=1178 reward=0.7982343 (499.32 it/sec) -training >> step=7061900, episode=1178 reward=0.7925254 (531.11 it/sec) -training >> step=7062000, episode=1178 reward=0.8000221 (481.82 it/sec) -training >> step=7062100, episode=1178 reward=0.7881967 (506.63 it/sec) -training >> step=7062200, episode=1178 reward=0.7916602 (519.48 it/sec) -training >> step=7062300, episode=1178 reward=0.7877976 (505.63 it/sec) -training >> step=7062400, episode=1178 reward=0.8020158 (506.93 it/sec) -training >> step=7062500, episode=1178 reward=0.7881731 (508.81 it/sec) -training >> step=7062600, episode=1178 reward=0.7906451 (518.57 it/sec) -training >> step=7062700, episode=1178 reward=0.7906774 (484.59 it/sec) -training >> step=7062800, episode=1178 reward=0.7900442 (445.42 it/sec) -training >> step=7062900, episode=1178 reward=0.7695156 (511.53 it/sec) -training >> step=7063000, episode=1178 reward=0.7858189 (509.42 it/sec) -training >> step=7063100, episode=1178 reward=0.805317 (509.62 it/sec) -training >> step=7063200, episode=1178 reward=0.8051654 (496.06 it/sec) -training >> step=7063300, episode=1178 reward=0.8037255 (462.49 it/sec) -training >> step=7063400, episode=1178 reward=0.7973632 (472.46 it/sec) -training >> step=7063500, episode=1178 reward=0.7792754 (474.24 it/sec) -training >> step=7063600, episode=1178 reward=0.8072789 (509.63 it/sec) -training >> step=7063700, episode=1178 reward=0.7747997 (442.18 it/sec) -training >> step=7063800, episode=1178 reward=0.7921979 (445.79 it/sec) -training >> step=7063900, episode=1178 reward=0.7916633 (505.24 it/sec) -training >> step=7064000, episode=1178 reward=0.7967079 (478.87 it/sec) -training >> step=7064100, episode=1178 reward=0.7966567 (498.19 it/sec) -training >> step=7064200, episode=1178 reward=0.7684481 (451.10 it/sec) -training >> step=7064300, episode=1178 reward=0.7885104 (500.98 it/sec) -training >> step=7064400, episode=1178 reward=0.7884495 (464.59 it/sec) -training >> step=7064500, episode=1178 reward=0.7893381 (415.72 it/sec) -training >> step=7064600, episode=1178 reward=0.7863526 (470.16 it/sec) -training >> step=7064700, episode=1178 reward=0.8052069 (469.89 it/sec) -training >> step=7064800, episode=1178 reward=0.7734976 (518.93 it/sec) -training >> step=7064900, episode=1178 reward=0.7957677 (505.43 it/sec) -training >> step=7065000, episode=1178 reward=0.7730566 (495.19 it/sec) -training >> step=7065100, episode=1178 reward=0.7964233 (517.55 it/sec) -training >> step=7065200, episode=1178 reward=0.7967484 (495.89 it/sec) -training >> step=7065300, episode=1178 reward=0.7974907 (514.86 it/sec) -training >> step=7065400, episode=1178 reward=0.7890716 (496.60 it/sec) -training >> step=7065500, episode=1178 reward=0.7875758 (491.99 it/sec) -training >> step=7065600, episode=1178 reward=0.7833228 (489.71 it/sec) -training >> step=7065700, episode=1178 reward=0.7905909 (525.46 it/sec) -training >> step=7065800, episode=1178 reward=0.7757624 (520.83 it/sec) -training >> step=7065900, episode=1178 reward=0.7945828 (489.75 it/sec) -training >> step=7066000, episode=1178 reward=0.7575436 (526.99 it/sec) -training >> step=7066100, episode=1178 reward=0.7731398 (515.99 it/sec) -training >> step=7066200, episode=1178 reward=0.7925656 (481.61 it/sec) -training >> step=7066300, episode=1178 reward=0.7746573 (490.46 it/sec) -training >> step=7066400, episode=1178 reward=0.7815878 (503.06 it/sec) -training >> step=7066500, episode=1178 reward=0.7763065 (513.06 it/sec) -training >> step=7066600, episode=1178 reward=0.7721715 (506.54 it/sec) -training >> step=7066700, episode=1178 reward=0.7931095 (477.76 it/sec) -training >> step=7066800, episode=1178 reward=0.7770379 (516.17 it/sec) -training >> step=7066900, episode=1178 reward=0.7885293 (517.92 it/sec) -training >> step=7067000, episode=1178 reward=0.797314 (484.19 it/sec) -training >> step=7067100, episode=1178 reward=0.7930321 (512.17 it/sec) -training >> step=7067200, episode=1178 reward=0.801506 (524.81 it/sec) -training >> step=7067300, episode=1179 reward=0.7964054 (96.92 it/sec) -training >> step=7067400, episode=1179 reward=0.7901695 (475.98 it/sec) -training >> step=7067500, episode=1179 reward=0.8052085 (513.50 it/sec) -training >> step=7067600, episode=1179 reward=0.7860551 (489.42 it/sec) -training >> step=7067700, episode=1179 reward=0.7856939 (496.35 it/sec) -training >> step=7067800, episode=1179 reward=0.7602867 (514.15 it/sec) -training >> step=7067900, episode=1179 reward=0.7707445 (461.67 it/sec) -training >> step=7068000, episode=1179 reward=0.7799676 (509.20 it/sec) -training >> step=7068100, episode=1179 reward=0.7949214 (497.34 it/sec) -training >> step=7068200, episode=1179 reward=0.7994593 (515.55 it/sec) -training >> step=7068300, episode=1179 reward=0.7898062 (477.43 it/sec) -training >> step=7068400, episode=1179 reward=0.775754 (461.20 it/sec) -training >> step=7068500, episode=1179 reward=0.783855 (513.90 it/sec) -training >> step=7068600, episode=1179 reward=0.7953258 (490.00 it/sec) -training >> step=7068700, episode=1179 reward=0.7760387 (475.56 it/sec) -training >> step=7068800, episode=1179 reward=0.7821952 (468.26 it/sec) -training >> step=7068900, episode=1179 reward=0.7949472 (479.01 it/sec) -training >> step=7069000, episode=1179 reward=0.784476 (511.14 it/sec) -training >> step=7069100, episode=1179 reward=0.7898349 (492.66 it/sec) -training >> step=7069200, episode=1179 reward=0.802061 (489.85 it/sec) -training >> step=7069300, episode=1179 reward=0.7838065 (496.16 it/sec) -training >> step=7069400, episode=1179 reward=0.7870957 (470.23 it/sec) -training >> step=7069500, episode=1179 reward=0.7754152 (502.96 it/sec) -training >> step=7069600, episode=1179 reward=0.7782179 (466.45 it/sec) -training >> step=7069700, episode=1179 reward=0.8029712 (460.74 it/sec) -training >> step=7069800, episode=1179 reward=0.7871118 (513.28 it/sec) -training >> step=7069900, episode=1179 reward=0.7800143 (506.28 it/sec) -training >> step=7070000, episode=1179 reward=0.7806503 (520.20 it/sec) -training >> step=7070100, episode=1179 reward=0.789427 (449.75 it/sec) -training >> step=7070200, episode=1179 reward=0.8004215 (489.31 it/sec) -training >> step=7070300, episode=1179 reward=0.7810381 (475.97 it/sec) -training >> step=7070400, episode=1179 reward=0.7815374 (462.96 it/sec) -training >> step=7070500, episode=1179 reward=0.7809457 (468.60 it/sec) -training >> step=7070600, episode=1179 reward=0.7782621 (502.08 it/sec) -training >> step=7070700, episode=1179 reward=0.7823194 (522.28 it/sec) -training >> step=7070800, episode=1179 reward=0.7798291 (546.22 it/sec) -training >> step=7070900, episode=1179 reward=0.7781743 (510.53 it/sec) -training >> step=7071000, episode=1179 reward=0.7857718 (512.66 it/sec) -training >> step=7071100, episode=1179 reward=0.7799153 (525.88 it/sec) -training >> step=7071200, episode=1179 reward=0.7982805 (501.97 it/sec) -training >> step=7071300, episode=1179 reward=0.7783461 (529.11 it/sec) -training >> step=7071400, episode=1179 reward=0.7822483 (519.66 it/sec) -training >> step=7071500, episode=1179 reward=0.7791902 (535.40 it/sec) -training >> step=7071600, episode=1179 reward=0.757149 (515.48 it/sec) -training >> step=7071700, episode=1179 reward=0.7985894 (510.16 it/sec) -training >> step=7071800, episode=1179 reward=0.786039 (540.02 it/sec) -training >> step=7071900, episode=1179 reward=0.7758889 (487.44 it/sec) -training >> step=7072000, episode=1179 reward=0.7834202 (521.30 it/sec) -training >> step=7072100, episode=1179 reward=0.7783525 (523.70 it/sec) -training >> step=7072200, episode=1179 reward=0.7759755 (494.48 it/sec) -training >> step=7072300, episode=1179 reward=0.7887472 (487.31 it/sec) -training >> step=7072400, episode=1179 reward=0.779351 (515.06 it/sec) -training >> step=7072500, episode=1179 reward=0.8025985 (521.61 it/sec) -training >> step=7072600, episode=1179 reward=0.7763448 (520.97 it/sec) -training >> step=7072700, episode=1179 reward=0.7754779 (544.23 it/sec) -training >> step=7072800, episode=1179 reward=0.7929382 (505.12 it/sec) -training >> step=7072900, episode=1179 reward=0.7732013 (525.60 it/sec) -training >> step=7073000, episode=1179 reward=0.7731985 (511.60 it/sec) -training >> step=7073100, episode=1179 reward=0.7812715 (510.52 it/sec) -training >> step=7073200, episode=1179 reward=0.7884609 (528.38 it/sec) -training >> step=7073300, episode=1180 reward=0.7837197 (117.70 it/sec) -training >> step=7073400, episode=1180 reward=0.786288 (478.15 it/sec) -training >> step=7073500, episode=1180 reward=0.7842143 (483.03 it/sec) -training >> step=7073600, episode=1180 reward=0.7745749 (493.98 it/sec) -training >> step=7073700, episode=1180 reward=0.7928314 (505.95 it/sec) -training >> step=7073800, episode=1180 reward=0.7922847 (521.03 it/sec) -training >> step=7073900, episode=1180 reward=0.7860144 (489.80 it/sec) -training >> step=7074000, episode=1180 reward=0.7857207 (518.77 it/sec) -training >> step=7074100, episode=1180 reward=0.7922012 (527.60 it/sec) -training >> step=7074200, episode=1180 reward=0.7800072 (513.72 it/sec) -training >> step=7074300, episode=1180 reward=0.7874768 (516.67 it/sec) -training >> step=7074400, episode=1180 reward=0.7996778 (487.47 it/sec) -training >> step=7074500, episode=1180 reward=0.7760432 (515.45 it/sec) -training >> step=7074600, episode=1180 reward=0.7811741 (493.04 it/sec) -training >> step=7074700, episode=1180 reward=0.7920743 (532.56 it/sec) -training >> step=7074800, episode=1180 reward=0.7946677 (536.18 it/sec) -training >> step=7074900, episode=1180 reward=0.7828244 (514.15 it/sec) -training >> step=7075000, episode=1180 reward=0.7969244 (524.54 it/sec) -training >> step=7075100, episode=1180 reward=0.7841458 (513.14 it/sec) -training >> step=7075200, episode=1180 reward=0.7876303 (500.85 it/sec) -training >> step=7075300, episode=1180 reward=0.7978708 (513.54 it/sec) -training >> step=7075400, episode=1180 reward=0.7932456 (504.83 it/sec) -training >> step=7075500, episode=1180 reward=0.8041342 (474.85 it/sec) -training >> step=7075600, episode=1180 reward=0.770266 (512.05 it/sec) -training >> step=7075700, episode=1180 reward=0.7899224 (518.66 it/sec) -training >> step=7075800, episode=1180 reward=0.7790319 (541.28 it/sec) -training >> step=7075900, episode=1180 reward=0.7901679 (511.48 it/sec) -training >> step=7076000, episode=1180 reward=0.8026978 (495.68 it/sec) -training >> step=7076100, episode=1180 reward=0.7792523 (509.15 it/sec) -training >> step=7076200, episode=1180 reward=0.7914881 (494.00 it/sec) -training >> step=7076300, episode=1180 reward=0.7858496 (488.52 it/sec) -training >> step=7076400, episode=1180 reward=0.7991089 (509.33 it/sec) -training >> step=7076500, episode=1180 reward=0.788973 (522.89 it/sec) -training >> step=7076600, episode=1180 reward=0.7948499 (506.66 it/sec) -training >> step=7076700, episode=1180 reward=0.7960958 (523.71 it/sec) -training >> step=7076800, episode=1180 reward=0.794671 (514.53 it/sec) -training >> step=7076900, episode=1180 reward=0.7652352 (554.12 it/sec) -training >> step=7077000, episode=1180 reward=0.7941311 (484.30 it/sec) -training >> step=7077100, episode=1180 reward=0.8048995 (510.39 it/sec) -training >> step=7077200, episode=1180 reward=0.7919907 (508.56 it/sec) -training >> step=7077300, episode=1180 reward=0.7690502 (513.23 it/sec) -training >> step=7077400, episode=1180 reward=0.7933779 (540.37 it/sec) -training >> step=7077500, episode=1180 reward=0.7890566 (519.70 it/sec) -training >> step=7077600, episode=1180 reward=0.7714315 (517.74 it/sec) -training >> step=7077700, episode=1180 reward=0.7980861 (476.55 it/sec) -training >> step=7077800, episode=1180 reward=0.7914196 (507.49 it/sec) -training >> step=7077900, episode=1180 reward=0.8071346 (501.23 it/sec) -training >> step=7078000, episode=1180 reward=0.759946 (524.05 it/sec) -training >> step=7078100, episode=1180 reward=0.7823968 (501.37 it/sec) -training >> step=7078200, episode=1180 reward=0.7628341 (510.71 it/sec) -training >> step=7078300, episode=1180 reward=0.7695045 (452.32 it/sec) -training >> step=7078400, episode=1180 reward=0.7842172 (490.15 it/sec) -training >> step=7078500, episode=1180 reward=0.7994985 (492.15 it/sec) -training >> step=7078600, episode=1180 reward=0.7988644 (504.80 it/sec) -training >> step=7078700, episode=1180 reward=0.7923449 (519.69 it/sec) -training >> step=7078800, episode=1180 reward=0.7809063 (494.27 it/sec) -training >> step=7078900, episode=1180 reward=0.7735821 (512.23 it/sec) -training >> step=7079000, episode=1180 reward=0.7715032 (519.79 it/sec) -training >> step=7079100, episode=1180 reward=0.7891475 (529.23 it/sec) -training >> step=7079200, episode=1180 reward=0.7849421 (536.19 it/sec) -training >> step=7079300, episode=1181 reward=0.7711631 (113.19 it/sec) -training >> step=7079400, episode=1181 reward=0.7989345 (499.22 it/sec) -training >> step=7079500, episode=1181 reward=0.7780089 (475.17 it/sec) -training >> step=7079600, episode=1181 reward=0.7906367 (494.60 it/sec) -training >> step=7079700, episode=1181 reward=0.7841632 (528.63 it/sec) -training >> step=7079800, episode=1181 reward=0.7938776 (520.03 it/sec) -training >> step=7079900, episode=1181 reward=0.8035849 (520.29 it/sec) -training >> step=7080000, episode=1181 reward=0.7833665 (515.47 it/sec) -training >> step=7080100, episode=1181 reward=0.7981172 (496.94 it/sec) -training >> step=7080200, episode=1181 reward=0.7713969 (479.00 it/sec) -training >> step=7080300, episode=1181 reward=0.7707862 (518.19 it/sec) -training >> step=7080400, episode=1181 reward=0.7879336 (508.57 it/sec) -training >> step=7080500, episode=1181 reward=0.7908343 (504.51 it/sec) -training >> step=7080600, episode=1181 reward=0.7961631 (489.81 it/sec) -training >> step=7080700, episode=1181 reward=0.7937595 (532.81 it/sec) -training >> step=7080800, episode=1181 reward=0.7941476 (452.47 it/sec) -training >> step=7080900, episode=1181 reward=0.7829271 (506.92 it/sec) -training >> step=7081000, episode=1181 reward=0.7925025 (517.85 it/sec) -training >> step=7081100, episode=1181 reward=0.7978358 (534.60 it/sec) -training >> step=7081200, episode=1181 reward=0.8078651 (522.65 it/sec) -training >> step=7081300, episode=1181 reward=0.7852925 (531.15 it/sec) -training >> step=7081400, episode=1181 reward=0.7584924 (540.53 it/sec) -training >> step=7081500, episode=1181 reward=0.8027201 (487.83 it/sec) -training >> step=7081600, episode=1181 reward=0.7867891 (519.70 it/sec) -training >> step=7081700, episode=1181 reward=0.7941397 (507.91 it/sec) -training >> step=7081800, episode=1181 reward=0.774533 (490.85 it/sec) -training >> step=7081900, episode=1181 reward=0.7875656 (473.89 it/sec) -training >> step=7082000, episode=1181 reward=0.7942986 (473.09 it/sec) -training >> step=7082100, episode=1181 reward=0.8052769 (507.03 it/sec) -training >> step=7082200, episode=1181 reward=0.8059942 (519.17 it/sec) -training >> step=7082300, episode=1181 reward=0.7729719 (505.85 it/sec) -training >> step=7082400, episode=1181 reward=0.797778 (516.22 it/sec) -training >> step=7082500, episode=1181 reward=0.7725816 (515.02 it/sec) -training >> step=7082600, episode=1181 reward=0.7967109 (510.70 it/sec) -training >> step=7082700, episode=1181 reward=0.7836025 (508.13 it/sec) -training >> step=7082800, episode=1181 reward=0.7773851 (531.41 it/sec) -training >> step=7082900, episode=1181 reward=0.7709225 (538.45 it/sec) -training >> step=7083000, episode=1181 reward=0.7885657 (502.41 it/sec) -training >> step=7083100, episode=1181 reward=0.7609704 (495.72 it/sec) -training >> step=7083200, episode=1181 reward=0.8072981 (479.87 it/sec) -training >> step=7083300, episode=1181 reward=0.7675564 (520.65 it/sec) -training >> step=7083400, episode=1181 reward=0.778503 (514.03 it/sec) -training >> step=7083500, episode=1181 reward=0.780031 (490.89 it/sec) -training >> step=7083600, episode=1181 reward=0.7513196 (544.87 it/sec) -training >> step=7083700, episode=1181 reward=0.7962893 (505.93 it/sec) -training >> step=7083800, episode=1181 reward=0.7872494 (489.96 it/sec) -training >> step=7083900, episode=1181 reward=0.7807709 (517.00 it/sec) -training >> step=7084000, episode=1181 reward=0.7898835 (510.35 it/sec) -training >> step=7084100, episode=1181 reward=0.7905771 (520.35 it/sec) -training >> step=7084200, episode=1181 reward=0.7816524 (518.89 it/sec) -training >> step=7084300, episode=1181 reward=0.7871204 (533.04 it/sec) -training >> step=7084400, episode=1181 reward=0.797574 (517.11 it/sec) -training >> step=7084500, episode=1181 reward=0.7771537 (522.62 it/sec) -training >> step=7084600, episode=1181 reward=0.7672161 (517.60 it/sec) -training >> step=7084700, episode=1181 reward=0.7851326 (527.72 it/sec) -training >> step=7084800, episode=1181 reward=0.7857953 (502.14 it/sec) -training >> step=7084900, episode=1181 reward=0.7743941 (513.21 it/sec) -training >> step=7085000, episode=1181 reward=0.7783136 (532.27 it/sec) -training >> step=7085100, episode=1181 reward=0.7903409 (515.93 it/sec) -training >> step=7085200, episode=1181 reward=0.7816124 (493.62 it/sec) -training >> step=7085300, episode=1182 reward=0.8039601 (112.98 it/sec) -training >> step=7085400, episode=1182 reward=0.7718901 (512.65 it/sec) -training >> step=7085500, episode=1182 reward=0.7768972 (527.41 it/sec) -training >> step=7085600, episode=1182 reward=0.8003317 (490.10 it/sec) -training >> step=7085700, episode=1182 reward=0.7828221 (516.17 it/sec) -training >> step=7085800, episode=1182 reward=0.7919805 (547.32 it/sec) -training >> step=7085900, episode=1182 reward=0.796315 (491.72 it/sec) -training >> step=7086000, episode=1182 reward=0.7783841 (496.83 it/sec) -training >> step=7086100, episode=1182 reward=0.7873498 (503.99 it/sec) -training >> step=7086200, episode=1182 reward=0.7807869 (496.12 it/sec) -training >> step=7086300, episode=1182 reward=0.8012372 (501.40 it/sec) -training >> step=7086400, episode=1182 reward=0.7922221 (511.05 it/sec) -training >> step=7086500, episode=1182 reward=0.7930709 (472.82 it/sec) -training >> step=7086600, episode=1182 reward=0.7835644 (490.81 it/sec) -training >> step=7086700, episode=1182 reward=0.7850968 (531.26 it/sec) -training >> step=7086800, episode=1182 reward=0.795528 (508.78 it/sec) -training >> step=7086900, episode=1182 reward=0.7912678 (494.56 it/sec) -training >> step=7087000, episode=1182 reward=0.7939955 (504.70 it/sec) -training >> step=7087100, episode=1182 reward=0.7723662 (503.53 it/sec) -training >> step=7087200, episode=1182 reward=0.7935491 (529.07 it/sec) -training >> step=7087300, episode=1182 reward=0.7893249 (514.62 it/sec) -training >> step=7087400, episode=1182 reward=0.7967202 (517.36 it/sec) -training >> step=7087500, episode=1182 reward=0.7985036 (477.90 it/sec) -training >> step=7087600, episode=1182 reward=0.8009585 (484.29 it/sec) -training >> step=7087700, episode=1182 reward=0.7816681 (415.62 it/sec) -training >> step=7087800, episode=1182 reward=0.7895808 (478.28 it/sec) -training >> step=7087900, episode=1182 reward=0.8088925 (517.90 it/sec) -training >> step=7088000, episode=1182 reward=0.7896669 (455.59 it/sec) -training >> step=7088100, episode=1182 reward=0.793493 (499.52 it/sec) -training >> step=7088200, episode=1182 reward=0.7934728 (447.78 it/sec) -training >> step=7088300, episode=1182 reward=0.7841628 (522.27 it/sec) -training >> step=7088400, episode=1182 reward=0.8053217 (461.71 it/sec) -training >> step=7088500, episode=1182 reward=0.8042499 (477.27 it/sec) -training >> step=7088600, episode=1182 reward=0.801029 (515.70 it/sec) -training >> step=7088700, episode=1182 reward=0.7810729 (464.16 it/sec) -training >> step=7088800, episode=1182 reward=0.8002356 (510.96 it/sec) -training >> step=7088900, episode=1182 reward=0.7876617 (497.56 it/sec) -training >> step=7089000, episode=1182 reward=0.7889035 (518.06 it/sec) -training >> step=7089100, episode=1182 reward=0.7883103 (502.40 it/sec) -training >> step=7089200, episode=1182 reward=0.7761577 (517.13 it/sec) -training >> step=7089300, episode=1182 reward=0.7868126 (504.98 it/sec) -training >> step=7089400, episode=1182 reward=0.7770094 (464.54 it/sec) -training >> step=7089500, episode=1182 reward=0.7781001 (488.20 it/sec) -training >> step=7089600, episode=1182 reward=0.7877815 (484.05 it/sec) -training >> step=7089700, episode=1182 reward=0.7925259 (471.83 it/sec) -training >> step=7089800, episode=1182 reward=0.7768809 (448.02 it/sec) -training >> step=7089900, episode=1182 reward=0.7887925 (444.95 it/sec) -training >> step=7090000, episode=1182 reward=0.7745904 (500.66 it/sec) -training >> step=7090100, episode=1182 reward=0.7909349 (499.79 it/sec) -training >> step=7090200, episode=1182 reward=0.7904714 (453.97 it/sec) -training >> step=7090300, episode=1182 reward=0.7867728 (488.18 it/sec) -training >> step=7090400, episode=1182 reward=0.7891892 (477.95 it/sec) -training >> step=7090500, episode=1182 reward=0.8031497 (438.90 it/sec) -training >> step=7090600, episode=1182 reward=0.8007792 (467.43 it/sec) -training >> step=7090700, episode=1182 reward=0.7865806 (431.37 it/sec) -training >> step=7090800, episode=1182 reward=0.8003719 (514.70 it/sec) -training >> step=7090900, episode=1182 reward=0.7874408 (459.66 it/sec) -training >> step=7091000, episode=1182 reward=0.7685781 (487.39 it/sec) -training >> step=7091100, episode=1182 reward=0.7994233 (491.40 it/sec) -training >> step=7091200, episode=1182 reward=0.7714526 (498.96 it/sec) -training >> step=7091300, episode=1183 reward=0.7894019 (96.15 it/sec) -training >> step=7091400, episode=1183 reward=0.7863782 (457.51 it/sec) -training >> step=7091500, episode=1183 reward=0.7674688 (459.23 it/sec) -training >> step=7091600, episode=1183 reward=0.8028344 (480.96 it/sec) -training >> step=7091700, episode=1183 reward=0.7836325 (472.95 it/sec) -training >> step=7091800, episode=1183 reward=0.7820706 (455.18 it/sec) -training >> step=7091900, episode=1183 reward=0.7870266 (449.30 it/sec) -training >> step=7092000, episode=1183 reward=0.7735584 (470.87 it/sec) -training >> step=7092100, episode=1183 reward=0.7956778 (461.95 it/sec) -training >> step=7092200, episode=1183 reward=0.7691182 (506.51 it/sec) -training >> step=7092300, episode=1183 reward=0.7938378 (447.11 it/sec) -training >> step=7092400, episode=1183 reward=0.7710411 (461.88 it/sec) -training >> step=7092500, episode=1183 reward=0.7977968 (496.67 it/sec) -training >> step=7092600, episode=1183 reward=0.8059459 (470.27 it/sec) -training >> step=7092700, episode=1183 reward=0.803767 (482.05 it/sec) -training >> step=7092800, episode=1183 reward=0.7836744 (474.58 it/sec) -training >> step=7092900, episode=1183 reward=0.775165 (439.74 it/sec) -training >> step=7093000, episode=1183 reward=0.8074878 (498.75 it/sec) -training >> step=7093100, episode=1183 reward=0.7849977 (469.38 it/sec) -training >> step=7093200, episode=1183 reward=0.7963098 (488.00 it/sec) -training >> step=7093300, episode=1183 reward=0.7909885 (479.73 it/sec) -training >> step=7093400, episode=1183 reward=0.7928122 (434.75 it/sec) -training >> step=7093500, episode=1183 reward=0.7832381 (471.14 it/sec) -training >> step=7093600, episode=1183 reward=0.7600971 (486.97 it/sec) -training >> step=7093700, episode=1183 reward=0.792046 (472.87 it/sec) -training >> step=7093800, episode=1183 reward=0.7880155 (484.03 it/sec) -training >> step=7093900, episode=1183 reward=0.7998993 (449.36 it/sec) -training >> step=7094000, episode=1183 reward=0.7817691 (509.23 it/sec) -training >> step=7094100, episode=1183 reward=0.7574678 (457.02 it/sec) -training >> step=7094200, episode=1183 reward=0.7904986 (486.58 it/sec) -training >> step=7094300, episode=1183 reward=0.7891096 (500.01 it/sec) -training >> step=7094400, episode=1183 reward=0.7851833 (511.94 it/sec) -training >> step=7094500, episode=1183 reward=0.7655511 (454.25 it/sec) -training >> step=7094600, episode=1183 reward=0.7944735 (466.17 it/sec) -training >> step=7094700, episode=1183 reward=0.7870336 (460.57 it/sec) -training >> step=7094800, episode=1183 reward=0.7936473 (510.61 it/sec) -training >> step=7094900, episode=1183 reward=0.7862816 (458.74 it/sec) -training >> step=7095000, episode=1183 reward=0.7569706 (497.84 it/sec) -training >> step=7095100, episode=1183 reward=0.7890192 (522.40 it/sec) -training >> step=7095200, episode=1183 reward=0.7967875 (455.75 it/sec) -training >> step=7095300, episode=1183 reward=0.8060715 (500.29 it/sec) -training >> step=7095400, episode=1183 reward=0.7933419 (498.20 it/sec) -training >> step=7095500, episode=1183 reward=0.7844034 (516.45 it/sec) -training >> step=7095600, episode=1183 reward=0.768501 (457.77 it/sec) -training >> step=7095700, episode=1183 reward=0.7738391 (458.74 it/sec) -training >> step=7095800, episode=1183 reward=0.7689129 (422.96 it/sec) -training >> step=7095900, episode=1183 reward=0.7865398 (445.64 it/sec) -training >> step=7096000, episode=1183 reward=0.7959421 (490.04 it/sec) -training >> step=7096100, episode=1183 reward=0.7909896 (480.80 it/sec) -training >> step=7096200, episode=1183 reward=0.806653 (504.76 it/sec) -training >> step=7096300, episode=1183 reward=0.8223101 (479.45 it/sec) -training >> step=7096400, episode=1183 reward=0.7685868 (455.90 it/sec) -training >> step=7096500, episode=1183 reward=0.7763667 (514.80 it/sec) -training >> step=7096600, episode=1183 reward=0.7992084 (478.22 it/sec) -training >> step=7096700, episode=1183 reward=0.7859259 (500.14 it/sec) -training >> step=7096800, episode=1183 reward=0.7903568 (469.92 it/sec) -training >> step=7096900, episode=1183 reward=0.8024058 (483.49 it/sec) -training >> step=7097000, episode=1183 reward=0.7900152 (472.27 it/sec) -training >> step=7097100, episode=1183 reward=0.7786165 (484.47 it/sec) -training >> step=7097200, episode=1183 reward=0.7887007 (463.40 it/sec) -training >> step=7097300, episode=1184 reward=0.7796794 (125.22 it/sec) -training >> step=7097400, episode=1184 reward=0.774514 (474.91 it/sec) -training >> step=7097500, episode=1184 reward=0.7788784 (490.20 it/sec) -training >> step=7097600, episode=1184 reward=0.8134135 (443.15 it/sec) -training >> step=7097700, episode=1184 reward=0.7818338 (506.67 it/sec) -training >> step=7097800, episode=1184 reward=0.7904365 (486.64 it/sec) -training >> step=7097900, episode=1184 reward=0.783532 (465.28 it/sec) -training >> step=7098000, episode=1184 reward=0.7942626 (503.74 it/sec) -training >> step=7098100, episode=1184 reward=0.7679302 (439.83 it/sec) -training >> step=7098200, episode=1184 reward=0.7691339 (468.09 it/sec) -training >> step=7098300, episode=1184 reward=0.7872903 (453.16 it/sec) -training >> step=7098400, episode=1184 reward=0.7994128 (499.87 it/sec) -training >> step=7098500, episode=1184 reward=0.7873517 (463.19 it/sec) -training >> step=7098600, episode=1184 reward=0.7897988 (459.29 it/sec) -training >> step=7098700, episode=1184 reward=0.7711135 (518.14 it/sec) -training >> step=7098800, episode=1184 reward=0.7988166 (466.76 it/sec) -training >> step=7098900, episode=1184 reward=0.7856951 (486.59 it/sec) -training >> step=7099000, episode=1184 reward=0.7817616 (477.23 it/sec) -training >> step=7099100, episode=1184 reward=0.7875034 (505.46 it/sec) -training >> step=7099200, episode=1184 reward=0.7985422 (461.40 it/sec) -training >> step=7099300, episode=1184 reward=0.7835238 (421.62 it/sec) -training >> step=7099400, episode=1184 reward=0.7879168 (515.13 it/sec) -training >> step=7099500, episode=1184 reward=0.7982696 (505.42 it/sec) -training >> step=7099600, episode=1184 reward=0.7902078 (502.32 it/sec) -training >> step=7099700, episode=1184 reward=0.7904174 (437.56 it/sec) -training >> step=7099800, episode=1184 reward=0.7960765 (492.68 it/sec) -training >> step=7099900, episode=1184 reward=0.7962167 (455.77 it/sec) -training >> step=7100000, episode=1184 reward=0.7812722 (479.77 it/sec) -training >> step=7100100, episode=1184 reward=0.7796046 (491.82 it/sec) -training >> step=7100200, episode=1184 reward=0.7954281 (507.98 it/sec) -training >> step=7100300, episode=1184 reward=0.7830226 (482.96 it/sec) -training >> step=7100400, episode=1184 reward=0.7724555 (471.66 it/sec) -training >> step=7100500, episode=1184 reward=0.7943648 (476.71 it/sec) -training >> step=7100600, episode=1184 reward=0.7951412 (481.34 it/sec) -training >> step=7100700, episode=1184 reward=0.7772659 (508.67 it/sec) -training >> step=7100800, episode=1184 reward=0.7970753 (464.83 it/sec) -training >> step=7100900, episode=1184 reward=0.7730516 (500.14 it/sec) -training >> step=7101000, episode=1184 reward=0.8016773 (480.03 it/sec) -training >> step=7101100, episode=1184 reward=0.8008146 (480.42 it/sec) -training >> step=7101200, episode=1184 reward=0.7840369 (462.23 it/sec) -training >> step=7101300, episode=1184 reward=0.8104209 (469.97 it/sec) -training >> step=7101400, episode=1184 reward=0.7855815 (499.70 it/sec) -training >> step=7101500, episode=1184 reward=0.7842511 (488.37 it/sec) -training >> step=7101600, episode=1184 reward=0.7704169 (519.47 it/sec) -training >> step=7101700, episode=1184 reward=0.7819118 (478.56 it/sec) -training >> step=7101800, episode=1184 reward=0.7822803 (468.13 it/sec) -training >> step=7101900, episode=1184 reward=0.8061085 (502.18 it/sec) -training >> step=7102000, episode=1184 reward=0.7864993 (479.88 it/sec) -training >> step=7102100, episode=1184 reward=0.7611041 (469.20 it/sec) -training >> step=7102200, episode=1184 reward=0.7751074 (480.24 it/sec) -training >> step=7102300, episode=1184 reward=0.7878264 (515.91 it/sec) -training >> step=7102400, episode=1184 reward=0.7834069 (484.02 it/sec) -training >> step=7102500, episode=1184 reward=0.7940569 (474.38 it/sec) -training >> step=7102600, episode=1184 reward=0.7833591 (496.68 it/sec) -training >> step=7102700, episode=1184 reward=0.7906837 (469.66 it/sec) -training >> step=7102800, episode=1184 reward=0.7843441 (469.21 it/sec) -training >> step=7102900, episode=1184 reward=0.8079254 (507.51 it/sec) -training >> step=7103000, episode=1184 reward=0.7934186 (501.14 it/sec) -training >> step=7103100, episode=1184 reward=0.7860172 (458.29 it/sec) -training >> step=7103200, episode=1184 reward=0.7688591 (490.03 it/sec) -training >> step=7103300, episode=1185 reward=0.7930356 (97.13 it/sec) -training >> step=7103400, episode=1185 reward=0.7967257 (431.05 it/sec) -training >> step=7103500, episode=1185 reward=0.7974887 (411.89 it/sec) -training >> step=7103600, episode=1185 reward=0.7716666 (493.97 it/sec) -training >> step=7103700, episode=1185 reward=0.7907588 (486.11 it/sec) -training >> step=7103800, episode=1185 reward=0.7902328 (441.30 it/sec) -training >> step=7103900, episode=1185 reward=0.7868165 (491.52 it/sec) -training >> step=7104000, episode=1185 reward=0.8112311 (491.57 it/sec) -training >> step=7104100, episode=1185 reward=0.7678203 (503.35 it/sec) -training >> step=7104200, episode=1185 reward=0.7784314 (442.82 it/sec) -training >> step=7104300, episode=1185 reward=0.7957752 (496.20 it/sec) -training >> step=7104400, episode=1185 reward=0.7753498 (498.82 it/sec) -training >> step=7104500, episode=1185 reward=0.7819584 (486.16 it/sec) -training >> step=7104600, episode=1185 reward=0.8031341 (496.68 it/sec) -training >> step=7104700, episode=1185 reward=0.7953678 (475.61 it/sec) -training >> step=7104800, episode=1185 reward=0.8022217 (459.23 it/sec) -training >> step=7104900, episode=1185 reward=0.781877 (453.45 it/sec) -training >> step=7105000, episode=1185 reward=0.7886571 (502.50 it/sec) -training >> step=7105100, episode=1185 reward=0.7921234 (480.12 it/sec) -training >> step=7105200, episode=1185 reward=0.7710505 (500.51 it/sec) -training >> step=7105300, episode=1185 reward=0.807786 (467.10 it/sec) -training >> step=7105400, episode=1185 reward=0.7900733 (492.32 it/sec) -training >> step=7105500, episode=1185 reward=0.7896798 (496.91 it/sec) -training >> step=7105600, episode=1185 reward=0.7803845 (479.68 it/sec) -training >> step=7105700, episode=1185 reward=0.7922371 (499.94 it/sec) -training >> step=7105800, episode=1185 reward=0.7946375 (508.91 it/sec) -training >> step=7105900, episode=1185 reward=0.7891335 (490.59 it/sec) -training >> step=7106000, episode=1185 reward=0.7936812 (474.46 it/sec) -training >> step=7106100, episode=1185 reward=0.7937499 (481.78 it/sec) -training >> step=7106200, episode=1185 reward=0.7948489 (471.23 it/sec) -training >> step=7106300, episode=1185 reward=0.8184929 (407.04 it/sec) -training >> step=7106400, episode=1185 reward=0.7877682 (449.36 it/sec) -training >> step=7106500, episode=1185 reward=0.8004039 (496.51 it/sec) -training >> step=7106600, episode=1185 reward=0.8109921 (464.72 it/sec) -training >> step=7106700, episode=1185 reward=0.7814327 (482.05 it/sec) -training >> step=7106800, episode=1185 reward=0.7890636 (514.75 it/sec) -training >> step=7106900, episode=1185 reward=0.8094279 (473.41 it/sec) -training >> step=7107000, episode=1185 reward=0.7910073 (483.14 it/sec) -training >> step=7107100, episode=1185 reward=0.8027403 (464.67 it/sec) -training >> step=7107200, episode=1185 reward=0.7770408 (481.04 it/sec) -training >> step=7107300, episode=1185 reward=0.7698492 (461.34 it/sec) -training >> step=7107400, episode=1185 reward=0.7987154 (497.14 it/sec) -training >> step=7107500, episode=1185 reward=0.7823729 (463.41 it/sec) -training >> step=7107600, episode=1185 reward=0.8005832 (512.92 it/sec) -training >> step=7107700, episode=1185 reward=0.7608121 (456.91 it/sec) -training >> step=7107800, episode=1185 reward=0.7857026 (465.74 it/sec) -training >> step=7107900, episode=1185 reward=0.7690296 (470.85 it/sec) -training >> step=7108000, episode=1185 reward=0.7760264 (450.22 it/sec) -training >> step=7108100, episode=1185 reward=0.7830765 (488.57 it/sec) -training >> step=7108200, episode=1185 reward=0.8080127 (455.51 it/sec) -training >> step=7108300, episode=1185 reward=0.7799022 (525.37 it/sec) -training >> step=7108400, episode=1185 reward=0.7754764 (479.33 it/sec) -training >> step=7108500, episode=1185 reward=0.7797137 (462.75 it/sec) -training >> step=7108600, episode=1185 reward=0.7739968 (482.16 it/sec) -training >> step=7108700, episode=1185 reward=0.7821028 (519.71 it/sec) -training >> step=7108800, episode=1185 reward=0.789959 (474.69 it/sec) -training >> step=7108900, episode=1185 reward=0.7978464 (488.80 it/sec) -training >> step=7109000, episode=1185 reward=0.7986698 (500.96 it/sec) -training >> step=7109100, episode=1185 reward=0.7663189 (438.60 it/sec) -training >> step=7109200, episode=1185 reward=0.786214 (498.28 it/sec) -training >> step=7109300, episode=1186 reward=0.7951568 (112.50 it/sec) -training >> step=7109400, episode=1186 reward=0.7770151 (504.61 it/sec) -training >> step=7109500, episode=1186 reward=0.7724363 (477.96 it/sec) -training >> step=7109600, episode=1186 reward=0.7867386 (468.02 it/sec) -training >> step=7109700, episode=1186 reward=0.7844205 (512.94 it/sec) -training >> step=7109800, episode=1186 reward=0.7946892 (487.58 it/sec) -training >> step=7109900, episode=1186 reward=0.7786852 (433.51 it/sec) -training >> step=7110000, episode=1186 reward=0.7970748 (459.02 it/sec) -training >> step=7110100, episode=1186 reward=0.7852236 (501.41 it/sec) -training >> step=7110200, episode=1186 reward=0.7912359 (469.58 it/sec) -training >> step=7110300, episode=1186 reward=0.8058306 (500.26 it/sec) -training >> step=7110400, episode=1186 reward=0.7912197 (475.14 it/sec) -training >> step=7110500, episode=1186 reward=0.7836342 (520.44 it/sec) -training >> step=7110600, episode=1186 reward=0.7656779 (457.90 it/sec) -training >> step=7110700, episode=1186 reward=0.7880228 (426.11 it/sec) -training >> step=7110800, episode=1186 reward=0.7968099 (500.59 it/sec) -training >> step=7110900, episode=1186 reward=0.7820011 (457.41 it/sec) -training >> step=7111000, episode=1186 reward=0.800119 (468.91 it/sec) -training >> step=7111100, episode=1186 reward=0.794479 (462.65 it/sec) -training >> step=7111200, episode=1186 reward=0.782219 (488.74 it/sec) -training >> step=7111300, episode=1186 reward=0.7793754 (467.36 it/sec) -training >> step=7111400, episode=1186 reward=0.7998892 (471.33 it/sec) -training >> step=7111500, episode=1186 reward=0.8045967 (477.47 it/sec) -training >> step=7111600, episode=1186 reward=0.7987708 (498.53 it/sec) -training >> step=7111700, episode=1186 reward=0.7675685 (490.00 it/sec) -training >> step=7111800, episode=1186 reward=0.7959602 (409.06 it/sec) -training >> step=7111900, episode=1186 reward=0.787928 (486.04 it/sec) -training >> step=7112000, episode=1186 reward=0.7790034 (509.21 it/sec) -training >> step=7112100, episode=1186 reward=0.7662044 (480.45 it/sec) -training >> step=7112200, episode=1186 reward=0.7815132 (478.32 it/sec) -training >> step=7112300, episode=1186 reward=0.808681 (518.09 it/sec) -training >> step=7112400, episode=1186 reward=0.7829986 (505.16 it/sec) -training >> step=7112500, episode=1186 reward=0.7811862 (492.10 it/sec) -training >> step=7112600, episode=1186 reward=0.7548838 (446.27 it/sec) -training >> step=7112700, episode=1186 reward=0.7890645 (492.27 it/sec) -training >> step=7112800, episode=1186 reward=0.7697836 (492.17 it/sec) -training >> step=7112900, episode=1186 reward=0.7907174 (495.58 it/sec) -training >> step=7113000, episode=1186 reward=0.7707078 (449.76 it/sec) -training >> step=7113100, episode=1186 reward=0.7810816 (489.74 it/sec) -training >> step=7113200, episode=1186 reward=0.7912608 (481.72 it/sec) -training >> step=7113300, episode=1186 reward=0.7679022 (468.28 it/sec) -training >> step=7113400, episode=1186 reward=0.7801297 (501.30 it/sec) -training >> step=7113500, episode=1186 reward=0.7862808 (492.45 it/sec) -training >> step=7113600, episode=1186 reward=0.7575685 (432.12 it/sec) -training >> step=7113700, episode=1186 reward=0.7878806 (507.84 it/sec) -training >> step=7113800, episode=1186 reward=0.7775901 (470.76 it/sec) -training >> step=7113900, episode=1186 reward=0.7955927 (491.82 it/sec) -training >> step=7114000, episode=1186 reward=0.7957422 (448.48 it/sec) -training >> step=7114100, episode=1186 reward=0.7607213 (453.44 it/sec) -training >> step=7114200, episode=1186 reward=0.7786952 (496.02 it/sec) -training >> step=7114300, episode=1186 reward=0.7945122 (448.59 it/sec) -training >> step=7114400, episode=1186 reward=0.8077574 (507.03 it/sec) -training >> step=7114500, episode=1186 reward=0.7643394 (493.76 it/sec) -training >> step=7114600, episode=1186 reward=0.7843993 (467.27 it/sec) -training >> step=7114700, episode=1186 reward=0.797564 (432.09 it/sec) -training >> step=7114800, episode=1186 reward=0.7656261 (507.14 it/sec) -training >> step=7114900, episode=1186 reward=0.7621479 (479.85 it/sec) -training >> step=7115000, episode=1186 reward=0.7690417 (431.81 it/sec) -training >> step=7115100, episode=1186 reward=0.7931793 (486.13 it/sec) -training >> step=7115200, episode=1186 reward=0.7738329 (525.58 it/sec) -training >> step=7115300, episode=1187 reward=0.8036269 (98.01 it/sec) -training >> step=7115400, episode=1187 reward=0.7806751 (454.76 it/sec) -training >> step=7115500, episode=1187 reward=0.779106 (517.61 it/sec) -training >> step=7115600, episode=1187 reward=0.7979015 (482.16 it/sec) -training >> step=7115700, episode=1187 reward=0.7802548 (484.86 it/sec) -training >> step=7115800, episode=1187 reward=0.7980253 (507.87 it/sec) -training >> step=7115900, episode=1187 reward=0.7822795 (511.72 it/sec) -training >> step=7116000, episode=1187 reward=0.7871352 (507.95 it/sec) -training >> step=7116100, episode=1187 reward=0.79644 (444.02 it/sec) -training >> step=7116200, episode=1187 reward=0.7987909 (481.56 it/sec) -training >> step=7116300, episode=1187 reward=0.7691547 (479.00 it/sec) -training >> step=7116400, episode=1187 reward=0.768195 (504.21 it/sec) -training >> step=7116500, episode=1187 reward=0.798951 (514.41 it/sec) -training >> step=7116600, episode=1187 reward=0.7748944 (473.47 it/sec) -training >> step=7116700, episode=1187 reward=0.7995631 (490.06 it/sec) -training >> step=7116800, episode=1187 reward=0.7903244 (493.05 it/sec) -training >> step=7116900, episode=1187 reward=0.8040692 (489.99 it/sec) -training >> step=7117000, episode=1187 reward=0.7863865 (484.11 it/sec) -training >> step=7117100, episode=1187 reward=0.7950076 (492.66 it/sec) -training >> step=7117200, episode=1187 reward=0.789321 (493.63 it/sec) -training >> step=7117300, episode=1187 reward=0.7917128 (493.64 it/sec) -training >> step=7117400, episode=1187 reward=0.8009134 (437.84 it/sec) -training >> step=7117500, episode=1187 reward=0.7883655 (501.83 it/sec) -training >> step=7117600, episode=1187 reward=0.7870939 (495.53 it/sec) -training >> step=7117700, episode=1187 reward=0.7927073 (498.07 it/sec) -training >> step=7117800, episode=1187 reward=0.7953573 (503.15 it/sec) -training >> step=7117900, episode=1187 reward=0.8023005 (510.63 it/sec) -training >> step=7118000, episode=1187 reward=0.7973981 (484.25 it/sec) -training >> step=7118100, episode=1187 reward=0.7972224 (498.68 it/sec) -training >> step=7118200, episode=1187 reward=0.7977858 (527.81 it/sec) -training >> step=7118300, episode=1187 reward=0.7956904 (475.50 it/sec) -training >> step=7118400, episode=1187 reward=0.778603 (458.07 it/sec) -training >> step=7118500, episode=1187 reward=0.7763502 (474.02 it/sec) -training >> step=7118600, episode=1187 reward=0.7682407 (460.58 it/sec) -training >> step=7118700, episode=1187 reward=0.7972392 (484.38 it/sec) -training >> step=7118800, episode=1187 reward=0.7867643 (494.61 it/sec) -training >> step=7118900, episode=1187 reward=0.8046897 (503.39 it/sec) -training >> step=7119000, episode=1187 reward=0.7828367 (533.59 it/sec) -training >> step=7119100, episode=1187 reward=0.7792188 (489.34 it/sec) -training >> step=7119200, episode=1187 reward=0.7870924 (463.44 it/sec) -training >> step=7119300, episode=1187 reward=0.7992421 (494.02 it/sec) -training >> step=7119400, episode=1187 reward=0.7865984 (464.66 it/sec) -training >> step=7119500, episode=1187 reward=0.7880443 (467.35 it/sec) -training >> step=7119600, episode=1187 reward=0.7872289 (466.34 it/sec) -training >> step=7119700, episode=1187 reward=0.778125 (493.83 it/sec) -training >> step=7119800, episode=1187 reward=0.7759022 (404.06 it/sec) -training >> step=7119900, episode=1187 reward=0.7558373 (481.76 it/sec) -training >> step=7120000, episode=1187 reward=0.7981964 (525.35 it/sec) -training >> step=7120100, episode=1187 reward=0.8078807 (470.82 it/sec) -training >> step=7120200, episode=1187 reward=0.7785097 (456.00 it/sec) -training >> step=7120300, episode=1187 reward=0.7774693 (492.06 it/sec) -training >> step=7120400, episode=1187 reward=0.7796614 (517.04 it/sec) -training >> step=7120500, episode=1187 reward=0.7729781 (442.12 it/sec) -training >> step=7120600, episode=1187 reward=0.7936711 (465.40 it/sec) -training >> step=7120700, episode=1187 reward=0.7852935 (483.65 it/sec) -training >> step=7120800, episode=1187 reward=0.7787867 (471.96 it/sec) -training >> step=7120900, episode=1187 reward=0.7571734 (498.72 it/sec) -training >> step=7121000, episode=1187 reward=0.7860498 (461.06 it/sec) -training >> step=7121100, episode=1187 reward=0.7903242 (490.37 it/sec) -training >> step=7121200, episode=1187 reward=0.7803634 (488.56 it/sec) -training >> step=7121300, episode=1188 reward=0.775994 (111.60 it/sec) -training >> step=7121400, episode=1188 reward=0.7779335 (450.44 it/sec) -training >> step=7121500, episode=1188 reward=0.8026363 (489.60 it/sec) -training >> step=7121600, episode=1188 reward=0.7989029 (505.86 it/sec) -training >> step=7121700, episode=1188 reward=0.7976275 (469.86 it/sec) -training >> step=7121800, episode=1188 reward=0.7871047 (468.24 it/sec) -training >> step=7121900, episode=1188 reward=0.7873142 (518.09 it/sec) -training >> step=7122000, episode=1188 reward=0.7980203 (504.67 it/sec) -training >> step=7122100, episode=1188 reward=0.7847303 (472.00 it/sec) -training >> step=7122200, episode=1188 reward=0.7720203 (507.71 it/sec) -training >> step=7122300, episode=1188 reward=0.7952529 (494.11 it/sec) -training >> step=7122400, episode=1188 reward=0.7865412 (470.67 it/sec) -training >> step=7122500, episode=1188 reward=0.7803101 (464.82 it/sec) -training >> step=7122600, episode=1188 reward=0.7906025 (477.36 it/sec) -training >> step=7122700, episode=1188 reward=0.7770632 (486.91 it/sec) -training >> step=7122800, episode=1188 reward=0.7823186 (518.33 it/sec) -training >> step=7122900, episode=1188 reward=0.7883571 (516.54 it/sec) -training >> step=7123000, episode=1188 reward=0.7891284 (490.71 it/sec) -training >> step=7123100, episode=1188 reward=0.8027978 (473.83 it/sec) -training >> step=7123200, episode=1188 reward=0.7928448 (463.88 it/sec) -training >> step=7123300, episode=1188 reward=0.7872454 (479.88 it/sec) -training >> step=7123400, episode=1188 reward=0.7884283 (426.28 it/sec) -training >> step=7123500, episode=1188 reward=0.7838994 (503.16 it/sec) -training >> step=7123600, episode=1188 reward=0.7889755 (532.33 it/sec) -training >> step=7123700, episode=1188 reward=0.7738153 (482.27 it/sec) -training >> step=7123800, episode=1188 reward=0.7827291 (507.16 it/sec) -training >> step=7123900, episode=1188 reward=0.7943576 (460.80 it/sec) -training >> step=7124000, episode=1188 reward=0.7949314 (526.49 it/sec) -training >> step=7124100, episode=1188 reward=0.786247 (507.28 it/sec) -training >> step=7124200, episode=1188 reward=0.8004761 (482.66 it/sec) -training >> step=7124300, episode=1188 reward=0.7746264 (533.15 it/sec) -training >> step=7124400, episode=1188 reward=0.8096418 (488.42 it/sec) -training >> step=7124500, episode=1188 reward=0.7821856 (501.39 it/sec) -training >> step=7124600, episode=1188 reward=0.7817698 (499.76 it/sec) -training >> step=7124700, episode=1188 reward=0.8026281 (493.04 it/sec) -training >> step=7124800, episode=1188 reward=0.7989185 (489.77 it/sec) -training >> step=7124900, episode=1188 reward=0.777975 (496.49 it/sec) -training >> step=7125000, episode=1188 reward=0.8018269 (499.07 it/sec) -training >> step=7125100, episode=1188 reward=0.7679663 (532.69 it/sec) -training >> step=7125200, episode=1188 reward=0.814814 (495.47 it/sec) -training >> step=7125300, episode=1188 reward=0.7956836 (469.82 it/sec) -training >> step=7125400, episode=1188 reward=0.7927852 (468.35 it/sec) -training >> step=7125500, episode=1188 reward=0.7828283 (507.31 it/sec) -training >> step=7125600, episode=1188 reward=0.794509 (502.41 it/sec) -training >> step=7125700, episode=1188 reward=0.7886836 (506.42 it/sec) -training >> step=7125800, episode=1188 reward=0.7686638 (520.02 it/sec) -training >> step=7125900, episode=1188 reward=0.8133904 (415.96 it/sec) -training >> step=7126000, episode=1188 reward=0.7954625 (450.71 it/sec) -training >> step=7126100, episode=1188 reward=0.7725302 (469.64 it/sec) -training >> step=7126200, episode=1188 reward=0.7887088 (518.03 it/sec) -training >> step=7126300, episode=1188 reward=0.7820447 (495.18 it/sec) -training >> step=7126400, episode=1188 reward=0.7619033 (492.90 it/sec) -training >> step=7126500, episode=1188 reward=0.7941824 (476.44 it/sec) -training >> step=7126600, episode=1188 reward=0.7728662 (500.68 it/sec) -training >> step=7126700, episode=1188 reward=0.7883299 (495.21 it/sec) -training >> step=7126800, episode=1188 reward=0.7941608 (507.16 it/sec) -training >> step=7126900, episode=1188 reward=0.7825118 (478.09 it/sec) -training >> step=7127000, episode=1188 reward=0.7917364 (512.62 it/sec) -training >> step=7127100, episode=1188 reward=0.7794558 (512.96 it/sec) -training >> step=7127200, episode=1188 reward=0.7853597 (497.40 it/sec) -training >> step=7127300, episode=1189 reward=0.8006723 (101.70 it/sec) -training >> step=7127400, episode=1189 reward=0.7620856 (491.99 it/sec) -training >> step=7127500, episode=1189 reward=0.8053147 (526.69 it/sec) -training >> step=7127600, episode=1189 reward=0.793154 (473.20 it/sec) -training >> step=7127700, episode=1189 reward=0.8025472 (498.79 it/sec) -training >> step=7127800, episode=1189 reward=0.7967635 (479.88 it/sec) -training >> step=7127900, episode=1189 reward=0.7885458 (501.22 it/sec) -training >> step=7128000, episode=1189 reward=0.7840713 (493.59 it/sec) -training >> step=7128100, episode=1189 reward=0.778526 (482.78 it/sec) -training >> step=7128200, episode=1189 reward=0.7982489 (553.88 it/sec) -training >> step=7128300, episode=1189 reward=0.7988397 (488.92 it/sec) -training >> step=7128400, episode=1189 reward=0.7936292 (467.68 it/sec) -training >> step=7128500, episode=1189 reward=0.7897692 (486.57 it/sec) -training >> step=7128600, episode=1189 reward=0.7875443 (491.33 it/sec) -training >> step=7128700, episode=1189 reward=0.7835707 (485.84 it/sec) -training >> step=7128800, episode=1189 reward=0.7829911 (505.00 it/sec) -training >> step=7128900, episode=1189 reward=0.7834067 (481.45 it/sec) -training >> step=7129000, episode=1189 reward=0.8034504 (493.26 it/sec) -training >> step=7129100, episode=1189 reward=0.7906716 (485.39 it/sec) -training >> step=7129200, episode=1189 reward=0.7906939 (528.74 it/sec) -training >> step=7129300, episode=1189 reward=0.7926122 (505.36 it/sec) -training >> step=7129400, episode=1189 reward=0.8051431 (494.11 it/sec) -training >> step=7129500, episode=1189 reward=0.7914647 (513.11 it/sec) -training >> step=7129600, episode=1189 reward=0.7919958 (484.99 it/sec) -training >> step=7129700, episode=1189 reward=0.8013786 (522.21 it/sec) -training >> step=7129800, episode=1189 reward=0.7980267 (525.13 it/sec) -training >> step=7129900, episode=1189 reward=0.7906457 (527.93 it/sec) -training >> step=7130000, episode=1189 reward=0.786545 (491.62 it/sec) -training >> step=7130100, episode=1189 reward=0.7931665 (459.44 it/sec) -training >> step=7130200, episode=1189 reward=0.7932733 (514.07 it/sec) -training >> step=7130300, episode=1189 reward=0.7849091 (498.65 it/sec) -training >> step=7130400, episode=1189 reward=0.784592 (412.27 it/sec) -training >> step=7130500, episode=1189 reward=0.7683536 (492.08 it/sec) -training >> step=7130600, episode=1189 reward=0.7937214 (471.49 it/sec) -training >> step=7130700, episode=1189 reward=0.7720575 (505.69 it/sec) -training >> step=7130800, episode=1189 reward=0.7777551 (484.18 it/sec) -training >> step=7130900, episode=1189 reward=0.7833267 (508.76 it/sec) -training >> step=7131000, episode=1189 reward=0.7919833 (483.43 it/sec) -training >> step=7131100, episode=1189 reward=0.7831023 (473.24 it/sec) -training >> step=7131200, episode=1189 reward=0.7733191 (487.36 it/sec) -training >> step=7131300, episode=1189 reward=0.7678634 (527.91 it/sec) -training >> step=7131400, episode=1189 reward=0.7795742 (483.16 it/sec) -training >> step=7131500, episode=1189 reward=0.7740829 (503.53 it/sec) -training >> step=7131600, episode=1189 reward=0.7783067 (479.81 it/sec) -training >> step=7131700, episode=1189 reward=0.778857 (479.03 it/sec) -training >> step=7131800, episode=1189 reward=0.7743447 (474.48 it/sec) -training >> step=7131900, episode=1189 reward=0.7929676 (489.09 it/sec) -training >> step=7132000, episode=1189 reward=0.7953771 (529.42 it/sec) -training >> step=7132100, episode=1189 reward=0.7802327 (426.58 it/sec) -training >> step=7132200, episode=1189 reward=0.7833066 (466.41 it/sec) -training >> step=7132300, episode=1189 reward=0.7976691 (480.94 it/sec) -training >> step=7132400, episode=1189 reward=0.7522767 (518.44 it/sec) -training >> step=7132500, episode=1189 reward=0.7787039 (482.45 it/sec) -training >> step=7132600, episode=1189 reward=0.7825489 (463.52 it/sec) -training >> step=7132700, episode=1189 reward=0.7905327 (524.44 it/sec) -training >> step=7132800, episode=1189 reward=0.7895243 (516.16 it/sec) -training >> step=7132900, episode=1189 reward=0.7965825 (489.53 it/sec) -training >> step=7133000, episode=1189 reward=0.7865121 (467.42 it/sec) -training >> step=7133100, episode=1189 reward=0.7748306 (458.61 it/sec) -training >> step=7133200, episode=1189 reward=0.7825664 (493.86 it/sec) -training >> step=7133300, episode=1190 reward=0.7611198 (125.96 it/sec) -training >> step=7133400, episode=1190 reward=0.7753973 (419.07 it/sec) -training >> step=7133500, episode=1190 reward=0.7799326 (536.95 it/sec) -training >> step=7133600, episode=1190 reward=0.7804686 (495.18 it/sec) -training >> step=7133700, episode=1190 reward=0.7746716 (499.96 it/sec) -training >> step=7133800, episode=1190 reward=0.7721284 (499.78 it/sec) -training >> step=7133900, episode=1190 reward=0.7806907 (515.30 it/sec) -training >> step=7134000, episode=1190 reward=0.8049313 (517.20 it/sec) -training >> step=7134100, episode=1190 reward=0.7838598 (499.73 it/sec) -training >> step=7134200, episode=1190 reward=0.8021748 (478.97 it/sec) -training >> step=7134300, episode=1190 reward=0.7800819 (484.57 it/sec) -training >> step=7134400, episode=1190 reward=0.8011394 (485.92 it/sec) -training >> step=7134500, episode=1190 reward=0.7891341 (506.64 it/sec) -training >> step=7134600, episode=1190 reward=0.7796514 (505.65 it/sec) -training >> step=7134700, episode=1190 reward=0.7820673 (500.28 it/sec) -training >> step=7134800, episode=1190 reward=0.768131 (488.23 it/sec) -training >> step=7134900, episode=1190 reward=0.7723943 (487.54 it/sec) -training >> step=7135000, episode=1190 reward=0.7855933 (496.48 it/sec) -training >> step=7135100, episode=1190 reward=0.7978098 (508.65 it/sec) -training >> step=7135200, episode=1190 reward=0.7792341 (469.19 it/sec) -training >> step=7135300, episode=1190 reward=0.7935715 (511.36 it/sec) -training >> step=7135400, episode=1190 reward=0.7740012 (482.25 it/sec) -training >> step=7135500, episode=1190 reward=0.7946575 (489.51 it/sec) -training >> step=7135600, episode=1190 reward=0.7900103 (506.56 it/sec) -training >> step=7135700, episode=1190 reward=0.8032741 (493.46 it/sec) -training >> step=7135800, episode=1190 reward=0.7952849 (506.05 it/sec) -training >> step=7135900, episode=1190 reward=0.7959747 (493.05 it/sec) -training >> step=7136000, episode=1190 reward=0.7929666 (535.37 it/sec) -training >> step=7136100, episode=1190 reward=0.8101957 (447.10 it/sec) -training >> step=7136200, episode=1190 reward=0.7755688 (476.83 it/sec) -training >> step=7136300, episode=1190 reward=0.8241308 (493.80 it/sec) -training >> step=7136400, episode=1190 reward=0.7830009 (477.06 it/sec) -training >> step=7136500, episode=1190 reward=0.7918824 (448.39 it/sec) -training >> step=7136600, episode=1190 reward=0.791674 (421.90 it/sec) -training >> step=7136700, episode=1190 reward=0.7916201 (456.95 it/sec) -training >> step=7136800, episode=1190 reward=0.7792151 (488.59 it/sec) -training >> step=7136900, episode=1190 reward=0.7780376 (491.20 it/sec) -training >> step=7137000, episode=1190 reward=0.7717578 (501.45 it/sec) -training >> step=7137100, episode=1190 reward=0.7873265 (523.14 it/sec) -training >> step=7137200, episode=1190 reward=0.7681753 (450.63 it/sec) -training >> step=7137300, episode=1190 reward=0.7600675 (441.14 it/sec) -training >> step=7137400, episode=1190 reward=0.7835854 (500.51 it/sec) -training >> step=7137500, episode=1190 reward=0.7786772 (528.94 it/sec) -training >> step=7137600, episode=1190 reward=0.7678643 (479.59 it/sec) -training >> step=7137700, episode=1190 reward=0.7630646 (450.93 it/sec) -training >> step=7137800, episode=1190 reward=0.779357 (499.72 it/sec) -training >> step=7137900, episode=1190 reward=0.7910871 (501.43 it/sec) -training >> step=7138000, episode=1190 reward=0.7929137 (460.94 it/sec) -training >> step=7138100, episode=1190 reward=0.7885855 (498.36 it/sec) -training >> step=7138200, episode=1190 reward=0.778288 (512.39 it/sec) -training >> step=7138300, episode=1190 reward=0.7664467 (487.28 it/sec) -training >> step=7138400, episode=1190 reward=0.7837626 (508.66 it/sec) -training >> step=7138500, episode=1190 reward=0.7708817 (515.78 it/sec) -training >> step=7138600, episode=1190 reward=0.7937798 (502.41 it/sec) -training >> step=7138700, episode=1190 reward=0.7752389 (497.27 it/sec) -training >> step=7138800, episode=1190 reward=0.7937759 (478.57 it/sec) -training >> step=7138900, episode=1190 reward=0.7651467 (457.61 it/sec) -training >> step=7139000, episode=1190 reward=0.7941496 (461.15 it/sec) -training >> step=7139100, episode=1190 reward=0.7677469 (424.61 it/sec) -training >> step=7139200, episode=1190 reward=0.8083788 (422.49 it/sec) -training >> step=7139300, episode=1191 reward=0.7843359 (128.51 it/sec) -training >> step=7139400, episode=1191 reward=0.7722786 (506.21 it/sec) -training >> step=7139500, episode=1191 reward=0.7818565 (469.77 it/sec) -training >> step=7139600, episode=1191 reward=0.7739031 (391.38 it/sec) -training >> step=7139700, episode=1191 reward=0.7987589 (479.45 it/sec) -training >> step=7139800, episode=1191 reward=0.7711409 (508.23 it/sec) -training >> step=7139900, episode=1191 reward=0.8105152 (449.65 it/sec) -training >> step=7140000, episode=1191 reward=0.8011576 (497.72 it/sec) -training >> step=7140100, episode=1191 reward=0.8102989 (496.01 it/sec) -training >> step=7140200, episode=1191 reward=0.7715912 (424.31 it/sec) -training >> step=7140300, episode=1191 reward=0.7914004 (414.36 it/sec) -training >> step=7140400, episode=1191 reward=0.7599143 (474.51 it/sec) -training >> step=7140500, episode=1191 reward=0.7757028 (482.35 it/sec) -training >> step=7140600, episode=1191 reward=0.7858016 (442.93 it/sec) -training >> step=7140700, episode=1191 reward=0.7717353 (520.97 it/sec) -training >> step=7140800, episode=1191 reward=0.7896082 (470.83 it/sec) -training >> step=7140900, episode=1191 reward=0.7929038 (499.58 it/sec) -training >> step=7141000, episode=1191 reward=0.7900831 (470.28 it/sec) -training >> step=7141100, episode=1191 reward=0.7807103 (499.19 it/sec) -training >> step=7141200, episode=1191 reward=0.7824177 (493.50 it/sec) -training >> step=7141300, episode=1191 reward=0.7815589 (495.60 it/sec) -training >> step=7141400, episode=1191 reward=0.8009503 (498.40 it/sec) -training >> step=7141500, episode=1191 reward=0.771446 (475.83 it/sec) -training >> step=7141600, episode=1191 reward=0.8045489 (489.87 it/sec) -training >> step=7141700, episode=1191 reward=0.7804374 (451.48 it/sec) -training >> step=7141800, episode=1191 reward=0.7844747 (486.17 it/sec) -training >> step=7141900, episode=1191 reward=0.8196349 (445.94 it/sec) -training >> step=7142000, episode=1191 reward=0.8048358 (458.71 it/sec) -training >> step=7142100, episode=1191 reward=0.7861621 (517.52 it/sec) -training >> step=7142200, episode=1191 reward=0.7973691 (500.35 it/sec) -training >> step=7142300, episode=1191 reward=0.798799 (418.03 it/sec) -training >> step=7142400, episode=1191 reward=0.7879549 (462.79 it/sec) -training >> step=7142500, episode=1191 reward=0.789256 (447.07 it/sec) -training >> step=7142600, episode=1191 reward=0.8093191 (474.61 it/sec) -training >> step=7142700, episode=1191 reward=0.796164 (458.80 it/sec) -training >> step=7142800, episode=1191 reward=0.7936386 (400.94 it/sec) -training >> step=7142900, episode=1191 reward=0.7907201 (481.18 it/sec) -training >> step=7143000, episode=1191 reward=0.7910508 (499.68 it/sec) -training >> step=7143100, episode=1191 reward=0.7981499 (460.34 it/sec) -training >> step=7143200, episode=1191 reward=0.7747002 (509.91 it/sec) -training >> step=7143300, episode=1191 reward=0.7713832 (443.47 it/sec) -training >> step=7143400, episode=1191 reward=0.7939184 (475.95 it/sec) -training >> step=7143500, episode=1191 reward=0.7901697 (511.09 it/sec) -training >> step=7143600, episode=1191 reward=0.8033943 (521.94 it/sec) -training >> step=7143700, episode=1191 reward=0.7792689 (463.94 it/sec) -training >> step=7143800, episode=1191 reward=0.7949488 (492.23 it/sec) -training >> step=7143900, episode=1191 reward=0.7970399 (515.25 it/sec) -training >> step=7144000, episode=1191 reward=0.769182 (501.09 it/sec) -training >> step=7144100, episode=1191 reward=0.7771919 (507.02 it/sec) -training >> step=7144200, episode=1191 reward=0.773349 (500.18 it/sec) -training >> step=7144300, episode=1191 reward=0.7803333 (515.49 it/sec) -training >> step=7144400, episode=1191 reward=0.7585903 (469.51 it/sec) -training >> step=7144500, episode=1191 reward=0.8002372 (503.26 it/sec) -training >> step=7144600, episode=1191 reward=0.7788494 (477.52 it/sec) -training >> step=7144700, episode=1191 reward=0.782133 (533.81 it/sec) -training >> step=7144800, episode=1191 reward=0.7873555 (505.71 it/sec) -training >> step=7144900, episode=1191 reward=0.7872304 (496.13 it/sec) -training >> step=7145000, episode=1191 reward=0.7857008 (520.34 it/sec) -training >> step=7145100, episode=1191 reward=0.7762375 (506.14 it/sec) -training >> step=7145200, episode=1191 reward=0.7794513 (515.69 it/sec) -training >> step=7145300, episode=1192 reward=0.8035702 (126.08 it/sec) -training >> step=7145400, episode=1192 reward=0.780493 (479.69 it/sec) -training >> step=7145500, episode=1192 reward=0.7828494 (477.76 it/sec) -training >> step=7145600, episode=1192 reward=0.7830686 (495.42 it/sec) -training >> step=7145700, episode=1192 reward=0.7711456 (454.76 it/sec) -training >> step=7145800, episode=1192 reward=0.7621856 (410.85 it/sec) -training >> step=7145900, episode=1192 reward=0.7975453 (491.36 it/sec) -training >> step=7146000, episode=1192 reward=0.7778586 (498.26 it/sec) -training >> step=7146100, episode=1192 reward=0.7949702 (505.82 it/sec) -training >> step=7146200, episode=1192 reward=0.8007944 (412.38 it/sec) -training >> step=7146300, episode=1192 reward=0.8007886 (439.91 it/sec) -training >> step=7146400, episode=1192 reward=0.7990043 (477.99 it/sec) -training >> step=7146500, episode=1192 reward=0.7963096 (467.23 it/sec) -training >> step=7146600, episode=1192 reward=0.7822652 (455.20 it/sec) -training >> step=7146700, episode=1192 reward=0.7830051 (470.79 it/sec) -training >> step=7146800, episode=1192 reward=0.780198 (474.03 it/sec) -training >> step=7146900, episode=1192 reward=0.7832097 (505.96 it/sec) -training >> step=7147000, episode=1192 reward=0.7901553 (520.96 it/sec) -training >> step=7147100, episode=1192 reward=0.7830226 (523.88 it/sec) -training >> step=7147200, episode=1192 reward=0.7904896 (561.68 it/sec) -training >> step=7147300, episode=1192 reward=0.7875407 (552.86 it/sec) -training >> step=7147400, episode=1192 reward=0.795969 (553.88 it/sec) -training >> step=7147500, episode=1192 reward=0.7948785 (522.00 it/sec) -training >> step=7147600, episode=1192 reward=0.7975377 (571.32 it/sec) -training >> step=7147700, episode=1192 reward=0.7951466 (522.58 it/sec) -training >> step=7147800, episode=1192 reward=0.7774457 (537.24 it/sec) -training >> step=7147900, episode=1192 reward=0.7865934 (543.87 it/sec) -training >> step=7148000, episode=1192 reward=0.7752819 (541.59 it/sec) -training >> step=7148100, episode=1192 reward=0.7806013 (525.78 it/sec) -training >> step=7148200, episode=1192 reward=0.7777645 (522.43 it/sec) -training >> step=7148300, episode=1192 reward=0.7954755 (525.57 it/sec) -training >> step=7148400, episode=1192 reward=0.7789226 (512.17 it/sec) -training >> step=7148500, episode=1192 reward=0.7780057 (533.26 it/sec) -training >> step=7148600, episode=1192 reward=0.775941 (518.59 it/sec) -training >> step=7148700, episode=1192 reward=0.7884936 (529.11 it/sec) -training >> step=7148800, episode=1192 reward=0.7859079 (533.83 it/sec) -training >> step=7148900, episode=1192 reward=0.7784519 (538.35 it/sec) -training >> step=7149000, episode=1192 reward=0.8157849 (526.92 it/sec) -training >> step=7149100, episode=1192 reward=0.7686373 (523.72 it/sec) -training >> step=7149200, episode=1192 reward=0.7949356 (525.94 it/sec) -training >> step=7149300, episode=1192 reward=0.7671915 (552.66 it/sec) -training >> step=7149400, episode=1192 reward=0.7800465 (555.03 it/sec) -training >> step=7149500, episode=1192 reward=0.7762176 (532.07 it/sec) -training >> step=7149600, episode=1192 reward=0.7712566 (515.62 it/sec) -training >> step=7149700, episode=1192 reward=0.7753854 (538.92 it/sec) -training >> step=7149800, episode=1192 reward=0.7772328 (525.47 it/sec) -training >> step=7149900, episode=1192 reward=0.7936004 (549.50 it/sec) -training >> step=7150000, episode=1192 reward=0.7689595 (541.25 it/sec) -training >> step=7150100, episode=1192 reward=0.7837899 (564.66 it/sec) -training >> step=7150200, episode=1192 reward=0.8132709 (539.41 it/sec) -training >> step=7150300, episode=1192 reward=0.7786524 (502.13 it/sec) -training >> step=7150400, episode=1192 reward=0.7725413 (522.70 it/sec) -training >> step=7150500, episode=1192 reward=0.8009099 (533.42 it/sec) -training >> step=7150600, episode=1192 reward=0.7773758 (487.00 it/sec) -training >> step=7150700, episode=1192 reward=0.7818947 (490.35 it/sec) -training >> step=7150800, episode=1192 reward=0.7743795 (549.32 it/sec) -training >> step=7150900, episode=1192 reward=0.7895113 (490.32 it/sec) -training >> step=7151000, episode=1192 reward=0.780367 (449.04 it/sec) -training >> step=7151100, episode=1192 reward=0.7928312 (492.34 it/sec) -training >> step=7151200, episode=1192 reward=0.7652736 (480.58 it/sec) -training >> step=7151300, episode=1193 reward=0.778749 (111.52 it/sec) -training >> step=7151400, episode=1193 reward=0.7675794 (463.83 it/sec) -training >> step=7151500, episode=1193 reward=0.7852604 (517.38 it/sec) -training >> step=7151600, episode=1193 reward=0.794657 (436.21 it/sec) -training >> step=7151700, episode=1193 reward=0.7860165 (452.60 it/sec) -training >> step=7151800, episode=1193 reward=0.7837243 (455.13 it/sec) -training >> step=7151900, episode=1193 reward=0.7860558 (442.90 it/sec) -training >> step=7152000, episode=1193 reward=0.7949859 (311.72 it/sec) -training >> step=7152100, episode=1193 reward=0.7694342 (429.93 it/sec) -training >> step=7152200, episode=1193 reward=0.7898235 (408.91 it/sec) -training >> step=7152300, episode=1193 reward=0.7799075 (473.76 it/sec) -training >> step=7152400, episode=1193 reward=0.7811092 (470.45 it/sec) -training >> step=7152500, episode=1193 reward=0.7913598 (450.13 it/sec) -training >> step=7152600, episode=1193 reward=0.7797018 (483.82 it/sec) -training >> step=7152700, episode=1193 reward=0.7848118 (491.93 it/sec) -training >> step=7152800, episode=1193 reward=0.7824876 (408.13 it/sec) -training >> step=7152900, episode=1193 reward=0.7583385 (499.02 it/sec) -training >> step=7153000, episode=1193 reward=0.7897514 (433.55 it/sec) -training >> step=7153100, episode=1193 reward=0.790986 (414.06 it/sec) -training >> step=7153200, episode=1193 reward=0.8022232 (458.98 it/sec) -training >> step=7153300, episode=1193 reward=0.780916 (472.05 it/sec) -training >> step=7153400, episode=1193 reward=0.7788196 (453.43 it/sec) -training >> step=7153500, episode=1193 reward=0.769563 (506.44 it/sec) -training >> step=7153600, episode=1193 reward=0.7805588 (460.04 it/sec) -training >> step=7153700, episode=1193 reward=0.7906995 (471.48 it/sec) -training >> step=7153800, episode=1193 reward=0.7814963 (535.68 it/sec) -training >> step=7153900, episode=1193 reward=0.7671501 (507.89 it/sec) -training >> step=7154000, episode=1193 reward=0.7947178 (513.80 it/sec) -training >> step=7154100, episode=1193 reward=0.8031006 (499.51 it/sec) -training >> step=7154200, episode=1193 reward=0.7791201 (497.55 it/sec) -training >> step=7154300, episode=1193 reward=0.7760934 (509.23 it/sec) -training >> step=7154400, episode=1193 reward=0.7888958 (490.10 it/sec) -training >> step=7154500, episode=1193 reward=0.8011369 (513.67 it/sec) -training >> step=7154600, episode=1193 reward=0.7753673 (483.93 it/sec) -training >> step=7154700, episode=1193 reward=0.7821235 (538.14 it/sec) -training >> step=7154800, episode=1193 reward=0.805315 (523.66 it/sec) -training >> step=7154900, episode=1193 reward=0.7988436 (505.55 it/sec) -training >> step=7155000, episode=1193 reward=0.777944 (499.64 it/sec) -training >> step=7155100, episode=1193 reward=0.7813151 (558.31 it/sec) -training >> step=7155200, episode=1193 reward=0.7965544 (476.84 it/sec) -training >> step=7155300, episode=1193 reward=0.8105253 (486.68 it/sec) -training >> step=7155400, episode=1193 reward=0.7868829 (483.11 it/sec) -training >> step=7155500, episode=1193 reward=0.7922187 (523.81 it/sec) -training >> step=7155600, episode=1193 reward=0.7850094 (521.58 it/sec) -training >> step=7155700, episode=1193 reward=0.7700954 (451.91 it/sec) -training >> step=7155800, episode=1193 reward=0.7942383 (449.25 it/sec) -training >> step=7155900, episode=1193 reward=0.7857238 (463.47 it/sec) -training >> step=7156000, episode=1193 reward=0.7673294 (429.73 it/sec) -training >> step=7156100, episode=1193 reward=0.7795462 (472.69 it/sec) -training >> step=7156200, episode=1193 reward=0.7799695 (452.20 it/sec) -training >> step=7156300, episode=1193 reward=0.7849994 (454.99 it/sec) -training >> step=7156400, episode=1193 reward=0.7836724 (452.79 it/sec) -training >> step=7156500, episode=1193 reward=0.7843572 (483.19 it/sec) -training >> step=7156600, episode=1193 reward=0.8013096 (488.61 it/sec) -training >> step=7156700, episode=1193 reward=0.7753471 (453.92 it/sec) -training >> step=7156800, episode=1193 reward=0.7758023 (464.68 it/sec) -training >> step=7156900, episode=1193 reward=0.77531 (466.06 it/sec) -training >> step=7157000, episode=1193 reward=0.8116699 (513.39 it/sec) -training >> step=7157100, episode=1193 reward=0.7742647 (484.20 it/sec) -training >> step=7157200, episode=1193 reward=0.7875537 (497.40 it/sec) -training >> step=7157300, episode=1194 reward=0.7873745 (99.88 it/sec) -training >> step=7157400, episode=1194 reward=0.7719859 (479.66 it/sec) -training >> step=7157500, episode=1194 reward=0.7809427 (430.39 it/sec) -training >> step=7157600, episode=1194 reward=0.7759833 (490.03 it/sec) -training >> step=7157700, episode=1194 reward=0.7771626 (480.94 it/sec) -training >> step=7157800, episode=1194 reward=0.7816842 (477.90 it/sec) -training >> step=7157900, episode=1194 reward=0.7944292 (547.99 it/sec) -training >> step=7158000, episode=1194 reward=0.8025046 (347.37 it/sec) -training >> step=7158100, episode=1194 reward=0.7794622 (487.91 it/sec) -training >> step=7158200, episode=1194 reward=0.8031753 (540.97 it/sec) -training >> step=7158300, episode=1194 reward=0.7860822 (522.17 it/sec) -training >> step=7158400, episode=1194 reward=0.806461 (520.47 it/sec) -training >> step=7158500, episode=1194 reward=0.7843506 (459.62 it/sec) -training >> step=7158600, episode=1194 reward=0.7792637 (507.97 it/sec) -training >> step=7158700, episode=1194 reward=0.7974431 (469.15 it/sec) -training >> step=7158800, episode=1194 reward=0.7895443 (527.93 it/sec) -training >> step=7158900, episode=1194 reward=0.7783286 (493.31 it/sec) -training >> step=7159000, episode=1194 reward=0.7734082 (434.88 it/sec) -training >> step=7159100, episode=1194 reward=0.7868658 (524.32 it/sec) -training >> step=7159200, episode=1194 reward=0.7796643 (514.37 it/sec) -training >> step=7159300, episode=1194 reward=0.7886068 (422.97 it/sec) -training >> step=7159400, episode=1194 reward=0.7950394 (495.06 it/sec) -training >> step=7159500, episode=1194 reward=0.7909382 (478.72 it/sec) -training >> step=7159600, episode=1194 reward=0.7900484 (477.69 it/sec) -training >> step=7159700, episode=1194 reward=0.7953396 (518.19 it/sec) -training >> step=7159800, episode=1194 reward=0.787521 (454.80 it/sec) -training >> step=7159900, episode=1194 reward=0.7876529 (486.52 it/sec) -training >> step=7160000, episode=1194 reward=0.7807966 (492.38 it/sec) -training >> step=7160100, episode=1194 reward=0.7715603 (386.65 it/sec) -training >> step=7160200, episode=1194 reward=0.792924 (401.55 it/sec) -training >> step=7160300, episode=1194 reward=0.7715722 (389.12 it/sec) -training >> step=7160400, episode=1194 reward=0.7856969 (485.26 it/sec) -training >> step=7160500, episode=1194 reward=0.7887772 (507.55 it/sec) -training >> step=7160600, episode=1194 reward=0.7798951 (485.39 it/sec) -training >> step=7160700, episode=1194 reward=0.7953132 (459.48 it/sec) -training >> step=7160800, episode=1194 reward=0.7884016 (504.92 it/sec) -training >> step=7160900, episode=1194 reward=0.7709511 (478.01 it/sec) -training >> step=7161000, episode=1194 reward=0.7839503 (388.22 it/sec) -training >> step=7161100, episode=1194 reward=0.7779876 (405.82 it/sec) -training >> step=7161200, episode=1194 reward=0.7813763 (402.09 it/sec) -training >> step=7161300, episode=1194 reward=0.7814419 (437.02 it/sec) -training >> step=7161400, episode=1194 reward=0.7552105 (434.81 it/sec) -training >> step=7161500, episode=1194 reward=0.7756355 (447.40 it/sec) -training >> step=7161600, episode=1194 reward=0.7810481 (455.23 it/sec) -training >> step=7161700, episode=1194 reward=0.7661115 (505.26 it/sec) -training >> step=7161800, episode=1194 reward=0.7956046 (505.05 it/sec) -training >> step=7161900, episode=1194 reward=0.7867767 (557.41 it/sec) -training >> step=7162000, episode=1194 reward=0.7751965 (495.76 it/sec) -training >> step=7162100, episode=1194 reward=0.7862961 (490.26 it/sec) -training >> step=7162200, episode=1194 reward=0.7916319 (536.11 it/sec) -training >> step=7162300, episode=1194 reward=0.7837678 (515.99 it/sec) -training >> step=7162400, episode=1194 reward=0.8028473 (485.47 it/sec) -training >> step=7162500, episode=1194 reward=0.7749408 (516.65 it/sec) -training >> step=7162600, episode=1194 reward=0.8086521 (518.03 it/sec) -training >> step=7162700, episode=1194 reward=0.7993179 (499.39 it/sec) -training >> step=7162800, episode=1194 reward=0.7865922 (507.63 it/sec) -training >> step=7162900, episode=1194 reward=0.7849045 (530.81 it/sec) -training >> step=7163000, episode=1194 reward=0.7828903 (528.46 it/sec) -training >> step=7163100, episode=1194 reward=0.7763896 (518.85 it/sec) -training >> step=7163200, episode=1194 reward=0.776571 (496.03 it/sec) -training >> step=7163300, episode=1195 reward=0.7928068 (129.09 it/sec) -training >> step=7163400, episode=1195 reward=0.7765632 (474.68 it/sec) -training >> step=7163500, episode=1195 reward=0.8057392 (447.69 it/sec) -training >> step=7163600, episode=1195 reward=0.7856605 (489.63 it/sec) -training >> step=7163700, episode=1195 reward=0.7668417 (569.74 it/sec) -training >> step=7163800, episode=1195 reward=0.7792701 (520.29 it/sec) -training >> step=7163900, episode=1195 reward=0.7941656 (497.56 it/sec) -training >> step=7164000, episode=1195 reward=0.7813538 (489.61 it/sec) -training >> step=7164100, episode=1195 reward=0.7780139 (523.95 it/sec) -training >> step=7164200, episode=1195 reward=0.7863978 (369.77 it/sec) -training >> step=7164300, episode=1195 reward=0.7781727 (517.15 it/sec) -training >> step=7164400, episode=1195 reward=0.7778172 (554.42 it/sec) -training >> step=7164500, episode=1195 reward=0.7830865 (494.59 it/sec) -training >> step=7164600, episode=1195 reward=0.7841241 (487.87 it/sec) -training >> step=7164700, episode=1195 reward=0.7854878 (555.70 it/sec) -training >> step=7164800, episode=1195 reward=0.7942379 (508.45 it/sec) -training >> step=7164900, episode=1195 reward=0.7717667 (499.77 it/sec) -training >> step=7165000, episode=1195 reward=0.7861677 (522.99 it/sec) -training >> step=7165100, episode=1195 reward=0.7949367 (551.88 it/sec) -training >> step=7165200, episode=1195 reward=0.7944633 (488.02 it/sec) -training >> step=7165300, episode=1195 reward=0.7701896 (549.30 it/sec) -training >> step=7165400, episode=1195 reward=0.791517 (530.35 it/sec) -training >> step=7165500, episode=1195 reward=0.7759144 (546.98 it/sec) -training >> step=7165600, episode=1195 reward=0.8084036 (526.32 it/sec) -training >> step=7165700, episode=1195 reward=0.793383 (507.23 it/sec) -training >> step=7165800, episode=1195 reward=0.7780148 (527.59 it/sec) -training >> step=7165900, episode=1195 reward=0.7951961 (502.22 it/sec) -training >> step=7166000, episode=1195 reward=0.7843177 (533.88 it/sec) -training >> step=7166100, episode=1195 reward=0.7778478 (462.64 it/sec) -training >> step=7166200, episode=1195 reward=0.7920608 (546.13 it/sec) -training >> step=7166300, episode=1195 reward=0.793647 (518.34 it/sec) -training >> step=7166400, episode=1195 reward=0.7987821 (501.28 it/sec) -training >> step=7166500, episode=1195 reward=0.8022292 (531.27 it/sec) -training >> step=7166600, episode=1195 reward=0.7957839 (494.42 it/sec) -training >> step=7166700, episode=1195 reward=0.7759917 (535.75 it/sec) -training >> step=7166800, episode=1195 reward=0.7900549 (509.60 it/sec) -training >> step=7166900, episode=1195 reward=0.7820531 (534.62 it/sec) -training >> step=7167000, episode=1195 reward=0.7963352 (523.40 it/sec) -training >> step=7167100, episode=1195 reward=0.771849 (514.01 it/sec) -training >> step=7167200, episode=1195 reward=0.7993366 (523.17 it/sec) -training >> step=7167300, episode=1195 reward=0.793325 (533.51 it/sec) -training >> step=7167400, episode=1195 reward=0.7947149 (519.00 it/sec) -training >> step=7167500, episode=1195 reward=0.7756084 (488.79 it/sec) -training >> step=7167600, episode=1195 reward=0.7900198 (506.89 it/sec) -training >> step=7167700, episode=1195 reward=0.795603 (503.82 it/sec) -training >> step=7167800, episode=1195 reward=0.7538891 (512.27 it/sec) -training >> step=7167900, episode=1195 reward=0.7929343 (503.97 it/sec) -training >> step=7168000, episode=1195 reward=0.7684692 (484.38 it/sec) -training >> step=7168100, episode=1195 reward=0.7792728 (480.83 it/sec) -training >> step=7168200, episode=1195 reward=0.7740872 (533.84 it/sec) -training >> step=7168300, episode=1195 reward=0.7731753 (519.48 it/sec) -training >> step=7168400, episode=1195 reward=0.7870848 (544.66 it/sec) -training >> step=7168500, episode=1195 reward=0.791495 (530.06 it/sec) -training >> step=7168600, episode=1195 reward=0.7821923 (506.17 it/sec) -training >> step=7168700, episode=1195 reward=0.7993696 (535.80 it/sec) -training >> step=7168800, episode=1195 reward=0.7743964 (549.77 it/sec) -training >> step=7168900, episode=1195 reward=0.7797049 (482.08 it/sec) -training >> step=7169000, episode=1195 reward=0.7789881 (491.83 it/sec) -training >> step=7169100, episode=1195 reward=0.7812457 (567.37 it/sec) -training >> step=7169200, episode=1195 reward=0.7708468 (507.68 it/sec) -training >> step=7169300, episode=1196 reward=0.8047345 (145.98 it/sec) -training >> step=7169400, episode=1196 reward=0.7820593 (517.97 it/sec) -training >> step=7169500, episode=1196 reward=0.7874365 (502.40 it/sec) -training >> step=7169600, episode=1196 reward=0.8085973 (414.44 it/sec) -training >> step=7169700, episode=1196 reward=0.7988032 (392.19 it/sec) -training >> step=7169800, episode=1196 reward=0.7563115 (380.56 it/sec) -training >> step=7169900, episode=1196 reward=0.7861251 (398.49 it/sec) -training >> step=7170000, episode=1196 reward=0.7847993 (499.01 it/sec) -training >> step=7170100, episode=1196 reward=0.7761897 (502.41 it/sec) -training >> step=7170200, episode=1196 reward=0.7784439 (540.50 it/sec) -training >> step=7170300, episode=1196 reward=0.7983634 (357.80 it/sec) -training >> step=7170400, episode=1196 reward=0.788286 (480.80 it/sec) -training >> step=7170500, episode=1196 reward=0.804167 (522.06 it/sec) -training >> step=7170600, episode=1196 reward=0.7807552 (519.93 it/sec) -training >> step=7170700, episode=1196 reward=0.7916829 (507.84 it/sec) -training >> step=7170800, episode=1196 reward=0.7793882 (524.16 it/sec) -training >> step=7170900, episode=1196 reward=0.7625294 (541.04 it/sec) -training >> step=7171000, episode=1196 reward=0.7726381 (501.84 it/sec) -training >> step=7171100, episode=1196 reward=0.7961109 (506.23 it/sec) -training >> step=7171200, episode=1196 reward=0.7901117 (480.47 it/sec) -training >> step=7171300, episode=1196 reward=0.7932413 (543.89 it/sec) -training >> step=7171400, episode=1196 reward=0.804088 (509.38 it/sec) -training >> step=7171500, episode=1196 reward=0.7872135 (499.43 it/sec) -training >> step=7171600, episode=1196 reward=0.7837459 (551.30 it/sec) -training >> step=7171700, episode=1196 reward=0.780548 (466.19 it/sec) -training >> step=7171800, episode=1196 reward=0.8020802 (541.82 it/sec) -training >> step=7171900, episode=1196 reward=0.7656585 (524.96 it/sec) -training >> step=7172000, episode=1196 reward=0.7818751 (538.01 it/sec) -training >> step=7172100, episode=1196 reward=0.7717887 (525.05 it/sec) -training >> step=7172200, episode=1196 reward=0.8001247 (504.71 it/sec) -training >> step=7172300, episode=1196 reward=0.7850587 (523.58 it/sec) -training >> step=7172400, episode=1196 reward=0.773922 (517.11 it/sec) -training >> step=7172500, episode=1196 reward=0.7847591 (495.23 it/sec) -training >> step=7172600, episode=1196 reward=0.7680743 (536.54 it/sec) -training >> step=7172700, episode=1196 reward=0.8053328 (535.33 it/sec) -training >> step=7172800, episode=1196 reward=0.7966088 (526.92 it/sec) -training >> step=7172900, episode=1196 reward=0.7777291 (488.40 it/sec) -training >> step=7173000, episode=1196 reward=0.783537 (564.64 it/sec) -training >> step=7173100, episode=1196 reward=0.7976132 (524.13 it/sec) -training >> step=7173200, episode=1196 reward=0.7951419 (519.62 it/sec) -training >> step=7173300, episode=1196 reward=0.7717584 (476.36 it/sec) -training >> step=7173400, episode=1196 reward=0.7831851 (532.75 it/sec) -training >> step=7173500, episode=1196 reward=0.7861431 (495.13 it/sec) -training >> step=7173600, episode=1196 reward=0.7873697 (536.22 it/sec) -training >> step=7173700, episode=1196 reward=0.8027314 (507.77 it/sec) -training >> step=7173800, episode=1196 reward=0.7886403 (533.10 it/sec) -training >> step=7173900, episode=1196 reward=0.7653596 (529.27 it/sec) -training >> step=7174000, episode=1196 reward=0.7887812 (540.24 it/sec) -training >> step=7174100, episode=1196 reward=0.7763428 (542.13 it/sec) -training >> step=7174200, episode=1196 reward=0.7965209 (517.98 it/sec) -training >> step=7174300, episode=1196 reward=0.7892385 (514.26 it/sec) -training >> step=7174400, episode=1196 reward=0.7883066 (534.55 it/sec) -training >> step=7174500, episode=1196 reward=0.7646899 (540.59 it/sec) -training >> step=7174600, episode=1196 reward=0.7839895 (505.43 it/sec) -training >> step=7174700, episode=1196 reward=0.785099 (531.03 it/sec) -training >> step=7174800, episode=1196 reward=0.7995033 (508.46 it/sec) -training >> step=7174900, episode=1196 reward=0.7707093 (465.50 it/sec) -training >> step=7175000, episode=1196 reward=0.7820492 (551.38 it/sec) -training >> step=7175100, episode=1196 reward=0.7866532 (536.26 it/sec) -training >> step=7175200, episode=1196 reward=0.7862153 (539.25 it/sec) -training >> step=7175300, episode=1197 reward=0.7804139 (130.98 it/sec) -training >> step=7175400, episode=1197 reward=0.7434227 (518.20 it/sec) -training >> step=7175500, episode=1197 reward=0.7768259 (487.66 it/sec) -training >> step=7175600, episode=1197 reward=0.7949876 (540.90 it/sec) -training >> step=7175700, episode=1197 reward=0.7898842 (513.85 it/sec) -training >> step=7175800, episode=1197 reward=0.809938 (537.96 it/sec) -training >> step=7175900, episode=1197 reward=0.7895517 (539.24 it/sec) -training >> step=7176000, episode=1197 reward=0.783802 (537.47 it/sec) -training >> step=7176100, episode=1197 reward=0.7851089 (503.52 it/sec) -training >> step=7176200, episode=1197 reward=0.768489 (498.13 it/sec) -training >> step=7176300, episode=1197 reward=0.7945324 (534.97 it/sec) -training >> step=7176400, episode=1197 reward=0.8005253 (363.99 it/sec) -training >> step=7176500, episode=1197 reward=0.7815995 (517.19 it/sec) -training >> step=7176600, episode=1197 reward=0.7808352 (591.16 it/sec) -training >> step=7176700, episode=1197 reward=0.7878693 (491.81 it/sec) -training >> step=7176800, episode=1197 reward=0.7684179 (511.45 it/sec) -training >> step=7176900, episode=1197 reward=0.7743871 (523.15 it/sec) -training >> step=7177000, episode=1197 reward=0.8159818 (540.65 it/sec) -training >> step=7177100, episode=1197 reward=0.7961984 (523.53 it/sec) -training >> step=7177200, episode=1197 reward=0.776634 (529.07 it/sec) -training >> step=7177300, episode=1197 reward=0.7632052 (505.17 it/sec) -training >> step=7177400, episode=1197 reward=0.7835771 (548.90 it/sec) -training >> step=7177500, episode=1197 reward=0.7877467 (517.87 it/sec) -training >> step=7177600, episode=1197 reward=0.7810809 (543.08 it/sec) -training >> step=7177700, episode=1197 reward=0.7908754 (517.06 it/sec) -training >> step=7177800, episode=1197 reward=0.7672653 (500.38 it/sec) -training >> step=7177900, episode=1197 reward=0.7845454 (523.69 it/sec) -training >> step=7178000, episode=1197 reward=0.7884103 (522.66 it/sec) -training >> step=7178100, episode=1197 reward=0.7812492 (532.25 it/sec) -training >> step=7178200, episode=1197 reward=0.7899011 (506.00 it/sec) -training >> step=7178300, episode=1197 reward=0.7695183 (482.51 it/sec) -training >> step=7178400, episode=1197 reward=0.7812502 (533.98 it/sec) -training >> step=7178500, episode=1197 reward=0.7791647 (560.57 it/sec) -training >> step=7178600, episode=1197 reward=0.7980481 (522.50 it/sec) -training >> step=7178700, episode=1197 reward=0.8038881 (529.17 it/sec) -training >> step=7178800, episode=1197 reward=0.7886067 (541.24 it/sec) -training >> step=7178900, episode=1197 reward=0.7891208 (479.72 it/sec) -training >> step=7179000, episode=1197 reward=0.7709945 (522.09 it/sec) -training >> step=7179100, episode=1197 reward=0.7874588 (516.45 it/sec) -training >> step=7179200, episode=1197 reward=0.7654428 (525.11 it/sec) -training >> step=7179300, episode=1197 reward=0.7642092 (516.01 it/sec) -training >> step=7179400, episode=1197 reward=0.777648 (502.38 it/sec) -training >> step=7179500, episode=1197 reward=0.7868268 (491.57 it/sec) -training >> step=7179600, episode=1197 reward=0.7725536 (502.17 it/sec) -training >> step=7179700, episode=1197 reward=0.7685992 (425.93 it/sec) -training >> step=7179800, episode=1197 reward=0.7901754 (457.26 it/sec) -training >> step=7179900, episode=1197 reward=0.7819046 (528.61 it/sec) -training >> step=7180000, episode=1197 reward=0.7853536 (535.09 it/sec) -training >> step=7180100, episode=1197 reward=0.7785943 (459.30 it/sec) -training >> step=7180200, episode=1197 reward=0.7885626 (536.03 it/sec) -training >> step=7180300, episode=1197 reward=0.7801388 (487.85 it/sec) -training >> step=7180400, episode=1197 reward=0.795095 (489.83 it/sec) -training >> step=7180500, episode=1197 reward=0.7779664 (512.09 it/sec) -training >> step=7180600, episode=1197 reward=0.7731512 (537.79 it/sec) -training >> step=7180700, episode=1197 reward=0.7979997 (505.94 it/sec) -training >> step=7180800, episode=1197 reward=0.7930776 (452.46 it/sec) -training >> step=7180900, episode=1197 reward=0.7858942 (457.58 it/sec) -training >> step=7181000, episode=1197 reward=0.7823803 (474.06 it/sec) -training >> step=7181100, episode=1197 reward=0.775959 (454.31 it/sec) -training >> step=7181200, episode=1197 reward=0.7825005 (379.71 it/sec) -training >> step=7181300, episode=1198 reward=0.8078598 (100.21 it/sec) -training >> step=7181400, episode=1198 reward=0.776446 (423.29 it/sec) -training >> step=7181500, episode=1198 reward=0.8056721 (469.00 it/sec) -training >> step=7181600, episode=1198 reward=0.787418 (499.66 it/sec) -training >> step=7181700, episode=1198 reward=0.7743734 (395.99 it/sec) -training >> step=7181800, episode=1198 reward=0.7665194 (429.32 it/sec) -training >> step=7181900, episode=1198 reward=0.7839963 (458.50 it/sec) -training >> step=7182000, episode=1198 reward=0.7917992 (445.69 it/sec) -training >> step=7182100, episode=1198 reward=0.7812185 (429.05 it/sec) -training >> step=7182200, episode=1198 reward=0.8026674 (398.10 it/sec) -training >> step=7182300, episode=1198 reward=0.7947019 (414.49 it/sec) -training >> step=7182400, episode=1198 reward=0.784043 (442.91 it/sec) -training >> step=7182500, episode=1198 reward=0.799271 (475.22 it/sec) -training >> step=7182600, episode=1198 reward=0.786294 (370.27 it/sec) -training >> step=7182700, episode=1198 reward=0.7704001 (500.71 it/sec) -training >> step=7182800, episode=1198 reward=0.7930567 (508.28 it/sec) -training >> step=7182900, episode=1198 reward=0.7608386 (468.25 it/sec) -training >> step=7183000, episode=1198 reward=0.7819598 (494.09 it/sec) -training >> step=7183100, episode=1198 reward=0.7930452 (424.16 it/sec) -training >> step=7183200, episode=1198 reward=0.8013734 (445.73 it/sec) -training >> step=7183300, episode=1198 reward=0.8056376 (379.65 it/sec) -training >> step=7183400, episode=1198 reward=0.7756278 (483.44 it/sec) -training >> step=7183500, episode=1198 reward=0.784116 (392.65 it/sec) -training >> step=7183600, episode=1198 reward=0.7708909 (439.64 it/sec) -training >> step=7183700, episode=1198 reward=0.8031355 (504.67 it/sec) -training >> step=7183800, episode=1198 reward=0.7852607 (484.99 it/sec) -training >> step=7183900, episode=1198 reward=0.7804372 (453.36 it/sec) -training >> step=7184000, episode=1198 reward=0.7981264 (481.45 it/sec) -training >> step=7184100, episode=1198 reward=0.8068355 (509.35 it/sec) -training >> step=7184200, episode=1198 reward=0.7973095 (485.60 it/sec) -training >> step=7184300, episode=1198 reward=0.7793172 (525.95 it/sec) -training >> step=7184400, episode=1198 reward=0.7846798 (518.70 it/sec) -training >> step=7184500, episode=1198 reward=0.7950725 (515.64 it/sec) -training >> step=7184600, episode=1198 reward=0.7707958 (505.21 it/sec) -training >> step=7184700, episode=1198 reward=0.7871669 (492.92 it/sec) -training >> step=7184800, episode=1198 reward=0.7978722 (546.82 it/sec) -training >> step=7184900, episode=1198 reward=0.7816235 (495.73 it/sec) -training >> step=7185000, episode=1198 reward=0.7802469 (472.37 it/sec) -training >> step=7185100, episode=1198 reward=0.7925686 (531.51 it/sec) -training >> step=7185200, episode=1198 reward=0.7884264 (519.53 it/sec) -training >> step=7185300, episode=1198 reward=0.7925368 (521.25 it/sec) -training >> step=7185400, episode=1198 reward=0.7743534 (512.88 it/sec) -training >> step=7185500, episode=1198 reward=0.7695169 (526.84 it/sec) -training >> step=7185600, episode=1198 reward=0.777123 (498.26 it/sec) -training >> step=7185700, episode=1198 reward=0.775981 (508.28 it/sec) -training >> step=7185800, episode=1198 reward=0.7934532 (498.13 it/sec) -training >> step=7185900, episode=1198 reward=0.779254 (539.48 it/sec) -training >> step=7186000, episode=1198 reward=0.7731593 (538.38 it/sec) -training >> step=7186100, episode=1198 reward=0.7962059 (508.96 it/sec) -training >> step=7186200, episode=1198 reward=0.7952103 (552.79 it/sec) -training >> step=7186300, episode=1198 reward=0.7837238 (523.10 it/sec) -training >> step=7186400, episode=1198 reward=0.773631 (497.93 it/sec) -training >> step=7186500, episode=1198 reward=0.7911459 (515.91 it/sec) -training >> step=7186600, episode=1198 reward=0.7884846 (529.44 it/sec) -training >> step=7186700, episode=1198 reward=0.785133 (537.45 it/sec) -training >> step=7186800, episode=1198 reward=0.7657823 (473.79 it/sec) -training >> step=7186900, episode=1198 reward=0.7804961 (525.44 it/sec) -training >> step=7187000, episode=1198 reward=0.7927601 (555.82 it/sec) -training >> step=7187100, episode=1198 reward=0.773321 (535.24 it/sec) -training >> step=7187200, episode=1198 reward=0.7679727 (512.48 it/sec) -training >> step=7187300, episode=1199 reward=0.7919841 (116.04 it/sec) -training >> step=7187400, episode=1199 reward=0.7859459 (462.72 it/sec) -training >> step=7187500, episode=1199 reward=0.7726995 (511.34 it/sec) -training >> step=7187600, episode=1199 reward=0.7938652 (505.36 it/sec) -training >> step=7187700, episode=1199 reward=0.7705805 (435.33 it/sec) -training >> step=7187800, episode=1199 reward=0.7848729 (513.53 it/sec) -training >> step=7187900, episode=1199 reward=0.7885119 (497.87 it/sec) -training >> step=7188000, episode=1199 reward=0.7763617 (506.19 it/sec) -training >> step=7188100, episode=1199 reward=0.7913708 (516.43 it/sec) -training >> step=7188200, episode=1199 reward=0.7821375 (486.66 it/sec) -training >> step=7188300, episode=1199 reward=0.7696761 (498.06 it/sec) -training >> step=7188400, episode=1199 reward=0.8127362 (523.77 it/sec) -training >> step=7188500, episode=1199 reward=0.7913665 (556.23 it/sec) -training >> step=7188600, episode=1199 reward=0.7897202 (328.66 it/sec) -training >> step=7188700, episode=1199 reward=0.7845737 (526.04 it/sec) -training >> step=7188800, episode=1199 reward=0.7919742 (507.19 it/sec) -training >> step=7188900, episode=1199 reward=0.8129615 (518.53 it/sec) -training >> step=7189000, episode=1199 reward=0.7955077 (511.13 it/sec) -training >> step=7189100, episode=1199 reward=0.7956863 (500.64 it/sec) -training >> step=7189200, episode=1199 reward=0.7973469 (524.85 it/sec) -training >> step=7189300, episode=1199 reward=0.7876731 (514.92 it/sec) -training >> step=7189400, episode=1199 reward=0.7870155 (491.88 it/sec) -training >> step=7189500, episode=1199 reward=0.7828279 (502.38 it/sec) -training >> step=7189600, episode=1199 reward=0.786479 (491.29 it/sec) -training >> step=7189700, episode=1199 reward=0.789459 (516.01 it/sec) -training >> step=7189800, episode=1199 reward=0.7994542 (546.68 it/sec) -training >> step=7189900, episode=1199 reward=0.7816033 (517.59 it/sec) -training >> step=7190000, episode=1199 reward=0.7876756 (547.09 it/sec) -training >> step=7190100, episode=1199 reward=0.7861639 (500.86 it/sec) -training >> step=7190200, episode=1199 reward=0.7719679 (518.48 it/sec) -training >> step=7190300, episode=1199 reward=0.7782276 (498.92 it/sec) -training >> step=7190400, episode=1199 reward=0.7844297 (486.05 it/sec) -training >> step=7190500, episode=1199 reward=0.7731985 (504.06 it/sec) -training >> step=7190600, episode=1199 reward=0.8048283 (493.73 it/sec) -training >> step=7190700, episode=1199 reward=0.7977016 (460.33 it/sec) -training >> step=7190800, episode=1199 reward=0.7983447 (486.55 it/sec) -training >> step=7190900, episode=1199 reward=0.8016648 (506.26 it/sec) -training >> step=7191000, episode=1199 reward=0.7990678 (530.90 it/sec) -training >> step=7191100, episode=1199 reward=0.7815934 (497.92 it/sec) -training >> step=7191200, episode=1199 reward=0.7867348 (515.60 it/sec) -training >> step=7191300, episode=1199 reward=0.7809449 (538.05 it/sec) -training >> step=7191400, episode=1199 reward=0.7966004 (520.81 it/sec) -training >> step=7191500, episode=1199 reward=0.8013133 (494.89 it/sec) -training >> step=7191600, episode=1199 reward=0.7751297 (525.82 it/sec) -training >> step=7191700, episode=1199 reward=0.7841183 (484.87 it/sec) -training >> step=7191800, episode=1199 reward=0.7784922 (529.61 it/sec) -training >> step=7191900, episode=1199 reward=0.7907204 (489.26 it/sec) -training >> step=7192000, episode=1199 reward=0.7529869 (498.37 it/sec) -training >> step=7192100, episode=1199 reward=0.7917169 (450.74 it/sec) -training >> step=7192200, episode=1199 reward=0.7597481 (483.22 it/sec) -training >> step=7192300, episode=1199 reward=0.7986551 (468.32 it/sec) -training >> step=7192400, episode=1199 reward=0.7757372 (493.40 it/sec) -training >> step=7192500, episode=1199 reward=0.7957233 (508.17 it/sec) -training >> step=7192600, episode=1199 reward=0.758373 (451.53 it/sec) -training >> step=7192700, episode=1199 reward=0.7650369 (489.77 it/sec) -training >> step=7192800, episode=1199 reward=0.7748334 (500.23 it/sec) -training >> step=7192900, episode=1199 reward=0.7997516 (499.29 it/sec) -training >> step=7193000, episode=1199 reward=0.7885169 (519.43 it/sec) -training >> step=7193100, episode=1199 reward=0.7937479 (538.28 it/sec) -training >> step=7193200, episode=1199 reward=0.7738091 (489.12 it/sec) -training >> step=7193300, episode=1200 reward=0.7952213 (127.02 it/sec) -training >> step=7193400, episode=1200 reward=0.7752677 (461.48 it/sec) -training >> step=7193500, episode=1200 reward=0.7825907 (466.73 it/sec) -training >> step=7193600, episode=1200 reward=0.7831841 (467.23 it/sec) -training >> step=7193700, episode=1200 reward=0.7634532 (511.17 it/sec) -training >> step=7193800, episode=1200 reward=0.7820888 (522.80 it/sec) -training >> step=7193900, episode=1200 reward=0.7830345 (468.30 it/sec) -training >> step=7194000, episode=1200 reward=0.7965564 (476.61 it/sec) -training >> step=7194100, episode=1200 reward=0.7944677 (493.12 it/sec) -training >> step=7194200, episode=1200 reward=0.7952502 (520.02 it/sec) -training >> step=7194300, episode=1200 reward=0.8042051 (534.86 it/sec) -training >> step=7194400, episode=1200 reward=0.7833465 (435.46 it/sec) -training >> step=7194500, episode=1200 reward=0.7831701 (470.69 it/sec) -training >> step=7194600, episode=1200 reward=0.7844937 (496.82 it/sec) -training >> step=7194700, episode=1200 reward=0.8017625 (377.37 it/sec) -training >> step=7194800, episode=1200 reward=0.7907016 (484.35 it/sec) -training >> step=7194900, episode=1200 reward=0.7765644 (465.10 it/sec) -training >> step=7195000, episode=1200 reward=0.804696 (497.68 it/sec) -training >> step=7195100, episode=1200 reward=0.7620142 (493.51 it/sec) -training >> step=7195200, episode=1200 reward=0.8065951 (516.03 it/sec) -training >> step=7195300, episode=1200 reward=0.7803354 (488.08 it/sec) -training >> step=7195400, episode=1200 reward=0.80155 (476.67 it/sec) -training >> step=7195500, episode=1200 reward=0.7887658 (498.31 it/sec) -training >> step=7195600, episode=1200 reward=0.7908354 (506.02 it/sec) -training >> step=7195700, episode=1200 reward=0.7712479 (476.15 it/sec) -training >> step=7195800, episode=1200 reward=0.7717728 (495.03 it/sec) -training >> step=7195900, episode=1200 reward=0.7765335 (482.67 it/sec) -training >> step=7196000, episode=1200 reward=0.7986261 (545.88 it/sec) -training >> step=7196100, episode=1200 reward=0.7794133 (530.85 it/sec) -training >> step=7196200, episode=1200 reward=0.7603261 (506.66 it/sec) -training >> step=7196300, episode=1200 reward=0.7859469 (500.80 it/sec) -training >> step=7196400, episode=1200 reward=0.8082888 (414.89 it/sec) -training >> step=7196500, episode=1200 reward=0.7861205 (502.30 it/sec) -training >> step=7196600, episode=1200 reward=0.8033684 (502.93 it/sec) -training >> step=7196700, episode=1200 reward=0.7668251 (525.18 it/sec) -training >> step=7196800, episode=1200 reward=0.7576343 (514.53 it/sec) -training >> step=7196900, episode=1200 reward=0.7912971 (495.41 it/sec) -training >> step=7197000, episode=1200 reward=0.7789186 (500.92 it/sec) -training >> step=7197100, episode=1200 reward=0.7744542 (490.67 it/sec) -training >> step=7197200, episode=1200 reward=0.7937479 (483.63 it/sec) -training >> step=7197300, episode=1200 reward=0.7821904 (492.05 it/sec) -training >> step=7197400, episode=1200 reward=0.7786047 (524.70 it/sec) -training >> step=7197500, episode=1200 reward=0.7721981 (488.40 it/sec) -training >> step=7197600, episode=1200 reward=0.7797318 (519.76 it/sec) -training >> step=7197700, episode=1200 reward=0.7689058 (520.90 it/sec) -training >> step=7197800, episode=1200 reward=0.793437 (545.79 it/sec) -training >> step=7197900, episode=1200 reward=0.7718736 (486.92 it/sec) -training >> step=7198000, episode=1200 reward=0.7791596 (516.94 it/sec) -training >> step=7198100, episode=1200 reward=0.7643865 (528.19 it/sec) -training >> step=7198200, episode=1200 reward=0.7874292 (512.11 it/sec) -training >> step=7198300, episode=1200 reward=0.7922614 (508.42 it/sec) -training >> step=7198400, episode=1200 reward=0.7716498 (472.44 it/sec) -training >> step=7198500, episode=1200 reward=0.780525 (512.46 it/sec) -training >> step=7198600, episode=1200 reward=0.7689289 (490.23 it/sec) -training >> step=7198700, episode=1200 reward=0.7805751 (491.01 it/sec) -training >> step=7198800, episode=1200 reward=0.792973 (522.81 it/sec) -training >> step=7198900, episode=1200 reward=0.7719158 (466.25 it/sec) -training >> step=7199000, episode=1200 reward=0.7991719 (478.57 it/sec) -training >> step=7199100, episode=1200 reward=0.7647383 (499.53 it/sec) -training >> step=7199200, episode=1200 reward=0.7779058 (549.33 it/sec) -training >> step=7199300, episode=1201 reward=0.7796889 (120.76 it/sec) -training >> step=7199400, episode=1201 reward=0.7975299 (480.68 it/sec) -training >> step=7199500, episode=1201 reward=0.7743184 (507.59 it/sec) -training >> step=7199600, episode=1201 reward=0.7753668 (556.37 it/sec) -training >> step=7199700, episode=1201 reward=0.7769295 (516.66 it/sec) -training >> step=7199800, episode=1201 reward=0.7899812 (505.38 it/sec) -training >> step=7199900, episode=1201 reward=0.7926558 (494.87 it/sec) -training >> step=7200000, episode=1201 reward=0.7718143 (483.72 it/sec) -training >> step=7200100, episode=1201 reward=0.7848343 (513.67 it/sec) -training >> step=7200200, episode=1201 reward=0.8081775 (421.02 it/sec) -training >> step=7200300, episode=1201 reward=0.774211 (550.35 it/sec) -training >> step=7200400, episode=1201 reward=0.8026437 (511.77 it/sec) -training >> step=7200500, episode=1201 reward=0.7913777 (474.75 it/sec) -training >> step=7200600, episode=1201 reward=0.7781085 (550.18 it/sec) -training >> step=7200700, episode=1201 reward=0.8012053 (468.36 it/sec) -training >> step=7200800, episode=1201 reward=0.7702588 (381.95 it/sec) -training >> step=7200900, episode=1201 reward=0.8121474 (478.79 it/sec) -training >> step=7201000, episode=1201 reward=0.7957818 (509.56 it/sec) -training >> step=7201100, episode=1201 reward=0.7967369 (499.58 it/sec) -training >> step=7201200, episode=1201 reward=0.7864439 (496.92 it/sec) -training >> step=7201300, episode=1201 reward=0.7945553 (487.39 it/sec) -training >> step=7201400, episode=1201 reward=0.7813748 (565.70 it/sec) -training >> step=7201500, episode=1201 reward=0.7984338 (499.92 it/sec) -training >> step=7201600, episode=1201 reward=0.7911233 (506.33 it/sec) -training >> step=7201700, episode=1201 reward=0.7748302 (515.31 it/sec) -training >> step=7201800, episode=1201 reward=0.787621 (485.16 it/sec) -training >> step=7201900, episode=1201 reward=0.7986306 (516.45 it/sec) -training >> step=7202000, episode=1201 reward=0.8019453 (514.99 it/sec) -training >> step=7202100, episode=1201 reward=0.8038188 (538.07 it/sec) -training >> step=7202200, episode=1201 reward=0.7984748 (462.34 it/sec) -training >> step=7202300, episode=1201 reward=0.7918457 (489.84 it/sec) -training >> step=7202400, episode=1201 reward=0.7999713 (517.97 it/sec) -training >> step=7202500, episode=1201 reward=0.7748823 (512.02 it/sec) -training >> step=7202600, episode=1201 reward=0.7668292 (492.04 it/sec) -training >> step=7202700, episode=1201 reward=0.8118408 (495.59 it/sec) -training >> step=7202800, episode=1201 reward=0.787079 (506.57 it/sec) -training >> step=7202900, episode=1201 reward=0.7989053 (501.19 it/sec) -training >> step=7203000, episode=1201 reward=0.7930025 (489.18 it/sec) -training >> step=7203100, episode=1201 reward=0.7805983 (450.07 it/sec) -training >> step=7203200, episode=1201 reward=0.7967181 (528.95 it/sec) -training >> step=7203300, episode=1201 reward=0.7863883 (482.86 it/sec) -training >> step=7203400, episode=1201 reward=0.7991123 (501.61 it/sec) -training >> step=7203500, episode=1201 reward=0.778675 (534.16 it/sec) -training >> step=7203600, episode=1201 reward=0.7967705 (510.80 it/sec) -training >> step=7203700, episode=1201 reward=0.8036841 (500.57 it/sec) -training >> step=7203800, episode=1201 reward=0.7711628 (475.85 it/sec) -training >> step=7203900, episode=1201 reward=0.7838351 (503.69 it/sec) -training >> step=7204000, episode=1201 reward=0.7999894 (438.32 it/sec) -training >> step=7204100, episode=1201 reward=0.7852723 (481.45 it/sec) -training >> step=7204200, episode=1201 reward=0.795647 (507.31 it/sec) -training >> step=7204300, episode=1201 reward=0.7728168 (497.86 it/sec) -training >> step=7204400, episode=1201 reward=0.7716626 (484.22 it/sec) -training >> step=7204500, episode=1201 reward=0.7834123 (501.26 it/sec) -training >> step=7204600, episode=1201 reward=0.781511 (538.46 it/sec) -training >> step=7204700, episode=1201 reward=0.7923999 (511.93 it/sec) -training >> step=7204800, episode=1201 reward=0.7681502 (470.01 it/sec) -training >> step=7204900, episode=1201 reward=0.7701212 (476.62 it/sec) -training >> step=7205000, episode=1201 reward=0.7733196 (495.55 it/sec) -training >> step=7205100, episode=1201 reward=0.7588157 (504.81 it/sec) -training >> step=7205200, episode=1201 reward=0.7728879 (446.79 it/sec) -training >> step=7205300, episode=1202 reward=0.7703576 (69.67 it/sec) -training >> step=7205400, episode=1202 reward=0.7997699 (480.08 it/sec) -training >> step=7205500, episode=1202 reward=0.7807007 (487.04 it/sec) -training >> step=7205600, episode=1202 reward=0.7940007 (525.03 it/sec) -training >> step=7205700, episode=1202 reward=0.7847071 (487.60 it/sec) -training >> step=7205800, episode=1202 reward=0.7906609 (457.92 it/sec) -training >> step=7205900, episode=1202 reward=0.7867513 (531.45 it/sec) -training >> step=7206000, episode=1202 reward=0.7956533 (445.42 it/sec) -training >> step=7206100, episode=1202 reward=0.7796328 (502.34 it/sec) -training >> step=7206200, episode=1202 reward=0.7850778 (498.55 it/sec) -training >> step=7206300, episode=1202 reward=0.791769 (497.59 it/sec) -training >> step=7206400, episode=1202 reward=0.7855415 (466.81 it/sec) -training >> step=7206500, episode=1202 reward=0.8001716 (462.85 it/sec) -training >> step=7206600, episode=1202 reward=0.7734409 (484.78 it/sec) -training >> step=7206700, episode=1202 reward=0.7871733 (455.57 it/sec) -training >> step=7206800, episode=1202 reward=0.7908322 (458.96 it/sec) -training >> step=7206900, episode=1202 reward=0.786318 (380.31 it/sec) -training >> step=7207000, episode=1202 reward=0.8081042 (423.02 it/sec) -training >> step=7207100, episode=1202 reward=0.7945538 (478.77 it/sec) -training >> step=7207200, episode=1202 reward=0.7778894 (509.02 it/sec) -training >> step=7207300, episode=1202 reward=0.7766148 (466.23 it/sec) -training >> step=7207400, episode=1202 reward=0.7888998 (451.46 it/sec) -training >> step=7207500, episode=1202 reward=0.7733489 (484.78 it/sec) -training >> step=7207600, episode=1202 reward=0.8003551 (527.39 it/sec) -training >> step=7207700, episode=1202 reward=0.7977027 (503.18 it/sec) -training >> step=7207800, episode=1202 reward=0.7915524 (500.24 it/sec) -training >> step=7207900, episode=1202 reward=0.768711 (453.79 it/sec) -training >> step=7208000, episode=1202 reward=0.796764 (481.77 it/sec) -training >> step=7208100, episode=1202 reward=0.7886398 (463.94 it/sec) -training >> step=7208200, episode=1202 reward=0.7850508 (491.11 it/sec) -training >> step=7208300, episode=1202 reward=0.770502 (500.37 it/sec) -training >> step=7208400, episode=1202 reward=0.7830154 (494.02 it/sec) -training >> step=7208500, episode=1202 reward=0.7991572 (468.83 it/sec) -training >> step=7208600, episode=1202 reward=0.7920228 (492.14 it/sec) -training >> step=7208700, episode=1202 reward=0.7768314 (487.80 it/sec) -training >> step=7208800, episode=1202 reward=0.7921465 (455.19 it/sec) -training >> step=7208900, episode=1202 reward=0.7687693 (451.18 it/sec) -training >> step=7209000, episode=1202 reward=0.7765812 (488.29 it/sec) -training >> step=7209100, episode=1202 reward=0.7917217 (523.63 it/sec) -training >> step=7209200, episode=1202 reward=0.7898554 (485.80 it/sec) -training >> step=7209300, episode=1202 reward=0.7941164 (478.61 it/sec) -training >> step=7209400, episode=1202 reward=0.7980542 (524.13 it/sec) -training >> step=7209500, episode=1202 reward=0.7711458 (501.45 it/sec) -training >> step=7209600, episode=1202 reward=0.7718343 (405.92 it/sec) -training >> step=7209700, episode=1202 reward=0.7794188 (483.12 it/sec) -training >> step=7209800, episode=1202 reward=0.7726202 (487.51 it/sec) -training >> step=7209900, episode=1202 reward=0.7747235 (470.92 it/sec) -training >> step=7210000, episode=1202 reward=0.771603 (492.58 it/sec) -training >> step=7210100, episode=1202 reward=0.7906685 (491.84 it/sec) -training >> step=7210200, episode=1202 reward=0.7840233 (517.28 it/sec) -training >> step=7210300, episode=1202 reward=0.7852351 (487.83 it/sec) -training >> step=7210400, episode=1202 reward=0.7912935 (514.42 it/sec) -training >> step=7210500, episode=1202 reward=0.7988303 (522.33 it/sec) -training >> step=7210600, episode=1202 reward=0.7918999 (490.70 it/sec) -training >> step=7210700, episode=1202 reward=0.7787292 (494.82 it/sec) -training >> step=7210800, episode=1202 reward=0.7798051 (495.47 it/sec) -training >> step=7210900, episode=1202 reward=0.7884005 (514.41 it/sec) -training >> step=7211000, episode=1202 reward=0.7767535 (479.14 it/sec) -training >> step=7211100, episode=1202 reward=0.7991868 (467.31 it/sec) -training >> step=7211200, episode=1202 reward=0.7774512 (509.16 it/sec) -training >> step=7211300, episode=1203 reward=0.7761 (115.17 it/sec) -training >> step=7211400, episode=1203 reward=0.7571276 (481.89 it/sec) -training >> step=7211500, episode=1203 reward=0.7899873 (483.90 it/sec) -training >> step=7211600, episode=1203 reward=0.7807745 (492.45 it/sec) -training >> step=7211700, episode=1203 reward=0.7861272 (488.46 it/sec) -training >> step=7211800, episode=1203 reward=0.7776299 (452.20 it/sec) -training >> step=7211900, episode=1203 reward=0.7675656 (463.80 it/sec) -training >> step=7212000, episode=1203 reward=0.7874286 (511.91 it/sec) -training >> step=7212100, episode=1203 reward=0.7883206 (485.15 it/sec) -training >> step=7212200, episode=1203 reward=0.772506 (493.54 it/sec) -training >> step=7212300, episode=1203 reward=0.7725031 (480.02 it/sec) -training >> step=7212400, episode=1203 reward=0.7975094 (435.11 it/sec) -training >> step=7212500, episode=1203 reward=0.7746596 (468.42 it/sec) -training >> step=7212600, episode=1203 reward=0.7849028 (471.13 it/sec) -training >> step=7212700, episode=1203 reward=0.7931271 (524.93 it/sec) -training >> step=7212800, episode=1203 reward=0.7998716 (474.79 it/sec) -training >> step=7212900, episode=1203 reward=0.8053907 (433.91 it/sec) -training >> step=7213000, episode=1203 reward=0.8025898 (507.79 it/sec) -training >> step=7213100, episode=1203 reward=0.7925236 (361.52 it/sec) -training >> step=7213200, episode=1203 reward=0.7735771 (486.69 it/sec) -training >> step=7213300, episode=1203 reward=0.8110731 (484.69 it/sec) -training >> step=7213400, episode=1203 reward=0.7782577 (439.29 it/sec) -training >> step=7213500, episode=1203 reward=0.7919813 (485.80 it/sec) -training >> step=7213600, episode=1203 reward=0.7961403 (456.78 it/sec) -training >> step=7213700, episode=1203 reward=0.7748002 (485.91 it/sec) -training >> step=7213800, episode=1203 reward=0.802834 (476.38 it/sec) -training >> step=7213900, episode=1203 reward=0.7854386 (497.98 it/sec) -training >> step=7214000, episode=1203 reward=0.8096139 (447.33 it/sec) -training >> step=7214100, episode=1203 reward=0.7848379 (515.37 it/sec) -training >> step=7214200, episode=1203 reward=0.7967628 (476.29 it/sec) -training >> step=7214300, episode=1203 reward=0.7735929 (478.17 it/sec) -training >> step=7214400, episode=1203 reward=0.7898036 (492.97 it/sec) -training >> step=7214500, episode=1203 reward=0.7673641 (489.94 it/sec) -training >> step=7214600, episode=1203 reward=0.7979245 (512.72 it/sec) -training >> step=7214700, episode=1203 reward=0.7919584 (465.95 it/sec) -training >> step=7214800, episode=1203 reward=0.8076455 (454.54 it/sec) -training >> step=7214900, episode=1203 reward=0.7718288 (504.67 it/sec) -training >> step=7215000, episode=1203 reward=0.7816621 (461.98 it/sec) -training >> step=7215100, episode=1203 reward=0.7895836 (453.32 it/sec) -training >> step=7215200, episode=1203 reward=0.8116851 (523.09 it/sec) -training >> step=7215300, episode=1203 reward=0.7897111 (465.54 it/sec) -training >> step=7215400, episode=1203 reward=0.7979957 (481.92 it/sec) -training >> step=7215500, episode=1203 reward=0.7701792 (509.46 it/sec) -training >> step=7215600, episode=1203 reward=0.7857292 (482.22 it/sec) -training >> step=7215700, episode=1203 reward=0.7905695 (509.67 it/sec) -training >> step=7215800, episode=1203 reward=0.7981283 (448.18 it/sec) -training >> step=7215900, episode=1203 reward=0.7975326 (531.80 it/sec) -training >> step=7216000, episode=1203 reward=0.8055122 (429.59 it/sec) -training >> step=7216100, episode=1203 reward=0.7840393 (492.97 it/sec) -training >> step=7216200, episode=1203 reward=0.7879686 (448.09 it/sec) -training >> step=7216300, episode=1203 reward=0.7784663 (512.46 it/sec) -training >> step=7216400, episode=1203 reward=0.7910526 (460.94 it/sec) -training >> step=7216500, episode=1203 reward=0.7883913 (500.42 it/sec) -training >> step=7216600, episode=1203 reward=0.7882493 (507.07 it/sec) -training >> step=7216700, episode=1203 reward=0.7737837 (442.84 it/sec) -training >> step=7216800, episode=1203 reward=0.7878796 (434.66 it/sec) -training >> step=7216900, episode=1203 reward=0.7810275 (497.96 it/sec) -training >> step=7217000, episode=1203 reward=0.790199 (491.23 it/sec) -training >> step=7217100, episode=1203 reward=0.7925735 (477.78 it/sec) -training >> step=7217200, episode=1203 reward=0.7847223 (499.48 it/sec) -training >> step=7217300, episode=1204 reward=0.7810353 (98.35 it/sec) -training >> step=7217400, episode=1204 reward=0.77314 (483.36 it/sec) -training >> step=7217500, episode=1204 reward=0.7677959 (465.07 it/sec) -training >> step=7217600, episode=1204 reward=0.7836168 (480.18 it/sec) -training >> step=7217700, episode=1204 reward=0.7688316 (477.19 it/sec) -training >> step=7217800, episode=1204 reward=0.7855003 (485.32 it/sec) -training >> step=7217900, episode=1204 reward=0.7696221 (502.27 it/sec) -training >> step=7218000, episode=1204 reward=0.7889993 (519.33 it/sec) -training >> step=7218100, episode=1204 reward=0.7797306 (495.95 it/sec) -training >> step=7218200, episode=1204 reward=0.7770398 (465.09 it/sec) -training >> step=7218300, episode=1204 reward=0.7979084 (478.20 it/sec) -training >> step=7218400, episode=1204 reward=0.7979521 (415.70 it/sec) -training >> step=7218500, episode=1204 reward=0.8022664 (505.15 it/sec) -training >> step=7218600, episode=1204 reward=0.7948081 (509.13 it/sec) -training >> step=7218700, episode=1204 reward=0.779069 (500.71 it/sec) -training >> step=7218800, episode=1204 reward=0.7775688 (467.90 it/sec) -training >> step=7218900, episode=1204 reward=0.7876713 (518.74 it/sec) -training >> step=7219000, episode=1204 reward=0.7760017 (494.72 it/sec) -training >> step=7219100, episode=1204 reward=0.7922865 (504.12 it/sec) -training >> step=7219200, episode=1204 reward=0.7942563 (509.72 it/sec) -training >> step=7219300, episode=1204 reward=0.7883778 (382.27 it/sec) -training >> step=7219400, episode=1204 reward=0.8108304 (455.02 it/sec) -training >> step=7219500, episode=1204 reward=0.7682657 (456.40 it/sec) -training >> step=7219600, episode=1204 reward=0.8157339 (411.85 it/sec) -training >> step=7219700, episode=1204 reward=0.8036619 (441.84 it/sec) -training >> step=7219800, episode=1204 reward=0.7950023 (421.12 it/sec) -training >> step=7219900, episode=1204 reward=0.8108984 (481.21 it/sec) -training >> step=7220000, episode=1204 reward=0.7875777 (528.36 it/sec) -training >> step=7220100, episode=1204 reward=0.792603 (492.35 it/sec) -training >> step=7220200, episode=1204 reward=0.7707958 (471.54 it/sec) -training >> step=7220300, episode=1204 reward=0.7945201 (491.46 it/sec) -training >> step=7220400, episode=1204 reward=0.770843 (483.67 it/sec) -training >> step=7220500, episode=1204 reward=0.7852725 (449.36 it/sec) -training >> step=7220600, episode=1204 reward=0.7911122 (453.22 it/sec) -training >> step=7220700, episode=1204 reward=0.7957886 (503.75 it/sec) -training >> step=7220800, episode=1204 reward=0.7808409 (440.75 it/sec) -training >> step=7220900, episode=1204 reward=0.7813852 (495.04 it/sec) -training >> step=7221000, episode=1204 reward=0.7723055 (497.14 it/sec) -training >> step=7221100, episode=1204 reward=0.7860854 (500.01 it/sec) -training >> step=7221200, episode=1204 reward=0.7857618 (509.60 it/sec) -training >> step=7221300, episode=1204 reward=0.7686271 (490.56 it/sec) -training >> step=7221400, episode=1204 reward=0.7842426 (549.93 it/sec) -training >> step=7221500, episode=1204 reward=0.7856124 (541.13 it/sec) -training >> step=7221600, episode=1204 reward=0.7787216 (495.61 it/sec) -training >> step=7221700, episode=1204 reward=0.8021008 (498.38 it/sec) -training >> step=7221800, episode=1204 reward=0.7800744 (533.74 it/sec) -training >> step=7221900, episode=1204 reward=0.7816648 (499.11 it/sec) -training >> step=7222000, episode=1204 reward=0.788387 (529.65 it/sec) -training >> step=7222100, episode=1204 reward=0.7949455 (486.60 it/sec) -training >> step=7222200, episode=1204 reward=0.783161 (536.86 it/sec) -training >> step=7222300, episode=1204 reward=0.7890241 (508.04 it/sec) -training >> step=7222400, episode=1204 reward=0.7717327 (507.20 it/sec) -training >> step=7222500, episode=1204 reward=0.7714792 (498.16 it/sec) -training >> step=7222600, episode=1204 reward=0.7827696 (516.62 it/sec) -training >> step=7222700, episode=1204 reward=0.7918879 (493.71 it/sec) -training >> step=7222800, episode=1204 reward=0.7896123 (505.48 it/sec) -training >> step=7222900, episode=1204 reward=0.7912089 (517.66 it/sec) -training >> step=7223000, episode=1204 reward=0.7771634 (513.51 it/sec) -training >> step=7223100, episode=1204 reward=0.788905 (511.20 it/sec) -training >> step=7223200, episode=1204 reward=0.7730084 (533.13 it/sec) -training >> step=7223300, episode=1205 reward=0.78409 (70.85 it/sec) -training >> step=7223400, episode=1205 reward=0.8029388 (471.12 it/sec) -training >> step=7223500, episode=1205 reward=0.7870044 (502.28 it/sec) -training >> step=7223600, episode=1205 reward=0.7922244 (538.33 it/sec) -training >> step=7223700, episode=1205 reward=0.7585208 (508.50 it/sec) -training >> step=7223800, episode=1205 reward=0.7579345 (528.28 it/sec) -training >> step=7223900, episode=1205 reward=0.7929012 (538.22 it/sec) -training >> step=7224000, episode=1205 reward=0.7932975 (531.90 it/sec) -training >> step=7224100, episode=1205 reward=0.8052143 (521.12 it/sec) -training >> step=7224200, episode=1205 reward=0.7891675 (518.10 it/sec) -training >> step=7224300, episode=1205 reward=0.7710897 (492.63 it/sec) -training >> step=7224400, episode=1205 reward=0.7710683 (534.28 it/sec) -training >> step=7224500, episode=1205 reward=0.7900606 (539.16 it/sec) -training >> step=7224600, episode=1205 reward=0.7826406 (494.17 it/sec) -training >> step=7224700, episode=1205 reward=0.7888243 (522.31 it/sec) -training >> step=7224800, episode=1205 reward=0.7860132 (508.97 it/sec) -training >> step=7224900, episode=1205 reward=0.7855984 (511.73 it/sec) -training >> step=7225000, episode=1205 reward=0.783851 (491.00 it/sec) -training >> step=7225100, episode=1205 reward=0.7912324 (503.46 it/sec) -training >> step=7225200, episode=1205 reward=0.8089488 (546.03 it/sec) -training >> step=7225300, episode=1205 reward=0.7860611 (475.53 it/sec) -training >> step=7225400, episode=1205 reward=0.7971379 (507.20 it/sec) -training >> step=7225500, episode=1205 reward=0.7814714 (558.07 it/sec) -training >> step=7225600, episode=1205 reward=0.7563173 (374.52 it/sec) -training >> step=7225700, episode=1205 reward=0.7924005 (480.86 it/sec) -training >> step=7225800, episode=1205 reward=0.7849681 (496.46 it/sec) -training >> step=7225900, episode=1205 reward=0.7991497 (480.30 it/sec) -training >> step=7226000, episode=1205 reward=0.7987738 (477.36 it/sec) -training >> step=7226100, episode=1205 reward=0.8056702 (507.20 it/sec) -training >> step=7226200, episode=1205 reward=0.7958723 (513.39 it/sec) -training >> step=7226300, episode=1205 reward=0.7693238 (543.40 it/sec) -training >> step=7226400, episode=1205 reward=0.7872712 (473.45 it/sec) -training >> step=7226500, episode=1205 reward=0.7791403 (481.56 it/sec) -training >> step=7226600, episode=1205 reward=0.7825974 (500.58 it/sec) -training >> step=7226700, episode=1205 reward=0.7716828 (489.16 it/sec) -training >> step=7226800, episode=1205 reward=0.7994931 (472.61 it/sec) -training >> step=7226900, episode=1205 reward=0.8052668 (448.57 it/sec) -training >> step=7227000, episode=1205 reward=0.7912349 (538.61 it/sec) -training >> step=7227100, episode=1205 reward=0.7933154 (522.86 it/sec) -training >> step=7227200, episode=1205 reward=0.7887306 (490.47 it/sec) -training >> step=7227300, episode=1205 reward=0.7973365 (434.99 it/sec) -training >> step=7227400, episode=1205 reward=0.7792711 (405.91 it/sec) -training >> step=7227500, episode=1205 reward=0.788833 (433.92 it/sec) -training >> step=7227600, episode=1205 reward=0.7840907 (468.29 it/sec) -training >> step=7227700, episode=1205 reward=0.7700506 (492.97 it/sec) -training >> step=7227800, episode=1205 reward=0.7961379 (459.58 it/sec) -training >> step=7227900, episode=1205 reward=0.7942604 (485.25 it/sec) -training >> step=7228000, episode=1205 reward=0.7815918 (481.36 it/sec) -training >> step=7228100, episode=1205 reward=0.7678278 (504.57 it/sec) -training >> step=7228200, episode=1205 reward=0.7902423 (494.88 it/sec) -training >> step=7228300, episode=1205 reward=0.7880144 (486.14 it/sec) -training >> step=7228400, episode=1205 reward=0.7658241 (518.23 it/sec) -training >> step=7228500, episode=1205 reward=0.7799977 (525.18 it/sec) -training >> step=7228600, episode=1205 reward=0.7745742 (518.87 it/sec) -training >> step=7228700, episode=1205 reward=0.7722392 (527.68 it/sec) -training >> step=7228800, episode=1205 reward=0.78268 (527.33 it/sec) -training >> step=7228900, episode=1205 reward=0.7764854 (519.29 it/sec) -training >> step=7229000, episode=1205 reward=0.7861555 (532.00 it/sec) -training >> step=7229100, episode=1205 reward=0.8006036 (504.76 it/sec) -training >> step=7229200, episode=1205 reward=0.7873904 (549.07 it/sec) -training >> step=7229300, episode=1206 reward=0.7665722 (90.57 it/sec) -training >> step=7229400, episode=1206 reward=0.7989267 (506.20 it/sec) -training >> step=7229500, episode=1206 reward=0.7747773 (497.86 it/sec) -training >> step=7229600, episode=1206 reward=0.8000324 (521.95 it/sec) -training >> step=7229700, episode=1206 reward=0.7710413 (519.71 it/sec) -training >> step=7229800, episode=1206 reward=0.8072791 (515.54 it/sec) -training >> step=7229900, episode=1206 reward=0.7828416 (540.46 it/sec) -training >> step=7230000, episode=1206 reward=0.7803388 (504.93 it/sec) -training >> step=7230100, episode=1206 reward=0.7680122 (504.40 it/sec) -training >> step=7230200, episode=1206 reward=0.7935877 (514.04 it/sec) -training >> step=7230300, episode=1206 reward=0.782116 (530.99 it/sec) -training >> step=7230400, episode=1206 reward=0.7906295 (529.80 it/sec) -training >> step=7230500, episode=1206 reward=0.7804706 (513.17 it/sec) -training >> step=7230600, episode=1206 reward=0.8005466 (494.13 it/sec) -training >> step=7230700, episode=1206 reward=0.7898603 (519.58 it/sec) -training >> step=7230800, episode=1206 reward=0.7783785 (516.83 it/sec) -training >> step=7230900, episode=1206 reward=0.7870882 (534.06 it/sec) -training >> step=7231000, episode=1206 reward=0.7658384 (532.67 it/sec) -training >> step=7231100, episode=1206 reward=0.7882838 (491.35 it/sec) -training >> step=7231200, episode=1206 reward=0.7833703 (549.05 it/sec) -training >> step=7231300, episode=1206 reward=0.7986133 (514.78 it/sec) -training >> step=7231400, episode=1206 reward=0.7722996 (470.87 it/sec) -training >> step=7231500, episode=1206 reward=0.8035649 (540.83 it/sec) -training >> step=7231600, episode=1206 reward=0.7867988 (533.26 it/sec) -training >> step=7231700, episode=1206 reward=0.800491 (537.28 it/sec) -training >> step=7231800, episode=1206 reward=0.801508 (375.09 it/sec) -training >> step=7231900, episode=1206 reward=0.7857545 (531.75 it/sec) -training >> step=7232000, episode=1206 reward=0.7889016 (516.35 it/sec) -training >> step=7232100, episode=1206 reward=0.7743011 (475.43 it/sec) -training >> step=7232200, episode=1206 reward=0.7941655 (547.41 it/sec) -training >> step=7232300, episode=1206 reward=0.7836322 (522.51 it/sec) -training >> step=7232400, episode=1206 reward=0.7824352 (520.20 it/sec) -training >> step=7232500, episode=1206 reward=0.7898684 (496.03 it/sec) -training >> step=7232600, episode=1206 reward=0.7806547 (519.16 it/sec) -training >> step=7232700, episode=1206 reward=0.8010148 (517.18 it/sec) -training >> step=7232800, episode=1206 reward=0.8119336 (498.43 it/sec) -training >> step=7232900, episode=1206 reward=0.793624 (525.88 it/sec) -training >> step=7233000, episode=1206 reward=0.7724087 (539.42 it/sec) -training >> step=7233100, episode=1206 reward=0.7926524 (520.68 it/sec) -training >> step=7233200, episode=1206 reward=0.7835792 (513.79 it/sec) -training >> step=7233300, episode=1206 reward=0.7771376 (535.52 it/sec) -training >> step=7233400, episode=1206 reward=0.7872222 (520.76 it/sec) -training >> step=7233500, episode=1206 reward=0.7769303 (544.76 it/sec) -training >> step=7233600, episode=1206 reward=0.7816738 (516.81 it/sec) -training >> step=7233700, episode=1206 reward=0.7871715 (529.87 it/sec) -training >> step=7233800, episode=1206 reward=0.7821669 (518.41 it/sec) -training >> step=7233900, episode=1206 reward=0.7860804 (490.47 it/sec) -training >> step=7234000, episode=1206 reward=0.7894672 (538.91 it/sec) -training >> step=7234100, episode=1206 reward=0.7928574 (523.95 it/sec) -training >> step=7234200, episode=1206 reward=0.8041703 (513.63 it/sec) -training >> step=7234300, episode=1206 reward=0.7694064 (528.50 it/sec) -training >> step=7234400, episode=1206 reward=0.7903699 (550.67 it/sec) -training >> step=7234500, episode=1206 reward=0.7823543 (484.84 it/sec) -training >> step=7234600, episode=1206 reward=0.7950486 (495.93 it/sec) -training >> step=7234700, episode=1206 reward=0.7767045 (511.33 it/sec) -training >> step=7234800, episode=1206 reward=0.7915554 (531.90 it/sec) -training >> step=7234900, episode=1206 reward=0.7846398 (514.62 it/sec) -training >> step=7235000, episode=1206 reward=0.7980708 (515.37 it/sec) -training >> step=7235100, episode=1206 reward=0.7775249 (518.68 it/sec) -training >> step=7235200, episode=1206 reward=0.8134644 (516.64 it/sec) -training >> step=7235300, episode=1207 reward=0.799408 (121.57 it/sec) -training >> step=7235400, episode=1207 reward=0.7950167 (499.71 it/sec) -training >> step=7235500, episode=1207 reward=0.8124344 (529.55 it/sec) -training >> step=7235600, episode=1207 reward=0.7734768 (534.38 it/sec) -training >> step=7235700, episode=1207 reward=0.8077542 (488.03 it/sec) -training >> step=7235800, episode=1207 reward=0.7982793 (544.92 it/sec) -training >> step=7235900, episode=1207 reward=0.795509 (519.41 it/sec) -training >> step=7236000, episode=1207 reward=0.7632256 (497.66 it/sec) -training >> step=7236100, episode=1207 reward=0.7943067 (523.39 it/sec) -training >> step=7236200, episode=1207 reward=0.792188 (533.91 it/sec) -training >> step=7236300, episode=1207 reward=0.8038462 (526.23 it/sec) -training >> step=7236400, episode=1207 reward=0.771408 (525.52 it/sec) -training >> step=7236500, episode=1207 reward=0.7853343 (505.65 it/sec) -training >> step=7236600, episode=1207 reward=0.7906697 (539.14 it/sec) -training >> step=7236700, episode=1207 reward=0.7801133 (545.28 it/sec) -training >> step=7236800, episode=1207 reward=0.7768939 (504.16 it/sec) -training >> step=7236900, episode=1207 reward=0.8046266 (495.77 it/sec) -training >> step=7237000, episode=1207 reward=0.8239942 (473.40 it/sec) -training >> step=7237100, episode=1207 reward=0.7858304 (467.46 it/sec) -training >> step=7237200, episode=1207 reward=0.7900791 (514.75 it/sec) -training >> step=7237300, episode=1207 reward=0.8024016 (509.18 it/sec) -training >> step=7237400, episode=1207 reward=0.8002962 (479.39 it/sec) -training >> step=7237500, episode=1207 reward=0.7817871 (456.94 it/sec) -training >> step=7237600, episode=1207 reward=0.7746969 (519.88 it/sec) -training >> step=7237700, episode=1207 reward=0.7872056 (489.21 it/sec) -training >> step=7237800, episode=1207 reward=0.7759066 (367.24 it/sec) -training >> step=7237900, episode=1207 reward=0.7626535 (507.76 it/sec) -training >> step=7238000, episode=1207 reward=0.7656375 (533.87 it/sec) -training >> step=7238100, episode=1207 reward=0.8006685 (514.83 it/sec) -training >> step=7238200, episode=1207 reward=0.7748093 (523.88 it/sec) -training >> step=7238300, episode=1207 reward=0.8025329 (504.43 it/sec) -training >> step=7238400, episode=1207 reward=0.7915668 (523.01 it/sec) -training >> step=7238500, episode=1207 reward=0.7896678 (476.26 it/sec) -training >> step=7238600, episode=1207 reward=0.7826887 (519.79 it/sec) -training >> step=7238700, episode=1207 reward=0.8089115 (508.03 it/sec) -training >> step=7238800, episode=1207 reward=0.8011755 (508.79 it/sec) -training >> step=7238900, episode=1207 reward=0.7782602 (503.56 it/sec) -training >> step=7239000, episode=1207 reward=0.7684785 (540.85 it/sec) -training >> step=7239100, episode=1207 reward=0.7902695 (529.20 it/sec) -training >> step=7239200, episode=1207 reward=0.7838809 (519.71 it/sec) -training >> step=7239300, episode=1207 reward=0.7966148 (538.50 it/sec) -training >> step=7239400, episode=1207 reward=0.7907168 (509.73 it/sec) -training >> step=7239500, episode=1207 reward=0.7803478 (522.65 it/sec) -training >> step=7239600, episode=1207 reward=0.7813436 (506.91 it/sec) -training >> step=7239700, episode=1207 reward=0.7773898 (433.22 it/sec) -training >> step=7239800, episode=1207 reward=0.8020999 (501.69 it/sec) -training >> step=7239900, episode=1207 reward=0.7844544 (488.10 it/sec) -training >> step=7240000, episode=1207 reward=0.7675791 (451.19 it/sec) -training >> step=7240100, episode=1207 reward=0.7871195 (495.05 it/sec) -training >> step=7240200, episode=1207 reward=0.7851277 (495.91 it/sec) -training >> step=7240300, episode=1207 reward=0.7717903 (519.27 it/sec) -training >> step=7240400, episode=1207 reward=0.7797698 (508.13 it/sec) -training >> step=7240500, episode=1207 reward=0.7920592 (524.69 it/sec) -training >> step=7240600, episode=1207 reward=0.7912187 (486.49 it/sec) -training >> step=7240700, episode=1207 reward=0.7947186 (514.05 it/sec) -training >> step=7240800, episode=1207 reward=0.7855696 (482.75 it/sec) -training >> step=7240900, episode=1207 reward=0.7723397 (493.70 it/sec) -training >> step=7241000, episode=1207 reward=0.7917176 (511.63 it/sec) -training >> step=7241100, episode=1207 reward=0.76641 (492.35 it/sec) -training >> step=7241200, episode=1207 reward=0.798034 (493.10 it/sec) -training >> step=7241300, episode=1208 reward=0.7886112 (116.92 it/sec) -training >> step=7241400, episode=1208 reward=0.7785633 (518.89 it/sec) -training >> step=7241500, episode=1208 reward=0.7872185 (509.19 it/sec) -training >> step=7241600, episode=1208 reward=0.7804834 (512.87 it/sec) -training >> step=7241700, episode=1208 reward=0.8060996 (539.27 it/sec) -training >> step=7241800, episode=1208 reward=0.7781133 (458.66 it/sec) -training >> step=7241900, episode=1208 reward=0.80102 (512.24 it/sec) -training >> step=7242000, episode=1208 reward=0.7898633 (502.84 it/sec) -training >> step=7242100, episode=1208 reward=0.788917 (510.69 it/sec) -training >> step=7242200, episode=1208 reward=0.7784958 (511.86 it/sec) -training >> step=7242300, episode=1208 reward=0.7740787 (453.76 it/sec) -training >> step=7242400, episode=1208 reward=0.802638 (490.71 it/sec) -training >> step=7242500, episode=1208 reward=0.7720197 (481.01 it/sec) -training >> step=7242600, episode=1208 reward=0.804723 (519.43 it/sec) -training >> step=7242700, episode=1208 reward=0.7950323 (498.20 it/sec) -training >> step=7242800, episode=1208 reward=0.7975854 (393.98 it/sec) -training >> step=7242900, episode=1208 reward=0.7946262 (402.85 it/sec) -training >> step=7243000, episode=1208 reward=0.792685 (453.40 it/sec) -training >> step=7243100, episode=1208 reward=0.7974312 (407.80 it/sec) -training >> step=7243200, episode=1208 reward=0.7928407 (438.10 it/sec) -training >> step=7243300, episode=1208 reward=0.7828941 (448.50 it/sec) -training >> step=7243400, episode=1208 reward=0.7928177 (530.00 it/sec) -training >> step=7243500, episode=1208 reward=0.7872094 (499.63 it/sec) -training >> step=7243600, episode=1208 reward=0.7803631 (509.86 it/sec) -training >> step=7243700, episode=1208 reward=0.8111913 (473.75 it/sec) -training >> step=7243800, episode=1208 reward=0.7903461 (512.05 it/sec) -training >> step=7243900, episode=1208 reward=0.8011429 (351.77 it/sec) -training >> step=7244000, episode=1208 reward=0.7675141 (509.87 it/sec) -training >> step=7244100, episode=1208 reward=0.8064983 (519.97 it/sec) -training >> step=7244200, episode=1208 reward=0.7755789 (503.30 it/sec) -training >> step=7244300, episode=1208 reward=0.7833955 (512.44 it/sec) -training >> step=7244400, episode=1208 reward=0.7968699 (471.02 it/sec) -training >> step=7244500, episode=1208 reward=0.7861832 (474.22 it/sec) -training >> step=7244600, episode=1208 reward=0.7711124 (433.74 it/sec) -training >> step=7244700, episode=1208 reward=0.7736934 (480.66 it/sec) -training >> step=7244800, episode=1208 reward=0.7728732 (500.13 it/sec) -training >> step=7244900, episode=1208 reward=0.7841314 (520.71 it/sec) -training >> step=7245000, episode=1208 reward=0.7932366 (511.19 it/sec) -training >> step=7245100, episode=1208 reward=0.7835809 (497.69 it/sec) -training >> step=7245200, episode=1208 reward=0.7855981 (517.54 it/sec) -training >> step=7245300, episode=1208 reward=0.8012083 (488.29 it/sec) -training >> step=7245400, episode=1208 reward=0.7780483 (498.63 it/sec) -training >> step=7245500, episode=1208 reward=0.7567884 (522.87 it/sec) -training >> step=7245600, episode=1208 reward=0.7921647 (521.37 it/sec) -training >> step=7245700, episode=1208 reward=0.7834648 (506.60 it/sec) -training >> step=7245800, episode=1208 reward=0.7812485 (495.79 it/sec) -training >> step=7245900, episode=1208 reward=0.7881889 (495.49 it/sec) -training >> step=7246000, episode=1208 reward=0.7723596 (480.55 it/sec) -training >> step=7246100, episode=1208 reward=0.7657532 (477.96 it/sec) -training >> step=7246200, episode=1208 reward=0.7797514 (428.18 it/sec) -training >> step=7246300, episode=1208 reward=0.7812971 (446.60 it/sec) -training >> step=7246400, episode=1208 reward=0.7746392 (504.34 it/sec) -training >> step=7246500, episode=1208 reward=0.7784845 (493.14 it/sec) -training >> step=7246600, episode=1208 reward=0.7737857 (499.86 it/sec) -training >> step=7246700, episode=1208 reward=0.780863 (473.29 it/sec) -training >> step=7246800, episode=1208 reward=0.7970574 (483.52 it/sec) -training >> step=7246900, episode=1208 reward=0.7925768 (445.79 it/sec) -training >> step=7247000, episode=1208 reward=0.7894637 (463.24 it/sec) -training >> step=7247100, episode=1208 reward=0.7955492 (492.00 it/sec) -training >> step=7247200, episode=1208 reward=0.7754174 (453.59 it/sec) -training >> step=7247300, episode=1209 reward=0.779471 (52.43 it/sec) -training >> step=7247400, episode=1209 reward=0.7896391 (513.58 it/sec) -training >> step=7247500, episode=1209 reward=0.7663901 (477.67 it/sec) -training >> step=7247600, episode=1209 reward=0.7804896 (473.58 it/sec) -training >> step=7247700, episode=1209 reward=0.7831092 (492.83 it/sec) -training >> step=7247800, episode=1209 reward=0.7985795 (508.01 it/sec) -training >> step=7247900, episode=1209 reward=0.7849721 (501.83 it/sec) -training >> step=7248000, episode=1209 reward=0.7739861 (475.45 it/sec) -training >> step=7248100, episode=1209 reward=0.771598 (469.56 it/sec) -training >> step=7248200, episode=1209 reward=0.7925893 (531.22 it/sec) -training >> step=7248300, episode=1209 reward=0.7941476 (502.00 it/sec) -training >> step=7248400, episode=1209 reward=0.7934445 (479.23 it/sec) -training >> step=7248500, episode=1209 reward=0.7850576 (508.63 it/sec) -training >> step=7248600, episode=1209 reward=0.7915531 (513.82 it/sec) -training >> step=7248700, episode=1209 reward=0.8036764 (479.50 it/sec) -training >> step=7248800, episode=1209 reward=0.7763695 (480.12 it/sec) -training >> step=7248900, episode=1209 reward=0.8006509 (515.06 it/sec) -training >> step=7249000, episode=1209 reward=0.7868429 (494.16 it/sec) -training >> step=7249100, episode=1209 reward=0.802293 (534.89 it/sec) -training >> step=7249200, episode=1209 reward=0.7957333 (475.86 it/sec) -training >> step=7249300, episode=1209 reward=0.7843388 (499.25 it/sec) -training >> step=7249400, episode=1209 reward=0.8041136 (493.99 it/sec) -training >> step=7249500, episode=1209 reward=0.7894738 (508.83 it/sec) -training >> step=7249600, episode=1209 reward=0.8058798 (475.91 it/sec) -training >> step=7249700, episode=1209 reward=0.788611 (479.89 it/sec) -training >> step=7249800, episode=1209 reward=0.8012022 (522.41 it/sec) -training >> step=7249900, episode=1209 reward=0.7807084 (510.19 it/sec) -training >> step=7250000, episode=1209 reward=0.8015316 (512.17 it/sec) -training >> step=7250100, episode=1209 reward=0.793418 (502.82 it/sec) -training >> step=7250200, episode=1209 reward=0.7920014 (361.27 it/sec) -training >> step=7250300, episode=1209 reward=0.795329 (483.06 it/sec) -training >> step=7250400, episode=1209 reward=0.800739 (482.43 it/sec) -training >> step=7250500, episode=1209 reward=0.7774509 (485.79 it/sec) -training >> step=7250600, episode=1209 reward=0.7791021 (488.86 it/sec) -training >> step=7250700, episode=1209 reward=0.7824455 (482.84 it/sec) -training >> step=7250800, episode=1209 reward=0.7826295 (494.29 it/sec) -training >> step=7250900, episode=1209 reward=0.7862047 (524.36 it/sec) -training >> step=7251000, episode=1209 reward=0.7778407 (518.94 it/sec) -training >> step=7251100, episode=1209 reward=0.8024014 (494.74 it/sec) -training >> step=7251200, episode=1209 reward=0.7919449 (509.74 it/sec) -training >> step=7251300, episode=1209 reward=0.7985461 (517.20 it/sec) -training >> step=7251400, episode=1209 reward=0.796199 (495.65 it/sec) -training >> step=7251500, episode=1209 reward=0.7995276 (529.18 it/sec) -training >> step=7251600, episode=1209 reward=0.7859764 (490.29 it/sec) -training >> step=7251700, episode=1209 reward=0.7853575 (485.61 it/sec) -training >> step=7251800, episode=1209 reward=0.7937534 (519.68 it/sec) -training >> step=7251900, episode=1209 reward=0.773932 (465.52 it/sec) -training >> step=7252000, episode=1209 reward=0.7822331 (485.21 it/sec) -training >> step=7252100, episode=1209 reward=0.7538306 (493.04 it/sec) -training >> step=7252200, episode=1209 reward=0.7898152 (505.99 it/sec) -training >> step=7252300, episode=1209 reward=0.77953 (481.97 it/sec) -training >> step=7252400, episode=1209 reward=0.7624589 (533.33 it/sec) -training >> step=7252500, episode=1209 reward=0.7822839 (477.96 it/sec) -training >> step=7252600, episode=1209 reward=0.8066759 (488.20 it/sec) -training >> step=7252700, episode=1209 reward=0.7939093 (527.68 it/sec) -training >> step=7252800, episode=1209 reward=0.7455447 (505.02 it/sec) -training >> step=7252900, episode=1209 reward=0.7709164 (501.21 it/sec) -training >> step=7253000, episode=1209 reward=0.7830585 (479.24 it/sec) -training >> step=7253100, episode=1209 reward=0.7773344 (484.16 it/sec) -training >> step=7253200, episode=1209 reward=0.765522 (461.11 it/sec) -training >> step=7253300, episode=1210 reward=0.7893569 (52.95 it/sec) -training >> step=7253400, episode=1210 reward=0.7834323 (465.47 it/sec) -training >> step=7253500, episode=1210 reward=0.7942204 (468.41 it/sec) -training >> step=7253600, episode=1210 reward=0.7904566 (489.25 it/sec) -training >> step=7253700, episode=1210 reward=0.7847024 (507.67 it/sec) -training >> step=7253800, episode=1210 reward=0.7937403 (497.12 it/sec) -training >> step=7253900, episode=1210 reward=0.7725453 (491.58 it/sec) -training >> step=7254000, episode=1210 reward=0.7735465 (489.85 it/sec) -training >> step=7254100, episode=1210 reward=0.7870604 (512.92 it/sec) -training >> step=7254200, episode=1210 reward=0.7787777 (451.99 it/sec) -training >> step=7254300, episode=1210 reward=0.7770258 (375.36 it/sec) -training >> step=7254400, episode=1210 reward=0.7917375 (422.98 it/sec) -training >> step=7254500, episode=1210 reward=0.7869245 (450.62 it/sec) -training >> step=7254600, episode=1210 reward=0.7790351 (468.06 it/sec) -training >> step=7254700, episode=1210 reward=0.7889446 (473.25 it/sec) -training >> step=7254800, episode=1210 reward=0.7919705 (495.26 it/sec) -training >> step=7254900, episode=1210 reward=0.7973372 (515.99 it/sec) -training >> step=7255000, episode=1210 reward=0.7760185 (519.05 it/sec) -training >> step=7255100, episode=1210 reward=0.7885933 (476.07 it/sec) -training >> step=7255200, episode=1210 reward=0.7816557 (519.06 it/sec) -training >> step=7255300, episode=1210 reward=0.7882953 (519.86 it/sec) -training >> step=7255400, episode=1210 reward=0.7905877 (526.45 it/sec) -training >> step=7255500, episode=1210 reward=0.8072476 (471.23 it/sec) -training >> step=7255600, episode=1210 reward=0.8004205 (504.45 it/sec) -training >> step=7255700, episode=1210 reward=0.7987899 (499.41 it/sec) -training >> step=7255800, episode=1210 reward=0.7989683 (508.40 it/sec) -training >> step=7255900, episode=1210 reward=0.7849537 (510.68 it/sec) -training >> step=7256000, episode=1210 reward=0.7818437 (471.47 it/sec) -training >> step=7256100, episode=1210 reward=0.7827951 (506.35 it/sec) -training >> step=7256200, episode=1210 reward=0.7873935 (477.16 it/sec) -training >> step=7256300, episode=1210 reward=0.7979758 (339.61 it/sec) -training >> step=7256400, episode=1210 reward=0.7962912 (546.97 it/sec) -training >> step=7256500, episode=1210 reward=0.7732646 (468.80 it/sec) -training >> step=7256600, episode=1210 reward=0.7961987 (482.84 it/sec) -training >> step=7256700, episode=1210 reward=0.7869154 (465.37 it/sec) -training >> step=7256800, episode=1210 reward=0.7914445 (433.81 it/sec) -training >> step=7256900, episode=1210 reward=0.7857371 (458.30 it/sec) -training >> step=7257000, episode=1210 reward=0.781177 (471.96 it/sec) -training >> step=7257100, episode=1210 reward=0.8166028 (522.60 it/sec) -training >> step=7257200, episode=1210 reward=0.7716017 (507.85 it/sec) -training >> step=7257300, episode=1210 reward=0.7706673 (453.28 it/sec) -training >> step=7257400, episode=1210 reward=0.7880221 (490.72 it/sec) -training >> step=7257500, episode=1210 reward=0.7967636 (473.88 it/sec) -training >> step=7257600, episode=1210 reward=0.7740427 (442.12 it/sec) -training >> step=7257700, episode=1210 reward=0.799807 (452.21 it/sec) -training >> step=7257800, episode=1210 reward=0.7729014 (489.92 it/sec) -training >> step=7257900, episode=1210 reward=0.7842809 (519.49 it/sec) -training >> step=7258000, episode=1210 reward=0.7911756 (472.20 it/sec) -training >> step=7258100, episode=1210 reward=0.7856044 (478.56 it/sec) -training >> step=7258200, episode=1210 reward=0.7973774 (492.60 it/sec) -training >> step=7258300, episode=1210 reward=0.7891732 (404.78 it/sec) -training >> step=7258400, episode=1210 reward=0.7754756 (446.67 it/sec) -training >> step=7258500, episode=1210 reward=0.7848362 (433.20 it/sec) -training >> step=7258600, episode=1210 reward=0.7766152 (473.83 it/sec) -training >> step=7258700, episode=1210 reward=0.771388 (476.57 it/sec) -training >> step=7258800, episode=1210 reward=0.778376 (509.20 it/sec) -training >> step=7258900, episode=1210 reward=0.7692097 (524.79 it/sec) -training >> step=7259000, episode=1210 reward=0.7742094 (508.63 it/sec) -training >> step=7259100, episode=1210 reward=0.7879959 (449.97 it/sec) -training >> step=7259200, episode=1210 reward=0.8010272 (460.43 it/sec) -training >> step=7259300, episode=1211 reward=0.7603293 (59.27 it/sec) -training >> step=7259400, episode=1211 reward=0.7966588 (476.54 it/sec) -training >> step=7259500, episode=1211 reward=0.7891605 (509.17 it/sec) -training >> step=7259600, episode=1211 reward=0.7510524 (530.14 it/sec) -training >> step=7259700, episode=1211 reward=0.784991 (400.72 it/sec) -training >> step=7259800, episode=1211 reward=0.7845221 (387.10 it/sec) -training >> step=7259900, episode=1211 reward=0.7987298 (436.48 it/sec) -training >> step=7260000, episode=1211 reward=0.7985773 (503.12 it/sec) -training >> step=7260100, episode=1211 reward=0.7776057 (501.30 it/sec) -training >> step=7260200, episode=1211 reward=0.8008206 (528.86 it/sec) -training >> step=7260300, episode=1211 reward=0.791017 (492.86 it/sec) -training >> step=7260400, episode=1211 reward=0.781413 (477.86 it/sec) -training >> step=7260500, episode=1211 reward=0.8016256 (485.50 it/sec) -training >> step=7260600, episode=1211 reward=0.7761672 (461.15 it/sec) -training >> step=7260700, episode=1211 reward=0.7857872 (442.67 it/sec) -training >> step=7260800, episode=1211 reward=0.7977626 (523.65 it/sec) -training >> step=7260900, episode=1211 reward=0.7966803 (493.96 it/sec) -training >> step=7261000, episode=1211 reward=0.8149968 (445.95 it/sec) -training >> step=7261100, episode=1211 reward=0.7818114 (468.46 it/sec) -training >> step=7261200, episode=1211 reward=0.794982 (469.51 it/sec) -training >> step=7261300, episode=1211 reward=0.7817686 (493.96 it/sec) -training >> step=7261400, episode=1211 reward=0.7749027 (500.41 it/sec) -training >> step=7261500, episode=1211 reward=0.7885199 (529.95 it/sec) -training >> step=7261600, episode=1211 reward=0.7949821 (474.55 it/sec) -training >> step=7261700, episode=1211 reward=0.778357 (470.35 it/sec) -training >> step=7261800, episode=1211 reward=0.7882152 (487.45 it/sec) -training >> step=7261900, episode=1211 reward=0.7664581 (519.45 it/sec) -training >> step=7262000, episode=1211 reward=0.7884585 (523.07 it/sec) -training >> step=7262100, episode=1211 reward=0.7682664 (492.81 it/sec) -training >> step=7262200, episode=1211 reward=0.7966077 (559.33 it/sec) -training >> step=7262300, episode=1211 reward=0.8157036 (523.34 it/sec) -training >> step=7262400, episode=1211 reward=0.7951698 (482.39 it/sec) -training >> step=7262500, episode=1211 reward=0.7707755 (357.38 it/sec) -training >> step=7262600, episode=1211 reward=0.7836134 (492.17 it/sec) -training >> step=7262700, episode=1211 reward=0.7753435 (475.71 it/sec) -training >> step=7262800, episode=1211 reward=0.783377 (459.31 it/sec) -training >> step=7262900, episode=1211 reward=0.7852087 (489.24 it/sec) -training >> step=7263000, episode=1211 reward=0.7919726 (535.74 it/sec) -training >> step=7263100, episode=1211 reward=0.785696 (511.38 it/sec) -training >> step=7263200, episode=1211 reward=0.7811311 (524.06 it/sec) -training >> step=7263300, episode=1211 reward=0.7839978 (542.12 it/sec) -training >> step=7263400, episode=1211 reward=0.7765824 (502.65 it/sec) -training >> step=7263500, episode=1211 reward=0.7736536 (504.13 it/sec) -training >> step=7263600, episode=1211 reward=0.793128 (444.84 it/sec) -training >> step=7263700, episode=1211 reward=0.8001824 (415.14 it/sec) -training >> step=7263800, episode=1211 reward=0.7540458 (467.24 it/sec) -training >> step=7263900, episode=1211 reward=0.783408 (446.01 it/sec) -training >> step=7264000, episode=1211 reward=0.7836361 (442.01 it/sec) -training >> step=7264100, episode=1211 reward=0.7790965 (450.75 it/sec) -training >> step=7264200, episode=1211 reward=0.7636142 (446.55 it/sec) -training >> step=7264300, episode=1211 reward=0.7755068 (484.54 it/sec) -training >> step=7264400, episode=1211 reward=0.7582294 (479.39 it/sec) -training >> step=7264500, episode=1211 reward=0.7779884 (423.91 it/sec) -training >> step=7264600, episode=1211 reward=0.7823589 (396.85 it/sec) -training >> step=7264700, episode=1211 reward=0.7782307 (506.18 it/sec) -training >> step=7264800, episode=1211 reward=0.7885026 (498.73 it/sec) -training >> step=7264900, episode=1211 reward=0.7983954 (454.39 it/sec) -training >> step=7265000, episode=1211 reward=0.7818413 (505.18 it/sec) -training >> step=7265100, episode=1211 reward=0.7873386 (507.10 it/sec) -training >> step=7265200, episode=1211 reward=0.7781129 (421.01 it/sec) -training >> step=7265300, episode=1212 reward=0.7898472 (72.51 it/sec) -training >> step=7265400, episode=1212 reward=0.7734993 (492.67 it/sec) -training >> step=7265500, episode=1212 reward=0.8002288 (502.07 it/sec) -training >> step=7265600, episode=1212 reward=0.7701121 (522.65 it/sec) -training >> step=7265700, episode=1212 reward=0.7952091 (456.60 it/sec) -training >> step=7265800, episode=1212 reward=0.7815831 (502.39 it/sec) -training >> step=7265900, episode=1212 reward=0.7963022 (425.08 it/sec) -training >> step=7266000, episode=1212 reward=0.7786728 (472.21 it/sec) -training >> step=7266100, episode=1212 reward=0.7858748 (495.25 it/sec) -training >> step=7266200, episode=1212 reward=0.7790956 (444.37 it/sec) -training >> step=7266300, episode=1212 reward=0.7915323 (478.39 it/sec) -training >> step=7266400, episode=1212 reward=0.7913767 (503.81 it/sec) -training >> step=7266500, episode=1212 reward=0.7870418 (502.10 it/sec) -training >> step=7266600, episode=1212 reward=0.7895011 (504.05 it/sec) -training >> step=7266700, episode=1212 reward=0.7864999 (527.36 it/sec) -training >> step=7266800, episode=1212 reward=0.785508 (540.42 it/sec) -training >> step=7266900, episode=1212 reward=0.7787573 (487.24 it/sec) -training >> step=7267000, episode=1212 reward=0.7990937 (524.48 it/sec) -training >> step=7267100, episode=1212 reward=0.7819344 (490.73 it/sec) -training >> step=7267200, episode=1212 reward=0.7833709 (489.32 it/sec) -training >> step=7267300, episode=1212 reward=0.7820212 (516.96 it/sec) -training >> step=7267400, episode=1212 reward=0.7727693 (489.89 it/sec) -training >> step=7267500, episode=1212 reward=0.7959553 (481.82 it/sec) -training >> step=7267600, episode=1212 reward=0.7984149 (458.39 it/sec) -training >> step=7267700, episode=1212 reward=0.7819373 (548.02 it/sec) -training >> step=7267800, episode=1212 reward=0.7830011 (519.33 it/sec) -training >> step=7267900, episode=1212 reward=0.7818373 (480.40 it/sec) -training >> step=7268000, episode=1212 reward=0.7790798 (494.78 it/sec) -training >> step=7268100, episode=1212 reward=0.7758408 (505.73 it/sec) -training >> step=7268200, episode=1212 reward=0.7735413 (498.05 it/sec) -training >> step=7268300, episode=1212 reward=0.7926766 (474.85 it/sec) -training >> step=7268400, episode=1212 reward=0.7903362 (415.31 it/sec) -training >> step=7268500, episode=1212 reward=0.779433 (513.43 it/sec) -training >> step=7268600, episode=1212 reward=0.8101838 (424.50 it/sec) -training >> step=7268700, episode=1212 reward=0.7867857 (502.90 it/sec) -training >> step=7268800, episode=1212 reward=0.7870521 (374.14 it/sec) -training >> step=7268900, episode=1212 reward=0.7876331 (414.90 it/sec) -training >> step=7269000, episode=1212 reward=0.7967871 (480.02 it/sec) -training >> step=7269100, episode=1212 reward=0.7800866 (478.38 it/sec) -training >> step=7269200, episode=1212 reward=0.7771427 (496.69 it/sec) -training >> step=7269300, episode=1212 reward=0.7834153 (504.71 it/sec) -training >> step=7269400, episode=1212 reward=0.7740217 (504.92 it/sec) -training >> step=7269500, episode=1212 reward=0.7763472 (460.34 it/sec) -training >> step=7269600, episode=1212 reward=0.7792206 (520.31 it/sec) -training >> step=7269700, episode=1212 reward=0.8028829 (496.01 it/sec) -training >> step=7269800, episode=1212 reward=0.7786143 (512.02 it/sec) -training >> step=7269900, episode=1212 reward=0.7782379 (487.44 it/sec) -training >> step=7270000, episode=1212 reward=0.7852158 (471.22 it/sec) -training >> step=7270100, episode=1212 reward=0.7714753 (494.80 it/sec) -training >> step=7270200, episode=1212 reward=0.7818549 (480.71 it/sec) -training >> step=7270300, episode=1212 reward=0.7861133 (499.66 it/sec) -training >> step=7270400, episode=1212 reward=0.7827032 (525.87 it/sec) -training >> step=7270500, episode=1212 reward=0.7927431 (540.40 it/sec) -training >> step=7270600, episode=1212 reward=0.7992566 (529.53 it/sec) -training >> step=7270700, episode=1212 reward=0.8049819 (477.88 it/sec) -training >> step=7270800, episode=1212 reward=0.7773638 (497.81 it/sec) -training >> step=7270900, episode=1212 reward=0.7692425 (448.66 it/sec) -training >> step=7271000, episode=1212 reward=0.771318 (481.30 it/sec) -training >> step=7271100, episode=1212 reward=0.7699822 (513.20 it/sec) -training >> step=7271200, episode=1212 reward=0.7864164 (545.03 it/sec) -training >> step=7271300, episode=1213 reward=0.8000386 (126.70 it/sec) -training >> step=7271400, episode=1213 reward=0.7659507 (462.51 it/sec) -training >> step=7271500, episode=1213 reward=0.7920803 (486.52 it/sec) -training >> step=7271600, episode=1213 reward=0.7714744 (471.72 it/sec) -training >> step=7271700, episode=1213 reward=0.7956476 (463.06 it/sec) -training >> step=7271800, episode=1213 reward=0.7957597 (440.55 it/sec) -training >> step=7271900, episode=1213 reward=0.792156 (544.62 it/sec) -training >> step=7272000, episode=1213 reward=0.7707794 (480.77 it/sec) -training >> step=7272100, episode=1213 reward=0.7838849 (472.28 it/sec) -training >> step=7272200, episode=1213 reward=0.79218 (486.70 it/sec) -training >> step=7272300, episode=1213 reward=0.8070122 (477.22 it/sec) -training >> step=7272400, episode=1213 reward=0.788876 (404.20 it/sec) -training >> step=7272500, episode=1213 reward=0.7936251 (505.04 it/sec) -training >> step=7272600, episode=1213 reward=0.7913923 (521.88 it/sec) -training >> step=7272700, episode=1213 reward=0.7935012 (414.89 it/sec) -training >> step=7272800, episode=1213 reward=0.7820765 (405.88 it/sec) -training >> step=7272900, episode=1213 reward=0.7790052 (426.35 it/sec) -training >> step=7273000, episode=1213 reward=0.7861277 (488.39 it/sec) -training >> step=7273100, episode=1213 reward=0.7952073 (425.11 it/sec) -training >> step=7273200, episode=1213 reward=0.7910056 (438.21 it/sec) -training >> step=7273300, episode=1213 reward=0.7952498 (513.42 it/sec) -training >> step=7273400, episode=1213 reward=0.7660024 (458.92 it/sec) -training >> step=7273500, episode=1213 reward=0.7936547 (456.78 it/sec) -training >> step=7273600, episode=1213 reward=0.805526 (448.84 it/sec) -training >> step=7273700, episode=1213 reward=0.7738286 (475.85 it/sec) -training >> step=7273800, episode=1213 reward=0.7560391 (442.48 it/sec) -training >> step=7273900, episode=1213 reward=0.7916752 (502.77 it/sec) -training >> step=7274000, episode=1213 reward=0.7869982 (443.28 it/sec) -training >> step=7274100, episode=1213 reward=0.7928747 (505.13 it/sec) -training >> step=7274200, episode=1213 reward=0.7607503 (468.37 it/sec) -training >> step=7274300, episode=1213 reward=0.8241754 (452.00 it/sec) -training >> step=7274400, episode=1213 reward=0.7984264 (455.15 it/sec) -training >> step=7274500, episode=1213 reward=0.803299 (371.50 it/sec) -training >> step=7274600, episode=1213 reward=0.785088 (469.89 it/sec) -training >> step=7274700, episode=1213 reward=0.7728813 (494.69 it/sec) -training >> step=7274800, episode=1213 reward=0.7935385 (476.13 it/sec) -training >> step=7274900, episode=1213 reward=0.7883726 (488.70 it/sec) -training >> step=7275000, episode=1213 reward=0.803338 (446.91 it/sec) -training >> step=7275100, episode=1213 reward=0.7604578 (506.54 it/sec) -training >> step=7275200, episode=1213 reward=0.7658149 (475.30 it/sec) -training >> step=7275300, episode=1213 reward=0.8060659 (422.61 it/sec) -training >> step=7275400, episode=1213 reward=0.7899231 (462.86 it/sec) -training >> step=7275500, episode=1213 reward=0.7797388 (443.98 it/sec) -training >> step=7275600, episode=1213 reward=0.784239 (461.67 it/sec) -training >> step=7275700, episode=1213 reward=0.7692735 (490.29 it/sec) -training >> step=7275800, episode=1213 reward=0.7835279 (528.30 it/sec) -training >> step=7275900, episode=1213 reward=0.7962289 (435.26 it/sec) -training >> step=7276000, episode=1213 reward=0.784389 (463.96 it/sec) -training >> step=7276100, episode=1213 reward=0.7768664 (481.56 it/sec) -training >> step=7276200, episode=1213 reward=0.7699457 (477.63 it/sec) -training >> step=7276300, episode=1213 reward=0.7748353 (418.68 it/sec) -training >> step=7276400, episode=1213 reward=0.7720729 (473.83 it/sec) -training >> step=7276500, episode=1213 reward=0.7896211 (453.20 it/sec) -training >> step=7276600, episode=1213 reward=0.7577201 (487.21 it/sec) -training >> step=7276700, episode=1213 reward=0.7721905 (381.36 it/sec) -training >> step=7276800, episode=1213 reward=0.7771718 (407.70 it/sec) -training >> step=7276900, episode=1213 reward=0.785646 (437.91 it/sec) -training >> step=7277000, episode=1213 reward=0.7909436 (438.87 it/sec) -training >> step=7277100, episode=1213 reward=0.7797759 (448.85 it/sec) -training >> step=7277200, episode=1213 reward=0.7755186 (486.19 it/sec) -training >> step=7277300, episode=1214 reward=0.7880103 (107.90 it/sec) -training >> step=7277400, episode=1214 reward=0.7816672 (340.66 it/sec) -training >> step=7277500, episode=1214 reward=0.7854931 (455.46 it/sec) -training >> step=7277600, episode=1214 reward=0.8036466 (417.98 it/sec) -training >> step=7277700, episode=1214 reward=0.7883599 (479.81 it/sec) -training >> step=7277800, episode=1214 reward=0.7913872 (425.43 it/sec) -training >> step=7277900, episode=1214 reward=0.8077657 (431.04 it/sec) -training >> step=7278000, episode=1214 reward=0.7835861 (472.94 it/sec) -training >> step=7278100, episode=1214 reward=0.7827287 (432.02 it/sec) -training >> step=7278200, episode=1214 reward=0.783515 (471.54 it/sec) -training >> step=7278300, episode=1214 reward=0.7950773 (495.57 it/sec) -training >> step=7278400, episode=1214 reward=0.8027769 (489.26 it/sec) -training >> step=7278500, episode=1214 reward=0.7900359 (473.66 it/sec) -training >> step=7278600, episode=1214 reward=0.7712226 (468.71 it/sec) -training >> step=7278700, episode=1214 reward=0.787332 (446.98 it/sec) -training >> step=7278800, episode=1214 reward=0.7778251 (494.85 it/sec) -training >> step=7278900, episode=1214 reward=0.7834119 (485.15 it/sec) -training >> step=7279000, episode=1214 reward=0.7902188 (514.59 it/sec) -training >> step=7279100, episode=1214 reward=0.812922 (470.03 it/sec) -training >> step=7279200, episode=1214 reward=0.7821323 (453.83 it/sec) -training >> step=7279300, episode=1214 reward=0.8039524 (495.98 it/sec) -training >> step=7279400, episode=1214 reward=0.7771875 (492.53 it/sec) -training >> step=7279500, episode=1214 reward=0.7782369 (469.76 it/sec) -training >> step=7279600, episode=1214 reward=0.7975366 (450.33 it/sec) -training >> step=7279700, episode=1214 reward=0.8034585 (502.37 it/sec) -training >> step=7279800, episode=1214 reward=0.7857 (435.62 it/sec) -training >> step=7279900, episode=1214 reward=0.7895944 (461.93 it/sec) -training >> step=7280000, episode=1214 reward=0.804248 (463.57 it/sec) -training >> step=7280100, episode=1214 reward=0.7947231 (499.21 it/sec) -training >> step=7280200, episode=1214 reward=0.7879812 (477.55 it/sec) -training >> step=7280300, episode=1214 reward=0.7803259 (455.77 it/sec) -training >> step=7280400, episode=1214 reward=0.7847078 (481.13 it/sec) -training >> step=7280500, episode=1214 reward=0.7954721 (351.62 it/sec) -training >> step=7280600, episode=1214 reward=0.8045337 (496.98 it/sec) -training >> step=7280700, episode=1214 reward=0.7849333 (494.25 it/sec) -training >> step=7280800, episode=1214 reward=0.7869653 (482.33 it/sec) -training >> step=7280900, episode=1214 reward=0.7795041 (439.96 it/sec) -training >> step=7281000, episode=1214 reward=0.7829008 (440.65 it/sec) -training >> step=7281100, episode=1214 reward=0.7932982 (501.29 it/sec) -training >> step=7281200, episode=1214 reward=0.783472 (515.74 it/sec) -training >> step=7281300, episode=1214 reward=0.7859728 (483.78 it/sec) -training >> step=7281400, episode=1214 reward=0.7977835 (480.96 it/sec) -training >> step=7281500, episode=1214 reward=0.7970533 (474.64 it/sec) -training >> step=7281600, episode=1214 reward=0.7746342 (469.01 it/sec) -training >> step=7281700, episode=1214 reward=0.7780204 (467.87 it/sec) -training >> step=7281800, episode=1214 reward=0.783457 (429.65 it/sec) -training >> step=7281900, episode=1214 reward=0.7838035 (482.10 it/sec) -training >> step=7282000, episode=1214 reward=0.791601 (473.61 it/sec) -training >> step=7282100, episode=1214 reward=0.7795824 (490.75 it/sec) -training >> step=7282200, episode=1214 reward=0.7810659 (495.73 it/sec) -training >> step=7282300, episode=1214 reward=0.7793278 (469.58 it/sec) -training >> step=7282400, episode=1214 reward=0.7883593 (423.86 it/sec) -training >> step=7282500, episode=1214 reward=0.7727634 (450.73 it/sec) -training >> step=7282600, episode=1214 reward=0.7801679 (459.23 it/sec) -training >> step=7282700, episode=1214 reward=0.7808426 (431.07 it/sec) -training >> step=7282800, episode=1214 reward=0.7664623 (373.26 it/sec) -training >> step=7282900, episode=1214 reward=0.7968919 (470.95 it/sec) -training >> step=7283000, episode=1214 reward=0.7778041 (473.43 it/sec) -training >> step=7283100, episode=1214 reward=0.7808121 (390.85 it/sec) -training >> step=7283200, episode=1214 reward=0.7927285 (440.95 it/sec) -training >> step=7283300, episode=1215 reward=0.8044393 (120.31 it/sec) -training >> step=7283400, episode=1215 reward=0.7869578 (450.51 it/sec) -training >> step=7283500, episode=1215 reward=0.7976729 (513.27 it/sec) -training >> step=7283600, episode=1215 reward=0.7809454 (483.94 it/sec) -training >> step=7283700, episode=1215 reward=0.7828148 (466.08 it/sec) -training >> step=7283800, episode=1215 reward=0.7877138 (418.43 it/sec) -training >> step=7283900, episode=1215 reward=0.7935024 (464.48 it/sec) -training >> step=7284000, episode=1215 reward=0.7888871 (541.95 it/sec) -training >> step=7284100, episode=1215 reward=0.794457 (525.22 it/sec) -training >> step=7284200, episode=1215 reward=0.7852162 (522.20 it/sec) -training >> step=7284300, episode=1215 reward=0.7729654 (467.45 it/sec) -training >> step=7284400, episode=1215 reward=0.7959558 (542.17 it/sec) -training >> step=7284500, episode=1215 reward=0.8002694 (481.84 it/sec) -training >> step=7284600, episode=1215 reward=0.8021802 (464.60 it/sec) -training >> step=7284700, episode=1215 reward=0.8081419 (417.00 it/sec) -training >> step=7284800, episode=1215 reward=0.7913647 (422.54 it/sec) -training >> step=7284900, episode=1215 reward=0.7916888 (420.50 it/sec) -training >> step=7285000, episode=1215 reward=0.7899063 (414.58 it/sec) -training >> step=7285100, episode=1215 reward=0.7815588 (461.24 it/sec) -training >> step=7285200, episode=1215 reward=0.7716363 (441.55 it/sec) -training >> step=7285300, episode=1215 reward=0.7853774 (424.34 it/sec) -training >> step=7285400, episode=1215 reward=0.7935215 (511.15 it/sec) -training >> step=7285500, episode=1215 reward=0.7807772 (463.86 it/sec) -training >> step=7285600, episode=1215 reward=0.8010964 (481.15 it/sec) -training >> step=7285700, episode=1215 reward=0.7838699 (488.14 it/sec) -training >> step=7285800, episode=1215 reward=0.7750498 (499.21 it/sec) -training >> step=7285900, episode=1215 reward=0.791887 (501.94 it/sec) -training >> step=7286000, episode=1215 reward=0.7839925 (501.70 it/sec) -training >> step=7286100, episode=1215 reward=0.7754592 (510.07 it/sec) -training >> step=7286200, episode=1215 reward=0.7870516 (431.83 it/sec) -training >> step=7286300, episode=1215 reward=0.7797658 (427.65 it/sec) -training >> step=7286400, episode=1215 reward=0.8007675 (440.12 it/sec) -training >> step=7286500, episode=1215 reward=0.8038265 (489.38 it/sec) -training >> step=7286600, episode=1215 reward=0.7808962 (367.26 it/sec) -training >> step=7286700, episode=1215 reward=0.7696787 (491.93 it/sec) -training >> step=7286800, episode=1215 reward=0.7780737 (535.16 it/sec) -training >> step=7286900, episode=1215 reward=0.7903705 (546.17 it/sec) -training >> step=7287000, episode=1215 reward=0.783708 (484.22 it/sec) -training >> step=7287100, episode=1215 reward=0.7879767 (449.59 it/sec) -training >> step=7287200, episode=1215 reward=0.7814731 (499.07 it/sec) -training >> step=7287300, episode=1215 reward=0.772185 (504.88 it/sec) -training >> step=7287400, episode=1215 reward=0.7851184 (511.62 it/sec) -training >> step=7287500, episode=1215 reward=0.789416 (462.90 it/sec) -training >> step=7287600, episode=1215 reward=0.8169252 (493.96 it/sec) -training >> step=7287700, episode=1215 reward=0.777939 (456.24 it/sec) -training >> step=7287800, episode=1215 reward=0.7963148 (491.90 it/sec) -training >> step=7287900, episode=1215 reward=0.7675038 (477.25 it/sec) -training >> step=7288000, episode=1215 reward=0.752629 (481.21 it/sec) -training >> step=7288100, episode=1215 reward=0.7756731 (526.28 it/sec) -training >> step=7288200, episode=1215 reward=0.7737206 (461.60 it/sec) -training >> step=7288300, episode=1215 reward=0.7747064 (499.98 it/sec) -training >> step=7288400, episode=1215 reward=0.775454 (435.00 it/sec) -training >> step=7288500, episode=1215 reward=0.7811439 (475.90 it/sec) -training >> step=7288600, episode=1215 reward=0.773252 (384.09 it/sec) -training >> step=7288700, episode=1215 reward=0.8041686 (443.78 it/sec) -training >> step=7288800, episode=1215 reward=0.7904964 (452.92 it/sec) -training >> step=7288900, episode=1215 reward=0.7923018 (433.42 it/sec) -training >> step=7289000, episode=1215 reward=0.7625612 (408.76 it/sec) -training >> step=7289100, episode=1215 reward=0.7783204 (383.28 it/sec) -training >> step=7289200, episode=1215 reward=0.7845542 (407.75 it/sec) -training >> step=7289300, episode=1216 reward=0.7972327 (93.07 it/sec) -training >> step=7289400, episode=1216 reward=0.7727898 (515.60 it/sec) -training >> step=7289500, episode=1216 reward=0.7840816 (445.90 it/sec) -training >> step=7289600, episode=1216 reward=0.789315 (454.18 it/sec) -training >> step=7289700, episode=1216 reward=0.7902787 (496.86 it/sec) -training >> step=7289800, episode=1216 reward=0.7736053 (443.55 it/sec) -training >> step=7289900, episode=1216 reward=0.7996178 (505.16 it/sec) -training >> step=7290000, episode=1216 reward=0.7770091 (482.18 it/sec) -training >> step=7290100, episode=1216 reward=0.8059211 (525.97 it/sec) -training >> step=7290200, episode=1216 reward=0.7921495 (499.93 it/sec) -training >> step=7290300, episode=1216 reward=0.8018661 (517.75 it/sec) -training >> step=7290400, episode=1216 reward=0.7813244 (518.53 it/sec) -training >> step=7290500, episode=1216 reward=0.7857398 (543.51 it/sec) -training >> step=7290600, episode=1216 reward=0.7676443 (454.84 it/sec) -training >> step=7290700, episode=1216 reward=0.7951086 (505.57 it/sec) -training >> step=7290800, episode=1216 reward=0.7848451 (487.55 it/sec) -training >> step=7290900, episode=1216 reward=0.7841944 (521.47 it/sec) -training >> step=7291000, episode=1216 reward=0.8116347 (486.48 it/sec) -training >> step=7291100, episode=1216 reward=0.7828141 (491.41 it/sec) -training >> step=7291200, episode=1216 reward=0.7904655 (491.55 it/sec) -training >> step=7291300, episode=1216 reward=0.7930591 (436.86 it/sec) -training >> step=7291400, episode=1216 reward=0.7850313 (454.19 it/sec) -training >> step=7291500, episode=1216 reward=0.7798546 (482.54 it/sec) -training >> step=7291600, episode=1216 reward=0.7993615 (484.84 it/sec) -training >> step=7291700, episode=1216 reward=0.7887216 (501.20 it/sec) -training >> step=7291800, episode=1216 reward=0.7940361 (448.72 it/sec) -training >> step=7291900, episode=1216 reward=0.8016717 (481.78 it/sec) -training >> step=7292000, episode=1216 reward=0.7759254 (395.85 it/sec) -training >> step=7292100, episode=1216 reward=0.7872773 (484.34 it/sec) -training >> step=7292200, episode=1216 reward=0.771639 (434.07 it/sec) -training >> step=7292300, episode=1216 reward=0.8049689 (489.58 it/sec) -training >> step=7292400, episode=1216 reward=0.7892219 (480.76 it/sec) -training >> step=7292500, episode=1216 reward=0.7843221 (516.07 it/sec) -training >> step=7292600, episode=1216 reward=0.8078791 (366.74 it/sec) -training >> step=7292700, episode=1216 reward=0.7923384 (507.07 it/sec) -training >> step=7292800, episode=1216 reward=0.7823472 (490.07 it/sec) -training >> step=7292900, episode=1216 reward=0.784713 (463.22 it/sec) -training >> step=7293000, episode=1216 reward=0.7806595 (511.62 it/sec) -training >> step=7293100, episode=1216 reward=0.7844307 (480.93 it/sec) -training >> step=7293200, episode=1216 reward=0.8141319 (488.77 it/sec) -training >> step=7293300, episode=1216 reward=0.8012906 (486.13 it/sec) -training >> step=7293400, episode=1216 reward=0.7785326 (513.51 it/sec) -training >> step=7293500, episode=1216 reward=0.7705383 (498.71 it/sec) -training >> step=7293600, episode=1216 reward=0.789466 (468.32 it/sec) -training >> step=7293700, episode=1216 reward=0.7792209 (494.54 it/sec) -training >> step=7293800, episode=1216 reward=0.7762805 (511.37 it/sec) -training >> step=7293900, episode=1216 reward=0.782658 (528.69 it/sec) -training >> step=7294000, episode=1216 reward=0.7707062 (472.97 it/sec) -training >> step=7294100, episode=1216 reward=0.7769753 (486.12 it/sec) -training >> step=7294200, episode=1216 reward=0.7830867 (470.13 it/sec) -training >> step=7294300, episode=1216 reward=0.7746513 (500.25 it/sec) -training >> step=7294400, episode=1216 reward=0.799969 (537.74 it/sec) -training >> step=7294500, episode=1216 reward=0.7762017 (541.01 it/sec) -training >> step=7294600, episode=1216 reward=0.7651212 (517.76 it/sec) -training >> step=7294700, episode=1216 reward=0.7748169 (536.66 it/sec) -training >> step=7294800, episode=1216 reward=0.7936227 (525.96 it/sec) -training >> step=7294900, episode=1216 reward=0.7754759 (507.05 it/sec) -training >> step=7295000, episode=1216 reward=0.782995 (520.67 it/sec) -training >> step=7295100, episode=1216 reward=0.7777048 (495.02 it/sec) -training >> step=7295200, episode=1216 reward=0.7860773 (499.68 it/sec) -training >> step=7295300, episode=1217 reward=0.799378 (104.45 it/sec) -training >> step=7295400, episode=1217 reward=0.7916955 (494.03 it/sec) -training >> step=7295500, episode=1217 reward=0.7873905 (484.50 it/sec) -training >> step=7295600, episode=1217 reward=0.7650411 (520.51 it/sec) -training >> step=7295700, episode=1217 reward=0.7668132 (523.52 it/sec) -training >> step=7295800, episode=1217 reward=0.7933159 (500.51 it/sec) -training >> step=7295900, episode=1217 reward=0.7578433 (517.23 it/sec) -training >> step=7296000, episode=1217 reward=0.8012238 (425.43 it/sec) -training >> step=7296100, episode=1217 reward=0.7835329 (546.80 it/sec) -training >> step=7296200, episode=1217 reward=0.8015208 (470.51 it/sec) -training >> step=7296300, episode=1217 reward=0.7767375 (506.60 it/sec) -training >> step=7296400, episode=1217 reward=0.8022665 (473.07 it/sec) -training >> step=7296500, episode=1217 reward=0.7952962 (553.98 it/sec) -training >> step=7296600, episode=1217 reward=0.7811521 (494.22 it/sec) -training >> step=7296700, episode=1217 reward=0.7955869 (434.42 it/sec) -training >> step=7296800, episode=1217 reward=0.7980497 (435.75 it/sec) -training >> step=7296900, episode=1217 reward=0.7767513 (451.10 it/sec) -training >> step=7297000, episode=1217 reward=0.7932146 (526.42 it/sec) -training >> step=7297100, episode=1217 reward=0.787237 (511.66 it/sec) -training >> step=7297200, episode=1217 reward=0.7826504 (543.77 it/sec) -training >> step=7297300, episode=1217 reward=0.8102001 (487.60 it/sec) -training >> step=7297400, episode=1217 reward=0.7783138 (526.00 it/sec) -training >> step=7297500, episode=1217 reward=0.7910065 (470.95 it/sec) -training >> step=7297600, episode=1217 reward=0.7881621 (472.69 it/sec) -training >> step=7297700, episode=1217 reward=0.790785 (533.29 it/sec) -training >> step=7297800, episode=1217 reward=0.7879898 (485.16 it/sec) -training >> step=7297900, episode=1217 reward=0.7865117 (547.14 it/sec) -training >> step=7298000, episode=1217 reward=0.8034191 (489.67 it/sec) -training >> step=7298100, episode=1217 reward=0.7961491 (526.60 it/sec) -training >> step=7298200, episode=1217 reward=0.7981032 (512.82 it/sec) -training >> step=7298300, episode=1217 reward=0.7752255 (406.92 it/sec) -training >> step=7298400, episode=1217 reward=0.7964681 (434.67 it/sec) -training >> step=7298500, episode=1217 reward=0.8041199 (407.63 it/sec) -training >> step=7298600, episode=1217 reward=0.7790149 (450.36 it/sec) -training >> step=7298700, episode=1217 reward=0.7591971 (327.44 it/sec) -training >> step=7298800, episode=1217 reward=0.7691935 (425.15 it/sec) -training >> step=7298900, episode=1217 reward=0.783517 (401.04 it/sec) -training >> step=7299000, episode=1217 reward=0.8090953 (491.31 it/sec) -training >> step=7299100, episode=1217 reward=0.808596 (495.34 it/sec) -training >> step=7299200, episode=1217 reward=0.7782879 (488.46 it/sec) -training >> step=7299300, episode=1217 reward=0.7836681 (517.07 it/sec) -training >> step=7299400, episode=1217 reward=0.8093144 (544.23 it/sec) -training >> step=7299500, episode=1217 reward=0.7851427 (539.96 it/sec) -training >> step=7299600, episode=1217 reward=0.7969152 (531.26 it/sec) -training >> step=7299700, episode=1217 reward=0.7906293 (521.80 it/sec) -training >> step=7299800, episode=1217 reward=0.776414 (515.32 it/sec) -training >> step=7299900, episode=1217 reward=0.7871093 (528.75 it/sec) -training >> step=7300000, episode=1217 reward=0.7929672 (493.71 it/sec) -training >> step=7300100, episode=1217 reward=0.8006793 (553.85 it/sec) -training >> step=7300200, episode=1217 reward=0.7546524 (512.91 it/sec) -training >> step=7300300, episode=1217 reward=0.7454597 (504.58 it/sec) -training >> step=7300400, episode=1217 reward=0.7703134 (544.36 it/sec) -training >> step=7300500, episode=1217 reward=0.7880641 (512.08 it/sec) -training >> step=7300600, episode=1217 reward=0.780168 (531.72 it/sec) -training >> step=7300700, episode=1217 reward=0.76538 (511.65 it/sec) -training >> step=7300800, episode=1217 reward=0.7786162 (522.00 it/sec) -training >> step=7300900, episode=1217 reward=0.7849044 (500.23 it/sec) -training >> step=7301000, episode=1217 reward=0.7990003 (482.70 it/sec) -training >> step=7301100, episode=1217 reward=0.7816964 (522.65 it/sec) -training >> step=7301200, episode=1217 reward=0.7972891 (540.39 it/sec) -training >> step=7301300, episode=1218 reward=0.7938185 (127.92 it/sec) -training >> step=7301400, episode=1218 reward=0.7841195 (502.21 it/sec) -training >> step=7301500, episode=1218 reward=0.7577089 (502.27 it/sec) -training >> step=7301600, episode=1218 reward=0.7929036 (494.85 it/sec) -training >> step=7301700, episode=1218 reward=0.7708467 (496.82 it/sec) -training >> step=7301800, episode=1218 reward=0.8054408 (489.17 it/sec) -training >> step=7301900, episode=1218 reward=0.7928776 (517.37 it/sec) -training >> step=7302000, episode=1218 reward=0.7961682 (505.85 it/sec) -training >> step=7302100, episode=1218 reward=0.7960324 (486.51 it/sec) -training >> step=7302200, episode=1218 reward=0.7874699 (500.40 it/sec) -training >> step=7302300, episode=1218 reward=0.7727026 (507.63 it/sec) -training >> step=7302400, episode=1218 reward=0.7790905 (490.59 it/sec) -training >> step=7302500, episode=1218 reward=0.7866546 (478.42 it/sec) -training >> step=7302600, episode=1218 reward=0.7933882 (525.01 it/sec) -training >> step=7302700, episode=1218 reward=0.7961172 (522.22 it/sec) -training >> step=7302800, episode=1218 reward=0.7823951 (523.15 it/sec) -training >> step=7302900, episode=1218 reward=0.7909339 (530.40 it/sec) -training >> step=7303000, episode=1218 reward=0.7837452 (540.93 it/sec) -training >> step=7303100, episode=1218 reward=0.7870101 (479.70 it/sec) -training >> step=7303200, episode=1218 reward=0.8013607 (509.21 it/sec) -training >> step=7303300, episode=1218 reward=0.766421 (454.03 it/sec) -training >> step=7303400, episode=1218 reward=0.7650077 (499.07 it/sec) -training >> step=7303500, episode=1218 reward=0.7790883 (525.08 it/sec) -training >> step=7303600, episode=1218 reward=0.7747954 (496.37 it/sec) -training >> step=7303700, episode=1218 reward=0.7745203 (550.03 it/sec) -training >> step=7303800, episode=1218 reward=0.7857082 (509.36 it/sec) -training >> step=7303900, episode=1218 reward=0.7804693 (500.17 it/sec) -training >> step=7304000, episode=1218 reward=0.7701517 (562.30 it/sec) -training >> step=7304100, episode=1218 reward=0.7915083 (471.20 it/sec) -training >> step=7304200, episode=1218 reward=0.8046293 (524.86 it/sec) -training >> step=7304300, episode=1218 reward=0.799929 (511.33 it/sec) -training >> step=7304400, episode=1218 reward=0.7763454 (493.65 it/sec) -training >> step=7304500, episode=1218 reward=0.7709292 (521.39 it/sec) -training >> step=7304600, episode=1218 reward=0.8112192 (492.81 it/sec) -training >> step=7304700, episode=1218 reward=0.7936403 (520.82 it/sec) -training >> step=7304800, episode=1218 reward=0.7774636 (552.15 it/sec) -training >> step=7304900, episode=1218 reward=0.7864252 (505.79 it/sec) -training >> step=7305000, episode=1218 reward=0.7833633 (398.68 it/sec) -training >> step=7305100, episode=1218 reward=0.7840577 (519.43 it/sec) -training >> step=7305200, episode=1218 reward=0.7820321 (495.19 it/sec) -training >> step=7305300, episode=1218 reward=0.8032647 (514.29 it/sec) -training >> step=7305400, episode=1218 reward=0.7937996 (527.95 it/sec) -training >> step=7305500, episode=1218 reward=0.7966494 (515.54 it/sec) -training >> step=7305600, episode=1218 reward=0.7922922 (505.20 it/sec) -training >> step=7305700, episode=1218 reward=0.7761365 (462.15 it/sec) -training >> step=7305800, episode=1218 reward=0.7665673 (517.02 it/sec) -training >> step=7305900, episode=1218 reward=0.777393 (483.77 it/sec) -training >> step=7306000, episode=1218 reward=0.7840292 (517.28 it/sec) -training >> step=7306100, episode=1218 reward=0.7766101 (526.62 it/sec) -training >> step=7306200, episode=1218 reward=0.8094002 (501.60 it/sec) -training >> step=7306300, episode=1218 reward=0.7849498 (475.11 it/sec) -training >> step=7306400, episode=1218 reward=0.7868818 (527.41 it/sec) -training >> step=7306500, episode=1218 reward=0.7887869 (500.79 it/sec) -training >> step=7306600, episode=1218 reward=0.7842924 (540.29 it/sec) -training >> step=7306700, episode=1218 reward=0.794008 (517.74 it/sec) -training >> step=7306800, episode=1218 reward=0.7988742 (496.18 it/sec) -training >> step=7306900, episode=1218 reward=0.7814084 (535.93 it/sec) -training >> step=7307000, episode=1218 reward=0.7933853 (519.06 it/sec) -training >> step=7307100, episode=1218 reward=0.7720805 (491.01 it/sec) -training >> step=7307200, episode=1218 reward=0.8093846 (446.96 it/sec) -training >> step=7307300, episode=1219 reward=0.7788238 (133.96 it/sec) -training >> step=7307400, episode=1219 reward=0.7719319 (517.21 it/sec) -training >> step=7307500, episode=1219 reward=0.7804909 (505.21 it/sec) -training >> step=7307600, episode=1219 reward=0.7899403 (522.69 it/sec) -training >> step=7307700, episode=1219 reward=0.7802727 (495.88 it/sec) -training >> step=7307800, episode=1219 reward=0.7839243 (529.91 it/sec) -training >> step=7307900, episode=1219 reward=0.8018801 (468.66 it/sec) -training >> step=7308000, episode=1219 reward=0.7726027 (430.50 it/sec) -training >> step=7308100, episode=1219 reward=0.7820835 (448.25 it/sec) -training >> step=7308200, episode=1219 reward=0.7876354 (461.66 it/sec) -training >> step=7308300, episode=1219 reward=0.7996724 (494.98 it/sec) -training >> step=7308400, episode=1219 reward=0.7815565 (477.42 it/sec) -training >> step=7308500, episode=1219 reward=0.8010699 (424.05 it/sec) -training >> step=7308600, episode=1219 reward=0.8041561 (454.47 it/sec) -training >> step=7308700, episode=1219 reward=0.8048744 (448.91 it/sec) -training >> step=7308800, episode=1219 reward=0.7973992 (505.48 it/sec) -training >> step=7308900, episode=1219 reward=0.807685 (482.56 it/sec) -training >> step=7309000, episode=1219 reward=0.7875315 (418.76 it/sec) -training >> step=7309100, episode=1219 reward=0.786757 (416.50 it/sec) -training >> step=7309200, episode=1219 reward=0.7668166 (404.62 it/sec) -training >> step=7309300, episode=1219 reward=0.7823964 (474.74 it/sec) -training >> step=7309400, episode=1219 reward=0.7951419 (440.65 it/sec) -training >> step=7309500, episode=1219 reward=0.7829394 (446.49 it/sec) -training >> step=7309600, episode=1219 reward=0.7871601 (421.85 it/sec) -training >> step=7309700, episode=1219 reward=0.8068041 (486.56 it/sec) -training >> step=7309800, episode=1219 reward=0.7794399 (433.11 it/sec) -training >> step=7309900, episode=1219 reward=0.7961035 (482.73 it/sec) -training >> step=7310000, episode=1219 reward=0.7718832 (426.09 it/sec) -training >> step=7310100, episode=1219 reward=0.7992984 (514.87 it/sec) -training >> step=7310200, episode=1219 reward=0.7992783 (500.40 it/sec) -training >> step=7310300, episode=1219 reward=0.7814493 (477.62 it/sec) -training >> step=7310400, episode=1219 reward=0.7905554 (426.79 it/sec) -training >> step=7310500, episode=1219 reward=0.7981636 (456.90 it/sec) -training >> step=7310600, episode=1219 reward=0.8099337 (438.20 it/sec) -training >> step=7310700, episode=1219 reward=0.7846959 (483.01 it/sec) -training >> step=7310800, episode=1219 reward=0.805721 (425.34 it/sec) -training >> step=7310900, episode=1219 reward=0.8011503 (412.86 it/sec) -training >> step=7311000, episode=1219 reward=0.8015396 (476.29 it/sec) -training >> step=7311100, episode=1219 reward=0.7995135 (349.69 it/sec) -training >> step=7311200, episode=1219 reward=0.7808356 (536.37 it/sec) -training >> step=7311300, episode=1219 reward=0.7876126 (504.60 it/sec) -training >> step=7311400, episode=1219 reward=0.7775416 (503.35 it/sec) -training >> step=7311500, episode=1219 reward=0.775825 (481.57 it/sec) -training >> step=7311600, episode=1219 reward=0.7779713 (502.73 it/sec) -training >> step=7311700, episode=1219 reward=0.7679822 (412.54 it/sec) -training >> step=7311800, episode=1219 reward=0.8084815 (466.35 it/sec) -training >> step=7311900, episode=1219 reward=0.7913604 (479.69 it/sec) -training >> step=7312000, episode=1219 reward=0.7602773 (521.47 it/sec) -training >> step=7312100, episode=1219 reward=0.8020757 (503.11 it/sec) -training >> step=7312200, episode=1219 reward=0.785305 (488.81 it/sec) -training >> step=7312300, episode=1219 reward=0.7949677 (450.37 it/sec) -training >> step=7312400, episode=1219 reward=0.7753131 (409.55 it/sec) -training >> step=7312500, episode=1219 reward=0.7877098 (430.93 it/sec) -training >> step=7312600, episode=1219 reward=0.7811157 (398.68 it/sec) -training >> step=7312700, episode=1219 reward=0.7906187 (513.70 it/sec) -training >> step=7312800, episode=1219 reward=0.7757218 (510.32 it/sec) -training >> step=7312900, episode=1219 reward=0.7779782 (514.66 it/sec) -training >> step=7313000, episode=1219 reward=0.7733592 (534.71 it/sec) -training >> step=7313100, episode=1219 reward=0.7886872 (484.84 it/sec) -training >> step=7313200, episode=1219 reward=0.8081433 (484.28 it/sec) -training >> step=7313300, episode=1220 reward=0.7750743 (99.50 it/sec) -training >> step=7313400, episode=1220 reward=0.75861 (491.34 it/sec) -training >> step=7313500, episode=1220 reward=0.7773076 (461.22 it/sec) -training >> step=7313600, episode=1220 reward=0.7894654 (469.41 it/sec) -training >> step=7313700, episode=1220 reward=0.7791774 (429.96 it/sec) -training >> step=7313800, episode=1220 reward=0.7740734 (472.19 it/sec) -training >> step=7313900, episode=1220 reward=0.7848428 (507.13 it/sec) -training >> step=7314000, episode=1220 reward=0.7944216 (453.96 it/sec) -training >> step=7314100, episode=1220 reward=0.7756042 (505.74 it/sec) -training >> step=7314200, episode=1220 reward=0.7787766 (459.81 it/sec) -training >> step=7314300, episode=1220 reward=0.8030244 (468.31 it/sec) -training >> step=7314400, episode=1220 reward=0.7931917 (457.95 it/sec) -training >> step=7314500, episode=1220 reward=0.8063006 (449.68 it/sec) -training >> step=7314600, episode=1220 reward=0.7915115 (488.29 it/sec) -training >> step=7314700, episode=1220 reward=0.7994152 (485.90 it/sec) -training >> step=7314800, episode=1220 reward=0.7980433 (467.34 it/sec) -training >> step=7314900, episode=1220 reward=0.7886519 (482.33 it/sec) -training >> step=7315000, episode=1220 reward=0.7970485 (415.99 it/sec) -training >> step=7315100, episode=1220 reward=0.7946306 (448.22 it/sec) -training >> step=7315200, episode=1220 reward=0.7851222 (483.77 it/sec) -training >> step=7315300, episode=1220 reward=0.8015892 (463.34 it/sec) -training >> step=7315400, episode=1220 reward=0.8034383 (504.47 it/sec) -training >> step=7315500, episode=1220 reward=0.7867209 (494.44 it/sec) -training >> step=7315600, episode=1220 reward=0.7784021 (493.24 it/sec) -training >> step=7315700, episode=1220 reward=0.8017154 (490.12 it/sec) -training >> step=7315800, episode=1220 reward=0.7775066 (512.98 it/sec) -training >> step=7315900, episode=1220 reward=0.7781153 (471.30 it/sec) -training >> step=7316000, episode=1220 reward=0.7933479 (404.57 it/sec) -training >> step=7316100, episode=1220 reward=0.7713912 (466.89 it/sec) -training >> step=7316200, episode=1220 reward=0.8041846 (516.97 it/sec) -training >> step=7316300, episode=1220 reward=0.770041 (476.92 it/sec) -training >> step=7316400, episode=1220 reward=0.7751876 (483.16 it/sec) -training >> step=7316500, episode=1220 reward=0.7842169 (488.53 it/sec) -training >> step=7316600, episode=1220 reward=0.786364 (475.69 it/sec) -training >> step=7316700, episode=1220 reward=0.7780787 (483.16 it/sec) -training >> step=7316800, episode=1220 reward=0.7938446 (502.41 it/sec) -training >> step=7316900, episode=1220 reward=0.7813339 (476.77 it/sec) -training >> step=7317000, episode=1220 reward=0.7902238 (468.14 it/sec) -training >> step=7317100, episode=1220 reward=0.8092541 (469.85 it/sec) -training >> step=7317200, episode=1220 reward=0.7919758 (421.46 it/sec) -training >> step=7317300, episode=1220 reward=0.7998452 (471.50 it/sec) -training >> step=7317400, episode=1220 reward=0.7783011 (481.15 it/sec) -training >> step=7317500, episode=1220 reward=0.8045971 (503.83 it/sec) -training >> step=7317600, episode=1220 reward=0.7732899 (501.63 it/sec) -training >> step=7317700, episode=1220 reward=0.7718069 (492.77 it/sec) -training >> step=7317800, episode=1220 reward=0.7781618 (444.25 it/sec) -training >> step=7317900, episode=1220 reward=0.7975013 (488.30 it/sec) -training >> step=7318000, episode=1220 reward=0.8011169 (525.27 it/sec) -training >> step=7318100, episode=1220 reward=0.7824728 (470.89 it/sec) -training >> step=7318200, episode=1220 reward=0.781482 (507.06 it/sec) -training >> step=7318300, episode=1220 reward=0.7735116 (513.33 it/sec) -training >> step=7318400, episode=1220 reward=0.781749 (483.73 it/sec) -training >> step=7318500, episode=1220 reward=0.7890127 (494.83 it/sec) -training >> step=7318600, episode=1220 reward=0.7824827 (486.88 it/sec) -training >> step=7318700, episode=1220 reward=0.788478 (530.84 it/sec) -training >> step=7318800, episode=1220 reward=0.7891607 (491.25 it/sec) -training >> step=7318900, episode=1220 reward=0.7961355 (423.66 it/sec) -training >> step=7319000, episode=1220 reward=0.7823053 (536.27 it/sec) -training >> step=7319100, episode=1220 reward=0.7982154 (503.41 it/sec) -training >> step=7319200, episode=1220 reward=0.7947034 (513.14 it/sec) -training >> step=7319300, episode=1221 reward=0.7960696 (162.34 it/sec) -training >> step=7319400, episode=1221 reward=0.7719659 (492.69 it/sec) -training >> step=7319500, episode=1221 reward=0.7838607 (452.96 it/sec) -training >> step=7319600, episode=1221 reward=0.7800335 (499.16 it/sec) -training >> step=7319700, episode=1221 reward=0.785257 (465.43 it/sec) -training >> step=7319800, episode=1221 reward=0.7806963 (530.84 it/sec) -training >> step=7319900, episode=1221 reward=0.8032753 (498.51 it/sec) -training >> step=7320000, episode=1221 reward=0.79647 (509.31 it/sec) -training >> step=7320100, episode=1221 reward=0.8079894 (495.43 it/sec) -training >> step=7320200, episode=1221 reward=0.7858477 (472.32 it/sec) -training >> step=7320300, episode=1221 reward=0.7895628 (514.53 it/sec) -training >> step=7320400, episode=1221 reward=0.8146976 (488.24 it/sec) -training >> step=7320500, episode=1221 reward=0.7827363 (524.77 it/sec) -training >> step=7320600, episode=1221 reward=0.7903948 (502.71 it/sec) -training >> step=7320700, episode=1221 reward=0.7956858 (454.81 it/sec) -training >> step=7320800, episode=1221 reward=0.8086486 (494.34 it/sec) -training >> step=7320900, episode=1221 reward=0.8004966 (502.79 it/sec) -training >> step=7321000, episode=1221 reward=0.7986607 (485.45 it/sec) -training >> step=7321100, episode=1221 reward=0.7922195 (475.37 it/sec) -training >> step=7321200, episode=1221 reward=0.8035963 (508.80 it/sec) -training >> step=7321300, episode=1221 reward=0.7773023 (478.02 it/sec) -training >> step=7321400, episode=1221 reward=0.7783281 (491.21 it/sec) -training >> step=7321500, episode=1221 reward=0.776498 (512.94 it/sec) -training >> step=7321600, episode=1221 reward=0.7976029 (523.90 it/sec) -training >> step=7321700, episode=1221 reward=0.7894364 (477.63 it/sec) -training >> step=7321800, episode=1221 reward=0.7928884 (470.09 it/sec) -training >> step=7321900, episode=1221 reward=0.8112859 (500.32 it/sec) -training >> step=7322000, episode=1221 reward=0.801586 (520.56 it/sec) -training >> step=7322100, episode=1221 reward=0.8056726 (486.23 it/sec) -training >> step=7322200, episode=1221 reward=0.7861235 (439.97 it/sec) -training >> step=7322300, episode=1221 reward=0.7832056 (489.72 it/sec) -training >> step=7322400, episode=1221 reward=0.785701 (460.90 it/sec) -training >> step=7322500, episode=1221 reward=0.786137 (487.31 it/sec) -training >> step=7322600, episode=1221 reward=0.7938315 (495.30 it/sec) -training >> step=7322700, episode=1221 reward=0.7807109 (505.11 it/sec) -training >> step=7322800, episode=1221 reward=0.7852551 (523.64 it/sec) -training >> step=7322900, episode=1221 reward=0.7901555 (486.23 it/sec) -training >> step=7323000, episode=1221 reward=0.7961752 (518.51 it/sec) -training >> step=7323100, episode=1221 reward=0.7914032 (450.83 it/sec) -training >> step=7323200, episode=1221 reward=0.7739637 (461.82 it/sec) -training >> step=7323300, episode=1221 reward=0.7984153 (370.09 it/sec) -training >> step=7323400, episode=1221 reward=0.8053589 (500.96 it/sec) -training >> step=7323500, episode=1221 reward=0.8013024 (446.55 it/sec) -training >> step=7323600, episode=1221 reward=0.782382 (492.10 it/sec) -training >> step=7323700, episode=1221 reward=0.7683415 (421.74 it/sec) -training >> step=7323800, episode=1221 reward=0.7788698 (494.37 it/sec) -training >> step=7323900, episode=1221 reward=0.7667897 (521.25 it/sec) -training >> step=7324000, episode=1221 reward=0.7928966 (522.69 it/sec) -training >> step=7324100, episode=1221 reward=0.7928419 (523.59 it/sec) -training >> step=7324200, episode=1221 reward=0.76217 (503.71 it/sec) -training >> step=7324300, episode=1221 reward=0.7680584 (475.28 it/sec) -training >> step=7324400, episode=1221 reward=0.7821127 (454.94 it/sec) -training >> step=7324500, episode=1221 reward=0.789823 (485.01 it/sec) -training >> step=7324600, episode=1221 reward=0.7798547 (470.23 it/sec) -training >> step=7324700, episode=1221 reward=0.7768764 (439.11 it/sec) -training >> step=7324800, episode=1221 reward=0.7903289 (516.52 it/sec) -training >> step=7324900, episode=1221 reward=0.7604924 (496.34 it/sec) -training >> step=7325000, episode=1221 reward=0.7646901 (519.96 it/sec) -training >> step=7325100, episode=1221 reward=0.7648797 (539.28 it/sec) -training >> step=7325200, episode=1221 reward=0.7934113 (535.21 it/sec) -training >> step=7325300, episode=1222 reward=0.7736811 (133.17 it/sec) -training >> step=7325400, episode=1222 reward=0.7792606 (455.59 it/sec) -training >> step=7325500, episode=1222 reward=0.7819403 (540.31 it/sec) -training >> step=7325600, episode=1222 reward=0.7916703 (525.56 it/sec) -training >> step=7325700, episode=1222 reward=0.7997107 (527.45 it/sec) -training >> step=7325800, episode=1222 reward=0.787289 (528.53 it/sec) -training >> step=7325900, episode=1222 reward=0.7895938 (495.33 it/sec) -training >> step=7326000, episode=1222 reward=0.783784 (475.95 it/sec) -training >> step=7326100, episode=1222 reward=0.7933038 (503.10 it/sec) -training >> step=7326200, episode=1222 reward=0.7892642 (506.74 it/sec) -training >> step=7326300, episode=1222 reward=0.7792152 (553.91 it/sec) -training >> step=7326400, episode=1222 reward=0.7906365 (524.16 it/sec) -training >> step=7326500, episode=1222 reward=0.7790628 (468.92 it/sec) -training >> step=7326600, episode=1222 reward=0.8028455 (481.13 it/sec) -training >> step=7326700, episode=1222 reward=0.7922128 (507.66 it/sec) -training >> step=7326800, episode=1222 reward=0.7797859 (506.94 it/sec) -training >> step=7326900, episode=1222 reward=0.8026761 (501.54 it/sec) -training >> step=7327000, episode=1222 reward=0.8045588 (519.99 it/sec) -training >> step=7327100, episode=1222 reward=0.767217 (483.10 it/sec) -training >> step=7327200, episode=1222 reward=0.7915573 (516.21 it/sec) -training >> step=7327300, episode=1222 reward=0.7949697 (531.32 it/sec) -training >> step=7327400, episode=1222 reward=0.7840426 (519.54 it/sec) -training >> step=7327500, episode=1222 reward=0.7962817 (529.23 it/sec) -training >> step=7327600, episode=1222 reward=0.7804122 (524.49 it/sec) -training >> step=7327700, episode=1222 reward=0.7743546 (547.53 it/sec) -training >> step=7327800, episode=1222 reward=0.7674786 (521.92 it/sec) -training >> step=7327900, episode=1222 reward=0.7825106 (498.41 it/sec) -training >> step=7328000, episode=1222 reward=0.8125653 (514.46 it/sec) -training >> step=7328100, episode=1222 reward=0.7945602 (536.41 it/sec) -training >> step=7328200, episode=1222 reward=0.7855524 (528.61 it/sec) -training >> step=7328300, episode=1222 reward=0.7964631 (491.20 it/sec) -training >> step=7328400, episode=1222 reward=0.7727272 (510.60 it/sec) -training >> step=7328500, episode=1222 reward=0.8020966 (479.43 it/sec) -training >> step=7328600, episode=1222 reward=0.7934394 (492.71 it/sec) -training >> step=7328700, episode=1222 reward=0.7933379 (514.21 it/sec) -training >> step=7328800, episode=1222 reward=0.792118 (516.22 it/sec) -training >> step=7328900, episode=1222 reward=0.7979483 (523.97 it/sec) -training >> step=7329000, episode=1222 reward=0.786164 (417.14 it/sec) -training >> step=7329100, episode=1222 reward=0.7697637 (489.31 it/sec) -training >> step=7329200, episode=1222 reward=0.7987317 (534.59 it/sec) -training >> step=7329300, episode=1222 reward=0.784206 (474.25 it/sec) -training >> step=7329400, episode=1222 reward=0.7972389 (452.66 it/sec) -training >> step=7329500, episode=1222 reward=0.7811734 (389.30 it/sec) -training >> step=7329600, episode=1222 reward=0.7987875 (498.03 it/sec) -training >> step=7329700, episode=1222 reward=0.7825523 (463.67 it/sec) -training >> step=7329800, episode=1222 reward=0.7854176 (499.36 it/sec) -training >> step=7329900, episode=1222 reward=0.7783167 (447.37 it/sec) -training >> step=7330000, episode=1222 reward=0.7917883 (498.59 it/sec) -training >> step=7330100, episode=1222 reward=0.7870023 (504.26 it/sec) -training >> step=7330200, episode=1222 reward=0.7547127 (520.07 it/sec) -training >> step=7330300, episode=1222 reward=0.7816531 (464.62 it/sec) -training >> step=7330400, episode=1222 reward=0.8071222 (446.35 it/sec) -training >> step=7330500, episode=1222 reward=0.7747909 (463.56 it/sec) -training >> step=7330600, episode=1222 reward=0.7733847 (511.70 it/sec) -training >> step=7330700, episode=1222 reward=0.7994419 (491.47 it/sec) -training >> step=7330800, episode=1222 reward=0.7781014 (498.43 it/sec) -training >> step=7330900, episode=1222 reward=0.8001705 (503.00 it/sec) -training >> step=7331000, episode=1222 reward=0.7885811 (505.11 it/sec) -training >> step=7331100, episode=1222 reward=0.782497 (500.58 it/sec) -training >> step=7331200, episode=1222 reward=0.7838066 (453.15 it/sec) -training >> step=7331300, episode=1223 reward=0.7849155 (70.82 it/sec) -training >> step=7331400, episode=1223 reward=0.780369 (493.78 it/sec) -training >> step=7331500, episode=1223 reward=0.7914549 (499.77 it/sec) -training >> step=7331600, episode=1223 reward=0.7878652 (544.40 it/sec) -training >> step=7331700, episode=1223 reward=0.7729809 (466.07 it/sec) -training >> step=7331800, episode=1223 reward=0.7799972 (426.03 it/sec) -training >> step=7331900, episode=1223 reward=0.7948304 (481.08 it/sec) -training >> step=7332000, episode=1223 reward=0.7889025 (488.03 it/sec) -training >> step=7332100, episode=1223 reward=0.7975983 (504.42 it/sec) -training >> step=7332200, episode=1223 reward=0.7878921 (510.09 it/sec) -training >> step=7332300, episode=1223 reward=0.7925586 (507.80 it/sec) -training >> step=7332400, episode=1223 reward=0.7924144 (492.10 it/sec) -training >> step=7332500, episode=1223 reward=0.7811707 (551.81 it/sec) -training >> step=7332600, episode=1223 reward=0.7905048 (526.11 it/sec) -training >> step=7332700, episode=1223 reward=0.7879584 (512.03 it/sec) -training >> step=7332800, episode=1223 reward=0.8014514 (497.78 it/sec) -training >> step=7332900, episode=1223 reward=0.7851631 (510.97 it/sec) -training >> step=7333000, episode=1223 reward=0.7986282 (481.75 it/sec) -training >> step=7333100, episode=1223 reward=0.7809888 (481.32 it/sec) -training >> step=7333200, episode=1223 reward=0.7775686 (452.37 it/sec) -training >> step=7333300, episode=1223 reward=0.8093624 (529.88 it/sec) -training >> step=7333400, episode=1223 reward=0.7863035 (492.81 it/sec) -training >> step=7333500, episode=1223 reward=0.7728673 (486.95 it/sec) -training >> step=7333600, episode=1223 reward=0.7857544 (526.70 it/sec) -training >> step=7333700, episode=1223 reward=0.7841768 (489.00 it/sec) -training >> step=7333800, episode=1223 reward=0.7848266 (518.68 it/sec) -training >> step=7333900, episode=1223 reward=0.7971291 (472.25 it/sec) -training >> step=7334000, episode=1223 reward=0.8001597 (546.60 it/sec) -training >> step=7334100, episode=1223 reward=0.7897284 (496.02 it/sec) -training >> step=7334200, episode=1223 reward=0.7931268 (511.83 it/sec) -training >> step=7334300, episode=1223 reward=0.7860253 (520.58 it/sec) -training >> step=7334400, episode=1223 reward=0.7954603 (497.23 it/sec) -training >> step=7334500, episode=1223 reward=0.7927454 (502.99 it/sec) -training >> step=7334600, episode=1223 reward=0.7812588 (526.97 it/sec) -training >> step=7334700, episode=1223 reward=0.7897549 (507.61 it/sec) -training >> step=7334800, episode=1223 reward=0.7809973 (488.83 it/sec) -training >> step=7334900, episode=1223 reward=0.7991434 (484.33 it/sec) -training >> step=7335000, episode=1223 reward=0.7616182 (451.07 it/sec) -training >> step=7335100, episode=1223 reward=0.7843145 (471.42 it/sec) -training >> step=7335200, episode=1223 reward=0.7963997 (480.37 it/sec) -training >> step=7335300, episode=1223 reward=0.7811078 (461.79 it/sec) -training >> step=7335400, episode=1223 reward=0.7907658 (502.92 it/sec) -training >> step=7335500, episode=1223 reward=0.7677276 (458.51 it/sec) -training >> step=7335600, episode=1223 reward=0.7804914 (466.56 it/sec) -training >> step=7335700, episode=1223 reward=0.7797065 (464.86 it/sec) -training >> step=7335800, episode=1223 reward=0.780233 (432.57 it/sec) -training >> step=7335900, episode=1223 reward=0.7790381 (348.65 it/sec) -training >> step=7336000, episode=1223 reward=0.7912447 (470.01 it/sec) -training >> step=7336100, episode=1223 reward=0.7960025 (436.83 it/sec) -training >> step=7336200, episode=1223 reward=0.7654561 (508.27 it/sec) -training >> step=7336300, episode=1223 reward=0.7968708 (448.81 it/sec) -training >> step=7336400, episode=1223 reward=0.7798297 (479.30 it/sec) -training >> step=7336500, episode=1223 reward=0.7720433 (493.19 it/sec) -training >> step=7336600, episode=1223 reward=0.7807385 (493.34 it/sec) -training >> step=7336700, episode=1223 reward=0.7806891 (481.75 it/sec) -training >> step=7336800, episode=1223 reward=0.7689976 (501.00 it/sec) -training >> step=7336900, episode=1223 reward=0.7915255 (502.29 it/sec) -training >> step=7337000, episode=1223 reward=0.7710642 (495.89 it/sec) -training >> step=7337100, episode=1223 reward=0.7961047 (467.00 it/sec) -training >> step=7337200, episode=1223 reward=0.7879581 (504.77 it/sec) -training >> step=7337300, episode=1224 reward=0.7776234 (52.72 it/sec) -training >> step=7337400, episode=1224 reward=0.769644 (489.82 it/sec) -training >> step=7337500, episode=1224 reward=0.7824202 (496.41 it/sec) -training >> step=7337600, episode=1224 reward=0.7893139 (442.44 it/sec) -training >> step=7337700, episode=1224 reward=0.7915468 (456.75 it/sec) -training >> step=7337800, episode=1224 reward=0.7916914 (527.27 it/sec) -training >> step=7337900, episode=1224 reward=0.7519866 (530.40 it/sec) -training >> step=7338000, episode=1224 reward=0.7614906 (508.16 it/sec) -training >> step=7338100, episode=1224 reward=0.7938098 (516.88 it/sec) -training >> step=7338200, episode=1224 reward=0.7996553 (502.98 it/sec) -training >> step=7338300, episode=1224 reward=0.7672862 (536.47 it/sec) -training >> step=7338400, episode=1224 reward=0.7964138 (503.20 it/sec) -training >> step=7338500, episode=1224 reward=0.7689509 (538.12 it/sec) -training >> step=7338600, episode=1224 reward=0.7889094 (495.39 it/sec) -training >> step=7338700, episode=1224 reward=0.8081521 (518.58 it/sec) -training >> step=7338800, episode=1224 reward=0.7765138 (468.87 it/sec) -training >> step=7338900, episode=1224 reward=0.7979323 (433.89 it/sec) -training >> step=7339000, episode=1224 reward=0.7905897 (484.46 it/sec) -training >> step=7339100, episode=1224 reward=0.7763182 (451.47 it/sec) -training >> step=7339200, episode=1224 reward=0.7895379 (422.71 it/sec) -training >> step=7339300, episode=1224 reward=0.791164 (467.12 it/sec) -training >> step=7339400, episode=1224 reward=0.7937265 (451.87 it/sec) -training >> step=7339500, episode=1224 reward=0.7887037 (407.19 it/sec) -training >> step=7339600, episode=1224 reward=0.7856318 (460.63 it/sec) -training >> step=7339700, episode=1224 reward=0.7760178 (408.69 it/sec) -training >> step=7339800, episode=1224 reward=0.7908764 (452.35 it/sec) -training >> step=7339900, episode=1224 reward=0.774088 (469.68 it/sec) -training >> step=7340000, episode=1224 reward=0.7776482 (469.69 it/sec) -training >> step=7340100, episode=1224 reward=0.7897809 (440.39 it/sec) -training >> step=7340200, episode=1224 reward=0.7846274 (482.62 it/sec) -training >> step=7340300, episode=1224 reward=0.7896775 (502.66 it/sec) -training >> step=7340400, episode=1224 reward=0.7783471 (484.77 it/sec) -training >> step=7340500, episode=1224 reward=0.7862413 (456.03 it/sec) -training >> step=7340600, episode=1224 reward=0.7943622 (471.91 it/sec) -training >> step=7340700, episode=1224 reward=0.8028184 (454.84 it/sec) -training >> step=7340800, episode=1224 reward=0.7886214 (496.92 it/sec) -training >> step=7340900, episode=1224 reward=0.7822193 (513.55 it/sec) -training >> step=7341000, episode=1224 reward=0.7960951 (486.53 it/sec) -training >> step=7341100, episode=1224 reward=0.7948964 (476.13 it/sec) -training >> step=7341200, episode=1224 reward=0.7935647 (484.73 it/sec) -training >> step=7341300, episode=1224 reward=0.7895061 (511.31 it/sec) -training >> step=7341400, episode=1224 reward=0.7771828 (485.32 it/sec) -training >> step=7341500, episode=1224 reward=0.7919824 (466.69 it/sec) -training >> step=7341600, episode=1224 reward=0.7664213 (507.27 it/sec) -training >> step=7341700, episode=1224 reward=0.7979374 (480.78 it/sec) -training >> step=7341800, episode=1224 reward=0.7897695 (478.72 it/sec) -training >> step=7341900, episode=1224 reward=0.7918639 (511.52 it/sec) -training >> step=7342000, episode=1224 reward=0.7971752 (494.07 it/sec) -training >> step=7342100, episode=1224 reward=0.784089 (386.48 it/sec) -training >> step=7342200, episode=1224 reward=0.796149 (499.18 it/sec) -training >> step=7342300, episode=1224 reward=0.7808493 (472.89 it/sec) -training >> step=7342400, episode=1224 reward=0.7859282 (480.25 it/sec) -training >> step=7342500, episode=1224 reward=0.7914969 (471.67 it/sec) -training >> step=7342600, episode=1224 reward=0.7834746 (465.15 it/sec) -training >> step=7342700, episode=1224 reward=0.7894683 (506.39 it/sec) -training >> step=7342800, episode=1224 reward=0.7820252 (491.82 it/sec) -training >> step=7342900, episode=1224 reward=0.8036668 (483.68 it/sec) -training >> step=7343000, episode=1224 reward=0.8074241 (524.61 it/sec) -training >> step=7343100, episode=1224 reward=0.7960702 (485.40 it/sec) -training >> step=7343200, episode=1224 reward=0.7733057 (453.35 it/sec) -training >> step=7343300, episode=1225 reward=0.7704971 (97.19 it/sec) -training >> step=7343400, episode=1225 reward=0.7747099 (508.97 it/sec) -training >> step=7343500, episode=1225 reward=0.7969173 (465.22 it/sec) -training >> step=7343600, episode=1225 reward=0.7883314 (490.72 it/sec) -training >> step=7343700, episode=1225 reward=0.7910007 (510.68 it/sec) -training >> step=7343800, episode=1225 reward=0.8019615 (470.61 it/sec) -training >> step=7343900, episode=1225 reward=0.7845265 (490.50 it/sec) -training >> step=7344000, episode=1225 reward=0.7712342 (518.30 it/sec) -training >> step=7344100, episode=1225 reward=0.7751318 (474.14 it/sec) -training >> step=7344200, episode=1225 reward=0.8113828 (495.34 it/sec) -training >> step=7344300, episode=1225 reward=0.7668 (509.64 it/sec) -training >> step=7344400, episode=1225 reward=0.792161 (491.63 it/sec) -training >> step=7344500, episode=1225 reward=0.7887565 (490.63 it/sec) -training >> step=7344600, episode=1225 reward=0.8000624 (436.06 it/sec) -training >> step=7344700, episode=1225 reward=0.7789218 (508.11 it/sec) -training >> step=7344800, episode=1225 reward=0.8060023 (496.11 it/sec) -training >> step=7344900, episode=1225 reward=0.7728186 (499.44 it/sec) -training >> step=7345000, episode=1225 reward=0.8047297 (486.31 it/sec) -training >> step=7345100, episode=1225 reward=0.7848924 (472.55 it/sec) -training >> step=7345200, episode=1225 reward=0.7874953 (505.03 it/sec) -training >> step=7345300, episode=1225 reward=0.7979228 (475.51 it/sec) -training >> step=7345400, episode=1225 reward=0.7952849 (505.09 it/sec) -training >> step=7345500, episode=1225 reward=0.7781327 (515.01 it/sec) -training >> step=7345600, episode=1225 reward=0.7795715 (470.48 it/sec) -training >> step=7345700, episode=1225 reward=0.7918165 (493.98 it/sec) -training >> step=7345800, episode=1225 reward=0.7996074 (494.10 it/sec) -training >> step=7345900, episode=1225 reward=0.8041027 (490.20 it/sec) -training >> step=7346000, episode=1225 reward=0.7829033 (515.44 it/sec) -training >> step=7346100, episode=1225 reward=0.8030553 (477.59 it/sec) -training >> step=7346200, episode=1225 reward=0.7881659 (527.37 it/sec) -training >> step=7346300, episode=1225 reward=0.7941669 (487.78 it/sec) -training >> step=7346400, episode=1225 reward=0.8068104 (479.69 it/sec) -training >> step=7346500, episode=1225 reward=0.7910025 (467.68 it/sec) -training >> step=7346600, episode=1225 reward=0.766303 (474.42 it/sec) -training >> step=7346700, episode=1225 reward=0.7779264 (489.78 it/sec) -training >> step=7346800, episode=1225 reward=0.8116241 (484.51 it/sec) -training >> step=7346900, episode=1225 reward=0.8136865 (480.51 it/sec) -training >> step=7347000, episode=1225 reward=0.8162412 (482.23 it/sec) -training >> step=7347100, episode=1225 reward=0.7733735 (491.25 it/sec) -training >> step=7347200, episode=1225 reward=0.7644187 (468.75 it/sec) -training >> step=7347300, episode=1225 reward=0.7794563 (540.97 it/sec) -training >> step=7347400, episode=1225 reward=0.7984155 (478.12 it/sec) -training >> step=7347500, episode=1225 reward=0.784313 (452.29 it/sec) -training >> step=7347600, episode=1225 reward=0.7860634 (481.67 it/sec) -training >> step=7347700, episode=1225 reward=0.7820762 (499.13 it/sec) -training >> step=7347800, episode=1225 reward=0.802307 (493.95 it/sec) -training >> step=7347900, episode=1225 reward=0.7709204 (510.86 it/sec) -training >> step=7348000, episode=1225 reward=0.7707705 (497.21 it/sec) -training >> step=7348100, episode=1225 reward=0.8013151 (456.15 it/sec) -training >> step=7348200, episode=1225 reward=0.7793416 (490.35 it/sec) -training >> step=7348300, episode=1225 reward=0.7763741 (356.34 it/sec) -training >> step=7348400, episode=1225 reward=0.7753154 (529.14 it/sec) -training >> step=7348500, episode=1225 reward=0.7838341 (494.41 it/sec) -training >> step=7348600, episode=1225 reward=0.7842036 (497.71 it/sec) -training >> step=7348700, episode=1225 reward=0.7833913 (516.78 it/sec) -training >> step=7348800, episode=1225 reward=0.7874333 (473.02 it/sec) -training >> step=7348900, episode=1225 reward=0.7838442 (481.60 it/sec) -training >> step=7349000, episode=1225 reward=0.7791908 (459.97 it/sec) -training >> step=7349100, episode=1225 reward=0.7802441 (515.61 it/sec) -training >> step=7349200, episode=1225 reward=0.7799746 (500.86 it/sec) -training >> step=7349300, episode=1226 reward=0.7897931 (90.02 it/sec) -training >> step=7349400, episode=1226 reward=0.7910023 (480.03 it/sec) -training >> step=7349500, episode=1226 reward=0.7666921 (451.23 it/sec) -training >> step=7349600, episode=1226 reward=0.7779964 (475.30 it/sec) -training >> step=7349700, episode=1226 reward=0.7950395 (509.82 it/sec) -training >> step=7349800, episode=1226 reward=0.793447 (497.16 it/sec) -training >> step=7349900, episode=1226 reward=0.7925904 (461.26 it/sec) -training >> step=7350000, episode=1226 reward=0.7976095 (454.05 it/sec) -training >> step=7350100, episode=1226 reward=0.7977297 (487.91 it/sec) -training >> step=7350200, episode=1226 reward=0.7693532 (444.73 it/sec) -training >> step=7350300, episode=1226 reward=0.7946789 (494.79 it/sec) -training >> step=7350400, episode=1226 reward=0.7926619 (508.16 it/sec) -training >> step=7350500, episode=1226 reward=0.8039546 (500.46 it/sec) -training >> step=7350600, episode=1226 reward=0.7963604 (451.69 it/sec) -training >> step=7350700, episode=1226 reward=0.7836623 (430.63 it/sec) -training >> step=7350800, episode=1226 reward=0.7927452 (486.48 it/sec) -training >> step=7350900, episode=1226 reward=0.7710521 (478.78 it/sec) -training >> step=7351000, episode=1226 reward=0.8007346 (497.66 it/sec) -training >> step=7351100, episode=1226 reward=0.7944563 (488.66 it/sec) -training >> step=7351200, episode=1226 reward=0.7849355 (469.56 it/sec) -training >> step=7351300, episode=1226 reward=0.7939987 (477.95 it/sec) -training >> step=7351400, episode=1226 reward=0.7643644 (513.60 it/sec) -training >> step=7351500, episode=1226 reward=0.7962345 (463.70 it/sec) -training >> step=7351600, episode=1226 reward=0.7956757 (492.36 it/sec) -training >> step=7351700, episode=1226 reward=0.7995602 (478.71 it/sec) -training >> step=7351800, episode=1226 reward=0.7852893 (513.61 it/sec) -training >> step=7351900, episode=1226 reward=0.7866225 (467.26 it/sec) -training >> step=7352000, episode=1226 reward=0.774301 (459.73 it/sec) -training >> step=7352100, episode=1226 reward=0.7774435 (469.33 it/sec) -training >> step=7352200, episode=1226 reward=0.7932615 (508.95 it/sec) -training >> step=7352300, episode=1226 reward=0.8003714 (495.11 it/sec) -training >> step=7352400, episode=1226 reward=0.7761866 (407.96 it/sec) -training >> step=7352500, episode=1226 reward=0.7810035 (491.71 it/sec) -training >> step=7352600, episode=1226 reward=0.7798771 (451.70 it/sec) -training >> step=7352700, episode=1226 reward=0.7909094 (471.02 it/sec) -training >> step=7352800, episode=1226 reward=0.7921346 (470.33 it/sec) -training >> step=7352900, episode=1226 reward=0.7979685 (537.05 it/sec) -training >> step=7353000, episode=1226 reward=0.8072723 (467.89 it/sec) -training >> step=7353100, episode=1226 reward=0.7995369 (478.48 it/sec) -training >> step=7353200, episode=1226 reward=0.7840415 (460.73 it/sec) -training >> step=7353300, episode=1226 reward=0.7888079 (483.55 it/sec) -training >> step=7353400, episode=1226 reward=0.7745759 (508.51 it/sec) -training >> step=7353500, episode=1226 reward=0.7937817 (442.87 it/sec) -training >> step=7353600, episode=1226 reward=0.7881551 (521.80 it/sec) -training >> step=7353700, episode=1226 reward=0.795568 (487.63 it/sec) -training >> step=7353800, episode=1226 reward=0.7988399 (508.67 it/sec) -training >> step=7353900, episode=1226 reward=0.7765213 (476.05 it/sec) -training >> step=7354000, episode=1226 reward=0.784953 (506.99 it/sec) -training >> step=7354100, episode=1226 reward=0.7844011 (475.01 it/sec) -training >> step=7354200, episode=1226 reward=0.7812911 (484.04 it/sec) -training >> step=7354300, episode=1226 reward=0.7835645 (495.54 it/sec) -training >> step=7354400, episode=1226 reward=0.7973105 (518.65 it/sec) -training >> step=7354500, episode=1226 reward=0.7772682 (402.79 it/sec) -training >> step=7354600, episode=1226 reward=0.7980664 (481.48 it/sec) -training >> step=7354700, episode=1226 reward=0.7809083 (505.11 it/sec) -training >> step=7354800, episode=1226 reward=0.762669 (492.65 it/sec) -training >> step=7354900, episode=1226 reward=0.7810794 (467.41 it/sec) -training >> step=7355000, episode=1226 reward=0.7867242 (495.48 it/sec) -training >> step=7355100, episode=1226 reward=0.7888826 (526.90 it/sec) -training >> step=7355200, episode=1226 reward=0.8047616 (489.01 it/sec) -training >> step=7355300, episode=1227 reward=0.7962551 (92.05 it/sec) -training >> step=7355400, episode=1227 reward=0.7787431 (494.91 it/sec) -training >> step=7355500, episode=1227 reward=0.7969308 (484.68 it/sec) -training >> step=7355600, episode=1227 reward=0.7853229 (490.93 it/sec) -training >> step=7355700, episode=1227 reward=0.7938581 (497.68 it/sec) -training >> step=7355800, episode=1227 reward=0.7742547 (530.52 it/sec) -training >> step=7355900, episode=1227 reward=0.7943703 (477.67 it/sec) -training >> step=7356000, episode=1227 reward=0.7844136 (476.93 it/sec) -training >> step=7356100, episode=1227 reward=0.7923334 (522.80 it/sec) -training >> step=7356200, episode=1227 reward=0.8040617 (442.12 it/sec) -training >> step=7356300, episode=1227 reward=0.8010299 (415.31 it/sec) -training >> step=7356400, episode=1227 reward=0.7845594 (508.70 it/sec) -training >> step=7356500, episode=1227 reward=0.7936693 (467.33 it/sec) -training >> step=7356600, episode=1227 reward=0.784112 (503.74 it/sec) -training >> step=7356700, episode=1227 reward=0.7817696 (454.78 it/sec) -training >> step=7356800, episode=1227 reward=0.7775487 (465.37 it/sec) -training >> step=7356900, episode=1227 reward=0.7959725 (483.01 it/sec) -training >> step=7357000, episode=1227 reward=0.7942332 (486.55 it/sec) -training >> step=7357100, episode=1227 reward=0.7803192 (451.18 it/sec) -training >> step=7357200, episode=1227 reward=0.7924802 (486.47 it/sec) -training >> step=7357300, episode=1227 reward=0.7845815 (460.56 it/sec) -training >> step=7357400, episode=1227 reward=0.7954872 (499.63 it/sec) -training >> step=7357500, episode=1227 reward=0.782769 (484.37 it/sec) -training >> step=7357600, episode=1227 reward=0.7828953 (486.88 it/sec) -training >> step=7357700, episode=1227 reward=0.7781241 (488.39 it/sec) -training >> step=7357800, episode=1227 reward=0.7851253 (436.64 it/sec) -training >> step=7357900, episode=1227 reward=0.8070148 (505.03 it/sec) -training >> step=7358000, episode=1227 reward=0.7849242 (477.12 it/sec) -training >> step=7358100, episode=1227 reward=0.7871873 (488.48 it/sec) -training >> step=7358200, episode=1227 reward=0.7716615 (467.83 it/sec) -training >> step=7358300, episode=1227 reward=0.7979404 (458.79 it/sec) -training >> step=7358400, episode=1227 reward=0.7915348 (453.03 it/sec) -training >> step=7358500, episode=1227 reward=0.804299 (426.98 it/sec) -training >> step=7358600, episode=1227 reward=0.7814622 (471.96 it/sec) -training >> step=7358700, episode=1227 reward=0.7864242 (451.36 it/sec) -training >> step=7358800, episode=1227 reward=0.773249 (513.29 it/sec) -training >> step=7358900, episode=1227 reward=0.7869739 (497.96 it/sec) -training >> step=7359000, episode=1227 reward=0.7823393 (470.22 it/sec) -training >> step=7359100, episode=1227 reward=0.7958741 (523.63 it/sec) -training >> step=7359200, episode=1227 reward=0.7763116 (491.63 it/sec) -training >> step=7359300, episode=1227 reward=0.7965196 (475.01 it/sec) -training >> step=7359400, episode=1227 reward=0.7824607 (463.28 it/sec) -training >> step=7359500, episode=1227 reward=0.7859399 (472.63 it/sec) -training >> step=7359600, episode=1227 reward=0.7619385 (489.44 it/sec) -training >> step=7359700, episode=1227 reward=0.7859071 (468.98 it/sec) -training >> step=7359800, episode=1227 reward=0.7842599 (483.00 it/sec) -training >> step=7359900, episode=1227 reward=0.7892768 (521.80 it/sec) -training >> step=7360000, episode=1227 reward=0.7893277 (424.79 it/sec) -training >> step=7360100, episode=1227 reward=0.7850093 (491.72 it/sec) -training >> step=7360200, episode=1227 reward=0.7585379 (496.05 it/sec) -training >> step=7360300, episode=1227 reward=0.8078218 (496.58 it/sec) -training >> step=7360400, episode=1227 reward=0.7998415 (488.85 it/sec) -training >> step=7360500, episode=1227 reward=0.771965 (472.77 it/sec) -training >> step=7360600, episode=1227 reward=0.7883124 (524.00 it/sec) -training >> step=7360700, episode=1227 reward=0.7961817 (343.46 it/sec) -training >> step=7360800, episode=1227 reward=0.7870426 (486.57 it/sec) -training >> step=7360900, episode=1227 reward=0.8013864 (483.04 it/sec) -training >> step=7361000, episode=1227 reward=0.7980152 (521.73 it/sec) -training >> step=7361100, episode=1227 reward=0.8003146 (455.10 it/sec) -training >> step=7361200, episode=1227 reward=0.7886513 (507.75 it/sec) -training >> step=7361300, episode=1228 reward=0.7927899 (91.55 it/sec) -training >> step=7361400, episode=1228 reward=0.7783322 (458.11 it/sec) -training >> step=7361500, episode=1228 reward=0.7747859 (490.15 it/sec) -training >> step=7361600, episode=1228 reward=0.7893789 (486.74 it/sec) -training >> step=7361700, episode=1228 reward=0.7886208 (501.78 it/sec) -training >> step=7361800, episode=1228 reward=0.7822241 (493.51 it/sec) -training >> step=7361900, episode=1228 reward=0.80551 (490.88 it/sec) -training >> step=7362000, episode=1228 reward=0.7815156 (444.49 it/sec) -training >> step=7362100, episode=1228 reward=0.7856528 (483.44 it/sec) -training >> step=7362200, episode=1228 reward=0.7872066 (480.14 it/sec) -training >> step=7362300, episode=1228 reward=0.7786638 (491.37 it/sec) -training >> step=7362400, episode=1228 reward=0.7733622 (458.12 it/sec) -training >> step=7362500, episode=1228 reward=0.7788802 (474.72 it/sec) -training >> step=7362600, episode=1228 reward=0.7895684 (504.46 it/sec) -training >> step=7362700, episode=1228 reward=0.7934892 (498.26 it/sec) -training >> step=7362800, episode=1228 reward=0.7878609 (454.52 it/sec) -training >> step=7362900, episode=1228 reward=0.7983489 (483.53 it/sec) -training >> step=7363000, episode=1228 reward=0.780209 (450.88 it/sec) -training >> step=7363100, episode=1228 reward=0.7657738 (462.68 it/sec) -training >> step=7363200, episode=1228 reward=0.8026853 (427.27 it/sec) -training >> step=7363300, episode=1228 reward=0.7816093 (489.05 it/sec) -training >> step=7363400, episode=1228 reward=0.7816343 (507.52 it/sec) -training >> step=7363500, episode=1228 reward=0.77005 (496.38 it/sec) -training >> step=7363600, episode=1228 reward=0.783772 (472.91 it/sec) -training >> step=7363700, episode=1228 reward=0.8037006 (467.14 it/sec) -training >> step=7363800, episode=1228 reward=0.7717013 (483.20 it/sec) -training >> step=7363900, episode=1228 reward=0.7896044 (453.72 it/sec) -training >> step=7364000, episode=1228 reward=0.8139678 (500.80 it/sec) -training >> step=7364100, episode=1228 reward=0.7861978 (464.07 it/sec) -training >> step=7364200, episode=1228 reward=0.7715876 (489.68 it/sec) -training >> step=7364300, episode=1228 reward=0.7875784 (416.85 it/sec) -training >> step=7364400, episode=1228 reward=0.8031482 (475.32 it/sec) -training >> step=7364500, episode=1228 reward=0.8008271 (482.98 it/sec) -training >> step=7364600, episode=1228 reward=0.7832759 (431.65 it/sec) -training >> step=7364700, episode=1228 reward=0.7978636 (474.35 it/sec) -training >> step=7364800, episode=1228 reward=0.7790626 (471.63 it/sec) -training >> step=7364900, episode=1228 reward=0.7755361 (467.26 it/sec) -training >> step=7365000, episode=1228 reward=0.8040862 (482.54 it/sec) -training >> step=7365100, episode=1228 reward=0.7886698 (469.17 it/sec) -training >> step=7365200, episode=1228 reward=0.7944364 (486.37 it/sec) -training >> step=7365300, episode=1228 reward=0.7831292 (468.18 it/sec) -training >> step=7365400, episode=1228 reward=0.7918786 (501.81 it/sec) -training >> step=7365500, episode=1228 reward=0.7907246 (446.31 it/sec) -training >> step=7365600, episode=1228 reward=0.7722383 (492.97 it/sec) -training >> step=7365700, episode=1228 reward=0.783253 (453.93 it/sec) -training >> step=7365800, episode=1228 reward=0.7832136 (462.50 it/sec) -training >> step=7365900, episode=1228 reward=0.791387 (485.02 it/sec) -training >> step=7366000, episode=1228 reward=0.7940057 (425.08 it/sec) -training >> step=7366100, episode=1228 reward=0.7833411 (477.33 it/sec) -training >> step=7366200, episode=1228 reward=0.7792057 (493.56 it/sec) -training >> step=7366300, episode=1228 reward=0.7852048 (496.73 it/sec) -training >> step=7366400, episode=1228 reward=0.7868796 (490.19 it/sec) -training >> step=7366500, episode=1228 reward=0.7834854 (483.31 it/sec) -training >> step=7366600, episode=1228 reward=0.777396 (516.93 it/sec) -training >> step=7366700, episode=1228 reward=0.7803372 (469.24 it/sec) -training >> step=7366800, episode=1228 reward=0.7999017 (506.17 it/sec) -training >> step=7366900, episode=1228 reward=0.7935374 (374.68 it/sec) -training >> step=7367000, episode=1228 reward=0.7935537 (520.08 it/sec) -training >> step=7367100, episode=1228 reward=0.775457 (455.45 it/sec) -training >> step=7367200, episode=1228 reward=0.7876543 (467.90 it/sec) -training >> step=7367300, episode=1229 reward=0.7749881 (98.09 it/sec) -training >> step=7367400, episode=1229 reward=0.7710983 (481.32 it/sec) -training >> step=7367500, episode=1229 reward=0.7780101 (489.39 it/sec) -training >> step=7367600, episode=1229 reward=0.7993442 (482.64 it/sec) -training >> step=7367700, episode=1229 reward=0.7808692 (521.34 it/sec) -training >> step=7367800, episode=1229 reward=0.7904953 (429.71 it/sec) -training >> step=7367900, episode=1229 reward=0.7927256 (449.43 it/sec) -training >> step=7368000, episode=1229 reward=0.7927721 (492.36 it/sec) -training >> step=7368100, episode=1229 reward=0.8036995 (494.72 it/sec) -training >> step=7368200, episode=1229 reward=0.7883874 (511.53 it/sec) -training >> step=7368300, episode=1229 reward=0.7986276 (484.95 it/sec) -training >> step=7368400, episode=1229 reward=0.800608 (492.44 it/sec) -training >> step=7368500, episode=1229 reward=0.807377 (483.27 it/sec) -training >> step=7368600, episode=1229 reward=0.7706186 (484.46 it/sec) -training >> step=7368700, episode=1229 reward=0.7889588 (498.42 it/sec) -training >> step=7368800, episode=1229 reward=0.7818822 (474.64 it/sec) -training >> step=7368900, episode=1229 reward=0.7674986 (454.26 it/sec) -training >> step=7369000, episode=1229 reward=0.8036701 (479.32 it/sec) -training >> step=7369100, episode=1229 reward=0.8032188 (486.00 it/sec) -training >> step=7369200, episode=1229 reward=0.7925591 (464.34 it/sec) -training >> step=7369300, episode=1229 reward=0.783249 (443.35 it/sec) -training >> step=7369400, episode=1229 reward=0.8113596 (486.06 it/sec) -training >> step=7369500, episode=1229 reward=0.8082997 (487.14 it/sec) -training >> step=7369600, episode=1229 reward=0.8126697 (458.83 it/sec) -training >> step=7369700, episode=1229 reward=0.8029349 (530.04 it/sec) -training >> step=7369800, episode=1229 reward=0.8069781 (495.62 it/sec) -training >> step=7369900, episode=1229 reward=0.7809533 (502.93 it/sec) -training >> step=7370000, episode=1229 reward=0.8052255 (417.26 it/sec) -training >> step=7370100, episode=1229 reward=0.7996638 (456.59 it/sec) -training >> step=7370200, episode=1229 reward=0.7756348 (485.97 it/sec) -training >> step=7370300, episode=1229 reward=0.7919375 (446.42 it/sec) -training >> step=7370400, episode=1229 reward=0.7908443 (470.26 it/sec) -training >> step=7370500, episode=1229 reward=0.7883844 (508.33 it/sec) -training >> step=7370600, episode=1229 reward=0.7948962 (447.24 it/sec) -training >> step=7370700, episode=1229 reward=0.7861025 (498.34 it/sec) -training >> step=7370800, episode=1229 reward=0.8021522 (523.78 it/sec) -training >> step=7370900, episode=1229 reward=0.7864061 (479.84 it/sec) -training >> step=7371000, episode=1229 reward=0.7900941 (486.91 it/sec) -training >> step=7371100, episode=1229 reward=0.7753627 (465.39 it/sec) -training >> step=7371200, episode=1229 reward=0.7785119 (523.53 it/sec) -training >> step=7371300, episode=1229 reward=0.7770874 (483.51 it/sec) -training >> step=7371400, episode=1229 reward=0.795478 (490.86 it/sec) -training >> step=7371500, episode=1229 reward=0.777985 (490.67 it/sec) -training >> step=7371600, episode=1229 reward=0.7940376 (493.12 it/sec) -training >> step=7371700, episode=1229 reward=0.777947 (496.43 it/sec) -training >> step=7371800, episode=1229 reward=0.7957586 (509.69 it/sec) -training >> step=7371900, episode=1229 reward=0.7538118 (511.89 it/sec) -training >> step=7372000, episode=1229 reward=0.7998024 (497.04 it/sec) -training >> step=7372100, episode=1229 reward=0.7754 (478.84 it/sec) -training >> step=7372200, episode=1229 reward=0.7965391 (508.99 it/sec) -training >> step=7372300, episode=1229 reward=0.7870337 (468.75 it/sec) -training >> step=7372400, episode=1229 reward=0.7548989 (468.92 it/sec) -training >> step=7372500, episode=1229 reward=0.7678748 (484.24 it/sec) -training >> step=7372600, episode=1229 reward=0.7782224 (503.27 it/sec) -training >> step=7372700, episode=1229 reward=0.7763681 (452.05 it/sec) -training >> step=7372800, episode=1229 reward=0.8134486 (494.95 it/sec) -training >> step=7372900, episode=1229 reward=0.7726006 (490.46 it/sec) -training >> step=7373000, episode=1229 reward=0.7968544 (463.58 it/sec) -training >> step=7373100, episode=1229 reward=0.7976902 (366.36 it/sec) -training >> step=7373200, episode=1229 reward=0.7780086 (498.08 it/sec) -training >> step=7373300, episode=1230 reward=0.7825238 (96.97 it/sec) -training >> step=7373400, episode=1230 reward=0.7638182 (495.71 it/sec) -training >> step=7373500, episode=1230 reward=0.7961664 (487.28 it/sec) -training >> step=7373600, episode=1230 reward=0.7866551 (483.39 it/sec) -training >> step=7373700, episode=1230 reward=0.7892933 (520.78 it/sec) -training >> step=7373800, episode=1230 reward=0.7952257 (448.90 it/sec) -training >> step=7373900, episode=1230 reward=0.7889126 (464.31 it/sec) -training >> step=7374000, episode=1230 reward=0.7942249 (479.99 it/sec) -training >> step=7374100, episode=1230 reward=0.7920679 (491.20 it/sec) -training >> step=7374200, episode=1230 reward=0.7881047 (501.84 it/sec) -training >> step=7374300, episode=1230 reward=0.7812487 (511.11 it/sec) -training >> step=7374400, episode=1230 reward=0.7928737 (499.62 it/sec) -training >> step=7374500, episode=1230 reward=0.7752182 (460.27 it/sec) -training >> step=7374600, episode=1230 reward=0.7857428 (481.16 it/sec) -training >> step=7374700, episode=1230 reward=0.7940795 (438.78 it/sec) -training >> step=7374800, episode=1230 reward=0.8020837 (452.51 it/sec) -training >> step=7374900, episode=1230 reward=0.7896787 (461.74 it/sec) -training >> step=7375000, episode=1230 reward=0.7850511 (522.94 it/sec) -training >> step=7375100, episode=1230 reward=0.7925959 (462.37 it/sec) -training >> step=7375200, episode=1230 reward=0.7992287 (509.94 it/sec) -training >> step=7375300, episode=1230 reward=0.7942287 (455.60 it/sec) -training >> step=7375400, episode=1230 reward=0.8015659 (508.09 it/sec) -training >> step=7375500, episode=1230 reward=0.7901124 (475.33 it/sec) -training >> step=7375600, episode=1230 reward=0.7724171 (493.47 it/sec) -training >> step=7375700, episode=1230 reward=0.8014247 (460.36 it/sec) -training >> step=7375800, episode=1230 reward=0.8067673 (466.74 it/sec) -training >> step=7375900, episode=1230 reward=0.8052996 (460.03 it/sec) -training >> step=7376000, episode=1230 reward=0.7659611 (480.53 it/sec) -training >> step=7376100, episode=1230 reward=0.7943562 (522.51 it/sec) -training >> step=7376200, episode=1230 reward=0.8027373 (501.54 it/sec) -training >> step=7376300, episode=1230 reward=0.7950578 (447.77 it/sec) -training >> step=7376400, episode=1230 reward=0.7717317 (488.31 it/sec) -training >> step=7376500, episode=1230 reward=0.7995073 (492.98 it/sec) -training >> step=7376600, episode=1230 reward=0.7751955 (487.03 it/sec) -training >> step=7376700, episode=1230 reward=0.8120538 (456.24 it/sec) -training >> step=7376800, episode=1230 reward=0.7888033 (498.13 it/sec) -training >> step=7376900, episode=1230 reward=0.7912037 (474.34 it/sec) -training >> step=7377000, episode=1230 reward=0.8135714 (501.87 it/sec) -training >> step=7377100, episode=1230 reward=0.7894837 (508.46 it/sec) -training >> step=7377200, episode=1230 reward=0.7927387 (509.02 it/sec) -training >> step=7377300, episode=1230 reward=0.7864783 (443.93 it/sec) -training >> step=7377400, episode=1230 reward=0.7834306 (483.04 it/sec) -training >> step=7377500, episode=1230 reward=0.7760527 (489.89 it/sec) -training >> step=7377600, episode=1230 reward=0.7801981 (468.18 it/sec) -training >> step=7377700, episode=1230 reward=0.8006216 (437.81 it/sec) -training >> step=7377800, episode=1230 reward=0.7670316 (500.82 it/sec) -training >> step=7377900, episode=1230 reward=0.7819974 (471.68 it/sec) -training >> step=7378000, episode=1230 reward=0.7827646 (461.75 it/sec) -training >> step=7378100, episode=1230 reward=0.7862892 (498.54 it/sec) -training >> step=7378200, episode=1230 reward=0.7639961 (499.53 it/sec) -training >> step=7378300, episode=1230 reward=0.7715803 (475.07 it/sec) -training >> step=7378400, episode=1230 reward=0.7710148 (478.79 it/sec) -training >> step=7378500, episode=1230 reward=0.774863 (445.36 it/sec) -training >> step=7378600, episode=1230 reward=0.7674749 (517.36 it/sec) -training >> step=7378700, episode=1230 reward=0.8004647 (496.85 it/sec) -training >> step=7378800, episode=1230 reward=0.7943801 (490.97 it/sec) -training >> step=7378900, episode=1230 reward=0.7737105 (480.69 it/sec) -training >> step=7379000, episode=1230 reward=0.7884471 (494.45 it/sec) -training >> step=7379100, episode=1230 reward=0.7721212 (439.87 it/sec) -training >> step=7379200, episode=1230 reward=0.7862374 (477.16 it/sec) -training >> step=7379300, episode=1231 reward=0.8119881 (73.19 it/sec) -training >> step=7379400, episode=1231 reward=0.7914585 (473.00 it/sec) -training >> step=7379500, episode=1231 reward=0.7835099 (489.09 it/sec) -training >> step=7379600, episode=1231 reward=0.7693155 (458.83 it/sec) -training >> step=7379700, episode=1231 reward=0.7855732 (444.02 it/sec) -training >> step=7379800, episode=1231 reward=0.793623 (491.24 it/sec) -training >> step=7379900, episode=1231 reward=0.8184187 (518.21 it/sec) -training >> step=7380000, episode=1231 reward=0.7977259 (475.82 it/sec) -training >> step=7380100, episode=1231 reward=0.7928735 (474.64 it/sec) -training >> step=7380200, episode=1231 reward=0.7857659 (457.82 it/sec) -training >> step=7380300, episode=1231 reward=0.8099885 (445.32 it/sec) -training >> step=7380400, episode=1231 reward=0.7767205 (506.95 it/sec) -training >> step=7380500, episode=1231 reward=0.771369 (467.33 it/sec) -training >> step=7380600, episode=1231 reward=0.8043248 (511.38 it/sec) -training >> step=7380700, episode=1231 reward=0.7858196 (503.78 it/sec) -training >> step=7380800, episode=1231 reward=0.803799 (470.24 it/sec) -training >> step=7380900, episode=1231 reward=0.7940466 (491.17 it/sec) -training >> step=7381000, episode=1231 reward=0.7826778 (502.71 it/sec) -training >> step=7381100, episode=1231 reward=0.7916627 (458.95 it/sec) -training >> step=7381200, episode=1231 reward=0.7763699 (489.72 it/sec) -training >> step=7381300, episode=1231 reward=0.7906704 (518.52 it/sec) -training >> step=7381400, episode=1231 reward=0.8080545 (476.27 it/sec) -training >> step=7381500, episode=1231 reward=0.8143162 (465.86 it/sec) -training >> step=7381600, episode=1231 reward=0.7884156 (500.04 it/sec) -training >> step=7381700, episode=1231 reward=0.7952951 (525.90 it/sec) -training >> step=7381800, episode=1231 reward=0.7872375 (451.67 it/sec) -training >> step=7381900, episode=1231 reward=0.787302 (459.83 it/sec) -training >> step=7382000, episode=1231 reward=0.7980346 (506.22 it/sec) -training >> step=7382100, episode=1231 reward=0.8025299 (452.03 it/sec) -training >> step=7382200, episode=1231 reward=0.8080145 (503.13 it/sec) -training >> step=7382300, episode=1231 reward=0.7813231 (459.58 it/sec) -training >> step=7382400, episode=1231 reward=0.8166846 (512.24 it/sec) -training >> step=7382500, episode=1231 reward=0.781853 (494.05 it/sec) -training >> step=7382600, episode=1231 reward=0.7951317 (430.33 it/sec) -training >> step=7382700, episode=1231 reward=0.7613922 (527.62 it/sec) -training >> step=7382800, episode=1231 reward=0.7915458 (498.57 it/sec) -training >> step=7382900, episode=1231 reward=0.798778 (486.93 it/sec) -training >> step=7383000, episode=1231 reward=0.7862816 (504.20 it/sec) -training >> step=7383100, episode=1231 reward=0.7964414 (518.05 it/sec) -training >> step=7383200, episode=1231 reward=0.7987652 (467.01 it/sec) -training >> step=7383300, episode=1231 reward=0.8006531 (495.23 it/sec) -training >> step=7383400, episode=1231 reward=0.780215 (499.83 it/sec) -training >> step=7383500, episode=1231 reward=0.7868462 (460.65 it/sec) -training >> step=7383600, episode=1231 reward=0.7964883 (458.15 it/sec) -training >> step=7383700, episode=1231 reward=0.7975879 (517.79 it/sec) -training >> step=7383800, episode=1231 reward=0.7835515 (507.72 it/sec) -training >> step=7383900, episode=1231 reward=0.7799126 (450.25 it/sec) -training >> step=7384000, episode=1231 reward=0.7912896 (489.82 it/sec) -training >> step=7384100, episode=1231 reward=0.7851838 (508.94 it/sec) -training >> step=7384200, episode=1231 reward=0.793378 (499.41 it/sec) -training >> step=7384300, episode=1231 reward=0.793367 (449.26 it/sec) -training >> step=7384400, episode=1231 reward=0.7679422 (501.00 it/sec) -training >> step=7384500, episode=1231 reward=0.7670031 (503.29 it/sec) -training >> step=7384600, episode=1231 reward=0.8010463 (505.32 it/sec) -training >> step=7384700, episode=1231 reward=0.7517993 (489.54 it/sec) -training >> step=7384800, episode=1231 reward=0.7723256 (484.08 it/sec) -training >> step=7384900, episode=1231 reward=0.7582409 (489.73 it/sec) -training >> step=7385000, episode=1231 reward=0.8038572 (474.38 it/sec) -training >> step=7385100, episode=1231 reward=0.782586 (487.42 it/sec) -training >> step=7385200, episode=1231 reward=0.7832468 (510.86 it/sec) -training >> step=7385300, episode=1232 reward=0.7924007 (90.85 it/sec) -training >> step=7385400, episode=1232 reward=0.7900387 (435.05 it/sec) -training >> step=7385500, episode=1232 reward=0.7920969 (509.36 it/sec) -training >> step=7385600, episode=1232 reward=0.8040141 (491.95 it/sec) -training >> step=7385700, episode=1232 reward=0.7693972 (501.63 it/sec) -training >> step=7385800, episode=1232 reward=0.7770894 (498.04 it/sec) -training >> step=7385900, episode=1232 reward=0.7648507 (519.53 it/sec) -training >> step=7386000, episode=1232 reward=0.7944813 (459.44 it/sec) -training >> step=7386100, episode=1232 reward=0.7887977 (519.76 it/sec) -training >> step=7386200, episode=1232 reward=0.7725093 (457.09 it/sec) -training >> step=7386300, episode=1232 reward=0.7917495 (444.59 it/sec) -training >> step=7386400, episode=1232 reward=0.8038883 (475.08 it/sec) -training >> step=7386500, episode=1232 reward=0.7746691 (490.50 it/sec) -training >> step=7386600, episode=1232 reward=0.788704 (512.72 it/sec) -training >> step=7386700, episode=1232 reward=0.7925434 (489.76 it/sec) -training >> step=7386800, episode=1232 reward=0.7812996 (506.53 it/sec) -training >> step=7386900, episode=1232 reward=0.7736385 (503.37 it/sec) -training >> step=7387000, episode=1232 reward=0.7677937 (483.55 it/sec) -training >> step=7387100, episode=1232 reward=0.8061377 (473.59 it/sec) -training >> step=7387200, episode=1232 reward=0.7958136 (486.00 it/sec) -training >> step=7387300, episode=1232 reward=0.7591593 (521.60 it/sec) -training >> step=7387400, episode=1232 reward=0.7750309 (504.74 it/sec) -training >> step=7387500, episode=1232 reward=0.7749777 (454.18 it/sec) -training >> step=7387600, episode=1232 reward=0.7999814 (527.83 it/sec) -training >> step=7387700, episode=1232 reward=0.8029824 (506.85 it/sec) -training >> step=7387800, episode=1232 reward=0.7868208 (501.00 it/sec) -training >> step=7387900, episode=1232 reward=0.7812978 (460.75 it/sec) -training >> step=7388000, episode=1232 reward=0.7997072 (527.87 it/sec) -training >> step=7388100, episode=1232 reward=0.8002076 (442.32 it/sec) -training >> step=7388200, episode=1232 reward=0.7768602 (488.04 it/sec) -training >> step=7388300, episode=1232 reward=0.7886739 (504.16 it/sec) -training >> step=7388400, episode=1232 reward=0.7811129 (488.86 it/sec) -training >> step=7388500, episode=1232 reward=0.7876176 (473.21 it/sec) -training >> step=7388600, episode=1232 reward=0.790806 (489.48 it/sec) -training >> step=7388700, episode=1232 reward=0.7845209 (493.55 it/sec) -training >> step=7388800, episode=1232 reward=0.807623 (461.84 it/sec) -training >> step=7388900, episode=1232 reward=0.8079066 (495.67 it/sec) -training >> step=7389000, episode=1232 reward=0.7710837 (478.85 it/sec) -training >> step=7389100, episode=1232 reward=0.8053403 (502.02 it/sec) -training >> step=7389200, episode=1232 reward=0.7759435 (498.40 it/sec) -training >> step=7389300, episode=1232 reward=0.7856438 (467.17 it/sec) -training >> step=7389400, episode=1232 reward=0.7875164 (495.20 it/sec) -training >> step=7389500, episode=1232 reward=0.7806634 (462.82 it/sec) -training >> step=7389600, episode=1232 reward=0.768032 (504.93 it/sec) -training >> step=7389700, episode=1232 reward=0.7919932 (495.85 it/sec) -training >> step=7389800, episode=1232 reward=0.7869542 (508.98 it/sec) -training >> step=7389900, episode=1232 reward=0.7648023 (465.92 it/sec) -training >> step=7390000, episode=1232 reward=0.8020988 (448.50 it/sec) -training >> step=7390100, episode=1232 reward=0.7794155 (495.89 it/sec) -training >> step=7390200, episode=1232 reward=0.8013955 (494.76 it/sec) -training >> step=7390300, episode=1232 reward=0.7741365 (483.49 it/sec) -training >> step=7390400, episode=1232 reward=0.7737831 (484.14 it/sec) -training >> step=7390500, episode=1232 reward=0.779164 (503.93 it/sec) -training >> step=7390600, episode=1232 reward=0.7584279 (515.04 it/sec) -training >> step=7390700, episode=1232 reward=0.7649239 (496.72 it/sec) -training >> step=7390800, episode=1232 reward=0.7903899 (498.16 it/sec) -training >> step=7390900, episode=1232 reward=0.7840575 (525.73 it/sec) -training >> step=7391000, episode=1232 reward=0.7925643 (467.89 it/sec) -training >> step=7391100, episode=1232 reward=0.8026051 (496.14 it/sec) -training >> step=7391200, episode=1232 reward=0.7978733 (451.89 it/sec) -training >> step=7391300, episode=1233 reward=0.7944755 (84.97 it/sec) -training >> step=7391400, episode=1233 reward=0.7333348 (470.00 it/sec) -training >> step=7391500, episode=1233 reward=0.8008216 (465.82 it/sec) -training >> step=7391600, episode=1233 reward=0.8041982 (517.41 it/sec) -training >> step=7391700, episode=1233 reward=0.7751791 (452.54 it/sec) -training >> step=7391800, episode=1233 reward=0.7729281 (502.60 it/sec) -training >> step=7391900, episode=1233 reward=0.7572926 (518.06 it/sec) -training >> step=7392000, episode=1233 reward=0.7879608 (544.12 it/sec) -training >> step=7392100, episode=1233 reward=0.8115719 (489.79 it/sec) -training >> step=7392200, episode=1233 reward=0.7869608 (494.54 it/sec) -training >> step=7392300, episode=1233 reward=0.7944464 (521.42 it/sec) -training >> step=7392400, episode=1233 reward=0.7822865 (460.42 it/sec) -training >> step=7392500, episode=1233 reward=0.7879546 (469.20 it/sec) -training >> step=7392600, episode=1233 reward=0.789907 (498.56 it/sec) -training >> step=7392700, episode=1233 reward=0.7861446 (471.15 it/sec) -training >> step=7392800, episode=1233 reward=0.7838031 (423.93 it/sec) -training >> step=7392900, episode=1233 reward=0.7795226 (480.11 it/sec) -training >> step=7393000, episode=1233 reward=0.7742876 (487.19 it/sec) -training >> step=7393100, episode=1233 reward=0.7952594 (535.32 it/sec) -training >> step=7393200, episode=1233 reward=0.7699305 (470.05 it/sec) -training >> step=7393300, episode=1233 reward=0.786905 (498.59 it/sec) -training >> step=7393400, episode=1233 reward=0.8122494 (539.17 it/sec) -training >> step=7393500, episode=1233 reward=0.7894167 (453.81 it/sec) -training >> step=7393600, episode=1233 reward=0.7743155 (488.24 it/sec) -training >> step=7393700, episode=1233 reward=0.7877538 (511.20 it/sec) -training >> step=7393800, episode=1233 reward=0.791106 (524.23 it/sec) -training >> step=7393900, episode=1233 reward=0.7826201 (468.56 it/sec) -training >> step=7394000, episode=1233 reward=0.7999288 (438.01 it/sec) -training >> step=7394100, episode=1233 reward=0.805103 (518.87 it/sec) -training >> step=7394200, episode=1233 reward=0.7833437 (470.26 it/sec) -training >> step=7394300, episode=1233 reward=0.7989033 (478.65 it/sec) -training >> step=7394400, episode=1233 reward=0.7932882 (490.56 it/sec) -training >> step=7394500, episode=1233 reward=0.7709476 (471.84 it/sec) -training >> step=7394600, episode=1233 reward=0.8147069 (447.46 it/sec) -training >> step=7394700, episode=1233 reward=0.7756692 (460.20 it/sec) -training >> step=7394800, episode=1233 reward=0.7783223 (485.93 it/sec) -training >> step=7394900, episode=1233 reward=0.7765238 (482.29 it/sec) -training >> step=7395000, episode=1233 reward=0.7836004 (488.59 it/sec) -training >> step=7395100, episode=1233 reward=0.7859417 (481.78 it/sec) -training >> step=7395200, episode=1233 reward=0.7701816 (456.71 it/sec) -training >> step=7395300, episode=1233 reward=0.7794833 (489.79 it/sec) -training >> step=7395400, episode=1233 reward=0.779594 (468.13 it/sec) -training >> step=7395500, episode=1233 reward=0.7844586 (470.34 it/sec) -training >> step=7395600, episode=1233 reward=0.7997526 (512.91 it/sec) -training >> step=7395700, episode=1233 reward=0.7787247 (447.70 it/sec) -training >> step=7395800, episode=1233 reward=0.76731 (479.22 it/sec) -training >> step=7395900, episode=1233 reward=0.7670391 (487.24 it/sec) -training >> step=7396000, episode=1233 reward=0.7661964 (452.87 it/sec) -training >> step=7396100, episode=1233 reward=0.7892732 (478.69 it/sec) -training >> step=7396200, episode=1233 reward=0.7975413 (458.70 it/sec) -training >> step=7396300, episode=1233 reward=0.7803671 (533.00 it/sec) -training >> step=7396400, episode=1233 reward=0.7530005 (491.84 it/sec) -training >> step=7396500, episode=1233 reward=0.7881615 (468.02 it/sec) -training >> step=7396600, episode=1233 reward=0.7544613 (477.97 it/sec) -training >> step=7396700, episode=1233 reward=0.7794649 (488.19 it/sec) -training >> step=7396800, episode=1233 reward=0.7854409 (482.30 it/sec) -training >> step=7396900, episode=1233 reward=0.7929078 (507.65 it/sec) -training >> step=7397000, episode=1233 reward=0.7737154 (493.21 it/sec) -training >> step=7397100, episode=1233 reward=0.7664062 (495.34 it/sec) -training >> step=7397200, episode=1233 reward=0.8001398 (508.74 it/sec) -training >> step=7397300, episode=1234 reward=0.8025299 (95.22 it/sec) -training >> step=7397400, episode=1234 reward=0.7883033 (506.99 it/sec) -training >> step=7397500, episode=1234 reward=0.7799656 (476.20 it/sec) -training >> step=7397600, episode=1234 reward=0.7993575 (429.55 it/sec) -training >> step=7397700, episode=1234 reward=0.7726227 (500.06 it/sec) -training >> step=7397800, episode=1234 reward=0.808905 (523.28 it/sec) -training >> step=7397900, episode=1234 reward=0.7778618 (490.63 it/sec) -training >> step=7398000, episode=1234 reward=0.7964495 (494.82 it/sec) -training >> step=7398100, episode=1234 reward=0.790473 (460.45 it/sec) -training >> step=7398200, episode=1234 reward=0.7971105 (483.72 it/sec) -training >> step=7398300, episode=1234 reward=0.7807721 (502.59 it/sec) -training >> step=7398400, episode=1234 reward=0.7690639 (525.87 it/sec) -training >> step=7398500, episode=1234 reward=0.80309 (515.11 it/sec) -training >> step=7398600, episode=1234 reward=0.7910941 (466.58 it/sec) -training >> step=7398700, episode=1234 reward=0.7817082 (453.25 it/sec) -training >> step=7398800, episode=1234 reward=0.7860568 (458.59 it/sec) -training >> step=7398900, episode=1234 reward=0.7877702 (473.33 it/sec) -training >> step=7399000, episode=1234 reward=0.7879573 (452.55 it/sec) -training >> step=7399100, episode=1234 reward=0.7692037 (458.78 it/sec) -training >> step=7399200, episode=1234 reward=0.7764434 (482.73 it/sec) -training >> step=7399300, episode=1234 reward=0.7823481 (493.39 it/sec) -training >> step=7399400, episode=1234 reward=0.779156 (542.09 it/sec) -training >> step=7399500, episode=1234 reward=0.7985333 (447.75 it/sec) -training >> step=7399600, episode=1234 reward=0.8026263 (455.84 it/sec) -training >> step=7399700, episode=1234 reward=0.8017775 (470.67 it/sec) -training >> step=7399800, episode=1234 reward=0.7738017 (521.37 it/sec) -training >> step=7399900, episode=1234 reward=0.7734908 (473.14 it/sec) -training >> step=7400000, episode=1234 reward=0.7891743 (483.28 it/sec) -training >> step=7400100, episode=1234 reward=0.7720474 (532.37 it/sec) -training >> step=7400200, episode=1234 reward=0.801276 (506.01 it/sec) -training >> step=7400300, episode=1234 reward=0.7667471 (463.74 it/sec) -training >> step=7400400, episode=1234 reward=0.7661524 (490.50 it/sec) -training >> step=7400500, episode=1234 reward=0.7855434 (504.35 it/sec) -training >> step=7400600, episode=1234 reward=0.7945354 (453.27 it/sec) -training >> step=7400700, episode=1234 reward=0.7694715 (514.58 it/sec) -training >> step=7400800, episode=1234 reward=0.7883177 (495.65 it/sec) -training >> step=7400900, episode=1234 reward=0.7682037 (507.08 it/sec) -training >> step=7401000, episode=1234 reward=0.7821755 (484.88 it/sec) -training >> step=7401100, episode=1234 reward=0.7857394 (486.82 it/sec) -training >> step=7401200, episode=1234 reward=0.7800167 (460.49 it/sec) -training >> step=7401300, episode=1234 reward=0.7931149 (480.70 it/sec) -training >> step=7401400, episode=1234 reward=0.7642763 (466.05 it/sec) -training >> step=7401500, episode=1234 reward=0.7859185 (511.76 it/sec) -training >> step=7401600, episode=1234 reward=0.7703287 (507.87 it/sec) -training >> step=7401700, episode=1234 reward=0.7802665 (480.74 it/sec) -training >> step=7401800, episode=1234 reward=0.7780914 (498.63 it/sec) -training >> step=7401900, episode=1234 reward=0.7826378 (507.83 it/sec) -training >> step=7402000, episode=1234 reward=0.7717051 (538.51 it/sec) -training >> step=7402100, episode=1234 reward=0.7916417 (461.15 it/sec) -training >> step=7402200, episode=1234 reward=0.7771802 (455.86 it/sec) -training >> step=7402300, episode=1234 reward=0.7685074 (429.07 it/sec) -training >> step=7402400, episode=1234 reward=0.7718046 (489.21 it/sec) -training >> step=7402500, episode=1234 reward=0.740287 (505.40 it/sec) -training >> step=7402600, episode=1234 reward=0.7802829 (492.28 it/sec) -training >> step=7402700, episode=1234 reward=0.7629521 (542.83 it/sec) -training >> step=7402800, episode=1234 reward=0.7692267 (499.33 it/sec) -training >> step=7402900, episode=1234 reward=0.7714201 (451.79 it/sec) -training >> step=7403000, episode=1234 reward=0.7761382 (525.03 it/sec) -training >> step=7403100, episode=1234 reward=0.7886921 (507.02 it/sec) -training >> step=7403200, episode=1234 reward=0.7898688 (510.44 it/sec) -training >> step=7403300, episode=1235 reward=0.7919838 (89.53 it/sec) -training >> step=7403400, episode=1235 reward=0.795193 (477.91 it/sec) -training >> step=7403500, episode=1235 reward=0.7743275 (457.00 it/sec) -training >> step=7403600, episode=1235 reward=0.7990861 (488.83 it/sec) -training >> step=7403700, episode=1235 reward=0.7824575 (481.45 it/sec) -training >> step=7403800, episode=1235 reward=0.8018053 (511.36 it/sec) -training >> step=7403900, episode=1235 reward=0.7774082 (442.16 it/sec) -training >> step=7404000, episode=1235 reward=0.7816792 (525.41 it/sec) -training >> step=7404100, episode=1235 reward=0.8054618 (436.34 it/sec) -training >> step=7404200, episode=1235 reward=0.8053866 (500.56 it/sec) -training >> step=7404300, episode=1235 reward=0.7851645 (483.96 it/sec) -training >> step=7404400, episode=1235 reward=0.7948183 (528.13 it/sec) -training >> step=7404500, episode=1235 reward=0.7825282 (461.13 it/sec) -training >> step=7404600, episode=1235 reward=0.8057709 (476.94 it/sec) -training >> step=7404700, episode=1235 reward=0.7953438 (493.69 it/sec) -training >> step=7404800, episode=1235 reward=0.7751511 (552.00 it/sec) -training >> step=7404900, episode=1235 reward=0.7992151 (473.76 it/sec) -training >> step=7405000, episode=1235 reward=0.8001491 (486.11 it/sec) -training >> step=7405100, episode=1235 reward=0.7949852 (532.33 it/sec) -training >> step=7405200, episode=1235 reward=0.7806073 (457.05 it/sec) -training >> step=7405300, episode=1235 reward=0.7754837 (493.08 it/sec) -training >> step=7405400, episode=1235 reward=0.7958112 (457.99 it/sec) -training >> step=7405500, episode=1235 reward=0.7901322 (473.98 it/sec) -training >> step=7405600, episode=1235 reward=0.8023342 (507.49 it/sec) -training >> step=7405700, episode=1235 reward=0.7768583 (483.96 it/sec) -training >> step=7405800, episode=1235 reward=0.7790592 (495.85 it/sec) -training >> step=7405900, episode=1235 reward=0.7870568 (431.96 it/sec) -training >> step=7406000, episode=1235 reward=0.7782152 (442.08 it/sec) -training >> step=7406100, episode=1235 reward=0.7656133 (492.00 it/sec) -training >> step=7406200, episode=1235 reward=0.7810429 (504.71 it/sec) -training >> step=7406300, episode=1235 reward=0.7809382 (484.92 it/sec) -training >> step=7406400, episode=1235 reward=0.7868081 (463.11 it/sec) -training >> step=7406500, episode=1235 reward=0.7967876 (467.49 it/sec) -training >> step=7406600, episode=1235 reward=0.7743428 (459.58 it/sec) -training >> step=7406700, episode=1235 reward=0.7740251 (437.60 it/sec) -training >> step=7406800, episode=1235 reward=0.7739209 (497.17 it/sec) -training >> step=7406900, episode=1235 reward=0.7901849 (455.57 it/sec) -training >> step=7407000, episode=1235 reward=0.7966684 (466.11 it/sec) -training >> step=7407100, episode=1235 reward=0.7719783 (468.36 it/sec) -training >> step=7407200, episode=1235 reward=0.806187 (476.48 it/sec) -training >> step=7407300, episode=1235 reward=0.7812038 (504.98 it/sec) -training >> step=7407400, episode=1235 reward=0.7904854 (516.65 it/sec) -training >> step=7407500, episode=1235 reward=0.7735487 (498.62 it/sec) -training >> step=7407600, episode=1235 reward=0.7708291 (477.73 it/sec) -training >> step=7407700, episode=1235 reward=0.7724685 (507.47 it/sec) -training >> step=7407800, episode=1235 reward=0.7867143 (499.86 it/sec) -training >> step=7407900, episode=1235 reward=0.7853633 (514.98 it/sec) -training >> step=7408000, episode=1235 reward=0.7410005 (460.61 it/sec) -training >> step=7408100, episode=1235 reward=0.7620867 (491.90 it/sec) -training >> step=7408200, episode=1235 reward=0.7796944 (472.51 it/sec) -training >> step=7408300, episode=1235 reward=0.7624695 (495.48 it/sec) -training >> step=7408400, episode=1235 reward=0.7856162 (454.88 it/sec) -training >> step=7408500, episode=1235 reward=0.7819857 (467.82 it/sec) -training >> step=7408600, episode=1235 reward=0.7883224 (497.32 it/sec) -training >> step=7408700, episode=1235 reward=0.7335469 (523.21 it/sec) -training >> step=7408800, episode=1235 reward=0.7782568 (493.69 it/sec) -training >> step=7408900, episode=1235 reward=0.75001 (474.54 it/sec) -training >> step=7409000, episode=1235 reward=0.7741531 (526.48 it/sec) -training >> step=7409100, episode=1235 reward=0.7686266 (427.10 it/sec) -training >> step=7409200, episode=1235 reward=0.7571736 (514.73 it/sec) -training >> step=7409300, episode=1236 reward=0.7570708 (95.52 it/sec) -training >> step=7409400, episode=1236 reward=0.7519526 (483.95 it/sec) -training >> step=7409500, episode=1236 reward=0.7943343 (494.60 it/sec) -training >> step=7409600, episode=1236 reward=0.7877531 (466.69 it/sec) -training >> step=7409700, episode=1236 reward=0.7818801 (472.27 it/sec) -training >> step=7409800, episode=1236 reward=0.7936344 (501.52 it/sec) -training >> step=7409900, episode=1236 reward=0.7702184 (454.61 it/sec) -training >> step=7410000, episode=1236 reward=0.7857059 (441.89 it/sec) -training >> step=7410100, episode=1236 reward=0.7896907 (475.04 it/sec) -training >> step=7410200, episode=1236 reward=0.7715489 (490.23 it/sec) -training >> step=7410300, episode=1236 reward=0.8155516 (513.71 it/sec) -training >> step=7410400, episode=1236 reward=0.7631957 (460.00 it/sec) -training >> step=7410500, episode=1236 reward=0.7965606 (516.77 it/sec) -training >> step=7410600, episode=1236 reward=0.8019388 (474.71 it/sec) -training >> step=7410700, episode=1236 reward=0.8031805 (519.73 it/sec) -training >> step=7410800, episode=1236 reward=0.8013399 (504.52 it/sec) -training >> step=7410900, episode=1236 reward=0.7983457 (491.40 it/sec) -training >> step=7411000, episode=1236 reward=0.7995148 (478.25 it/sec) -training >> step=7411100, episode=1236 reward=0.7737502 (481.65 it/sec) -training >> step=7411200, episode=1236 reward=0.7683384 (486.41 it/sec) -training >> step=7411300, episode=1236 reward=0.7875105 (492.73 it/sec) -training >> step=7411400, episode=1236 reward=0.7765119 (503.73 it/sec) -training >> step=7411500, episode=1236 reward=0.7767828 (510.01 it/sec) -training >> step=7411600, episode=1236 reward=0.7816825 (514.59 it/sec) -training >> step=7411700, episode=1236 reward=0.776972 (450.67 it/sec) -training >> step=7411800, episode=1236 reward=0.7998459 (547.91 it/sec) -training >> step=7411900, episode=1236 reward=0.7933493 (476.73 it/sec) -training >> step=7412000, episode=1236 reward=0.7585502 (478.46 it/sec) -training >> step=7412100, episode=1236 reward=0.7902241 (499.05 it/sec) -training >> step=7412200, episode=1236 reward=0.7930385 (482.80 it/sec) -training >> step=7412300, episode=1236 reward=0.7705308 (503.04 it/sec) -training >> step=7412400, episode=1236 reward=0.7862788 (487.09 it/sec) -training >> step=7412500, episode=1236 reward=0.7837942 (496.83 it/sec) -training >> step=7412600, episode=1236 reward=0.780771 (505.50 it/sec) -training >> step=7412700, episode=1236 reward=0.8104323 (486.68 it/sec) -training >> step=7412800, episode=1236 reward=0.7799734 (500.03 it/sec) -training >> step=7412900, episode=1236 reward=0.7966037 (530.14 it/sec) -training >> step=7413000, episode=1236 reward=0.7694977 (463.48 it/sec) -training >> step=7413100, episode=1236 reward=0.7580529 (461.05 it/sec) -training >> step=7413200, episode=1236 reward=0.7668309 (502.35 it/sec) -training >> step=7413300, episode=1236 reward=0.7889224 (493.48 it/sec) -training >> step=7413400, episode=1236 reward=0.7866948 (503.68 it/sec) -training >> step=7413500, episode=1236 reward=0.7672685 (458.55 it/sec) -training >> step=7413600, episode=1236 reward=0.8057429 (496.09 it/sec) -training >> step=7413700, episode=1236 reward=0.7797803 (462.40 it/sec) -training >> step=7413800, episode=1236 reward=0.7833165 (472.33 it/sec) -training >> step=7413900, episode=1236 reward=0.7895536 (505.42 it/sec) -training >> step=7414000, episode=1236 reward=0.7797595 (510.25 it/sec) -training >> step=7414100, episode=1236 reward=0.7638738 (502.97 it/sec) -training >> step=7414200, episode=1236 reward=0.7723514 (507.57 it/sec) -training >> step=7414300, episode=1236 reward=0.7802514 (507.74 it/sec) -training >> step=7414400, episode=1236 reward=0.7925169 (512.38 it/sec) -training >> step=7414500, episode=1236 reward=0.766351 (508.37 it/sec) -training >> step=7414600, episode=1236 reward=0.7730328 (507.92 it/sec) -training >> step=7414700, episode=1236 reward=0.7752886 (535.68 it/sec) -training >> step=7414800, episode=1236 reward=0.7512498 (500.65 it/sec) -training >> step=7414900, episode=1236 reward=0.7896369 (477.73 it/sec) -training >> step=7415000, episode=1236 reward=0.7809314 (474.00 it/sec) -training >> step=7415100, episode=1236 reward=0.7708494 (483.08 it/sec) -training >> step=7415200, episode=1236 reward=0.7457416 (482.11 it/sec) -training >> step=7415300, episode=1237 reward=0.7600744 (89.54 it/sec) -training >> step=7415400, episode=1237 reward=0.7829954 (492.32 it/sec) -training >> step=7415500, episode=1237 reward=0.7949405 (458.61 it/sec) -training >> step=7415600, episode=1237 reward=0.7746468 (476.37 it/sec) -training >> step=7415700, episode=1237 reward=0.7858516 (517.91 it/sec) -training >> step=7415800, episode=1237 reward=0.7778165 (501.12 it/sec) -training >> step=7415900, episode=1237 reward=0.7861764 (496.67 it/sec) -training >> step=7416000, episode=1237 reward=0.7767106 (439.39 it/sec) -training >> step=7416100, episode=1237 reward=0.7991719 (489.89 it/sec) -training >> step=7416200, episode=1237 reward=0.8052313 (479.68 it/sec) -training >> step=7416300, episode=1237 reward=0.7920052 (493.82 it/sec) -training >> step=7416400, episode=1237 reward=0.7788984 (536.18 it/sec) -training >> step=7416500, episode=1237 reward=0.7737156 (419.08 it/sec) -training >> step=7416600, episode=1237 reward=0.7948618 (491.37 it/sec) -training >> step=7416700, episode=1237 reward=0.7880448 (462.87 it/sec) -training >> step=7416800, episode=1237 reward=0.7735679 (468.42 it/sec) -training >> step=7416900, episode=1237 reward=0.7938687 (502.66 it/sec) -training >> step=7417000, episode=1237 reward=0.7869757 (478.01 it/sec) -training >> step=7417100, episode=1237 reward=0.7956879 (526.44 it/sec) -training >> step=7417200, episode=1237 reward=0.7706001 (484.15 it/sec) -training >> step=7417300, episode=1237 reward=0.7968494 (498.49 it/sec) -training >> step=7417400, episode=1237 reward=0.8113149 (520.83 it/sec) -training >> step=7417500, episode=1237 reward=0.778477 (518.46 it/sec) -training >> step=7417600, episode=1237 reward=0.7916948 (500.34 it/sec) -training >> step=7417700, episode=1237 reward=0.7979658 (479.21 it/sec) -training >> step=7417800, episode=1237 reward=0.7633669 (480.69 it/sec) -training >> step=7417900, episode=1237 reward=0.7892774 (525.98 it/sec) -training >> step=7418000, episode=1237 reward=0.7909878 (508.23 it/sec) -training >> step=7418100, episode=1237 reward=0.7911261 (466.88 it/sec) -training >> step=7418200, episode=1237 reward=0.7726749 (462.04 it/sec) -training >> step=7418300, episode=1237 reward=0.7959416 (486.42 it/sec) -training >> step=7418400, episode=1237 reward=0.806268 (476.03 it/sec) -training >> step=7418500, episode=1237 reward=0.7953827 (518.46 it/sec) -training >> step=7418600, episode=1237 reward=0.7805235 (483.55 it/sec) -training >> step=7418700, episode=1237 reward=0.7884281 (512.33 it/sec) -training >> step=7418800, episode=1237 reward=0.782305 (478.19 it/sec) -training >> step=7418900, episode=1237 reward=0.7900659 (538.17 it/sec) -training >> step=7419000, episode=1237 reward=0.7972274 (459.74 it/sec) -training >> step=7419100, episode=1237 reward=0.7907933 (492.87 it/sec) -training >> step=7419200, episode=1237 reward=0.7816907 (495.50 it/sec) -training >> step=7419300, episode=1237 reward=0.7771837 (473.48 it/sec) -training >> step=7419400, episode=1237 reward=0.7719223 (498.91 it/sec) -training >> step=7419500, episode=1237 reward=0.7795789 (458.88 it/sec) -training >> step=7419600, episode=1237 reward=0.7798479 (422.69 it/sec) -training >> step=7419700, episode=1237 reward=0.7674719 (423.99 it/sec) -training >> step=7419800, episode=1237 reward=0.7412402 (444.91 it/sec) -training >> step=7419900, episode=1237 reward=0.7686496 (454.52 it/sec) -training >> step=7420000, episode=1237 reward=0.7936851 (501.09 it/sec) -training >> step=7420100, episode=1237 reward=0.7481375 (463.77 it/sec) -training >> step=7420200, episode=1237 reward=0.7657213 (474.44 it/sec) -training >> step=7420300, episode=1237 reward=0.771259 (457.06 it/sec) -training >> step=7420400, episode=1237 reward=0.7904041 (375.75 it/sec) -training >> step=7420500, episode=1237 reward=0.7788483 (444.32 it/sec) -training >> step=7420600, episode=1237 reward=0.7785981 (456.75 it/sec) -training >> step=7420700, episode=1237 reward=0.7782931 (527.78 it/sec) -training >> step=7420800, episode=1237 reward=0.7741555 (469.84 it/sec) -training >> step=7420900, episode=1237 reward=0.7484435 (478.34 it/sec) -training >> step=7421000, episode=1237 reward=0.7991247 (500.93 it/sec) -training >> step=7421100, episode=1237 reward=0.7820398 (453.07 it/sec) -training >> step=7421200, episode=1237 reward=0.7893894 (467.42 it/sec) -training >> step=7421300, episode=1238 reward=0.7701405 (92.62 it/sec) -training >> step=7421400, episode=1238 reward=0.7760208 (466.50 it/sec) -training >> step=7421500, episode=1238 reward=0.7816927 (463.74 it/sec) -training >> step=7421600, episode=1238 reward=0.7746408 (513.24 it/sec) -training >> step=7421700, episode=1238 reward=0.7831099 (494.74 it/sec) -training >> step=7421800, episode=1238 reward=0.7889439 (488.56 it/sec) -training >> step=7421900, episode=1238 reward=0.7891092 (495.86 it/sec) -training >> step=7422000, episode=1238 reward=0.7993616 (514.38 it/sec) -training >> step=7422100, episode=1238 reward=0.7873201 (488.10 it/sec) -training >> step=7422200, episode=1238 reward=0.7569469 (507.61 it/sec) -training >> step=7422300, episode=1238 reward=0.8007718 (447.68 it/sec) -training >> step=7422400, episode=1238 reward=0.7890738 (527.79 it/sec) -training >> step=7422500, episode=1238 reward=0.7919385 (452.46 it/sec) -training >> step=7422600, episode=1238 reward=0.8025918 (437.57 it/sec) -training >> step=7422700, episode=1238 reward=0.7787198 (458.43 it/sec) -training >> step=7422800, episode=1238 reward=0.7906292 (503.93 it/sec) -training >> step=7422900, episode=1238 reward=0.8110895 (492.21 it/sec) -training >> step=7423000, episode=1238 reward=0.7732098 (496.92 it/sec) -training >> step=7423100, episode=1238 reward=0.7847135 (447.97 it/sec) -training >> step=7423200, episode=1238 reward=0.7816668 (484.71 it/sec) -training >> step=7423300, episode=1238 reward=0.7929152 (487.28 it/sec) -training >> step=7423400, episode=1238 reward=0.7897118 (484.14 it/sec) -training >> step=7423500, episode=1238 reward=0.7691079 (529.37 it/sec) -training >> step=7423600, episode=1238 reward=0.7844812 (407.87 it/sec) -training >> step=7423700, episode=1238 reward=0.8107318 (497.70 it/sec) -training >> step=7423800, episode=1238 reward=0.7974656 (485.49 it/sec) -training >> step=7423900, episode=1238 reward=0.7749946 (508.00 it/sec) -training >> step=7424000, episode=1238 reward=0.7683347 (498.00 it/sec) -training >> step=7424100, episode=1238 reward=0.7831679 (505.29 it/sec) -training >> step=7424200, episode=1238 reward=0.7620024 (498.30 it/sec) -training >> step=7424300, episode=1238 reward=0.7875194 (475.15 it/sec) -training >> step=7424400, episode=1238 reward=0.7929546 (459.43 it/sec) -training >> step=7424500, episode=1238 reward=0.7797872 (513.33 it/sec) -training >> step=7424600, episode=1238 reward=0.7650441 (500.41 it/sec) -training >> step=7424700, episode=1238 reward=0.7943757 (489.46 it/sec) -training >> step=7424800, episode=1238 reward=0.7893479 (449.65 it/sec) -training >> step=7424900, episode=1238 reward=0.7716905 (481.68 it/sec) -training >> step=7425000, episode=1238 reward=0.800831 (473.09 it/sec) -training >> step=7425100, episode=1238 reward=0.7814079 (478.69 it/sec) -training >> step=7425200, episode=1238 reward=0.7861371 (510.07 it/sec) -training >> step=7425300, episode=1238 reward=0.7758186 (471.88 it/sec) -training >> step=7425400, episode=1238 reward=0.7984204 (503.42 it/sec) -training >> step=7425500, episode=1238 reward=0.7857155 (457.90 it/sec) -training >> step=7425600, episode=1238 reward=0.8007032 (478.50 it/sec) -training >> step=7425700, episode=1238 reward=0.7746431 (478.38 it/sec) -training >> step=7425800, episode=1238 reward=0.7748213 (486.12 it/sec) -training >> step=7425900, episode=1238 reward=0.7964247 (486.81 it/sec) -training >> step=7426000, episode=1238 reward=0.7752996 (481.54 it/sec) -training >> step=7426100, episode=1238 reward=0.7804989 (478.46 it/sec) -training >> step=7426200, episode=1238 reward=0.7720386 (485.93 it/sec) -training >> step=7426300, episode=1238 reward=0.7862605 (507.80 it/sec) -training >> step=7426400, episode=1238 reward=0.7546615 (503.69 it/sec) -training >> step=7426500, episode=1238 reward=0.7498597 (446.14 it/sec) -training >> step=7426600, episode=1238 reward=0.7741369 (432.78 it/sec) -training >> step=7426700, episode=1238 reward=0.7693715 (516.52 it/sec) -training >> step=7426800, episode=1238 reward=0.7745649 (450.43 it/sec) -training >> step=7426900, episode=1238 reward=0.7665979 (480.30 it/sec) -training >> step=7427000, episode=1238 reward=0.7871616 (424.24 it/sec) -training >> step=7427100, episode=1238 reward=0.7685086 (502.93 it/sec) -training >> step=7427200, episode=1238 reward=0.7616535 (481.17 it/sec) -training >> step=7427300, episode=1239 reward=0.7867967 (97.56 it/sec) -training >> step=7427400, episode=1239 reward=0.7999493 (491.29 it/sec) -training >> step=7427500, episode=1239 reward=0.7760187 (492.72 it/sec) -training >> step=7427600, episode=1239 reward=0.810476 (486.50 it/sec) -training >> step=7427700, episode=1239 reward=0.7788123 (515.46 it/sec) -training >> step=7427800, episode=1239 reward=0.8029133 (521.37 it/sec) -training >> step=7427900, episode=1239 reward=0.7944913 (504.94 it/sec) -training >> step=7428000, episode=1239 reward=0.8141307 (516.33 it/sec) -training >> step=7428100, episode=1239 reward=0.79195 (489.45 it/sec) -training >> step=7428200, episode=1239 reward=0.7908791 (497.07 it/sec) -training >> step=7428300, episode=1239 reward=0.8038808 (491.85 it/sec) -training >> step=7428400, episode=1239 reward=0.8225134 (508.18 it/sec) -training >> step=7428500, episode=1239 reward=0.7869866 (485.48 it/sec) -training >> step=7428600, episode=1239 reward=0.7887281 (479.57 it/sec) -training >> step=7428700, episode=1239 reward=0.7846757 (504.02 it/sec) -training >> step=7428800, episode=1239 reward=0.7930371 (492.37 it/sec) -training >> step=7428900, episode=1239 reward=0.7967838 (504.19 it/sec) -training >> step=7429000, episode=1239 reward=0.7893986 (472.48 it/sec) -training >> step=7429100, episode=1239 reward=0.7817972 (477.33 it/sec) -training >> step=7429200, episode=1239 reward=0.7810763 (454.17 it/sec) -training >> step=7429300, episode=1239 reward=0.7836626 (435.60 it/sec) -training >> step=7429400, episode=1239 reward=0.7811301 (460.40 it/sec) -training >> step=7429500, episode=1239 reward=0.7815819 (545.46 it/sec) -training >> step=7429600, episode=1239 reward=0.8005654 (476.40 it/sec) -training >> step=7429700, episode=1239 reward=0.8170601 (480.34 it/sec) -training >> step=7429800, episode=1239 reward=0.7836398 (520.17 it/sec) -training >> step=7429900, episode=1239 reward=0.7918611 (507.17 it/sec) -training >> step=7430000, episode=1239 reward=0.7808556 (505.92 it/sec) -training >> step=7430100, episode=1239 reward=0.7892545 (460.95 it/sec) -training >> step=7430200, episode=1239 reward=0.8084005 (512.14 it/sec) -training >> step=7430300, episode=1239 reward=0.7898946 (457.73 it/sec) -training >> step=7430400, episode=1239 reward=0.7949834 (488.42 it/sec) -training >> step=7430500, episode=1239 reward=0.7890538 (494.47 it/sec) -training >> step=7430600, episode=1239 reward=0.8047823 (502.37 it/sec) -training >> step=7430700, episode=1239 reward=0.787386 (475.40 it/sec) -training >> step=7430800, episode=1239 reward=0.781103 (491.14 it/sec) -training >> step=7430900, episode=1239 reward=0.7877895 (516.77 it/sec) -training >> step=7431000, episode=1239 reward=0.7870135 (520.51 it/sec) -training >> step=7431100, episode=1239 reward=0.7880359 (465.22 it/sec) -training >> step=7431200, episode=1239 reward=0.7823961 (478.94 it/sec) -training >> step=7431300, episode=1239 reward=0.7863103 (528.80 it/sec) -training >> step=7431400, episode=1239 reward=0.7661905 (464.28 it/sec) -training >> step=7431500, episode=1239 reward=0.7662939 (492.86 it/sec) -training >> step=7431600, episode=1239 reward=0.7879694 (452.19 it/sec) -training >> step=7431700, episode=1239 reward=0.7852757 (505.64 it/sec) -training >> step=7431800, episode=1239 reward=0.7653129 (510.98 it/sec) -training >> step=7431900, episode=1239 reward=0.7559307 (485.68 it/sec) -training >> step=7432000, episode=1239 reward=0.789945 (530.40 it/sec) -training >> step=7432100, episode=1239 reward=0.7551272 (487.82 it/sec) -training >> step=7432200, episode=1239 reward=0.7742849 (472.16 it/sec) -training >> step=7432300, episode=1239 reward=0.7913614 (455.65 it/sec) -training >> step=7432400, episode=1239 reward=0.76791 (481.97 it/sec) -training >> step=7432500, episode=1239 reward=0.7752022 (465.83 it/sec) -training >> step=7432600, episode=1239 reward=0.7559765 (508.50 it/sec) -training >> step=7432700, episode=1239 reward=0.7828341 (457.44 it/sec) -training >> step=7432800, episode=1239 reward=0.7787865 (522.18 it/sec) -training >> step=7432900, episode=1239 reward=0.7698577 (488.30 it/sec) -training >> step=7433000, episode=1239 reward=0.7708803 (470.63 it/sec) -training >> step=7433100, episode=1239 reward=0.772643 (440.18 it/sec) -training >> step=7433200, episode=1239 reward=0.7971578 (477.87 it/sec) -training >> step=7433300, episode=1240 reward=0.7920625 (91.09 it/sec) -training >> step=7433400, episode=1240 reward=0.7934939 (477.29 it/sec) -training >> step=7433500, episode=1240 reward=0.7868351 (502.40 it/sec) -training >> step=7433600, episode=1240 reward=0.7903865 (508.83 it/sec) -training >> step=7433700, episode=1240 reward=0.7905313 (449.95 it/sec) -training >> step=7433800, episode=1240 reward=0.8130057 (465.48 it/sec) -training >> step=7433900, episode=1240 reward=0.7795252 (464.74 it/sec) -training >> step=7434000, episode=1240 reward=0.7890902 (471.79 it/sec) -training >> step=7434100, episode=1240 reward=0.7896191 (498.26 it/sec) -training >> step=7434200, episode=1240 reward=0.799827 (468.28 it/sec) -training >> step=7434300, episode=1240 reward=0.7932507 (497.14 it/sec) -training >> step=7434400, episode=1240 reward=0.780691 (481.28 it/sec) -training >> step=7434500, episode=1240 reward=0.8038194 (479.77 it/sec) -training >> step=7434600, episode=1240 reward=0.7862661 (513.99 it/sec) -training >> step=7434700, episode=1240 reward=0.771134 (503.82 it/sec) -training >> step=7434800, episode=1240 reward=0.7821923 (531.51 it/sec) -training >> step=7434900, episode=1240 reward=0.7885164 (528.73 it/sec) -training >> step=7435000, episode=1240 reward=0.8021067 (525.83 it/sec) -training >> step=7435100, episode=1240 reward=0.7715009 (499.49 it/sec) -training >> step=7435200, episode=1240 reward=0.7992431 (531.90 it/sec) -training >> step=7435300, episode=1240 reward=0.7870106 (406.26 it/sec) -training >> step=7435400, episode=1240 reward=0.8039731 (485.31 it/sec) -training >> step=7435500, episode=1240 reward=0.7682979 (456.40 it/sec) -training >> step=7435600, episode=1240 reward=0.7861342 (481.84 it/sec) -training >> step=7435700, episode=1240 reward=0.7884182 (419.02 it/sec) -training >> step=7435800, episode=1240 reward=0.7758422 (372.10 it/sec) -training >> step=7435900, episode=1240 reward=0.7874095 (445.28 it/sec) -training >> step=7436000, episode=1240 reward=0.7763382 (407.27 it/sec) -training >> step=7436100, episode=1240 reward=0.7963639 (435.87 it/sec) -training >> step=7436200, episode=1240 reward=0.7746486 (488.58 it/sec) -training >> step=7436300, episode=1240 reward=0.7914925 (520.35 it/sec) -training >> step=7436400, episode=1240 reward=0.8036724 (447.61 it/sec) -training >> step=7436500, episode=1240 reward=0.7851986 (469.82 it/sec) -training >> step=7436600, episode=1240 reward=0.7871373 (419.63 it/sec) -training >> step=7436700, episode=1240 reward=0.7934601 (481.31 it/sec) -training >> step=7436800, episode=1240 reward=0.8037786 (458.29 it/sec) -training >> step=7436900, episode=1240 reward=0.7968487 (429.84 it/sec) -training >> step=7437000, episode=1240 reward=0.7978665 (517.91 it/sec) -training >> step=7437100, episode=1240 reward=0.7933627 (462.54 it/sec) -training >> step=7437200, episode=1240 reward=0.7725168 (474.63 it/sec) -training >> step=7437300, episode=1240 reward=0.7742218 (470.10 it/sec) -training >> step=7437400, episode=1240 reward=0.7671003 (520.18 it/sec) -training >> step=7437500, episode=1240 reward=0.779288 (490.17 it/sec) -training >> step=7437600, episode=1240 reward=0.7732041 (505.69 it/sec) -training >> step=7437700, episode=1240 reward=0.777916 (513.53 it/sec) -training >> step=7437800, episode=1240 reward=0.780403 (503.90 it/sec) -training >> step=7437900, episode=1240 reward=0.787361 (478.30 it/sec) -training >> step=7438000, episode=1240 reward=0.7777961 (466.80 it/sec) -training >> step=7438100, episode=1240 reward=0.7812882 (517.60 it/sec) -training >> step=7438200, episode=1240 reward=0.7571307 (503.23 it/sec) -training >> step=7438300, episode=1240 reward=0.7698925 (454.05 it/sec) -training >> step=7438400, episode=1240 reward=0.7991441 (450.72 it/sec) -training >> step=7438500, episode=1240 reward=0.7540395 (401.28 it/sec) -training >> step=7438600, episode=1240 reward=0.7647844 (434.82 it/sec) -training >> step=7438700, episode=1240 reward=0.7897062 (412.35 it/sec) -training >> step=7438800, episode=1240 reward=0.762185 (413.32 it/sec) -training >> step=7438900, episode=1240 reward=0.7777296 (414.18 it/sec) -training >> step=7439000, episode=1240 reward=0.7748128 (428.65 it/sec) -training >> step=7439100, episode=1240 reward=0.7874184 (500.09 it/sec) -training >> step=7439200, episode=1240 reward=0.7829169 (481.82 it/sec) -training >> step=7439300, episode=1241 reward=0.7893363 (109.04 it/sec) -training >> step=7439400, episode=1241 reward=0.7633643 (397.01 it/sec) -training >> step=7439500, episode=1241 reward=0.7770278 (481.23 it/sec) -training >> step=7439600, episode=1241 reward=0.7807872 (478.21 it/sec) -training >> step=7439700, episode=1241 reward=0.7586851 (463.34 it/sec) -training >> step=7439800, episode=1241 reward=0.7867697 (430.72 it/sec) -training >> step=7439900, episode=1241 reward=0.7866594 (446.87 it/sec) -training >> step=7440000, episode=1241 reward=0.8094528 (443.77 it/sec) -training >> step=7440100, episode=1241 reward=0.7783879 (406.89 it/sec) -training >> step=7440200, episode=1241 reward=0.7991413 (443.01 it/sec) -training >> step=7440300, episode=1241 reward=0.7916014 (430.42 it/sec) -training >> step=7440400, episode=1241 reward=0.7952338 (420.15 it/sec) -training >> step=7440500, episode=1241 reward=0.7965465 (418.28 it/sec) -training >> step=7440600, episode=1241 reward=0.792884 (411.94 it/sec) -training >> step=7440700, episode=1241 reward=0.7842634 (416.96 it/sec) -training >> step=7440800, episode=1241 reward=0.7990071 (393.64 it/sec) -training >> step=7440900, episode=1241 reward=0.788362 (385.84 it/sec) -training >> step=7441000, episode=1241 reward=0.7902488 (486.10 it/sec) -training >> step=7441100, episode=1241 reward=0.7857944 (456.78 it/sec) -training >> step=7441200, episode=1241 reward=0.7943234 (463.72 it/sec) -training >> step=7441300, episode=1241 reward=0.780114 (466.29 it/sec) -training >> step=7441400, episode=1241 reward=0.7907577 (475.30 it/sec) -training >> step=7441500, episode=1241 reward=0.7984226 (440.05 it/sec) -training >> step=7441600, episode=1241 reward=0.7759816 (477.73 it/sec) -training >> step=7441700, episode=1241 reward=0.7840382 (448.35 it/sec) -training >> step=7441800, episode=1241 reward=0.7967937 (494.89 it/sec) -training >> step=7441900, episode=1241 reward=0.7834305 (442.95 it/sec) -training >> step=7442000, episode=1241 reward=0.7788396 (442.50 it/sec) -training >> step=7442100, episode=1241 reward=0.7698914 (468.91 it/sec) -training >> step=7442200, episode=1241 reward=0.7891287 (445.17 it/sec) -training >> step=7442300, episode=1241 reward=0.7863945 (443.38 it/sec) -training >> step=7442400, episode=1241 reward=0.7723935 (455.08 it/sec) -training >> step=7442500, episode=1241 reward=0.7968821 (466.75 it/sec) -training >> step=7442600, episode=1241 reward=0.7788531 (422.63 it/sec) -training >> step=7442700, episode=1241 reward=0.8029413 (440.18 it/sec) -training >> step=7442800, episode=1241 reward=0.7870274 (459.63 it/sec) -training >> step=7442900, episode=1241 reward=0.7729385 (511.10 it/sec) -training >> step=7443000, episode=1241 reward=0.7822195 (472.13 it/sec) -training >> step=7443100, episode=1241 reward=0.7672213 (496.72 it/sec) -training >> step=7443200, episode=1241 reward=0.7800211 (540.50 it/sec) -training >> step=7443300, episode=1241 reward=0.7885796 (435.74 it/sec) -training >> step=7443400, episode=1241 reward=0.7647985 (441.01 it/sec) -training >> step=7443500, episode=1241 reward=0.7671672 (410.67 it/sec) -training >> step=7443600, episode=1241 reward=0.7860484 (463.45 it/sec) -training >> step=7443700, episode=1241 reward=0.8002867 (453.27 it/sec) -training >> step=7443800, episode=1241 reward=0.775856 (423.33 it/sec) -training >> step=7443900, episode=1241 reward=0.7741805 (411.20 it/sec) -training >> step=7444000, episode=1241 reward=0.7775246 (455.72 it/sec) -training >> step=7444100, episode=1241 reward=0.7751186 (465.14 it/sec) -training >> step=7444200, episode=1241 reward=0.7728981 (474.19 it/sec) -training >> step=7444300, episode=1241 reward=0.7793779 (508.15 it/sec) -training >> step=7444400, episode=1241 reward=0.7948319 (512.87 it/sec) -training >> step=7444500, episode=1241 reward=0.7660121 (413.75 it/sec) -training >> step=7444600, episode=1241 reward=0.7782133 (526.06 it/sec) -training >> step=7444700, episode=1241 reward=0.7836776 (483.16 it/sec) -training >> step=7444800, episode=1241 reward=0.7694514 (465.02 it/sec) -training >> step=7444900, episode=1241 reward=0.7671225 (472.67 it/sec) -training >> step=7445000, episode=1241 reward=0.7831161 (489.36 it/sec) -training >> step=7445100, episode=1241 reward=0.7750089 (480.40 it/sec) -training >> step=7445200, episode=1241 reward=0.7754439 (510.34 it/sec) -training >> step=7445300, episode=1242 reward=0.778264 (102.41 it/sec) -training >> step=7445400, episode=1242 reward=0.7830442 (450.53 it/sec) -training >> step=7445500, episode=1242 reward=0.7722212 (438.66 it/sec) -training >> step=7445600, episode=1242 reward=0.7967624 (425.75 it/sec) -training >> step=7445700, episode=1242 reward=0.7746599 (480.61 it/sec) -training >> step=7445800, episode=1242 reward=0.7766728 (475.76 it/sec) -training >> step=7445900, episode=1242 reward=0.7976004 (470.94 it/sec) -training >> step=7446000, episode=1242 reward=0.8050053 (459.80 it/sec) -training >> step=7446100, episode=1242 reward=0.7807329 (478.21 it/sec) -training >> step=7446200, episode=1242 reward=0.7786739 (489.64 it/sec) -training >> step=7446300, episode=1242 reward=0.7906175 (439.35 it/sec) -training >> step=7446400, episode=1242 reward=0.7971054 (501.75 it/sec) -training >> step=7446500, episode=1242 reward=0.7937053 (425.40 it/sec) -training >> step=7446600, episode=1242 reward=0.7895193 (435.78 it/sec) -training >> step=7446700, episode=1242 reward=0.8004618 (449.19 it/sec) -training >> step=7446800, episode=1242 reward=0.8026337 (455.10 it/sec) -training >> step=7446900, episode=1242 reward=0.7821895 (450.81 it/sec) -training >> step=7447000, episode=1242 reward=0.7848887 (473.60 it/sec) -training >> step=7447100, episode=1242 reward=0.7734375 (502.21 it/sec) -training >> step=7447200, episode=1242 reward=0.7959583 (521.85 it/sec) -training >> step=7447300, episode=1242 reward=0.7849976 (423.48 it/sec) -training >> step=7447400, episode=1242 reward=0.7669492 (444.94 it/sec) -training >> step=7447500, episode=1242 reward=0.7720739 (481.65 it/sec) -training >> step=7447600, episode=1242 reward=0.783279 (425.77 it/sec) -training >> step=7447700, episode=1242 reward=0.786694 (423.19 it/sec) -training >> step=7447800, episode=1242 reward=0.7813154 (465.02 it/sec) -training >> step=7447900, episode=1242 reward=0.7841303 (458.69 it/sec) -training >> step=7448000, episode=1242 reward=0.7881917 (440.00 it/sec) -training >> step=7448100, episode=1242 reward=0.785971 (470.01 it/sec) -training >> step=7448200, episode=1242 reward=0.8117832 (475.82 it/sec) -training >> step=7448300, episode=1242 reward=0.7855407 (491.86 it/sec) -training >> step=7448400, episode=1242 reward=0.7862321 (382.99 it/sec) -training >> step=7448500, episode=1242 reward=0.780419 (408.12 it/sec) -training >> step=7448600, episode=1242 reward=0.7655238 (385.01 it/sec) -training >> step=7448700, episode=1242 reward=0.7692778 (378.47 it/sec) -training >> step=7448800, episode=1242 reward=0.7966917 (339.74 it/sec) -training >> step=7448900, episode=1242 reward=0.7783392 (366.14 it/sec) -training >> step=7449000, episode=1242 reward=0.7911588 (437.79 it/sec) -training >> step=7449100, episode=1242 reward=0.7963657 (417.67 it/sec) -training >> step=7449200, episode=1242 reward=0.778961 (416.95 it/sec) -training >> step=7449300, episode=1242 reward=0.7904818 (478.44 it/sec) -training >> step=7449400, episode=1242 reward=0.783912 (494.94 it/sec) -training >> step=7449500, episode=1242 reward=0.7829407 (480.41 it/sec) -training >> step=7449600, episode=1242 reward=0.7865581 (454.79 it/sec) -training >> step=7449700, episode=1242 reward=0.7861748 (442.46 it/sec) -training >> step=7449800, episode=1242 reward=0.7707538 (473.81 it/sec) -training >> step=7449900, episode=1242 reward=0.7830623 (433.97 it/sec) -training >> step=7450000, episode=1242 reward=0.7841563 (484.83 it/sec) -training >> step=7450100, episode=1242 reward=0.7735149 (427.69 it/sec) -training >> step=7450200, episode=1242 reward=0.7753366 (476.88 it/sec) -training >> step=7450300, episode=1242 reward=0.7660404 (423.39 it/sec) -training >> step=7450400, episode=1242 reward=0.7857647 (482.46 it/sec) -training >> step=7450500, episode=1242 reward=0.7522777 (461.17 it/sec) -training >> step=7450600, episode=1242 reward=0.7930189 (449.48 it/sec) -training >> step=7450700, episode=1242 reward=0.7723568 (461.69 it/sec) -training >> step=7450800, episode=1242 reward=0.7706386 (499.69 it/sec) -training >> step=7450900, episode=1242 reward=0.7849222 (474.51 it/sec) -training >> step=7451000, episode=1242 reward=0.7969531 (437.57 it/sec) -training >> step=7451100, episode=1242 reward=0.7822208 (428.26 it/sec) -training >> step=7451200, episode=1242 reward=0.7906638 (475.99 it/sec) -training >> step=7451300, episode=1243 reward=0.8011472 (87.36 it/sec) -training >> step=7451400, episode=1243 reward=0.7723901 (464.57 it/sec) -training >> step=7451500, episode=1243 reward=0.792306 (526.36 it/sec) -training >> step=7451600, episode=1243 reward=0.7772759 (472.86 it/sec) -training >> step=7451700, episode=1243 reward=0.8012178 (445.49 it/sec) -training >> step=7451800, episode=1243 reward=0.7920744 (508.25 it/sec) -training >> step=7451900, episode=1243 reward=0.7797827 (459.68 it/sec) -training >> step=7452000, episode=1243 reward=0.7850468 (492.85 it/sec) -training >> step=7452100, episode=1243 reward=0.7994488 (488.99 it/sec) -training >> step=7452200, episode=1243 reward=0.7941098 (525.23 it/sec) -training >> step=7452300, episode=1243 reward=0.7901411 (463.67 it/sec) -training >> step=7452400, episode=1243 reward=0.8016909 (448.09 it/sec) -training >> step=7452500, episode=1243 reward=0.7906804 (515.76 it/sec) -training >> step=7452600, episode=1243 reward=0.8004515 (444.72 it/sec) -training >> step=7452700, episode=1243 reward=0.7909483 (481.99 it/sec) -training >> step=7452800, episode=1243 reward=0.7880291 (455.28 it/sec) -training >> step=7452900, episode=1243 reward=0.7729213 (442.96 it/sec) -training >> step=7453000, episode=1243 reward=0.7878297 (478.60 it/sec) -training >> step=7453100, episode=1243 reward=0.7669777 (454.97 it/sec) -training >> step=7453200, episode=1243 reward=0.7892084 (495.59 it/sec) -training >> step=7453300, episode=1243 reward=0.7867888 (469.09 it/sec) -training >> step=7453400, episode=1243 reward=0.7845931 (458.35 it/sec) -training >> step=7453500, episode=1243 reward=0.7816974 (499.08 it/sec) -training >> step=7453600, episode=1243 reward=0.798319 (472.14 it/sec) -training >> step=7453700, episode=1243 reward=0.7856441 (510.96 it/sec) -training >> step=7453800, episode=1243 reward=0.7943805 (422.28 it/sec) -training >> step=7453900, episode=1243 reward=0.7783795 (437.44 it/sec) -training >> step=7454000, episode=1243 reward=0.7829165 (464.89 it/sec) -training >> step=7454100, episode=1243 reward=0.7773876 (452.37 it/sec) -training >> step=7454200, episode=1243 reward=0.792728 (469.55 it/sec) -training >> step=7454300, episode=1243 reward=0.7982733 (427.75 it/sec) -training >> step=7454400, episode=1243 reward=0.7949888 (438.80 it/sec) -training >> step=7454500, episode=1243 reward=0.7870151 (450.11 it/sec) -training >> step=7454600, episode=1243 reward=0.8161727 (485.87 it/sec) -training >> step=7454700, episode=1243 reward=0.7840536 (449.95 it/sec) -training >> step=7454800, episode=1243 reward=0.7641786 (463.06 it/sec) -training >> step=7454900, episode=1243 reward=0.7828683 (451.59 it/sec) -training >> step=7455000, episode=1243 reward=0.7884502 (503.65 it/sec) -training >> step=7455100, episode=1243 reward=0.800053 (451.80 it/sec) -training >> step=7455200, episode=1243 reward=0.7790371 (449.29 it/sec) -training >> step=7455300, episode=1243 reward=0.7810162 (469.05 it/sec) -training >> step=7455400, episode=1243 reward=0.7668381 (440.81 it/sec) -training >> step=7455500, episode=1243 reward=0.7900439 (453.48 it/sec) -training >> step=7455600, episode=1243 reward=0.7765575 (463.04 it/sec) -training >> step=7455700, episode=1243 reward=0.7700837 (460.91 it/sec) -training >> step=7455800, episode=1243 reward=0.7704484 (460.10 it/sec) -training >> step=7455900, episode=1243 reward=0.7896816 (387.59 it/sec) -training >> step=7456000, episode=1243 reward=0.7766163 (463.77 it/sec) -training >> step=7456100, episode=1243 reward=0.7659255 (507.09 it/sec) -training >> step=7456200, episode=1243 reward=0.7820971 (479.86 it/sec) -training >> step=7456300, episode=1243 reward=0.7773302 (508.91 it/sec) -training >> step=7456400, episode=1243 reward=0.7767226 (483.33 it/sec) -training >> step=7456500, episode=1243 reward=0.7600207 (449.38 it/sec) -training >> step=7456600, episode=1243 reward=0.7921302 (444.02 it/sec) -training >> step=7456700, episode=1243 reward=0.7807474 (462.60 it/sec) -training >> step=7456800, episode=1243 reward=0.7704409 (511.07 it/sec) -training >> step=7456900, episode=1243 reward=0.7827331 (456.74 it/sec) -training >> step=7457000, episode=1243 reward=0.7568555 (481.67 it/sec) -training >> step=7457100, episode=1243 reward=0.7920953 (476.30 it/sec) -training >> step=7457200, episode=1243 reward=0.7970175 (453.13 it/sec) -training >> step=7457300, episode=1244 reward=0.7849607 (97.82 it/sec) -training >> step=7457400, episode=1244 reward=0.8014306 (449.64 it/sec) -training >> step=7457500, episode=1244 reward=0.7797682 (480.15 it/sec) -training >> step=7457600, episode=1244 reward=0.8064781 (496.42 it/sec) -training >> step=7457700, episode=1244 reward=0.7826189 (442.67 it/sec) -training >> step=7457800, episode=1244 reward=0.7751881 (461.83 it/sec) -training >> step=7457900, episode=1244 reward=0.7942104 (449.55 it/sec) -training >> step=7458000, episode=1244 reward=0.7884356 (494.02 it/sec) -training >> step=7458100, episode=1244 reward=0.770256 (433.25 it/sec) -training >> step=7458200, episode=1244 reward=0.7951338 (497.73 it/sec) -training >> step=7458300, episode=1244 reward=0.7998903 (431.49 it/sec) -training >> step=7458400, episode=1244 reward=0.7952703 (423.86 it/sec) -training >> step=7458500, episode=1244 reward=0.7837044 (461.75 it/sec) -training >> step=7458600, episode=1244 reward=0.7721413 (481.11 it/sec) -training >> step=7458700, episode=1244 reward=0.7758086 (452.76 it/sec) -training >> step=7458800, episode=1244 reward=0.8078853 (437.19 it/sec) -training >> step=7458900, episode=1244 reward=0.8047938 (448.76 it/sec) -training >> step=7459000, episode=1244 reward=0.7795081 (429.49 it/sec) -training >> step=7459100, episode=1244 reward=0.7904522 (423.08 it/sec) -training >> step=7459200, episode=1244 reward=0.7913907 (491.93 it/sec) -training >> step=7459300, episode=1244 reward=0.797833 (482.19 it/sec) -training >> step=7459400, episode=1244 reward=0.7765872 (436.06 it/sec) -training >> step=7459500, episode=1244 reward=0.7815046 (477.77 it/sec) -training >> step=7459600, episode=1244 reward=0.7953086 (447.89 it/sec) -training >> step=7459700, episode=1244 reward=0.7884864 (454.35 it/sec) -training >> step=7459800, episode=1244 reward=0.7701609 (431.65 it/sec) -training >> step=7459900, episode=1244 reward=0.7855061 (431.22 it/sec) -training >> step=7460000, episode=1244 reward=0.7963439 (476.50 it/sec) -training >> step=7460100, episode=1244 reward=0.7755581 (461.49 it/sec) -training >> step=7460200, episode=1244 reward=0.7912634 (406.26 it/sec) -training >> step=7460300, episode=1244 reward=0.8025118 (455.77 it/sec) -training >> step=7460400, episode=1244 reward=0.8031985 (474.87 it/sec) -training >> step=7460500, episode=1244 reward=0.7561665 (408.77 it/sec) -training >> step=7460600, episode=1244 reward=0.7908058 (448.36 it/sec) -training >> step=7460700, episode=1244 reward=0.7854318 (447.86 it/sec) -training >> step=7460800, episode=1244 reward=0.7828809 (474.42 it/sec) -training >> step=7460900, episode=1244 reward=0.7928508 (465.14 it/sec) -training >> step=7461000, episode=1244 reward=0.7944896 (487.89 it/sec) -training >> step=7461100, episode=1244 reward=0.7852948 (435.75 it/sec) -training >> step=7461200, episode=1244 reward=0.8067622 (443.63 it/sec) -training >> step=7461300, episode=1244 reward=0.7828227 (456.72 it/sec) -training >> step=7461400, episode=1244 reward=0.7923371 (425.44 it/sec) -training >> step=7461500, episode=1244 reward=0.7767501 (407.31 it/sec) -training >> step=7461600, episode=1244 reward=0.7741873 (392.07 it/sec) -training >> step=7461700, episode=1244 reward=0.7655669 (399.43 it/sec) -training >> step=7461800, episode=1244 reward=0.7669585 (376.67 it/sec) -training >> step=7461900, episode=1244 reward=0.7772589 (398.95 it/sec) -training >> step=7462000, episode=1244 reward=0.7814445 (400.03 it/sec) -training >> step=7462100, episode=1244 reward=0.753899 (455.07 it/sec) -training >> step=7462200, episode=1244 reward=0.761178 (459.94 it/sec) -training >> step=7462300, episode=1244 reward=0.7778637 (460.57 it/sec) -training >> step=7462400, episode=1244 reward=0.7484829 (477.83 it/sec) -training >> step=7462500, episode=1244 reward=0.7675207 (437.03 it/sec) -training >> step=7462600, episode=1244 reward=0.7674047 (469.74 it/sec) -training >> step=7462700, episode=1244 reward=0.7680365 (426.19 it/sec) -training >> step=7462800, episode=1244 reward=0.7754699 (440.99 it/sec) -training >> step=7462900, episode=1244 reward=0.7557822 (411.21 it/sec) -training >> step=7463000, episode=1244 reward=0.7650231 (446.41 it/sec) -training >> step=7463100, episode=1244 reward=0.7911125 (449.94 it/sec) -training >> step=7463200, episode=1244 reward=0.7760215 (424.70 it/sec) -training >> step=7463300, episode=1245 reward=0.7802426 (93.88 it/sec) -training >> step=7463400, episode=1245 reward=0.7724977 (481.68 it/sec) -training >> step=7463500, episode=1245 reward=0.7801086 (473.29 it/sec) -training >> step=7463600, episode=1245 reward=0.7713022 (514.48 it/sec) -training >> step=7463700, episode=1245 reward=0.785673 (501.19 it/sec) -training >> step=7463800, episode=1245 reward=0.7974659 (481.56 it/sec) -training >> step=7463900, episode=1245 reward=0.7921168 (510.32 it/sec) -training >> step=7464000, episode=1245 reward=0.8025631 (420.94 it/sec) -training >> step=7464100, episode=1245 reward=0.7810463 (435.32 it/sec) -training >> step=7464200, episode=1245 reward=0.7724045 (472.10 it/sec) -training >> step=7464300, episode=1245 reward=0.8003334 (463.59 it/sec) -training >> step=7464400, episode=1245 reward=0.7990739 (460.48 it/sec) -training >> step=7464500, episode=1245 reward=0.7802559 (500.70 it/sec) -training >> step=7464600, episode=1245 reward=0.7744432 (454.38 it/sec) -training >> step=7464700, episode=1245 reward=0.7703673 (389.91 it/sec) -training >> step=7464800, episode=1245 reward=0.7865768 (454.51 it/sec) -training >> step=7464900, episode=1245 reward=0.7774684 (418.90 it/sec) -training >> step=7465000, episode=1245 reward=0.8063542 (439.30 it/sec) -training >> step=7465100, episode=1245 reward=0.781088 (422.90 it/sec) -training >> step=7465200, episode=1245 reward=0.7892569 (418.12 it/sec) -training >> step=7465300, episode=1245 reward=0.7842275 (441.75 it/sec) -training >> step=7465400, episode=1245 reward=0.7874809 (425.36 it/sec) -training >> step=7465500, episode=1245 reward=0.7877288 (434.64 it/sec) -training >> step=7465600, episode=1245 reward=0.7830619 (517.64 it/sec) -training >> step=7465700, episode=1245 reward=0.7992616 (486.88 it/sec) -training >> step=7465800, episode=1245 reward=0.7894894 (489.15 it/sec) -training >> step=7465900, episode=1245 reward=0.7945242 (517.49 it/sec) -training >> step=7466000, episode=1245 reward=0.7894163 (500.38 it/sec) -training >> step=7466100, episode=1245 reward=0.8053517 (514.09 it/sec) -training >> step=7466200, episode=1245 reward=0.78341 (512.73 it/sec) -training >> step=7466300, episode=1245 reward=0.7835328 (520.38 it/sec) -training >> step=7466400, episode=1245 reward=0.7499185 (455.67 it/sec) -training >> step=7466500, episode=1245 reward=0.7759649 (457.00 it/sec) -training >> step=7466600, episode=1245 reward=0.7921288 (535.62 it/sec) -training >> step=7466700, episode=1245 reward=0.7866457 (510.28 it/sec) -training >> step=7466800, episode=1245 reward=0.7902679 (509.81 it/sec) -training >> step=7466900, episode=1245 reward=0.774901 (472.95 it/sec) -training >> step=7467000, episode=1245 reward=0.7936105 (520.54 it/sec) -training >> step=7467100, episode=1245 reward=0.7832299 (479.47 it/sec) -training >> step=7467200, episode=1245 reward=0.7789774 (459.45 it/sec) -training >> step=7467300, episode=1245 reward=0.7914464 (524.20 it/sec) -training >> step=7467400, episode=1245 reward=0.7963124 (498.91 it/sec) -training >> step=7467500, episode=1245 reward=0.7759804 (481.91 it/sec) -training >> step=7467600, episode=1245 reward=0.7679431 (492.90 it/sec) -training >> step=7467700, episode=1245 reward=0.7944067 (512.38 it/sec) -training >> step=7467800, episode=1245 reward=0.7660853 (524.07 it/sec) -training >> step=7467900, episode=1245 reward=0.7897894 (489.12 it/sec) -training >> step=7468000, episode=1245 reward=0.7760253 (494.65 it/sec) -training >> step=7468100, episode=1245 reward=0.7791038 (546.39 it/sec) -training >> step=7468200, episode=1245 reward=0.7610824 (467.31 it/sec) -training >> step=7468300, episode=1245 reward=0.7805992 (463.60 it/sec) -training >> step=7468400, episode=1245 reward=0.748159 (452.66 it/sec) -training >> step=7468500, episode=1245 reward=0.7808856 (436.94 it/sec) -training >> step=7468600, episode=1245 reward=0.7841005 (493.98 it/sec) -training >> step=7468700, episode=1245 reward=0.7755984 (484.81 it/sec) -training >> step=7468800, episode=1245 reward=0.7786919 (449.93 it/sec) -training >> step=7468900, episode=1245 reward=0.7733994 (455.26 it/sec) -training >> step=7469000, episode=1245 reward=0.7914037 (464.26 it/sec) -training >> step=7469100, episode=1245 reward=0.7703062 (433.16 it/sec) -training >> step=7469200, episode=1245 reward=0.7586261 (501.64 it/sec) -training >> step=7469300, episode=1246 reward=0.7791581 (113.58 it/sec) -training >> step=7469400, episode=1246 reward=0.7888566 (492.27 it/sec) -training >> step=7469500, episode=1246 reward=0.7780429 (428.29 it/sec) -training >> step=7469600, episode=1246 reward=0.7852001 (434.57 it/sec) -training >> step=7469700, episode=1246 reward=0.7812148 (420.27 it/sec) -training >> step=7469800, episode=1246 reward=0.7749947 (457.22 it/sec) -training >> step=7469900, episode=1246 reward=0.7838157 (492.22 it/sec) -training >> step=7470000, episode=1246 reward=0.7836127 (480.09 it/sec) -training >> step=7470100, episode=1246 reward=0.7932803 (420.14 it/sec) -training >> step=7470200, episode=1246 reward=0.7831032 (518.07 it/sec) -training >> step=7470300, episode=1246 reward=0.7681926 (450.58 it/sec) -training >> step=7470400, episode=1246 reward=0.7751209 (494.80 it/sec) -training >> step=7470500, episode=1246 reward=0.772487 (453.34 it/sec) -training >> step=7470600, episode=1246 reward=0.7990805 (485.27 it/sec) -training >> step=7470700, episode=1246 reward=0.7809734 (506.83 it/sec) -training >> step=7470800, episode=1246 reward=0.7894799 (472.30 it/sec) -training >> step=7470900, episode=1246 reward=0.802216 (483.43 it/sec) -training >> step=7471000, episode=1246 reward=0.794497 (450.78 it/sec) -training >> step=7471100, episode=1246 reward=0.7999133 (472.23 it/sec) -training >> step=7471200, episode=1246 reward=0.7800996 (475.25 it/sec) -training >> step=7471300, episode=1246 reward=0.7850313 (498.81 it/sec) -training >> step=7471400, episode=1246 reward=0.7713222 (421.01 it/sec) -training >> step=7471500, episode=1246 reward=0.793323 (480.93 it/sec) -training >> step=7471600, episode=1246 reward=0.774448 (491.50 it/sec) -training >> step=7471700, episode=1246 reward=0.7914912 (454.07 it/sec) -training >> step=7471800, episode=1246 reward=0.7910361 (454.59 it/sec) -training >> step=7471900, episode=1246 reward=0.7665747 (467.53 it/sec) -training >> step=7472000, episode=1246 reward=0.7834595 (467.80 it/sec) -training >> step=7472100, episode=1246 reward=0.7897558 (496.51 it/sec) -training >> step=7472200, episode=1246 reward=0.7917084 (506.92 it/sec) -training >> step=7472300, episode=1246 reward=0.8092679 (471.24 it/sec) -training >> step=7472400, episode=1246 reward=0.7759597 (463.84 it/sec) -training >> step=7472500, episode=1246 reward=0.803741 (506.27 it/sec) -training >> step=7472600, episode=1246 reward=0.789247 (465.77 it/sec) -training >> step=7472700, episode=1246 reward=0.7917499 (496.19 it/sec) -training >> step=7472800, episode=1246 reward=0.7825767 (525.99 it/sec) -training >> step=7472900, episode=1246 reward=0.7970135 (494.13 it/sec) -training >> step=7473000, episode=1246 reward=0.7991321 (504.01 it/sec) -training >> step=7473100, episode=1246 reward=0.7857216 (538.99 it/sec) -training >> step=7473200, episode=1246 reward=0.7855915 (461.54 it/sec) -training >> step=7473300, episode=1246 reward=0.7713827 (472.83 it/sec) -training >> step=7473400, episode=1246 reward=0.7908355 (461.94 it/sec) -training >> step=7473500, episode=1246 reward=0.772669 (468.16 it/sec) -training >> step=7473600, episode=1246 reward=0.7921972 (439.35 it/sec) -training >> step=7473700, episode=1246 reward=0.7776288 (494.68 it/sec) -training >> step=7473800, episode=1246 reward=0.774635 (485.25 it/sec) -training >> step=7473900, episode=1246 reward=0.7477377 (477.98 it/sec) -training >> step=7474000, episode=1246 reward=0.7822539 (466.97 it/sec) -training >> step=7474100, episode=1246 reward=0.7738418 (503.39 it/sec) -training >> step=7474200, episode=1246 reward=0.7806818 (490.86 it/sec) -training >> step=7474300, episode=1246 reward=0.7796786 (495.68 it/sec) -training >> step=7474400, episode=1246 reward=0.7514391 (467.52 it/sec) -training >> step=7474500, episode=1246 reward=0.7945625 (459.81 it/sec) -training >> step=7474600, episode=1246 reward=0.7999737 (441.07 it/sec) -training >> step=7474700, episode=1246 reward=0.7948827 (397.20 it/sec) -training >> step=7474800, episode=1246 reward=0.775633 (439.52 it/sec) -training >> step=7474900, episode=1246 reward=0.792518 (401.16 it/sec) -training >> step=7475000, episode=1246 reward=0.7849942 (422.25 it/sec) -training >> step=7475100, episode=1246 reward=0.8086556 (397.85 it/sec) -training >> step=7475200, episode=1246 reward=0.7980629 (390.26 it/sec) -training >> step=7475300, episode=1247 reward=0.7787959 (95.39 it/sec) -training >> step=7475400, episode=1247 reward=0.7762524 (457.41 it/sec) -training >> step=7475500, episode=1247 reward=0.7835872 (505.61 it/sec) -training >> step=7475600, episode=1247 reward=0.7834845 (510.03 it/sec) -training >> step=7475700, episode=1247 reward=0.7869192 (446.64 it/sec) -training >> step=7475800, episode=1247 reward=0.7943965 (496.98 it/sec) -training >> step=7475900, episode=1247 reward=0.8032802 (464.25 it/sec) -training >> step=7476000, episode=1247 reward=0.7877058 (476.10 it/sec) -training >> step=7476100, episode=1247 reward=0.7972194 (500.25 it/sec) -training >> step=7476200, episode=1247 reward=0.8053118 (514.42 it/sec) -training >> step=7476300, episode=1247 reward=0.7858336 (460.38 it/sec) -training >> step=7476400, episode=1247 reward=0.7888265 (431.93 it/sec) -training >> step=7476500, episode=1247 reward=0.7817396 (480.91 it/sec) -training >> step=7476600, episode=1247 reward=0.7783295 (498.33 it/sec) -training >> step=7476700, episode=1247 reward=0.7678545 (507.43 it/sec) -training >> step=7476800, episode=1247 reward=0.7957852 (407.36 it/sec) -training >> step=7476900, episode=1247 reward=0.7819289 (490.41 it/sec) -training >> step=7477000, episode=1247 reward=0.7972135 (431.01 it/sec) -training >> step=7477100, episode=1247 reward=0.7858286 (462.56 it/sec) -training >> step=7477200, episode=1247 reward=0.7912304 (458.63 it/sec) -training >> step=7477300, episode=1247 reward=0.8130479 (467.83 it/sec) -training >> step=7477400, episode=1247 reward=0.7874105 (482.07 it/sec) -training >> step=7477500, episode=1247 reward=0.7755432 (456.33 it/sec) -training >> step=7477600, episode=1247 reward=0.7941643 (423.12 it/sec) -training >> step=7477700, episode=1247 reward=0.7824774 (465.78 it/sec) -training >> step=7477800, episode=1247 reward=0.7997195 (479.89 it/sec) -training >> step=7477900, episode=1247 reward=0.7825856 (457.55 it/sec) -training >> step=7478000, episode=1247 reward=0.7727364 (502.45 it/sec) -training >> step=7478100, episode=1247 reward=0.7835464 (489.82 it/sec) -training >> step=7478200, episode=1247 reward=0.8094162 (426.91 it/sec) -training >> step=7478300, episode=1247 reward=0.8180381 (413.15 it/sec) -training >> step=7478400, episode=1247 reward=0.7806547 (507.28 it/sec) -training >> step=7478500, episode=1247 reward=0.7819695 (481.23 it/sec) -training >> step=7478600, episode=1247 reward=0.789487 (504.75 it/sec) -training >> step=7478700, episode=1247 reward=0.7872709 (478.02 it/sec) -training >> step=7478800, episode=1247 reward=0.7666353 (505.15 it/sec) -training >> step=7478900, episode=1247 reward=0.7895523 (507.99 it/sec) -training >> step=7479000, episode=1247 reward=0.7917444 (477.72 it/sec) -training >> step=7479100, episode=1247 reward=0.7824773 (519.82 it/sec) -training >> step=7479200, episode=1247 reward=0.7793161 (491.95 it/sec) -training >> step=7479300, episode=1247 reward=0.7919993 (468.22 it/sec) -training >> step=7479400, episode=1247 reward=0.7841896 (472.57 it/sec) -training >> step=7479500, episode=1247 reward=0.7811279 (531.13 it/sec) -training >> step=7479600, episode=1247 reward=0.7611608 (467.81 it/sec) -training >> step=7479700, episode=1247 reward=0.7764516 (442.32 it/sec) -training >> step=7479800, episode=1247 reward=0.8006389 (500.54 it/sec) -training >> step=7479900, episode=1247 reward=0.7830163 (521.10 it/sec) -training >> step=7480000, episode=1247 reward=0.7861562 (496.54 it/sec) -training >> step=7480100, episode=1247 reward=0.7835035 (482.22 it/sec) -training >> step=7480200, episode=1247 reward=0.7867073 (526.95 it/sec) -training >> step=7480300, episode=1247 reward=0.7668039 (439.02 it/sec) -training >> step=7480400, episode=1247 reward=0.7899735 (508.69 it/sec) -training >> step=7480500, episode=1247 reward=0.7589002 (493.91 it/sec) -training >> step=7480600, episode=1247 reward=0.790635 (507.74 it/sec) -training >> step=7480700, episode=1247 reward=0.7995686 (446.64 it/sec) -training >> step=7480800, episode=1247 reward=0.7844141 (450.72 it/sec) -training >> step=7480900, episode=1247 reward=0.7577383 (531.57 it/sec) -training >> step=7481000, episode=1247 reward=0.7826858 (499.97 it/sec) -training >> step=7481100, episode=1247 reward=0.7854364 (459.92 it/sec) -training >> step=7481200, episode=1247 reward=0.7767444 (430.27 it/sec) -training >> step=7481300, episode=1248 reward=0.7989134 (100.18 it/sec) -training >> step=7481400, episode=1248 reward=0.7818007 (410.88 it/sec) -training >> step=7481500, episode=1248 reward=0.7638395 (445.64 it/sec) -training >> step=7481600, episode=1248 reward=0.7872803 (494.06 it/sec) -training >> step=7481700, episode=1248 reward=0.784052 (522.59 it/sec) -training >> step=7481800, episode=1248 reward=0.7976658 (426.16 it/sec) -training >> step=7481900, episode=1248 reward=0.7818166 (387.18 it/sec) -training >> step=7482000, episode=1248 reward=0.7932809 (439.43 it/sec) -training >> step=7482100, episode=1248 reward=0.7862854 (503.61 it/sec) -training >> step=7482200, episode=1248 reward=0.8182532 (538.39 it/sec) -training >> step=7482300, episode=1248 reward=0.8038039 (503.76 it/sec) -training >> step=7482400, episode=1248 reward=0.7862946 (497.16 it/sec) -training >> step=7482500, episode=1248 reward=0.801529 (525.09 it/sec) -training >> step=7482600, episode=1248 reward=0.7906547 (461.09 it/sec) -training >> step=7482700, episode=1248 reward=0.767274 (494.85 it/sec) -training >> step=7482800, episode=1248 reward=0.7817276 (435.58 it/sec) -training >> step=7482900, episode=1248 reward=0.7803395 (433.20 it/sec) -training >> step=7483000, episode=1248 reward=0.7940279 (452.12 it/sec) -training >> step=7483100, episode=1248 reward=0.7798376 (460.66 it/sec) -training >> step=7483200, episode=1248 reward=0.793107 (436.38 it/sec) -training >> step=7483300, episode=1248 reward=0.7793397 (414.52 it/sec) -training >> step=7483400, episode=1248 reward=0.7773139 (413.12 it/sec) -training >> step=7483500, episode=1248 reward=0.7981141 (357.58 it/sec) -training >> step=7483600, episode=1248 reward=0.7779918 (431.13 it/sec) -training >> step=7483700, episode=1248 reward=0.7831076 (396.10 it/sec) -training >> step=7483800, episode=1248 reward=0.7900225 (446.99 it/sec) -training >> step=7483900, episode=1248 reward=0.7870899 (451.71 it/sec) -training >> step=7484000, episode=1248 reward=0.8013415 (456.52 it/sec) -training >> step=7484100, episode=1248 reward=0.7778423 (510.60 it/sec) -training >> step=7484200, episode=1248 reward=0.7909448 (491.22 it/sec) -training >> step=7484300, episode=1248 reward=0.8172576 (507.55 it/sec) -training >> step=7484400, episode=1248 reward=0.7991592 (535.95 it/sec) -training >> step=7484500, episode=1248 reward=0.7971488 (487.33 it/sec) -training >> step=7484600, episode=1248 reward=0.7996513 (455.77 it/sec) -training >> step=7484700, episode=1248 reward=0.7898643 (455.87 it/sec) -training >> step=7484800, episode=1248 reward=0.7979329 (470.83 it/sec) -training >> step=7484900, episode=1248 reward=0.7651753 (487.72 it/sec) -training >> step=7485000, episode=1248 reward=0.7878083 (443.34 it/sec) -training >> step=7485100, episode=1248 reward=0.7980522 (482.73 it/sec) -training >> step=7485200, episode=1248 reward=0.7866371 (493.47 it/sec) -training >> step=7485300, episode=1248 reward=0.7898554 (484.57 it/sec) -training >> step=7485400, episode=1248 reward=0.788479 (492.99 it/sec) -training >> step=7485500, episode=1248 reward=0.7798516 (504.10 it/sec) -training >> step=7485600, episode=1248 reward=0.769042 (501.53 it/sec) -training >> step=7485700, episode=1248 reward=0.7802136 (501.78 it/sec) -training >> step=7485800, episode=1248 reward=0.7847363 (495.97 it/sec) -training >> step=7485900, episode=1248 reward=0.7772491 (505.95 it/sec) -training >> step=7486000, episode=1248 reward=0.7875439 (490.53 it/sec) -training >> step=7486100, episode=1248 reward=0.7881768 (484.59 it/sec) -training >> step=7486200, episode=1248 reward=0.7837123 (447.50 it/sec) -training >> step=7486300, episode=1248 reward=0.7923064 (516.84 it/sec) -training >> step=7486400, episode=1248 reward=0.7627489 (457.49 it/sec) -training >> step=7486500, episode=1248 reward=0.7779701 (416.26 it/sec) -training >> step=7486600, episode=1248 reward=0.7922292 (455.00 it/sec) -training >> step=7486700, episode=1248 reward=0.7807773 (456.57 it/sec) -training >> step=7486800, episode=1248 reward=0.7791814 (498.94 it/sec) -training >> step=7486900, episode=1248 reward=0.7565601 (472.36 it/sec) -training >> step=7487000, episode=1248 reward=0.7663868 (459.51 it/sec) -training >> step=7487100, episode=1248 reward=0.7877002 (417.67 it/sec) -training >> step=7487200, episode=1248 reward=0.7640167 (476.98 it/sec) -training >> step=7487300, episode=1249 reward=0.7661099 (133.21 it/sec) -training >> step=7487400, episode=1249 reward=0.7779126 (541.62 it/sec) -training >> step=7487500, episode=1249 reward=0.7930875 (539.71 it/sec) -training >> step=7487600, episode=1249 reward=0.7990447 (492.19 it/sec) -training >> step=7487700, episode=1249 reward=0.788762 (482.50 it/sec) -training >> step=7487800, episode=1249 reward=0.8157575 (417.76 it/sec) -training >> step=7487900, episode=1249 reward=0.7841063 (404.12 it/sec) -training >> step=7488000, episode=1249 reward=0.7788618 (400.23 it/sec) -training >> step=7488100, episode=1249 reward=0.7681362 (434.83 it/sec) -training >> step=7488200, episode=1249 reward=0.8041258 (416.94 it/sec) -training >> step=7488300, episode=1249 reward=0.7885659 (359.70 it/sec) -training >> step=7488400, episode=1249 reward=0.7807736 (404.32 it/sec) -training >> step=7488500, episode=1249 reward=0.8002576 (443.08 it/sec) -training >> step=7488600, episode=1249 reward=0.7846334 (455.45 it/sec) -training >> step=7488700, episode=1249 reward=0.7823535 (437.21 it/sec) -training >> step=7488800, episode=1249 reward=0.7811508 (487.53 it/sec) -training >> step=7488900, episode=1249 reward=0.7831578 (435.21 it/sec) -training >> step=7489000, episode=1249 reward=0.7990201 (410.28 it/sec) -training >> step=7489100, episode=1249 reward=0.7787153 (418.29 it/sec) -training >> step=7489200, episode=1249 reward=0.7765588 (473.61 it/sec) -training >> step=7489300, episode=1249 reward=0.7785696 (468.57 it/sec) -training >> step=7489400, episode=1249 reward=0.7880507 (416.95 it/sec) -training >> step=7489500, episode=1249 reward=0.780457 (460.89 it/sec) -training >> step=7489600, episode=1249 reward=0.7951365 (438.16 it/sec) -training >> step=7489700, episode=1249 reward=0.7890868 (479.32 it/sec) -training >> step=7489800, episode=1249 reward=0.7626112 (457.52 it/sec) -training >> step=7489900, episode=1249 reward=0.7818998 (470.57 it/sec) -training >> step=7490000, episode=1249 reward=0.7938314 (492.15 it/sec) -training >> step=7490100, episode=1249 reward=0.8008585 (446.74 it/sec) -training >> step=7490200, episode=1249 reward=0.789444 (531.43 it/sec) -training >> step=7490300, episode=1249 reward=0.7828936 (534.21 it/sec) -training >> step=7490400, episode=1249 reward=0.7992014 (509.19 it/sec) -training >> step=7490500, episode=1249 reward=0.792155 (525.97 it/sec) -training >> step=7490600, episode=1249 reward=0.7983811 (500.34 it/sec) -training >> step=7490700, episode=1249 reward=0.7882487 (521.28 it/sec) -training >> step=7490800, episode=1249 reward=0.798312 (526.70 it/sec) -training >> step=7490900, episode=1249 reward=0.810439 (499.20 it/sec) -training >> step=7491000, episode=1249 reward=0.7965233 (512.91 it/sec) -training >> step=7491100, episode=1249 reward=0.7656254 (495.52 it/sec) -training >> step=7491200, episode=1249 reward=0.7647447 (494.05 it/sec) -training >> step=7491300, episode=1249 reward=0.7915514 (519.90 it/sec) -training >> step=7491400, episode=1249 reward=0.7902519 (539.41 it/sec) -training >> step=7491500, episode=1249 reward=0.798264 (518.37 it/sec) -training >> step=7491600, episode=1249 reward=0.7606958 (448.12 it/sec) -training >> step=7491700, episode=1249 reward=0.7684105 (481.69 it/sec) -training >> step=7491800, episode=1249 reward=0.78543 (487.96 it/sec) -training >> step=7491900, episode=1249 reward=0.7685917 (451.52 it/sec) -training >> step=7492000, episode=1249 reward=0.7823476 (491.88 it/sec) -training >> step=7492100, episode=1249 reward=0.7858864 (489.38 it/sec) -training >> step=7492200, episode=1249 reward=0.7700557 (441.39 it/sec) -training >> step=7492300, episode=1249 reward=0.7667632 (460.90 it/sec) -training >> step=7492400, episode=1249 reward=0.7805746 (474.10 it/sec) -training >> step=7492500, episode=1249 reward=0.7789387 (477.50 it/sec) -training >> step=7492600, episode=1249 reward=0.7909175 (472.34 it/sec) -training >> step=7492700, episode=1249 reward=0.7881935 (458.94 it/sec) -training >> step=7492800, episode=1249 reward=0.7724578 (510.70 it/sec) -training >> step=7492900, episode=1249 reward=0.7759335 (471.91 it/sec) -training >> step=7493000, episode=1249 reward=0.7874746 (459.99 it/sec) -training >> step=7493100, episode=1249 reward=0.7790766 (447.59 it/sec) -training >> step=7493200, episode=1249 reward=0.7863064 (458.40 it/sec) -training >> step=7493300, episode=1250 reward=0.7737247 (93.30 it/sec) -training >> step=7493400, episode=1250 reward=0.7824206 (452.63 it/sec) -training >> step=7493500, episode=1250 reward=0.7819862 (427.87 it/sec) -training >> step=7493600, episode=1250 reward=0.790563 (450.03 it/sec) -training >> step=7493700, episode=1250 reward=0.8032342 (479.55 it/sec) -training >> step=7493800, episode=1250 reward=0.7799924 (492.12 it/sec) -training >> step=7493900, episode=1250 reward=0.7640496 (447.53 it/sec) -training >> step=7494000, episode=1250 reward=0.7731168 (481.16 it/sec) -training >> step=7494100, episode=1250 reward=0.7878983 (436.39 it/sec) -training >> step=7494200, episode=1250 reward=0.7918889 (474.69 it/sec) -training >> step=7494300, episode=1250 reward=0.7902852 (446.87 it/sec) -training >> step=7494400, episode=1250 reward=0.7790367 (455.09 it/sec) -training >> step=7494500, episode=1250 reward=0.803485 (458.24 it/sec) -training >> step=7494600, episode=1250 reward=0.7840922 (480.37 it/sec) -training >> step=7494700, episode=1250 reward=0.7858617 (457.36 it/sec) -training >> step=7494800, episode=1250 reward=0.7732953 (469.95 it/sec) -training >> step=7494900, episode=1250 reward=0.7929449 (440.15 it/sec) -training >> step=7495000, episode=1250 reward=0.7936673 (455.35 it/sec) -training >> step=7495100, episode=1250 reward=0.7925707 (480.26 it/sec) -training >> step=7495200, episode=1250 reward=0.7690732 (492.81 it/sec) -training >> step=7495300, episode=1250 reward=0.8089202 (399.68 it/sec) -training >> step=7495400, episode=1250 reward=0.7860474 (435.94 it/sec) -training >> step=7495500, episode=1250 reward=0.7915053 (469.12 it/sec) -training >> step=7495600, episode=1250 reward=0.8108807 (457.59 it/sec) -training >> step=7495700, episode=1250 reward=0.7905911 (451.37 it/sec) -training >> step=7495800, episode=1250 reward=0.7770208 (427.47 it/sec) -training >> step=7495900, episode=1250 reward=0.7902943 (462.39 it/sec) -training >> step=7496000, episode=1250 reward=0.7868941 (436.25 it/sec) -training >> step=7496100, episode=1250 reward=0.7850832 (430.70 it/sec) -training >> step=7496200, episode=1250 reward=0.7915177 (500.20 it/sec) -training >> step=7496300, episode=1250 reward=0.7881752 (469.56 it/sec) -training >> step=7496400, episode=1250 reward=0.7873477 (452.27 it/sec) -training >> step=7496500, episode=1250 reward=0.7695411 (450.26 it/sec) -training >> step=7496600, episode=1250 reward=0.7688434 (432.18 it/sec) -training >> step=7496700, episode=1250 reward=0.8115788 (420.21 it/sec) -training >> step=7496800, episode=1250 reward=0.8122231 (457.74 it/sec) -training >> step=7496900, episode=1250 reward=0.7775266 (432.63 it/sec) -training >> step=7497000, episode=1250 reward=0.791052 (495.00 it/sec) -training >> step=7497100, episode=1250 reward=0.7824574 (419.92 it/sec) -training >> step=7497200, episode=1250 reward=0.7796484 (459.45 it/sec) -training >> step=7497300, episode=1250 reward=0.7840683 (437.39 it/sec) -training >> step=7497400, episode=1250 reward=0.7732745 (433.44 it/sec) -training >> step=7497500, episode=1250 reward=0.7685947 (476.64 it/sec) -training >> step=7497600, episode=1250 reward=0.7595571 (460.08 it/sec) -training >> step=7497700, episode=1250 reward=0.7898316 (420.83 it/sec) -training >> step=7497800, episode=1250 reward=0.796333 (457.33 it/sec) -training >> step=7497900, episode=1250 reward=0.7750688 (457.92 it/sec) -training >> step=7498000, episode=1250 reward=0.7632194 (493.20 it/sec) -training >> step=7498100, episode=1250 reward=0.7664324 (486.36 it/sec) -training >> step=7498200, episode=1250 reward=0.7686568 (485.72 it/sec) -training >> step=7498300, episode=1250 reward=0.7474202 (455.59 it/sec) -training >> step=7498400, episode=1250 reward=0.7812669 (470.86 it/sec) -training >> step=7498500, episode=1250 reward=0.7881683 (416.35 it/sec) -training >> step=7498600, episode=1250 reward=0.7714673 (488.04 it/sec) -training >> step=7498700, episode=1250 reward=0.7717593 (450.23 it/sec) -training >> step=7498800, episode=1250 reward=0.7814408 (471.34 it/sec) -training >> step=7498900, episode=1250 reward=0.7714225 (443.59 it/sec) -training >> step=7499000, episode=1250 reward=0.7882206 (454.67 it/sec) -training >> step=7499100, episode=1250 reward=0.7866867 (470.86 it/sec) -training >> step=7499200, episode=1250 reward=0.7781673 (454.23 it/sec) -training >> step=7499300, episode=1251 reward=0.763232 (116.97 it/sec) -training >> step=7499400, episode=1251 reward=0.7967603 (425.86 it/sec) -training >> step=7499500, episode=1251 reward=0.7721089 (439.98 it/sec) -training >> step=7499600, episode=1251 reward=0.7899482 (421.03 it/sec) -training >> step=7499700, episode=1251 reward=0.8063501 (451.36 it/sec) -training >> step=7499800, episode=1251 reward=0.8020618 (449.13 it/sec) -training >> step=7499900, episode=1251 reward=0.7784811 (490.20 it/sec) -training >> step=7500000, episode=1251 reward=0.795778 (462.84 it/sec) -training >> step=7500100, episode=1251 reward=0.7998811 (472.28 it/sec) -training >> step=7500200, episode=1251 reward=0.7942702 (448.55 it/sec) -training >> step=7500300, episode=1251 reward=0.8054628 (463.06 it/sec) -training >> step=7500400, episode=1251 reward=0.8025849 (441.86 it/sec) -training >> step=7500500, episode=1251 reward=0.7780386 (437.33 it/sec) -training >> step=7500600, episode=1251 reward=0.7842597 (451.71 it/sec) -training >> step=7500700, episode=1251 reward=0.776145 (431.88 it/sec) -training >> step=7500800, episode=1251 reward=0.7795112 (418.37 it/sec) -training >> step=7500900, episode=1251 reward=0.8154891 (433.05 it/sec) -training >> step=7501000, episode=1251 reward=0.7720373 (483.09 it/sec) -training >> step=7501100, episode=1251 reward=0.8039059 (439.66 it/sec) -training >> step=7501200, episode=1251 reward=0.7858986 (397.70 it/sec) -training >> step=7501300, episode=1251 reward=0.7719189 (377.79 it/sec) -training >> step=7501400, episode=1251 reward=0.8033723 (393.30 it/sec) -training >> step=7501500, episode=1251 reward=0.8025689 (423.30 it/sec) -training >> step=7501600, episode=1251 reward=0.7891026 (410.40 it/sec) -training >> step=7501700, episode=1251 reward=0.7913635 (436.72 it/sec) -training >> step=7501800, episode=1251 reward=0.7814512 (433.69 it/sec) -training >> step=7501900, episode=1251 reward=0.7996062 (445.66 it/sec) -training >> step=7502000, episode=1251 reward=0.7962306 (456.05 it/sec) -training >> step=7502100, episode=1251 reward=0.7933235 (476.13 it/sec) -training >> step=7502200, episode=1251 reward=0.7915664 (454.63 it/sec) -training >> step=7502300, episode=1251 reward=0.7905248 (434.07 it/sec) -training >> step=7502400, episode=1251 reward=0.7954549 (455.49 it/sec) -training >> step=7502500, episode=1251 reward=0.787352 (432.48 it/sec) -training >> step=7502600, episode=1251 reward=0.7780105 (419.49 it/sec) -training >> step=7502700, episode=1251 reward=0.8036876 (428.29 it/sec) -training >> step=7502800, episode=1251 reward=0.8042946 (485.51 it/sec) -training >> step=7502900, episode=1251 reward=0.769088 (445.34 it/sec) -training >> step=7503000, episode=1251 reward=0.798314 (427.17 it/sec) -training >> step=7503100, episode=1251 reward=0.787185 (450.08 it/sec) -training >> step=7503200, episode=1251 reward=0.8044323 (467.20 it/sec) -training >> step=7503300, episode=1251 reward=0.7776586 (429.32 it/sec) -training >> step=7503400, episode=1251 reward=0.7672493 (447.49 it/sec) -training >> step=7503500, episode=1251 reward=0.7812003 (463.28 it/sec) -training >> step=7503600, episode=1251 reward=0.8034801 (444.90 it/sec) -training >> step=7503700, episode=1251 reward=0.7838366 (437.40 it/sec) -training >> step=7503800, episode=1251 reward=0.7683823 (487.52 it/sec) -training >> step=7503900, episode=1251 reward=0.7717296 (423.97 it/sec) -training >> step=7504000, episode=1251 reward=0.7794668 (397.03 it/sec) -training >> step=7504100, episode=1251 reward=0.7604454 (435.03 it/sec) -training >> step=7504200, episode=1251 reward=0.787169 (485.30 it/sec) -training >> step=7504300, episode=1251 reward=0.7673255 (423.72 it/sec) -training >> step=7504400, episode=1251 reward=0.7789534 (419.29 it/sec) -training >> step=7504500, episode=1251 reward=0.7714047 (431.80 it/sec) -training >> step=7504600, episode=1251 reward=0.7642603 (476.59 it/sec) -training >> step=7504700, episode=1251 reward=0.7743579 (471.88 it/sec) -training >> step=7504800, episode=1251 reward=0.7848496 (399.10 it/sec) -training >> step=7504900, episode=1251 reward=0.767903 (461.68 it/sec) -training >> step=7505000, episode=1251 reward=0.7994795 (431.07 it/sec) -training >> step=7505100, episode=1251 reward=0.7650473 (470.32 it/sec) -training >> step=7505200, episode=1251 reward=0.7659335 (427.10 it/sec) -training >> step=7505300, episode=1252 reward=0.7966779 (89.88 it/sec) -training >> step=7505400, episode=1252 reward=0.7541694 (430.95 it/sec) -training >> step=7505500, episode=1252 reward=0.7741707 (424.02 it/sec) -training >> step=7505600, episode=1252 reward=0.7659843 (471.81 it/sec) -training >> step=7505700, episode=1252 reward=0.7921242 (447.55 it/sec) -training >> step=7505800, episode=1252 reward=0.7763456 (443.82 it/sec) -training >> step=7505900, episode=1252 reward=0.7798 (436.52 it/sec) -training >> step=7506000, episode=1252 reward=0.7963554 (487.05 it/sec) -training >> step=7506100, episode=1252 reward=0.7829415 (465.92 it/sec) -training >> step=7506200, episode=1252 reward=0.7746188 (461.27 it/sec) -training >> step=7506300, episode=1252 reward=0.8066944 (422.14 it/sec) -training >> step=7506400, episode=1252 reward=0.7951186 (453.46 it/sec) -training >> step=7506500, episode=1252 reward=0.8037269 (456.95 it/sec) -training >> step=7506600, episode=1252 reward=0.7701641 (440.64 it/sec) -training >> step=7506700, episode=1252 reward=0.7924977 (449.69 it/sec) -training >> step=7506800, episode=1252 reward=0.7997081 (437.16 it/sec) -training >> step=7506900, episode=1252 reward=0.7936568 (482.60 it/sec) -training >> step=7507000, episode=1252 reward=0.7563264 (462.48 it/sec) -training >> step=7507100, episode=1252 reward=0.786788 (460.32 it/sec) -training >> step=7507200, episode=1252 reward=0.7789787 (444.35 it/sec) -training >> step=7507300, episode=1252 reward=0.7814316 (431.79 it/sec) -training >> step=7507400, episode=1252 reward=0.7870269 (444.19 it/sec) -training >> step=7507500, episode=1252 reward=0.7986228 (494.81 it/sec) -training >> step=7507600, episode=1252 reward=0.7887945 (483.36 it/sec) -training >> step=7507700, episode=1252 reward=0.7979307 (448.44 it/sec) -training >> step=7507800, episode=1252 reward=0.7888012 (431.76 it/sec) -training >> step=7507900, episode=1252 reward=0.782136 (457.42 it/sec) -training >> step=7508000, episode=1252 reward=0.7915219 (413.71 it/sec) -training >> step=7508100, episode=1252 reward=0.7709894 (379.24 it/sec) -training >> step=7508200, episode=1252 reward=0.7814994 (465.30 it/sec) -training >> step=7508300, episode=1252 reward=0.7845019 (448.78 it/sec) -training >> step=7508400, episode=1252 reward=0.7760741 (457.58 it/sec) -training >> step=7508500, episode=1252 reward=0.8065782 (452.08 it/sec) -training >> step=7508600, episode=1252 reward=0.7861565 (431.82 it/sec) -training >> step=7508700, episode=1252 reward=0.7891035 (424.96 it/sec) -training >> step=7508800, episode=1252 reward=0.8077841 (465.47 it/sec) -training >> step=7508900, episode=1252 reward=0.7898976 (463.22 it/sec) -training >> step=7509000, episode=1252 reward=0.7980516 (465.32 it/sec) -training >> step=7509100, episode=1252 reward=0.7645524 (447.31 it/sec) -training >> step=7509200, episode=1252 reward=0.7721536 (434.86 it/sec) -training >> step=7509300, episode=1252 reward=0.7949601 (427.32 it/sec) -training >> step=7509400, episode=1252 reward=0.7945218 (442.46 it/sec) -training >> step=7509500, episode=1252 reward=0.785144 (486.36 it/sec) -training >> step=7509600, episode=1252 reward=0.8022544 (475.17 it/sec) -training >> step=7509700, episode=1252 reward=0.7933002 (480.01 it/sec) -training >> step=7509800, episode=1252 reward=0.7830117 (441.07 it/sec) -training >> step=7509900, episode=1252 reward=0.7765187 (456.99 it/sec) -training >> step=7510000, episode=1252 reward=0.7863834 (427.90 it/sec) -training >> step=7510100, episode=1252 reward=0.7830469 (484.81 it/sec) -training >> step=7510200, episode=1252 reward=0.7796406 (489.03 it/sec) -training >> step=7510300, episode=1252 reward=0.8071231 (500.01 it/sec) -training >> step=7510400, episode=1252 reward=0.7979229 (463.11 it/sec) -training >> step=7510500, episode=1252 reward=0.7712772 (460.08 it/sec) -training >> step=7510600, episode=1252 reward=0.7902032 (436.78 it/sec) -training >> step=7510700, episode=1252 reward=0.7619005 (464.63 it/sec) -training >> step=7510800, episode=1252 reward=0.753897 (449.71 it/sec) -training >> step=7510900, episode=1252 reward=0.7778026 (469.10 it/sec) -training >> step=7511000, episode=1252 reward=0.7661266 (446.19 it/sec) -training >> step=7511100, episode=1252 reward=0.7615256 (467.56 it/sec) -training >> step=7511200, episode=1252 reward=0.7788228 (494.98 it/sec) -training >> step=7511300, episode=1253 reward=0.7837976 (98.78 it/sec) -training >> step=7511400, episode=1253 reward=0.7558928 (454.33 it/sec) -training >> step=7511500, episode=1253 reward=0.8010111 (453.40 it/sec) -training >> step=7511600, episode=1253 reward=0.7831924 (480.26 it/sec) -training >> step=7511700, episode=1253 reward=0.7862075 (533.70 it/sec) -training >> step=7511800, episode=1253 reward=0.7823205 (528.83 it/sec) -training >> step=7511900, episode=1253 reward=0.7812712 (488.03 it/sec) -training >> step=7512000, episode=1253 reward=0.7962912 (487.34 it/sec) -training >> step=7512100, episode=1253 reward=0.7764507 (515.21 it/sec) -training >> step=7512200, episode=1253 reward=0.7799702 (497.67 it/sec) -training >> step=7512300, episode=1253 reward=0.8064406 (532.44 it/sec) -training >> step=7512400, episode=1253 reward=0.7894374 (457.78 it/sec) -training >> step=7512500, episode=1253 reward=0.8128618 (474.78 it/sec) -training >> step=7512600, episode=1253 reward=0.7875981 (499.05 it/sec) -training >> step=7512700, episode=1253 reward=0.7798623 (554.65 it/sec) -training >> step=7512800, episode=1253 reward=0.8031372 (500.45 it/sec) -training >> step=7512900, episode=1253 reward=0.7710362 (495.40 it/sec) -training >> step=7513000, episode=1253 reward=0.7969846 (542.48 it/sec) -training >> step=7513100, episode=1253 reward=0.7892131 (524.77 it/sec) -training >> step=7513200, episode=1253 reward=0.7887859 (524.06 it/sec) -training >> step=7513300, episode=1253 reward=0.8209686 (527.70 it/sec) -training >> step=7513400, episode=1253 reward=0.791289 (539.52 it/sec) -training >> step=7513500, episode=1253 reward=0.8108329 (485.58 it/sec) -training >> step=7513600, episode=1253 reward=0.8007415 (483.90 it/sec) -training >> step=7513700, episode=1253 reward=0.7986068 (481.82 it/sec) -training >> step=7513800, episode=1253 reward=0.780413 (523.43 it/sec) -training >> step=7513900, episode=1253 reward=0.7950829 (484.66 it/sec) -training >> step=7514000, episode=1253 reward=0.7640827 (499.10 it/sec) -training >> step=7514100, episode=1253 reward=0.8023545 (520.53 it/sec) -training >> step=7514200, episode=1253 reward=0.8046403 (445.58 it/sec) -training >> step=7514300, episode=1253 reward=0.7956257 (483.68 it/sec) -training >> step=7514400, episode=1253 reward=0.8066392 (503.73 it/sec) -training >> step=7514500, episode=1253 reward=0.7968759 (540.14 it/sec) -training >> step=7514600, episode=1253 reward=0.7887461 (489.35 it/sec) -training >> step=7514700, episode=1253 reward=0.7949714 (500.61 it/sec) -training >> step=7514800, episode=1253 reward=0.7818415 (534.80 it/sec) -training >> step=7514900, episode=1253 reward=0.7854739 (542.10 it/sec) -training >> step=7515000, episode=1253 reward=0.7913921 (506.41 it/sec) -training >> step=7515100, episode=1253 reward=0.7720708 (501.99 it/sec) -training >> step=7515200, episode=1253 reward=0.7870574 (503.94 it/sec) -training >> step=7515300, episode=1253 reward=0.7899963 (520.34 it/sec) -training >> step=7515400, episode=1253 reward=0.7765191 (504.03 it/sec) -training >> step=7515500, episode=1253 reward=0.8007622 (475.59 it/sec) -training >> step=7515600, episode=1253 reward=0.7870921 (534.61 it/sec) -training >> step=7515700, episode=1253 reward=0.7749282 (496.50 it/sec) -training >> step=7515800, episode=1253 reward=0.7947031 (505.86 it/sec) -training >> step=7515900, episode=1253 reward=0.7882645 (479.75 it/sec) -training >> step=7516000, episode=1253 reward=0.7739333 (510.86 it/sec) -training >> step=7516100, episode=1253 reward=0.7941995 (494.80 it/sec) -training >> step=7516200, episode=1253 reward=0.7794656 (489.78 it/sec) -training >> step=7516300, episode=1253 reward=0.7879415 (510.72 it/sec) -training >> step=7516400, episode=1253 reward=0.7735813 (513.83 it/sec) -training >> step=7516500, episode=1253 reward=0.7787237 (445.19 it/sec) -training >> step=7516600, episode=1253 reward=0.7908238 (459.30 it/sec) -training >> step=7516700, episode=1253 reward=0.792501 (481.52 it/sec) -training >> step=7516800, episode=1253 reward=0.7613176 (436.12 it/sec) -training >> step=7516900, episode=1253 reward=0.7910599 (449.29 it/sec) -training >> step=7517000, episode=1253 reward=0.7766635 (491.90 it/sec) -training >> step=7517100, episode=1253 reward=0.7946545 (487.71 it/sec) -training >> step=7517200, episode=1253 reward=0.7806675 (447.97 it/sec) -training >> step=7517300, episode=1254 reward=0.784703 (109.75 it/sec) -training >> step=7517400, episode=1254 reward=0.7706235 (501.30 it/sec) -training >> step=7517500, episode=1254 reward=0.7786272 (484.84 it/sec) -training >> step=7517600, episode=1254 reward=0.7926431 (483.06 it/sec) -training >> step=7517700, episode=1254 reward=0.7831945 (488.01 it/sec) -training >> step=7517800, episode=1254 reward=0.790737 (477.72 it/sec) -training >> step=7517900, episode=1254 reward=0.7987918 (490.56 it/sec) -training >> step=7518000, episode=1254 reward=0.7914566 (458.81 it/sec) -training >> step=7518100, episode=1254 reward=0.7995493 (484.58 it/sec) -training >> step=7518200, episode=1254 reward=0.8147994 (466.00 it/sec) -training >> step=7518300, episode=1254 reward=0.7992985 (483.47 it/sec) -training >> step=7518400, episode=1254 reward=0.8039932 (477.01 it/sec) -training >> step=7518500, episode=1254 reward=0.804091 (514.28 it/sec) -training >> step=7518600, episode=1254 reward=0.8002074 (423.73 it/sec) -training >> step=7518700, episode=1254 reward=0.7982012 (445.96 it/sec) -training >> step=7518800, episode=1254 reward=0.7911813 (467.58 it/sec) -training >> step=7518900, episode=1254 reward=0.780887 (537.40 it/sec) -training >> step=7519000, episode=1254 reward=0.7678245 (483.22 it/sec) -training >> step=7519100, episode=1254 reward=0.7931418 (437.91 it/sec) -training >> step=7519200, episode=1254 reward=0.7744632 (443.15 it/sec) -training >> step=7519300, episode=1254 reward=0.7774657 (428.13 it/sec) -training >> step=7519400, episode=1254 reward=0.7790855 (480.42 it/sec) -training >> step=7519500, episode=1254 reward=0.7897762 (492.38 it/sec) -training >> step=7519600, episode=1254 reward=0.7791871 (499.94 it/sec) -training >> step=7519700, episode=1254 reward=0.7884404 (425.19 it/sec) -training >> step=7519800, episode=1254 reward=0.7898621 (421.09 it/sec) -training >> step=7519900, episode=1254 reward=0.7634259 (452.93 it/sec) -training >> step=7520000, episode=1254 reward=0.7840682 (458.17 it/sec) -training >> step=7520100, episode=1254 reward=0.7865511 (415.73 it/sec) -training >> step=7520200, episode=1254 reward=0.7964103 (404.41 it/sec) -training >> step=7520300, episode=1254 reward=0.792469 (430.34 it/sec) -training >> step=7520400, episode=1254 reward=0.7975053 (449.05 it/sec) -training >> step=7520500, episode=1254 reward=0.7913807 (478.41 it/sec) -training >> step=7520600, episode=1254 reward=0.7779582 (507.15 it/sec) -training >> step=7520700, episode=1254 reward=0.8017796 (514.39 it/sec) -training >> step=7520800, episode=1254 reward=0.7955863 (451.36 it/sec) -training >> step=7520900, episode=1254 reward=0.7854575 (505.16 it/sec) -training >> step=7521000, episode=1254 reward=0.7944036 (476.19 it/sec) -training >> step=7521100, episode=1254 reward=0.7862471 (458.43 it/sec) -training >> step=7521200, episode=1254 reward=0.7869898 (480.14 it/sec) -training >> step=7521300, episode=1254 reward=0.7830815 (460.64 it/sec) -training >> step=7521400, episode=1254 reward=0.8010846 (486.67 it/sec) -training >> step=7521500, episode=1254 reward=0.7922139 (480.59 it/sec) -training >> step=7521600, episode=1254 reward=0.7651802 (499.96 it/sec) -training >> step=7521700, episode=1254 reward=0.7986389 (509.75 it/sec) -training >> step=7521800, episode=1254 reward=0.7684817 (482.52 it/sec) -training >> step=7521900, episode=1254 reward=0.7921088 (491.94 it/sec) -training >> step=7522000, episode=1254 reward=0.773596 (512.73 it/sec) -training >> step=7522100, episode=1254 reward=0.7808276 (558.15 it/sec) -training >> step=7522200, episode=1254 reward=0.7940191 (506.65 it/sec) -training >> step=7522300, episode=1254 reward=0.7835528 (530.02 it/sec) -training >> step=7522400, episode=1254 reward=0.7604656 (490.94 it/sec) -training >> step=7522500, episode=1254 reward=0.7879711 (546.69 it/sec) -training >> step=7522600, episode=1254 reward=0.8009256 (499.71 it/sec) -training >> step=7522700, episode=1254 reward=0.7795104 (545.68 it/sec) -training >> step=7522800, episode=1254 reward=0.789476 (517.66 it/sec) -training >> step=7522900, episode=1254 reward=0.7701795 (503.59 it/sec) -training >> step=7523000, episode=1254 reward=0.7771306 (505.64 it/sec) -training >> step=7523100, episode=1254 reward=0.7727057 (488.06 it/sec) -training >> step=7523200, episode=1254 reward=0.7696956 (475.90 it/sec) -training >> step=7523300, episode=1255 reward=0.7867116 (120.96 it/sec) -training >> step=7523400, episode=1255 reward=0.7962657 (477.35 it/sec) -training >> step=7523500, episode=1255 reward=0.787468 (454.31 it/sec) -training >> step=7523600, episode=1255 reward=0.7984008 (485.44 it/sec) -training >> step=7523700, episode=1255 reward=0.7659457 (477.40 it/sec) -training >> step=7523800, episode=1255 reward=0.7810464 (509.93 it/sec) -training >> step=7523900, episode=1255 reward=0.8032439 (523.84 it/sec) -training >> step=7524000, episode=1255 reward=0.7852664 (409.67 it/sec) -training >> step=7524100, episode=1255 reward=0.7950473 (432.29 it/sec) -training >> step=7524200, episode=1255 reward=0.7859478 (403.12 it/sec) -training >> step=7524300, episode=1255 reward=0.8126729 (446.31 it/sec) -training >> step=7524400, episode=1255 reward=0.8125026 (403.33 it/sec) -training >> step=7524500, episode=1255 reward=0.7787532 (438.90 it/sec) -training >> step=7524600, episode=1255 reward=0.7877666 (560.54 it/sec) -training >> step=7524700, episode=1255 reward=0.8032203 (462.86 it/sec) -training >> step=7524800, episode=1255 reward=0.7952029 (546.69 it/sec) -training >> step=7524900, episode=1255 reward=0.7879276 (533.68 it/sec) -training >> step=7525000, episode=1255 reward=0.7927998 (527.89 it/sec) -training >> step=7525100, episode=1255 reward=0.766761 (524.94 it/sec) -training >> step=7525200, episode=1255 reward=0.7955726 (511.22 it/sec) -training >> step=7525300, episode=1255 reward=0.775211 (514.05 it/sec) -training >> step=7525400, episode=1255 reward=0.7825967 (486.80 it/sec) -training >> step=7525500, episode=1255 reward=0.7936593 (472.68 it/sec) -training >> step=7525600, episode=1255 reward=0.776852 (474.08 it/sec) -training >> step=7525700, episode=1255 reward=0.7947133 (456.97 it/sec) -training >> step=7525800, episode=1255 reward=0.7804421 (449.25 it/sec) -training >> step=7525900, episode=1255 reward=0.8071959 (477.57 it/sec) -training >> step=7526000, episode=1255 reward=0.7822818 (452.13 it/sec) -training >> step=7526100, episode=1255 reward=0.805288 (508.67 it/sec) -training >> step=7526200, episode=1255 reward=0.7830057 (470.92 it/sec) -training >> step=7526300, episode=1255 reward=0.7842367 (478.59 it/sec) -training >> step=7526400, episode=1255 reward=0.8014092 (408.43 it/sec) -training >> step=7526500, episode=1255 reward=0.781743 (506.69 it/sec) -training >> step=7526600, episode=1255 reward=0.7858767 (514.25 it/sec) -training >> step=7526700, episode=1255 reward=0.789148 (442.52 it/sec) -training >> step=7526800, episode=1255 reward=0.7850098 (489.58 it/sec) -training >> step=7526900, episode=1255 reward=0.7909014 (433.78 it/sec) -training >> step=7527000, episode=1255 reward=0.7733044 (489.74 it/sec) -training >> step=7527100, episode=1255 reward=0.7916343 (510.99 it/sec) -training >> step=7527200, episode=1255 reward=0.7859237 (540.12 it/sec) -training >> step=7527300, episode=1255 reward=0.7829167 (517.52 it/sec) -training >> step=7527400, episode=1255 reward=0.7900792 (437.86 it/sec) -training >> step=7527500, episode=1255 reward=0.786034 (450.10 it/sec) -training >> step=7527600, episode=1255 reward=0.7782762 (415.62 it/sec) -training >> step=7527700, episode=1255 reward=0.7980448 (412.05 it/sec) -training >> step=7527800, episode=1255 reward=0.7864596 (465.10 it/sec) -training >> step=7527900, episode=1255 reward=0.7747701 (490.90 it/sec) -training >> step=7528000, episode=1255 reward=0.7796764 (546.86 it/sec) -training >> step=7528100, episode=1255 reward=0.7906441 (502.41 it/sec) -training >> step=7528200, episode=1255 reward=0.7756366 (533.31 it/sec) -training >> step=7528300, episode=1255 reward=0.7853206 (504.34 it/sec) -training >> step=7528400, episode=1255 reward=0.7833914 (439.32 it/sec) -training >> step=7528500, episode=1255 reward=0.7597398 (469.99 it/sec) -training >> step=7528600, episode=1255 reward=0.744611 (480.46 it/sec) -training >> step=7528700, episode=1255 reward=0.7864187 (484.43 it/sec) -training >> step=7528800, episode=1255 reward=0.7627959 (491.26 it/sec) -training >> step=7528900, episode=1255 reward=0.7910135 (557.26 it/sec) -training >> step=7529000, episode=1255 reward=0.8011096 (496.01 it/sec) -training >> step=7529100, episode=1255 reward=0.7730356 (530.14 it/sec) -training >> step=7529200, episode=1255 reward=0.7638362 (521.79 it/sec) -training >> step=7529300, episode=1256 reward=0.7857609 (107.76 it/sec) -training >> step=7529400, episode=1256 reward=0.7608616 (443.42 it/sec) -training >> step=7529500, episode=1256 reward=0.7773171 (496.99 it/sec) -training >> step=7529600, episode=1256 reward=0.7838209 (475.24 it/sec) -training >> step=7529700, episode=1256 reward=0.7943364 (497.96 it/sec) -training >> step=7529800, episode=1256 reward=0.7826201 (481.55 it/sec) -training >> step=7529900, episode=1256 reward=0.7798723 (438.82 it/sec) -training >> step=7530000, episode=1256 reward=0.7962505 (508.16 it/sec) -training >> step=7530100, episode=1256 reward=0.7995221 (524.59 it/sec) -training >> step=7530200, episode=1256 reward=0.7788823 (479.01 it/sec) -training >> step=7530300, episode=1256 reward=0.7989827 (501.58 it/sec) -training >> step=7530400, episode=1256 reward=0.7728139 (532.13 it/sec) -training >> step=7530500, episode=1256 reward=0.7776627 (474.12 it/sec) -training >> step=7530600, episode=1256 reward=0.7908692 (510.03 it/sec) -training >> step=7530700, episode=1256 reward=0.788795 (482.48 it/sec) -training >> step=7530800, episode=1256 reward=0.7815362 (514.75 it/sec) -training >> step=7530900, episode=1256 reward=0.770773 (486.30 it/sec) -training >> step=7531000, episode=1256 reward=0.7957855 (486.91 it/sec) -training >> step=7531100, episode=1256 reward=0.79158 (507.30 it/sec) -training >> step=7531200, episode=1256 reward=0.7802609 (491.16 it/sec) -training >> step=7531300, episode=1256 reward=0.7922066 (473.67 it/sec) -training >> step=7531400, episode=1256 reward=0.7884329 (483.61 it/sec) -training >> step=7531500, episode=1256 reward=0.7960358 (487.99 it/sec) -training >> step=7531600, episode=1256 reward=0.7830043 (429.42 it/sec) -training >> step=7531700, episode=1256 reward=0.7805315 (492.00 it/sec) -training >> step=7531800, episode=1256 reward=0.7843444 (468.52 it/sec) -training >> step=7531900, episode=1256 reward=0.7805088 (501.67 it/sec) -training >> step=7532000, episode=1256 reward=0.794449 (502.52 it/sec) -training >> step=7532100, episode=1256 reward=0.7652347 (475.14 it/sec) -training >> step=7532200, episode=1256 reward=0.7871008 (509.11 it/sec) -training >> step=7532300, episode=1256 reward=0.7867408 (519.06 it/sec) -training >> step=7532400, episode=1256 reward=0.7836055 (508.32 it/sec) -training >> step=7532500, episode=1256 reward=0.7937194 (483.35 it/sec) -training >> step=7532600, episode=1256 reward=0.7871506 (481.00 it/sec) -training >> step=7532700, episode=1256 reward=0.7863674 (467.66 it/sec) -training >> step=7532800, episode=1256 reward=0.7801628 (483.02 it/sec) -training >> step=7532900, episode=1256 reward=0.7968222 (492.64 it/sec) -training >> step=7533000, episode=1256 reward=0.7780747 (485.03 it/sec) -training >> step=7533100, episode=1256 reward=0.7950788 (494.41 it/sec) -training >> step=7533200, episode=1256 reward=0.7847175 (457.76 it/sec) -training >> step=7533300, episode=1256 reward=0.7922767 (453.90 it/sec) -training >> step=7533400, episode=1256 reward=0.7613461 (500.65 it/sec) -training >> step=7533500, episode=1256 reward=0.7618685 (461.91 it/sec) -training >> step=7533600, episode=1256 reward=0.7832028 (502.88 it/sec) -training >> step=7533700, episode=1256 reward=0.784574 (508.99 it/sec) -training >> step=7533800, episode=1256 reward=0.7976837 (477.27 it/sec) -training >> step=7533900, episode=1256 reward=0.7707057 (494.50 it/sec) -training >> step=7534000, episode=1256 reward=0.7804992 (541.94 it/sec) -training >> step=7534100, episode=1256 reward=0.7807851 (475.57 it/sec) -training >> step=7534200, episode=1256 reward=0.7726227 (507.60 it/sec) -training >> step=7534300, episode=1256 reward=0.7918754 (487.11 it/sec) -training >> step=7534400, episode=1256 reward=0.7786414 (483.71 it/sec) -training >> step=7534500, episode=1256 reward=0.7713697 (486.59 it/sec) -training >> step=7534600, episode=1256 reward=0.7790586 (482.19 it/sec) -training >> step=7534700, episode=1256 reward=0.7768211 (523.40 it/sec) -training >> step=7534800, episode=1256 reward=0.7808756 (502.06 it/sec) -training >> step=7534900, episode=1256 reward=0.7739623 (499.11 it/sec) -training >> step=7535000, episode=1256 reward=0.7896913 (493.75 it/sec) -training >> step=7535100, episode=1256 reward=0.7730229 (520.36 it/sec) -training >> step=7535200, episode=1256 reward=0.7859553 (460.39 it/sec) -training >> step=7535300, episode=1257 reward=0.7869986 (71.37 it/sec) -training >> step=7535400, episode=1257 reward=0.7977987 (490.47 it/sec) -training >> step=7535500, episode=1257 reward=0.797143 (476.64 it/sec) -training >> step=7535600, episode=1257 reward=0.7935646 (519.38 it/sec) -training >> step=7535700, episode=1257 reward=0.7912311 (510.46 it/sec) -training >> step=7535800, episode=1257 reward=0.789981 (470.34 it/sec) -training >> step=7535900, episode=1257 reward=0.7880428 (448.89 it/sec) -training >> step=7536000, episode=1257 reward=0.7948707 (509.80 it/sec) -training >> step=7536100, episode=1257 reward=0.8030684 (507.59 it/sec) -training >> step=7536200, episode=1257 reward=0.7837045 (522.22 it/sec) -training >> step=7536300, episode=1257 reward=0.798375 (473.39 it/sec) -training >> step=7536400, episode=1257 reward=0.7789043 (541.57 it/sec) -training >> step=7536500, episode=1257 reward=0.7749479 (448.48 it/sec) -training >> step=7536600, episode=1257 reward=0.779743 (533.51 it/sec) -training >> step=7536700, episode=1257 reward=0.7962498 (516.83 it/sec) -training >> step=7536800, episode=1257 reward=0.7821836 (466.54 it/sec) -training >> step=7536900, episode=1257 reward=0.812659 (506.18 it/sec) -training >> step=7537000, episode=1257 reward=0.807467 (459.97 it/sec) -training >> step=7537100, episode=1257 reward=0.7662357 (488.22 it/sec) -training >> step=7537200, episode=1257 reward=0.8036869 (503.38 it/sec) -training >> step=7537300, episode=1257 reward=0.7794003 (466.13 it/sec) -training >> step=7537400, episode=1257 reward=0.7886869 (493.53 it/sec) -training >> step=7537500, episode=1257 reward=0.7924919 (474.97 it/sec) -training >> step=7537600, episode=1257 reward=0.7940227 (460.63 it/sec) -training >> step=7537700, episode=1257 reward=0.8026385 (523.18 it/sec) -training >> step=7537800, episode=1257 reward=0.7997916 (444.71 it/sec) -training >> step=7537900, episode=1257 reward=0.7871033 (466.27 it/sec) -training >> step=7538000, episode=1257 reward=0.77939 (490.57 it/sec) -training >> step=7538100, episode=1257 reward=0.8015268 (493.91 it/sec) -training >> step=7538200, episode=1257 reward=0.7706444 (482.19 it/sec) -training >> step=7538300, episode=1257 reward=0.7838301 (426.01 it/sec) -training >> step=7538400, episode=1257 reward=0.7884967 (520.13 it/sec) -training >> step=7538500, episode=1257 reward=0.792137 (469.83 it/sec) -training >> step=7538600, episode=1257 reward=0.8108803 (502.87 it/sec) -training >> step=7538700, episode=1257 reward=0.7803705 (487.11 it/sec) -training >> step=7538800, episode=1257 reward=0.7887929 (534.63 it/sec) -training >> step=7538900, episode=1257 reward=0.7816986 (484.54 it/sec) -training >> step=7539000, episode=1257 reward=0.7893668 (467.30 it/sec) -training >> step=7539100, episode=1257 reward=0.7962717 (551.38 it/sec) -training >> step=7539200, episode=1257 reward=0.7872348 (492.99 it/sec) -training >> step=7539300, episode=1257 reward=0.7986994 (497.94 it/sec) -training >> step=7539400, episode=1257 reward=0.8083625 (480.99 it/sec) -training >> step=7539500, episode=1257 reward=0.7832744 (494.03 it/sec) -training >> step=7539600, episode=1257 reward=0.8006346 (485.83 it/sec) -training >> step=7539700, episode=1257 reward=0.7635088 (491.74 it/sec) -training >> step=7539800, episode=1257 reward=0.7838155 (469.63 it/sec) -training >> step=7539900, episode=1257 reward=0.7712948 (530.75 it/sec) -training >> step=7540000, episode=1257 reward=0.7923591 (473.48 it/sec) -training >> step=7540100, episode=1257 reward=0.7899401 (450.09 it/sec) -training >> step=7540200, episode=1257 reward=0.7772644 (500.39 it/sec) -training >> step=7540300, episode=1257 reward=0.7988917 (509.60 it/sec) -training >> step=7540400, episode=1257 reward=0.767188 (488.94 it/sec) -training >> step=7540500, episode=1257 reward=0.7806209 (482.06 it/sec) -training >> step=7540600, episode=1257 reward=0.7985244 (496.33 it/sec) -training >> step=7540700, episode=1257 reward=0.786379 (484.07 it/sec) -training >> step=7540800, episode=1257 reward=0.798836 (489.66 it/sec) -training >> step=7540900, episode=1257 reward=0.7938808 (471.23 it/sec) -training >> step=7541000, episode=1257 reward=0.7857016 (512.68 it/sec) -training >> step=7541100, episode=1257 reward=0.7741267 (511.11 it/sec) -training >> step=7541200, episode=1257 reward=0.7753085 (462.29 it/sec) -training >> step=7541300, episode=1258 reward=0.7885031 (90.43 it/sec) -training >> step=7541400, episode=1258 reward=0.7756732 (457.92 it/sec) -training >> step=7541500, episode=1258 reward=0.7863591 (478.86 it/sec) -training >> step=7541600, episode=1258 reward=0.7720492 (481.38 it/sec) -training >> step=7541700, episode=1258 reward=0.7766796 (539.10 it/sec) -training >> step=7541800, episode=1258 reward=0.7844523 (476.50 it/sec) -training >> step=7541900, episode=1258 reward=0.7855839 (451.07 it/sec) -training >> step=7542000, episode=1258 reward=0.7964088 (512.32 it/sec) -training >> step=7542100, episode=1258 reward=0.8042253 (497.76 it/sec) -training >> step=7542200, episode=1258 reward=0.7942464 (509.56 it/sec) -training >> step=7542300, episode=1258 reward=0.8039702 (491.49 it/sec) -training >> step=7542400, episode=1258 reward=0.7906224 (491.21 it/sec) -training >> step=7542500, episode=1258 reward=0.7960622 (467.14 it/sec) -training >> step=7542600, episode=1258 reward=0.8090289 (455.44 it/sec) -training >> step=7542700, episode=1258 reward=0.7809231 (478.60 it/sec) -training >> step=7542800, episode=1258 reward=0.7809156 (488.96 it/sec) -training >> step=7542900, episode=1258 reward=0.7848497 (464.61 it/sec) -training >> step=7543000, episode=1258 reward=0.8005003 (452.23 it/sec) -training >> step=7543100, episode=1258 reward=0.7805774 (501.18 it/sec) -training >> step=7543200, episode=1258 reward=0.800096 (480.70 it/sec) -training >> step=7543300, episode=1258 reward=0.7816461 (453.89 it/sec) -training >> step=7543400, episode=1258 reward=0.7952757 (519.38 it/sec) -training >> step=7543500, episode=1258 reward=0.7795787 (498.96 it/sec) -training >> step=7543600, episode=1258 reward=0.8086542 (515.60 it/sec) -training >> step=7543700, episode=1258 reward=0.7866855 (454.06 it/sec) -training >> step=7543800, episode=1258 reward=0.7863483 (473.16 it/sec) -training >> step=7543900, episode=1258 reward=0.8005548 (474.27 it/sec) -training >> step=7544000, episode=1258 reward=0.7914339 (479.25 it/sec) -training >> step=7544100, episode=1258 reward=0.7790827 (526.80 it/sec) -training >> step=7544200, episode=1258 reward=0.7873486 (478.12 it/sec) -training >> step=7544300, episode=1258 reward=0.7896886 (488.93 it/sec) -training >> step=7544400, episode=1258 reward=0.7900701 (474.19 it/sec) -training >> step=7544500, episode=1258 reward=0.7871829 (480.04 it/sec) -training >> step=7544600, episode=1258 reward=0.7779743 (464.47 it/sec) -training >> step=7544700, episode=1258 reward=0.7911364 (486.13 it/sec) -training >> step=7544800, episode=1258 reward=0.7997586 (470.09 it/sec) -training >> step=7544900, episode=1258 reward=0.8054322 (461.21 it/sec) -training >> step=7545000, episode=1258 reward=0.7752481 (488.27 it/sec) -training >> step=7545100, episode=1258 reward=0.7883556 (483.54 it/sec) -training >> step=7545200, episode=1258 reward=0.7649981 (523.67 it/sec) -training >> step=7545300, episode=1258 reward=0.7969025 (508.00 it/sec) -training >> step=7545400, episode=1258 reward=0.7928577 (506.35 it/sec) -training >> step=7545500, episode=1258 reward=0.8047419 (523.77 it/sec) -training >> step=7545600, episode=1258 reward=0.7794992 (501.43 it/sec) -training >> step=7545700, episode=1258 reward=0.7816161 (462.30 it/sec) -training >> step=7545800, episode=1258 reward=0.7556503 (483.91 it/sec) -training >> step=7545900, episode=1258 reward=0.7784265 (522.13 it/sec) -training >> step=7546000, episode=1258 reward=0.7706257 (503.79 it/sec) -training >> step=7546100, episode=1258 reward=0.7848726 (496.11 it/sec) -training >> step=7546200, episode=1258 reward=0.7748938 (498.50 it/sec) -training >> step=7546300, episode=1258 reward=0.7922116 (481.61 it/sec) -training >> step=7546400, episode=1258 reward=0.7826679 (489.38 it/sec) -training >> step=7546500, episode=1258 reward=0.7961807 (480.89 it/sec) -training >> step=7546600, episode=1258 reward=0.8099507 (546.43 it/sec) -training >> step=7546700, episode=1258 reward=0.8075568 (496.89 it/sec) -training >> step=7546800, episode=1258 reward=0.7785076 (463.55 it/sec) -training >> step=7546900, episode=1258 reward=0.7823696 (493.09 it/sec) -training >> step=7547000, episode=1258 reward=0.774451 (515.62 it/sec) -training >> step=7547100, episode=1258 reward=0.7830439 (476.59 it/sec) -training >> step=7547200, episode=1258 reward=0.7942092 (489.18 it/sec) -training >> step=7547300, episode=1259 reward=0.7790412 (73.28 it/sec) -training >> step=7547400, episode=1259 reward=0.8051866 (409.97 it/sec) -training >> step=7547500, episode=1259 reward=0.7923856 (493.97 it/sec) -training >> step=7547600, episode=1259 reward=0.7821515 (498.86 it/sec) -training >> step=7547700, episode=1259 reward=0.8038756 (497.82 it/sec) -training >> step=7547800, episode=1259 reward=0.7651042 (505.07 it/sec) -training >> step=7547900, episode=1259 reward=0.7988318 (509.90 it/sec) -training >> step=7548000, episode=1259 reward=0.7636372 (509.67 it/sec) -training >> step=7548100, episode=1259 reward=0.7879295 (504.01 it/sec) -training >> step=7548200, episode=1259 reward=0.7956771 (487.91 it/sec) -training >> step=7548300, episode=1259 reward=0.7809718 (474.97 it/sec) -training >> step=7548400, episode=1259 reward=0.8020579 (491.59 it/sec) -training >> step=7548500, episode=1259 reward=0.7943062 (490.36 it/sec) -training >> step=7548600, episode=1259 reward=0.7538332 (515.96 it/sec) -training >> step=7548700, episode=1259 reward=0.8128409 (501.50 it/sec) -training >> step=7548800, episode=1259 reward=0.7948618 (489.60 it/sec) -training >> step=7548900, episode=1259 reward=0.7662584 (495.67 it/sec) -training >> step=7549000, episode=1259 reward=0.7879453 (475.96 it/sec) -training >> step=7549100, episode=1259 reward=0.8084808 (505.64 it/sec) -training >> step=7549200, episode=1259 reward=0.8078341 (512.87 it/sec) -training >> step=7549300, episode=1259 reward=0.7903308 (502.60 it/sec) -training >> step=7549400, episode=1259 reward=0.8010368 (466.19 it/sec) -training >> step=7549500, episode=1259 reward=0.7723557 (479.87 it/sec) -training >> step=7549600, episode=1259 reward=0.8014584 (507.33 it/sec) -training >> step=7549700, episode=1259 reward=0.777039 (476.43 it/sec) -training >> step=7549800, episode=1259 reward=0.7964131 (497.52 it/sec) -training >> step=7549900, episode=1259 reward=0.8004081 (490.65 it/sec) -training >> step=7550000, episode=1259 reward=0.7874481 (484.98 it/sec) -training >> step=7550100, episode=1259 reward=0.8038293 (513.11 it/sec) -training >> step=7550200, episode=1259 reward=0.7879766 (495.38 it/sec) -training >> step=7550300, episode=1259 reward=0.7886935 (497.28 it/sec) -training >> step=7550400, episode=1259 reward=0.7853488 (521.05 it/sec) -training >> step=7550500, episode=1259 reward=0.8070315 (488.35 it/sec) -training >> step=7550600, episode=1259 reward=0.7966841 (493.85 it/sec) -training >> step=7550700, episode=1259 reward=0.7799464 (451.52 it/sec) -training >> step=7550800, episode=1259 reward=0.7786985 (426.57 it/sec) -training >> step=7550900, episode=1259 reward=0.7922199 (487.85 it/sec) -training >> step=7551000, episode=1259 reward=0.7990079 (517.37 it/sec) -training >> step=7551100, episode=1259 reward=0.7890822 (480.41 it/sec) -training >> step=7551200, episode=1259 reward=0.7916844 (508.31 it/sec) -training >> step=7551300, episode=1259 reward=0.7941505 (462.64 it/sec) -training >> step=7551400, episode=1259 reward=0.8069687 (516.46 it/sec) -training >> step=7551500, episode=1259 reward=0.79254 (503.13 it/sec) -training >> step=7551600, episode=1259 reward=0.7948433 (510.72 it/sec) -training >> step=7551700, episode=1259 reward=0.7846085 (506.04 it/sec) -training >> step=7551800, episode=1259 reward=0.7791117 (492.10 it/sec) -training >> step=7551900, episode=1259 reward=0.7599844 (486.78 it/sec) -training >> step=7552000, episode=1259 reward=0.7786024 (505.85 it/sec) -training >> step=7552100, episode=1259 reward=0.78589 (524.88 it/sec) -training >> step=7552200, episode=1259 reward=0.787281 (497.48 it/sec) -training >> step=7552300, episode=1259 reward=0.7995314 (516.98 it/sec) -training >> step=7552400, episode=1259 reward=0.7897139 (517.08 it/sec) -training >> step=7552500, episode=1259 reward=0.7932862 (474.36 it/sec) -training >> step=7552600, episode=1259 reward=0.7862158 (505.91 it/sec) -training >> step=7552700, episode=1259 reward=0.798381 (477.33 it/sec) -training >> step=7552800, episode=1259 reward=0.7946886 (510.53 it/sec) -training >> step=7552900, episode=1259 reward=0.7773225 (497.33 it/sec) -training >> step=7553000, episode=1259 reward=0.8013591 (460.27 it/sec) -training >> step=7553100, episode=1259 reward=0.7912761 (520.74 it/sec) -training >> step=7553200, episode=1259 reward=0.7684479 (494.57 it/sec) -training >> step=7553300, episode=1260 reward=0.7714791 (85.72 it/sec) -training >> step=7553400, episode=1260 reward=0.7615156 (341.61 it/sec) -training >> step=7553500, episode=1260 reward=0.7797419 (495.60 it/sec) -training >> step=7553600, episode=1260 reward=0.7850637 (508.72 it/sec) -training >> step=7553700, episode=1260 reward=0.7946373 (436.70 it/sec) -training >> step=7553800, episode=1260 reward=0.789938 (507.76 it/sec) -training >> step=7553900, episode=1260 reward=0.7733037 (528.53 it/sec) -training >> step=7554000, episode=1260 reward=0.7939882 (488.77 it/sec) -training >> step=7554100, episode=1260 reward=0.7795547 (474.90 it/sec) -training >> step=7554200, episode=1260 reward=0.7878488 (553.40 it/sec) -training >> step=7554300, episode=1260 reward=0.787508 (485.53 it/sec) -training >> step=7554400, episode=1260 reward=0.808716 (455.84 it/sec) -training >> step=7554500, episode=1260 reward=0.778882 (472.38 it/sec) -training >> step=7554600, episode=1260 reward=0.7808532 (485.21 it/sec) -training >> step=7554700, episode=1260 reward=0.7890644 (485.58 it/sec) -training >> step=7554800, episode=1260 reward=0.7914681 (472.60 it/sec) -training >> step=7554900, episode=1260 reward=0.7694458 (519.20 it/sec) -training >> step=7555000, episode=1260 reward=0.7799693 (460.26 it/sec) -training >> step=7555100, episode=1260 reward=0.7827756 (500.99 it/sec) -training >> step=7555200, episode=1260 reward=0.7917938 (465.20 it/sec) -training >> step=7555300, episode=1260 reward=0.7823231 (533.90 it/sec) -training >> step=7555400, episode=1260 reward=0.8066967 (456.96 it/sec) -training >> step=7555500, episode=1260 reward=0.7834509 (510.41 it/sec) -training >> step=7555600, episode=1260 reward=0.7809129 (477.78 it/sec) -training >> step=7555700, episode=1260 reward=0.7858441 (529.95 it/sec) -training >> step=7555800, episode=1260 reward=0.8026857 (497.53 it/sec) -training >> step=7555900, episode=1260 reward=0.7692506 (455.33 it/sec) -training >> step=7556000, episode=1260 reward=0.7900738 (469.29 it/sec) -training >> step=7556100, episode=1260 reward=0.7905958 (402.79 it/sec) -training >> step=7556200, episode=1260 reward=0.787252 (441.18 it/sec) -training >> step=7556300, episode=1260 reward=0.8054976 (387.65 it/sec) -training >> step=7556400, episode=1260 reward=0.8156508 (373.18 it/sec) -training >> step=7556500, episode=1260 reward=0.7994626 (452.12 it/sec) -training >> step=7556600, episode=1260 reward=0.7975274 (490.48 it/sec) -training >> step=7556700, episode=1260 reward=0.7882888 (487.96 it/sec) -training >> step=7556800, episode=1260 reward=0.8069519 (457.32 it/sec) -training >> step=7556900, episode=1260 reward=0.7782733 (422.36 it/sec) -training >> step=7557000, episode=1260 reward=0.7853773 (418.06 it/sec) -training >> step=7557100, episode=1260 reward=0.8068302 (423.74 it/sec) -training >> step=7557200, episode=1260 reward=0.7989907 (430.25 it/sec) -training >> step=7557300, episode=1260 reward=0.8029402 (452.83 it/sec) -training >> step=7557400, episode=1260 reward=0.786297 (483.86 it/sec) -training >> step=7557500, episode=1260 reward=0.7905107 (453.42 it/sec) -training >> step=7557600, episode=1260 reward=0.7962088 (414.03 it/sec) -training >> step=7557700, episode=1260 reward=0.7796583 (458.67 it/sec) -training >> step=7557800, episode=1260 reward=0.7869955 (466.64 it/sec) -training >> step=7557900, episode=1260 reward=0.7807574 (535.84 it/sec) -training >> step=7558000, episode=1260 reward=0.7802734 (480.23 it/sec) -training >> step=7558100, episode=1260 reward=0.7778226 (467.20 it/sec) -training >> step=7558200, episode=1260 reward=0.7869117 (448.68 it/sec) -training >> step=7558300, episode=1260 reward=0.7846152 (458.09 it/sec) -training >> step=7558400, episode=1260 reward=0.7914733 (472.38 it/sec) -training >> step=7558500, episode=1260 reward=0.7925087 (488.91 it/sec) -training >> step=7558600, episode=1260 reward=0.773372 (478.41 it/sec) -training >> step=7558700, episode=1260 reward=0.7661176 (446.20 it/sec) -training >> step=7558800, episode=1260 reward=0.7795918 (456.65 it/sec) -training >> step=7558900, episode=1260 reward=0.794567 (501.53 it/sec) -training >> step=7559000, episode=1260 reward=0.7843314 (479.60 it/sec) -training >> step=7559100, episode=1260 reward=0.7925329 (486.49 it/sec) -training >> step=7559200, episode=1260 reward=0.7706081 (447.31 it/sec) -training >> step=7559300, episode=1261 reward=0.786301 (86.84 it/sec) -training >> step=7559400, episode=1261 reward=0.7895054 (474.55 it/sec) -training >> step=7559500, episode=1261 reward=0.7614263 (449.97 it/sec) -training >> step=7559600, episode=1261 reward=0.7809089 (328.79 it/sec) -training >> step=7559700, episode=1261 reward=0.7979506 (495.90 it/sec) -training >> step=7559800, episode=1261 reward=0.7953259 (447.52 it/sec) -training >> step=7559900, episode=1261 reward=0.7903199 (469.21 it/sec) -training >> step=7560000, episode=1261 reward=0.8153294 (477.11 it/sec) -training >> step=7560100, episode=1261 reward=0.7914518 (460.40 it/sec) -training >> step=7560200, episode=1261 reward=0.7913553 (427.80 it/sec) -training >> step=7560300, episode=1261 reward=0.7598664 (465.12 it/sec) -training >> step=7560400, episode=1261 reward=0.7719403 (446.14 it/sec) -training >> step=7560500, episode=1261 reward=0.7927133 (452.83 it/sec) -training >> step=7560600, episode=1261 reward=0.7811803 (442.91 it/sec) -training >> step=7560700, episode=1261 reward=0.7842774 (481.61 it/sec) -training >> step=7560800, episode=1261 reward=0.7833002 (488.97 it/sec) -training >> step=7560900, episode=1261 reward=0.7931685 (497.31 it/sec) -training >> step=7561000, episode=1261 reward=0.7820213 (522.72 it/sec) -training >> step=7561100, episode=1261 reward=0.7712325 (505.21 it/sec) -training >> step=7561200, episode=1261 reward=0.814175 (491.94 it/sec) -training >> step=7561300, episode=1261 reward=0.7770413 (477.10 it/sec) -training >> step=7561400, episode=1261 reward=0.7854195 (425.32 it/sec) -training >> step=7561500, episode=1261 reward=0.8032001 (430.86 it/sec) -training >> step=7561600, episode=1261 reward=0.7838707 (436.51 it/sec) -training >> step=7561700, episode=1261 reward=0.7753572 (460.73 it/sec) -training >> step=7561800, episode=1261 reward=0.7926664 (409.94 it/sec) -training >> step=7561900, episode=1261 reward=0.8053677 (401.10 it/sec) -training >> step=7562000, episode=1261 reward=0.8076901 (412.32 it/sec) -training >> step=7562100, episode=1261 reward=0.8074063 (469.33 it/sec) -training >> step=7562200, episode=1261 reward=0.7951896 (406.31 it/sec) -training >> step=7562300, episode=1261 reward=0.7826729 (402.77 it/sec) -training >> step=7562400, episode=1261 reward=0.8001513 (421.65 it/sec) -training >> step=7562500, episode=1261 reward=0.7942649 (453.41 it/sec) -training >> step=7562600, episode=1261 reward=0.8174121 (455.60 it/sec) -training >> step=7562700, episode=1261 reward=0.8041721 (444.91 it/sec) -training >> step=7562800, episode=1261 reward=0.7916247 (498.81 it/sec) -training >> step=7562900, episode=1261 reward=0.7817544 (468.93 it/sec) -training >> step=7563000, episode=1261 reward=0.7962032 (404.34 it/sec) -training >> step=7563100, episode=1261 reward=0.8034646 (479.56 it/sec) -training >> step=7563200, episode=1261 reward=0.7702779 (463.50 it/sec) -training >> step=7563300, episode=1261 reward=0.7969816 (450.23 it/sec) -training >> step=7563400, episode=1261 reward=0.8009171 (489.14 it/sec) -training >> step=7563500, episode=1261 reward=0.7926502 (525.98 it/sec) -training >> step=7563600, episode=1261 reward=0.7866508 (510.83 it/sec) -training >> step=7563700, episode=1261 reward=0.7868254 (445.07 it/sec) -training >> step=7563800, episode=1261 reward=0.8051788 (437.76 it/sec) -training >> step=7563900, episode=1261 reward=0.7849609 (479.18 it/sec) -training >> step=7564000, episode=1261 reward=0.7810474 (473.95 it/sec) -training >> step=7564100, episode=1261 reward=0.799656 (483.23 it/sec) -training >> step=7564200, episode=1261 reward=0.7930488 (474.87 it/sec) -training >> step=7564300, episode=1261 reward=0.7906326 (521.55 it/sec) -training >> step=7564400, episode=1261 reward=0.7819211 (508.01 it/sec) -training >> step=7564500, episode=1261 reward=0.801101 (502.55 it/sec) -training >> step=7564600, episode=1261 reward=0.7845833 (511.25 it/sec) -training >> step=7564700, episode=1261 reward=0.7878413 (526.04 it/sec) -training >> step=7564800, episode=1261 reward=0.7718131 (509.36 it/sec) -training >> step=7564900, episode=1261 reward=0.7835243 (477.93 it/sec) -training >> step=7565000, episode=1261 reward=0.7973775 (496.78 it/sec) -training >> step=7565100, episode=1261 reward=0.7717444 (468.15 it/sec) -training >> step=7565200, episode=1261 reward=0.7916604 (526.95 it/sec) -training >> step=7565300, episode=1262 reward=0.795633 (118.75 it/sec) -training >> step=7565400, episode=1262 reward=0.7724106 (540.51 it/sec) -training >> step=7565500, episode=1262 reward=0.7761911 (504.04 it/sec) -training >> step=7565600, episode=1262 reward=0.7877932 (524.68 it/sec) -training >> step=7565700, episode=1262 reward=0.7824506 (511.09 it/sec) -training >> step=7565800, episode=1262 reward=0.7895969 (430.15 it/sec) -training >> step=7565900, episode=1262 reward=0.8054643 (510.40 it/sec) -training >> step=7566000, episode=1262 reward=0.7937243 (501.03 it/sec) -training >> step=7566100, episode=1262 reward=0.781541 (537.11 it/sec) -training >> step=7566200, episode=1262 reward=0.783587 (496.29 it/sec) -training >> step=7566300, episode=1262 reward=0.7754115 (516.99 it/sec) -training >> step=7566400, episode=1262 reward=0.8108666 (474.82 it/sec) -training >> step=7566500, episode=1262 reward=0.7873049 (510.16 it/sec) -training >> step=7566600, episode=1262 reward=0.7798185 (445.00 it/sec) -training >> step=7566700, episode=1262 reward=0.7990467 (484.23 it/sec) -training >> step=7566800, episode=1262 reward=0.7850589 (469.13 it/sec) -training >> step=7566900, episode=1262 reward=0.775095 (487.68 it/sec) -training >> step=7567000, episode=1262 reward=0.7914678 (492.48 it/sec) -training >> step=7567100, episode=1262 reward=0.794328 (465.04 it/sec) -training >> step=7567200, episode=1262 reward=0.7796876 (494.20 it/sec) -training >> step=7567300, episode=1262 reward=0.7966121 (465.28 it/sec) -training >> step=7567400, episode=1262 reward=0.7935132 (475.22 it/sec) -training >> step=7567500, episode=1262 reward=0.7858959 (467.07 it/sec) -training >> step=7567600, episode=1262 reward=0.7798309 (537.08 it/sec) -training >> step=7567700, episode=1262 reward=0.7969174 (461.37 it/sec) -training >> step=7567800, episode=1262 reward=0.7898449 (488.00 it/sec) -training >> step=7567900, episode=1262 reward=0.7831442 (497.21 it/sec) -training >> step=7568000, episode=1262 reward=0.7864613 (572.13 it/sec) -training >> step=7568100, episode=1262 reward=0.7895871 (542.26 it/sec) -training >> step=7568200, episode=1262 reward=0.7888604 (497.67 it/sec) -training >> step=7568300, episode=1262 reward=0.7981917 (461.47 it/sec) -training >> step=7568400, episode=1262 reward=0.7991008 (475.15 it/sec) -training >> step=7568500, episode=1262 reward=0.8161756 (472.97 it/sec) -training >> step=7568600, episode=1262 reward=0.800467 (453.92 it/sec) -training >> step=7568700, episode=1262 reward=0.7851796 (437.13 it/sec) -training >> step=7568800, episode=1262 reward=0.7948451 (483.04 it/sec) -training >> step=7568900, episode=1262 reward=0.7955382 (430.72 it/sec) -training >> step=7569000, episode=1262 reward=0.7822942 (436.03 it/sec) -training >> step=7569100, episode=1262 reward=0.7822945 (467.44 it/sec) -training >> step=7569200, episode=1262 reward=0.7913729 (477.88 it/sec) -training >> step=7569300, episode=1262 reward=0.7953514 (474.77 it/sec) -training >> step=7569400, episode=1262 reward=0.7805938 (483.82 it/sec) -training >> step=7569500, episode=1262 reward=0.7835584 (434.69 it/sec) -training >> step=7569600, episode=1262 reward=0.7979443 (496.41 it/sec) -training >> step=7569700, episode=1262 reward=0.7782242 (495.37 it/sec) -training >> step=7569800, episode=1262 reward=0.8012022 (530.09 it/sec) -training >> step=7569900, episode=1262 reward=0.7899485 (514.16 it/sec) -training >> step=7570000, episode=1262 reward=0.7981077 (496.30 it/sec) -training >> step=7570100, episode=1262 reward=0.7765413 (508.56 it/sec) -training >> step=7570200, episode=1262 reward=0.7971138 (506.78 it/sec) -training >> step=7570300, episode=1262 reward=0.7863113 (436.41 it/sec) -training >> step=7570400, episode=1262 reward=0.7923835 (445.51 it/sec) -training >> step=7570500, episode=1262 reward=0.7898116 (506.13 it/sec) -training >> step=7570600, episode=1262 reward=0.7911469 (471.18 it/sec) -training >> step=7570700, episode=1262 reward=0.7952616 (463.47 it/sec) -training >> step=7570800, episode=1262 reward=0.7805514 (455.31 it/sec) -training >> step=7570900, episode=1262 reward=0.7811697 (515.19 it/sec) -training >> step=7571000, episode=1262 reward=0.7949187 (506.16 it/sec) -training >> step=7571100, episode=1262 reward=0.7913346 (442.79 it/sec) -training >> step=7571200, episode=1262 reward=0.7924482 (507.50 it/sec) -training >> step=7571300, episode=1263 reward=0.7867963 (115.26 it/sec) -training >> step=7571400, episode=1263 reward=0.7701503 (499.32 it/sec) -training >> step=7571500, episode=1263 reward=0.7655069 (498.96 it/sec) -training >> step=7571600, episode=1263 reward=0.7802143 (542.92 it/sec) -training >> step=7571700, episode=1263 reward=0.770277 (477.38 it/sec) -training >> step=7571800, episode=1263 reward=0.7801164 (489.43 it/sec) -training >> step=7571900, episode=1263 reward=0.7873622 (415.71 it/sec) -training >> step=7572000, episode=1263 reward=0.7978366 (494.77 it/sec) -training >> step=7572100, episode=1263 reward=0.7892983 (478.50 it/sec) -training >> step=7572200, episode=1263 reward=0.7880247 (486.94 it/sec) -training >> step=7572300, episode=1263 reward=0.799339 (510.15 it/sec) -training >> step=7572400, episode=1263 reward=0.7851847 (510.22 it/sec) -training >> step=7572500, episode=1263 reward=0.7962086 (507.30 it/sec) -training >> step=7572600, episode=1263 reward=0.7775269 (498.85 it/sec) -training >> step=7572700, episode=1263 reward=0.797205 (531.66 it/sec) -training >> step=7572800, episode=1263 reward=0.8090897 (484.62 it/sec) -training >> step=7572900, episode=1263 reward=0.7780921 (463.25 it/sec) -training >> step=7573000, episode=1263 reward=0.8036804 (474.19 it/sec) -training >> step=7573100, episode=1263 reward=0.8048244 (526.28 it/sec) -training >> step=7573200, episode=1263 reward=0.7826914 (462.17 it/sec) -training >> step=7573300, episode=1263 reward=0.7830352 (471.62 it/sec) -training >> step=7573400, episode=1263 reward=0.7930163 (503.12 it/sec) -training >> step=7573500, episode=1263 reward=0.7801117 (504.25 it/sec) -training >> step=7573600, episode=1263 reward=0.7918038 (507.04 it/sec) -training >> step=7573700, episode=1263 reward=0.7838771 (522.70 it/sec) -training >> step=7573800, episode=1263 reward=0.7713621 (522.62 it/sec) -training >> step=7573900, episode=1263 reward=0.7899243 (485.39 it/sec) -training >> step=7574000, episode=1263 reward=0.7821108 (510.25 it/sec) -training >> step=7574100, episode=1263 reward=0.7965681 (515.13 it/sec) -training >> step=7574200, episode=1263 reward=0.8065225 (538.50 it/sec) -training >> step=7574300, episode=1263 reward=0.7868383 (497.41 it/sec) -training >> step=7574400, episode=1263 reward=0.7886297 (504.25 it/sec) -training >> step=7574500, episode=1263 reward=0.7920341 (502.60 it/sec) -training >> step=7574600, episode=1263 reward=0.7984504 (527.16 it/sec) -training >> step=7574700, episode=1263 reward=0.7837747 (515.72 it/sec) -training >> step=7574800, episode=1263 reward=0.7614225 (487.03 it/sec) -training >> step=7574900, episode=1263 reward=0.7839236 (514.59 it/sec) -training >> step=7575000, episode=1263 reward=0.7921947 (519.48 it/sec) -training >> step=7575100, episode=1263 reward=0.7786527 (486.62 it/sec) -training >> step=7575200, episode=1263 reward=0.7937324 (500.14 it/sec) -training >> step=7575300, episode=1263 reward=0.804041 (530.07 it/sec) -training >> step=7575400, episode=1263 reward=0.7967043 (458.57 it/sec) -training >> step=7575500, episode=1263 reward=0.798402 (481.48 it/sec) -training >> step=7575600, episode=1263 reward=0.77894 (532.69 it/sec) -training >> step=7575700, episode=1263 reward=0.7846233 (476.28 it/sec) -training >> step=7575800, episode=1263 reward=0.7826018 (490.01 it/sec) -training >> step=7575900, episode=1263 reward=0.7892039 (502.20 it/sec) -training >> step=7576000, episode=1263 reward=0.7699353 (517.90 it/sec) -training >> step=7576100, episode=1263 reward=0.7975792 (518.90 it/sec) -training >> step=7576200, episode=1263 reward=0.771386 (525.05 it/sec) -training >> step=7576300, episode=1263 reward=0.7824213 (484.39 it/sec) -training >> step=7576400, episode=1263 reward=0.7886037 (450.46 it/sec) -training >> step=7576500, episode=1263 reward=0.7804593 (512.24 it/sec) -training >> step=7576600, episode=1263 reward=0.7795123 (484.98 it/sec) -training >> step=7576700, episode=1263 reward=0.7779661 (499.93 it/sec) -training >> step=7576800, episode=1263 reward=0.7988843 (491.31 it/sec) -training >> step=7576900, episode=1263 reward=0.7957842 (501.57 it/sec) -training >> step=7577000, episode=1263 reward=0.7898001 (502.28 it/sec) -training >> step=7577100, episode=1263 reward=0.772275 (527.59 it/sec) -training >> step=7577200, episode=1263 reward=0.8049884 (527.45 it/sec) -training >> step=7577300, episode=1264 reward=0.7973385 (113.15 it/sec) -training >> step=7577400, episode=1264 reward=0.7938812 (504.69 it/sec) -training >> step=7577500, episode=1264 reward=0.7818483 (523.46 it/sec) -training >> step=7577600, episode=1264 reward=0.7761042 (503.17 it/sec) -training >> step=7577700, episode=1264 reward=0.7793387 (509.09 it/sec) -training >> step=7577800, episode=1264 reward=0.8042576 (496.02 it/sec) -training >> step=7577900, episode=1264 reward=0.7907574 (480.36 it/sec) -training >> step=7578000, episode=1264 reward=0.7997694 (507.16 it/sec) -training >> step=7578100, episode=1264 reward=0.7863149 (369.54 it/sec) -training >> step=7578200, episode=1264 reward=0.7866271 (449.49 it/sec) -training >> step=7578300, episode=1264 reward=0.7920773 (483.42 it/sec) -training >> step=7578400, episode=1264 reward=0.7936606 (476.28 it/sec) -training >> step=7578500, episode=1264 reward=0.7889928 (473.43 it/sec) -training >> step=7578600, episode=1264 reward=0.7563806 (499.83 it/sec) -training >> step=7578700, episode=1264 reward=0.7803094 (498.21 it/sec) -training >> step=7578800, episode=1264 reward=0.7832111 (504.51 it/sec) -training >> step=7578900, episode=1264 reward=0.7760701 (536.97 it/sec) -training >> step=7579000, episode=1264 reward=0.7800341 (475.78 it/sec) -training >> step=7579100, episode=1264 reward=0.8010752 (482.33 it/sec) -training >> step=7579200, episode=1264 reward=0.78829 (485.05 it/sec) -training >> step=7579300, episode=1264 reward=0.7909884 (480.36 it/sec) -training >> step=7579400, episode=1264 reward=0.8001873 (454.00 it/sec) -training >> step=7579500, episode=1264 reward=0.7913604 (430.82 it/sec) -training >> step=7579600, episode=1264 reward=0.817082 (481.91 it/sec) -training >> step=7579700, episode=1264 reward=0.7981158 (445.50 it/sec) -training >> step=7579800, episode=1264 reward=0.7795874 (496.75 it/sec) -training >> step=7579900, episode=1264 reward=0.7840592 (491.47 it/sec) -training >> step=7580000, episode=1264 reward=0.7763944 (508.96 it/sec) -training >> step=7580100, episode=1264 reward=0.7809498 (462.34 it/sec) -training >> step=7580200, episode=1264 reward=0.7853515 (494.14 it/sec) -training >> step=7580300, episode=1264 reward=0.7986836 (519.10 it/sec) -training >> step=7580400, episode=1264 reward=0.7846481 (507.59 it/sec) -training >> step=7580500, episode=1264 reward=0.811147 (482.80 it/sec) -training >> step=7580600, episode=1264 reward=0.7955348 (361.91 it/sec) -training >> step=7580700, episode=1264 reward=0.7798229 (433.56 it/sec) -training >> step=7580800, episode=1264 reward=0.7866718 (386.73 it/sec) -training >> step=7580900, episode=1264 reward=0.7716137 (376.71 it/sec) -training >> step=7581000, episode=1264 reward=0.7992343 (481.95 it/sec) -training >> step=7581100, episode=1264 reward=0.7861364 (492.39 it/sec) -training >> step=7581200, episode=1264 reward=0.7907408 (498.47 it/sec) -training >> step=7581300, episode=1264 reward=0.8068387 (473.84 it/sec) -training >> step=7581400, episode=1264 reward=0.7921795 (508.99 it/sec) -training >> step=7581500, episode=1264 reward=0.7904398 (481.74 it/sec) -training >> step=7581600, episode=1264 reward=0.7959204 (453.78 it/sec) -training >> step=7581700, episode=1264 reward=0.7843611 (442.90 it/sec) -training >> step=7581800, episode=1264 reward=0.7908857 (466.83 it/sec) -training >> step=7581900, episode=1264 reward=0.777088 (478.92 it/sec) -training >> step=7582000, episode=1264 reward=0.7821568 (473.85 it/sec) -training >> step=7582100, episode=1264 reward=0.7742857 (467.59 it/sec) -training >> step=7582200, episode=1264 reward=0.785145 (474.39 it/sec) -training >> step=7582300, episode=1264 reward=0.7870778 (481.56 it/sec) -training >> step=7582400, episode=1264 reward=0.7885901 (416.01 it/sec) -training >> step=7582500, episode=1264 reward=0.7755251 (419.19 it/sec) -training >> step=7582600, episode=1264 reward=0.7846553 (395.15 it/sec) -training >> step=7582700, episode=1264 reward=0.7839498 (425.46 it/sec) -training >> step=7582800, episode=1264 reward=0.7901806 (453.01 it/sec) -training >> step=7582900, episode=1264 reward=0.7873035 (449.48 it/sec) -training >> step=7583000, episode=1264 reward=0.7927724 (516.25 it/sec) -training >> step=7583100, episode=1264 reward=0.7520455 (477.97 it/sec) -training >> step=7583200, episode=1264 reward=0.7857275 (508.58 it/sec) -training >> step=7583300, episode=1265 reward=0.777072 (68.10 it/sec) -training >> step=7583400, episode=1265 reward=0.7626715 (388.79 it/sec) -training >> step=7583500, episode=1265 reward=0.7976334 (430.93 it/sec) -training >> step=7583600, episode=1265 reward=0.781975 (489.83 it/sec) -training >> step=7583700, episode=1265 reward=0.7639586 (456.54 it/sec) -training >> step=7583800, episode=1265 reward=0.7865591 (516.75 it/sec) -training >> step=7583900, episode=1265 reward=0.8048794 (518.86 it/sec) -training >> step=7584000, episode=1265 reward=0.7789286 (521.26 it/sec) -training >> step=7584100, episode=1265 reward=0.7920454 (514.57 it/sec) -training >> step=7584200, episode=1265 reward=0.7755575 (487.93 it/sec) -training >> step=7584300, episode=1265 reward=0.7887317 (451.81 it/sec) -training >> step=7584400, episode=1265 reward=0.7877703 (335.15 it/sec) -training >> step=7584500, episode=1265 reward=0.7754495 (528.45 it/sec) -training >> step=7584600, episode=1265 reward=0.7750754 (472.27 it/sec) -training >> step=7584700, episode=1265 reward=0.7929716 (493.26 it/sec) -training >> step=7584800, episode=1265 reward=0.7974377 (399.56 it/sec) -training >> step=7584900, episode=1265 reward=0.812997 (420.55 it/sec) -training >> step=7585000, episode=1265 reward=0.7753572 (460.19 it/sec) -training >> step=7585100, episode=1265 reward=0.7887046 (436.31 it/sec) -training >> step=7585200, episode=1265 reward=0.7919129 (472.51 it/sec) -training >> step=7585300, episode=1265 reward=0.7785326 (447.29 it/sec) -training >> step=7585400, episode=1265 reward=0.7899341 (375.87 it/sec) -training >> step=7585500, episode=1265 reward=0.8036003 (452.55 it/sec) -training >> step=7585600, episode=1265 reward=0.7821145 (484.07 it/sec) -training >> step=7585700, episode=1265 reward=0.7906346 (468.04 it/sec) -training >> step=7585800, episode=1265 reward=0.7756921 (524.71 it/sec) -training >> step=7585900, episode=1265 reward=0.8013455 (461.51 it/sec) -training >> step=7586000, episode=1265 reward=0.7775323 (484.96 it/sec) -training >> step=7586100, episode=1265 reward=0.83301 (458.14 it/sec) -training >> step=7586200, episode=1265 reward=0.8017029 (466.18 it/sec) -training >> step=7586300, episode=1265 reward=0.7862031 (480.22 it/sec) -training >> step=7586400, episode=1265 reward=0.7919679 (508.58 it/sec) -training >> step=7586500, episode=1265 reward=0.7841135 (504.09 it/sec) -training >> step=7586600, episode=1265 reward=0.7930161 (438.87 it/sec) -training >> step=7586700, episode=1265 reward=0.7962273 (493.44 it/sec) -training >> step=7586800, episode=1265 reward=0.779713 (512.11 it/sec) -training >> step=7586900, episode=1265 reward=0.7794479 (529.16 it/sec) -training >> step=7587000, episode=1265 reward=0.7824221 (462.51 it/sec) -training >> step=7587100, episode=1265 reward=0.7917432 (426.04 it/sec) -training >> step=7587200, episode=1265 reward=0.7727422 (487.37 it/sec) -training >> step=7587300, episode=1265 reward=0.7758605 (485.08 it/sec) -training >> step=7587400, episode=1265 reward=0.7900354 (469.88 it/sec) -training >> step=7587500, episode=1265 reward=0.79551 (435.20 it/sec) -training >> step=7587600, episode=1265 reward=0.7867196 (467.82 it/sec) -training >> step=7587700, episode=1265 reward=0.7891616 (471.33 it/sec) -training >> step=7587800, episode=1265 reward=0.762727 (412.78 it/sec) -training >> step=7587900, episode=1265 reward=0.8022124 (430.21 it/sec) -training >> step=7588000, episode=1265 reward=0.7881851 (454.77 it/sec) -training >> step=7588100, episode=1265 reward=0.7816803 (513.54 it/sec) -training >> step=7588200, episode=1265 reward=0.7952464 (463.95 it/sec) -training >> step=7588300, episode=1265 reward=0.7911505 (500.43 it/sec) -training >> step=7588400, episode=1265 reward=0.7836866 (466.90 it/sec) -training >> step=7588500, episode=1265 reward=0.778403 (458.04 it/sec) -training >> step=7588600, episode=1265 reward=0.7867125 (481.10 it/sec) -training >> step=7588700, episode=1265 reward=0.768809 (480.92 it/sec) -training >> step=7588800, episode=1265 reward=0.7869354 (505.82 it/sec) -training >> step=7588900, episode=1265 reward=0.7791075 (491.76 it/sec) -training >> step=7589000, episode=1265 reward=0.7919861 (478.63 it/sec) -training >> step=7589100, episode=1265 reward=0.771847 (499.19 it/sec) -training >> step=7589200, episode=1265 reward=0.7902567 (518.30 it/sec) -training >> step=7589300, episode=1266 reward=0.7758543 (65.11 it/sec) -training >> step=7589400, episode=1266 reward=0.7787271 (462.94 it/sec) -training >> step=7589500, episode=1266 reward=0.7843612 (497.86 it/sec) -training >> step=7589600, episode=1266 reward=0.8058904 (494.76 it/sec) -training >> step=7589700, episode=1266 reward=0.7718085 (513.37 it/sec) -training >> step=7589800, episode=1266 reward=0.788061 (512.64 it/sec) -training >> step=7589900, episode=1266 reward=0.8037174 (487.00 it/sec) -training >> step=7590000, episode=1266 reward=0.7884614 (540.56 it/sec) -training >> step=7590100, episode=1266 reward=0.786291 (495.77 it/sec) -training >> step=7590200, episode=1266 reward=0.8012069 (531.39 it/sec) -training >> step=7590300, episode=1266 reward=0.7842595 (501.83 it/sec) -training >> step=7590400, episode=1266 reward=0.7910748 (495.68 it/sec) -training >> step=7590500, episode=1266 reward=0.7916771 (514.63 it/sec) -training >> step=7590600, episode=1266 reward=0.7726923 (381.95 it/sec) -training >> step=7590700, episode=1266 reward=0.7854332 (533.09 it/sec) -training >> step=7590800, episode=1266 reward=0.8026549 (540.99 it/sec) -training >> step=7590900, episode=1266 reward=0.8019943 (525.42 it/sec) -training >> step=7591000, episode=1266 reward=0.7789016 (518.12 it/sec) -training >> step=7591100, episode=1266 reward=0.7808098 (523.74 it/sec) -training >> step=7591200, episode=1266 reward=0.7942439 (513.22 it/sec) -training >> step=7591300, episode=1266 reward=0.8068036 (540.89 it/sec) -training >> step=7591400, episode=1266 reward=0.7801864 (515.45 it/sec) -training >> step=7591500, episode=1266 reward=0.8015165 (523.47 it/sec) -training >> step=7591600, episode=1266 reward=0.7781205 (517.35 it/sec) -training >> step=7591700, episode=1266 reward=0.7734608 (480.92 it/sec) -training >> step=7591800, episode=1266 reward=0.7838629 (483.19 it/sec) -training >> step=7591900, episode=1266 reward=0.7795861 (504.97 it/sec) -training >> step=7592000, episode=1266 reward=0.8182803 (493.63 it/sec) -training >> step=7592100, episode=1266 reward=0.7654552 (532.80 it/sec) -training >> step=7592200, episode=1266 reward=0.7900886 (526.01 it/sec) -training >> step=7592300, episode=1266 reward=0.8093514 (438.78 it/sec) -training >> step=7592400, episode=1266 reward=0.8024772 (540.40 it/sec) -training >> step=7592500, episode=1266 reward=0.7750239 (508.05 it/sec) -training >> step=7592600, episode=1266 reward=0.7879602 (527.37 it/sec) -training >> step=7592700, episode=1266 reward=0.8008067 (515.99 it/sec) -training >> step=7592800, episode=1266 reward=0.785439 (487.91 it/sec) -training >> step=7592900, episode=1266 reward=0.7909309 (487.17 it/sec) -training >> step=7593000, episode=1266 reward=0.780228 (492.13 it/sec) -training >> step=7593100, episode=1266 reward=0.787711 (487.00 it/sec) -training >> step=7593200, episode=1266 reward=0.7849916 (543.44 it/sec) -training >> step=7593300, episode=1266 reward=0.7970729 (463.88 it/sec) -training >> step=7593400, episode=1266 reward=0.7917613 (486.53 it/sec) -training >> step=7593500, episode=1266 reward=0.7849733 (522.02 it/sec) -training >> step=7593600, episode=1266 reward=0.7765702 (477.44 it/sec) -training >> step=7593700, episode=1266 reward=0.7868012 (510.49 it/sec) -training >> step=7593800, episode=1266 reward=0.7931473 (511.20 it/sec) -training >> step=7593900, episode=1266 reward=0.7866303 (531.78 it/sec) -training >> step=7594000, episode=1266 reward=0.7777463 (466.55 it/sec) -training >> step=7594100, episode=1266 reward=0.7930197 (492.60 it/sec) -training >> step=7594200, episode=1266 reward=0.7767875 (508.33 it/sec) -training >> step=7594300, episode=1266 reward=0.7824178 (508.04 it/sec) -training >> step=7594400, episode=1266 reward=0.7836108 (518.81 it/sec) -training >> step=7594500, episode=1266 reward=0.7820575 (508.55 it/sec) -training >> step=7594600, episode=1266 reward=0.7858242 (515.46 it/sec) -training >> step=7594700, episode=1266 reward=0.7875839 (536.14 it/sec) -training >> step=7594800, episode=1266 reward=0.7814655 (469.81 it/sec) -training >> step=7594900, episode=1266 reward=0.7938331 (509.28 it/sec) -training >> step=7595000, episode=1266 reward=0.7848498 (550.85 it/sec) -training >> step=7595100, episode=1266 reward=0.7764835 (462.56 it/sec) -training >> step=7595200, episode=1266 reward=0.8063801 (482.45 it/sec) -training >> step=7595300, episode=1267 reward=0.7917886 (51.87 it/sec) -training >> step=7595400, episode=1267 reward=0.773161 (403.72 it/sec) -training >> step=7595500, episode=1267 reward=0.7906535 (426.92 it/sec) -training >> step=7595600, episode=1267 reward=0.8036284 (397.61 it/sec) -training >> step=7595700, episode=1267 reward=0.8049267 (414.36 it/sec) -training >> step=7595800, episode=1267 reward=0.7815309 (418.02 it/sec) -training >> step=7595900, episode=1267 reward=0.7858709 (478.10 it/sec) -training >> step=7596000, episode=1267 reward=0.7886128 (449.15 it/sec) -training >> step=7596100, episode=1267 reward=0.7934887 (464.19 it/sec) -training >> step=7596200, episode=1267 reward=0.8023089 (453.36 it/sec) -training >> step=7596300, episode=1267 reward=0.7917675 (436.68 it/sec) -training >> step=7596400, episode=1267 reward=0.7919768 (421.29 it/sec) -training >> step=7596500, episode=1267 reward=0.7775199 (418.24 it/sec) -training >> step=7596600, episode=1267 reward=0.7810868 (410.97 it/sec) -training >> step=7596700, episode=1267 reward=0.7997507 (396.66 it/sec) -training >> step=7596800, episode=1267 reward=0.7876286 (443.01 it/sec) -training >> step=7596900, episode=1267 reward=0.8061264 (327.24 it/sec) -training >> step=7597000, episode=1267 reward=0.8013059 (466.50 it/sec) -training >> step=7597100, episode=1267 reward=0.7871082 (453.92 it/sec) -training >> step=7597200, episode=1267 reward=0.8094083 (445.54 it/sec) -training >> step=7597300, episode=1267 reward=0.7987452 (392.25 it/sec) -training >> step=7597400, episode=1267 reward=0.7943256 (403.08 it/sec) -training >> step=7597500, episode=1267 reward=0.8067065 (370.79 it/sec) -training >> step=7597600, episode=1267 reward=0.7910492 (415.58 it/sec) -training >> step=7597700, episode=1267 reward=0.7931694 (393.16 it/sec) -training >> step=7597800, episode=1267 reward=0.7896603 (399.10 it/sec) -training >> step=7597900, episode=1267 reward=0.7617067 (419.21 it/sec) -training >> step=7598000, episode=1267 reward=0.7908309 (447.91 it/sec) -training >> step=7598100, episode=1267 reward=0.8058009 (452.20 it/sec) -training >> step=7598200, episode=1267 reward=0.7765188 (474.00 it/sec) -training >> step=7598300, episode=1267 reward=0.7984921 (432.02 it/sec) -training >> step=7598400, episode=1267 reward=0.7809054 (451.11 it/sec) -training >> step=7598500, episode=1267 reward=0.8059304 (464.39 it/sec) -training >> step=7598600, episode=1267 reward=0.7950409 (478.09 it/sec) -training >> step=7598700, episode=1267 reward=0.7690607 (416.21 it/sec) -training >> step=7598800, episode=1267 reward=0.7868829 (412.48 it/sec) -training >> step=7598900, episode=1267 reward=0.7918971 (497.73 it/sec) -training >> step=7599000, episode=1267 reward=0.8001942 (502.36 it/sec) -training >> step=7599100, episode=1267 reward=0.7834262 (468.75 it/sec) -training >> step=7599200, episode=1267 reward=0.7927089 (476.26 it/sec) -training >> step=7599300, episode=1267 reward=0.7901147 (452.38 it/sec) -training >> step=7599400, episode=1267 reward=0.8030859 (480.25 it/sec) -training >> step=7599500, episode=1267 reward=0.7881449 (493.41 it/sec) -training >> step=7599600, episode=1267 reward=0.7873071 (469.44 it/sec) -training >> step=7599700, episode=1267 reward=0.776604 (476.85 it/sec) -training >> step=7599800, episode=1267 reward=0.7750765 (441.47 it/sec) -training >> step=7599900, episode=1267 reward=0.7613636 (476.04 it/sec) -training >> step=7600000, episode=1267 reward=0.7937944 (516.18 it/sec) -training >> step=7600100, episode=1267 reward=0.7880973 (494.30 it/sec) -training >> step=7600200, episode=1267 reward=0.7823949 (445.00 it/sec) -training >> step=7600300, episode=1267 reward=0.7802787 (445.67 it/sec) -training >> step=7600400, episode=1267 reward=0.7791839 (518.15 it/sec) -training >> step=7600500, episode=1267 reward=0.7790812 (466.17 it/sec) -training >> step=7600600, episode=1267 reward=0.7867355 (445.74 it/sec) -training >> step=7600700, episode=1267 reward=0.7844319 (459.24 it/sec) -training >> step=7600800, episode=1267 reward=0.7880818 (481.57 it/sec) -training >> step=7600900, episode=1267 reward=0.7802371 (469.99 it/sec) -training >> step=7601000, episode=1267 reward=0.77537 (484.55 it/sec) -training >> step=7601100, episode=1267 reward=0.785329 (489.91 it/sec) -training >> step=7601200, episode=1267 reward=0.7921323 (493.10 it/sec) -training >> step=7601300, episode=1268 reward=0.788472 (117.92 it/sec) -training >> step=7601400, episode=1268 reward=0.7922845 (439.11 it/sec) -training >> step=7601500, episode=1268 reward=0.8010558 (516.98 it/sec) -training >> step=7601600, episode=1268 reward=0.795992 (462.49 it/sec) -training >> step=7601700, episode=1268 reward=0.7838218 (460.71 it/sec) -training >> step=7601800, episode=1268 reward=0.810708 (486.83 it/sec) -training >> step=7601900, episode=1268 reward=0.7980325 (470.31 it/sec) -training >> step=7602000, episode=1268 reward=0.7870736 (479.38 it/sec) -training >> step=7602100, episode=1268 reward=0.8000867 (426.20 it/sec) -training >> step=7602200, episode=1268 reward=0.7784157 (508.94 it/sec) -training >> step=7602300, episode=1268 reward=0.7958955 (449.37 it/sec) -training >> step=7602400, episode=1268 reward=0.7630883 (459.17 it/sec) -training >> step=7602500, episode=1268 reward=0.7940718 (506.11 it/sec) -training >> step=7602600, episode=1268 reward=0.7867246 (504.89 it/sec) -training >> step=7602700, episode=1268 reward=0.7873324 (439.03 it/sec) -training >> step=7602800, episode=1268 reward=0.7675915 (447.78 it/sec) -training >> step=7602900, episode=1268 reward=0.8007822 (491.95 it/sec) -training >> step=7603000, episode=1268 reward=0.7947384 (353.68 it/sec) -training >> step=7603100, episode=1268 reward=0.7989687 (500.08 it/sec) -training >> step=7603200, episode=1268 reward=0.7887577 (439.61 it/sec) -training >> step=7603300, episode=1268 reward=0.7823928 (478.39 it/sec) -training >> step=7603400, episode=1268 reward=0.7700801 (451.06 it/sec) -training >> step=7603500, episode=1268 reward=0.78649 (505.26 it/sec) -training >> step=7603600, episode=1268 reward=0.799422 (496.00 it/sec) -training >> step=7603700, episode=1268 reward=0.7992228 (483.64 it/sec) -training >> step=7603800, episode=1268 reward=0.8172382 (472.23 it/sec) -training >> step=7603900, episode=1268 reward=0.7738775 (480.24 it/sec) -training >> step=7604000, episode=1268 reward=0.7965471 (503.95 it/sec) -training >> step=7604100, episode=1268 reward=0.8016902 (498.83 it/sec) -training >> step=7604200, episode=1268 reward=0.783618 (467.43 it/sec) -training >> step=7604300, episode=1268 reward=0.7993611 (476.16 it/sec) -training >> step=7604400, episode=1268 reward=0.7785659 (496.31 it/sec) -training >> step=7604500, episode=1268 reward=0.7848964 (485.29 it/sec) -training >> step=7604600, episode=1268 reward=0.7940197 (490.28 it/sec) -training >> step=7604700, episode=1268 reward=0.7948969 (505.15 it/sec) -training >> step=7604800, episode=1268 reward=0.7822935 (459.54 it/sec) -training >> step=7604900, episode=1268 reward=0.8025829 (465.47 it/sec) -training >> step=7605000, episode=1268 reward=0.7880287 (496.01 it/sec) -training >> step=7605100, episode=1268 reward=0.788958 (470.50 it/sec) -training >> step=7605200, episode=1268 reward=0.7982468 (471.63 it/sec) -training >> step=7605300, episode=1268 reward=0.7837806 (473.49 it/sec) -training >> step=7605400, episode=1268 reward=0.7866604 (502.97 it/sec) -training >> step=7605500, episode=1268 reward=0.7912334 (496.21 it/sec) -training >> step=7605600, episode=1268 reward=0.7793657 (498.58 it/sec) -training >> step=7605700, episode=1268 reward=0.7817833 (457.80 it/sec) -training >> step=7605800, episode=1268 reward=0.7932429 (507.24 it/sec) -training >> step=7605900, episode=1268 reward=0.7768437 (488.64 it/sec) -training >> step=7606000, episode=1268 reward=0.7834147 (467.67 it/sec) -training >> step=7606100, episode=1268 reward=0.7863912 (513.97 it/sec) -training >> step=7606200, episode=1268 reward=0.7865042 (478.36 it/sec) -training >> step=7606300, episode=1268 reward=0.7797826 (460.05 it/sec) -training >> step=7606400, episode=1268 reward=0.7890976 (469.22 it/sec) -training >> step=7606500, episode=1268 reward=0.777584 (521.03 it/sec) -training >> step=7606600, episode=1268 reward=0.776342 (423.46 it/sec) -training >> step=7606700, episode=1268 reward=0.7792176 (445.44 it/sec) -training >> step=7606800, episode=1268 reward=0.782681 (471.46 it/sec) -training >> step=7606900, episode=1268 reward=0.7914098 (479.46 it/sec) -training >> step=7607000, episode=1268 reward=0.7857516 (493.00 it/sec) -training >> step=7607100, episode=1268 reward=0.7789375 (489.15 it/sec) -training >> step=7607200, episode=1268 reward=0.7880301 (482.26 it/sec) -training >> step=7607300, episode=1269 reward=0.7958182 (135.43 it/sec) -training >> step=7607400, episode=1269 reward=0.7620075 (480.51 it/sec) -training >> step=7607500, episode=1269 reward=0.7812604 (476.76 it/sec) -training >> step=7607600, episode=1269 reward=0.7864065 (487.10 it/sec) -training >> step=7607700, episode=1269 reward=0.7844464 (487.73 it/sec) -training >> step=7607800, episode=1269 reward=0.7874721 (505.38 it/sec) -training >> step=7607900, episode=1269 reward=0.7818335 (449.87 it/sec) -training >> step=7608000, episode=1269 reward=0.7933291 (470.82 it/sec) -training >> step=7608100, episode=1269 reward=0.7896795 (454.97 it/sec) -training >> step=7608200, episode=1269 reward=0.78708 (480.71 it/sec) -training >> step=7608300, episode=1269 reward=0.7913804 (478.54 it/sec) -training >> step=7608400, episode=1269 reward=0.783076 (460.62 it/sec) -training >> step=7608500, episode=1269 reward=0.7841432 (481.57 it/sec) -training >> step=7608600, episode=1269 reward=0.7926219 (494.07 it/sec) -training >> step=7608700, episode=1269 reward=0.7840871 (441.61 it/sec) -training >> step=7608800, episode=1269 reward=0.7804497 (465.44 it/sec) -training >> step=7608900, episode=1269 reward=0.7990633 (516.52 it/sec) -training >> step=7609000, episode=1269 reward=0.7921234 (459.04 it/sec) -training >> step=7609100, episode=1269 reward=0.7952093 (472.28 it/sec) -training >> step=7609200, episode=1269 reward=0.7828507 (460.82 it/sec) -training >> step=7609300, episode=1269 reward=0.7911314 (337.94 it/sec) -training >> step=7609400, episode=1269 reward=0.7831415 (464.31 it/sec) -training >> step=7609500, episode=1269 reward=0.7898896 (444.97 it/sec) -training >> step=7609600, episode=1269 reward=0.7925869 (497.08 it/sec) -training >> step=7609700, episode=1269 reward=0.7861992 (484.75 it/sec) -training >> step=7609800, episode=1269 reward=0.7870294 (474.50 it/sec) -training >> step=7609900, episode=1269 reward=0.7703795 (483.34 it/sec) -training >> step=7610000, episode=1269 reward=0.7736244 (483.23 it/sec) -training >> step=7610100, episode=1269 reward=0.7870793 (459.57 it/sec) -training >> step=7610200, episode=1269 reward=0.7813945 (478.00 it/sec) -training >> step=7610300, episode=1269 reward=0.7975752 (444.92 it/sec) -training >> step=7610400, episode=1269 reward=0.7847366 (499.06 it/sec) -training >> step=7610500, episode=1269 reward=0.8029032 (468.38 it/sec) -training >> step=7610600, episode=1269 reward=0.7805721 (445.67 it/sec) -training >> step=7610700, episode=1269 reward=0.7883039 (508.91 it/sec) -training >> step=7610800, episode=1269 reward=0.779674 (458.98 it/sec) -training >> step=7610900, episode=1269 reward=0.7976562 (505.78 it/sec) -training >> step=7611000, episode=1269 reward=0.7927143 (483.96 it/sec) -training >> step=7611100, episode=1269 reward=0.7660257 (475.91 it/sec) -training >> step=7611200, episode=1269 reward=0.8022048 (487.19 it/sec) -training >> step=7611300, episode=1269 reward=0.7840163 (465.39 it/sec) -training >> step=7611400, episode=1269 reward=0.8056682 (484.92 it/sec) -training >> step=7611500, episode=1269 reward=0.7893361 (453.87 it/sec) -training >> step=7611600, episode=1269 reward=0.7735325 (467.07 it/sec) -training >> step=7611700, episode=1269 reward=0.7628001 (503.74 it/sec) -training >> step=7611800, episode=1269 reward=0.7842709 (511.14 it/sec) -training >> step=7611900, episode=1269 reward=0.7889349 (505.55 it/sec) -training >> step=7612000, episode=1269 reward=0.7988702 (451.09 it/sec) -training >> step=7612100, episode=1269 reward=0.7786625 (506.76 it/sec) -training >> step=7612200, episode=1269 reward=0.7809962 (440.99 it/sec) -training >> step=7612300, episode=1269 reward=0.7717617 (490.61 it/sec) -training >> step=7612400, episode=1269 reward=0.7790945 (512.79 it/sec) -training >> step=7612500, episode=1269 reward=0.7901003 (462.52 it/sec) -training >> step=7612600, episode=1269 reward=0.7986637 (488.20 it/sec) -training >> step=7612700, episode=1269 reward=0.8018874 (495.09 it/sec) -training >> step=7612800, episode=1269 reward=0.7978257 (470.40 it/sec) -training >> step=7612900, episode=1269 reward=0.7918277 (458.81 it/sec) -training >> step=7613000, episode=1269 reward=0.7636411 (432.33 it/sec) -training >> step=7613100, episode=1269 reward=0.7971771 (461.58 it/sec) -training >> step=7613200, episode=1269 reward=0.7696189 (444.40 it/sec) -training >> step=7613300, episode=1270 reward=0.815189 (136.70 it/sec) -training >> step=7613400, episode=1270 reward=0.7803441 (468.74 it/sec) -training >> step=7613500, episode=1270 reward=0.7888395 (491.38 it/sec) -training >> step=7613600, episode=1270 reward=0.7863646 (531.72 it/sec) -training >> step=7613700, episode=1270 reward=0.8058951 (484.05 it/sec) -training >> step=7613800, episode=1270 reward=0.7804583 (465.70 it/sec) -training >> step=7613900, episode=1270 reward=0.8109262 (496.23 it/sec) -training >> step=7614000, episode=1270 reward=0.7788291 (493.42 it/sec) -training >> step=7614100, episode=1270 reward=0.777577 (510.87 it/sec) -training >> step=7614200, episode=1270 reward=0.778529 (473.05 it/sec) -training >> step=7614300, episode=1270 reward=0.777445 (533.70 it/sec) -training >> step=7614400, episode=1270 reward=0.8001075 (477.77 it/sec) -training >> step=7614500, episode=1270 reward=0.7732989 (483.85 it/sec) -training >> step=7614600, episode=1270 reward=0.7885427 (502.70 it/sec) -training >> step=7614700, episode=1270 reward=0.7799983 (508.84 it/sec) -training >> step=7614800, episode=1270 reward=0.8008547 (494.17 it/sec) -training >> step=7614900, episode=1270 reward=0.7895172 (479.00 it/sec) -training >> step=7615000, episode=1270 reward=0.7921379 (505.59 it/sec) -training >> step=7615100, episode=1270 reward=0.7778481 (462.67 it/sec) -training >> step=7615200, episode=1270 reward=0.7903842 (449.34 it/sec) -training >> step=7615300, episode=1270 reward=0.7809251 (475.13 it/sec) -training >> step=7615400, episode=1270 reward=0.7783369 (404.22 it/sec) -training >> step=7615500, episode=1270 reward=0.7830492 (493.15 it/sec) -training >> step=7615600, episode=1270 reward=0.7862059 (457.90 it/sec) -training >> step=7615700, episode=1270 reward=0.777082 (435.46 it/sec) -training >> step=7615800, episode=1270 reward=0.7995002 (506.04 it/sec) -training >> step=7615900, episode=1270 reward=0.7898686 (459.96 it/sec) -training >> step=7616000, episode=1270 reward=0.7795453 (477.85 it/sec) -training >> step=7616100, episode=1270 reward=0.779036 (524.83 it/sec) -training >> step=7616200, episode=1270 reward=0.7833399 (454.44 it/sec) -training >> step=7616300, episode=1270 reward=0.7995037 (453.92 it/sec) -training >> step=7616400, episode=1270 reward=0.7920127 (457.96 it/sec) -training >> step=7616500, episode=1270 reward=0.7743878 (504.92 it/sec) -training >> step=7616600, episode=1270 reward=0.805685 (504.77 it/sec) -training >> step=7616700, episode=1270 reward=0.7931964 (472.76 it/sec) -training >> step=7616800, episode=1270 reward=0.7975466 (513.00 it/sec) -training >> step=7616900, episode=1270 reward=0.793661 (497.20 it/sec) -training >> step=7617000, episode=1270 reward=0.7967834 (497.63 it/sec) -training >> step=7617100, episode=1270 reward=0.8025849 (461.76 it/sec) -training >> step=7617200, episode=1270 reward=0.7696073 (434.29 it/sec) -training >> step=7617300, episode=1270 reward=0.7831708 (498.65 it/sec) -training >> step=7617400, episode=1270 reward=0.7984565 (463.17 it/sec) -training >> step=7617500, episode=1270 reward=0.7936736 (505.34 it/sec) -training >> step=7617600, episode=1270 reward=0.7910669 (510.59 it/sec) -training >> step=7617700, episode=1270 reward=0.7812338 (465.77 it/sec) -training >> step=7617800, episode=1270 reward=0.792375 (490.13 it/sec) -training >> step=7617900, episode=1270 reward=0.7733572 (472.88 it/sec) -training >> step=7618000, episode=1270 reward=0.7982885 (451.83 it/sec) -training >> step=7618100, episode=1270 reward=0.7751965 (493.38 it/sec) -training >> step=7618200, episode=1270 reward=0.7746884 (475.69 it/sec) -training >> step=7618300, episode=1270 reward=0.7733353 (491.71 it/sec) -training >> step=7618400, episode=1270 reward=0.7842747 (462.52 it/sec) -training >> step=7618500, episode=1270 reward=0.7900306 (492.28 it/sec) -training >> step=7618600, episode=1270 reward=0.7920695 (465.63 it/sec) -training >> step=7618700, episode=1270 reward=0.8101282 (470.05 it/sec) -training >> step=7618800, episode=1270 reward=0.7923446 (501.01 it/sec) -training >> step=7618900, episode=1270 reward=0.8090547 (504.55 it/sec) -training >> step=7619000, episode=1270 reward=0.8103022 (515.83 it/sec) -training >> step=7619100, episode=1270 reward=0.7850784 (492.05 it/sec) -training >> step=7619200, episode=1270 reward=0.7925541 (454.39 it/sec) -training >> step=7619300, episode=1271 reward=0.7958206 (133.98 it/sec) -training >> step=7619400, episode=1271 reward=0.7840668 (493.03 it/sec) -training >> step=7619500, episode=1271 reward=0.7818065 (465.35 it/sec) -training >> step=7619600, episode=1271 reward=0.7659286 (435.04 it/sec) -training >> step=7619700, episode=1271 reward=0.7724831 (522.05 it/sec) -training >> step=7619800, episode=1271 reward=0.7957371 (502.53 it/sec) -training >> step=7619900, episode=1271 reward=0.7741786 (497.44 it/sec) -training >> step=7620000, episode=1271 reward=0.7590356 (459.08 it/sec) -training >> step=7620100, episode=1271 reward=0.7518587 (497.09 it/sec) -training >> step=7620200, episode=1271 reward=0.8091565 (512.63 it/sec) -training >> step=7620300, episode=1271 reward=0.783559 (474.76 it/sec) -training >> step=7620400, episode=1271 reward=0.7898306 (504.29 it/sec) -training >> step=7620500, episode=1271 reward=0.7938508 (449.91 it/sec) -training >> step=7620600, episode=1271 reward=0.785932 (473.51 it/sec) -training >> step=7620700, episode=1271 reward=0.7775701 (492.34 it/sec) -training >> step=7620800, episode=1271 reward=0.7769714 (509.03 it/sec) -training >> step=7620900, episode=1271 reward=0.7871904 (498.36 it/sec) -training >> step=7621000, episode=1271 reward=0.7870153 (472.87 it/sec) -training >> step=7621100, episode=1271 reward=0.7781082 (497.88 it/sec) -training >> step=7621200, episode=1271 reward=0.7924034 (423.26 it/sec) -training >> step=7621300, episode=1271 reward=0.7870914 (464.54 it/sec) -training >> step=7621400, episode=1271 reward=0.7853066 (521.77 it/sec) -training >> step=7621500, episode=1271 reward=0.8031206 (483.82 it/sec) -training >> step=7621600, episode=1271 reward=0.8127096 (375.62 it/sec) -training >> step=7621700, episode=1271 reward=0.784353 (482.75 it/sec) -training >> step=7621800, episode=1271 reward=0.8091711 (470.93 it/sec) -training >> step=7621900, episode=1271 reward=0.7827355 (461.38 it/sec) -training >> step=7622000, episode=1271 reward=0.7795344 (440.74 it/sec) -training >> step=7622100, episode=1271 reward=0.7970495 (483.15 it/sec) -training >> step=7622200, episode=1271 reward=0.7756574 (488.43 it/sec) -training >> step=7622300, episode=1271 reward=0.7872853 (482.62 it/sec) -training >> step=7622400, episode=1271 reward=0.8124583 (487.24 it/sec) -training >> step=7622500, episode=1271 reward=0.7920298 (487.45 it/sec) -training >> step=7622600, episode=1271 reward=0.8028749 (487.10 it/sec) -training >> step=7622700, episode=1271 reward=0.7963037 (497.29 it/sec) -training >> step=7622800, episode=1271 reward=0.7842117 (483.93 it/sec) -training >> step=7622900, episode=1271 reward=0.7959151 (452.43 it/sec) -training >> step=7623000, episode=1271 reward=0.7729315 (486.43 it/sec) -training >> step=7623100, episode=1271 reward=0.7991381 (504.96 it/sec) -training >> step=7623200, episode=1271 reward=0.793879 (494.76 it/sec) -training >> step=7623300, episode=1271 reward=0.7902972 (519.03 it/sec) -training >> step=7623400, episode=1271 reward=0.8005571 (482.33 it/sec) -training >> step=7623500, episode=1271 reward=0.7931123 (412.60 it/sec) -training >> step=7623600, episode=1271 reward=0.7969957 (480.91 it/sec) -training >> step=7623700, episode=1271 reward=0.793443 (459.76 it/sec) -training >> step=7623800, episode=1271 reward=0.7747878 (466.41 it/sec) -training >> step=7623900, episode=1271 reward=0.7853696 (492.84 it/sec) -training >> step=7624000, episode=1271 reward=0.7691685 (503.05 it/sec) -training >> step=7624100, episode=1271 reward=0.7865621 (445.92 it/sec) -training >> step=7624200, episode=1271 reward=0.7995654 (494.47 it/sec) -training >> step=7624300, episode=1271 reward=0.8053395 (495.37 it/sec) -training >> step=7624400, episode=1271 reward=0.7911145 (530.66 it/sec) -training >> step=7624500, episode=1271 reward=0.7775794 (476.67 it/sec) -training >> step=7624600, episode=1271 reward=0.7763106 (504.60 it/sec) -training >> step=7624700, episode=1271 reward=0.7721627 (505.51 it/sec) -training >> step=7624800, episode=1271 reward=0.7848172 (478.01 it/sec) -training >> step=7624900, episode=1271 reward=0.779886 (507.08 it/sec) -training >> step=7625000, episode=1271 reward=0.7981196 (474.50 it/sec) -training >> step=7625100, episode=1271 reward=0.7902483 (539.44 it/sec) -training >> step=7625200, episode=1271 reward=0.7675458 (502.85 it/sec) -training >> step=7625300, episode=1272 reward=0.7891762 (128.52 it/sec) -training >> step=7625400, episode=1272 reward=0.7748863 (501.87 it/sec) -training >> step=7625500, episode=1272 reward=0.7787251 (480.06 it/sec) -training >> step=7625600, episode=1272 reward=0.7854332 (468.93 it/sec) -training >> step=7625700, episode=1272 reward=0.7955929 (491.78 it/sec) -training >> step=7625800, episode=1272 reward=0.8034139 (467.02 it/sec) -training >> step=7625900, episode=1272 reward=0.7629529 (451.28 it/sec) -training >> step=7626000, episode=1272 reward=0.7857243 (468.07 it/sec) -training >> step=7626100, episode=1272 reward=0.7897246 (493.60 it/sec) -training >> step=7626200, episode=1272 reward=0.8078489 (496.29 it/sec) -training >> step=7626300, episode=1272 reward=0.7808456 (499.83 it/sec) -training >> step=7626400, episode=1272 reward=0.8040397 (471.51 it/sec) -training >> step=7626500, episode=1272 reward=0.796954 (488.50 it/sec) -training >> step=7626600, episode=1272 reward=0.7864322 (505.67 it/sec) -training >> step=7626700, episode=1272 reward=0.7838454 (483.50 it/sec) -training >> step=7626800, episode=1272 reward=0.7861327 (464.98 it/sec) -training >> step=7626900, episode=1272 reward=0.7980001 (498.82 it/sec) -training >> step=7627000, episode=1272 reward=0.7869678 (512.88 it/sec) -training >> step=7627100, episode=1272 reward=0.8026443 (443.42 it/sec) -training >> step=7627200, episode=1272 reward=0.7994363 (520.51 it/sec) -training >> step=7627300, episode=1272 reward=0.7971757 (493.41 it/sec) -training >> step=7627400, episode=1272 reward=0.7849363 (491.12 it/sec) -training >> step=7627500, episode=1272 reward=0.79109 (505.07 it/sec) -training >> step=7627600, episode=1272 reward=0.792545 (495.98 it/sec) -training >> step=7627700, episode=1272 reward=0.7801126 (314.50 it/sec) -training >> step=7627800, episode=1272 reward=0.7873179 (501.28 it/sec) -training >> step=7627900, episode=1272 reward=0.7910029 (498.49 it/sec) -training >> step=7628000, episode=1272 reward=0.7822188 (530.36 it/sec) -training >> step=7628100, episode=1272 reward=0.7962634 (507.37 it/sec) -training >> step=7628200, episode=1272 reward=0.7691317 (426.07 it/sec) -training >> step=7628300, episode=1272 reward=0.791541 (442.20 it/sec) -training >> step=7628400, episode=1272 reward=0.7819521 (445.34 it/sec) -training >> step=7628500, episode=1272 reward=0.7837834 (429.76 it/sec) -training >> step=7628600, episode=1272 reward=0.7748454 (491.77 it/sec) -training >> step=7628700, episode=1272 reward=0.7758989 (529.23 it/sec) -training >> step=7628800, episode=1272 reward=0.8063738 (466.24 it/sec) -training >> step=7628900, episode=1272 reward=0.7967913 (428.17 it/sec) -training >> step=7629000, episode=1272 reward=0.7869642 (445.86 it/sec) -training >> step=7629100, episode=1272 reward=0.7924595 (392.08 it/sec) -training >> step=7629200, episode=1272 reward=0.779316 (425.66 it/sec) -training >> step=7629300, episode=1272 reward=0.7924522 (427.94 it/sec) -training >> step=7629400, episode=1272 reward=0.7731782 (492.35 it/sec) -training >> step=7629500, episode=1272 reward=0.785912 (452.33 it/sec) -training >> step=7629600, episode=1272 reward=0.789783 (444.37 it/sec) -training >> step=7629700, episode=1272 reward=0.7755222 (493.65 it/sec) -training >> step=7629800, episode=1272 reward=0.7998396 (500.76 it/sec) -training >> step=7629900, episode=1272 reward=0.7725846 (503.89 it/sec) -training >> step=7630000, episode=1272 reward=0.7570446 (466.75 it/sec) -training >> step=7630100, episode=1272 reward=0.7758633 (477.75 it/sec) -training >> step=7630200, episode=1272 reward=0.8022253 (455.84 it/sec) -training >> step=7630300, episode=1272 reward=0.8018047 (457.97 it/sec) -training >> step=7630400, episode=1272 reward=0.7912053 (479.21 it/sec) -training >> step=7630500, episode=1272 reward=0.7955396 (497.20 it/sec) -training >> step=7630600, episode=1272 reward=0.7828553 (441.52 it/sec) -training >> step=7630700, episode=1272 reward=0.8106138 (512.50 it/sec) -training >> step=7630800, episode=1272 reward=0.7951308 (477.47 it/sec) -training >> step=7630900, episode=1272 reward=0.7887192 (527.76 it/sec) -training >> step=7631000, episode=1272 reward=0.7841973 (477.12 it/sec) -training >> step=7631100, episode=1272 reward=0.7792684 (467.66 it/sec) -training >> step=7631200, episode=1272 reward=0.7924614 (498.62 it/sec) -training >> step=7631300, episode=1273 reward=0.7857431 (129.39 it/sec) -training >> step=7631400, episode=1273 reward=0.7615573 (472.51 it/sec) -training >> step=7631500, episode=1273 reward=0.8149588 (491.78 it/sec) -training >> step=7631600, episode=1273 reward=0.7835932 (477.62 it/sec) -training >> step=7631700, episode=1273 reward=0.7862272 (493.50 it/sec) -training >> step=7631800, episode=1273 reward=0.7708472 (464.94 it/sec) -training >> step=7631900, episode=1273 reward=0.7886007 (477.97 it/sec) -training >> step=7632000, episode=1273 reward=0.7965702 (490.86 it/sec) -training >> step=7632100, episode=1273 reward=0.793442 (507.16 it/sec) -training >> step=7632200, episode=1273 reward=0.7723704 (463.19 it/sec) -training >> step=7632300, episode=1273 reward=0.7877034 (445.46 it/sec) -training >> step=7632400, episode=1273 reward=0.788923 (464.17 it/sec) -training >> step=7632500, episode=1273 reward=0.8010073 (494.24 it/sec) -training >> step=7632600, episode=1273 reward=0.7975382 (495.43 it/sec) -training >> step=7632700, episode=1273 reward=0.783136 (493.98 it/sec) -training >> step=7632800, episode=1273 reward=0.7882925 (474.08 it/sec) -training >> step=7632900, episode=1273 reward=0.7798756 (470.99 it/sec) -training >> step=7633000, episode=1273 reward=0.7819496 (522.45 it/sec) -training >> step=7633100, episode=1273 reward=0.7839673 (494.92 it/sec) -training >> step=7633200, episode=1273 reward=0.8029243 (475.59 it/sec) -training >> step=7633300, episode=1273 reward=0.7960411 (464.88 it/sec) -training >> step=7633400, episode=1273 reward=0.7896697 (506.77 it/sec) -training >> step=7633500, episode=1273 reward=0.757597 (502.87 it/sec) -training >> step=7633600, episode=1273 reward=0.801851 (479.52 it/sec) -training >> step=7633700, episode=1273 reward=0.7889134 (492.88 it/sec) -training >> step=7633800, episode=1273 reward=0.8063634 (456.79 it/sec) -training >> step=7633900, episode=1273 reward=0.7929478 (497.07 it/sec) -training >> step=7634000, episode=1273 reward=0.8079826 (364.48 it/sec) -training >> step=7634100, episode=1273 reward=0.7872552 (519.69 it/sec) -training >> step=7634200, episode=1273 reward=0.7930405 (448.76 it/sec) -training >> step=7634300, episode=1273 reward=0.793615 (456.44 it/sec) -training >> step=7634400, episode=1273 reward=0.800524 (408.43 it/sec) -training >> step=7634500, episode=1273 reward=0.80484 (520.57 it/sec) -training >> step=7634600, episode=1273 reward=0.7860303 (494.40 it/sec) -training >> step=7634700, episode=1273 reward=0.7870212 (486.34 it/sec) -training >> step=7634800, episode=1273 reward=0.787754 (474.54 it/sec) -training >> step=7634900, episode=1273 reward=0.7867329 (505.34 it/sec) -training >> step=7635000, episode=1273 reward=0.7952138 (491.39 it/sec) -training >> step=7635100, episode=1273 reward=0.79825 (471.33 it/sec) -training >> step=7635200, episode=1273 reward=0.785539 (493.61 it/sec) -training >> step=7635300, episode=1273 reward=0.776818 (478.46 it/sec) -training >> step=7635400, episode=1273 reward=0.7950502 (478.50 it/sec) -training >> step=7635500, episode=1273 reward=0.8045018 (516.74 it/sec) -training >> step=7635600, episode=1273 reward=0.7828227 (503.88 it/sec) -training >> step=7635700, episode=1273 reward=0.7804216 (467.23 it/sec) -training >> step=7635800, episode=1273 reward=0.8057228 (453.86 it/sec) -training >> step=7635900, episode=1273 reward=0.7870142 (522.79 it/sec) -training >> step=7636000, episode=1273 reward=0.7585552 (477.14 it/sec) -training >> step=7636100, episode=1273 reward=0.7829195 (453.15 it/sec) -training >> step=7636200, episode=1273 reward=0.7931818 (492.69 it/sec) -training >> step=7636300, episode=1273 reward=0.7837614 (521.06 it/sec) -training >> step=7636400, episode=1273 reward=0.7703758 (499.08 it/sec) -training >> step=7636500, episode=1273 reward=0.7951917 (466.20 it/sec) -training >> step=7636600, episode=1273 reward=0.77835 (475.12 it/sec) -training >> step=7636700, episode=1273 reward=0.7870201 (474.91 it/sec) -training >> step=7636800, episode=1273 reward=0.794521 (500.19 it/sec) -training >> step=7636900, episode=1273 reward=0.7831244 (494.83 it/sec) -training >> step=7637000, episode=1273 reward=0.7983986 (493.82 it/sec) -training >> step=7637100, episode=1273 reward=0.7994787 (490.89 it/sec) -training >> step=7637200, episode=1273 reward=0.8008788 (463.82 it/sec) -training >> step=7637300, episode=1274 reward=0.7755414 (100.65 it/sec) -training >> step=7637400, episode=1274 reward=0.7966914 (461.75 it/sec) -training >> step=7637500, episode=1274 reward=0.7728966 (462.43 it/sec) -training >> step=7637600, episode=1274 reward=0.7995234 (468.69 it/sec) -training >> step=7637700, episode=1274 reward=0.7773324 (487.31 it/sec) -training >> step=7637800, episode=1274 reward=0.7899164 (415.27 it/sec) -training >> step=7637900, episode=1274 reward=0.7884189 (480.92 it/sec) -training >> step=7638000, episode=1274 reward=0.783384 (515.64 it/sec) -training >> step=7638100, episode=1274 reward=0.8020996 (516.39 it/sec) -training >> step=7638200, episode=1274 reward=0.7863051 (504.45 it/sec) -training >> step=7638300, episode=1274 reward=0.7981078 (467.73 it/sec) -training >> step=7638400, episode=1274 reward=0.7898112 (483.43 it/sec) -training >> step=7638500, episode=1274 reward=0.795029 (506.37 it/sec) -training >> step=7638600, episode=1274 reward=0.7976894 (450.95 it/sec) -training >> step=7638700, episode=1274 reward=0.8038432 (473.37 it/sec) -training >> step=7638800, episode=1274 reward=0.7920635 (455.33 it/sec) -training >> step=7638900, episode=1274 reward=0.7766131 (456.48 it/sec) -training >> step=7639000, episode=1274 reward=0.8096682 (526.78 it/sec) -training >> step=7639100, episode=1274 reward=0.7931904 (466.55 it/sec) -training >> step=7639200, episode=1274 reward=0.7723995 (473.30 it/sec) -training >> step=7639300, episode=1274 reward=0.7678875 (443.34 it/sec) -training >> step=7639400, episode=1274 reward=0.7980843 (513.77 it/sec) -training >> step=7639500, episode=1274 reward=0.7792233 (488.92 it/sec) -training >> step=7639600, episode=1274 reward=0.7904572 (517.28 it/sec) -training >> step=7639700, episode=1274 reward=0.7879541 (505.93 it/sec) -training >> step=7639800, episode=1274 reward=0.7938892 (439.72 it/sec) -training >> step=7639900, episode=1274 reward=0.7861075 (485.97 it/sec) -training >> step=7640000, episode=1274 reward=0.7981302 (469.06 it/sec) -training >> step=7640100, episode=1274 reward=0.7919398 (517.91 it/sec) -training >> step=7640200, episode=1274 reward=0.7995782 (352.33 it/sec) -training >> step=7640300, episode=1274 reward=0.7991641 (460.69 it/sec) -training >> step=7640400, episode=1274 reward=0.7786191 (479.93 it/sec) -training >> step=7640500, episode=1274 reward=0.7741435 (476.58 it/sec) -training >> step=7640600, episode=1274 reward=0.7845606 (484.75 it/sec) -training >> step=7640700, episode=1274 reward=0.7943457 (450.46 it/sec) -training >> step=7640800, episode=1274 reward=0.7941808 (447.67 it/sec) -training >> step=7640900, episode=1274 reward=0.7864596 (450.84 it/sec) -training >> step=7641000, episode=1274 reward=0.7607556 (497.22 it/sec) -training >> step=7641100, episode=1274 reward=0.8014498 (496.38 it/sec) -training >> step=7641200, episode=1274 reward=0.7909916 (442.48 it/sec) -training >> step=7641300, episode=1274 reward=0.7960178 (491.03 it/sec) -training >> step=7641400, episode=1274 reward=0.801762 (466.86 it/sec) -training >> step=7641500, episode=1274 reward=0.7887208 (514.28 it/sec) -training >> step=7641600, episode=1274 reward=0.8014551 (498.29 it/sec) -training >> step=7641700, episode=1274 reward=0.8008946 (478.41 it/sec) -training >> step=7641800, episode=1274 reward=0.7671881 (484.80 it/sec) -training >> step=7641900, episode=1274 reward=0.7846799 (484.46 it/sec) -training >> step=7642000, episode=1274 reward=0.7991132 (478.26 it/sec) -training >> step=7642100, episode=1274 reward=0.7693868 (488.20 it/sec) -training >> step=7642200, episode=1274 reward=0.8001246 (486.85 it/sec) -training >> step=7642300, episode=1274 reward=0.7720293 (525.35 it/sec) -training >> step=7642400, episode=1274 reward=0.7804648 (487.59 it/sec) -training >> step=7642500, episode=1274 reward=0.8009855 (469.31 it/sec) -training >> step=7642600, episode=1274 reward=0.7966635 (498.03 it/sec) -training >> step=7642700, episode=1274 reward=0.7843767 (468.39 it/sec) -training >> step=7642800, episode=1274 reward=0.7671203 (476.74 it/sec) -training >> step=7642900, episode=1274 reward=0.780803 (487.38 it/sec) -training >> step=7643000, episode=1274 reward=0.797708 (505.11 it/sec) -training >> step=7643100, episode=1274 reward=0.7869605 (497.00 it/sec) -training >> step=7643200, episode=1274 reward=0.7921721 (421.73 it/sec) -training >> step=7643300, episode=1275 reward=0.7802699 (115.16 it/sec) -training >> step=7643400, episode=1275 reward=0.7774493 (498.13 it/sec) -training >> step=7643500, episode=1275 reward=0.8074649 (437.69 it/sec) -training >> step=7643600, episode=1275 reward=0.7921312 (481.17 it/sec) -training >> step=7643700, episode=1275 reward=0.7859305 (479.37 it/sec) -training >> step=7643800, episode=1275 reward=0.7882311 (475.36 it/sec) -training >> step=7643900, episode=1275 reward=0.8096717 (481.19 it/sec) -training >> step=7644000, episode=1275 reward=0.7965556 (491.64 it/sec) -training >> step=7644100, episode=1275 reward=0.7698379 (451.43 it/sec) -training >> step=7644200, episode=1275 reward=0.7987084 (469.47 it/sec) -training >> step=7644300, episode=1275 reward=0.764443 (466.41 it/sec) -training >> step=7644400, episode=1275 reward=0.7747111 (470.03 it/sec) -training >> step=7644500, episode=1275 reward=0.7904611 (490.76 it/sec) -training >> step=7644600, episode=1275 reward=0.7945191 (462.88 it/sec) -training >> step=7644700, episode=1275 reward=0.7885482 (457.13 it/sec) -training >> step=7644800, episode=1275 reward=0.7877771 (482.14 it/sec) -training >> step=7644900, episode=1275 reward=0.8153467 (491.64 it/sec) -training >> step=7645000, episode=1275 reward=0.7851806 (472.13 it/sec) -training >> step=7645100, episode=1275 reward=0.7988939 (504.69 it/sec) -training >> step=7645200, episode=1275 reward=0.7956653 (502.48 it/sec) -training >> step=7645300, episode=1275 reward=0.7719208 (472.61 it/sec) -training >> step=7645400, episode=1275 reward=0.7907746 (471.53 it/sec) -training >> step=7645500, episode=1275 reward=0.7776407 (480.72 it/sec) -training >> step=7645600, episode=1275 reward=0.7803955 (446.00 it/sec) -training >> step=7645700, episode=1275 reward=0.7609981 (495.20 it/sec) -training >> step=7645800, episode=1275 reward=0.7780472 (457.28 it/sec) -training >> step=7645900, episode=1275 reward=0.8122864 (516.93 it/sec) -training >> step=7646000, episode=1275 reward=0.77318 (454.16 it/sec) -training >> step=7646100, episode=1275 reward=0.8009542 (455.57 it/sec) -training >> step=7646200, episode=1275 reward=0.7942467 (492.13 it/sec) -training >> step=7646300, episode=1275 reward=0.7760879 (373.55 it/sec) -training >> step=7646400, episode=1275 reward=0.7860492 (485.69 it/sec) -training >> step=7646500, episode=1275 reward=0.8027026 (500.80 it/sec) -training >> step=7646600, episode=1275 reward=0.7862373 (535.79 it/sec) -training >> step=7646700, episode=1275 reward=0.7870361 (494.99 it/sec) -training >> step=7646800, episode=1275 reward=0.7873349 (443.58 it/sec) -training >> step=7646900, episode=1275 reward=0.8022004 (504.98 it/sec) -training >> step=7647000, episode=1275 reward=0.7940313 (486.27 it/sec) -training >> step=7647100, episode=1275 reward=0.7915941 (475.75 it/sec) -training >> step=7647200, episode=1275 reward=0.8053077 (468.41 it/sec) -training >> step=7647300, episode=1275 reward=0.8007234 (522.61 it/sec) -training >> step=7647400, episode=1275 reward=0.7872446 (474.13 it/sec) -training >> step=7647500, episode=1275 reward=0.8031687 (507.68 it/sec) -training >> step=7647600, episode=1275 reward=0.795655 (478.64 it/sec) -training >> step=7647700, episode=1275 reward=0.8155149 (529.66 it/sec) -training >> step=7647800, episode=1275 reward=0.7695355 (494.54 it/sec) -training >> step=7647900, episode=1275 reward=0.7816296 (472.87 it/sec) -training >> step=7648000, episode=1275 reward=0.7851544 (487.66 it/sec) -training >> step=7648100, episode=1275 reward=0.7938977 (496.42 it/sec) -training >> step=7648200, episode=1275 reward=0.7881047 (503.83 it/sec) -training >> step=7648300, episode=1275 reward=0.7710242 (456.88 it/sec) -training >> step=7648400, episode=1275 reward=0.793457 (510.49 it/sec) -training >> step=7648500, episode=1275 reward=0.7798225 (480.46 it/sec) -training >> step=7648600, episode=1275 reward=0.7928309 (506.72 it/sec) -training >> step=7648700, episode=1275 reward=0.7885845 (489.64 it/sec) -training >> step=7648800, episode=1275 reward=0.7874488 (507.65 it/sec) -training >> step=7648900, episode=1275 reward=0.7861241 (453.60 it/sec) -training >> step=7649000, episode=1275 reward=0.7850842 (500.73 it/sec) -training >> step=7649100, episode=1275 reward=0.770498 (520.13 it/sec) -training >> step=7649200, episode=1275 reward=0.7849843 (450.81 it/sec) -training >> step=7649300, episode=1276 reward=0.7951129 (127.23 it/sec) -training >> step=7649400, episode=1276 reward=0.8080757 (444.95 it/sec) -training >> step=7649500, episode=1276 reward=0.7962476 (496.00 it/sec) -training >> step=7649600, episode=1276 reward=0.7922764 (481.42 it/sec) -training >> step=7649700, episode=1276 reward=0.8014994 (483.35 it/sec) -training >> step=7649800, episode=1276 reward=0.7811689 (508.61 it/sec) -training >> step=7649900, episode=1276 reward=0.7627409 (478.52 it/sec) -training >> step=7650000, episode=1276 reward=0.797598 (488.32 it/sec) -training >> step=7650100, episode=1276 reward=0.7837006 (505.29 it/sec) -training >> step=7650200, episode=1276 reward=0.7923369 (534.35 it/sec) -training >> step=7650300, episode=1276 reward=0.7748485 (479.53 it/sec) -training >> step=7650400, episode=1276 reward=0.7886748 (505.27 it/sec) -training >> step=7650500, episode=1276 reward=0.785359 (486.39 it/sec) -training >> step=7650600, episode=1276 reward=0.8024291 (505.67 it/sec) -training >> step=7650700, episode=1276 reward=0.7741625 (466.44 it/sec) -training >> step=7650800, episode=1276 reward=0.781979 (526.61 it/sec) -training >> step=7650900, episode=1276 reward=0.7991248 (513.74 it/sec) -training >> step=7651000, episode=1276 reward=0.7908548 (471.44 it/sec) -training >> step=7651100, episode=1276 reward=0.7955063 (511.78 it/sec) -training >> step=7651200, episode=1276 reward=0.8125668 (464.70 it/sec) -training >> step=7651300, episode=1276 reward=0.7828681 (475.07 it/sec) -training >> step=7651400, episode=1276 reward=0.7945249 (502.29 it/sec) -training >> step=7651500, episode=1276 reward=0.7839609 (443.94 it/sec) -training >> step=7651600, episode=1276 reward=0.7846045 (497.58 it/sec) -training >> step=7651700, episode=1276 reward=0.7859411 (497.17 it/sec) -training >> step=7651800, episode=1276 reward=0.7838571 (475.69 it/sec) -training >> step=7651900, episode=1276 reward=0.7882519 (506.83 it/sec) -training >> step=7652000, episode=1276 reward=0.7924523 (511.50 it/sec) -training >> step=7652100, episode=1276 reward=0.7943745 (425.04 it/sec) -training >> step=7652200, episode=1276 reward=0.7982994 (508.46 it/sec) -training >> step=7652300, episode=1276 reward=0.8065977 (507.31 it/sec) -training >> step=7652400, episode=1276 reward=0.8026807 (404.61 it/sec) -training >> step=7652500, episode=1276 reward=0.7899944 (468.89 it/sec) -training >> step=7652600, episode=1276 reward=0.7706256 (462.73 it/sec) -training >> step=7652700, episode=1276 reward=0.7820802 (453.28 it/sec) -training >> step=7652800, episode=1276 reward=0.8032983 (509.65 it/sec) -training >> step=7652900, episode=1276 reward=0.7766027 (508.68 it/sec) -training >> step=7653000, episode=1276 reward=0.7825111 (444.32 it/sec) -training >> step=7653100, episode=1276 reward=0.7821628 (513.99 it/sec) -training >> step=7653200, episode=1276 reward=0.7945413 (476.15 it/sec) -training >> step=7653300, episode=1276 reward=0.7865079 (491.01 it/sec) -training >> step=7653400, episode=1276 reward=0.7806079 (499.24 it/sec) -training >> step=7653500, episode=1276 reward=0.785115 (465.61 it/sec) -training >> step=7653600, episode=1276 reward=0.7936673 (463.82 it/sec) -training >> step=7653700, episode=1276 reward=0.8154518 (452.52 it/sec) -training >> step=7653800, episode=1276 reward=0.7831218 (535.61 it/sec) -training >> step=7653900, episode=1276 reward=0.7824948 (477.16 it/sec) -training >> step=7654000, episode=1276 reward=0.7876563 (486.23 it/sec) -training >> step=7654100, episode=1276 reward=0.7888892 (498.04 it/sec) -training >> step=7654200, episode=1276 reward=0.7959744 (525.87 it/sec) -training >> step=7654300, episode=1276 reward=0.7738769 (498.61 it/sec) -training >> step=7654400, episode=1276 reward=0.7704409 (502.85 it/sec) -training >> step=7654500, episode=1276 reward=0.774811 (510.08 it/sec) -training >> step=7654600, episode=1276 reward=0.7755674 (443.13 it/sec) -training >> step=7654700, episode=1276 reward=0.7788505 (517.92 it/sec) -training >> step=7654800, episode=1276 reward=0.7902243 (481.49 it/sec) -training >> step=7654900, episode=1276 reward=0.7961295 (466.28 it/sec) -training >> step=7655000, episode=1276 reward=0.7925914 (496.24 it/sec) -training >> step=7655100, episode=1276 reward=0.7772215 (438.86 it/sec) -training >> step=7655200, episode=1276 reward=0.8128469 (519.95 it/sec) -training >> step=7655300, episode=1277 reward=0.8040909 (115.72 it/sec) -training >> step=7655400, episode=1277 reward=0.7790304 (334.42 it/sec) -training >> step=7655500, episode=1277 reward=0.7951183 (469.59 it/sec) -training >> step=7655600, episode=1277 reward=0.7818215 (490.62 it/sec) -training >> step=7655700, episode=1277 reward=0.8051999 (466.27 it/sec) -training >> step=7655800, episode=1277 reward=0.7895577 (486.19 it/sec) -training >> step=7655900, episode=1277 reward=0.7900172 (518.49 it/sec) -training >> step=7656000, episode=1277 reward=0.7882995 (527.07 it/sec) -training >> step=7656100, episode=1277 reward=0.7927544 (494.32 it/sec) -training >> step=7656200, episode=1277 reward=0.7941499 (469.19 it/sec) -training >> step=7656300, episode=1277 reward=0.7930903 (502.21 it/sec) -training >> step=7656400, episode=1277 reward=0.7721097 (467.37 it/sec) -training >> step=7656500, episode=1277 reward=0.805023 (488.11 it/sec) -training >> step=7656600, episode=1277 reward=0.7760336 (514.76 it/sec) -training >> step=7656700, episode=1277 reward=0.7703068 (471.78 it/sec) -training >> step=7656800, episode=1277 reward=0.7819757 (506.85 it/sec) -training >> step=7656900, episode=1277 reward=0.7734014 (500.22 it/sec) -training >> step=7657000, episode=1277 reward=0.8002114 (497.68 it/sec) -training >> step=7657100, episode=1277 reward=0.8026946 (458.72 it/sec) -training >> step=7657200, episode=1277 reward=0.7744383 (472.05 it/sec) -training >> step=7657300, episode=1277 reward=0.789836 (513.34 it/sec) -training >> step=7657400, episode=1277 reward=0.7919686 (483.86 it/sec) -training >> step=7657500, episode=1277 reward=0.7690107 (458.09 it/sec) -training >> step=7657600, episode=1277 reward=0.7829211 (496.89 it/sec) -training >> step=7657700, episode=1277 reward=0.82192 (498.12 it/sec) -training >> step=7657800, episode=1277 reward=0.7803681 (484.38 it/sec) -training >> step=7657900, episode=1277 reward=0.8077936 (490.82 it/sec) -training >> step=7658000, episode=1277 reward=0.7778358 (500.63 it/sec) -training >> step=7658100, episode=1277 reward=0.8007604 (468.06 it/sec) -training >> step=7658200, episode=1277 reward=0.7907717 (495.28 it/sec) -training >> step=7658300, episode=1277 reward=0.7683254 (482.43 it/sec) -training >> step=7658400, episode=1277 reward=0.7987835 (413.25 it/sec) -training >> step=7658500, episode=1277 reward=0.7818703 (462.02 it/sec) -training >> step=7658600, episode=1277 reward=0.7703306 (490.12 it/sec) -training >> step=7658700, episode=1277 reward=0.7701394 (462.82 it/sec) -training >> step=7658800, episode=1277 reward=0.8008013 (500.31 it/sec) -training >> step=7658900, episode=1277 reward=0.7937758 (511.46 it/sec) -training >> step=7659000, episode=1277 reward=0.7922403 (471.23 it/sec) -training >> step=7659100, episode=1277 reward=0.7895558 (502.76 it/sec) -training >> step=7659200, episode=1277 reward=0.7757266 (456.01 it/sec) -training >> step=7659300, episode=1277 reward=0.8141984 (493.56 it/sec) -training >> step=7659400, episode=1277 reward=0.7916231 (502.73 it/sec) -training >> step=7659500, episode=1277 reward=0.7799116 (512.10 it/sec) -training >> step=7659600, episode=1277 reward=0.8008503 (445.38 it/sec) -training >> step=7659700, episode=1277 reward=0.7813779 (495.98 it/sec) -training >> step=7659800, episode=1277 reward=0.7833483 (558.93 it/sec) -training >> step=7659900, episode=1277 reward=0.8013946 (511.75 it/sec) -training >> step=7660000, episode=1277 reward=0.8049219 (473.76 it/sec) -training >> step=7660100, episode=1277 reward=0.7927922 (462.19 it/sec) -training >> step=7660200, episode=1277 reward=0.803651 (507.24 it/sec) -training >> step=7660300, episode=1277 reward=0.8056105 (490.37 it/sec) -training >> step=7660400, episode=1277 reward=0.7875472 (506.41 it/sec) -training >> step=7660500, episode=1277 reward=0.7827742 (462.76 it/sec) -training >> step=7660600, episode=1277 reward=0.8006196 (481.25 it/sec) -training >> step=7660700, episode=1277 reward=0.7827395 (500.84 it/sec) -training >> step=7660800, episode=1277 reward=0.792008 (507.30 it/sec) -training >> step=7660900, episode=1277 reward=0.7705823 (533.92 it/sec) -training >> step=7661000, episode=1277 reward=0.7687301 (519.29 it/sec) -training >> step=7661100, episode=1277 reward=0.7741085 (443.67 it/sec) -training >> step=7661200, episode=1277 reward=0.8120872 (459.99 it/sec) -training >> step=7661300, episode=1278 reward=0.7913628 (120.47 it/sec) -training >> step=7661400, episode=1278 reward=0.8056929 (474.55 it/sec) -training >> step=7661500, episode=1278 reward=0.7837568 (488.96 it/sec) -training >> step=7661600, episode=1278 reward=0.7963877 (472.73 it/sec) -training >> step=7661700, episode=1278 reward=0.7903243 (517.11 it/sec) -training >> step=7661800, episode=1278 reward=0.7963032 (473.28 it/sec) -training >> step=7661900, episode=1278 reward=0.7530244 (481.90 it/sec) -training >> step=7662000, episode=1278 reward=0.8088416 (520.87 it/sec) -training >> step=7662100, episode=1278 reward=0.7953114 (464.14 it/sec) -training >> step=7662200, episode=1278 reward=0.781545 (443.73 it/sec) -training >> step=7662300, episode=1278 reward=0.7985803 (488.47 it/sec) -training >> step=7662400, episode=1278 reward=0.802165 (466.96 it/sec) -training >> step=7662500, episode=1278 reward=0.7896419 (485.67 it/sec) -training >> step=7662600, episode=1278 reward=0.7771462 (490.15 it/sec) -training >> step=7662700, episode=1278 reward=0.803875 (493.80 it/sec) -training >> step=7662800, episode=1278 reward=0.788116 (377.93 it/sec) -training >> step=7662900, episode=1278 reward=0.7984862 (495.60 it/sec) -training >> step=7663000, episode=1278 reward=0.7964162 (495.91 it/sec) -training >> step=7663100, episode=1278 reward=0.7878982 (510.01 it/sec) -training >> step=7663200, episode=1278 reward=0.7764899 (455.74 it/sec) -training >> step=7663300, episode=1278 reward=0.7951289 (472.94 it/sec) -training >> step=7663400, episode=1278 reward=0.7749922 (437.92 it/sec) -training >> step=7663500, episode=1278 reward=0.8025025 (494.39 it/sec) -training >> step=7663600, episode=1278 reward=0.7862971 (490.14 it/sec) -training >> step=7663700, episode=1278 reward=0.7715817 (474.56 it/sec) -training >> step=7663800, episode=1278 reward=0.8129573 (500.61 it/sec) -training >> step=7663900, episode=1278 reward=0.7848446 (502.40 it/sec) -training >> step=7664000, episode=1278 reward=0.7677563 (504.32 it/sec) -training >> step=7664100, episode=1278 reward=0.7875643 (498.93 it/sec) -training >> step=7664200, episode=1278 reward=0.7880458 (502.28 it/sec) -training >> step=7664300, episode=1278 reward=0.7971149 (472.34 it/sec) -training >> step=7664400, episode=1278 reward=0.7797757 (433.59 it/sec) -training >> step=7664500, episode=1278 reward=0.7859908 (394.88 it/sec) -training >> step=7664600, episode=1278 reward=0.8032511 (526.15 it/sec) -training >> step=7664700, episode=1278 reward=0.7826625 (468.37 it/sec) -training >> step=7664800, episode=1278 reward=0.7602364 (496.45 it/sec) -training >> step=7664900, episode=1278 reward=0.789043 (515.67 it/sec) -training >> step=7665000, episode=1278 reward=0.8007476 (504.20 it/sec) -training >> step=7665100, episode=1278 reward=0.8006749 (494.07 it/sec) -training >> step=7665200, episode=1278 reward=0.8070164 (472.05 it/sec) -training >> step=7665300, episode=1278 reward=0.8074831 (501.12 it/sec) -training >> step=7665400, episode=1278 reward=0.7898318 (497.23 it/sec) -training >> step=7665500, episode=1278 reward=0.7869617 (477.19 it/sec) -training >> step=7665600, episode=1278 reward=0.7859017 (532.00 it/sec) -training >> step=7665700, episode=1278 reward=0.7880996 (430.87 it/sec) -training >> step=7665800, episode=1278 reward=0.7751366 (460.71 it/sec) -training >> step=7665900, episode=1278 reward=0.7891248 (504.12 it/sec) -training >> step=7666000, episode=1278 reward=0.7779848 (517.21 it/sec) -training >> step=7666100, episode=1278 reward=0.7730749 (479.22 it/sec) -training >> step=7666200, episode=1278 reward=0.7961833 (467.40 it/sec) -training >> step=7666300, episode=1278 reward=0.7784476 (500.91 it/sec) -training >> step=7666400, episode=1278 reward=0.767678 (524.63 it/sec) -training >> step=7666500, episode=1278 reward=0.7961372 (482.00 it/sec) -training >> step=7666600, episode=1278 reward=0.7855759 (479.35 it/sec) -training >> step=7666700, episode=1278 reward=0.7627944 (452.44 it/sec) -training >> step=7666800, episode=1278 reward=0.7960418 (472.03 it/sec) -training >> step=7666900, episode=1278 reward=0.7826076 (476.59 it/sec) -training >> step=7667000, episode=1278 reward=0.7924601 (472.88 it/sec) -training >> step=7667100, episode=1278 reward=0.7995731 (518.98 it/sec) -training >> step=7667200, episode=1278 reward=0.7843256 (469.27 it/sec) -training >> step=7667300, episode=1279 reward=0.7995978 (120.02 it/sec) -training >> step=7667400, episode=1279 reward=0.785655 (444.07 it/sec) -training >> step=7667500, episode=1279 reward=0.7848832 (460.43 it/sec) -training >> step=7667600, episode=1279 reward=0.7958702 (494.46 it/sec) -training >> step=7667700, episode=1279 reward=0.7754961 (457.74 it/sec) -training >> step=7667800, episode=1279 reward=0.782325 (496.46 it/sec) -training >> step=7667900, episode=1279 reward=0.7936076 (475.75 it/sec) -training >> step=7668000, episode=1279 reward=0.794873 (468.66 it/sec) -training >> step=7668100, episode=1279 reward=0.7699968 (507.39 it/sec) -training >> step=7668200, episode=1279 reward=0.7932077 (490.76 it/sec) -training >> step=7668300, episode=1279 reward=0.7786338 (500.51 it/sec) -training >> step=7668400, episode=1279 reward=0.8000774 (483.07 it/sec) -training >> step=7668500, episode=1279 reward=0.7999054 (465.66 it/sec) -training >> step=7668600, episode=1279 reward=0.7936229 (478.31 it/sec) -training >> step=7668700, episode=1279 reward=0.7826154 (495.25 it/sec) -training >> step=7668800, episode=1279 reward=0.792671 (477.55 it/sec) -training >> step=7668900, episode=1279 reward=0.8054181 (462.42 it/sec) -training >> step=7669000, episode=1279 reward=0.7889192 (456.08 it/sec) -training >> step=7669100, episode=1279 reward=0.7895647 (506.76 it/sec) -training >> step=7669200, episode=1279 reward=0.7826123 (409.14 it/sec) -training >> step=7669300, episode=1279 reward=0.7832326 (503.83 it/sec) -training >> step=7669400, episode=1279 reward=0.7977951 (462.92 it/sec) -training >> step=7669500, episode=1279 reward=0.7951524 (498.72 it/sec) -training >> step=7669600, episode=1279 reward=0.7918475 (476.78 it/sec) -training >> step=7669700, episode=1279 reward=0.7788066 (472.82 it/sec) -training >> step=7669800, episode=1279 reward=0.7998737 (465.75 it/sec) -training >> step=7669900, episode=1279 reward=0.7895883 (452.56 it/sec) -training >> step=7670000, episode=1279 reward=0.7769876 (497.01 it/sec) -training >> step=7670100, episode=1279 reward=0.7798899 (509.44 it/sec) -training >> step=7670200, episode=1279 reward=0.7677695 (500.66 it/sec) -training >> step=7670300, episode=1279 reward=0.7879508 (446.73 it/sec) -training >> step=7670400, episode=1279 reward=0.8008133 (416.99 it/sec) -training >> step=7670500, episode=1279 reward=0.7977299 (463.20 it/sec) -training >> step=7670600, episode=1279 reward=0.8025877 (502.60 it/sec) -training >> step=7670700, episode=1279 reward=0.7897205 (471.89 it/sec) -training >> step=7670800, episode=1279 reward=0.7901566 (521.18 it/sec) -training >> step=7670900, episode=1279 reward=0.7875734 (469.30 it/sec) -training >> step=7671000, episode=1279 reward=0.788801 (511.96 it/sec) -training >> step=7671100, episode=1279 reward=0.776993 (392.85 it/sec) -training >> step=7671200, episode=1279 reward=0.8077931 (503.56 it/sec) -training >> step=7671300, episode=1279 reward=0.7938082 (469.69 it/sec) -training >> step=7671400, episode=1279 reward=0.7746434 (467.85 it/sec) -training >> step=7671500, episode=1279 reward=0.7668577 (531.78 it/sec) -training >> step=7671600, episode=1279 reward=0.7898939 (476.97 it/sec) -training >> step=7671700, episode=1279 reward=0.8047979 (471.10 it/sec) -training >> step=7671800, episode=1279 reward=0.7946123 (484.58 it/sec) -training >> step=7671900, episode=1279 reward=0.7908735 (474.79 it/sec) -training >> step=7672000, episode=1279 reward=0.7823184 (481.08 it/sec) -training >> step=7672100, episode=1279 reward=0.7794243 (505.80 it/sec) -training >> step=7672200, episode=1279 reward=0.7892421 (473.63 it/sec) -training >> step=7672300, episode=1279 reward=0.7738628 (486.99 it/sec) -training >> step=7672400, episode=1279 reward=0.8011367 (484.06 it/sec) -training >> step=7672500, episode=1279 reward=0.773455 (479.17 it/sec) -training >> step=7672600, episode=1279 reward=0.7942597 (523.59 it/sec) -training >> step=7672700, episode=1279 reward=0.8031251 (510.76 it/sec) -training >> step=7672800, episode=1279 reward=0.7890572 (490.15 it/sec) -training >> step=7672900, episode=1279 reward=0.7800638 (513.76 it/sec) -training >> step=7673000, episode=1279 reward=0.7750794 (520.22 it/sec) -training >> step=7673100, episode=1279 reward=0.764202 (470.44 it/sec) -training >> step=7673200, episode=1279 reward=0.7850935 (456.60 it/sec) -training >> step=7673300, episode=1280 reward=0.7975435 (135.85 it/sec) -training >> step=7673400, episode=1280 reward=0.7975768 (501.84 it/sec) -training >> step=7673500, episode=1280 reward=0.8058645 (479.51 it/sec) -training >> step=7673600, episode=1280 reward=0.7989699 (517.86 it/sec) -training >> step=7673700, episode=1280 reward=0.7875803 (498.60 it/sec) -training >> step=7673800, episode=1280 reward=0.7846773 (493.60 it/sec) -training >> step=7673900, episode=1280 reward=0.7756019 (501.05 it/sec) -training >> step=7674000, episode=1280 reward=0.7774411 (520.27 it/sec) -training >> step=7674100, episode=1280 reward=0.7849849 (492.26 it/sec) -training >> step=7674200, episode=1280 reward=0.8052661 (479.30 it/sec) -training >> step=7674300, episode=1280 reward=0.8207158 (502.03 it/sec) -training >> step=7674400, episode=1280 reward=0.7754991 (515.59 it/sec) -training >> step=7674500, episode=1280 reward=0.7995507 (504.32 it/sec) -training >> step=7674600, episode=1280 reward=0.7922109 (500.50 it/sec) -training >> step=7674700, episode=1280 reward=0.7685207 (505.12 it/sec) -training >> step=7674800, episode=1280 reward=0.7804868 (503.61 it/sec) -training >> step=7674900, episode=1280 reward=0.7766467 (496.22 it/sec) -training >> step=7675000, episode=1280 reward=0.8025167 (463.96 it/sec) -training >> step=7675100, episode=1280 reward=0.7884663 (498.45 it/sec) -training >> step=7675200, episode=1280 reward=0.7988096 (505.68 it/sec) -training >> step=7675300, episode=1280 reward=0.7813346 (497.11 it/sec) -training >> step=7675400, episode=1280 reward=0.8036811 (476.51 it/sec) -training >> step=7675500, episode=1280 reward=0.804213 (506.04 it/sec) -training >> step=7675600, episode=1280 reward=0.7901571 (484.79 it/sec) -training >> step=7675700, episode=1280 reward=0.7870547 (510.73 it/sec) -training >> step=7675800, episode=1280 reward=0.7850444 (491.43 it/sec) -training >> step=7675900, episode=1280 reward=0.7909043 (489.71 it/sec) -training >> step=7676000, episode=1280 reward=0.7958641 (476.00 it/sec) -training >> step=7676100, episode=1280 reward=0.8114535 (493.64 it/sec) -training >> step=7676200, episode=1280 reward=0.8010798 (535.38 it/sec) -training >> step=7676300, episode=1280 reward=0.8062174 (487.87 it/sec) -training >> step=7676400, episode=1280 reward=0.7899064 (430.13 it/sec) -training >> step=7676500, episode=1280 reward=0.7859144 (516.66 it/sec) -training >> step=7676600, episode=1280 reward=0.7959712 (397.05 it/sec) -training >> step=7676700, episode=1280 reward=0.7993912 (498.06 it/sec) -training >> step=7676800, episode=1280 reward=0.7735752 (488.34 it/sec) -training >> step=7676900, episode=1280 reward=0.7970882 (511.82 it/sec) -training >> step=7677000, episode=1280 reward=0.7896993 (431.54 it/sec) -training >> step=7677100, episode=1280 reward=0.7906299 (480.98 it/sec) -training >> step=7677200, episode=1280 reward=0.8002402 (516.44 it/sec) -training >> step=7677300, episode=1280 reward=0.7675806 (474.86 it/sec) -training >> step=7677400, episode=1280 reward=0.7876919 (495.76 it/sec) -training >> step=7677500, episode=1280 reward=0.780206 (475.75 it/sec) -training >> step=7677600, episode=1280 reward=0.7878413 (488.98 it/sec) -training >> step=7677700, episode=1280 reward=0.7771756 (477.55 it/sec) -training >> step=7677800, episode=1280 reward=0.7762993 (440.13 it/sec) -training >> step=7677900, episode=1280 reward=0.7776223 (490.89 it/sec) -training >> step=7678000, episode=1280 reward=0.7813056 (535.98 it/sec) -training >> step=7678100, episode=1280 reward=0.7784122 (508.48 it/sec) -training >> step=7678200, episode=1280 reward=0.7985463 (452.50 it/sec) -training >> step=7678300, episode=1280 reward=0.7983471 (515.83 it/sec) -training >> step=7678400, episode=1280 reward=0.8128558 (511.93 it/sec) -training >> step=7678500, episode=1280 reward=0.7905385 (496.02 it/sec) -training >> step=7678600, episode=1280 reward=0.779097 (493.79 it/sec) -training >> step=7678700, episode=1280 reward=0.7757559 (494.63 it/sec) -training >> step=7678800, episode=1280 reward=0.7659618 (473.78 it/sec) -training >> step=7678900, episode=1280 reward=0.7883065 (461.74 it/sec) -training >> step=7679000, episode=1280 reward=0.7908345 (508.75 it/sec) -training >> step=7679100, episode=1280 reward=0.7741445 (536.08 it/sec) -training >> step=7679200, episode=1280 reward=0.7762423 (505.79 it/sec) -training >> step=7679300, episode=1281 reward=0.7948501 (115.49 it/sec) -training >> step=7679400, episode=1281 reward=0.7888799 (518.04 it/sec) -training >> step=7679500, episode=1281 reward=0.7890249 (498.44 it/sec) -training >> step=7679600, episode=1281 reward=0.7887383 (485.64 it/sec) -training >> step=7679700, episode=1281 reward=0.7697189 (487.00 it/sec) -training >> step=7679800, episode=1281 reward=0.7889593 (516.95 it/sec) -training >> step=7679900, episode=1281 reward=0.7774578 (463.52 it/sec) -training >> step=7680000, episode=1281 reward=0.7725637 (468.84 it/sec) -training >> step=7680100, episode=1281 reward=0.7734962 (499.80 it/sec) -training >> step=7680200, episode=1281 reward=0.7876025 (525.70 it/sec) -training >> step=7680300, episode=1281 reward=0.7755219 (476.37 it/sec) -training >> step=7680400, episode=1281 reward=0.7995155 (477.13 it/sec) -training >> step=7680500, episode=1281 reward=0.7827917 (479.40 it/sec) -training >> step=7680600, episode=1281 reward=0.7956119 (499.22 it/sec) -training >> step=7680700, episode=1281 reward=0.7787043 (459.83 it/sec) -training >> step=7680800, episode=1281 reward=0.7771344 (458.16 it/sec) -training >> step=7680900, episode=1281 reward=0.8042024 (518.79 it/sec) -training >> step=7681000, episode=1281 reward=0.7616295 (464.38 it/sec) -training >> step=7681100, episode=1281 reward=0.7823427 (483.32 it/sec) -training >> step=7681200, episode=1281 reward=0.7934351 (526.91 it/sec) -training >> step=7681300, episode=1281 reward=0.8015847 (483.61 it/sec) -training >> step=7681400, episode=1281 reward=0.7919704 (487.32 it/sec) -training >> step=7681500, episode=1281 reward=0.7930393 (484.10 it/sec) -training >> step=7681600, episode=1281 reward=0.7905299 (532.01 it/sec) -training >> step=7681700, episode=1281 reward=0.7843282 (453.81 it/sec) -training >> step=7681800, episode=1281 reward=0.8011089 (491.40 it/sec) -training >> step=7681900, episode=1281 reward=0.7911813 (494.80 it/sec) -training >> step=7682000, episode=1281 reward=0.7919327 (520.62 it/sec) -training >> step=7682100, episode=1281 reward=0.7953702 (444.79 it/sec) -training >> step=7682200, episode=1281 reward=0.7792274 (496.32 it/sec) -training >> step=7682300, episode=1281 reward=0.7941642 (493.61 it/sec) -training >> step=7682400, episode=1281 reward=0.7950919 (432.46 it/sec) -training >> step=7682500, episode=1281 reward=0.7844982 (487.99 it/sec) -training >> step=7682600, episode=1281 reward=0.8104966 (456.89 it/sec) -training >> step=7682700, episode=1281 reward=0.7784319 (377.67 it/sec) -training >> step=7682800, episode=1281 reward=0.7963207 (463.74 it/sec) -training >> step=7682900, episode=1281 reward=0.7844291 (492.64 it/sec) -training >> step=7683000, episode=1281 reward=0.8003199 (471.97 it/sec) -training >> step=7683100, episode=1281 reward=0.7896472 (503.23 it/sec) -training >> step=7683200, episode=1281 reward=0.7878327 (499.98 it/sec) -training >> step=7683300, episode=1281 reward=0.7850465 (481.53 it/sec) -training >> step=7683400, episode=1281 reward=0.8077927 (491.64 it/sec) -training >> step=7683500, episode=1281 reward=0.7873925 (430.37 it/sec) -training >> step=7683600, episode=1281 reward=0.8019884 (484.03 it/sec) -training >> step=7683700, episode=1281 reward=0.7824323 (493.24 it/sec) -training >> step=7683800, episode=1281 reward=0.7964883 (497.43 it/sec) -training >> step=7683900, episode=1281 reward=0.7900406 (467.45 it/sec) -training >> step=7684000, episode=1281 reward=0.780031 (482.40 it/sec) -training >> step=7684100, episode=1281 reward=0.7813872 (496.10 it/sec) -training >> step=7684200, episode=1281 reward=0.7888861 (478.36 it/sec) -training >> step=7684300, episode=1281 reward=0.7824346 (457.11 it/sec) -training >> step=7684400, episode=1281 reward=0.77673 (480.39 it/sec) -training >> step=7684500, episode=1281 reward=0.7719459 (516.93 it/sec) -training >> step=7684600, episode=1281 reward=0.7891818 (474.39 it/sec) -training >> step=7684700, episode=1281 reward=0.7925679 (487.96 it/sec) -training >> step=7684800, episode=1281 reward=0.7806851 (502.88 it/sec) -training >> step=7684900, episode=1281 reward=0.7787542 (484.58 it/sec) -training >> step=7685000, episode=1281 reward=0.787966 (446.17 it/sec) -training >> step=7685100, episode=1281 reward=0.7896109 (496.89 it/sec) -training >> step=7685200, episode=1281 reward=0.7779716 (452.04 it/sec) -training >> step=7685300, episode=1282 reward=0.794459 (128.45 it/sec) -training >> step=7685400, episode=1282 reward=0.7806812 (489.85 it/sec) -training >> step=7685500, episode=1282 reward=0.7766321 (498.19 it/sec) -training >> step=7685600, episode=1282 reward=0.7813261 (436.73 it/sec) -training >> step=7685700, episode=1282 reward=0.8003948 (491.67 it/sec) -training >> step=7685800, episode=1282 reward=0.7858511 (509.03 it/sec) -training >> step=7685900, episode=1282 reward=0.8133572 (510.69 it/sec) -training >> step=7686000, episode=1282 reward=0.7807008 (479.50 it/sec) -training >> step=7686100, episode=1282 reward=0.7966237 (503.92 it/sec) -training >> step=7686200, episode=1282 reward=0.8007616 (478.87 it/sec) -training >> step=7686300, episode=1282 reward=0.7876061 (480.55 it/sec) -training >> step=7686400, episode=1282 reward=0.7989719 (506.69 it/sec) -training >> step=7686500, episode=1282 reward=0.7730178 (498.23 it/sec) -training >> step=7686600, episode=1282 reward=0.775382 (494.57 it/sec) -training >> step=7686700, episode=1282 reward=0.7980058 (472.57 it/sec) -training >> step=7686800, episode=1282 reward=0.7762242 (480.21 it/sec) -training >> step=7686900, episode=1282 reward=0.7823076 (438.82 it/sec) -training >> step=7687000, episode=1282 reward=0.792605 (496.47 it/sec) -training >> step=7687100, episode=1282 reward=0.7785826 (437.97 it/sec) -training >> step=7687200, episode=1282 reward=0.7877658 (458.01 it/sec) -training >> step=7687300, episode=1282 reward=0.7884405 (470.37 it/sec) -training >> step=7687400, episode=1282 reward=0.7941963 (531.37 it/sec) -training >> step=7687500, episode=1282 reward=0.7809137 (467.93 it/sec) -training >> step=7687600, episode=1282 reward=0.7692428 (504.68 it/sec) -training >> step=7687700, episode=1282 reward=0.8047038 (474.24 it/sec) -training >> step=7687800, episode=1282 reward=0.8051012 (532.44 it/sec) -training >> step=7687900, episode=1282 reward=0.8031689 (517.43 it/sec) -training >> step=7688000, episode=1282 reward=0.8031165 (440.39 it/sec) -training >> step=7688100, episode=1282 reward=0.7914191 (500.17 it/sec) -training >> step=7688200, episode=1282 reward=0.7895436 (500.81 it/sec) -training >> step=7688300, episode=1282 reward=0.7848755 (496.33 it/sec) -training >> step=7688400, episode=1282 reward=0.7885579 (457.73 it/sec) -training >> step=7688500, episode=1282 reward=0.7926042 (506.76 it/sec) -training >> step=7688600, episode=1282 reward=0.7985573 (463.84 it/sec) -training >> step=7688700, episode=1282 reward=0.8084889 (512.97 it/sec) -training >> step=7688800, episode=1282 reward=0.7760655 (482.17 it/sec) -training >> step=7688900, episode=1282 reward=0.8041487 (540.24 it/sec) -training >> step=7689000, episode=1282 reward=0.798601 (357.54 it/sec) -training >> step=7689100, episode=1282 reward=0.7980807 (480.94 it/sec) -training >> step=7689200, episode=1282 reward=0.7825667 (521.90 it/sec) -training >> step=7689300, episode=1282 reward=0.7920668 (499.80 it/sec) -training >> step=7689400, episode=1282 reward=0.7876179 (510.93 it/sec) -training >> step=7689500, episode=1282 reward=0.7871319 (486.40 it/sec) -training >> step=7689600, episode=1282 reward=0.7880849 (521.66 it/sec) -training >> step=7689700, episode=1282 reward=0.8015562 (486.19 it/sec) -training >> step=7689800, episode=1282 reward=0.7895231 (416.21 it/sec) -training >> step=7689900, episode=1282 reward=0.8014756 (498.66 it/sec) -training >> step=7690000, episode=1282 reward=0.7906363 (532.64 it/sec) -training >> step=7690100, episode=1282 reward=0.7791721 (515.54 it/sec) -training >> step=7690200, episode=1282 reward=0.7767872 (491.53 it/sec) -training >> step=7690300, episode=1282 reward=0.7651052 (473.93 it/sec) -training >> step=7690400, episode=1282 reward=0.7922897 (463.97 it/sec) -training >> step=7690500, episode=1282 reward=0.7988204 (476.36 it/sec) -training >> step=7690600, episode=1282 reward=0.7972857 (470.00 it/sec) -training >> step=7690700, episode=1282 reward=0.792981 (523.33 it/sec) -training >> step=7690800, episode=1282 reward=0.7871447 (465.67 it/sec) -training >> step=7690900, episode=1282 reward=0.8030877 (515.43 it/sec) -training >> step=7691000, episode=1282 reward=0.7900741 (497.29 it/sec) -training >> step=7691100, episode=1282 reward=0.7934477 (509.43 it/sec) -training >> step=7691200, episode=1282 reward=0.770116 (482.54 it/sec) -training >> step=7691300, episode=1283 reward=0.7570922 (119.72 it/sec) -training >> step=7691400, episode=1283 reward=0.7705877 (467.59 it/sec) -training >> step=7691500, episode=1283 reward=0.7883279 (501.23 it/sec) -training >> step=7691600, episode=1283 reward=0.7837102 (476.27 it/sec) -training >> step=7691700, episode=1283 reward=0.8077295 (483.23 it/sec) -training >> step=7691800, episode=1283 reward=0.7717476 (512.61 it/sec) -training >> step=7691900, episode=1283 reward=0.7924584 (499.37 it/sec) -training >> step=7692000, episode=1283 reward=0.7893365 (481.36 it/sec) -training >> step=7692100, episode=1283 reward=0.7886616 (511.22 it/sec) -training >> step=7692200, episode=1283 reward=0.791441 (470.53 it/sec) -training >> step=7692300, episode=1283 reward=0.8100332 (498.26 it/sec) -training >> step=7692400, episode=1283 reward=0.7907706 (488.57 it/sec) -training >> step=7692500, episode=1283 reward=0.7895784 (524.79 it/sec) -training >> step=7692600, episode=1283 reward=0.7875677 (492.73 it/sec) -training >> step=7692700, episode=1283 reward=0.7823024 (501.82 it/sec) -training >> step=7692800, episode=1283 reward=0.8030754 (499.81 it/sec) -training >> step=7692900, episode=1283 reward=0.799968 (483.31 it/sec) -training >> step=7693000, episode=1283 reward=0.7943 (470.95 it/sec) -training >> step=7693100, episode=1283 reward=0.797763 (492.94 it/sec) -training >> step=7693200, episode=1283 reward=0.7897393 (494.10 it/sec) -training >> step=7693300, episode=1283 reward=0.8021921 (498.91 it/sec) -training >> step=7693400, episode=1283 reward=0.7799864 (482.02 it/sec) -training >> step=7693500, episode=1283 reward=0.810699 (496.70 it/sec) -training >> step=7693600, episode=1283 reward=0.7981277 (525.11 it/sec) -training >> step=7693700, episode=1283 reward=0.7951378 (490.46 it/sec) -training >> step=7693800, episode=1283 reward=0.7812595 (458.90 it/sec) -training >> step=7693900, episode=1283 reward=0.7851881 (491.14 it/sec) -training >> step=7694000, episode=1283 reward=0.8004294 (464.66 it/sec) -training >> step=7694100, episode=1283 reward=0.7777857 (497.80 it/sec) -training >> step=7694200, episode=1283 reward=0.7963591 (436.40 it/sec) -training >> step=7694300, episode=1283 reward=0.7844002 (463.47 it/sec) -training >> step=7694400, episode=1283 reward=0.8017909 (443.68 it/sec) -training >> step=7694500, episode=1283 reward=0.7849461 (485.52 it/sec) -training >> step=7694600, episode=1283 reward=0.7854218 (485.69 it/sec) -training >> step=7694700, episode=1283 reward=0.8196182 (499.55 it/sec) -training >> step=7694800, episode=1283 reward=0.8040035 (499.47 it/sec) -training >> step=7694900, episode=1283 reward=0.7923458 (492.65 it/sec) -training >> step=7695000, episode=1283 reward=0.7801479 (508.94 it/sec) -training >> step=7695100, episode=1283 reward=0.7908317 (486.57 it/sec) -training >> step=7695200, episode=1283 reward=0.780504 (498.33 it/sec) -training >> step=7695300, episode=1283 reward=0.7717607 (364.25 it/sec) -training >> step=7695400, episode=1283 reward=0.7937211 (478.88 it/sec) -training >> step=7695500, episode=1283 reward=0.7927172 (495.05 it/sec) -training >> step=7695600, episode=1283 reward=0.7922699 (481.85 it/sec) -training >> step=7695700, episode=1283 reward=0.7982993 (500.30 it/sec) -training >> step=7695800, episode=1283 reward=0.7926148 (519.98 it/sec) -training >> step=7695900, episode=1283 reward=0.781288 (494.54 it/sec) -training >> step=7696000, episode=1283 reward=0.7946708 (511.39 it/sec) -training >> step=7696100, episode=1283 reward=0.811353 (519.90 it/sec) -training >> step=7696200, episode=1283 reward=0.7780762 (465.96 it/sec) -training >> step=7696300, episode=1283 reward=0.7717874 (491.44 it/sec) -training >> step=7696400, episode=1283 reward=0.7694131 (464.52 it/sec) -training >> step=7696500, episode=1283 reward=0.794523 (520.14 it/sec) -training >> step=7696600, episode=1283 reward=0.8031509 (486.44 it/sec) -training >> step=7696700, episode=1283 reward=0.7864068 (512.27 it/sec) -training >> step=7696800, episode=1283 reward=0.793444 (514.04 it/sec) -training >> step=7696900, episode=1283 reward=0.7981615 (505.30 it/sec) -training >> step=7697000, episode=1283 reward=0.7734685 (522.69 it/sec) -training >> step=7697100, episode=1283 reward=0.7800832 (490.41 it/sec) -training >> step=7697200, episode=1283 reward=0.783857 (517.03 it/sec) -training >> step=7697300, episode=1284 reward=0.8019704 (117.21 it/sec) -training >> step=7697400, episode=1284 reward=0.7830878 (494.66 it/sec) -training >> step=7697500, episode=1284 reward=0.7862259 (493.56 it/sec) -training >> step=7697600, episode=1284 reward=0.7921821 (514.78 it/sec) -training >> step=7697700, episode=1284 reward=0.8024523 (484.00 it/sec) -training >> step=7697800, episode=1284 reward=0.797309 (465.92 it/sec) -training >> step=7697900, episode=1284 reward=0.7866625 (508.80 it/sec) -training >> step=7698000, episode=1284 reward=0.7823184 (492.67 it/sec) -training >> step=7698100, episode=1284 reward=0.78298 (477.81 it/sec) -training >> step=7698200, episode=1284 reward=0.785805 (453.66 it/sec) -training >> step=7698300, episode=1284 reward=0.7980449 (519.79 it/sec) -training >> step=7698400, episode=1284 reward=0.7961246 (465.46 it/sec) -training >> step=7698500, episode=1284 reward=0.7867457 (497.75 it/sec) -training >> step=7698600, episode=1284 reward=0.7792592 (477.06 it/sec) -training >> step=7698700, episode=1284 reward=0.7818933 (513.62 it/sec) -training >> step=7698800, episode=1284 reward=0.7980247 (513.39 it/sec) -training >> step=7698900, episode=1284 reward=0.7942595 (490.72 it/sec) -training >> step=7699000, episode=1284 reward=0.7936284 (470.45 it/sec) -training >> step=7699100, episode=1284 reward=0.8020141 (448.17 it/sec) -training >> step=7699200, episode=1284 reward=0.7931241 (510.83 it/sec) -training >> step=7699300, episode=1284 reward=0.7860807 (503.82 it/sec) -training >> step=7699400, episode=1284 reward=0.798377 (522.07 it/sec) -training >> step=7699500, episode=1284 reward=0.7877505 (494.14 it/sec) -training >> step=7699600, episode=1284 reward=0.7947639 (481.07 it/sec) -training >> step=7699700, episode=1284 reward=0.7942129 (518.45 it/sec) -training >> step=7699800, episode=1284 reward=0.7702212 (506.05 it/sec) -training >> step=7699900, episode=1284 reward=0.7985724 (491.98 it/sec) -training >> step=7700000, episode=1284 reward=0.7876638 (509.92 it/sec) -training >> step=7700100, episode=1284 reward=0.8013252 (467.72 it/sec) -training >> step=7700200, episode=1284 reward=0.7860766 (463.10 it/sec) -training >> step=7700300, episode=1284 reward=0.7961257 (462.73 it/sec) -training >> step=7700400, episode=1284 reward=0.8017328 (483.12 it/sec) -training >> step=7700500, episode=1284 reward=0.7895558 (496.25 it/sec) -training >> step=7700600, episode=1284 reward=0.7985247 (451.06 it/sec) -training >> step=7700700, episode=1284 reward=0.8005548 (511.32 it/sec) -training >> step=7700800, episode=1284 reward=0.7828287 (475.81 it/sec) -training >> step=7700900, episode=1284 reward=0.7879443 (489.77 it/sec) -training >> step=7701000, episode=1284 reward=0.7865493 (480.42 it/sec) -training >> step=7701100, episode=1284 reward=0.7606497 (503.19 it/sec) -training >> step=7701200, episode=1284 reward=0.7938648 (492.31 it/sec) -training >> step=7701300, episode=1284 reward=0.7860726 (494.37 it/sec) -training >> step=7701400, episode=1284 reward=0.7632387 (351.08 it/sec) -training >> step=7701500, episode=1284 reward=0.8071747 (503.24 it/sec) -training >> step=7701600, episode=1284 reward=0.7637283 (497.58 it/sec) -training >> step=7701700, episode=1284 reward=0.7853737 (517.91 it/sec) -training >> step=7701800, episode=1284 reward=0.8028343 (469.41 it/sec) -training >> step=7701900, episode=1284 reward=0.7793089 (520.24 it/sec) -training >> step=7702000, episode=1284 reward=0.7898166 (489.26 it/sec) -training >> step=7702100, episode=1284 reward=0.7931828 (512.86 it/sec) -training >> step=7702200, episode=1284 reward=0.777895 (487.01 it/sec) -training >> step=7702300, episode=1284 reward=0.7942107 (492.45 it/sec) -training >> step=7702400, episode=1284 reward=0.7745422 (476.06 it/sec) -training >> step=7702500, episode=1284 reward=0.77522 (467.94 it/sec) -training >> step=7702600, episode=1284 reward=0.7771387 (511.91 it/sec) -training >> step=7702700, episode=1284 reward=0.8220799 (501.84 it/sec) -training >> step=7702800, episode=1284 reward=0.7893205 (516.10 it/sec) -training >> step=7702900, episode=1284 reward=0.797592 (502.81 it/sec) -training >> step=7703000, episode=1284 reward=0.7805728 (531.85 it/sec) -training >> step=7703100, episode=1284 reward=0.7864946 (484.47 it/sec) -training >> step=7703200, episode=1284 reward=0.8149204 (505.72 it/sec) -training >> step=7703300, episode=1285 reward=0.7870931 (122.63 it/sec) -training >> step=7703400, episode=1285 reward=0.7688838 (445.42 it/sec) -training >> step=7703500, episode=1285 reward=0.7918143 (344.65 it/sec) -training >> step=7703600, episode=1285 reward=0.781209 (467.42 it/sec) -training >> step=7703700, episode=1285 reward=0.7895631 (500.51 it/sec) -training >> step=7703800, episode=1285 reward=0.7835471 (492.55 it/sec) -training >> step=7703900, episode=1285 reward=0.7828752 (500.33 it/sec) -training >> step=7704000, episode=1285 reward=0.8020215 (501.16 it/sec) -training >> step=7704100, episode=1285 reward=0.7847725 (508.53 it/sec) -training >> step=7704200, episode=1285 reward=0.8081443 (494.15 it/sec) -training >> step=7704300, episode=1285 reward=0.787401 (482.64 it/sec) -training >> step=7704400, episode=1285 reward=0.7963662 (489.94 it/sec) -training >> step=7704500, episode=1285 reward=0.7942981 (495.46 it/sec) -training >> step=7704600, episode=1285 reward=0.8016198 (466.55 it/sec) -training >> step=7704700, episode=1285 reward=0.7948112 (406.69 it/sec) -training >> step=7704800, episode=1285 reward=0.7781056 (501.99 it/sec) -training >> step=7704900, episode=1285 reward=0.791443 (466.93 it/sec) -training >> step=7705000, episode=1285 reward=0.7885419 (500.02 it/sec) -training >> step=7705100, episode=1285 reward=0.7838237 (493.04 it/sec) -training >> step=7705200, episode=1285 reward=0.7788011 (493.83 it/sec) -training >> step=7705300, episode=1285 reward=0.7978615 (495.69 it/sec) -training >> step=7705400, episode=1285 reward=0.7770285 (483.75 it/sec) -training >> step=7705500, episode=1285 reward=0.7813294 (499.00 it/sec) -training >> step=7705600, episode=1285 reward=0.8097259 (429.97 it/sec) -training >> step=7705700, episode=1285 reward=0.801424 (430.10 it/sec) -training >> step=7705800, episode=1285 reward=0.7863588 (488.69 it/sec) -training >> step=7705900, episode=1285 reward=0.7902864 (528.80 it/sec) -training >> step=7706000, episode=1285 reward=0.7866749 (437.71 it/sec) -training >> step=7706100, episode=1285 reward=0.7985888 (479.11 it/sec) -training >> step=7706200, episode=1285 reward=0.7882729 (469.64 it/sec) -training >> step=7706300, episode=1285 reward=0.8235032 (461.52 it/sec) -training >> step=7706400, episode=1285 reward=0.7924839 (452.84 it/sec) -training >> step=7706500, episode=1285 reward=0.7817987 (467.72 it/sec) -training >> step=7706600, episode=1285 reward=0.8091256 (516.64 it/sec) -training >> step=7706700, episode=1285 reward=0.7781824 (507.07 it/sec) -training >> step=7706800, episode=1285 reward=0.8068322 (456.76 it/sec) -training >> step=7706900, episode=1285 reward=0.7929864 (493.60 it/sec) -training >> step=7707000, episode=1285 reward=0.78098 (483.61 it/sec) -training >> step=7707100, episode=1285 reward=0.8010575 (478.77 it/sec) -training >> step=7707200, episode=1285 reward=0.8041115 (486.61 it/sec) -training >> step=7707300, episode=1285 reward=0.7988234 (515.85 it/sec) -training >> step=7707400, episode=1285 reward=0.7917069 (351.98 it/sec) -training >> step=7707500, episode=1285 reward=0.767791 (437.19 it/sec) -training >> step=7707600, episode=1285 reward=0.7893072 (455.73 it/sec) -training >> step=7707700, episode=1285 reward=0.7912663 (467.19 it/sec) -training >> step=7707800, episode=1285 reward=0.8025537 (468.49 it/sec) -training >> step=7707900, episode=1285 reward=0.7902269 (473.44 it/sec) -training >> step=7708000, episode=1285 reward=0.7786794 (476.24 it/sec) -training >> step=7708100, episode=1285 reward=0.7829413 (538.61 it/sec) -training >> step=7708200, episode=1285 reward=0.7619028 (498.10 it/sec) -training >> step=7708300, episode=1285 reward=0.7976214 (454.77 it/sec) -training >> step=7708400, episode=1285 reward=0.7894372 (492.20 it/sec) -training >> step=7708500, episode=1285 reward=0.7845873 (492.24 it/sec) -training >> step=7708600, episode=1285 reward=0.7816657 (509.12 it/sec) -training >> step=7708700, episode=1285 reward=0.7747955 (496.78 it/sec) -training >> step=7708800, episode=1285 reward=0.8164532 (530.44 it/sec) -training >> step=7708900, episode=1285 reward=0.7959124 (458.80 it/sec) -training >> step=7709000, episode=1285 reward=0.7976187 (503.63 it/sec) -training >> step=7709100, episode=1285 reward=0.7855512 (516.81 it/sec) -training >> step=7709200, episode=1285 reward=0.7931284 (501.51 it/sec) -training >> step=7709300, episode=1286 reward=0.7998073 (135.13 it/sec) -training >> step=7709400, episode=1286 reward=0.7799575 (451.62 it/sec) -training >> step=7709500, episode=1286 reward=0.7735699 (466.81 it/sec) -training >> step=7709600, episode=1286 reward=0.8017199 (502.65 it/sec) -training >> step=7709700, episode=1286 reward=0.7786109 (497.33 it/sec) -training >> step=7709800, episode=1286 reward=0.7753461 (506.97 it/sec) -training >> step=7709900, episode=1286 reward=0.7764254 (462.26 it/sec) -training >> step=7710000, episode=1286 reward=0.7844205 (479.38 it/sec) -training >> step=7710100, episode=1286 reward=0.8048043 (457.23 it/sec) -training >> step=7710200, episode=1286 reward=0.7912014 (488.45 it/sec) -training >> step=7710300, episode=1286 reward=0.7923414 (520.98 it/sec) -training >> step=7710400, episode=1286 reward=0.8034562 (465.95 it/sec) -training >> step=7710500, episode=1286 reward=0.796618 (508.96 it/sec) -training >> step=7710600, episode=1286 reward=0.7941487 (494.16 it/sec) -training >> step=7710700, episode=1286 reward=0.7878359 (470.76 it/sec) -training >> step=7710800, episode=1286 reward=0.7748387 (457.05 it/sec) -training >> step=7710900, episode=1286 reward=0.8062249 (507.44 it/sec) -training >> step=7711000, episode=1286 reward=0.7783946 (492.42 it/sec) -training >> step=7711100, episode=1286 reward=0.8147261 (484.71 it/sec) -training >> step=7711200, episode=1286 reward=0.7820418 (515.76 it/sec) -training >> step=7711300, episode=1286 reward=0.7886267 (485.19 it/sec) -training >> step=7711400, episode=1286 reward=0.7955506 (496.17 it/sec) -training >> step=7711500, episode=1286 reward=0.8098067 (475.72 it/sec) -training >> step=7711600, episode=1286 reward=0.7997783 (505.11 it/sec) -training >> step=7711700, episode=1286 reward=0.7908096 (519.63 it/sec) -training >> step=7711800, episode=1286 reward=0.7859539 (491.96 it/sec) -training >> step=7711900, episode=1286 reward=0.7818616 (495.26 it/sec) -training >> step=7712000, episode=1286 reward=0.7485466 (489.40 it/sec) -training >> step=7712100, episode=1286 reward=0.7900571 (467.49 it/sec) -training >> step=7712200, episode=1286 reward=0.7824624 (466.55 it/sec) -training >> step=7712300, episode=1286 reward=0.8031607 (456.75 it/sec) -training >> step=7712400, episode=1286 reward=0.7744644 (491.12 it/sec) -training >> step=7712500, episode=1286 reward=0.7998012 (458.07 it/sec) -training >> step=7712600, episode=1286 reward=0.7933969 (517.02 it/sec) -training >> step=7712700, episode=1286 reward=0.7722992 (510.03 it/sec) -training >> step=7712800, episode=1286 reward=0.8042778 (481.73 it/sec) -training >> step=7712900, episode=1286 reward=0.8082652 (516.63 it/sec) -training >> step=7713000, episode=1286 reward=0.7925551 (461.98 it/sec) -training >> step=7713100, episode=1286 reward=0.7920092 (483.17 it/sec) -training >> step=7713200, episode=1286 reward=0.7948887 (523.76 it/sec) -training >> step=7713300, episode=1286 reward=0.7952322 (440.10 it/sec) -training >> step=7713400, episode=1286 reward=0.8037105 (488.59 it/sec) -training >> step=7713500, episode=1286 reward=0.8016214 (519.51 it/sec) -training >> step=7713600, episode=1286 reward=0.7956588 (365.94 it/sec) -training >> step=7713700, episode=1286 reward=0.8063486 (477.68 it/sec) -training >> step=7713800, episode=1286 reward=0.7915742 (485.91 it/sec) -training >> step=7713900, episode=1286 reward=0.8061403 (475.74 it/sec) -training >> step=7714000, episode=1286 reward=0.7915837 (448.92 it/sec) -training >> step=7714100, episode=1286 reward=0.7933131 (521.80 it/sec) -training >> step=7714200, episode=1286 reward=0.7851499 (476.37 it/sec) -training >> step=7714300, episode=1286 reward=0.7848259 (521.39 it/sec) -training >> step=7714400, episode=1286 reward=0.7750396 (490.89 it/sec) -training >> step=7714500, episode=1286 reward=0.7789996 (483.83 it/sec) -training >> step=7714600, episode=1286 reward=0.7693059 (479.17 it/sec) -training >> step=7714700, episode=1286 reward=0.7932937 (490.51 it/sec) -training >> step=7714800, episode=1286 reward=0.7851751 (490.44 it/sec) -training >> step=7714900, episode=1286 reward=0.8040294 (490.30 it/sec) -training >> step=7715000, episode=1286 reward=0.7746401 (510.03 it/sec) -training >> step=7715100, episode=1286 reward=0.79145 (477.71 it/sec) -training >> step=7715200, episode=1286 reward=0.7798657 (466.30 it/sec) -training >> step=7715300, episode=1287 reward=0.7786694 (166.05 it/sec) -training >> step=7715400, episode=1287 reward=0.7903587 (435.84 it/sec) -training >> step=7715500, episode=1287 reward=0.7881737 (495.45 it/sec) -training >> step=7715600, episode=1287 reward=0.7687463 (477.59 it/sec) -training >> step=7715700, episode=1287 reward=0.7823296 (430.91 it/sec) -training >> step=7715800, episode=1287 reward=0.7872937 (512.57 it/sec) -training >> step=7715900, episode=1287 reward=0.7992243 (500.89 it/sec) -training >> step=7716000, episode=1287 reward=0.8102598 (489.24 it/sec) -training >> step=7716100, episode=1287 reward=0.7879571 (508.05 it/sec) -training >> step=7716200, episode=1287 reward=0.7941184 (448.70 it/sec) -training >> step=7716300, episode=1287 reward=0.7935439 (483.81 it/sec) -training >> step=7716400, episode=1287 reward=0.7711395 (534.10 it/sec) -training >> step=7716500, episode=1287 reward=0.7871993 (491.02 it/sec) -training >> step=7716600, episode=1287 reward=0.7987667 (465.86 it/sec) -training >> step=7716700, episode=1287 reward=0.8110687 (409.70 it/sec) -training >> step=7716800, episode=1287 reward=0.7711535 (533.45 it/sec) -training >> step=7716900, episode=1287 reward=0.7915107 (474.66 it/sec) -training >> step=7717000, episode=1287 reward=0.7900986 (451.64 it/sec) -training >> step=7717100, episode=1287 reward=0.7735691 (520.65 it/sec) -training >> step=7717200, episode=1287 reward=0.7908725 (453.94 it/sec) -training >> step=7717300, episode=1287 reward=0.7921197 (488.47 it/sec) -training >> step=7717400, episode=1287 reward=0.7939467 (516.25 it/sec) -training >> step=7717500, episode=1287 reward=0.7821578 (475.63 it/sec) -training >> step=7717600, episode=1287 reward=0.7976129 (477.38 it/sec) -training >> step=7717700, episode=1287 reward=0.796608 (466.20 it/sec) -training >> step=7717800, episode=1287 reward=0.7822468 (504.75 it/sec) -training >> step=7717900, episode=1287 reward=0.8052598 (508.38 it/sec) -training >> step=7718000, episode=1287 reward=0.792978 (482.89 it/sec) -training >> step=7718100, episode=1287 reward=0.7921947 (494.19 it/sec) -training >> step=7718200, episode=1287 reward=0.7869194 (528.58 it/sec) -training >> step=7718300, episode=1287 reward=0.8026011 (463.85 it/sec) -training >> step=7718400, episode=1287 reward=0.8043833 (481.79 it/sec) -training >> step=7718500, episode=1287 reward=0.7734269 (475.36 it/sec) -training >> step=7718600, episode=1287 reward=0.7994232 (494.94 it/sec) -training >> step=7718700, episode=1287 reward=0.8045332 (452.08 it/sec) -training >> step=7718800, episode=1287 reward=0.7809008 (506.16 it/sec) -training >> step=7718900, episode=1287 reward=0.8005683 (469.45 it/sec) -training >> step=7719000, episode=1287 reward=0.7900619 (458.14 it/sec) -training >> step=7719100, episode=1287 reward=0.7928366 (473.81 it/sec) -training >> step=7719200, episode=1287 reward=0.8014913 (496.05 it/sec) -training >> step=7719300, episode=1287 reward=0.7819746 (451.50 it/sec) -training >> step=7719400, episode=1287 reward=0.7873263 (487.88 it/sec) -training >> step=7719500, episode=1287 reward=0.7721862 (476.58 it/sec) -training >> step=7719600, episode=1287 reward=0.803721 (505.36 it/sec) -training >> step=7719700, episode=1287 reward=0.7829953 (480.86 it/sec) -training >> step=7719800, episode=1287 reward=0.7918224 (385.16 it/sec) -training >> step=7719900, episode=1287 reward=0.7980376 (509.90 it/sec) -training >> step=7720000, episode=1287 reward=0.7991107 (518.42 it/sec) -training >> step=7720100, episode=1287 reward=0.7828603 (455.26 it/sec) -training >> step=7720200, episode=1287 reward=0.7723425 (501.81 it/sec) -training >> step=7720300, episode=1287 reward=0.8040482 (460.84 it/sec) -training >> step=7720400, episode=1287 reward=0.787553 (530.98 it/sec) -training >> step=7720500, episode=1287 reward=0.7930647 (483.72 it/sec) -training >> step=7720600, episode=1287 reward=0.7843556 (496.25 it/sec) -training >> step=7720700, episode=1287 reward=0.7981323 (497.31 it/sec) -training >> step=7720800, episode=1287 reward=0.7878519 (460.25 it/sec) -training >> step=7720900, episode=1287 reward=0.7862899 (501.19 it/sec) -training >> step=7721000, episode=1287 reward=0.7861903 (447.05 it/sec) -training >> step=7721100, episode=1287 reward=0.7849409 (466.06 it/sec) -training >> step=7721200, episode=1287 reward=0.7933474 (480.08 it/sec) -training >> step=7721300, episode=1288 reward=0.7957288 (137.90 it/sec) -training >> step=7721400, episode=1288 reward=0.7786653 (483.04 it/sec) -training >> step=7721500, episode=1288 reward=0.7963102 (485.53 it/sec) -training >> step=7721600, episode=1288 reward=0.8006649 (483.48 it/sec) -training >> step=7721700, episode=1288 reward=0.7956514 (490.73 it/sec) -training >> step=7721800, episode=1288 reward=0.7892488 (516.49 it/sec) -training >> step=7721900, episode=1288 reward=0.7837729 (459.80 it/sec) -training >> step=7722000, episode=1288 reward=0.7891619 (444.77 it/sec) -training >> step=7722100, episode=1288 reward=0.7906681 (473.11 it/sec) -training >> step=7722200, episode=1288 reward=0.7845506 (487.26 it/sec) -training >> step=7722300, episode=1288 reward=0.7833778 (477.85 it/sec) -training >> step=7722400, episode=1288 reward=0.7926196 (493.82 it/sec) -training >> step=7722500, episode=1288 reward=0.7774981 (452.12 it/sec) -training >> step=7722600, episode=1288 reward=0.7837574 (520.98 it/sec) -training >> step=7722700, episode=1288 reward=0.7713673 (471.83 it/sec) -training >> step=7722800, episode=1288 reward=0.8152109 (442.58 it/sec) -training >> step=7722900, episode=1288 reward=0.7953218 (481.30 it/sec) -training >> step=7723000, episode=1288 reward=0.7871091 (491.03 it/sec) -training >> step=7723100, episode=1288 reward=0.8023427 (489.19 it/sec) -training >> step=7723200, episode=1288 reward=0.7825833 (498.68 it/sec) -training >> step=7723300, episode=1288 reward=0.7739943 (480.42 it/sec) -training >> step=7723400, episode=1288 reward=0.7924566 (492.62 it/sec) -training >> step=7723500, episode=1288 reward=0.780293 (479.04 it/sec) -training >> step=7723600, episode=1288 reward=0.7885423 (534.31 it/sec) -training >> step=7723700, episode=1288 reward=0.7897822 (456.70 it/sec) -training >> step=7723800, episode=1288 reward=0.8002128 (513.64 it/sec) -training >> step=7723900, episode=1288 reward=0.787951 (517.42 it/sec) -training >> step=7724000, episode=1288 reward=0.7777846 (499.99 it/sec) -training >> step=7724100, episode=1288 reward=0.7812603 (458.19 it/sec) -training >> step=7724200, episode=1288 reward=0.7944825 (458.01 it/sec) -training >> step=7724300, episode=1288 reward=0.7892336 (478.02 it/sec) -training >> step=7724400, episode=1288 reward=0.7823809 (465.57 it/sec) -training >> step=7724500, episode=1288 reward=0.7913424 (502.74 it/sec) -training >> step=7724600, episode=1288 reward=0.7815396 (494.00 it/sec) -training >> step=7724700, episode=1288 reward=0.8043891 (480.56 it/sec) -training >> step=7724800, episode=1288 reward=0.7823163 (473.53 it/sec) -training >> step=7724900, episode=1288 reward=0.7732716 (457.00 it/sec) -training >> step=7725000, episode=1288 reward=0.7952607 (502.21 it/sec) -training >> step=7725100, episode=1288 reward=0.8171936 (490.12 it/sec) -training >> step=7725200, episode=1288 reward=0.7986377 (503.92 it/sec) -training >> step=7725300, episode=1288 reward=0.7769334 (468.90 it/sec) -training >> step=7725400, episode=1288 reward=0.7874132 (522.51 it/sec) -training >> step=7725500, episode=1288 reward=0.7954358 (474.95 it/sec) -training >> step=7725600, episode=1288 reward=0.8029529 (480.94 it/sec) -training >> step=7725700, episode=1288 reward=0.80185 (477.93 it/sec) -training >> step=7725800, episode=1288 reward=0.7948484 (457.64 it/sec) -training >> step=7725900, episode=1288 reward=0.7716582 (474.99 it/sec) -training >> step=7726000, episode=1288 reward=0.7741311 (471.60 it/sec) -training >> step=7726100, episode=1288 reward=0.7858937 (335.70 it/sec) -training >> step=7726200, episode=1288 reward=0.7678164 (500.41 it/sec) -training >> step=7726300, episode=1288 reward=0.7890199 (469.69 it/sec) -training >> step=7726400, episode=1288 reward=0.7801896 (457.61 it/sec) -training >> step=7726500, episode=1288 reward=0.7792402 (538.67 it/sec) -training >> step=7726600, episode=1288 reward=0.7661334 (464.23 it/sec) -training >> step=7726700, episode=1288 reward=0.8111112 (459.23 it/sec) -training >> step=7726800, episode=1288 reward=0.7965534 (483.66 it/sec) -training >> step=7726900, episode=1288 reward=0.7819349 (507.98 it/sec) -training >> step=7727000, episode=1288 reward=0.7865313 (452.81 it/sec) -training >> step=7727100, episode=1288 reward=0.7832785 (458.00 it/sec) -training >> step=7727200, episode=1288 reward=0.7824868 (463.57 it/sec) -training >> step=7727300, episode=1289 reward=0.7813357 (139.13 it/sec) -training >> step=7727400, episode=1289 reward=0.7886446 (497.85 it/sec) -training >> step=7727500, episode=1289 reward=0.7800099 (504.10 it/sec) -training >> step=7727600, episode=1289 reward=0.7973418 (463.23 it/sec) -training >> step=7727700, episode=1289 reward=0.793232 (451.92 it/sec) -training >> step=7727800, episode=1289 reward=0.7706793 (447.53 it/sec) -training >> step=7727900, episode=1289 reward=0.7985851 (501.36 it/sec) -training >> step=7728000, episode=1289 reward=0.7994605 (524.34 it/sec) -training >> step=7728100, episode=1289 reward=0.7979088 (472.82 it/sec) -training >> step=7728200, episode=1289 reward=0.7844233 (482.90 it/sec) -training >> step=7728300, episode=1289 reward=0.7874158 (539.86 it/sec) -training >> step=7728400, episode=1289 reward=0.7954349 (467.38 it/sec) -training >> step=7728500, episode=1289 reward=0.7620041 (479.20 it/sec) -training >> step=7728600, episode=1289 reward=0.7935604 (446.05 it/sec) -training >> step=7728700, episode=1289 reward=0.7987452 (518.33 it/sec) -training >> step=7728800, episode=1289 reward=0.8031916 (478.43 it/sec) -training >> step=7728900, episode=1289 reward=0.8075462 (477.76 it/sec) -training >> step=7729000, episode=1289 reward=0.7837538 (461.74 it/sec) -training >> step=7729100, episode=1289 reward=0.7843977 (472.88 it/sec) -training >> step=7729200, episode=1289 reward=0.7807861 (471.05 it/sec) -training >> step=7729300, episode=1289 reward=0.7856434 (511.03 it/sec) -training >> step=7729400, episode=1289 reward=0.793789 (506.15 it/sec) -training >> step=7729500, episode=1289 reward=0.8006053 (503.78 it/sec) -training >> step=7729600, episode=1289 reward=0.8117058 (469.13 it/sec) -training >> step=7729700, episode=1289 reward=0.7976524 (435.55 it/sec) -training >> step=7729800, episode=1289 reward=0.7830239 (514.52 it/sec) -training >> step=7729900, episode=1289 reward=0.7901599 (449.84 it/sec) -training >> step=7730000, episode=1289 reward=0.8005386 (473.43 it/sec) -training >> step=7730100, episode=1289 reward=0.803054 (515.49 it/sec) -training >> step=7730200, episode=1289 reward=0.7926612 (513.78 it/sec) -training >> step=7730300, episode=1289 reward=0.8149468 (453.65 it/sec) -training >> step=7730400, episode=1289 reward=0.8081914 (439.76 it/sec) -training >> step=7730500, episode=1289 reward=0.7927701 (515.50 it/sec) -training >> step=7730600, episode=1289 reward=0.8012938 (483.11 it/sec) -training >> step=7730700, episode=1289 reward=0.7854599 (517.12 it/sec) -training >> step=7730800, episode=1289 reward=0.7837837 (510.18 it/sec) -training >> step=7730900, episode=1289 reward=0.7827123 (503.22 it/sec) -training >> step=7731000, episode=1289 reward=0.7784414 (473.74 it/sec) -training >> step=7731100, episode=1289 reward=0.7853812 (490.47 it/sec) -training >> step=7731200, episode=1289 reward=0.8000409 (511.26 it/sec) -training >> step=7731300, episode=1289 reward=0.7845556 (475.34 it/sec) -training >> step=7731400, episode=1289 reward=0.7775996 (470.29 it/sec) -training >> step=7731500, episode=1289 reward=0.7802575 (499.82 it/sec) -training >> step=7731600, episode=1289 reward=0.7946153 (463.69 it/sec) -training >> step=7731700, episode=1289 reward=0.8050212 (454.29 it/sec) -training >> step=7731800, episode=1289 reward=0.7924078 (464.24 it/sec) -training >> step=7731900, episode=1289 reward=0.7958831 (498.88 it/sec) -training >> step=7732000, episode=1289 reward=0.7960981 (473.48 it/sec) -training >> step=7732100, episode=1289 reward=0.7737311 (434.85 it/sec) -training >> step=7732200, episode=1289 reward=0.7992581 (372.86 it/sec) -training >> step=7732300, episode=1289 reward=0.8011237 (508.26 it/sec) -training >> step=7732400, episode=1289 reward=0.7956668 (497.86 it/sec) -training >> step=7732500, episode=1289 reward=0.7903827 (460.73 it/sec) -training >> step=7732600, episode=1289 reward=0.7805861 (511.58 it/sec) -training >> step=7732700, episode=1289 reward=0.7936707 (521.12 it/sec) -training >> step=7732800, episode=1289 reward=0.7780603 (506.18 it/sec) -training >> step=7732900, episode=1289 reward=0.789453 (474.57 it/sec) -training >> step=7733000, episode=1289 reward=0.7946219 (452.17 it/sec) -training >> step=7733100, episode=1289 reward=0.7948524 (522.24 it/sec) -training >> step=7733200, episode=1289 reward=0.7978204 (490.50 it/sec) -training >> step=7733300, episode=1290 reward=0.7925117 (125.22 it/sec) -training >> step=7733400, episode=1290 reward=0.7984517 (476.76 it/sec) -training >> step=7733500, episode=1290 reward=0.7803278 (444.72 it/sec) -training >> step=7733600, episode=1290 reward=0.7810453 (488.17 it/sec) -training >> step=7733700, episode=1290 reward=0.7995523 (495.13 it/sec) -training >> step=7733800, episode=1290 reward=0.8075188 (522.15 it/sec) -training >> step=7733900, episode=1290 reward=0.7807502 (516.93 it/sec) -training >> step=7734000, episode=1290 reward=0.7959891 (476.51 it/sec) -training >> step=7734100, episode=1290 reward=0.7887973 (521.27 it/sec) -training >> step=7734200, episode=1290 reward=0.7821254 (486.63 it/sec) -training >> step=7734300, episode=1290 reward=0.7827682 (503.06 it/sec) -training >> step=7734400, episode=1290 reward=0.7852165 (485.00 it/sec) -training >> step=7734500, episode=1290 reward=0.7786064 (510.87 it/sec) -training >> step=7734600, episode=1290 reward=0.790908 (485.58 it/sec) -training >> step=7734700, episode=1290 reward=0.7907616 (497.09 it/sec) -training >> step=7734800, episode=1290 reward=0.802286 (463.48 it/sec) -training >> step=7734900, episode=1290 reward=0.7945738 (467.47 it/sec) -training >> step=7735000, episode=1290 reward=0.8029994 (473.56 it/sec) -training >> step=7735100, episode=1290 reward=0.783756 (469.35 it/sec) -training >> step=7735200, episode=1290 reward=0.8005823 (489.50 it/sec) -training >> step=7735300, episode=1290 reward=0.7827119 (507.51 it/sec) -training >> step=7735400, episode=1290 reward=0.7897329 (507.08 it/sec) -training >> step=7735500, episode=1290 reward=0.7996761 (507.07 it/sec) -training >> step=7735600, episode=1290 reward=0.8002217 (503.45 it/sec) -training >> step=7735700, episode=1290 reward=0.7860277 (474.95 it/sec) -training >> step=7735800, episode=1290 reward=0.8171278 (471.88 it/sec) -training >> step=7735900, episode=1290 reward=0.8005425 (486.71 it/sec) -training >> step=7736000, episode=1290 reward=0.7957312 (503.34 it/sec) -training >> step=7736100, episode=1290 reward=0.7950017 (454.08 it/sec) -training >> step=7736200, episode=1290 reward=0.8166817 (465.74 it/sec) -training >> step=7736300, episode=1290 reward=0.8062605 (469.32 it/sec) -training >> step=7736400, episode=1290 reward=0.7655103 (456.60 it/sec) -training >> step=7736500, episode=1290 reward=0.7983112 (494.20 it/sec) -training >> step=7736600, episode=1290 reward=0.7708091 (464.37 it/sec) -training >> step=7736700, episode=1290 reward=0.7776054 (513.76 it/sec) -training >> step=7736800, episode=1290 reward=0.7889188 (502.03 it/sec) -training >> step=7736900, episode=1290 reward=0.7891202 (513.29 it/sec) -training >> step=7737000, episode=1290 reward=0.7660878 (505.32 it/sec) -training >> step=7737100, episode=1290 reward=0.7941899 (446.51 it/sec) -training >> step=7737200, episode=1290 reward=0.7951401 (474.48 it/sec) -training >> step=7737300, episode=1290 reward=0.7877406 (446.64 it/sec) -training >> step=7737400, episode=1290 reward=0.7780865 (481.65 it/sec) -training >> step=7737500, episode=1290 reward=0.7839362 (477.86 it/sec) -training >> step=7737600, episode=1290 reward=0.7568657 (436.07 it/sec) -training >> step=7737700, episode=1290 reward=0.7886456 (476.12 it/sec) -training >> step=7737800, episode=1290 reward=0.7927183 (529.19 it/sec) -training >> step=7737900, episode=1290 reward=0.8000437 (507.94 it/sec) -training >> step=7738000, episode=1290 reward=0.7675838 (451.57 it/sec) -training >> step=7738100, episode=1290 reward=0.7854114 (459.01 it/sec) -training >> step=7738200, episode=1290 reward=0.7949051 (491.02 it/sec) -training >> step=7738300, episode=1290 reward=0.8049675 (492.01 it/sec) -training >> step=7738400, episode=1290 reward=0.7950853 (399.05 it/sec) -training >> step=7738500, episode=1290 reward=0.7764897 (437.11 it/sec) -training >> step=7738600, episode=1290 reward=0.8027372 (455.56 it/sec) -training >> step=7738700, episode=1290 reward=0.7713962 (475.58 it/sec) -training >> step=7738800, episode=1290 reward=0.7899418 (501.32 it/sec) -training >> step=7738900, episode=1290 reward=0.8023147 (524.87 it/sec) -training >> step=7739000, episode=1290 reward=0.7953407 (478.29 it/sec) -training >> step=7739100, episode=1290 reward=0.781262 (493.88 it/sec) -training >> step=7739200, episode=1290 reward=0.7727014 (493.20 it/sec) -training >> step=7739300, episode=1291 reward=0.8066337 (123.94 it/sec) -training >> step=7739400, episode=1291 reward=0.7754691 (491.89 it/sec) -training >> step=7739500, episode=1291 reward=0.7754148 (503.73 it/sec) -training >> step=7739600, episode=1291 reward=0.7979961 (515.21 it/sec) -training >> step=7739700, episode=1291 reward=0.7960468 (506.43 it/sec) -training >> step=7739800, episode=1291 reward=0.7862434 (447.48 it/sec) -training >> step=7739900, episode=1291 reward=0.7777503 (519.69 it/sec) -training >> step=7740000, episode=1291 reward=0.7974249 (502.86 it/sec) -training >> step=7740100, episode=1291 reward=0.7965538 (505.78 it/sec) -training >> step=7740200, episode=1291 reward=0.7925856 (470.49 it/sec) -training >> step=7740300, episode=1291 reward=0.7841892 (494.32 it/sec) -training >> step=7740400, episode=1291 reward=0.7964159 (525.93 it/sec) -training >> step=7740500, episode=1291 reward=0.7924587 (489.03 it/sec) -training >> step=7740600, episode=1291 reward=0.7864815 (500.69 it/sec) -training >> step=7740700, episode=1291 reward=0.7804971 (487.37 it/sec) -training >> step=7740800, episode=1291 reward=0.7901948 (490.10 it/sec) -training >> step=7740900, episode=1291 reward=0.7986068 (495.57 it/sec) -training >> step=7741000, episode=1291 reward=0.7960471 (513.99 it/sec) -training >> step=7741100, episode=1291 reward=0.7742898 (457.19 it/sec) -training >> step=7741200, episode=1291 reward=0.7988551 (476.59 it/sec) -training >> step=7741300, episode=1291 reward=0.7952582 (481.90 it/sec) -training >> step=7741400, episode=1291 reward=0.8182493 (527.67 it/sec) -training >> step=7741500, episode=1291 reward=0.8071913 (487.70 it/sec) -training >> step=7741600, episode=1291 reward=0.7899444 (481.55 it/sec) -training >> step=7741700, episode=1291 reward=0.7916083 (510.23 it/sec) -training >> step=7741800, episode=1291 reward=0.7949871 (507.82 it/sec) -training >> step=7741900, episode=1291 reward=0.8095143 (511.07 it/sec) -training >> step=7742000, episode=1291 reward=0.7844146 (504.97 it/sec) -training >> step=7742100, episode=1291 reward=0.7833183 (521.11 it/sec) -training >> step=7742200, episode=1291 reward=0.7971148 (499.43 it/sec) -training >> step=7742300, episode=1291 reward=0.7871161 (494.04 it/sec) -training >> step=7742400, episode=1291 reward=0.8043039 (480.58 it/sec) -training >> step=7742500, episode=1291 reward=0.7886004 (500.81 it/sec) -training >> step=7742600, episode=1291 reward=0.7950951 (475.75 it/sec) -training >> step=7742700, episode=1291 reward=0.8052058 (480.49 it/sec) -training >> step=7742800, episode=1291 reward=0.7981001 (488.64 it/sec) -training >> step=7742900, episode=1291 reward=0.7907337 (474.05 it/sec) -training >> step=7743000, episode=1291 reward=0.7832873 (497.02 it/sec) -training >> step=7743100, episode=1291 reward=0.7842858 (469.92 it/sec) -training >> step=7743200, episode=1291 reward=0.7752888 (505.89 it/sec) -training >> step=7743300, episode=1291 reward=0.7895635 (490.63 it/sec) -training >> step=7743400, episode=1291 reward=0.7788184 (463.72 it/sec) -training >> step=7743500, episode=1291 reward=0.7923115 (448.47 it/sec) -training >> step=7743600, episode=1291 reward=0.8046845 (500.08 it/sec) -training >> step=7743700, episode=1291 reward=0.7885311 (498.25 it/sec) -training >> step=7743800, episode=1291 reward=0.7808565 (494.71 it/sec) -training >> step=7743900, episode=1291 reward=0.7891508 (513.27 it/sec) -training >> step=7744000, episode=1291 reward=0.8117894 (478.78 it/sec) -training >> step=7744100, episode=1291 reward=0.8130378 (489.06 it/sec) -training >> step=7744200, episode=1291 reward=0.7921833 (455.79 it/sec) -training >> step=7744300, episode=1291 reward=0.7956483 (524.50 it/sec) -training >> step=7744400, episode=1291 reward=0.7886268 (493.63 it/sec) -training >> step=7744500, episode=1291 reward=0.7936776 (514.16 it/sec) -training >> step=7744600, episode=1291 reward=0.8029217 (479.18 it/sec) -training >> step=7744700, episode=1291 reward=0.8082106 (341.58 it/sec) -training >> step=7744800, episode=1291 reward=0.7976781 (490.95 it/sec) -training >> step=7744900, episode=1291 reward=0.7905499 (465.34 it/sec) -training >> step=7745000, episode=1291 reward=0.7925628 (531.57 it/sec) -training >> step=7745100, episode=1291 reward=0.7837295 (490.49 it/sec) -training >> step=7745200, episode=1291 reward=0.7619799 (473.12 it/sec) -training >> step=7745300, episode=1292 reward=0.7720791 (125.81 it/sec) -training >> step=7745400, episode=1292 reward=0.7750161 (490.84 it/sec) -training >> step=7745500, episode=1292 reward=0.7943165 (494.66 it/sec) -training >> step=7745600, episode=1292 reward=0.804157 (495.32 it/sec) -training >> step=7745700, episode=1292 reward=0.7774725 (471.57 it/sec) -training >> step=7745800, episode=1292 reward=0.7949062 (511.04 it/sec) -training >> step=7745900, episode=1292 reward=0.8139499 (471.83 it/sec) -training >> step=7746000, episode=1292 reward=0.8051189 (499.53 it/sec) -training >> step=7746100, episode=1292 reward=0.7905954 (499.63 it/sec) -training >> step=7746200, episode=1292 reward=0.7955201 (472.33 it/sec) -training >> step=7746300, episode=1292 reward=0.7831984 (475.72 it/sec) -training >> step=7746400, episode=1292 reward=0.7931764 (476.69 it/sec) -training >> step=7746500, episode=1292 reward=0.7817081 (497.95 it/sec) -training >> step=7746600, episode=1292 reward=0.7846492 (490.05 it/sec) -training >> step=7746700, episode=1292 reward=0.78007 (461.74 it/sec) -training >> step=7746800, episode=1292 reward=0.7824923 (502.04 it/sec) -training >> step=7746900, episode=1292 reward=0.7990155 (495.69 it/sec) -training >> step=7747000, episode=1292 reward=0.7939672 (493.06 it/sec) -training >> step=7747100, episode=1292 reward=0.7930512 (464.01 it/sec) -training >> step=7747200, episode=1292 reward=0.7984967 (487.09 it/sec) -training >> step=7747300, episode=1292 reward=0.8029312 (469.51 it/sec) -training >> step=7747400, episode=1292 reward=0.7785416 (470.46 it/sec) -training >> step=7747500, episode=1292 reward=0.8063574 (512.17 it/sec) -training >> step=7747600, episode=1292 reward=0.8034166 (491.96 it/sec) -training >> step=7747700, episode=1292 reward=0.7933539 (475.42 it/sec) -training >> step=7747800, episode=1292 reward=0.798208 (511.47 it/sec) -training >> step=7747900, episode=1292 reward=0.7951499 (483.50 it/sec) -training >> step=7748000, episode=1292 reward=0.7732646 (511.50 it/sec) -training >> step=7748100, episode=1292 reward=0.7962652 (502.14 it/sec) -training >> step=7748200, episode=1292 reward=0.787231 (500.00 it/sec) -training >> step=7748300, episode=1292 reward=0.786512 (485.33 it/sec) -training >> step=7748400, episode=1292 reward=0.7818002 (445.28 it/sec) -training >> step=7748500, episode=1292 reward=0.7938139 (524.48 it/sec) -training >> step=7748600, episode=1292 reward=0.7715175 (500.84 it/sec) -training >> step=7748700, episode=1292 reward=0.7841377 (493.19 it/sec) -training >> step=7748800, episode=1292 reward=0.7870727 (472.19 it/sec) -training >> step=7748900, episode=1292 reward=0.7779874 (484.45 it/sec) -training >> step=7749000, episode=1292 reward=0.8153944 (491.04 it/sec) -training >> step=7749100, episode=1292 reward=0.7887119 (480.95 it/sec) -training >> step=7749200, episode=1292 reward=0.7965284 (510.11 it/sec) -training >> step=7749300, episode=1292 reward=0.7875325 (468.30 it/sec) -training >> step=7749400, episode=1292 reward=0.7892831 (447.17 it/sec) -training >> step=7749500, episode=1292 reward=0.7763228 (467.59 it/sec) -training >> step=7749600, episode=1292 reward=0.7841743 (510.45 it/sec) -training >> step=7749700, episode=1292 reward=0.8086732 (510.96 it/sec) -training >> step=7749800, episode=1292 reward=0.8020597 (470.87 it/sec) -training >> step=7749900, episode=1292 reward=0.7825648 (501.82 it/sec) -training >> step=7750000, episode=1292 reward=0.8030633 (528.70 it/sec) -training >> step=7750100, episode=1292 reward=0.7822619 (474.59 it/sec) -training >> step=7750200, episode=1292 reward=0.7681093 (488.01 it/sec) -training >> step=7750300, episode=1292 reward=0.7891428 (507.60 it/sec) -training >> step=7750400, episode=1292 reward=0.7789052 (460.59 it/sec) -training >> step=7750500, episode=1292 reward=0.7697539 (503.23 it/sec) -training >> step=7750600, episode=1292 reward=0.7932229 (500.78 it/sec) -training >> step=7750700, episode=1292 reward=0.7916327 (503.29 it/sec) -training >> step=7750800, episode=1292 reward=0.785092 (477.37 it/sec) -training >> step=7750900, episode=1292 reward=0.7660472 (334.19 it/sec) -training >> step=7751000, episode=1292 reward=0.7869114 (507.31 it/sec) -training >> step=7751100, episode=1292 reward=0.7915112 (515.28 it/sec) -training >> step=7751200, episode=1292 reward=0.7854937 (494.20 it/sec) -training >> step=7751300, episode=1293 reward=0.7992243 (102.39 it/sec) -training >> step=7751400, episode=1293 reward=0.7836651 (429.39 it/sec) -training >> step=7751500, episode=1293 reward=0.7950553 (468.38 it/sec) -training >> step=7751600, episode=1293 reward=0.7842727 (479.20 it/sec) -training >> step=7751700, episode=1293 reward=0.7771568 (496.90 it/sec) -training >> step=7751800, episode=1293 reward=0.7860687 (445.33 it/sec) -training >> step=7751900, episode=1293 reward=0.7734957 (441.34 it/sec) -training >> step=7752000, episode=1293 reward=0.780619 (496.41 it/sec) -training >> step=7752100, episode=1293 reward=0.785956 (506.46 it/sec) -training >> step=7752200, episode=1293 reward=0.7813686 (470.16 it/sec) -training >> step=7752300, episode=1293 reward=0.7966005 (501.37 it/sec) -training >> step=7752400, episode=1293 reward=0.7884439 (496.51 it/sec) -training >> step=7752500, episode=1293 reward=0.8058276 (512.15 it/sec) -training >> step=7752600, episode=1293 reward=0.8042883 (485.25 it/sec) -training >> step=7752700, episode=1293 reward=0.8008283 (445.80 it/sec) -training >> step=7752800, episode=1293 reward=0.7578615 (521.40 it/sec) -training >> step=7752900, episode=1293 reward=0.7831463 (513.30 it/sec) -training >> step=7753000, episode=1293 reward=0.7827711 (496.87 it/sec) -training >> step=7753100, episode=1293 reward=0.8126937 (449.59 it/sec) -training >> step=7753200, episode=1293 reward=0.794624 (440.44 it/sec) -training >> step=7753300, episode=1293 reward=0.7929432 (523.87 it/sec) -training >> step=7753400, episode=1293 reward=0.7969918 (518.60 it/sec) -training >> step=7753500, episode=1293 reward=0.7897988 (454.07 it/sec) -training >> step=7753600, episode=1293 reward=0.8108672 (520.78 it/sec) -training >> step=7753700, episode=1293 reward=0.8161108 (479.71 it/sec) -training >> step=7753800, episode=1293 reward=0.8167967 (495.99 it/sec) -training >> step=7753900, episode=1293 reward=0.7721496 (500.45 it/sec) -training >> step=7754000, episode=1293 reward=0.8045433 (467.37 it/sec) -training >> step=7754100, episode=1293 reward=0.7974761 (490.07 it/sec) -training >> step=7754200, episode=1293 reward=0.7904447 (503.19 it/sec) -training >> step=7754300, episode=1293 reward=0.795371 (478.13 it/sec) -training >> step=7754400, episode=1293 reward=0.8207448 (490.97 it/sec) -training >> step=7754500, episode=1293 reward=0.8100615 (495.77 it/sec) -training >> step=7754600, episode=1293 reward=0.7743731 (522.79 it/sec) -training >> step=7754700, episode=1293 reward=0.7964947 (463.07 it/sec) -training >> step=7754800, episode=1293 reward=0.7833593 (455.96 it/sec) -training >> step=7754900, episode=1293 reward=0.7680261 (509.63 it/sec) -training >> step=7755000, episode=1293 reward=0.7854565 (477.72 it/sec) -training >> step=7755100, episode=1293 reward=0.771901 (470.22 it/sec) -training >> step=7755200, episode=1293 reward=0.7876132 (471.41 it/sec) -training >> step=7755300, episode=1293 reward=0.7787135 (539.72 it/sec) -training >> step=7755400, episode=1293 reward=0.81067 (520.42 it/sec) -training >> step=7755500, episode=1293 reward=0.7910617 (471.59 it/sec) -training >> step=7755600, episode=1293 reward=0.7919648 (494.54 it/sec) -training >> step=7755700, episode=1293 reward=0.8019342 (447.32 it/sec) -training >> step=7755800, episode=1293 reward=0.7857438 (512.53 it/sec) -training >> step=7755900, episode=1293 reward=0.8053359 (463.46 it/sec) -training >> step=7756000, episode=1293 reward=0.7931889 (505.49 it/sec) -training >> step=7756100, episode=1293 reward=0.7818106 (496.96 it/sec) -training >> step=7756200, episode=1293 reward=0.7882092 (458.96 it/sec) -training >> step=7756300, episode=1293 reward=0.7784868 (476.43 it/sec) -training >> step=7756400, episode=1293 reward=0.7786862 (528.50 it/sec) -training >> step=7756500, episode=1293 reward=0.7877774 (480.20 it/sec) -training >> step=7756600, episode=1293 reward=0.8034884 (510.10 it/sec) -training >> step=7756700, episode=1293 reward=0.7889806 (467.69 it/sec) -training >> step=7756800, episode=1293 reward=0.7677131 (382.54 it/sec) -training >> step=7756900, episode=1293 reward=0.7764165 (517.42 it/sec) -training >> step=7757000, episode=1293 reward=0.7979273 (481.86 it/sec) -training >> step=7757100, episode=1293 reward=0.7979122 (485.91 it/sec) -training >> step=7757200, episode=1293 reward=0.7891487 (482.62 it/sec) -training >> step=7757300, episode=1294 reward=0.7849961 (141.17 it/sec) -training >> step=7757400, episode=1294 reward=0.7829567 (497.92 it/sec) -training >> step=7757500, episode=1294 reward=0.7907019 (505.09 it/sec) -training >> step=7757600, episode=1294 reward=0.8012925 (470.96 it/sec) -training >> step=7757700, episode=1294 reward=0.7926362 (511.55 it/sec) -training >> step=7757800, episode=1294 reward=0.7976471 (506.58 it/sec) -training >> step=7757900, episode=1294 reward=0.8050814 (481.86 it/sec) -training >> step=7758000, episode=1294 reward=0.7938145 (434.23 it/sec) -training >> step=7758100, episode=1294 reward=0.7979795 (497.83 it/sec) -training >> step=7758200, episode=1294 reward=0.7867172 (511.47 it/sec) -training >> step=7758300, episode=1294 reward=0.7949799 (488.25 it/sec) -training >> step=7758400, episode=1294 reward=0.7850036 (499.24 it/sec) -training >> step=7758500, episode=1294 reward=0.785441 (501.44 it/sec) -training >> step=7758600, episode=1294 reward=0.7906589 (499.56 it/sec) -training >> step=7758700, episode=1294 reward=0.7768408 (441.72 it/sec) -training >> step=7758800, episode=1294 reward=0.778052 (511.19 it/sec) -training >> step=7758900, episode=1294 reward=0.7811123 (535.78 it/sec) -training >> step=7759000, episode=1294 reward=0.8022473 (501.75 it/sec) -training >> step=7759100, episode=1294 reward=0.792863 (466.97 it/sec) -training >> step=7759200, episode=1294 reward=0.8022423 (503.52 it/sec) -training >> step=7759300, episode=1294 reward=0.7973129 (525.79 it/sec) -training >> step=7759400, episode=1294 reward=0.7878581 (479.93 it/sec) -training >> step=7759500, episode=1294 reward=0.799637 (476.53 it/sec) -training >> step=7759600, episode=1294 reward=0.7886266 (500.75 it/sec) -training >> step=7759700, episode=1294 reward=0.7687936 (457.28 it/sec) -training >> step=7759800, episode=1294 reward=0.7952986 (470.11 it/sec) -training >> step=7759900, episode=1294 reward=0.807569 (499.27 it/sec) -training >> step=7760000, episode=1294 reward=0.7758974 (493.97 it/sec) -training >> step=7760100, episode=1294 reward=0.7906125 (468.80 it/sec) -training >> step=7760200, episode=1294 reward=0.7842885 (476.65 it/sec) -training >> step=7760300, episode=1294 reward=0.7800382 (510.28 it/sec) -training >> step=7760400, episode=1294 reward=0.799939 (503.17 it/sec) -training >> step=7760500, episode=1294 reward=0.809003 (453.05 it/sec) -training >> step=7760600, episode=1294 reward=0.7886373 (457.32 it/sec) -training >> step=7760700, episode=1294 reward=0.8030168 (505.59 it/sec) -training >> step=7760800, episode=1294 reward=0.803062 (457.81 it/sec) -training >> step=7760900, episode=1294 reward=0.7928049 (496.53 it/sec) -training >> step=7761000, episode=1294 reward=0.7993498 (505.43 it/sec) -training >> step=7761100, episode=1294 reward=0.801382 (517.99 it/sec) -training >> step=7761200, episode=1294 reward=0.7853032 (465.74 it/sec) -training >> step=7761300, episode=1294 reward=0.7853975 (455.97 it/sec) -training >> step=7761400, episode=1294 reward=0.7816322 (501.20 it/sec) -training >> step=7761500, episode=1294 reward=0.8084089 (481.46 it/sec) -training >> step=7761600, episode=1294 reward=0.7939147 (506.83 it/sec) -training >> step=7761700, episode=1294 reward=0.7952437 (452.93 it/sec) -training >> step=7761800, episode=1294 reward=0.7737035 (539.55 it/sec) -training >> step=7761900, episode=1294 reward=0.785157 (511.88 it/sec) -training >> step=7762000, episode=1294 reward=0.8028631 (457.74 it/sec) -training >> step=7762100, episode=1294 reward=0.7895952 (526.45 it/sec) -training >> step=7762200, episode=1294 reward=0.7859257 (484.03 it/sec) -training >> step=7762300, episode=1294 reward=0.7905603 (508.36 it/sec) -training >> step=7762400, episode=1294 reward=0.7773439 (515.60 it/sec) -training >> step=7762500, episode=1294 reward=0.779055 (496.40 it/sec) -training >> step=7762600, episode=1294 reward=0.7685601 (514.52 it/sec) -training >> step=7762700, episode=1294 reward=0.789305 (498.03 it/sec) -training >> step=7762800, episode=1294 reward=0.80075 (524.27 it/sec) -training >> step=7762900, episode=1294 reward=0.7860773 (542.59 it/sec) -training >> step=7763000, episode=1294 reward=0.7584484 (345.44 it/sec) -training >> step=7763100, episode=1294 reward=0.8004981 (507.48 it/sec) -training >> step=7763200, episode=1294 reward=0.773828 (490.75 it/sec) -training >> step=7763300, episode=1295 reward=0.7860764 (118.39 it/sec) -training >> step=7763400, episode=1295 reward=0.7849357 (493.92 it/sec) -training >> step=7763500, episode=1295 reward=0.785371 (480.65 it/sec) -training >> step=7763600, episode=1295 reward=0.774496 (497.79 it/sec) -training >> step=7763700, episode=1295 reward=0.7839698 (514.70 it/sec) -training >> step=7763800, episode=1295 reward=0.7964292 (457.31 it/sec) -training >> step=7763900, episode=1295 reward=0.7835295 (481.69 it/sec) -training >> step=7764000, episode=1295 reward=0.7998545 (449.06 it/sec) -training >> step=7764100, episode=1295 reward=0.7902364 (499.51 it/sec) -training >> step=7764200, episode=1295 reward=0.7843384 (460.31 it/sec) -training >> step=7764300, episode=1295 reward=0.8007477 (499.09 it/sec) -training >> step=7764400, episode=1295 reward=0.7820989 (481.11 it/sec) -training >> step=7764500, episode=1295 reward=0.7894195 (478.78 it/sec) -training >> step=7764600, episode=1295 reward=0.7737774 (450.63 it/sec) -training >> step=7764700, episode=1295 reward=0.8016671 (512.48 it/sec) -training >> step=7764800, episode=1295 reward=0.792937 (467.06 it/sec) -training >> step=7764900, episode=1295 reward=0.802443 (468.31 it/sec) -training >> step=7765000, episode=1295 reward=0.7959102 (508.68 it/sec) -training >> step=7765100, episode=1295 reward=0.8151935 (466.93 it/sec) -training >> step=7765200, episode=1295 reward=0.7847211 (476.15 it/sec) -training >> step=7765300, episode=1295 reward=0.7992573 (503.96 it/sec) -training >> step=7765400, episode=1295 reward=0.790875 (515.73 it/sec) -training >> step=7765500, episode=1295 reward=0.8090288 (514.54 it/sec) -training >> step=7765600, episode=1295 reward=0.7960879 (450.21 it/sec) -training >> step=7765700, episode=1295 reward=0.8140119 (498.19 it/sec) -training >> step=7765800, episode=1295 reward=0.8062164 (516.96 it/sec) -training >> step=7765900, episode=1295 reward=0.7749984 (477.81 it/sec) -training >> step=7766000, episode=1295 reward=0.7781593 (503.97 it/sec) -training >> step=7766100, episode=1295 reward=0.7966328 (488.54 it/sec) -training >> step=7766200, episode=1295 reward=0.7826045 (485.89 it/sec) -training >> step=7766300, episode=1295 reward=0.8187768 (513.23 it/sec) -training >> step=7766400, episode=1295 reward=0.792903 (457.02 it/sec) -training >> step=7766500, episode=1295 reward=0.7930891 (526.24 it/sec) -training >> step=7766600, episode=1295 reward=0.7869163 (512.94 it/sec) -training >> step=7766700, episode=1295 reward=0.8076436 (500.81 it/sec) -training >> step=7766800, episode=1295 reward=0.7974887 (468.21 it/sec) -training >> step=7766900, episode=1295 reward=0.8021435 (503.53 it/sec) -training >> step=7767000, episode=1295 reward=0.795882 (496.50 it/sec) -training >> step=7767100, episode=1295 reward=0.7909165 (499.25 it/sec) -training >> step=7767200, episode=1295 reward=0.778267 (533.83 it/sec) -training >> step=7767300, episode=1295 reward=0.7648367 (499.92 it/sec) -training >> step=7767400, episode=1295 reward=0.7818185 (471.15 it/sec) -training >> step=7767500, episode=1295 reward=0.7965506 (481.02 it/sec) -training >> step=7767600, episode=1295 reward=0.7843978 (505.86 it/sec) -training >> step=7767700, episode=1295 reward=0.7774715 (489.26 it/sec) -training >> step=7767800, episode=1295 reward=0.8133091 (512.23 it/sec) -training >> step=7767900, episode=1295 reward=0.7934093 (500.99 it/sec) -training >> step=7768000, episode=1295 reward=0.8107076 (497.98 it/sec) -training >> step=7768100, episode=1295 reward=0.7646926 (516.52 it/sec) -training >> step=7768200, episode=1295 reward=0.7951241 (481.88 it/sec) -training >> step=7768300, episode=1295 reward=0.7932398 (543.07 it/sec) -training >> step=7768400, episode=1295 reward=0.7781795 (459.05 it/sec) -training >> step=7768500, episode=1295 reward=0.7720888 (486.78 it/sec) -training >> step=7768600, episode=1295 reward=0.8064254 (446.06 it/sec) -training >> step=7768700, episode=1295 reward=0.791112 (505.61 it/sec) -training >> step=7768800, episode=1295 reward=0.8030358 (480.08 it/sec) -training >> step=7768900, episode=1295 reward=0.7710457 (463.21 it/sec) -training >> step=7769000, episode=1295 reward=0.7869002 (441.59 it/sec) -training >> step=7769100, episode=1295 reward=0.7994212 (367.07 it/sec) -training >> step=7769200, episode=1295 reward=0.7882081 (479.10 it/sec) -training >> step=7769300, episode=1296 reward=0.7880636 (120.74 it/sec) -training >> step=7769400, episode=1296 reward=0.7856709 (484.76 it/sec) -training >> step=7769500, episode=1296 reward=0.7899054 (513.45 it/sec) -training >> step=7769600, episode=1296 reward=0.800266 (470.83 it/sec) -training >> step=7769700, episode=1296 reward=0.7712808 (487.05 it/sec) -training >> step=7769800, episode=1296 reward=0.7936854 (488.21 it/sec) -training >> step=7769900, episode=1296 reward=0.7952645 (457.49 it/sec) -training >> step=7770000, episode=1296 reward=0.7939653 (475.96 it/sec) -training >> step=7770100, episode=1296 reward=0.7920731 (501.75 it/sec) -training >> step=7770200, episode=1296 reward=0.8020235 (503.28 it/sec) -training >> step=7770300, episode=1296 reward=0.7740596 (510.08 it/sec) -training >> step=7770400, episode=1296 reward=0.8123874 (472.10 it/sec) -training >> step=7770500, episode=1296 reward=0.7809561 (506.41 it/sec) -training >> step=7770600, episode=1296 reward=0.7850789 (468.06 it/sec) -training >> step=7770700, episode=1296 reward=0.797738 (514.61 it/sec) -training >> step=7770800, episode=1296 reward=0.8036991 (499.92 it/sec) -training >> step=7770900, episode=1296 reward=0.7927805 (521.47 it/sec) -training >> step=7771000, episode=1296 reward=0.7989884 (491.31 it/sec) -training >> step=7771100, episode=1296 reward=0.7651284 (460.05 it/sec) -training >> step=7771200, episode=1296 reward=0.8031803 (494.90 it/sec) -training >> step=7771300, episode=1296 reward=0.7758065 (529.17 it/sec) -training >> step=7771400, episode=1296 reward=0.7894442 (504.70 it/sec) -training >> step=7771500, episode=1296 reward=0.812771 (496.36 it/sec) -training >> step=7771600, episode=1296 reward=0.805823 (480.31 it/sec) -training >> step=7771700, episode=1296 reward=0.7738034 (489.15 it/sec) -training >> step=7771800, episode=1296 reward=0.7726543 (499.47 it/sec) -training >> step=7771900, episode=1296 reward=0.7804337 (501.02 it/sec) -training >> step=7772000, episode=1296 reward=0.7895728 (537.87 it/sec) -training >> step=7772100, episode=1296 reward=0.7955028 (423.24 it/sec) -training >> step=7772200, episode=1296 reward=0.7881789 (470.32 it/sec) -training >> step=7772300, episode=1296 reward=0.7712425 (515.89 it/sec) -training >> step=7772400, episode=1296 reward=0.7824463 (513.38 it/sec) -training >> step=7772500, episode=1296 reward=0.7824302 (459.49 it/sec) -training >> step=7772600, episode=1296 reward=0.7887556 (490.03 it/sec) -training >> step=7772700, episode=1296 reward=0.7882669 (538.39 it/sec) -training >> step=7772800, episode=1296 reward=0.7688571 (458.94 it/sec) -training >> step=7772900, episode=1296 reward=0.8004088 (494.64 it/sec) -training >> step=7773000, episode=1296 reward=0.7862223 (496.15 it/sec) -training >> step=7773100, episode=1296 reward=0.7894241 (521.83 it/sec) -training >> step=7773200, episode=1296 reward=0.804845 (492.29 it/sec) -training >> step=7773300, episode=1296 reward=0.7898769 (487.52 it/sec) -training >> step=7773400, episode=1296 reward=0.8023834 (464.48 it/sec) -training >> step=7773500, episode=1296 reward=0.7796204 (515.20 it/sec) -training >> step=7773600, episode=1296 reward=0.7939547 (514.66 it/sec) -training >> step=7773700, episode=1296 reward=0.7883729 (467.25 it/sec) -training >> step=7773800, episode=1296 reward=0.7815675 (467.17 it/sec) -training >> step=7773900, episode=1296 reward=0.7860214 (472.69 it/sec) -training >> step=7774000, episode=1296 reward=0.7809968 (521.56 it/sec) -training >> step=7774100, episode=1296 reward=0.8021392 (482.57 it/sec) -training >> step=7774200, episode=1296 reward=0.7971089 (523.43 it/sec) -training >> step=7774300, episode=1296 reward=0.7952514 (507.64 it/sec) -training >> step=7774400, episode=1296 reward=0.7741867 (477.71 it/sec) -training >> step=7774500, episode=1296 reward=0.7903323 (482.67 it/sec) -training >> step=7774600, episode=1296 reward=0.7782479 (520.73 it/sec) -training >> step=7774700, episode=1296 reward=0.7775809 (490.83 it/sec) -training >> step=7774800, episode=1296 reward=0.7938364 (502.12 it/sec) -training >> step=7774900, episode=1296 reward=0.7641314 (507.30 it/sec) -training >> step=7775000, episode=1296 reward=0.7970795 (522.33 it/sec) -training >> step=7775100, episode=1296 reward=0.7923502 (491.73 it/sec) -training >> step=7775200, episode=1296 reward=0.8016915 (337.44 it/sec) -training >> step=7775300, episode=1297 reward=0.8009837 (147.04 it/sec) -training >> step=7775400, episode=1297 reward=0.7846627 (495.43 it/sec) -training >> step=7775500, episode=1297 reward=0.7684833 (476.58 it/sec) -training >> step=7775600, episode=1297 reward=0.7840304 (471.66 it/sec) -training >> step=7775700, episode=1297 reward=0.7874682 (503.37 it/sec) -training >> step=7775800, episode=1297 reward=0.7646714 (499.29 it/sec) -training >> step=7775900, episode=1297 reward=0.7823449 (498.23 it/sec) -training >> step=7776000, episode=1297 reward=0.8029669 (471.47 it/sec) -training >> step=7776100, episode=1297 reward=0.7891526 (510.75 it/sec) -training >> step=7776200, episode=1297 reward=0.7778638 (503.95 it/sec) -training >> step=7776300, episode=1297 reward=0.7764665 (508.36 it/sec) -training >> step=7776400, episode=1297 reward=0.7612875 (521.30 it/sec) -training >> step=7776500, episode=1297 reward=0.7717946 (493.19 it/sec) -training >> step=7776600, episode=1297 reward=0.7888838 (514.89 it/sec) -training >> step=7776700, episode=1297 reward=0.7901028 (513.61 it/sec) -training >> step=7776800, episode=1297 reward=0.7736771 (519.51 it/sec) -training >> step=7776900, episode=1297 reward=0.7818052 (496.24 it/sec) -training >> step=7777000, episode=1297 reward=0.7967494 (535.47 it/sec) -training >> step=7777100, episode=1297 reward=0.8053898 (482.05 it/sec) -training >> step=7777200, episode=1297 reward=0.8009469 (510.47 it/sec) -training >> step=7777300, episode=1297 reward=0.7918162 (489.30 it/sec) -training >> step=7777400, episode=1297 reward=0.8024347 (523.44 it/sec) -training >> step=7777500, episode=1297 reward=0.7843614 (511.13 it/sec) -training >> step=7777600, episode=1297 reward=0.787291 (529.55 it/sec) -training >> step=7777700, episode=1297 reward=0.8066973 (499.31 it/sec) -training >> step=7777800, episode=1297 reward=0.7966075 (546.57 it/sec) -training >> step=7777900, episode=1297 reward=0.8012542 (430.65 it/sec) -training >> step=7778000, episode=1297 reward=0.794687 (504.68 it/sec) -training >> step=7778100, episode=1297 reward=0.8044062 (524.79 it/sec) -training >> step=7778200, episode=1297 reward=0.7865076 (458.39 it/sec) -training >> step=7778300, episode=1297 reward=0.7855414 (482.94 it/sec) -training >> step=7778400, episode=1297 reward=0.7902347 (504.20 it/sec) -training >> step=7778500, episode=1297 reward=0.7726434 (492.51 it/sec) -training >> step=7778600, episode=1297 reward=0.7893282 (508.73 it/sec) -training >> step=7778700, episode=1297 reward=0.7886405 (462.71 it/sec) -training >> step=7778800, episode=1297 reward=0.7934614 (494.23 it/sec) -training >> step=7778900, episode=1297 reward=0.7837915 (519.07 it/sec) -training >> step=7779000, episode=1297 reward=0.7799873 (531.58 it/sec) -training >> step=7779100, episode=1297 reward=0.7781459 (471.58 it/sec) -training >> step=7779200, episode=1297 reward=0.7947176 (476.88 it/sec) -training >> step=7779300, episode=1297 reward=0.7983614 (431.51 it/sec) -training >> step=7779400, episode=1297 reward=0.790727 (494.79 it/sec) -training >> step=7779500, episode=1297 reward=0.7972113 (484.51 it/sec) -training >> step=7779600, episode=1297 reward=0.7963282 (480.14 it/sec) -training >> step=7779700, episode=1297 reward=0.7779929 (484.77 it/sec) -training >> step=7779800, episode=1297 reward=0.7982413 (506.64 it/sec) -training >> step=7779900, episode=1297 reward=0.7759087 (488.77 it/sec) -training >> step=7780000, episode=1297 reward=0.78367 (517.61 it/sec) -training >> step=7780100, episode=1297 reward=0.7809944 (470.32 it/sec) -training >> step=7780200, episode=1297 reward=0.7822143 (488.97 it/sec) -training >> step=7780300, episode=1297 reward=0.7921322 (507.80 it/sec) -training >> step=7780400, episode=1297 reward=0.7700526 (485.54 it/sec) -training >> step=7780500, episode=1297 reward=0.7854099 (491.48 it/sec) -training >> step=7780600, episode=1297 reward=0.802574 (488.44 it/sec) -training >> step=7780700, episode=1297 reward=0.779881 (489.92 it/sec) -training >> step=7780800, episode=1297 reward=0.8037247 (489.18 it/sec) -training >> step=7780900, episode=1297 reward=0.7959511 (510.43 it/sec) -training >> step=7781000, episode=1297 reward=0.7784704 (505.43 it/sec) -training >> step=7781100, episode=1297 reward=0.7672228 (535.74 it/sec) -training >> step=7781200, episode=1297 reward=0.7801389 (511.34 it/sec) -training >> step=7781300, episode=1298 reward=0.7940549 (123.11 it/sec) -training >> step=7781400, episode=1298 reward=0.7945396 (496.73 it/sec) -training >> step=7781500, episode=1298 reward=0.780327 (495.51 it/sec) -training >> step=7781600, episode=1298 reward=0.7855022 (476.72 it/sec) -training >> step=7781700, episode=1298 reward=0.7916859 (505.21 it/sec) -training >> step=7781800, episode=1298 reward=0.7815973 (490.62 it/sec) -training >> step=7781900, episode=1298 reward=0.7690642 (481.08 it/sec) -training >> step=7782000, episode=1298 reward=0.7747336 (480.89 it/sec) -training >> step=7782100, episode=1298 reward=0.7874965 (511.05 it/sec) -training >> step=7782200, episode=1298 reward=0.7922107 (492.40 it/sec) -training >> step=7782300, episode=1298 reward=0.7946974 (499.93 it/sec) -training >> step=7782400, episode=1298 reward=0.7991516 (493.36 it/sec) -training >> step=7782500, episode=1298 reward=0.7952483 (503.38 it/sec) -training >> step=7782600, episode=1298 reward=0.7689635 (507.87 it/sec) -training >> step=7782700, episode=1298 reward=0.7949034 (448.76 it/sec) -training >> step=7782800, episode=1298 reward=0.7716725 (479.14 it/sec) -training >> step=7782900, episode=1298 reward=0.7968677 (473.60 it/sec) -training >> step=7783000, episode=1298 reward=0.7713104 (509.77 it/sec) -training >> step=7783100, episode=1298 reward=0.7871874 (490.05 it/sec) -training >> step=7783200, episode=1298 reward=0.7979628 (531.67 it/sec) -training >> step=7783300, episode=1298 reward=0.7872556 (499.83 it/sec) -training >> step=7783400, episode=1298 reward=0.7748767 (461.17 it/sec) -training >> step=7783500, episode=1298 reward=0.7742065 (489.83 it/sec) -training >> step=7783600, episode=1298 reward=0.7913852 (514.91 it/sec) -training >> step=7783700, episode=1298 reward=0.7853302 (514.18 it/sec) -training >> step=7783800, episode=1298 reward=0.8127658 (475.74 it/sec) -training >> step=7783900, episode=1298 reward=0.7942567 (471.41 it/sec) -training >> step=7784000, episode=1298 reward=0.7829857 (516.63 it/sec) -training >> step=7784100, episode=1298 reward=0.7756667 (502.44 it/sec) -training >> step=7784200, episode=1298 reward=0.7966592 (521.05 it/sec) -training >> step=7784300, episode=1298 reward=0.8081498 (500.89 it/sec) -training >> step=7784400, episode=1298 reward=0.7893476 (437.54 it/sec) -training >> step=7784500, episode=1298 reward=0.8000795 (467.98 it/sec) -training >> step=7784600, episode=1298 reward=0.7815179 (463.55 it/sec) -training >> step=7784700, episode=1298 reward=0.773481 (485.29 it/sec) -training >> step=7784800, episode=1298 reward=0.7859749 (491.48 it/sec) -training >> step=7784900, episode=1298 reward=0.7625912 (496.30 it/sec) -training >> step=7785000, episode=1298 reward=0.7926253 (461.96 it/sec) -training >> step=7785100, episode=1298 reward=0.7995399 (487.13 it/sec) -training >> step=7785200, episode=1298 reward=0.817326 (477.40 it/sec) -training >> step=7785300, episode=1298 reward=0.7906925 (466.97 it/sec) -training >> step=7785400, episode=1298 reward=0.7802109 (529.93 it/sec) -training >> step=7785500, episode=1298 reward=0.8057163 (500.10 it/sec) -training >> step=7785600, episode=1298 reward=0.7818533 (457.33 it/sec) -training >> step=7785700, episode=1298 reward=0.7831677 (522.34 it/sec) -training >> step=7785800, episode=1298 reward=0.7577358 (513.77 it/sec) -training >> step=7785900, episode=1298 reward=0.7760245 (471.80 it/sec) -training >> step=7786000, episode=1298 reward=0.7841165 (511.18 it/sec) -training >> step=7786100, episode=1298 reward=0.8027571 (491.10 it/sec) -training >> step=7786200, episode=1298 reward=0.7911572 (504.94 it/sec) -training >> step=7786300, episode=1298 reward=0.7745593 (473.43 it/sec) -training >> step=7786400, episode=1298 reward=0.8045361 (493.34 it/sec) -training >> step=7786500, episode=1298 reward=0.7849753 (498.22 it/sec) -training >> step=7786600, episode=1298 reward=0.8021808 (501.71 it/sec) -training >> step=7786700, episode=1298 reward=0.7942587 (451.19 it/sec) -training >> step=7786800, episode=1298 reward=0.8014274 (487.22 it/sec) -training >> step=7786900, episode=1298 reward=0.78478 (482.80 it/sec) -training >> step=7787000, episode=1298 reward=0.800454 (506.73 it/sec) -training >> step=7787100, episode=1298 reward=0.7930805 (516.24 it/sec) -training >> step=7787200, episode=1298 reward=0.7938636 (535.46 it/sec) -training >> step=7787300, episode=1299 reward=0.7766688 (114.28 it/sec) -training >> step=7787400, episode=1299 reward=0.7977942 (456.77 it/sec) -training >> step=7787500, episode=1299 reward=0.7932374 (443.89 it/sec) -training >> step=7787600, episode=1299 reward=0.7831962 (525.15 it/sec) -training >> step=7787700, episode=1299 reward=0.7827332 (458.23 it/sec) -training >> step=7787800, episode=1299 reward=0.8082276 (481.72 it/sec) -training >> step=7787900, episode=1299 reward=0.7834216 (508.27 it/sec) -training >> step=7788000, episode=1299 reward=0.7918488 (482.48 it/sec) -training >> step=7788100, episode=1299 reward=0.7912269 (505.59 it/sec) -training >> step=7788200, episode=1299 reward=0.7763551 (487.69 it/sec) -training >> step=7788300, episode=1299 reward=0.7609918 (520.78 it/sec) -training >> step=7788400, episode=1299 reward=0.7799391 (538.83 it/sec) -training >> step=7788500, episode=1299 reward=0.7961842 (503.41 it/sec) -training >> step=7788600, episode=1299 reward=0.7963265 (512.13 it/sec) -training >> step=7788700, episode=1299 reward=0.7801192 (477.39 it/sec) -training >> step=7788800, episode=1299 reward=0.7798005 (472.13 it/sec) -training >> step=7788900, episode=1299 reward=0.7940447 (475.17 it/sec) -training >> step=7789000, episode=1299 reward=0.7912804 (468.73 it/sec) -training >> step=7789100, episode=1299 reward=0.768958 (452.84 it/sec) -training >> step=7789200, episode=1299 reward=0.7859759 (481.33 it/sec) -training >> step=7789300, episode=1299 reward=0.7848351 (498.84 it/sec) -training >> step=7789400, episode=1299 reward=0.7861825 (530.94 it/sec) -training >> step=7789500, episode=1299 reward=0.7650089 (520.90 it/sec) -training >> step=7789600, episode=1299 reward=0.8022418 (451.33 it/sec) -training >> step=7789700, episode=1299 reward=0.7878593 (473.88 it/sec) -training >> step=7789800, episode=1299 reward=0.8057927 (458.06 it/sec) -training >> step=7789900, episode=1299 reward=0.7983248 (488.78 it/sec) -training >> step=7790000, episode=1299 reward=0.7979854 (503.61 it/sec) -training >> step=7790100, episode=1299 reward=0.7686229 (493.52 it/sec) -training >> step=7790200, episode=1299 reward=0.7866002 (514.09 it/sec) -training >> step=7790300, episode=1299 reward=0.7935465 (486.93 it/sec) -training >> step=7790400, episode=1299 reward=0.8000048 (514.03 it/sec) -training >> step=7790500, episode=1299 reward=0.806136 (496.28 it/sec) -training >> step=7790600, episode=1299 reward=0.7713883 (501.11 it/sec) -training >> step=7790700, episode=1299 reward=0.785247 (482.41 it/sec) -training >> step=7790800, episode=1299 reward=0.7984357 (493.48 it/sec) -training >> step=7790900, episode=1299 reward=0.7869104 (519.89 it/sec) -training >> step=7791000, episode=1299 reward=0.7834115 (501.26 it/sec) -training >> step=7791100, episode=1299 reward=0.7804388 (468.86 it/sec) -training >> step=7791200, episode=1299 reward=0.7767595 (517.30 it/sec) -training >> step=7791300, episode=1299 reward=0.7894115 (531.23 it/sec) -training >> step=7791400, episode=1299 reward=0.8036537 (503.75 it/sec) -training >> step=7791500, episode=1299 reward=0.8092465 (504.52 it/sec) -training >> step=7791600, episode=1299 reward=0.7953727 (482.56 it/sec) -training >> step=7791700, episode=1299 reward=0.8014184 (504.69 it/sec) -training >> step=7791800, episode=1299 reward=0.8078899 (417.12 it/sec) -training >> step=7791900, episode=1299 reward=0.7962615 (516.89 it/sec) -training >> step=7792000, episode=1299 reward=0.7749967 (509.20 it/sec) -training >> step=7792100, episode=1299 reward=0.803848 (434.89 it/sec) -training >> step=7792200, episode=1299 reward=0.8043351 (456.87 it/sec) -training >> step=7792300, episode=1299 reward=0.7862213 (468.68 it/sec) -training >> step=7792400, episode=1299 reward=0.8096898 (464.70 it/sec) -training >> step=7792500, episode=1299 reward=0.7722226 (480.81 it/sec) -training >> step=7792600, episode=1299 reward=0.7901352 (497.01 it/sec) -training >> step=7792700, episode=1299 reward=0.7834522 (473.16 it/sec) -training >> step=7792800, episode=1299 reward=0.8042241 (488.09 it/sec) -training >> step=7792900, episode=1299 reward=0.7688679 (471.59 it/sec) -training >> step=7793000, episode=1299 reward=0.7870623 (460.07 it/sec) -training >> step=7793100, episode=1299 reward=0.7671527 (484.68 it/sec) -training >> step=7793200, episode=1299 reward=0.7928632 (476.44 it/sec) -training >> step=7793300, episode=1300 reward=0.7910172 (132.32 it/sec) -training >> step=7793400, episode=1300 reward=0.7562981 (479.04 it/sec) -training >> step=7793500, episode=1300 reward=0.7743456 (484.57 it/sec) -training >> step=7793600, episode=1300 reward=0.7983 (511.82 it/sec) -training >> step=7793700, episode=1300 reward=0.783882 (495.95 it/sec) -training >> step=7793800, episode=1300 reward=0.7620775 (433.57 it/sec) -training >> step=7793900, episode=1300 reward=0.7963147 (491.53 it/sec) -training >> step=7794000, episode=1300 reward=0.7923402 (499.48 it/sec) -training >> step=7794100, episode=1300 reward=0.7895119 (506.46 it/sec) -training >> step=7794200, episode=1300 reward=0.769199 (460.41 it/sec) -training >> step=7794300, episode=1300 reward=0.7511247 (511.90 it/sec) -training >> step=7794400, episode=1300 reward=0.7835618 (492.39 it/sec) -training >> step=7794500, episode=1300 reward=0.7835332 (466.19 it/sec) -training >> step=7794600, episode=1300 reward=0.772701 (469.43 it/sec) -training >> step=7794700, episode=1300 reward=0.7891901 (478.11 it/sec) -training >> step=7794800, episode=1300 reward=0.7882829 (475.09 it/sec) -training >> step=7794900, episode=1300 reward=0.777653 (495.70 it/sec) -training >> step=7795000, episode=1300 reward=0.7803093 (465.14 it/sec) -training >> step=7795100, episode=1300 reward=0.7749308 (514.10 it/sec) -training >> step=7795200, episode=1300 reward=0.7950436 (450.11 it/sec) -training >> step=7795300, episode=1300 reward=0.8166695 (523.69 it/sec) -training >> step=7795400, episode=1300 reward=0.7733601 (525.89 it/sec) -training >> step=7795500, episode=1300 reward=0.7769137 (506.65 it/sec) -training >> step=7795600, episode=1300 reward=0.7651147 (482.58 it/sec) -training >> step=7795700, episode=1300 reward=0.811762 (516.67 it/sec) -training >> step=7795800, episode=1300 reward=0.7863825 (513.78 it/sec) -training >> step=7795900, episode=1300 reward=0.7747506 (479.83 it/sec) -training >> step=7796000, episode=1300 reward=0.7681264 (494.10 it/sec) -training >> step=7796100, episode=1300 reward=0.7755396 (488.33 it/sec) -training >> step=7796200, episode=1300 reward=0.8097001 (491.47 it/sec) -training >> step=7796300, episode=1300 reward=0.7903925 (481.02 it/sec) -training >> step=7796400, episode=1300 reward=0.7860416 (498.20 it/sec) -training >> step=7796500, episode=1300 reward=0.7894399 (457.99 it/sec) -training >> step=7796600, episode=1300 reward=0.7797561 (483.00 it/sec) -training >> step=7796700, episode=1300 reward=0.7784069 (508.22 it/sec) -training >> step=7796800, episode=1300 reward=0.7964844 (501.03 it/sec) -training >> step=7796900, episode=1300 reward=0.7871011 (489.64 it/sec) -training >> step=7797000, episode=1300 reward=0.7870901 (456.44 it/sec) -training >> step=7797100, episode=1300 reward=0.7883937 (517.30 it/sec) -training >> step=7797200, episode=1300 reward=0.7755411 (485.35 it/sec) -training >> step=7797300, episode=1300 reward=0.7748536 (485.47 it/sec) -training >> step=7797400, episode=1300 reward=0.7880648 (470.16 it/sec) -training >> step=7797500, episode=1300 reward=0.7732356 (507.21 it/sec) -training >> step=7797600, episode=1300 reward=0.7808086 (522.50 it/sec) -training >> step=7797700, episode=1300 reward=0.8018735 (484.69 it/sec) -training >> step=7797800, episode=1300 reward=0.7846091 (480.35 it/sec) -training >> step=7797900, episode=1300 reward=0.7933387 (503.13 it/sec) -training >> step=7798000, episode=1300 reward=0.7759101 (493.49 it/sec) -training >> step=7798100, episode=1300 reward=0.7932055 (515.24 it/sec) -training >> step=7798200, episode=1300 reward=0.8107718 (486.53 it/sec) -training >> step=7798300, episode=1300 reward=0.8041999 (453.15 it/sec) -training >> step=7798400, episode=1300 reward=0.7916974 (490.68 it/sec) -training >> step=7798500, episode=1300 reward=0.7902575 (448.01 it/sec) -training >> step=7798600, episode=1300 reward=0.7981331 (498.09 it/sec) -training >> step=7798700, episode=1300 reward=0.7720015 (509.19 it/sec) -training >> step=7798800, episode=1300 reward=0.8116549 (495.40 it/sec) -training >> step=7798900, episode=1300 reward=0.8045064 (512.90 it/sec) -training >> step=7799000, episode=1300 reward=0.8055214 (525.74 it/sec) -training >> step=7799100, episode=1300 reward=0.7943155 (463.89 it/sec) -training >> step=7799200, episode=1300 reward=0.7845045 (511.81 it/sec) -training >> step=7799300, episode=1301 reward=0.7892335 (120.33 it/sec) -training >> step=7799400, episode=1301 reward=0.7797267 (471.92 it/sec) -training >> step=7799500, episode=1301 reward=0.8075816 (463.21 it/sec) -training >> step=7799600, episode=1301 reward=0.7682907 (470.28 it/sec) -training >> step=7799700, episode=1301 reward=0.7840571 (521.80 it/sec) -training >> step=7799800, episode=1301 reward=0.7894571 (458.06 it/sec) -training >> step=7799900, episode=1301 reward=0.7899806 (501.15 it/sec) -training >> step=7800000, episode=1301 reward=0.7895049 (470.35 it/sec) -training >> step=7800100, episode=1301 reward=0.7828007 (496.69 it/sec) -training >> step=7800200, episode=1301 reward=0.7731372 (499.16 it/sec) -training >> step=7800300, episode=1301 reward=0.7773474 (441.84 it/sec) -training >> step=7800400, episode=1301 reward=0.7979947 (514.43 it/sec) -training >> step=7800500, episode=1301 reward=0.7853793 (474.95 it/sec) -training >> step=7800600, episode=1301 reward=0.7912232 (506.59 it/sec) -training >> step=7800700, episode=1301 reward=0.7995259 (519.29 it/sec) -training >> step=7800800, episode=1301 reward=0.7884702 (507.26 it/sec) -training >> step=7800900, episode=1301 reward=0.7868202 (453.76 it/sec) -training >> step=7801000, episode=1301 reward=0.7892942 (439.56 it/sec) -training >> step=7801100, episode=1301 reward=0.7996572 (468.52 it/sec) -training >> step=7801200, episode=1301 reward=0.7832166 (507.45 it/sec) -training >> step=7801300, episode=1301 reward=0.7896743 (469.32 it/sec) -training >> step=7801400, episode=1301 reward=0.784964 (512.06 it/sec) -training >> step=7801500, episode=1301 reward=0.7906364 (502.33 it/sec) -training >> step=7801600, episode=1301 reward=0.777202 (475.70 it/sec) -training >> step=7801700, episode=1301 reward=0.7852653 (496.57 it/sec) -training >> step=7801800, episode=1301 reward=0.7853085 (534.41 it/sec) -training >> step=7801900, episode=1301 reward=0.7839004 (513.54 it/sec) -training >> step=7802000, episode=1301 reward=0.7890769 (467.84 it/sec) -training >> step=7802100, episode=1301 reward=0.7963953 (524.27 it/sec) -training >> step=7802200, episode=1301 reward=0.7897282 (525.42 it/sec) -training >> step=7802300, episode=1301 reward=0.7705802 (447.84 it/sec) -training >> step=7802400, episode=1301 reward=0.7846078 (492.53 it/sec) -training >> step=7802500, episode=1301 reward=0.7809693 (505.95 it/sec) -training >> step=7802600, episode=1301 reward=0.7808372 (515.11 it/sec) -training >> step=7802700, episode=1301 reward=0.8018717 (501.09 it/sec) -training >> step=7802800, episode=1301 reward=0.7783381 (497.90 it/sec) -training >> step=7802900, episode=1301 reward=0.7863357 (485.83 it/sec) -training >> step=7803000, episode=1301 reward=0.7889627 (483.77 it/sec) -training >> step=7803100, episode=1301 reward=0.7896348 (433.28 it/sec) -training >> step=7803200, episode=1301 reward=0.7697953 (497.63 it/sec) -training >> step=7803300, episode=1301 reward=0.7844251 (522.58 it/sec) -training >> step=7803400, episode=1301 reward=0.8036073 (517.13 it/sec) -training >> step=7803500, episode=1301 reward=0.7870573 (490.37 it/sec) -training >> step=7803600, episode=1301 reward=0.7903621 (529.69 it/sec) -training >> step=7803700, episode=1301 reward=0.773554 (486.56 it/sec) -training >> step=7803800, episode=1301 reward=0.8061625 (478.44 it/sec) -training >> step=7803900, episode=1301 reward=0.7854716 (501.86 it/sec) -training >> step=7804000, episode=1301 reward=0.8223077 (495.40 it/sec) -training >> step=7804100, episode=1301 reward=0.7890855 (505.95 it/sec) -training >> step=7804200, episode=1301 reward=0.789395 (502.85 it/sec) -training >> step=7804300, episode=1301 reward=0.7938982 (521.75 it/sec) -training >> step=7804400, episode=1301 reward=0.7899798 (516.20 it/sec) -training >> step=7804500, episode=1301 reward=0.7792348 (450.94 it/sec) -training >> step=7804600, episode=1301 reward=0.7859326 (464.31 it/sec) -training >> step=7804700, episode=1301 reward=0.7894342 (540.43 it/sec) -training >> step=7804800, episode=1301 reward=0.8008595 (515.06 it/sec) -training >> step=7804900, episode=1301 reward=0.784741 (497.29 it/sec) -training >> step=7805000, episode=1301 reward=0.7814444 (518.04 it/sec) -training >> step=7805100, episode=1301 reward=0.7979738 (479.55 it/sec) -training >> step=7805200, episode=1301 reward=0.8057222 (485.12 it/sec) -training >> step=7805300, episode=1302 reward=0.8020277 (119.95 it/sec) -training >> step=7805400, episode=1302 reward=0.7874951 (496.70 it/sec) -training >> step=7805500, episode=1302 reward=0.7831902 (467.96 it/sec) -training >> step=7805600, episode=1302 reward=0.7671354 (526.70 it/sec) -training >> step=7805700, episode=1302 reward=0.7821147 (468.07 it/sec) -training >> step=7805800, episode=1302 reward=0.769536 (502.29 it/sec) -training >> step=7805900, episode=1302 reward=0.7635487 (514.25 it/sec) -training >> step=7806000, episode=1302 reward=0.7837554 (496.60 it/sec) -training >> step=7806100, episode=1302 reward=0.7822407 (508.31 it/sec) -training >> step=7806200, episode=1302 reward=0.7871528 (512.57 it/sec) -training >> step=7806300, episode=1302 reward=0.8002539 (451.65 it/sec) -training >> step=7806400, episode=1302 reward=0.7948597 (515.81 it/sec) -training >> step=7806500, episode=1302 reward=0.7846814 (501.43 it/sec) -training >> step=7806600, episode=1302 reward=0.7676321 (509.67 it/sec) -training >> step=7806700, episode=1302 reward=0.7977595 (514.64 it/sec) -training >> step=7806800, episode=1302 reward=0.7723438 (507.66 it/sec) -training >> step=7806900, episode=1302 reward=0.8024636 (496.87 it/sec) -training >> step=7807000, episode=1302 reward=0.7933759 (514.67 it/sec) -training >> step=7807100, episode=1302 reward=0.7882 (504.65 it/sec) -training >> step=7807200, episode=1302 reward=0.7964156 (471.61 it/sec) -training >> step=7807300, episode=1302 reward=0.801554 (500.58 it/sec) -training >> step=7807400, episode=1302 reward=0.79116 (441.37 it/sec) -training >> step=7807500, episode=1302 reward=0.7891088 (506.04 it/sec) -training >> step=7807600, episode=1302 reward=0.7926862 (539.65 it/sec) -training >> step=7807700, episode=1302 reward=0.8054261 (498.78 it/sec) -training >> step=7807800, episode=1302 reward=0.7975102 (448.96 it/sec) -training >> step=7807900, episode=1302 reward=0.794689 (527.05 it/sec) -training >> step=7808000, episode=1302 reward=0.7863523 (475.45 it/sec) -training >> step=7808100, episode=1302 reward=0.7972667 (438.32 it/sec) -training >> step=7808200, episode=1302 reward=0.7858462 (501.89 it/sec) -training >> step=7808300, episode=1302 reward=0.7769344 (480.29 it/sec) -training >> step=7808400, episode=1302 reward=0.8035775 (495.40 it/sec) -training >> step=7808500, episode=1302 reward=0.7858701 (486.33 it/sec) -training >> step=7808600, episode=1302 reward=0.7913294 (435.63 it/sec) -training >> step=7808700, episode=1302 reward=0.7773438 (528.79 it/sec) -training >> step=7808800, episode=1302 reward=0.7799446 (451.43 it/sec) -training >> step=7808900, episode=1302 reward=0.7821659 (483.74 it/sec) -training >> step=7809000, episode=1302 reward=0.7857012 (498.01 it/sec) -training >> step=7809100, episode=1302 reward=0.803171 (489.84 it/sec) -training >> step=7809200, episode=1302 reward=0.794514 (469.25 it/sec) -training >> step=7809300, episode=1302 reward=0.7972087 (481.31 it/sec) -training >> step=7809400, episode=1302 reward=0.8039396 (507.86 it/sec) -training >> step=7809500, episode=1302 reward=0.7802151 (501.07 it/sec) -training >> step=7809600, episode=1302 reward=0.8026134 (448.15 it/sec) -training >> step=7809700, episode=1302 reward=0.8074625 (520.21 it/sec) -training >> step=7809800, episode=1302 reward=0.7828062 (482.50 it/sec) -training >> step=7809900, episode=1302 reward=0.7992388 (475.49 it/sec) -training >> step=7810000, episode=1302 reward=0.8066667 (506.35 it/sec) -training >> step=7810100, episode=1302 reward=0.7852141 (528.55 it/sec) -training >> step=7810200, episode=1302 reward=0.8009625 (492.32 it/sec) -training >> step=7810300, episode=1302 reward=0.7849212 (483.52 it/sec) -training >> step=7810400, episode=1302 reward=0.7814937 (528.90 it/sec) -training >> step=7810500, episode=1302 reward=0.7813262 (499.59 it/sec) -training >> step=7810600, episode=1302 reward=0.7970785 (502.92 it/sec) -training >> step=7810700, episode=1302 reward=0.7664849 (510.42 it/sec) -training >> step=7810800, episode=1302 reward=0.7794708 (529.39 it/sec) -training >> step=7810900, episode=1302 reward=0.7976691 (512.23 it/sec) -training >> step=7811000, episode=1302 reward=0.7812639 (455.50 it/sec) -training >> step=7811100, episode=1302 reward=0.7975328 (495.52 it/sec) -training >> step=7811200, episode=1302 reward=0.7884204 (513.27 it/sec) -training >> step=7811300, episode=1303 reward=0.7844537 (117.77 it/sec) -training >> step=7811400, episode=1303 reward=0.7798807 (488.96 it/sec) -training >> step=7811500, episode=1303 reward=0.7916736 (496.87 it/sec) -training >> step=7811600, episode=1303 reward=0.784508 (474.97 it/sec) -training >> step=7811700, episode=1303 reward=0.7981902 (490.29 it/sec) -training >> step=7811800, episode=1303 reward=0.7905113 (463.55 it/sec) -training >> step=7811900, episode=1303 reward=0.7722587 (506.44 it/sec) -training >> step=7812000, episode=1303 reward=0.7767685 (521.45 it/sec) -training >> step=7812100, episode=1303 reward=0.7918218 (443.48 it/sec) -training >> step=7812200, episode=1303 reward=0.7739033 (512.77 it/sec) -training >> step=7812300, episode=1303 reward=0.7792839 (496.38 it/sec) -training >> step=7812400, episode=1303 reward=0.8004559 (490.78 it/sec) -training >> step=7812500, episode=1303 reward=0.8055385 (490.92 it/sec) -training >> step=7812600, episode=1303 reward=0.7836849 (486.39 it/sec) -training >> step=7812700, episode=1303 reward=0.8013601 (480.56 it/sec) -training >> step=7812800, episode=1303 reward=0.7951888 (474.74 it/sec) -training >> step=7812900, episode=1303 reward=0.7851564 (502.45 it/sec) -training >> step=7813000, episode=1303 reward=0.7643892 (520.34 it/sec) -training >> step=7813100, episode=1303 reward=0.7618669 (510.75 it/sec) -training >> step=7813200, episode=1303 reward=0.7745057 (480.64 it/sec) -training >> step=7813300, episode=1303 reward=0.7746875 (473.74 it/sec) -training >> step=7813400, episode=1303 reward=0.788492 (531.20 it/sec) -training >> step=7813500, episode=1303 reward=0.7867648 (468.93 it/sec) -training >> step=7813600, episode=1303 reward=0.7875488 (487.60 it/sec) -training >> step=7813700, episode=1303 reward=0.8030198 (521.47 it/sec) -training >> step=7813800, episode=1303 reward=0.8033226 (513.60 it/sec) -training >> step=7813900, episode=1303 reward=0.7947262 (496.81 it/sec) -training >> step=7814000, episode=1303 reward=0.7974698 (504.55 it/sec) -training >> step=7814100, episode=1303 reward=0.798575 (520.05 it/sec) -training >> step=7814200, episode=1303 reward=0.8182662 (471.40 it/sec) -training >> step=7814300, episode=1303 reward=0.7874646 (474.39 it/sec) -training >> step=7814400, episode=1303 reward=0.7972888 (463.47 it/sec) -training >> step=7814500, episode=1303 reward=0.7863051 (492.61 it/sec) -training >> step=7814600, episode=1303 reward=0.8076386 (511.00 it/sec) -training >> step=7814700, episode=1303 reward=0.7582176 (498.08 it/sec) -training >> step=7814800, episode=1303 reward=0.7883299 (537.46 it/sec) -training >> step=7814900, episode=1303 reward=0.8038777 (508.51 it/sec) -training >> step=7815000, episode=1303 reward=0.788623 (484.70 it/sec) -training >> step=7815100, episode=1303 reward=0.8002596 (478.60 it/sec) -training >> step=7815200, episode=1303 reward=0.7814937 (517.66 it/sec) -training >> step=7815300, episode=1303 reward=0.7953819 (514.30 it/sec) -training >> step=7815400, episode=1303 reward=0.7859609 (473.34 it/sec) -training >> step=7815500, episode=1303 reward=0.7899209 (512.22 it/sec) -training >> step=7815600, episode=1303 reward=0.773216 (451.31 it/sec) -training >> step=7815700, episode=1303 reward=0.7940018 (514.77 it/sec) -training >> step=7815800, episode=1303 reward=0.7762839 (452.86 it/sec) -training >> step=7815900, episode=1303 reward=0.8206514 (509.02 it/sec) -training >> step=7816000, episode=1303 reward=0.7922134 (497.23 it/sec) -training >> step=7816100, episode=1303 reward=0.8155404 (450.65 it/sec) -training >> step=7816200, episode=1303 reward=0.7920738 (475.90 it/sec) -training >> step=7816300, episode=1303 reward=0.7857506 (509.53 it/sec) -training >> step=7816400, episode=1303 reward=0.7926553 (494.66 it/sec) -training >> step=7816500, episode=1303 reward=0.8026946 (495.63 it/sec) -training >> step=7816600, episode=1303 reward=0.777563 (511.82 it/sec) -training >> step=7816700, episode=1303 reward=0.7995313 (502.73 it/sec) -training >> step=7816800, episode=1303 reward=0.7949632 (490.74 it/sec) -training >> step=7816900, episode=1303 reward=0.7865095 (516.72 it/sec) -training >> step=7817000, episode=1303 reward=0.7730685 (514.15 it/sec) -training >> step=7817100, episode=1303 reward=0.7972577 (487.57 it/sec) -training >> step=7817200, episode=1303 reward=0.7842484 (479.13 it/sec) -training >> step=7817300, episode=1304 reward=0.7904909 (134.72 it/sec) -training >> step=7817400, episode=1304 reward=0.8017018 (507.47 it/sec) -training >> step=7817500, episode=1304 reward=0.7744762 (507.61 it/sec) -training >> step=7817600, episode=1304 reward=0.7590752 (512.90 it/sec) -training >> step=7817700, episode=1304 reward=0.7955464 (543.38 it/sec) -training >> step=7817800, episode=1304 reward=0.7775738 (456.10 it/sec) -training >> step=7817900, episode=1304 reward=0.778916 (486.10 it/sec) -training >> step=7818000, episode=1304 reward=0.8080384 (527.04 it/sec) -training >> step=7818100, episode=1304 reward=0.7867015 (468.35 it/sec) -training >> step=7818200, episode=1304 reward=0.7882882 (502.92 it/sec) -training >> step=7818300, episode=1304 reward=0.7824118 (496.07 it/sec) -training >> step=7818400, episode=1304 reward=0.7879953 (526.97 it/sec) -training >> step=7818500, episode=1304 reward=0.801063 (478.29 it/sec) -training >> step=7818600, episode=1304 reward=0.7854364 (473.54 it/sec) -training >> step=7818700, episode=1304 reward=0.7712533 (526.18 it/sec) -training >> step=7818800, episode=1304 reward=0.7739983 (468.46 it/sec) -training >> step=7818900, episode=1304 reward=0.782933 (503.96 it/sec) -training >> step=7819000, episode=1304 reward=0.7673315 (488.69 it/sec) -training >> step=7819100, episode=1304 reward=0.7965415 (539.32 it/sec) -training >> step=7819200, episode=1304 reward=0.8122334 (518.57 it/sec) -training >> step=7819300, episode=1304 reward=0.7856741 (460.56 it/sec) -training >> step=7819400, episode=1304 reward=0.8037163 (498.61 it/sec) -training >> step=7819500, episode=1304 reward=0.7925723 (500.69 it/sec) -training >> step=7819600, episode=1304 reward=0.7877201 (459.98 it/sec) -training >> step=7819700, episode=1304 reward=0.7732824 (522.82 it/sec) -training >> step=7819800, episode=1304 reward=0.8087386 (419.66 it/sec) -training >> step=7819900, episode=1304 reward=0.8051761 (467.20 it/sec) -training >> step=7820000, episode=1304 reward=0.8017347 (531.29 it/sec) -training >> step=7820100, episode=1304 reward=0.7937028 (495.15 it/sec) -training >> step=7820200, episode=1304 reward=0.7768186 (535.97 it/sec) -training >> step=7820300, episode=1304 reward=0.7879633 (510.79 it/sec) -training >> step=7820400, episode=1304 reward=0.7799452 (485.78 it/sec) -training >> step=7820500, episode=1304 reward=0.7971737 (526.91 it/sec) -training >> step=7820600, episode=1304 reward=0.8115284 (454.94 it/sec) -training >> step=7820700, episode=1304 reward=0.7954906 (505.22 it/sec) -training >> step=7820800, episode=1304 reward=0.7943611 (503.61 it/sec) -training >> step=7820900, episode=1304 reward=0.7777956 (479.32 it/sec) -training >> step=7821000, episode=1304 reward=0.755769 (510.07 it/sec) -training >> step=7821100, episode=1304 reward=0.7818908 (472.46 it/sec) -training >> step=7821200, episode=1304 reward=0.8001664 (470.93 it/sec) -training >> step=7821300, episode=1304 reward=0.7709544 (477.56 it/sec) -training >> step=7821400, episode=1304 reward=0.7996654 (495.38 it/sec) -training >> step=7821500, episode=1304 reward=0.793467 (485.26 it/sec) -training >> step=7821600, episode=1304 reward=0.7883735 (506.93 it/sec) -training >> step=7821700, episode=1304 reward=0.7948057 (475.98 it/sec) -training >> step=7821800, episode=1304 reward=0.8058976 (532.81 it/sec) -training >> step=7821900, episode=1304 reward=0.7791492 (506.05 it/sec) -training >> step=7822000, episode=1304 reward=0.778226 (515.90 it/sec) -training >> step=7822100, episode=1304 reward=0.7912714 (511.13 it/sec) -training >> step=7822200, episode=1304 reward=0.8012042 (485.13 it/sec) -training >> step=7822300, episode=1304 reward=0.7923925 (531.70 it/sec) -training >> step=7822400, episode=1304 reward=0.7754524 (503.30 it/sec) -training >> step=7822500, episode=1304 reward=0.8015002 (486.36 it/sec) -training >> step=7822600, episode=1304 reward=0.8075029 (468.96 it/sec) -training >> step=7822700, episode=1304 reward=0.7965298 (532.74 it/sec) -training >> step=7822800, episode=1304 reward=0.7944431 (495.85 it/sec) -training >> step=7822900, episode=1304 reward=0.7665589 (515.05 it/sec) -training >> step=7823000, episode=1304 reward=0.7894282 (489.48 it/sec) -training >> step=7823100, episode=1304 reward=0.7754357 (539.64 it/sec) -training >> step=7823200, episode=1304 reward=0.7826744 (511.98 it/sec) -training >> step=7823300, episode=1305 reward=0.804421 (122.57 it/sec) -training >> step=7823400, episode=1305 reward=0.7643837 (520.33 it/sec) -training >> step=7823500, episode=1305 reward=0.794022 (501.00 it/sec) -training >> step=7823600, episode=1305 reward=0.7743732 (512.24 it/sec) -training >> step=7823700, episode=1305 reward=0.7813392 (484.70 it/sec) -training >> step=7823800, episode=1305 reward=0.7907291 (479.09 it/sec) -training >> step=7823900, episode=1305 reward=0.7895136 (543.92 it/sec) -training >> step=7824000, episode=1305 reward=0.7962646 (494.57 it/sec) -training >> step=7824100, episode=1305 reward=0.7715942 (519.06 it/sec) -training >> step=7824200, episode=1305 reward=0.7943689 (529.11 it/sec) -training >> step=7824300, episode=1305 reward=0.797448 (502.31 it/sec) -training >> step=7824400, episode=1305 reward=0.7949103 (510.73 it/sec) -training >> step=7824500, episode=1305 reward=0.7777601 (532.78 it/sec) -training >> step=7824600, episode=1305 reward=0.7726242 (487.07 it/sec) -training >> step=7824700, episode=1305 reward=0.7941641 (464.65 it/sec) -training >> step=7824800, episode=1305 reward=0.7799841 (470.65 it/sec) -training >> step=7824900, episode=1305 reward=0.8070902 (492.36 it/sec) -training >> step=7825000, episode=1305 reward=0.7901042 (512.14 it/sec) -training >> step=7825100, episode=1305 reward=0.7748586 (479.74 it/sec) -training >> step=7825200, episode=1305 reward=0.7977773 (503.58 it/sec) -training >> step=7825300, episode=1305 reward=0.7974716 (473.91 it/sec) -training >> step=7825400, episode=1305 reward=0.7878511 (492.69 it/sec) -training >> step=7825500, episode=1305 reward=0.788905 (485.02 it/sec) -training >> step=7825600, episode=1305 reward=0.773744 (519.42 it/sec) -training >> step=7825700, episode=1305 reward=0.7936307 (515.16 it/sec) -training >> step=7825800, episode=1305 reward=0.7994688 (442.49 it/sec) -training >> step=7825900, episode=1305 reward=0.774176 (477.43 it/sec) -training >> step=7826000, episode=1305 reward=0.7841159 (496.95 it/sec) -training >> step=7826100, episode=1305 reward=0.7926503 (478.01 it/sec) -training >> step=7826200, episode=1305 reward=0.8013201 (465.48 it/sec) -training >> step=7826300, episode=1305 reward=0.7732912 (495.94 it/sec) -training >> step=7826400, episode=1305 reward=0.7635903 (489.23 it/sec) -training >> step=7826500, episode=1305 reward=0.7889348 (470.01 it/sec) -training >> step=7826600, episode=1305 reward=0.7816969 (492.83 it/sec) -training >> step=7826700, episode=1305 reward=0.7914893 (484.15 it/sec) -training >> step=7826800, episode=1305 reward=0.7947327 (477.28 it/sec) -training >> step=7826900, episode=1305 reward=0.7850021 (515.43 it/sec) -training >> step=7827000, episode=1305 reward=0.7833345 (513.56 it/sec) -training >> step=7827100, episode=1305 reward=0.7848353 (501.51 it/sec) -training >> step=7827200, episode=1305 reward=0.7995823 (512.97 it/sec) -training >> step=7827300, episode=1305 reward=0.7953187 (473.89 it/sec) -training >> step=7827400, episode=1305 reward=0.8029396 (538.44 it/sec) -training >> step=7827500, episode=1305 reward=0.7789644 (472.55 it/sec) -training >> step=7827600, episode=1305 reward=0.7860852 (441.64 it/sec) -training >> step=7827700, episode=1305 reward=0.8194584 (482.69 it/sec) -training >> step=7827800, episode=1305 reward=0.7954575 (496.11 it/sec) -training >> step=7827900, episode=1305 reward=0.7784584 (486.49 it/sec) -training >> step=7828000, episode=1305 reward=0.7869185 (450.26 it/sec) -training >> step=7828100, episode=1305 reward=0.7791782 (528.31 it/sec) -training >> step=7828200, episode=1305 reward=0.7793471 (514.46 it/sec) -training >> step=7828300, episode=1305 reward=0.7866862 (424.02 it/sec) -training >> step=7828400, episode=1305 reward=0.7945988 (523.53 it/sec) -training >> step=7828500, episode=1305 reward=0.8021306 (524.05 it/sec) -training >> step=7828600, episode=1305 reward=0.793864 (478.08 it/sec) -training >> step=7828700, episode=1305 reward=0.7916927 (538.76 it/sec) -training >> step=7828800, episode=1305 reward=0.7943252 (517.05 it/sec) -training >> step=7828900, episode=1305 reward=0.7941914 (434.87 it/sec) -training >> step=7829000, episode=1305 reward=0.779575 (527.21 it/sec) -training >> step=7829100, episode=1305 reward=0.7781069 (512.42 it/sec) -training >> step=7829200, episode=1305 reward=0.7841085 (498.81 it/sec) -training >> step=7829300, episode=1306 reward=0.764649 (145.89 it/sec) -training >> step=7829400, episode=1306 reward=0.7765086 (493.07 it/sec) -training >> step=7829500, episode=1306 reward=0.789012 (483.87 it/sec) -training >> step=7829600, episode=1306 reward=0.7890486 (468.56 it/sec) -training >> step=7829700, episode=1306 reward=0.7791041 (503.29 it/sec) -training >> step=7829800, episode=1306 reward=0.801187 (484.77 it/sec) -training >> step=7829900, episode=1306 reward=0.7854721 (476.52 it/sec) -training >> step=7830000, episode=1306 reward=0.7970678 (472.50 it/sec) -training >> step=7830100, episode=1306 reward=0.7800154 (496.04 it/sec) -training >> step=7830200, episode=1306 reward=0.7705423 (520.58 it/sec) -training >> step=7830300, episode=1306 reward=0.7885928 (511.45 it/sec) -training >> step=7830400, episode=1306 reward=0.7900562 (475.18 it/sec) -training >> step=7830500, episode=1306 reward=0.7891373 (478.24 it/sec) -training >> step=7830600, episode=1306 reward=0.786504 (495.06 it/sec) -training >> step=7830700, episode=1306 reward=0.78719 (478.97 it/sec) -training >> step=7830800, episode=1306 reward=0.7831684 (445.82 it/sec) -training >> step=7830900, episode=1306 reward=0.79204 (521.56 it/sec) -training >> step=7831000, episode=1306 reward=0.8109152 (530.33 it/sec) -training >> step=7831100, episode=1306 reward=0.7805433 (453.62 it/sec) -training >> step=7831200, episode=1306 reward=0.7945736 (504.60 it/sec) -training >> step=7831300, episode=1306 reward=0.8110917 (524.97 it/sec) -training >> step=7831400, episode=1306 reward=0.8036486 (532.02 it/sec) -training >> step=7831500, episode=1306 reward=0.7964012 (494.76 it/sec) -training >> step=7831600, episode=1306 reward=0.7938181 (476.44 it/sec) -training >> step=7831700, episode=1306 reward=0.8022693 (532.77 it/sec) -training >> step=7831800, episode=1306 reward=0.7863801 (430.63 it/sec) -training >> step=7831900, episode=1306 reward=0.7910393 (454.68 it/sec) -training >> step=7832000, episode=1306 reward=0.771998 (512.41 it/sec) -training >> step=7832100, episode=1306 reward=0.7871618 (469.18 it/sec) -training >> step=7832200, episode=1306 reward=0.7991368 (490.18 it/sec) -training >> step=7832300, episode=1306 reward=0.7705044 (485.57 it/sec) -training >> step=7832400, episode=1306 reward=0.7634326 (536.92 it/sec) -training >> step=7832500, episode=1306 reward=0.7937043 (492.13 it/sec) -training >> step=7832600, episode=1306 reward=0.7987739 (437.73 it/sec) -training >> step=7832700, episode=1306 reward=0.7889693 (497.14 it/sec) -training >> step=7832800, episode=1306 reward=0.7907481 (485.27 it/sec) -training >> step=7832900, episode=1306 reward=0.7975652 (471.37 it/sec) -training >> step=7833000, episode=1306 reward=0.7845411 (434.30 it/sec) -training >> step=7833100, episode=1306 reward=0.7970151 (463.04 it/sec) -training >> step=7833200, episode=1306 reward=0.7919884 (512.52 it/sec) -training >> step=7833300, episode=1306 reward=0.7904504 (468.20 it/sec) -training >> step=7833400, episode=1306 reward=0.7717716 (475.38 it/sec) -training >> step=7833500, episode=1306 reward=0.790143 (496.83 it/sec) -training >> step=7833600, episode=1306 reward=0.8040183 (497.25 it/sec) -training >> step=7833700, episode=1306 reward=0.7704535 (498.58 it/sec) -training >> step=7833800, episode=1306 reward=0.7871035 (502.20 it/sec) -training >> step=7833900, episode=1306 reward=0.7963281 (539.00 it/sec) -training >> step=7834000, episode=1306 reward=0.7591838 (523.76 it/sec) -training >> step=7834100, episode=1306 reward=0.7905156 (468.77 it/sec) -training >> step=7834200, episode=1306 reward=0.8162183 (517.93 it/sec) -training >> step=7834300, episode=1306 reward=0.784565 (505.33 it/sec) -training >> step=7834400, episode=1306 reward=0.7642387 (529.33 it/sec) -training >> step=7834500, episode=1306 reward=0.7899908 (481.28 it/sec) -training >> step=7834600, episode=1306 reward=0.7962031 (514.31 it/sec) -training >> step=7834700, episode=1306 reward=0.7855696 (433.01 it/sec) -training >> step=7834800, episode=1306 reward=0.788843 (485.12 it/sec) -training >> step=7834900, episode=1306 reward=0.7758905 (479.83 it/sec) -training >> step=7835000, episode=1306 reward=0.790417 (461.59 it/sec) -training >> step=7835100, episode=1306 reward=0.7829747 (463.37 it/sec) -training >> step=7835200, episode=1306 reward=0.7899557 (485.37 it/sec) -training >> step=7835300, episode=1307 reward=0.7897038 (156.45 it/sec) -training >> step=7835400, episode=1307 reward=0.7761152 (476.48 it/sec) -training >> step=7835500, episode=1307 reward=0.7759389 (468.78 it/sec) -training >> step=7835600, episode=1307 reward=0.8046681 (495.99 it/sec) -training >> step=7835700, episode=1307 reward=0.7784895 (543.91 it/sec) -training >> step=7835800, episode=1307 reward=0.7966138 (509.47 it/sec) -training >> step=7835900, episode=1307 reward=0.7859187 (483.85 it/sec) -training >> step=7836000, episode=1307 reward=0.7908183 (501.90 it/sec) -training >> step=7836100, episode=1307 reward=0.7919585 (469.86 it/sec) -training >> step=7836200, episode=1307 reward=0.7995679 (539.47 it/sec) -training >> step=7836300, episode=1307 reward=0.7877349 (519.08 it/sec) -training >> step=7836400, episode=1307 reward=0.8058406 (504.80 it/sec) -training >> step=7836500, episode=1307 reward=0.7896479 (497.56 it/sec) -training >> step=7836600, episode=1307 reward=0.7691089 (484.90 it/sec) -training >> step=7836700, episode=1307 reward=0.7944197 (545.92 it/sec) -training >> step=7836800, episode=1307 reward=0.7880915 (532.32 it/sec) -training >> step=7836900, episode=1307 reward=0.7909968 (494.74 it/sec) -training >> step=7837000, episode=1307 reward=0.7933502 (510.47 it/sec) -training >> step=7837100, episode=1307 reward=0.7916906 (485.57 it/sec) -training >> step=7837200, episode=1307 reward=0.7850811 (493.36 it/sec) -training >> step=7837300, episode=1307 reward=0.7834169 (471.04 it/sec) -training >> step=7837400, episode=1307 reward=0.784638 (495.47 it/sec) -training >> step=7837500, episode=1307 reward=0.791687 (533.81 it/sec) -training >> step=7837600, episode=1307 reward=0.8048234 (471.19 it/sec) -training >> step=7837700, episode=1307 reward=0.7835447 (434.12 it/sec) -training >> step=7837800, episode=1307 reward=0.7854757 (555.42 it/sec) -training >> step=7837900, episode=1307 reward=0.7946511 (486.12 it/sec) -training >> step=7838000, episode=1307 reward=0.7748999 (442.73 it/sec) -training >> step=7838100, episode=1307 reward=0.7904155 (480.21 it/sec) -training >> step=7838200, episode=1307 reward=0.7881061 (508.26 it/sec) -training >> step=7838300, episode=1307 reward=0.8042443 (497.13 it/sec) -training >> step=7838400, episode=1307 reward=0.7958526 (429.12 it/sec) -training >> step=7838500, episode=1307 reward=0.792867 (470.96 it/sec) -training >> step=7838600, episode=1307 reward=0.7964252 (520.56 it/sec) -training >> step=7838700, episode=1307 reward=0.7723675 (511.93 it/sec) -training >> step=7838800, episode=1307 reward=0.8057655 (446.85 it/sec) -training >> step=7838900, episode=1307 reward=0.7722524 (481.34 it/sec) -training >> step=7839000, episode=1307 reward=0.7902134 (508.40 it/sec) -training >> step=7839100, episode=1307 reward=0.7831938 (465.78 it/sec) -training >> step=7839200, episode=1307 reward=0.7867186 (512.14 it/sec) -training >> step=7839300, episode=1307 reward=0.7850482 (540.12 it/sec) -training >> step=7839400, episode=1307 reward=0.7877643 (483.89 it/sec) -training >> step=7839500, episode=1307 reward=0.801866 (483.38 it/sec) -training >> step=7839600, episode=1307 reward=0.7848253 (469.49 it/sec) -training >> step=7839700, episode=1307 reward=0.7990048 (517.06 it/sec) -training >> step=7839800, episode=1307 reward=0.8085999 (474.34 it/sec) -training >> step=7839900, episode=1307 reward=0.7851206 (460.97 it/sec) -training >> step=7840000, episode=1307 reward=0.806435 (484.62 it/sec) -training >> step=7840100, episode=1307 reward=0.7965649 (502.07 it/sec) -training >> step=7840200, episode=1307 reward=0.7792624 (529.57 it/sec) -training >> step=7840300, episode=1307 reward=0.7921372 (514.09 it/sec) -training >> step=7840400, episode=1307 reward=0.7899125 (506.94 it/sec) -training >> step=7840500, episode=1307 reward=0.7703626 (433.77 it/sec) -training >> step=7840600, episode=1307 reward=0.7857309 (492.96 it/sec) -training >> step=7840700, episode=1307 reward=0.7856131 (488.49 it/sec) -training >> step=7840800, episode=1307 reward=0.78686 (489.20 it/sec) -training >> step=7840900, episode=1307 reward=0.7736378 (481.74 it/sec) -training >> step=7841000, episode=1307 reward=0.8017702 (493.71 it/sec) -training >> step=7841100, episode=1307 reward=0.7765576 (524.53 it/sec) -training >> step=7841200, episode=1307 reward=0.7746116 (479.87 it/sec) -training >> step=7841300, episode=1308 reward=0.7707953 (123.67 it/sec) -training >> step=7841400, episode=1308 reward=0.7890446 (452.67 it/sec) -training >> step=7841500, episode=1308 reward=0.7887827 (501.00 it/sec) -training >> step=7841600, episode=1308 reward=0.7888982 (474.36 it/sec) -training >> step=7841700, episode=1308 reward=0.7834163 (437.37 it/sec) -training >> step=7841800, episode=1308 reward=0.8009068 (451.04 it/sec) -training >> step=7841900, episode=1308 reward=0.7785332 (517.60 it/sec) -training >> step=7842000, episode=1308 reward=0.8062198 (491.60 it/sec) -training >> step=7842100, episode=1308 reward=0.7918954 (528.03 it/sec) -training >> step=7842200, episode=1308 reward=0.7909974 (488.64 it/sec) -training >> step=7842300, episode=1308 reward=0.7813194 (478.54 it/sec) -training >> step=7842400, episode=1308 reward=0.7832606 (478.48 it/sec) -training >> step=7842500, episode=1308 reward=0.7865799 (514.36 it/sec) -training >> step=7842600, episode=1308 reward=0.7715558 (516.68 it/sec) -training >> step=7842700, episode=1308 reward=0.7832518 (480.57 it/sec) -training >> step=7842800, episode=1308 reward=0.7922392 (456.99 it/sec) -training >> step=7842900, episode=1308 reward=0.788187 (497.15 it/sec) -training >> step=7843000, episode=1308 reward=0.7817631 (503.24 it/sec) -training >> step=7843100, episode=1308 reward=0.7784449 (524.56 it/sec) -training >> step=7843200, episode=1308 reward=0.7878352 (442.04 it/sec) -training >> step=7843300, episode=1308 reward=0.7962379 (438.57 it/sec) -training >> step=7843400, episode=1308 reward=0.7864683 (423.85 it/sec) -training >> step=7843500, episode=1308 reward=0.7828913 (476.18 it/sec) -training >> step=7843600, episode=1308 reward=0.7947371 (542.56 it/sec) -training >> step=7843700, episode=1308 reward=0.7961847 (477.65 it/sec) -training >> step=7843800, episode=1308 reward=0.7885591 (525.16 it/sec) -training >> step=7843900, episode=1308 reward=0.7976717 (460.05 it/sec) -training >> step=7844000, episode=1308 reward=0.7833085 (517.96 it/sec) -training >> step=7844100, episode=1308 reward=0.7907549 (493.31 it/sec) -training >> step=7844200, episode=1308 reward=0.7946565 (434.20 it/sec) -training >> step=7844300, episode=1308 reward=0.7771665 (462.88 it/sec) -training >> step=7844400, episode=1308 reward=0.788201 (477.75 it/sec) -training >> step=7844500, episode=1308 reward=0.792944 (465.47 it/sec) -training >> step=7844600, episode=1308 reward=0.7891084 (479.06 it/sec) -training >> step=7844700, episode=1308 reward=0.7889543 (493.18 it/sec) -training >> step=7844800, episode=1308 reward=0.8097157 (479.21 it/sec) -training >> step=7844900, episode=1308 reward=0.7865074 (501.71 it/sec) -training >> step=7845000, episode=1308 reward=0.798785 (513.53 it/sec) -training >> step=7845100, episode=1308 reward=0.7949839 (524.25 it/sec) -training >> step=7845200, episode=1308 reward=0.7682669 (452.99 it/sec) -training >> step=7845300, episode=1308 reward=0.7903986 (472.30 it/sec) -training >> step=7845400, episode=1308 reward=0.7902983 (508.62 it/sec) -training >> step=7845500, episode=1308 reward=0.797183 (512.15 it/sec) -training >> step=7845600, episode=1308 reward=0.7881123 (506.79 it/sec) -training >> step=7845700, episode=1308 reward=0.7887319 (475.09 it/sec) -training >> step=7845800, episode=1308 reward=0.7631042 (511.93 it/sec) -training >> step=7845900, episode=1308 reward=0.7896044 (517.78 it/sec) -training >> step=7846000, episode=1308 reward=0.8054819 (477.79 it/sec) -training >> step=7846100, episode=1308 reward=0.7899529 (503.99 it/sec) -training >> step=7846200, episode=1308 reward=0.8143398 (511.53 it/sec) -training >> step=7846300, episode=1308 reward=0.7942635 (501.50 it/sec) -training >> step=7846400, episode=1308 reward=0.8010319 (518.64 it/sec) -training >> step=7846500, episode=1308 reward=0.7708786 (501.22 it/sec) -training >> step=7846600, episode=1308 reward=0.784068 (499.31 it/sec) -training >> step=7846700, episode=1308 reward=0.77998 (489.52 it/sec) -training >> step=7846800, episode=1308 reward=0.7891594 (538.58 it/sec) -training >> step=7846900, episode=1308 reward=0.7911776 (433.82 it/sec) -training >> step=7847000, episode=1308 reward=0.7861629 (463.52 it/sec) -training >> step=7847100, episode=1308 reward=0.7742466 (461.53 it/sec) -training >> step=7847200, episode=1308 reward=0.7890729 (521.90 it/sec) -training >> step=7847300, episode=1309 reward=0.7945653 (119.17 it/sec) -training >> step=7847400, episode=1309 reward=0.7898375 (461.90 it/sec) -training >> step=7847500, episode=1309 reward=0.7995946 (490.88 it/sec) -training >> step=7847600, episode=1309 reward=0.7824386 (499.87 it/sec) -training >> step=7847700, episode=1309 reward=0.7729758 (476.63 it/sec) -training >> step=7847800, episode=1309 reward=0.795542 (488.99 it/sec) -training >> step=7847900, episode=1309 reward=0.8018495 (473.48 it/sec) -training >> step=7848000, episode=1309 reward=0.7819275 (498.24 it/sec) -training >> step=7848100, episode=1309 reward=0.8005289 (464.73 it/sec) -training >> step=7848200, episode=1309 reward=0.7763962 (496.43 it/sec) -training >> step=7848300, episode=1309 reward=0.7747639 (528.92 it/sec) -training >> step=7848400, episode=1309 reward=0.7777477 (479.52 it/sec) -training >> step=7848500, episode=1309 reward=0.7797881 (488.96 it/sec) -training >> step=7848600, episode=1309 reward=0.7848899 (505.63 it/sec) -training >> step=7848700, episode=1309 reward=0.7804857 (477.81 it/sec) -training >> step=7848800, episode=1309 reward=0.7865024 (469.70 it/sec) -training >> step=7848900, episode=1309 reward=0.7993259 (412.59 it/sec) -training >> step=7849000, episode=1309 reward=0.7784259 (516.45 it/sec) -training >> step=7849100, episode=1309 reward=0.7862864 (473.79 it/sec) -training >> step=7849200, episode=1309 reward=0.7927841 (439.95 it/sec) -training >> step=7849300, episode=1309 reward=0.7968174 (506.38 it/sec) -training >> step=7849400, episode=1309 reward=0.8026984 (425.11 it/sec) -training >> step=7849500, episode=1309 reward=0.7915595 (436.64 it/sec) -training >> step=7849600, episode=1309 reward=0.8026973 (452.27 it/sec) -training >> step=7849700, episode=1309 reward=0.804454 (532.68 it/sec) -training >> step=7849800, episode=1309 reward=0.7816626 (475.57 it/sec) -training >> step=7849900, episode=1309 reward=0.781711 (476.44 it/sec) -training >> step=7850000, episode=1309 reward=0.755555 (452.83 it/sec) -training >> step=7850100, episode=1309 reward=0.7870007 (523.45 it/sec) -training >> step=7850200, episode=1309 reward=0.7946319 (499.86 it/sec) -training >> step=7850300, episode=1309 reward=0.7814888 (469.49 it/sec) -training >> step=7850400, episode=1309 reward=0.7955664 (511.05 it/sec) -training >> step=7850500, episode=1309 reward=0.7841092 (502.82 it/sec) -training >> step=7850600, episode=1309 reward=0.7854534 (502.72 it/sec) -training >> step=7850700, episode=1309 reward=0.7897943 (505.25 it/sec) -training >> step=7850800, episode=1309 reward=0.7892772 (499.65 it/sec) -training >> step=7850900, episode=1309 reward=0.7834108 (473.80 it/sec) -training >> step=7851000, episode=1309 reward=0.7854446 (507.53 it/sec) -training >> step=7851100, episode=1309 reward=0.7758991 (467.86 it/sec) -training >> step=7851200, episode=1309 reward=0.7795147 (524.06 it/sec) -training >> step=7851300, episode=1309 reward=0.8059539 (499.55 it/sec) -training >> step=7851400, episode=1309 reward=0.8034429 (503.44 it/sec) -training >> step=7851500, episode=1309 reward=0.792617 (547.08 it/sec) -training >> step=7851600, episode=1309 reward=0.7769807 (475.89 it/sec) -training >> step=7851700, episode=1309 reward=0.789977 (500.24 it/sec) -training >> step=7851800, episode=1309 reward=0.7718071 (494.39 it/sec) -training >> step=7851900, episode=1309 reward=0.8169052 (505.32 it/sec) -training >> step=7852000, episode=1309 reward=0.8107052 (528.10 it/sec) -training >> step=7852100, episode=1309 reward=0.7965634 (493.09 it/sec) -training >> step=7852200, episode=1309 reward=0.7853298 (524.72 it/sec) -training >> step=7852300, episode=1309 reward=0.7705532 (494.27 it/sec) -training >> step=7852400, episode=1309 reward=0.7860145 (492.56 it/sec) -training >> step=7852500, episode=1309 reward=0.7931283 (512.22 it/sec) -training >> step=7852600, episode=1309 reward=0.779145 (512.96 it/sec) -training >> step=7852700, episode=1309 reward=0.7976025 (499.41 it/sec) -training >> step=7852800, episode=1309 reward=0.7914366 (437.30 it/sec) -training >> step=7852900, episode=1309 reward=0.8033917 (513.77 it/sec) -training >> step=7853000, episode=1309 reward=0.7975072 (530.63 it/sec) -training >> step=7853100, episode=1309 reward=0.7767807 (501.82 it/sec) -training >> step=7853200, episode=1309 reward=0.784586 (508.29 it/sec) -training >> step=7853300, episode=1310 reward=0.7862092 (111.33 it/sec) -training >> step=7853400, episode=1310 reward=0.7751957 (478.03 it/sec) -training >> step=7853500, episode=1310 reward=0.7862729 (510.44 it/sec) -training >> step=7853600, episode=1310 reward=0.7841243 (504.75 it/sec) -training >> step=7853700, episode=1310 reward=0.7766259 (494.75 it/sec) -training >> step=7853800, episode=1310 reward=0.7991968 (481.17 it/sec) -training >> step=7853900, episode=1310 reward=0.7823034 (513.47 it/sec) -training >> step=7854000, episode=1310 reward=0.7949458 (474.99 it/sec) -training >> step=7854100, episode=1310 reward=0.7899708 (451.55 it/sec) -training >> step=7854200, episode=1310 reward=0.790643 (509.96 it/sec) -training >> step=7854300, episode=1310 reward=0.7906296 (481.97 it/sec) -training >> step=7854400, episode=1310 reward=0.7848506 (466.81 it/sec) -training >> step=7854500, episode=1310 reward=0.7746226 (507.06 it/sec) -training >> step=7854600, episode=1310 reward=0.782826 (492.82 it/sec) -training >> step=7854700, episode=1310 reward=0.7981865 (509.85 it/sec) -training >> step=7854800, episode=1310 reward=0.7837094 (499.35 it/sec) -training >> step=7854900, episode=1310 reward=0.7900973 (499.33 it/sec) -training >> step=7855000, episode=1310 reward=0.7714987 (513.19 it/sec) -training >> step=7855100, episode=1310 reward=0.7789965 (517.42 it/sec) -training >> step=7855200, episode=1310 reward=0.7980313 (508.16 it/sec) -training >> step=7855300, episode=1310 reward=0.7892758 (479.54 it/sec) -training >> step=7855400, episode=1310 reward=0.7924047 (478.20 it/sec) -training >> step=7855500, episode=1310 reward=0.7926508 (450.96 it/sec) -training >> step=7855600, episode=1310 reward=0.7993361 (530.96 it/sec) -training >> step=7855700, episode=1310 reward=0.7951213 (486.04 it/sec) -training >> step=7855800, episode=1310 reward=0.7947125 (451.69 it/sec) -training >> step=7855900, episode=1310 reward=0.7831557 (520.03 it/sec) -training >> step=7856000, episode=1310 reward=0.775004 (502.76 it/sec) -training >> step=7856100, episode=1310 reward=0.8008946 (507.72 it/sec) -training >> step=7856200, episode=1310 reward=0.7653409 (502.76 it/sec) -training >> step=7856300, episode=1310 reward=0.7647792 (510.32 it/sec) -training >> step=7856400, episode=1310 reward=0.8037115 (475.56 it/sec) -training >> step=7856500, episode=1310 reward=0.8098345 (496.73 it/sec) -training >> step=7856600, episode=1310 reward=0.7872047 (506.76 it/sec) -training >> step=7856700, episode=1310 reward=0.7866366 (534.24 it/sec) -training >> step=7856800, episode=1310 reward=0.7863296 (483.57 it/sec) -training >> step=7856900, episode=1310 reward=0.7750241 (461.03 it/sec) -training >> step=7857000, episode=1310 reward=0.7708001 (505.74 it/sec) -training >> step=7857100, episode=1310 reward=0.7891318 (489.24 it/sec) -training >> step=7857200, episode=1310 reward=0.781453 (514.40 it/sec) -training >> step=7857300, episode=1310 reward=0.7860669 (518.22 it/sec) -training >> step=7857400, episode=1310 reward=0.7716636 (536.37 it/sec) -training >> step=7857500, episode=1310 reward=0.8138016 (501.57 it/sec) -training >> step=7857600, episode=1310 reward=0.7943074 (481.32 it/sec) -training >> step=7857700, episode=1310 reward=0.778815 (468.03 it/sec) -training >> step=7857800, episode=1310 reward=0.8132212 (498.85 it/sec) -training >> step=7857900, episode=1310 reward=0.7954313 (507.53 it/sec) -training >> step=7858000, episode=1310 reward=0.7913138 (464.19 it/sec) -training >> step=7858100, episode=1310 reward=0.7860641 (482.37 it/sec) -training >> step=7858200, episode=1310 reward=0.7895631 (498.43 it/sec) -training >> step=7858300, episode=1310 reward=0.8103691 (525.01 it/sec) -training >> step=7858400, episode=1310 reward=0.7955619 (476.32 it/sec) -training >> step=7858500, episode=1310 reward=0.8008493 (464.65 it/sec) -training >> step=7858600, episode=1310 reward=0.7814111 (470.27 it/sec) -training >> step=7858700, episode=1310 reward=0.7846634 (500.14 it/sec) -training >> step=7858800, episode=1310 reward=0.799832 (534.03 it/sec) -training >> step=7858900, episode=1310 reward=0.7855827 (509.47 it/sec) -training >> step=7859000, episode=1310 reward=0.7968248 (521.23 it/sec) -training >> step=7859100, episode=1310 reward=0.7865604 (488.83 it/sec) -training >> step=7859200, episode=1310 reward=0.7929056 (492.98 it/sec) -training >> step=7859300, episode=1311 reward=0.7944754 (124.39 it/sec) -training >> step=7859400, episode=1311 reward=0.7975516 (492.06 it/sec) -training >> step=7859500, episode=1311 reward=0.7894136 (492.42 it/sec) -training >> step=7859600, episode=1311 reward=0.8122997 (498.71 it/sec) -training >> step=7859700, episode=1311 reward=0.7628006 (480.39 it/sec) -training >> step=7859800, episode=1311 reward=0.7763057 (489.35 it/sec) -training >> step=7859900, episode=1311 reward=0.7788674 (464.92 it/sec) -training >> step=7860000, episode=1311 reward=0.8004682 (494.79 it/sec) -training >> step=7860100, episode=1311 reward=0.7976078 (506.25 it/sec) -training >> step=7860200, episode=1311 reward=0.782576 (509.36 it/sec) -training >> step=7860300, episode=1311 reward=0.7789354 (481.47 it/sec) -training >> step=7860400, episode=1311 reward=0.785574 (442.20 it/sec) -training >> step=7860500, episode=1311 reward=0.7977387 (491.87 it/sec) -training >> step=7860600, episode=1311 reward=0.7779281 (463.88 it/sec) -training >> step=7860700, episode=1311 reward=0.7848033 (518.80 it/sec) -training >> step=7860800, episode=1311 reward=0.8030186 (536.78 it/sec) -training >> step=7860900, episode=1311 reward=0.7874246 (498.88 it/sec) -training >> step=7861000, episode=1311 reward=0.7779468 (491.40 it/sec) -training >> step=7861100, episode=1311 reward=0.7862231 (503.16 it/sec) -training >> step=7861200, episode=1311 reward=0.7928341 (517.62 it/sec) -training >> step=7861300, episode=1311 reward=0.7895697 (486.41 it/sec) -training >> step=7861400, episode=1311 reward=0.7698616 (542.99 it/sec) -training >> step=7861500, episode=1311 reward=0.7874858 (468.65 it/sec) -training >> step=7861600, episode=1311 reward=0.8060874 (467.30 it/sec) -training >> step=7861700, episode=1311 reward=0.7979212 (528.72 it/sec) -training >> step=7861800, episode=1311 reward=0.7762742 (517.25 it/sec) -training >> step=7861900, episode=1311 reward=0.8014424 (461.72 it/sec) -training >> step=7862000, episode=1311 reward=0.7921085 (467.07 it/sec) -training >> step=7862100, episode=1311 reward=0.8016551 (529.73 it/sec) -training >> step=7862200, episode=1311 reward=0.8056143 (493.17 it/sec) -training >> step=7862300, episode=1311 reward=0.7972415 (516.90 it/sec) -training >> step=7862400, episode=1311 reward=0.7828131 (554.37 it/sec) -training >> step=7862500, episode=1311 reward=0.785434 (517.62 it/sec) -training >> step=7862600, episode=1311 reward=0.7920707 (487.75 it/sec) -training >> step=7862700, episode=1311 reward=0.7908962 (485.58 it/sec) -training >> step=7862800, episode=1311 reward=0.7992151 (538.24 it/sec) -training >> step=7862900, episode=1311 reward=0.8021685 (462.53 it/sec) -training >> step=7863000, episode=1311 reward=0.7988037 (506.64 it/sec) -training >> step=7863100, episode=1311 reward=0.7715518 (488.35 it/sec) -training >> step=7863200, episode=1311 reward=0.7833944 (491.41 it/sec) -training >> step=7863300, episode=1311 reward=0.7804722 (471.45 it/sec) -training >> step=7863400, episode=1311 reward=0.772751 (474.12 it/sec) -training >> step=7863500, episode=1311 reward=0.7791419 (483.88 it/sec) -training >> step=7863600, episode=1311 reward=0.7998831 (511.82 it/sec) -training >> step=7863700, episode=1311 reward=0.7794546 (475.38 it/sec) -training >> step=7863800, episode=1311 reward=0.8005106 (510.37 it/sec) -training >> step=7863900, episode=1311 reward=0.7906519 (542.44 it/sec) -training >> step=7864000, episode=1311 reward=0.7946866 (500.62 it/sec) -training >> step=7864100, episode=1311 reward=0.7747427 (517.79 it/sec) -training >> step=7864200, episode=1311 reward=0.7860644 (480.58 it/sec) -training >> step=7864300, episode=1311 reward=0.7764826 (519.07 it/sec) -training >> step=7864400, episode=1311 reward=0.7882878 (517.97 it/sec) -training >> step=7864500, episode=1311 reward=0.7914782 (481.15 it/sec) -training >> step=7864600, episode=1311 reward=0.8023067 (543.21 it/sec) -training >> step=7864700, episode=1311 reward=0.7761301 (507.83 it/sec) -training >> step=7864800, episode=1311 reward=0.7947233 (515.65 it/sec) -training >> step=7864900, episode=1311 reward=0.7747895 (509.92 it/sec) -training >> step=7865000, episode=1311 reward=0.7826294 (560.51 it/sec) -training >> step=7865100, episode=1311 reward=0.7920572 (511.16 it/sec) -training >> step=7865200, episode=1311 reward=0.7914972 (473.05 it/sec) -training >> step=7865300, episode=1312 reward=0.8012484 (146.43 it/sec) -training >> step=7865400, episode=1312 reward=0.7993094 (482.58 it/sec) -training >> step=7865500, episode=1312 reward=0.7877405 (471.70 it/sec) -training >> step=7865600, episode=1312 reward=0.8003523 (476.44 it/sec) -training >> step=7865700, episode=1312 reward=0.789341 (518.90 it/sec) -training >> step=7865800, episode=1312 reward=0.7810951 (520.63 it/sec) -training >> step=7865900, episode=1312 reward=0.7712507 (460.25 it/sec) -training >> step=7866000, episode=1312 reward=0.8055077 (473.31 it/sec) -training >> step=7866100, episode=1312 reward=0.7543973 (465.17 it/sec) -training >> step=7866200, episode=1312 reward=0.8027248 (519.90 it/sec) -training >> step=7866300, episode=1312 reward=0.8014524 (476.12 it/sec) -training >> step=7866400, episode=1312 reward=0.7965155 (417.01 it/sec) -training >> step=7866500, episode=1312 reward=0.7727873 (473.22 it/sec) -training >> step=7866600, episode=1312 reward=0.8064322 (510.47 it/sec) -training >> step=7866700, episode=1312 reward=0.788467 (521.18 it/sec) -training >> step=7866800, episode=1312 reward=0.7872263 (490.67 it/sec) -training >> step=7866900, episode=1312 reward=0.7947864 (491.10 it/sec) -training >> step=7867000, episode=1312 reward=0.7872036 (482.86 it/sec) -training >> step=7867100, episode=1312 reward=0.8107145 (432.27 it/sec) -training >> step=7867200, episode=1312 reward=0.7941884 (514.67 it/sec) -training >> step=7867300, episode=1312 reward=0.8034989 (541.70 it/sec) -training >> step=7867400, episode=1312 reward=0.7971017 (488.90 it/sec) -training >> step=7867500, episode=1312 reward=0.8017566 (490.67 it/sec) -training >> step=7867600, episode=1312 reward=0.7857223 (530.61 it/sec) -training >> step=7867700, episode=1312 reward=0.7948558 (508.42 it/sec) -training >> step=7867800, episode=1312 reward=0.7982645 (489.93 it/sec) -training >> step=7867900, episode=1312 reward=0.7977564 (496.06 it/sec) -training >> step=7868000, episode=1312 reward=0.782648 (533.65 it/sec) -training >> step=7868100, episode=1312 reward=0.8068556 (494.49 it/sec) -training >> step=7868200, episode=1312 reward=0.7762623 (494.14 it/sec) -training >> step=7868300, episode=1312 reward=0.789039 (507.30 it/sec) -training >> step=7868400, episode=1312 reward=0.7902568 (519.62 it/sec) -training >> step=7868500, episode=1312 reward=0.7893744 (508.96 it/sec) -training >> step=7868600, episode=1312 reward=0.7887101 (438.15 it/sec) -training >> step=7868700, episode=1312 reward=0.7870585 (532.47 it/sec) -training >> step=7868800, episode=1312 reward=0.8102079 (515.97 it/sec) -training >> step=7868900, episode=1312 reward=0.8005356 (508.58 it/sec) -training >> step=7869000, episode=1312 reward=0.7755399 (506.10 it/sec) -training >> step=7869100, episode=1312 reward=0.7937026 (500.87 it/sec) -training >> step=7869200, episode=1312 reward=0.7850955 (512.94 it/sec) -training >> step=7869300, episode=1312 reward=0.7949458 (517.02 it/sec) -training >> step=7869400, episode=1312 reward=0.7851385 (500.35 it/sec) -training >> step=7869500, episode=1312 reward=0.7918013 (509.24 it/sec) -training >> step=7869600, episode=1312 reward=0.7917686 (514.66 it/sec) -training >> step=7869700, episode=1312 reward=0.8035908 (495.00 it/sec) -training >> step=7869800, episode=1312 reward=0.8014917 (504.78 it/sec) -training >> step=7869900, episode=1312 reward=0.7879626 (458.52 it/sec) -training >> step=7870000, episode=1312 reward=0.7817459 (477.35 it/sec) -training >> step=7870100, episode=1312 reward=0.7958769 (477.74 it/sec) -training >> step=7870200, episode=1312 reward=0.7928836 (491.76 it/sec) -training >> step=7870300, episode=1312 reward=0.7859672 (486.88 it/sec) -training >> step=7870400, episode=1312 reward=0.7898096 (494.44 it/sec) -training >> step=7870500, episode=1312 reward=0.7953684 (504.07 it/sec) -training >> step=7870600, episode=1312 reward=0.7956533 (534.42 it/sec) -training >> step=7870700, episode=1312 reward=0.7781932 (463.45 it/sec) -training >> step=7870800, episode=1312 reward=0.8144329 (494.03 it/sec) -training >> step=7870900, episode=1312 reward=0.7934833 (552.74 it/sec) -training >> step=7871000, episode=1312 reward=0.8033134 (512.78 it/sec) -training >> step=7871100, episode=1312 reward=0.7800552 (487.79 it/sec) -training >> step=7871200, episode=1312 reward=0.7983331 (514.01 it/sec) -training >> step=7871300, episode=1313 reward=0.7807433 (135.59 it/sec) -training >> step=7871400, episode=1313 reward=0.7987438 (468.49 it/sec) -training >> step=7871500, episode=1313 reward=0.785775 (505.44 it/sec) -training >> step=7871600, episode=1313 reward=0.7812079 (502.44 it/sec) -training >> step=7871700, episode=1313 reward=0.7787 (514.43 it/sec) -training >> step=7871800, episode=1313 reward=0.7843329 (502.95 it/sec) -training >> step=7871900, episode=1313 reward=0.8062481 (482.23 it/sec) -training >> step=7872000, episode=1313 reward=0.7917746 (466.20 it/sec) -training >> step=7872100, episode=1313 reward=0.7944405 (521.36 it/sec) -training >> step=7872200, episode=1313 reward=0.7743005 (501.83 it/sec) -training >> step=7872300, episode=1313 reward=0.8121373 (513.44 it/sec) -training >> step=7872400, episode=1313 reward=0.7901775 (470.77 it/sec) -training >> step=7872500, episode=1313 reward=0.7861396 (435.64 it/sec) -training >> step=7872600, episode=1313 reward=0.8009071 (486.16 it/sec) -training >> step=7872700, episode=1313 reward=0.7806305 (490.13 it/sec) -training >> step=7872800, episode=1313 reward=0.8015529 (533.68 it/sec) -training >> step=7872900, episode=1313 reward=0.8097462 (504.87 it/sec) -training >> step=7873000, episode=1313 reward=0.8050674 (506.28 it/sec) -training >> step=7873100, episode=1313 reward=0.7754611 (487.94 it/sec) -training >> step=7873200, episode=1313 reward=0.7808648 (501.91 it/sec) -training >> step=7873300, episode=1313 reward=0.8012767 (487.67 it/sec) -training >> step=7873400, episode=1313 reward=0.79646 (503.02 it/sec) -training >> step=7873500, episode=1313 reward=0.8100663 (539.13 it/sec) -training >> step=7873600, episode=1313 reward=0.7896891 (513.89 it/sec) -training >> step=7873700, episode=1313 reward=0.7931302 (446.18 it/sec) -training >> step=7873800, episode=1313 reward=0.7953987 (503.30 it/sec) -training >> step=7873900, episode=1313 reward=0.800254 (518.01 it/sec) -training >> step=7874000, episode=1313 reward=0.7984567 (501.98 it/sec) -training >> step=7874100, episode=1313 reward=0.7845247 (471.26 it/sec) -training >> step=7874200, episode=1313 reward=0.7932736 (503.94 it/sec) -training >> step=7874300, episode=1313 reward=0.7681819 (485.74 it/sec) -training >> step=7874400, episode=1313 reward=0.7810049 (508.90 it/sec) -training >> step=7874500, episode=1313 reward=0.8031961 (464.83 it/sec) -training >> step=7874600, episode=1313 reward=0.7788783 (531.10 it/sec) -training >> step=7874700, episode=1313 reward=0.783682 (486.59 it/sec) -training >> step=7874800, episode=1313 reward=0.7928783 (504.36 it/sec) -training >> step=7874900, episode=1313 reward=0.7945946 (462.06 it/sec) -training >> step=7875000, episode=1313 reward=0.7871914 (509.52 it/sec) -training >> step=7875100, episode=1313 reward=0.7700536 (466.52 it/sec) -training >> step=7875200, episode=1313 reward=0.7937987 (496.19 it/sec) -training >> step=7875300, episode=1313 reward=0.7978664 (530.47 it/sec) -training >> step=7875400, episode=1313 reward=0.7781115 (512.03 it/sec) -training >> step=7875500, episode=1313 reward=0.7799831 (451.98 it/sec) -training >> step=7875600, episode=1313 reward=0.7685458 (542.96 it/sec) -training >> step=7875700, episode=1313 reward=0.7840856 (498.95 it/sec) -training >> step=7875800, episode=1313 reward=0.7895697 (513.55 it/sec) -training >> step=7875900, episode=1313 reward=0.7883511 (470.09 it/sec) -training >> step=7876000, episode=1313 reward=0.7763696 (509.63 it/sec) -training >> step=7876100, episode=1313 reward=0.7857967 (530.49 it/sec) -training >> step=7876200, episode=1313 reward=0.7953831 (473.09 it/sec) -training >> step=7876300, episode=1313 reward=0.7976243 (470.96 it/sec) -training >> step=7876400, episode=1313 reward=0.7880474 (518.51 it/sec) -training >> step=7876500, episode=1313 reward=0.809343 (486.22 it/sec) -training >> step=7876600, episode=1313 reward=0.7984229 (494.31 it/sec) -training >> step=7876700, episode=1313 reward=0.7856908 (487.79 it/sec) -training >> step=7876800, episode=1313 reward=0.7989716 (436.44 it/sec) -training >> step=7876900, episode=1313 reward=0.8104433 (497.89 it/sec) -training >> step=7877000, episode=1313 reward=0.8056599 (498.39 it/sec) -training >> step=7877100, episode=1313 reward=0.7701274 (498.13 it/sec) -training >> step=7877200, episode=1313 reward=0.78979 (432.47 it/sec) -training >> step=7877300, episode=1314 reward=0.7781639 (143.36 it/sec) -training >> step=7877400, episode=1314 reward=0.7989315 (444.15 it/sec) -training >> step=7877500, episode=1314 reward=0.7951643 (534.33 it/sec) -training >> step=7877600, episode=1314 reward=0.7861347 (492.61 it/sec) -training >> step=7877700, episode=1314 reward=0.7890869 (522.23 it/sec) -training >> step=7877800, episode=1314 reward=0.7765551 (477.71 it/sec) -training >> step=7877900, episode=1314 reward=0.7778646 (464.83 it/sec) -training >> step=7878000, episode=1314 reward=0.7894549 (517.19 it/sec) -training >> step=7878100, episode=1314 reward=0.79247 (494.40 it/sec) -training >> step=7878200, episode=1314 reward=0.7857319 (522.68 it/sec) -training >> step=7878300, episode=1314 reward=0.7949645 (522.74 it/sec) -training >> step=7878400, episode=1314 reward=0.8099263 (521.96 it/sec) -training >> step=7878500, episode=1314 reward=0.8008443 (498.46 it/sec) -training >> step=7878600, episode=1314 reward=0.7766768 (501.22 it/sec) -training >> step=7878700, episode=1314 reward=0.7804195 (525.32 it/sec) -training >> step=7878800, episode=1314 reward=0.7962211 (520.85 it/sec) -training >> step=7878900, episode=1314 reward=0.7938341 (536.93 it/sec) -training >> step=7879000, episode=1314 reward=0.8131687 (521.17 it/sec) -training >> step=7879100, episode=1314 reward=0.79269 (486.99 it/sec) -training >> step=7879200, episode=1314 reward=0.7898109 (518.88 it/sec) -training >> step=7879300, episode=1314 reward=0.7934063 (518.44 it/sec) -training >> step=7879400, episode=1314 reward=0.7849621 (488.45 it/sec) -training >> step=7879500, episode=1314 reward=0.8036804 (508.74 it/sec) -training >> step=7879600, episode=1314 reward=0.8009092 (510.41 it/sec) -training >> step=7879700, episode=1314 reward=0.7978355 (463.49 it/sec) -training >> step=7879800, episode=1314 reward=0.7924279 (497.90 it/sec) -training >> step=7879900, episode=1314 reward=0.7743009 (494.44 it/sec) -training >> step=7880000, episode=1314 reward=0.8021833 (544.12 it/sec) -training >> step=7880100, episode=1314 reward=0.7951947 (528.47 it/sec) -training >> step=7880200, episode=1314 reward=0.7899086 (499.77 it/sec) -training >> step=7880300, episode=1314 reward=0.8065832 (499.30 it/sec) -training >> step=7880400, episode=1314 reward=0.7789482 (498.78 it/sec) -training >> step=7880500, episode=1314 reward=0.7857832 (465.10 it/sec) -training >> step=7880600, episode=1314 reward=0.7767809 (464.43 it/sec) -training >> step=7880700, episode=1314 reward=0.7732063 (494.43 it/sec) -training >> step=7880800, episode=1314 reward=0.7688478 (443.05 it/sec) -training >> step=7880900, episode=1314 reward=0.8045264 (477.61 it/sec) -training >> step=7881000, episode=1314 reward=0.7933799 (526.41 it/sec) -training >> step=7881100, episode=1314 reward=0.7937777 (512.18 it/sec) -training >> step=7881200, episode=1314 reward=0.8122774 (512.39 it/sec) -training >> step=7881300, episode=1314 reward=0.7818227 (506.10 it/sec) -training >> step=7881400, episode=1314 reward=0.7874066 (515.29 it/sec) -training >> step=7881500, episode=1314 reward=0.7671935 (506.81 it/sec) -training >> step=7881600, episode=1314 reward=0.7846075 (479.90 it/sec) -training >> step=7881700, episode=1314 reward=0.7893256 (473.84 it/sec) -training >> step=7881800, episode=1314 reward=0.7934622 (537.96 it/sec) -training >> step=7881900, episode=1314 reward=0.8081259 (481.81 it/sec) -training >> step=7882000, episode=1314 reward=0.7952132 (499.93 it/sec) -training >> step=7882100, episode=1314 reward=0.7918908 (531.22 it/sec) -training >> step=7882200, episode=1314 reward=0.8161949 (464.67 it/sec) -training >> step=7882300, episode=1314 reward=0.7944855 (485.29 it/sec) -training >> step=7882400, episode=1314 reward=0.7906881 (481.45 it/sec) -training >> step=7882500, episode=1314 reward=0.8037614 (537.52 it/sec) -training >> step=7882600, episode=1314 reward=0.7826695 (480.59 it/sec) -training >> step=7882700, episode=1314 reward=0.7919655 (480.95 it/sec) -training >> step=7882800, episode=1314 reward=0.7873152 (511.85 it/sec) -training >> step=7882900, episode=1314 reward=0.7935916 (527.85 it/sec) -training >> step=7883000, episode=1314 reward=0.7977754 (509.76 it/sec) -training >> step=7883100, episode=1314 reward=0.7894751 (504.61 it/sec) -training >> step=7883200, episode=1314 reward=0.7855183 (524.38 it/sec) -training >> step=7883300, episode=1315 reward=0.7880578 (146.54 it/sec) -training >> step=7883400, episode=1315 reward=0.7700087 (497.25 it/sec) -training >> step=7883500, episode=1315 reward=0.7900816 (497.76 it/sec) -training >> step=7883600, episode=1315 reward=0.7786133 (541.50 it/sec) -training >> step=7883700, episode=1315 reward=0.7584995 (501.21 it/sec) -training >> step=7883800, episode=1315 reward=0.7986295 (495.00 it/sec) -training >> step=7883900, episode=1315 reward=0.8008083 (492.92 it/sec) -training >> step=7884000, episode=1315 reward=0.7857735 (477.32 it/sec) -training >> step=7884100, episode=1315 reward=0.7925619 (516.98 it/sec) -training >> step=7884200, episode=1315 reward=0.8036621 (504.78 it/sec) -training >> step=7884300, episode=1315 reward=0.7849512 (534.28 it/sec) -training >> step=7884400, episode=1315 reward=0.7747942 (473.44 it/sec) -training >> step=7884500, episode=1315 reward=0.7862834 (470.41 it/sec) -training >> step=7884600, episode=1315 reward=0.7796726 (519.44 it/sec) -training >> step=7884700, episode=1315 reward=0.7924504 (516.76 it/sec) -training >> step=7884800, episode=1315 reward=0.7911891 (479.94 it/sec) -training >> step=7884900, episode=1315 reward=0.7952261 (503.81 it/sec) -training >> step=7885000, episode=1315 reward=0.7811953 (550.85 it/sec) -training >> step=7885100, episode=1315 reward=0.7915708 (483.31 it/sec) -training >> step=7885200, episode=1315 reward=0.7957806 (486.91 it/sec) -training >> step=7885300, episode=1315 reward=0.7704313 (532.26 it/sec) -training >> step=7885400, episode=1315 reward=0.7752014 (499.77 it/sec) -training >> step=7885500, episode=1315 reward=0.8094443 (521.47 it/sec) -training >> step=7885600, episode=1315 reward=0.7956787 (473.30 it/sec) -training >> step=7885700, episode=1315 reward=0.7953596 (510.09 it/sec) -training >> step=7885800, episode=1315 reward=0.7789153 (503.65 it/sec) -training >> step=7885900, episode=1315 reward=0.7891536 (464.41 it/sec) -training >> step=7886000, episode=1315 reward=0.7694117 (520.77 it/sec) -training >> step=7886100, episode=1315 reward=0.809861 (510.18 it/sec) -training >> step=7886200, episode=1315 reward=0.7901777 (516.89 it/sec) -training >> step=7886300, episode=1315 reward=0.7725715 (473.58 it/sec) -training >> step=7886400, episode=1315 reward=0.7794821 (540.56 it/sec) -training >> step=7886500, episode=1315 reward=0.7955648 (534.31 it/sec) -training >> step=7886600, episode=1315 reward=0.7961147 (489.68 it/sec) -training >> step=7886700, episode=1315 reward=0.7764486 (470.66 it/sec) -training >> step=7886800, episode=1315 reward=0.7685987 (548.07 it/sec) -training >> step=7886900, episode=1315 reward=0.7798404 (469.10 it/sec) -training >> step=7887000, episode=1315 reward=0.7960865 (505.04 it/sec) -training >> step=7887100, episode=1315 reward=0.7912886 (498.83 it/sec) -training >> step=7887200, episode=1315 reward=0.816594 (499.79 it/sec) -training >> step=7887300, episode=1315 reward=0.7991037 (501.02 it/sec) -training >> step=7887400, episode=1315 reward=0.7963998 (511.42 it/sec) -training >> step=7887500, episode=1315 reward=0.76421 (479.39 it/sec) -training >> step=7887600, episode=1315 reward=0.7818305 (507.94 it/sec) -training >> step=7887700, episode=1315 reward=0.7739097 (508.76 it/sec) -training >> step=7887800, episode=1315 reward=0.7955896 (499.60 it/sec) -training >> step=7887900, episode=1315 reward=0.7669152 (503.21 it/sec) -training >> step=7888000, episode=1315 reward=0.780201 (472.82 it/sec) -training >> step=7888100, episode=1315 reward=0.7968957 (493.00 it/sec) -training >> step=7888200, episode=1315 reward=0.8045169 (509.18 it/sec) -training >> step=7888300, episode=1315 reward=0.7871976 (460.98 it/sec) -training >> step=7888400, episode=1315 reward=0.7845276 (467.85 it/sec) -training >> step=7888500, episode=1315 reward=0.7898135 (515.12 it/sec) -training >> step=7888600, episode=1315 reward=0.7738924 (526.68 it/sec) -training >> step=7888700, episode=1315 reward=0.8036934 (485.48 it/sec) -training >> step=7888800, episode=1315 reward=0.795701 (465.74 it/sec) -training >> step=7888900, episode=1315 reward=0.7804807 (481.68 it/sec) -training >> step=7889000, episode=1315 reward=0.7970291 (529.74 it/sec) -training >> step=7889100, episode=1315 reward=0.7977818 (517.40 it/sec) -training >> step=7889200, episode=1315 reward=0.8057491 (482.31 it/sec) -training >> step=7889300, episode=1316 reward=0.774226 (118.46 it/sec) -training >> step=7889400, episode=1316 reward=0.764483 (465.30 it/sec) -training >> step=7889500, episode=1316 reward=0.7962751 (484.03 it/sec) -training >> step=7889600, episode=1316 reward=0.7730125 (492.21 it/sec) -training >> step=7889700, episode=1316 reward=0.7792103 (495.31 it/sec) -training >> step=7889800, episode=1316 reward=0.7748612 (445.55 it/sec) -training >> step=7889900, episode=1316 reward=0.8005096 (493.87 it/sec) -training >> step=7890000, episode=1316 reward=0.7987413 (514.97 it/sec) -training >> step=7890100, episode=1316 reward=0.7970793 (552.51 it/sec) -training >> step=7890200, episode=1316 reward=0.7980163 (466.15 it/sec) -training >> step=7890300, episode=1316 reward=0.78013 (506.13 it/sec) -training >> step=7890400, episode=1316 reward=0.7820858 (506.36 it/sec) -training >> step=7890500, episode=1316 reward=0.7813603 (485.49 it/sec) -training >> step=7890600, episode=1316 reward=0.7855561 (514.65 it/sec) -training >> step=7890700, episode=1316 reward=0.7825739 (504.51 it/sec) -training >> step=7890800, episode=1316 reward=0.7845225 (493.97 it/sec) -training >> step=7890900, episode=1316 reward=0.7796152 (480.87 it/sec) -training >> step=7891000, episode=1316 reward=0.7869926 (493.88 it/sec) -training >> step=7891100, episode=1316 reward=0.7706416 (509.13 it/sec) -training >> step=7891200, episode=1316 reward=0.7869011 (514.03 it/sec) -training >> step=7891300, episode=1316 reward=0.7930338 (502.05 it/sec) -training >> step=7891400, episode=1316 reward=0.7958999 (495.85 it/sec) -training >> step=7891500, episode=1316 reward=0.7992547 (525.17 it/sec) -training >> step=7891600, episode=1316 reward=0.7852066 (463.12 it/sec) -training >> step=7891700, episode=1316 reward=0.7957558 (462.11 it/sec) -training >> step=7891800, episode=1316 reward=0.8055491 (499.38 it/sec) -training >> step=7891900, episode=1316 reward=0.7887294 (494.89 it/sec) -training >> step=7892000, episode=1316 reward=0.789952 (522.15 it/sec) -training >> step=7892100, episode=1316 reward=0.7875494 (495.72 it/sec) -training >> step=7892200, episode=1316 reward=0.7681081 (506.08 it/sec) -training >> step=7892300, episode=1316 reward=0.8050721 (482.47 it/sec) -training >> step=7892400, episode=1316 reward=0.7774578 (493.10 it/sec) -training >> step=7892500, episode=1316 reward=0.7886547 (454.86 it/sec) -training >> step=7892600, episode=1316 reward=0.7991242 (482.14 it/sec) -training >> step=7892700, episode=1316 reward=0.7904048 (506.06 it/sec) -training >> step=7892800, episode=1316 reward=0.7957954 (509.46 it/sec) -training >> step=7892900, episode=1316 reward=0.7647655 (516.87 it/sec) -training >> step=7893000, episode=1316 reward=0.7972402 (521.93 it/sec) -training >> step=7893100, episode=1316 reward=0.7960455 (494.84 it/sec) -training >> step=7893200, episode=1316 reward=0.773116 (477.57 it/sec) -training >> step=7893300, episode=1316 reward=0.8019044 (514.40 it/sec) -training >> step=7893400, episode=1316 reward=0.7499583 (525.29 it/sec) -training >> step=7893500, episode=1316 reward=0.7946491 (496.54 it/sec) -training >> step=7893600, episode=1316 reward=0.799458 (493.36 it/sec) -training >> step=7893700, episode=1316 reward=0.7968578 (527.03 it/sec) -training >> step=7893800, episode=1316 reward=0.7787748 (512.22 it/sec) -training >> step=7893900, episode=1316 reward=0.8127744 (454.90 it/sec) -training >> step=7894000, episode=1316 reward=0.7981242 (534.41 it/sec) -training >> step=7894100, episode=1316 reward=0.7732559 (489.79 it/sec) -training >> step=7894200, episode=1316 reward=0.7865974 (494.48 it/sec) -training >> step=7894300, episode=1316 reward=0.7990568 (508.54 it/sec) -training >> step=7894400, episode=1316 reward=0.7861307 (507.44 it/sec) -training >> step=7894500, episode=1316 reward=0.7834624 (502.07 it/sec) -training >> step=7894600, episode=1316 reward=0.7838289 (464.38 it/sec) -training >> step=7894700, episode=1316 reward=0.7887081 (534.72 it/sec) -training >> step=7894800, episode=1316 reward=0.770452 (508.74 it/sec) -training >> step=7894900, episode=1316 reward=0.772527 (503.88 it/sec) -training >> step=7895000, episode=1316 reward=0.787574 (517.04 it/sec) -training >> step=7895100, episode=1316 reward=0.7766156 (531.95 it/sec) -training >> step=7895200, episode=1316 reward=0.7895815 (484.42 it/sec) -training >> step=7895300, episode=1317 reward=0.7732944 (116.66 it/sec) -training >> step=7895400, episode=1317 reward=0.7967131 (489.21 it/sec) -training >> step=7895500, episode=1317 reward=0.7776823 (518.73 it/sec) -training >> step=7895600, episode=1317 reward=0.789308 (505.70 it/sec) -training >> step=7895700, episode=1317 reward=0.797421 (444.01 it/sec) -training >> step=7895800, episode=1317 reward=0.7803645 (519.03 it/sec) -training >> step=7895900, episode=1317 reward=0.8050507 (497.68 it/sec) -training >> step=7896000, episode=1317 reward=0.7930938 (504.19 it/sec) -training >> step=7896100, episode=1317 reward=0.8133669 (506.28 it/sec) -training >> step=7896200, episode=1317 reward=0.7984192 (537.50 it/sec) -training >> step=7896300, episode=1317 reward=0.787603 (507.30 it/sec) -training >> step=7896400, episode=1317 reward=0.8028432 (486.65 it/sec) -training >> step=7896500, episode=1317 reward=0.7773087 (524.27 it/sec) -training >> step=7896600, episode=1317 reward=0.7885177 (485.46 it/sec) -training >> step=7896700, episode=1317 reward=0.7931708 (519.77 it/sec) -training >> step=7896800, episode=1317 reward=0.7868759 (498.44 it/sec) -training >> step=7896900, episode=1317 reward=0.788534 (535.17 it/sec) -training >> step=7897000, episode=1317 reward=0.7948657 (506.89 it/sec) -training >> step=7897100, episode=1317 reward=0.7797393 (516.62 it/sec) -training >> step=7897200, episode=1317 reward=0.8122637 (512.59 it/sec) -training >> step=7897300, episode=1317 reward=0.7759748 (528.50 it/sec) -training >> step=7897400, episode=1317 reward=0.8125537 (497.03 it/sec) -training >> step=7897500, episode=1317 reward=0.7728154 (505.62 it/sec) -training >> step=7897600, episode=1317 reward=0.7867091 (526.03 it/sec) -training >> step=7897700, episode=1317 reward=0.796186 (477.18 it/sec) -training >> step=7897800, episode=1317 reward=0.7987233 (507.24 it/sec) -training >> step=7897900, episode=1317 reward=0.7945468 (500.33 it/sec) -training >> step=7898000, episode=1317 reward=0.7879968 (498.05 it/sec) -training >> step=7898100, episode=1317 reward=0.7852533 (500.19 it/sec) -training >> step=7898200, episode=1317 reward=0.7847115 (485.27 it/sec) -training >> step=7898300, episode=1317 reward=0.7881989 (498.26 it/sec) -training >> step=7898400, episode=1317 reward=0.7922984 (506.65 it/sec) -training >> step=7898500, episode=1317 reward=0.7902463 (504.70 it/sec) -training >> step=7898600, episode=1317 reward=0.7851242 (516.05 it/sec) -training >> step=7898700, episode=1317 reward=0.7872385 (524.25 it/sec) -training >> step=7898800, episode=1317 reward=0.779816 (458.30 it/sec) -training >> step=7898900, episode=1317 reward=0.803437 (506.40 it/sec) -training >> step=7899000, episode=1317 reward=0.7817553 (503.65 it/sec) -training >> step=7899100, episode=1317 reward=0.7764439 (499.49 it/sec) -training >> step=7899200, episode=1317 reward=0.7910152 (501.42 it/sec) -training >> step=7899300, episode=1317 reward=0.8041262 (511.08 it/sec) -training >> step=7899400, episode=1317 reward=0.7882815 (534.04 it/sec) -training >> step=7899500, episode=1317 reward=0.7797282 (498.02 it/sec) -training >> step=7899600, episode=1317 reward=0.8014579 (518.78 it/sec) -training >> step=7899700, episode=1317 reward=0.7921972 (469.33 it/sec) -training >> step=7899800, episode=1317 reward=0.7854014 (529.21 it/sec) -training >> step=7899900, episode=1317 reward=0.7850992 (466.52 it/sec) -training >> step=7900000, episode=1317 reward=0.80599 (470.27 it/sec) -training >> step=7900100, episode=1317 reward=0.8017115 (504.15 it/sec) -training >> step=7900200, episode=1317 reward=0.7761196 (463.02 it/sec) -training >> step=7900300, episode=1317 reward=0.7761367 (480.53 it/sec) -training >> step=7900400, episode=1317 reward=0.7976921 (474.29 it/sec) -training >> step=7900500, episode=1317 reward=0.772514 (511.25 it/sec) -training >> step=7900600, episode=1317 reward=0.7848551 (502.60 it/sec) -training >> step=7900700, episode=1317 reward=0.771005 (486.69 it/sec) -training >> step=7900800, episode=1317 reward=0.7893485 (470.02 it/sec) -training >> step=7900900, episode=1317 reward=0.7699851 (466.25 it/sec) -training >> step=7901000, episode=1317 reward=0.7714608 (501.03 it/sec) -training >> step=7901100, episode=1317 reward=0.7801832 (513.15 it/sec) -training >> step=7901200, episode=1317 reward=0.8061391 (471.93 it/sec) -training >> step=7901300, episode=1318 reward=0.7977704 (119.83 it/sec) -training >> step=7901400, episode=1318 reward=0.7884116 (496.82 it/sec) -training >> step=7901500, episode=1318 reward=0.7762422 (503.96 it/sec) -training >> step=7901600, episode=1318 reward=0.7958091 (502.85 it/sec) -training >> step=7901700, episode=1318 reward=0.7876178 (467.68 it/sec) -training >> step=7901800, episode=1318 reward=0.7920499 (522.01 it/sec) -training >> step=7901900, episode=1318 reward=0.7847367 (492.69 it/sec) -training >> step=7902000, episode=1318 reward=0.7801985 (532.87 it/sec) -training >> step=7902100, episode=1318 reward=0.811361 (521.52 it/sec) -training >> step=7902200, episode=1318 reward=0.7814981 (449.72 it/sec) -training >> step=7902300, episode=1318 reward=0.7804048 (533.89 it/sec) -training >> step=7902400, episode=1318 reward=0.7804613 (477.31 it/sec) -training >> step=7902500, episode=1318 reward=0.7887511 (478.29 it/sec) -training >> step=7902600, episode=1318 reward=0.7818663 (507.76 it/sec) -training >> step=7902700, episode=1318 reward=0.7980996 (530.77 it/sec) -training >> step=7902800, episode=1318 reward=0.7851294 (464.68 it/sec) -training >> step=7902900, episode=1318 reward=0.7908419 (518.37 it/sec) -training >> step=7903000, episode=1318 reward=0.7976689 (493.75 it/sec) -training >> step=7903100, episode=1318 reward=0.7909815 (502.13 it/sec) -training >> step=7903200, episode=1318 reward=0.7913411 (503.58 it/sec) -training >> step=7903300, episode=1318 reward=0.7861388 (472.02 it/sec) -training >> step=7903400, episode=1318 reward=0.7868443 (487.08 it/sec) -training >> step=7903500, episode=1318 reward=0.7923782 (523.68 it/sec) -training >> step=7903600, episode=1318 reward=0.7679374 (479.41 it/sec) -training >> step=7903700, episode=1318 reward=0.7874061 (482.27 it/sec) -training >> step=7903800, episode=1318 reward=0.7745228 (536.80 it/sec) -training >> step=7903900, episode=1318 reward=0.7889196 (471.35 it/sec) -training >> step=7904000, episode=1318 reward=0.7970331 (470.95 it/sec) -training >> step=7904100, episode=1318 reward=0.7805851 (511.43 it/sec) -training >> step=7904200, episode=1318 reward=0.7930806 (533.31 it/sec) -training >> step=7904300, episode=1318 reward=0.8018571 (467.61 it/sec) -training >> step=7904400, episode=1318 reward=0.7826081 (479.47 it/sec) -training >> step=7904500, episode=1318 reward=0.7794016 (524.89 it/sec) -training >> step=7904600, episode=1318 reward=0.7886304 (509.93 it/sec) -training >> step=7904700, episode=1318 reward=0.7960202 (465.43 it/sec) -training >> step=7904800, episode=1318 reward=0.802063 (477.71 it/sec) -training >> step=7904900, episode=1318 reward=0.7729698 (500.55 it/sec) -training >> step=7905000, episode=1318 reward=0.7800118 (522.92 it/sec) -training >> step=7905100, episode=1318 reward=0.7833806 (516.99 it/sec) -training >> step=7905200, episode=1318 reward=0.7973818 (504.17 it/sec) -training >> step=7905300, episode=1318 reward=0.7751606 (534.14 it/sec) -training >> step=7905400, episode=1318 reward=0.7954277 (503.53 it/sec) -training >> step=7905500, episode=1318 reward=0.8127195 (515.19 it/sec) -training >> step=7905600, episode=1318 reward=0.771121 (520.01 it/sec) -training >> step=7905700, episode=1318 reward=0.7924207 (507.85 it/sec) -training >> step=7905800, episode=1318 reward=0.787765 (504.48 it/sec) -training >> step=7905900, episode=1318 reward=0.7830147 (506.65 it/sec) -training >> step=7906000, episode=1318 reward=0.7827922 (481.81 it/sec) -training >> step=7906100, episode=1318 reward=0.79904 (500.13 it/sec) -training >> step=7906200, episode=1318 reward=0.7863968 (473.10 it/sec) -training >> step=7906300, episode=1318 reward=0.7944259 (473.39 it/sec) -training >> step=7906400, episode=1318 reward=0.8019718 (539.11 it/sec) -training >> step=7906500, episode=1318 reward=0.7941658 (505.13 it/sec) -training >> step=7906600, episode=1318 reward=0.7544834 (509.58 it/sec) -training >> step=7906700, episode=1318 reward=0.7889623 (535.23 it/sec) -training >> step=7906800, episode=1318 reward=0.789575 (444.35 it/sec) -training >> step=7906900, episode=1318 reward=0.8129727 (508.34 it/sec) -training >> step=7907000, episode=1318 reward=0.7830436 (516.85 it/sec) -training >> step=7907100, episode=1318 reward=0.7945977 (496.60 it/sec) -training >> step=7907200, episode=1318 reward=0.7944244 (522.76 it/sec) -training >> step=7907300, episode=1319 reward=0.765002 (119.56 it/sec) -training >> step=7907400, episode=1319 reward=0.7854397 (484.46 it/sec) -training >> step=7907500, episode=1319 reward=0.7816135 (491.01 it/sec) -training >> step=7907600, episode=1319 reward=0.7758764 (488.21 it/sec) -training >> step=7907700, episode=1319 reward=0.8036171 (456.91 it/sec) -training >> step=7907800, episode=1319 reward=0.7916133 (552.10 it/sec) -training >> step=7907900, episode=1319 reward=0.796884 (509.50 it/sec) -training >> step=7908000, episode=1319 reward=0.7816546 (473.20 it/sec) -training >> step=7908100, episode=1319 reward=0.7640554 (507.50 it/sec) -training >> step=7908200, episode=1319 reward=0.7853656 (539.62 it/sec) -training >> step=7908300, episode=1319 reward=0.7968744 (479.28 it/sec) -training >> step=7908400, episode=1319 reward=0.7749116 (500.88 it/sec) -training >> step=7908500, episode=1319 reward=0.7859305 (519.08 it/sec) -training >> step=7908600, episode=1319 reward=0.7991409 (528.32 it/sec) -training >> step=7908700, episode=1319 reward=0.8079239 (468.27 it/sec) -training >> step=7908800, episode=1319 reward=0.7808698 (459.04 it/sec) -training >> step=7908900, episode=1319 reward=0.7966766 (537.71 it/sec) -training >> step=7909000, episode=1319 reward=0.8008349 (502.85 it/sec) -training >> step=7909100, episode=1319 reward=0.7940028 (460.06 it/sec) -training >> step=7909200, episode=1319 reward=0.785081 (509.20 it/sec) -training >> step=7909300, episode=1319 reward=0.7837225 (531.50 it/sec) -training >> step=7909400, episode=1319 reward=0.7906587 (530.92 it/sec) -training >> step=7909500, episode=1319 reward=0.7914209 (509.45 it/sec) -training >> step=7909600, episode=1319 reward=0.7963701 (508.17 it/sec) -training >> step=7909700, episode=1319 reward=0.785361 (501.74 it/sec) -training >> step=7909800, episode=1319 reward=0.7877327 (502.33 it/sec) -training >> step=7909900, episode=1319 reward=0.8141136 (527.59 it/sec) -training >> step=7910000, episode=1319 reward=0.8098107 (529.71 it/sec) -training >> step=7910100, episode=1319 reward=0.7962284 (479.04 it/sec) -training >> step=7910200, episode=1319 reward=0.787712 (454.64 it/sec) -training >> step=7910300, episode=1319 reward=0.7679563 (553.85 it/sec) -training >> step=7910400, episode=1319 reward=0.779237 (486.37 it/sec) -training >> step=7910500, episode=1319 reward=0.7793933 (478.21 it/sec) -training >> step=7910600, episode=1319 reward=0.8066266 (494.21 it/sec) -training >> step=7910700, episode=1319 reward=0.7972417 (526.37 it/sec) -training >> step=7910800, episode=1319 reward=0.7885842 (460.65 it/sec) -training >> step=7910900, episode=1319 reward=0.7928813 (504.77 it/sec) -training >> step=7911000, episode=1319 reward=0.77559 (501.37 it/sec) -training >> step=7911100, episode=1319 reward=0.7940997 (488.98 it/sec) -training >> step=7911200, episode=1319 reward=0.7922678 (473.35 it/sec) -training >> step=7911300, episode=1319 reward=0.7816018 (461.60 it/sec) -training >> step=7911400, episode=1319 reward=0.7940454 (550.54 it/sec) -training >> step=7911500, episode=1319 reward=0.8158536 (520.26 it/sec) -training >> step=7911600, episode=1319 reward=0.8013135 (471.20 it/sec) -training >> step=7911700, episode=1319 reward=0.7907934 (480.84 it/sec) -training >> step=7911800, episode=1319 reward=0.7814137 (498.18 it/sec) -training >> step=7911900, episode=1319 reward=0.7781328 (474.87 it/sec) -training >> step=7912000, episode=1319 reward=0.7678868 (507.28 it/sec) -training >> step=7912100, episode=1319 reward=0.7982717 (501.32 it/sec) -training >> step=7912200, episode=1319 reward=0.794461 (523.58 it/sec) -training >> step=7912300, episode=1319 reward=0.7948771 (487.94 it/sec) -training >> step=7912400, episode=1319 reward=0.8002018 (527.77 it/sec) -training >> step=7912500, episode=1319 reward=0.8051375 (520.22 it/sec) -training >> step=7912600, episode=1319 reward=0.7812102 (500.12 it/sec) -training >> step=7912700, episode=1319 reward=0.8098853 (502.20 it/sec) -training >> step=7912800, episode=1319 reward=0.7902145 (506.09 it/sec) -training >> step=7912900, episode=1319 reward=0.7882193 (484.57 it/sec) -training >> step=7913000, episode=1319 reward=0.7945934 (550.52 it/sec) -training >> step=7913100, episode=1319 reward=0.7830811 (496.92 it/sec) -training >> step=7913200, episode=1319 reward=0.7975958 (500.08 it/sec) -training >> step=7913300, episode=1320 reward=0.7875091 (134.41 it/sec) -training >> step=7913400, episode=1320 reward=0.7719527 (473.19 it/sec) -training >> step=7913500, episode=1320 reward=0.7830481 (493.43 it/sec) -training >> step=7913600, episode=1320 reward=0.7785341 (491.13 it/sec) -training >> step=7913700, episode=1320 reward=0.7847196 (509.53 it/sec) -training >> step=7913800, episode=1320 reward=0.7695689 (503.41 it/sec) -training >> step=7913900, episode=1320 reward=0.8015242 (468.78 it/sec) -training >> step=7914000, episode=1320 reward=0.7851759 (529.76 it/sec) -training >> step=7914100, episode=1320 reward=0.7884236 (525.98 it/sec) -training >> step=7914200, episode=1320 reward=0.7931158 (444.60 it/sec) -training >> step=7914300, episode=1320 reward=0.7772312 (509.45 it/sec) -training >> step=7914400, episode=1320 reward=0.7725047 (498.35 it/sec) -training >> step=7914500, episode=1320 reward=0.7902341 (513.46 it/sec) -training >> step=7914600, episode=1320 reward=0.8119682 (494.38 it/sec) -training >> step=7914700, episode=1320 reward=0.8015301 (533.39 it/sec) -training >> step=7914800, episode=1320 reward=0.775884 (489.23 it/sec) -training >> step=7914900, episode=1320 reward=0.8036554 (506.65 it/sec) -training >> step=7915000, episode=1320 reward=0.783877 (480.44 it/sec) -training >> step=7915100, episode=1320 reward=0.8009048 (517.28 it/sec) -training >> step=7915200, episode=1320 reward=0.7769729 (523.75 it/sec) -training >> step=7915300, episode=1320 reward=0.7797909 (498.18 it/sec) -training >> step=7915400, episode=1320 reward=0.8054892 (496.37 it/sec) -training >> step=7915500, episode=1320 reward=0.801212 (473.77 it/sec) -training >> step=7915600, episode=1320 reward=0.7781158 (461.54 it/sec) -training >> step=7915700, episode=1320 reward=0.8090199 (465.61 it/sec) -training >> step=7915800, episode=1320 reward=0.7705975 (531.92 it/sec) -training >> step=7915900, episode=1320 reward=0.7924572 (494.54 it/sec) -training >> step=7916000, episode=1320 reward=0.7725108 (474.32 it/sec) -training >> step=7916100, episode=1320 reward=0.8008675 (477.34 it/sec) -training >> step=7916200, episode=1320 reward=0.7953343 (526.61 it/sec) -training >> step=7916300, episode=1320 reward=0.789467 (483.75 it/sec) -training >> step=7916400, episode=1320 reward=0.8077232 (504.47 it/sec) -training >> step=7916500, episode=1320 reward=0.797391 (496.49 it/sec) -training >> step=7916600, episode=1320 reward=0.8074647 (466.10 it/sec) -training >> step=7916700, episode=1320 reward=0.779844 (514.81 it/sec) -training >> step=7916800, episode=1320 reward=0.7803431 (468.13 it/sec) -training >> step=7916900, episode=1320 reward=0.8091187 (526.86 it/sec) -training >> step=7917000, episode=1320 reward=0.776615 (456.17 it/sec) -training >> step=7917100, episode=1320 reward=0.7896518 (513.05 it/sec) -training >> step=7917200, episode=1320 reward=0.7957365 (526.58 it/sec) -training >> step=7917300, episode=1320 reward=0.7936706 (471.56 it/sec) -training >> step=7917400, episode=1320 reward=0.789623 (510.06 it/sec) -training >> step=7917500, episode=1320 reward=0.785536 (481.64 it/sec) -training >> step=7917600, episode=1320 reward=0.7932244 (518.73 it/sec) -training >> step=7917700, episode=1320 reward=0.7837502 (473.10 it/sec) -training >> step=7917800, episode=1320 reward=0.7856579 (526.67 it/sec) -training >> step=7917900, episode=1320 reward=0.7924451 (495.35 it/sec) -training >> step=7918000, episode=1320 reward=0.783718 (522.06 it/sec) -training >> step=7918100, episode=1320 reward=0.7834903 (476.48 it/sec) -training >> step=7918200, episode=1320 reward=0.8021166 (477.03 it/sec) -training >> step=7918300, episode=1320 reward=0.7985014 (517.55 it/sec) -training >> step=7918400, episode=1320 reward=0.7960152 (497.62 it/sec) -training >> step=7918500, episode=1320 reward=0.7895187 (496.03 it/sec) -training >> step=7918600, episode=1320 reward=0.7882228 (513.64 it/sec) -training >> step=7918700, episode=1320 reward=0.7963156 (556.97 it/sec) -training >> step=7918800, episode=1320 reward=0.7784184 (493.96 it/sec) -training >> step=7918900, episode=1320 reward=0.7884705 (459.04 it/sec) -training >> step=7919000, episode=1320 reward=0.7800359 (498.26 it/sec) -training >> step=7919100, episode=1320 reward=0.8044418 (513.88 it/sec) -training >> step=7919200, episode=1320 reward=0.7967397 (492.38 it/sec) -training >> step=7919300, episode=1321 reward=0.7697695 (116.36 it/sec) -training >> step=7919400, episode=1321 reward=0.7850597 (558.67 it/sec) -training >> step=7919500, episode=1321 reward=0.7804373 (465.31 it/sec) -training >> step=7919600, episode=1321 reward=0.7843027 (499.53 it/sec) -training >> step=7919700, episode=1321 reward=0.7840493 (482.09 it/sec) -training >> step=7919800, episode=1321 reward=0.7814059 (472.24 it/sec) -training >> step=7919900, episode=1321 reward=0.7963395 (510.37 it/sec) -training >> step=7920000, episode=1321 reward=0.7965636 (516.73 it/sec) -training >> step=7920100, episode=1321 reward=0.7919263 (507.17 it/sec) -training >> step=7920200, episode=1321 reward=0.7853441 (522.97 it/sec) -training >> step=7920300, episode=1321 reward=0.7820198 (470.14 it/sec) -training >> step=7920400, episode=1321 reward=0.7956174 (455.64 it/sec) -training >> step=7920500, episode=1321 reward=0.7773952 (521.30 it/sec) -training >> step=7920600, episode=1321 reward=0.8005965 (483.85 it/sec) -training >> step=7920700, episode=1321 reward=0.789597 (504.86 it/sec) -training >> step=7920800, episode=1321 reward=0.7997993 (486.93 it/sec) -training >> step=7920900, episode=1321 reward=0.794878 (526.71 it/sec) -training >> step=7921000, episode=1321 reward=0.8007373 (487.36 it/sec) -training >> step=7921100, episode=1321 reward=0.7847301 (483.10 it/sec) -training >> step=7921200, episode=1321 reward=0.7875991 (452.69 it/sec) -training >> step=7921300, episode=1321 reward=0.800658 (468.76 it/sec) -training >> step=7921400, episode=1321 reward=0.7792898 (529.12 it/sec) -training >> step=7921500, episode=1321 reward=0.784553 (504.91 it/sec) -training >> step=7921600, episode=1321 reward=0.7878523 (513.89 it/sec) -training >> step=7921700, episode=1321 reward=0.7841075 (455.36 it/sec) -training >> step=7921800, episode=1321 reward=0.7916914 (491.98 it/sec) -training >> step=7921900, episode=1321 reward=0.7884119 (488.26 it/sec) -training >> step=7922000, episode=1321 reward=0.795049 (492.04 it/sec) -training >> step=7922100, episode=1321 reward=0.8003414 (512.76 it/sec) -training >> step=7922200, episode=1321 reward=0.7994199 (490.31 it/sec) -training >> step=7922300, episode=1321 reward=0.7901025 (534.50 it/sec) -training >> step=7922400, episode=1321 reward=0.783662 (473.93 it/sec) -training >> step=7922500, episode=1321 reward=0.7876357 (521.91 it/sec) -training >> step=7922600, episode=1321 reward=0.7884687 (449.86 it/sec) -training >> step=7922700, episode=1321 reward=0.7925416 (518.72 it/sec) -training >> step=7922800, episode=1321 reward=0.7732018 (485.47 it/sec) -training >> step=7922900, episode=1321 reward=0.796173 (486.06 it/sec) -training >> step=7923000, episode=1321 reward=0.7642115 (522.42 it/sec) -training >> step=7923100, episode=1321 reward=0.7929982 (513.43 it/sec) -training >> step=7923200, episode=1321 reward=0.7825604 (509.65 it/sec) -training >> step=7923300, episode=1321 reward=0.7687151 (469.78 it/sec) -training >> step=7923400, episode=1321 reward=0.8073933 (477.01 it/sec) -training >> step=7923500, episode=1321 reward=0.7854369 (518.78 it/sec) -training >> step=7923600, episode=1321 reward=0.7909058 (503.22 it/sec) -training >> step=7923700, episode=1321 reward=0.7919862 (520.73 it/sec) -training >> step=7923800, episode=1321 reward=0.8109987 (457.13 it/sec) -training >> step=7923900, episode=1321 reward=0.7899323 (485.85 it/sec) -training >> step=7924000, episode=1321 reward=0.7960962 (503.11 it/sec) -training >> step=7924100, episode=1321 reward=0.77569 (520.99 it/sec) -training >> step=7924200, episode=1321 reward=0.7894831 (520.25 it/sec) -training >> step=7924300, episode=1321 reward=0.7809118 (466.36 it/sec) -training >> step=7924400, episode=1321 reward=0.7923719 (512.62 it/sec) -training >> step=7924500, episode=1321 reward=0.7870184 (545.14 it/sec) -training >> step=7924600, episode=1321 reward=0.7854198 (483.06 it/sec) -training >> step=7924700, episode=1321 reward=0.7829766 (482.45 it/sec) -training >> step=7924800, episode=1321 reward=0.7889975 (460.51 it/sec) -training >> step=7924900, episode=1321 reward=0.7863858 (510.79 it/sec) -training >> step=7925000, episode=1321 reward=0.7910157 (527.81 it/sec) -training >> step=7925100, episode=1321 reward=0.7974478 (492.49 it/sec) -training >> step=7925200, episode=1321 reward=0.7662047 (452.03 it/sec) -training >> step=7925300, episode=1322 reward=0.7976167 (113.88 it/sec) -training >> step=7925400, episode=1322 reward=0.7757072 (491.18 it/sec) -training >> step=7925500, episode=1322 reward=0.7861084 (447.16 it/sec) -training >> step=7925600, episode=1322 reward=0.7940804 (514.81 it/sec) -training >> step=7925700, episode=1322 reward=0.7858385 (483.41 it/sec) -training >> step=7925800, episode=1322 reward=0.7866501 (493.44 it/sec) -training >> step=7925900, episode=1322 reward=0.7966611 (493.83 it/sec) -training >> step=7926000, episode=1322 reward=0.8025872 (487.70 it/sec) -training >> step=7926100, episode=1322 reward=0.7765072 (509.59 it/sec) -training >> step=7926200, episode=1322 reward=0.7703888 (452.80 it/sec) -training >> step=7926300, episode=1322 reward=0.7948567 (472.75 it/sec) -training >> step=7926400, episode=1322 reward=0.795993 (491.21 it/sec) -training >> step=7926500, episode=1322 reward=0.7929232 (491.61 it/sec) -training >> step=7926600, episode=1322 reward=0.8068036 (530.75 it/sec) -training >> step=7926700, episode=1322 reward=0.7968777 (524.08 it/sec) -training >> step=7926800, episode=1322 reward=0.7922913 (499.73 it/sec) -training >> step=7926900, episode=1322 reward=0.779228 (450.97 it/sec) -training >> step=7927000, episode=1322 reward=0.7819051 (469.81 it/sec) -training >> step=7927100, episode=1322 reward=0.786332 (488.77 it/sec) -training >> step=7927200, episode=1322 reward=0.7965227 (526.47 it/sec) -training >> step=7927300, episode=1322 reward=0.7692829 (503.21 it/sec) -training >> step=7927400, episode=1322 reward=0.8106573 (498.48 it/sec) -training >> step=7927500, episode=1322 reward=0.7985067 (479.55 it/sec) -training >> step=7927600, episode=1322 reward=0.7778578 (460.33 it/sec) -training >> step=7927700, episode=1322 reward=0.7959508 (464.80 it/sec) -training >> step=7927800, episode=1322 reward=0.7993786 (482.69 it/sec) -training >> step=7927900, episode=1322 reward=0.7820761 (506.73 it/sec) -training >> step=7928000, episode=1322 reward=0.789883 (457.87 it/sec) -training >> step=7928100, episode=1322 reward=0.7862485 (476.85 it/sec) -training >> step=7928200, episode=1322 reward=0.7828894 (487.50 it/sec) -training >> step=7928300, episode=1322 reward=0.7931997 (514.11 it/sec) -training >> step=7928400, episode=1322 reward=0.7813547 (471.49 it/sec) -training >> step=7928500, episode=1322 reward=0.774648 (457.94 it/sec) -training >> step=7928600, episode=1322 reward=0.7981381 (500.31 it/sec) -training >> step=7928700, episode=1322 reward=0.7844098 (496.59 it/sec) -training >> step=7928800, episode=1322 reward=0.7823166 (506.85 it/sec) -training >> step=7928900, episode=1322 reward=0.7986528 (476.41 it/sec) -training >> step=7929000, episode=1322 reward=0.8038157 (490.99 it/sec) -training >> step=7929100, episode=1322 reward=0.7814927 (512.44 it/sec) -training >> step=7929200, episode=1322 reward=0.7889515 (522.52 it/sec) -training >> step=7929300, episode=1322 reward=0.8022788 (520.64 it/sec) -training >> step=7929400, episode=1322 reward=0.8037246 (489.53 it/sec) -training >> step=7929500, episode=1322 reward=0.7934365 (466.57 it/sec) -training >> step=7929600, episode=1322 reward=0.8020647 (490.84 it/sec) -training >> step=7929700, episode=1322 reward=0.7897233 (504.58 it/sec) -training >> step=7929800, episode=1322 reward=0.7843891 (501.78 it/sec) -training >> step=7929900, episode=1322 reward=0.7895049 (449.40 it/sec) -training >> step=7930000, episode=1322 reward=0.7917131 (460.00 it/sec) -training >> step=7930100, episode=1322 reward=0.7997264 (541.53 it/sec) -training >> step=7930200, episode=1322 reward=0.7880159 (519.11 it/sec) -training >> step=7930300, episode=1322 reward=0.7871188 (455.89 it/sec) -training >> step=7930400, episode=1322 reward=0.7743689 (497.08 it/sec) -training >> step=7930500, episode=1322 reward=0.7838224 (477.17 it/sec) -training >> step=7930600, episode=1322 reward=0.7923129 (478.58 it/sec) -training >> step=7930700, episode=1322 reward=0.7989851 (515.62 it/sec) -training >> step=7930800, episode=1322 reward=0.7686123 (518.26 it/sec) -training >> step=7930900, episode=1322 reward=0.7943251 (472.86 it/sec) -training >> step=7931000, episode=1322 reward=0.7967533 (467.02 it/sec) -training >> step=7931100, episode=1322 reward=0.7851781 (517.16 it/sec) -training >> step=7931200, episode=1322 reward=0.7888556 (499.68 it/sec) -training >> step=7931300, episode=1323 reward=0.7778221 (130.69 it/sec) -training >> step=7931400, episode=1323 reward=0.787597 (516.89 it/sec) -training >> step=7931500, episode=1323 reward=0.8043826 (512.70 it/sec) -training >> step=7931600, episode=1323 reward=0.7688702 (502.81 it/sec) -training >> step=7931700, episode=1323 reward=0.7764754 (503.01 it/sec) -training >> step=7931800, episode=1323 reward=0.7909063 (481.23 it/sec) -training >> step=7931900, episode=1323 reward=0.7940724 (549.85 it/sec) -training >> step=7932000, episode=1323 reward=0.7861399 (511.72 it/sec) -training >> step=7932100, episode=1323 reward=0.7869862 (530.97 it/sec) -training >> step=7932200, episode=1323 reward=0.7964656 (520.50 it/sec) -training >> step=7932300, episode=1323 reward=0.7690666 (478.19 it/sec) -training >> step=7932400, episode=1323 reward=0.7864538 (529.40 it/sec) -training >> step=7932500, episode=1323 reward=0.8066222 (523.73 it/sec) -training >> step=7932600, episode=1323 reward=0.8047884 (526.93 it/sec) -training >> step=7932700, episode=1323 reward=0.7829663 (488.86 it/sec) -training >> step=7932800, episode=1323 reward=0.7872697 (449.02 it/sec) -training >> step=7932900, episode=1323 reward=0.8171698 (520.66 it/sec) -training >> step=7933000, episode=1323 reward=0.77083 (515.51 it/sec) -training >> step=7933100, episode=1323 reward=0.7865406 (526.70 it/sec) -training >> step=7933200, episode=1323 reward=0.7988302 (510.72 it/sec) -training >> step=7933300, episode=1323 reward=0.806592 (511.66 it/sec) -training >> step=7933400, episode=1323 reward=0.789273 (512.83 it/sec) -training >> step=7933500, episode=1323 reward=0.7814463 (499.13 it/sec) -training >> step=7933600, episode=1323 reward=0.776607 (496.81 it/sec) -training >> step=7933700, episode=1323 reward=0.7974777 (508.71 it/sec) -training >> step=7933800, episode=1323 reward=0.7804354 (481.53 it/sec) -training >> step=7933900, episode=1323 reward=0.7832367 (493.30 it/sec) -training >> step=7934000, episode=1323 reward=0.8037468 (528.01 it/sec) -training >> step=7934100, episode=1323 reward=0.7942836 (513.64 it/sec) -training >> step=7934200, episode=1323 reward=0.7832507 (525.55 it/sec) -training >> step=7934300, episode=1323 reward=0.8058104 (478.13 it/sec) -training >> step=7934400, episode=1323 reward=0.7791727 (465.28 it/sec) -training >> step=7934500, episode=1323 reward=0.7889389 (541.94 it/sec) -training >> step=7934600, episode=1323 reward=0.7667688 (467.18 it/sec) -training >> step=7934700, episode=1323 reward=0.7762791 (479.58 it/sec) -training >> step=7934800, episode=1323 reward=0.7837821 (526.13 it/sec) -training >> step=7934900, episode=1323 reward=0.7901266 (502.83 it/sec) -training >> step=7935000, episode=1323 reward=0.7803421 (482.51 it/sec) -training >> step=7935100, episode=1323 reward=0.7955152 (499.80 it/sec) -training >> step=7935200, episode=1323 reward=0.8076276 (486.34 it/sec) -training >> step=7935300, episode=1323 reward=0.7960114 (470.98 it/sec) -training >> step=7935400, episode=1323 reward=0.810286 (454.93 it/sec) -training >> step=7935500, episode=1323 reward=0.7809385 (467.36 it/sec) -training >> step=7935600, episode=1323 reward=0.8062322 (498.36 it/sec) -training >> step=7935700, episode=1323 reward=0.7741653 (479.73 it/sec) -training >> step=7935800, episode=1323 reward=0.7963797 (501.74 it/sec) -training >> step=7935900, episode=1323 reward=0.7808304 (484.96 it/sec) -training >> step=7936000, episode=1323 reward=0.772092 (495.26 it/sec) -training >> step=7936100, episode=1323 reward=0.7947307 (501.24 it/sec) -training >> step=7936200, episode=1323 reward=0.7974535 (487.38 it/sec) -training >> step=7936300, episode=1323 reward=0.7695907 (489.17 it/sec) -training >> step=7936400, episode=1323 reward=0.7913237 (489.69 it/sec) -training >> step=7936500, episode=1323 reward=0.7746186 (501.36 it/sec) -training >> step=7936600, episode=1323 reward=0.7768251 (523.16 it/sec) -training >> step=7936700, episode=1323 reward=0.7914581 (528.13 it/sec) -training >> step=7936800, episode=1323 reward=0.780704 (455.31 it/sec) -training >> step=7936900, episode=1323 reward=0.7961414 (517.27 it/sec) -training >> step=7937000, episode=1323 reward=0.7933284 (498.66 it/sec) -training >> step=7937100, episode=1323 reward=0.778966 (522.20 it/sec) -training >> step=7937200, episode=1323 reward=0.8089184 (468.18 it/sec) -training >> step=7937300, episode=1324 reward=0.7782764 (116.96 it/sec) -training >> step=7937400, episode=1324 reward=0.7987174 (511.67 it/sec) -training >> step=7937500, episode=1324 reward=0.7929668 (497.01 it/sec) -training >> step=7937600, episode=1324 reward=0.7708912 (501.33 it/sec) -training >> step=7937700, episode=1324 reward=0.8015643 (513.68 it/sec) -training >> step=7937800, episode=1324 reward=0.7817413 (540.58 it/sec) -training >> step=7937900, episode=1324 reward=0.7910374 (506.30 it/sec) -training >> step=7938000, episode=1324 reward=0.8005577 (504.88 it/sec) -training >> step=7938100, episode=1324 reward=0.7734613 (489.45 it/sec) -training >> step=7938200, episode=1324 reward=0.7619817 (503.35 it/sec) -training >> step=7938300, episode=1324 reward=0.7959986 (511.37 it/sec) -training >> step=7938400, episode=1324 reward=0.8169559 (537.46 it/sec) -training >> step=7938500, episode=1324 reward=0.7926912 (505.54 it/sec) -training >> step=7938600, episode=1324 reward=0.7667888 (461.37 it/sec) -training >> step=7938700, episode=1324 reward=0.7907827 (530.09 it/sec) -training >> step=7938800, episode=1324 reward=0.7720951 (515.99 it/sec) -training >> step=7938900, episode=1324 reward=0.7714942 (527.84 it/sec) -training >> step=7939000, episode=1324 reward=0.8029689 (484.21 it/sec) -training >> step=7939100, episode=1324 reward=0.7785565 (517.15 it/sec) -training >> step=7939200, episode=1324 reward=0.7801085 (505.66 it/sec) -training >> step=7939300, episode=1324 reward=0.7873731 (486.29 it/sec) -training >> step=7939400, episode=1324 reward=0.7909974 (522.70 it/sec) -training >> step=7939500, episode=1324 reward=0.7865641 (475.90 it/sec) -training >> step=7939600, episode=1324 reward=0.8008161 (515.41 it/sec) -training >> step=7939700, episode=1324 reward=0.7908785 (479.21 it/sec) -training >> step=7939800, episode=1324 reward=0.7973151 (536.61 it/sec) -training >> step=7939900, episode=1324 reward=0.7779711 (501.25 it/sec) -training >> step=7940000, episode=1324 reward=0.7910629 (505.43 it/sec) -training >> step=7940100, episode=1324 reward=0.7787099 (473.98 it/sec) -training >> step=7940200, episode=1324 reward=0.7825417 (487.79 it/sec) -training >> step=7940300, episode=1324 reward=0.7784983 (458.39 it/sec) -training >> step=7940400, episode=1324 reward=0.7910502 (447.80 it/sec) -training >> step=7940500, episode=1324 reward=0.8036367 (456.38 it/sec) -training >> step=7940600, episode=1324 reward=0.7912143 (482.52 it/sec) -training >> step=7940700, episode=1324 reward=0.7854538 (471.25 it/sec) -training >> step=7940800, episode=1324 reward=0.7828218 (513.75 it/sec) -training >> step=7940900, episode=1324 reward=0.8108771 (497.13 it/sec) -training >> step=7941000, episode=1324 reward=0.7955028 (502.45 it/sec) -training >> step=7941100, episode=1324 reward=0.7905738 (468.08 it/sec) -training >> step=7941200, episode=1324 reward=0.8019355 (481.72 it/sec) -training >> step=7941300, episode=1324 reward=0.7981697 (528.06 it/sec) -training >> step=7941400, episode=1324 reward=0.7927734 (452.66 it/sec) -training >> step=7941500, episode=1324 reward=0.7941396 (492.32 it/sec) -training >> step=7941600, episode=1324 reward=0.793895 (533.82 it/sec) -training >> step=7941700, episode=1324 reward=0.7779952 (507.61 it/sec) -training >> step=7941800, episode=1324 reward=0.7982643 (502.37 it/sec) -training >> step=7941900, episode=1324 reward=0.8107082 (509.12 it/sec) -training >> step=7942000, episode=1324 reward=0.7856897 (518.30 it/sec) -training >> step=7942100, episode=1324 reward=0.7981827 (496.30 it/sec) -training >> step=7942200, episode=1324 reward=0.784767 (462.16 it/sec) -training >> step=7942300, episode=1324 reward=0.8118739 (517.79 it/sec) -training >> step=7942400, episode=1324 reward=0.7610551 (490.74 it/sec) -training >> step=7942500, episode=1324 reward=0.8028738 (475.72 it/sec) -training >> step=7942600, episode=1324 reward=0.7935759 (495.86 it/sec) -training >> step=7942700, episode=1324 reward=0.7937338 (501.41 it/sec) -training >> step=7942800, episode=1324 reward=0.7765886 (500.22 it/sec) -training >> step=7942900, episode=1324 reward=0.8058889 (519.74 it/sec) -training >> step=7943000, episode=1324 reward=0.8035672 (512.91 it/sec) -training >> step=7943100, episode=1324 reward=0.784451 (527.25 it/sec) -training >> step=7943200, episode=1324 reward=0.7997275 (501.50 it/sec) -training >> step=7943300, episode=1325 reward=0.7953292 (109.38 it/sec) -training >> step=7943400, episode=1325 reward=0.8012305 (521.41 it/sec) -training >> step=7943500, episode=1325 reward=0.7922886 (515.10 it/sec) -training >> step=7943600, episode=1325 reward=0.7948027 (508.66 it/sec) -training >> step=7943700, episode=1325 reward=0.7844536 (447.18 it/sec) -training >> step=7943800, episode=1325 reward=0.7859287 (498.37 it/sec) -training >> step=7943900, episode=1325 reward=0.7877409 (448.45 it/sec) -training >> step=7944000, episode=1325 reward=0.7920259 (488.25 it/sec) -training >> step=7944100, episode=1325 reward=0.7981899 (432.28 it/sec) -training >> step=7944200, episode=1325 reward=0.7891102 (466.17 it/sec) -training >> step=7944300, episode=1325 reward=0.7957932 (511.84 it/sec) -training >> step=7944400, episode=1325 reward=0.803201 (510.86 it/sec) -training >> step=7944500, episode=1325 reward=0.7899043 (521.17 it/sec) -training >> step=7944600, episode=1325 reward=0.8046343 (488.07 it/sec) -training >> step=7944700, episode=1325 reward=0.7815548 (508.69 it/sec) -training >> step=7944800, episode=1325 reward=0.7951186 (521.85 it/sec) -training >> step=7944900, episode=1325 reward=0.807457 (524.20 it/sec) -training >> step=7945000, episode=1325 reward=0.7927973 (488.51 it/sec) -training >> step=7945100, episode=1325 reward=0.7856289 (503.29 it/sec) -training >> step=7945200, episode=1325 reward=0.7693111 (509.11 it/sec) -training >> step=7945300, episode=1325 reward=0.8021827 (497.48 it/sec) -training >> step=7945400, episode=1325 reward=0.8090886 (480.71 it/sec) -training >> step=7945500, episode=1325 reward=0.7988977 (521.89 it/sec) -training >> step=7945600, episode=1325 reward=0.7924845 (541.29 it/sec) -training >> step=7945700, episode=1325 reward=0.7797749 (475.67 it/sec) -training >> step=7945800, episode=1325 reward=0.8077615 (516.67 it/sec) -training >> step=7945900, episode=1325 reward=0.7790182 (503.65 it/sec) -training >> step=7946000, episode=1325 reward=0.7551773 (494.06 it/sec) -training >> step=7946100, episode=1325 reward=0.7909824 (524.53 it/sec) -training >> step=7946200, episode=1325 reward=0.7976115 (517.64 it/sec) -training >> step=7946300, episode=1325 reward=0.7982332 (494.96 it/sec) -training >> step=7946400, episode=1325 reward=0.7976226 (476.30 it/sec) -training >> step=7946500, episode=1325 reward=0.7809114 (468.76 it/sec) -training >> step=7946600, episode=1325 reward=0.7925722 (481.36 it/sec) -training >> step=7946700, episode=1325 reward=0.7945153 (518.53 it/sec) -training >> step=7946800, episode=1325 reward=0.8059149 (530.17 it/sec) -training >> step=7946900, episode=1325 reward=0.8154427 (523.43 it/sec) -training >> step=7947000, episode=1325 reward=0.7682359 (492.17 it/sec) -training >> step=7947100, episode=1325 reward=0.792258 (513.21 it/sec) -training >> step=7947200, episode=1325 reward=0.7843318 (502.61 it/sec) -training >> step=7947300, episode=1325 reward=0.7833926 (496.46 it/sec) -training >> step=7947400, episode=1325 reward=0.7724977 (534.16 it/sec) -training >> step=7947500, episode=1325 reward=0.7921347 (489.91 it/sec) -training >> step=7947600, episode=1325 reward=0.7864992 (462.80 it/sec) -training >> step=7947700, episode=1325 reward=0.78188 (506.68 it/sec) -training >> step=7947800, episode=1325 reward=0.7806342 (496.28 it/sec) -training >> step=7947900, episode=1325 reward=0.7913988 (511.66 it/sec) -training >> step=7948000, episode=1325 reward=0.7923725 (509.01 it/sec) -training >> step=7948100, episode=1325 reward=0.7934884 (527.47 it/sec) -training >> step=7948200, episode=1325 reward=0.7947398 (516.38 it/sec) -training >> step=7948300, episode=1325 reward=0.7795693 (514.31 it/sec) -training >> step=7948400, episode=1325 reward=0.7837208 (452.53 it/sec) -training >> step=7948500, episode=1325 reward=0.776722 (551.42 it/sec) -training >> step=7948600, episode=1325 reward=0.7640297 (517.95 it/sec) -training >> step=7948700, episode=1325 reward=0.7973529 (503.59 it/sec) -training >> step=7948800, episode=1325 reward=0.7804713 (478.50 it/sec) -training >> step=7948900, episode=1325 reward=0.7604843 (512.34 it/sec) -training >> step=7949000, episode=1325 reward=0.7920424 (482.74 it/sec) -training >> step=7949100, episode=1325 reward=0.7735318 (486.44 it/sec) -training >> step=7949200, episode=1325 reward=0.7973183 (501.37 it/sec) -training >> step=7949300, episode=1326 reward=0.7859037 (123.87 it/sec) -training >> step=7949400, episode=1326 reward=0.7979673 (492.06 it/sec) -training >> step=7949500, episode=1326 reward=0.8014894 (489.78 it/sec) -training >> step=7949600, episode=1326 reward=0.7939867 (498.54 it/sec) -training >> step=7949700, episode=1326 reward=0.7911427 (500.72 it/sec) -training >> step=7949800, episode=1326 reward=0.7978841 (501.89 it/sec) -training >> step=7949900, episode=1326 reward=0.7649852 (525.57 it/sec) -training >> step=7950000, episode=1326 reward=0.7712512 (471.22 it/sec) -training >> step=7950100, episode=1326 reward=0.775043 (516.67 it/sec) -training >> step=7950200, episode=1326 reward=0.7984891 (516.63 it/sec) -training >> step=7950300, episode=1326 reward=0.786975 (490.82 it/sec) -training >> step=7950400, episode=1326 reward=0.7826312 (490.83 it/sec) -training >> step=7950500, episode=1326 reward=0.7989757 (482.43 it/sec) -training >> step=7950600, episode=1326 reward=0.8048248 (539.02 it/sec) -training >> step=7950700, episode=1326 reward=0.7893068 (522.76 it/sec) -training >> step=7950800, episode=1326 reward=0.7819808 (492.68 it/sec) -training >> step=7950900, episode=1326 reward=0.7912262 (513.36 it/sec) -training >> step=7951000, episode=1326 reward=0.800149 (477.86 it/sec) -training >> step=7951100, episode=1326 reward=0.7990118 (481.32 it/sec) -training >> step=7951200, episode=1326 reward=0.8152007 (514.22 it/sec) -training >> step=7951300, episode=1326 reward=0.7791334 (467.53 it/sec) -training >> step=7951400, episode=1326 reward=0.7904425 (544.82 it/sec) -training >> step=7951500, episode=1326 reward=0.7986023 (488.46 it/sec) -training >> step=7951600, episode=1326 reward=0.8217061 (441.57 it/sec) -training >> step=7951700, episode=1326 reward=0.8019512 (491.89 it/sec) -training >> step=7951800, episode=1326 reward=0.7943224 (425.91 it/sec) -training >> step=7951900, episode=1326 reward=0.7839952 (522.68 it/sec) -training >> step=7952000, episode=1326 reward=0.7884101 (494.44 it/sec) -training >> step=7952100, episode=1326 reward=0.7903079 (508.80 it/sec) -training >> step=7952200, episode=1326 reward=0.7845525 (537.17 it/sec) -training >> step=7952300, episode=1326 reward=0.7941759 (449.92 it/sec) -training >> step=7952400, episode=1326 reward=0.7830953 (480.28 it/sec) -training >> step=7952500, episode=1326 reward=0.787237 (531.50 it/sec) -training >> step=7952600, episode=1326 reward=0.7665957 (475.00 it/sec) -training >> step=7952700, episode=1326 reward=0.7837762 (512.96 it/sec) -training >> step=7952800, episode=1326 reward=0.7942215 (505.96 it/sec) -training >> step=7952900, episode=1326 reward=0.7750478 (515.45 it/sec) -training >> step=7953000, episode=1326 reward=0.7881064 (517.69 it/sec) -training >> step=7953100, episode=1326 reward=0.7829378 (461.90 it/sec) -training >> step=7953200, episode=1326 reward=0.7849776 (526.60 it/sec) -training >> step=7953300, episode=1326 reward=0.7967808 (506.86 it/sec) -training >> step=7953400, episode=1326 reward=0.7876046 (481.76 it/sec) -training >> step=7953500, episode=1326 reward=0.765744 (480.55 it/sec) -training >> step=7953600, episode=1326 reward=0.7884219 (527.37 it/sec) -training >> step=7953700, episode=1326 reward=0.7860837 (473.97 it/sec) -training >> step=7953800, episode=1326 reward=0.7870813 (486.37 it/sec) -training >> step=7953900, episode=1326 reward=0.8028992 (489.32 it/sec) -training >> step=7954000, episode=1326 reward=0.8137453 (552.94 it/sec) -training >> step=7954100, episode=1326 reward=0.8043126 (474.80 it/sec) -training >> step=7954200, episode=1326 reward=0.7941883 (504.04 it/sec) -training >> step=7954300, episode=1326 reward=0.7825076 (531.25 it/sec) -training >> step=7954400, episode=1326 reward=0.7832962 (505.39 it/sec) -training >> step=7954500, episode=1326 reward=0.7916706 (487.46 it/sec) -training >> step=7954600, episode=1326 reward=0.7952882 (513.05 it/sec) -training >> step=7954700, episode=1326 reward=0.7868683 (498.87 it/sec) -training >> step=7954800, episode=1326 reward=0.7948104 (446.04 it/sec) -training >> step=7954900, episode=1326 reward=0.791918 (502.33 it/sec) -training >> step=7955000, episode=1326 reward=0.7793936 (532.65 it/sec) -training >> step=7955100, episode=1326 reward=0.7791844 (495.98 it/sec) -training >> step=7955200, episode=1326 reward=0.7603811 (492.41 it/sec) -training >> step=7955300, episode=1327 reward=0.8017612 (128.95 it/sec) -training >> step=7955400, episode=1327 reward=0.7936104 (472.99 it/sec) -training >> step=7955500, episode=1327 reward=0.7724162 (496.16 it/sec) -training >> step=7955600, episode=1327 reward=0.7955981 (495.62 it/sec) -training >> step=7955700, episode=1327 reward=0.7814178 (502.88 it/sec) -training >> step=7955800, episode=1327 reward=0.7907624 (509.96 it/sec) -training >> step=7955900, episode=1327 reward=0.7833691 (470.75 it/sec) -training >> step=7956000, episode=1327 reward=0.7845133 (510.32 it/sec) -training >> step=7956100, episode=1327 reward=0.790157 (477.12 it/sec) -training >> step=7956200, episode=1327 reward=0.7878771 (509.67 it/sec) -training >> step=7956300, episode=1327 reward=0.780857 (466.91 it/sec) -training >> step=7956400, episode=1327 reward=0.7888634 (440.46 it/sec) -training >> step=7956500, episode=1327 reward=0.7814722 (529.97 it/sec) -training >> step=7956600, episode=1327 reward=0.7840673 (473.75 it/sec) -training >> step=7956700, episode=1327 reward=0.8004785 (504.11 it/sec) -training >> step=7956800, episode=1327 reward=0.7934532 (502.45 it/sec) -training >> step=7956900, episode=1327 reward=0.772936 (501.63 it/sec) -training >> step=7957000, episode=1327 reward=0.7633999 (513.33 it/sec) -training >> step=7957100, episode=1327 reward=0.7878781 (490.23 it/sec) -training >> step=7957200, episode=1327 reward=0.7963897 (514.55 it/sec) -training >> step=7957300, episode=1327 reward=0.7798628 (531.69 it/sec) -training >> step=7957400, episode=1327 reward=0.8034857 (478.52 it/sec) -training >> step=7957500, episode=1327 reward=0.7797655 (491.40 it/sec) -training >> step=7957600, episode=1327 reward=0.8015804 (511.59 it/sec) -training >> step=7957700, episode=1327 reward=0.7940584 (514.35 it/sec) -training >> step=7957800, episode=1327 reward=0.7840899 (490.89 it/sec) -training >> step=7957900, episode=1327 reward=0.7976862 (468.04 it/sec) -training >> step=7958000, episode=1327 reward=0.7956675 (476.03 it/sec) -training >> step=7958100, episode=1327 reward=0.7766529 (526.34 it/sec) -training >> step=7958200, episode=1327 reward=0.7819796 (493.42 it/sec) -training >> step=7958300, episode=1327 reward=0.7679153 (427.59 it/sec) -training >> step=7958400, episode=1327 reward=0.8130088 (490.41 it/sec) -training >> step=7958500, episode=1327 reward=0.7792054 (453.64 it/sec) -training >> step=7958600, episode=1327 reward=0.7942722 (466.79 it/sec) -training >> step=7958700, episode=1327 reward=0.7920228 (539.70 it/sec) -training >> step=7958800, episode=1327 reward=0.7921355 (501.42 it/sec) -training >> step=7958900, episode=1327 reward=0.7946196 (506.81 it/sec) -training >> step=7959000, episode=1327 reward=0.7769856 (520.91 it/sec) -training >> step=7959100, episode=1327 reward=0.7774776 (499.42 it/sec) -training >> step=7959200, episode=1327 reward=0.8002396 (495.16 it/sec) -training >> step=7959300, episode=1327 reward=0.814239 (519.27 it/sec) -training >> step=7959400, episode=1327 reward=0.7944144 (481.82 it/sec) -training >> step=7959500, episode=1327 reward=0.7902141 (469.03 it/sec) -training >> step=7959600, episode=1327 reward=0.7918291 (509.40 it/sec) -training >> step=7959700, episode=1327 reward=0.7945845 (486.91 it/sec) -training >> step=7959800, episode=1327 reward=0.798204 (538.56 it/sec) -training >> step=7959900, episode=1327 reward=0.815351 (495.62 it/sec) -training >> step=7960000, episode=1327 reward=0.7961804 (510.31 it/sec) -training >> step=7960100, episode=1327 reward=0.794448 (480.02 it/sec) -training >> step=7960200, episode=1327 reward=0.7815883 (525.47 it/sec) -training >> step=7960300, episode=1327 reward=0.790579 (514.54 it/sec) -training >> step=7960400, episode=1327 reward=0.7965303 (506.20 it/sec) -training >> step=7960500, episode=1327 reward=0.7946152 (452.93 it/sec) -training >> step=7960600, episode=1327 reward=0.7760392 (494.53 it/sec) -training >> step=7960700, episode=1327 reward=0.7940989 (512.01 it/sec) -training >> step=7960800, episode=1327 reward=0.7720003 (522.07 it/sec) -training >> step=7960900, episode=1327 reward=0.7871738 (467.89 it/sec) -training >> step=7961000, episode=1327 reward=0.7783445 (441.65 it/sec) -training >> step=7961100, episode=1327 reward=0.8273122 (512.97 it/sec) -training >> step=7961200, episode=1327 reward=0.781007 (477.03 it/sec) -training >> step=7961300, episode=1328 reward=0.7725043 (129.34 it/sec) -training >> step=7961400, episode=1328 reward=0.7846802 (526.96 it/sec) -training >> step=7961500, episode=1328 reward=0.8071802 (509.97 it/sec) -training >> step=7961600, episode=1328 reward=0.7861028 (477.15 it/sec) -training >> step=7961700, episode=1328 reward=0.8044774 (449.97 it/sec) -training >> step=7961800, episode=1328 reward=0.788538 (459.11 it/sec) -training >> step=7961900, episode=1328 reward=0.7862227 (462.26 it/sec) -training >> step=7962000, episode=1328 reward=0.7832384 (526.49 it/sec) -training >> step=7962100, episode=1328 reward=0.8085258 (524.49 it/sec) -training >> step=7962200, episode=1328 reward=0.7827804 (482.65 it/sec) -training >> step=7962300, episode=1328 reward=0.7784544 (488.57 it/sec) -training >> step=7962400, episode=1328 reward=0.7733392 (522.48 it/sec) -training >> step=7962500, episode=1328 reward=0.7945561 (505.82 it/sec) -training >> step=7962600, episode=1328 reward=0.7829894 (510.99 it/sec) -training >> step=7962700, episode=1328 reward=0.7808718 (485.22 it/sec) -training >> step=7962800, episode=1328 reward=0.7979066 (514.68 it/sec) -training >> step=7962900, episode=1328 reward=0.7800407 (453.03 it/sec) -training >> step=7963000, episode=1328 reward=0.7876831 (505.34 it/sec) -training >> step=7963100, episode=1328 reward=0.7992469 (514.63 it/sec) -training >> step=7963200, episode=1328 reward=0.7835311 (468.59 it/sec) -training >> step=7963300, episode=1328 reward=0.7922427 (504.61 it/sec) -training >> step=7963400, episode=1328 reward=0.8046542 (511.59 it/sec) -training >> step=7963500, episode=1328 reward=0.7723635 (492.87 it/sec) -training >> step=7963600, episode=1328 reward=0.7689292 (507.86 it/sec) -training >> step=7963700, episode=1328 reward=0.8020564 (497.07 it/sec) -training >> step=7963800, episode=1328 reward=0.7948663 (519.83 it/sec) -training >> step=7963900, episode=1328 reward=0.8041644 (498.31 it/sec) -training >> step=7964000, episode=1328 reward=0.7845121 (505.10 it/sec) -training >> step=7964100, episode=1328 reward=0.7831079 (432.54 it/sec) -training >> step=7964200, episode=1328 reward=0.8057156 (521.43 it/sec) -training >> step=7964300, episode=1328 reward=0.7719558 (495.06 it/sec) -training >> step=7964400, episode=1328 reward=0.8023616 (502.96 it/sec) -training >> step=7964500, episode=1328 reward=0.7962021 (514.33 it/sec) -training >> step=7964600, episode=1328 reward=0.7796386 (490.47 it/sec) -training >> step=7964700, episode=1328 reward=0.7970369 (496.06 it/sec) -training >> step=7964800, episode=1328 reward=0.7862775 (487.81 it/sec) -training >> step=7964900, episode=1328 reward=0.7945026 (492.65 it/sec) -training >> step=7965000, episode=1328 reward=0.7956728 (490.56 it/sec) -training >> step=7965100, episode=1328 reward=0.7504699 (509.67 it/sec) -training >> step=7965200, episode=1328 reward=0.8001719 (535.62 it/sec) -training >> step=7965300, episode=1328 reward=0.7938815 (499.89 it/sec) -training >> step=7965400, episode=1328 reward=0.7924258 (541.44 it/sec) -training >> step=7965500, episode=1328 reward=0.793789 (474.74 it/sec) -training >> step=7965600, episode=1328 reward=0.8118129 (532.67 it/sec) -training >> step=7965700, episode=1328 reward=0.7999212 (492.91 it/sec) -training >> step=7965800, episode=1328 reward=0.7863779 (509.24 it/sec) -training >> step=7965900, episode=1328 reward=0.7880303 (487.91 it/sec) -training >> step=7966000, episode=1328 reward=0.7829883 (505.29 it/sec) -training >> step=7966100, episode=1328 reward=0.7872405 (513.74 it/sec) -training >> step=7966200, episode=1328 reward=0.798268 (490.04 it/sec) -training >> step=7966300, episode=1328 reward=0.7821205 (538.67 it/sec) -training >> step=7966400, episode=1328 reward=0.7882532 (448.56 it/sec) -training >> step=7966500, episode=1328 reward=0.7791973 (506.07 it/sec) -training >> step=7966600, episode=1328 reward=0.797816 (484.67 it/sec) -training >> step=7966700, episode=1328 reward=0.7689331 (545.32 it/sec) -training >> step=7966800, episode=1328 reward=0.7793221 (493.99 it/sec) -training >> step=7966900, episode=1328 reward=0.7942028 (524.08 it/sec) -training >> step=7967000, episode=1328 reward=0.7714196 (489.15 it/sec) -training >> step=7967100, episode=1328 reward=0.7798138 (504.97 it/sec) -training >> step=7967200, episode=1328 reward=0.7867804 (475.67 it/sec) -training >> step=7967300, episode=1329 reward=0.8003342 (127.16 it/sec) -training >> step=7967400, episode=1329 reward=0.7919213 (476.94 it/sec) -training >> step=7967500, episode=1329 reward=0.787872 (539.25 it/sec) -training >> step=7967600, episode=1329 reward=0.7722415 (495.56 it/sec) -training >> step=7967700, episode=1329 reward=0.7842962 (514.83 it/sec) -training >> step=7967800, episode=1329 reward=0.8034035 (481.71 it/sec) -training >> step=7967900, episode=1329 reward=0.8007974 (527.53 it/sec) -training >> step=7968000, episode=1329 reward=0.8115687 (530.03 it/sec) -training >> step=7968100, episode=1329 reward=0.8037119 (513.95 it/sec) -training >> step=7968200, episode=1329 reward=0.7816923 (517.60 it/sec) -training >> step=7968300, episode=1329 reward=0.7739182 (501.46 it/sec) -training >> step=7968400, episode=1329 reward=0.8011188 (529.28 it/sec) -training >> step=7968500, episode=1329 reward=0.7899925 (526.62 it/sec) -training >> step=7968600, episode=1329 reward=0.7765214 (519.15 it/sec) -training >> step=7968700, episode=1329 reward=0.7952093 (480.90 it/sec) -training >> step=7968800, episode=1329 reward=0.7995077 (486.13 it/sec) -training >> step=7968900, episode=1329 reward=0.7761338 (528.14 it/sec) -training >> step=7969000, episode=1329 reward=0.7926196 (505.59 it/sec) -training >> step=7969100, episode=1329 reward=0.7867994 (511.29 it/sec) -training >> step=7969200, episode=1329 reward=0.7900665 (518.03 it/sec) -training >> step=7969300, episode=1329 reward=0.7825502 (545.75 it/sec) -training >> step=7969400, episode=1329 reward=0.7908536 (467.74 it/sec) -training >> step=7969500, episode=1329 reward=0.7866078 (477.71 it/sec) -training >> step=7969600, episode=1329 reward=0.7906114 (515.22 it/sec) -training >> step=7969700, episode=1329 reward=0.7610527 (529.42 it/sec) -training >> step=7969800, episode=1329 reward=0.7916101 (480.38 it/sec) -training >> step=7969900, episode=1329 reward=0.7697313 (462.87 it/sec) -training >> step=7970000, episode=1329 reward=0.7874305 (481.60 it/sec) -training >> step=7970100, episode=1329 reward=0.7760098 (511.79 it/sec) -training >> step=7970200, episode=1329 reward=0.8064882 (511.42 it/sec) -training >> step=7970300, episode=1329 reward=0.789641 (504.70 it/sec) -training >> step=7970400, episode=1329 reward=0.8162869 (525.89 it/sec) -training >> step=7970500, episode=1329 reward=0.7817078 (498.42 it/sec) -training >> step=7970600, episode=1329 reward=0.7711006 (524.68 it/sec) -training >> step=7970700, episode=1329 reward=0.7844831 (518.79 it/sec) -training >> step=7970800, episode=1329 reward=0.7648169 (549.31 it/sec) -training >> step=7970900, episode=1329 reward=0.8003963 (494.22 it/sec) -training >> step=7971000, episode=1329 reward=0.7859032 (521.80 it/sec) -training >> step=7971100, episode=1329 reward=0.7943481 (539.34 it/sec) -training >> step=7971200, episode=1329 reward=0.782858 (505.01 it/sec) -training >> step=7971300, episode=1329 reward=0.7903393 (515.06 it/sec) -training >> step=7971400, episode=1329 reward=0.7875376 (527.53 it/sec) -training >> step=7971500, episode=1329 reward=0.7787675 (512.65 it/sec) -training >> step=7971600, episode=1329 reward=0.7985072 (518.72 it/sec) -training >> step=7971700, episode=1329 reward=0.7915148 (510.18 it/sec) -training >> step=7971800, episode=1329 reward=0.7825065 (514.68 it/sec) -training >> step=7971900, episode=1329 reward=0.7676069 (521.72 it/sec) -training >> step=7972000, episode=1329 reward=0.7772444 (503.83 it/sec) -training >> step=7972100, episode=1329 reward=0.7749349 (457.52 it/sec) -training >> step=7972200, episode=1329 reward=0.7934023 (540.69 it/sec) -training >> step=7972300, episode=1329 reward=0.7665569 (521.70 it/sec) -training >> step=7972400, episode=1329 reward=0.7892436 (430.83 it/sec) -training >> step=7972500, episode=1329 reward=0.7796801 (519.04 it/sec) -training >> step=7972600, episode=1329 reward=0.7925307 (462.87 it/sec) -training >> step=7972700, episode=1329 reward=0.8051332 (500.10 it/sec) -training >> step=7972800, episode=1329 reward=0.8020778 (503.41 it/sec) -training >> step=7972900, episode=1329 reward=0.7720925 (514.19 it/sec) -training >> step=7973000, episode=1329 reward=0.785557 (452.28 it/sec) -training >> step=7973100, episode=1329 reward=0.7803233 (516.80 it/sec) -training >> step=7973200, episode=1329 reward=0.7867056 (468.88 it/sec) -training >> step=7973300, episode=1330 reward=0.7899816 (132.58 it/sec) -training >> step=7973400, episode=1330 reward=0.7988957 (517.45 it/sec) -training >> step=7973500, episode=1330 reward=0.7917513 (496.02 it/sec) -training >> step=7973600, episode=1330 reward=0.7722203 (440.62 it/sec) -training >> step=7973700, episode=1330 reward=0.7825605 (485.24 it/sec) -training >> step=7973800, episode=1330 reward=0.7775 (450.98 it/sec) -training >> step=7973900, episode=1330 reward=0.7908176 (520.84 it/sec) -training >> step=7974000, episode=1330 reward=0.8034677 (525.33 it/sec) -training >> step=7974100, episode=1330 reward=0.7972976 (486.40 it/sec) -training >> step=7974200, episode=1330 reward=0.7966149 (468.72 it/sec) -training >> step=7974300, episode=1330 reward=0.787887 (518.87 it/sec) -training >> step=7974400, episode=1330 reward=0.8094906 (516.34 it/sec) -training >> step=7974500, episode=1330 reward=0.787999 (545.29 it/sec) -training >> step=7974600, episode=1330 reward=0.8119276 (535.05 it/sec) -training >> step=7974700, episode=1330 reward=0.7811273 (471.68 it/sec) -training >> step=7974800, episode=1330 reward=0.7695135 (507.93 it/sec) -training >> step=7974900, episode=1330 reward=0.7926193 (535.62 it/sec) -training >> step=7975000, episode=1330 reward=0.7978491 (489.46 it/sec) -training >> step=7975100, episode=1330 reward=0.7735782 (489.48 it/sec) -training >> step=7975200, episode=1330 reward=0.7903153 (495.29 it/sec) -training >> step=7975300, episode=1330 reward=0.7823172 (510.02 it/sec) -training >> step=7975400, episode=1330 reward=0.8167309 (486.72 it/sec) -training >> step=7975500, episode=1330 reward=0.8027771 (480.98 it/sec) -training >> step=7975600, episode=1330 reward=0.7828133 (538.60 it/sec) -training >> step=7975700, episode=1330 reward=0.7627431 (496.82 it/sec) -training >> step=7975800, episode=1330 reward=0.7975903 (498.34 it/sec) -training >> step=7975900, episode=1330 reward=0.7933065 (539.90 it/sec) -training >> step=7976000, episode=1330 reward=0.806708 (499.48 it/sec) -training >> step=7976100, episode=1330 reward=0.7979211 (521.50 it/sec) -training >> step=7976200, episode=1330 reward=0.7888395 (524.29 it/sec) -training >> step=7976300, episode=1330 reward=0.7915651 (480.16 it/sec) -training >> step=7976400, episode=1330 reward=0.8000339 (524.62 it/sec) -training >> step=7976500, episode=1330 reward=0.7720523 (505.40 it/sec) -training >> step=7976600, episode=1330 reward=0.7786453 (477.47 it/sec) -training >> step=7976700, episode=1330 reward=0.7892332 (481.86 it/sec) -training >> step=7976800, episode=1330 reward=0.7876059 (480.98 it/sec) -training >> step=7976900, episode=1330 reward=0.7783871 (505.84 it/sec) -training >> step=7977000, episode=1330 reward=0.7867519 (537.79 it/sec) -training >> step=7977100, episode=1330 reward=0.7832766 (513.13 it/sec) -training >> step=7977200, episode=1330 reward=0.7644132 (507.09 it/sec) -training >> step=7977300, episode=1330 reward=0.7904795 (490.12 it/sec) -training >> step=7977400, episode=1330 reward=0.7855758 (511.14 it/sec) -training >> step=7977500, episode=1330 reward=0.786859 (500.12 it/sec) -training >> step=7977600, episode=1330 reward=0.788587 (488.68 it/sec) -training >> step=7977700, episode=1330 reward=0.7808542 (483.53 it/sec) -training >> step=7977800, episode=1330 reward=0.7936004 (476.15 it/sec) -training >> step=7977900, episode=1330 reward=0.8001292 (494.44 it/sec) -training >> step=7978000, episode=1330 reward=0.7845411 (513.16 it/sec) -training >> step=7978100, episode=1330 reward=0.7995989 (507.82 it/sec) -training >> step=7978200, episode=1330 reward=0.7866961 (483.33 it/sec) -training >> step=7978300, episode=1330 reward=0.7925112 (503.84 it/sec) -training >> step=7978400, episode=1330 reward=0.7765448 (502.55 it/sec) -training >> step=7978500, episode=1330 reward=0.7917387 (513.08 it/sec) -training >> step=7978600, episode=1330 reward=0.7851871 (456.97 it/sec) -training >> step=7978700, episode=1330 reward=0.7749619 (518.47 it/sec) -training >> step=7978800, episode=1330 reward=0.7990293 (510.20 it/sec) -training >> step=7978900, episode=1330 reward=0.8013911 (498.17 it/sec) -training >> step=7979000, episode=1330 reward=0.7928503 (473.32 it/sec) -training >> step=7979100, episode=1330 reward=0.7833307 (500.63 it/sec) -training >> step=7979200, episode=1330 reward=0.7987304 (546.84 it/sec) -training >> step=7979300, episode=1331 reward=0.786021 (121.73 it/sec) -training >> step=7979400, episode=1331 reward=0.7778431 (508.16 it/sec) -training >> step=7979500, episode=1331 reward=0.7946866 (504.88 it/sec) -training >> step=7979600, episode=1331 reward=0.7914224 (470.46 it/sec) -training >> step=7979700, episode=1331 reward=0.7783583 (479.82 it/sec) -training >> step=7979800, episode=1331 reward=0.7880226 (502.91 it/sec) -training >> step=7979900, episode=1331 reward=0.7915572 (507.01 it/sec) -training >> step=7980000, episode=1331 reward=0.8004285 (482.67 it/sec) -training >> step=7980100, episode=1331 reward=0.7897162 (480.88 it/sec) -training >> step=7980200, episode=1331 reward=0.7858151 (484.93 it/sec) -training >> step=7980300, episode=1331 reward=0.8016021 (543.98 it/sec) -training >> step=7980400, episode=1331 reward=0.7831813 (487.82 it/sec) -training >> step=7980500, episode=1331 reward=0.7872964 (473.22 it/sec) -training >> step=7980600, episode=1331 reward=0.7792426 (472.04 it/sec) -training >> step=7980700, episode=1331 reward=0.7790951 (520.17 it/sec) -training >> step=7980800, episode=1331 reward=0.7939721 (486.47 it/sec) -training >> step=7980900, episode=1331 reward=0.8058677 (512.42 it/sec) -training >> step=7981000, episode=1331 reward=0.7797459 (487.96 it/sec) -training >> step=7981100, episode=1331 reward=0.7957693 (459.07 it/sec) -training >> step=7981200, episode=1331 reward=0.7826712 (508.21 it/sec) -training >> step=7981300, episode=1331 reward=0.7852613 (524.93 it/sec) -training >> step=7981400, episode=1331 reward=0.7927648 (470.71 it/sec) -training >> step=7981500, episode=1331 reward=0.7867537 (442.99 it/sec) -training >> step=7981600, episode=1331 reward=0.7973343 (486.45 it/sec) -training >> step=7981700, episode=1331 reward=0.7847651 (478.19 it/sec) -training >> step=7981800, episode=1331 reward=0.7906591 (508.81 it/sec) -training >> step=7981900, episode=1331 reward=0.7787613 (499.11 it/sec) -training >> step=7982000, episode=1331 reward=0.7845057 (501.56 it/sec) -training >> step=7982100, episode=1331 reward=0.7718059 (494.22 it/sec) -training >> step=7982200, episode=1331 reward=0.7683222 (487.73 it/sec) -training >> step=7982300, episode=1331 reward=0.7684557 (494.83 it/sec) -training >> step=7982400, episode=1331 reward=0.7969715 (515.89 it/sec) -training >> step=7982500, episode=1331 reward=0.7905563 (446.75 it/sec) -training >> step=7982600, episode=1331 reward=0.7858969 (520.76 it/sec) -training >> step=7982700, episode=1331 reward=0.7932028 (470.03 it/sec) -training >> step=7982800, episode=1331 reward=0.7917287 (510.65 it/sec) -training >> step=7982900, episode=1331 reward=0.7687137 (514.97 it/sec) -training >> step=7983000, episode=1331 reward=0.7691058 (514.14 it/sec) -training >> step=7983100, episode=1331 reward=0.7970138 (480.02 it/sec) -training >> step=7983200, episode=1331 reward=0.7942445 (489.67 it/sec) -training >> step=7983300, episode=1331 reward=0.7900998 (494.16 it/sec) -training >> step=7983400, episode=1331 reward=0.7923207 (499.31 it/sec) -training >> step=7983500, episode=1331 reward=0.7839637 (483.10 it/sec) -training >> step=7983600, episode=1331 reward=0.7836038 (470.89 it/sec) -training >> step=7983700, episode=1331 reward=0.7982116 (541.50 it/sec) -training >> step=7983800, episode=1331 reward=0.7738437 (496.64 it/sec) -training >> step=7983900, episode=1331 reward=0.8016551 (511.85 it/sec) -training >> step=7984000, episode=1331 reward=0.7821191 (515.56 it/sec) -training >> step=7984100, episode=1331 reward=0.7878674 (469.69 it/sec) -training >> step=7984200, episode=1331 reward=0.7877706 (475.10 it/sec) -training >> step=7984300, episode=1331 reward=0.7729812 (517.15 it/sec) -training >> step=7984400, episode=1331 reward=0.8018124 (515.08 it/sec) -training >> step=7984500, episode=1331 reward=0.7780362 (488.76 it/sec) -training >> step=7984600, episode=1331 reward=0.7874686 (482.62 it/sec) -training >> step=7984700, episode=1331 reward=0.7797214 (498.00 it/sec) -training >> step=7984800, episode=1331 reward=0.7964368 (539.73 it/sec) -training >> step=7984900, episode=1331 reward=0.8010153 (524.28 it/sec) -training >> step=7985000, episode=1331 reward=0.7892832 (500.80 it/sec) -training >> step=7985100, episode=1331 reward=0.7989364 (507.69 it/sec) -training >> step=7985200, episode=1331 reward=0.7886789 (509.60 it/sec) -training >> step=7985300, episode=1332 reward=0.8001812 (133.53 it/sec) -training >> step=7985400, episode=1332 reward=0.7849801 (474.91 it/sec) -training >> step=7985500, episode=1332 reward=0.7871656 (480.85 it/sec) -training >> step=7985600, episode=1332 reward=0.7820938 (504.37 it/sec) -training >> step=7985700, episode=1332 reward=0.7767096 (502.95 it/sec) -training >> step=7985800, episode=1332 reward=0.789058 (467.85 it/sec) -training >> step=7985900, episode=1332 reward=0.7978554 (497.78 it/sec) -training >> step=7986000, episode=1332 reward=0.769928 (516.57 it/sec) -training >> step=7986100, episode=1332 reward=0.7836483 (474.99 it/sec) -training >> step=7986200, episode=1332 reward=0.7859322 (521.94 it/sec) -training >> step=7986300, episode=1332 reward=0.7853849 (499.33 it/sec) -training >> step=7986400, episode=1332 reward=0.7829571 (476.48 it/sec) -training >> step=7986500, episode=1332 reward=0.7993788 (473.69 it/sec) -training >> step=7986600, episode=1332 reward=0.7695491 (507.34 it/sec) -training >> step=7986700, episode=1332 reward=0.7927548 (486.68 it/sec) -training >> step=7986800, episode=1332 reward=0.7981364 (471.36 it/sec) -training >> step=7986900, episode=1332 reward=0.7956088 (554.84 it/sec) -training >> step=7987000, episode=1332 reward=0.7930889 (488.16 it/sec) -training >> step=7987100, episode=1332 reward=0.7884247 (512.78 it/sec) -training >> step=7987200, episode=1332 reward=0.8000292 (477.10 it/sec) -training >> step=7987300, episode=1332 reward=0.7937013 (487.85 it/sec) -training >> step=7987400, episode=1332 reward=0.795029 (503.03 it/sec) -training >> step=7987500, episode=1332 reward=0.7868832 (505.00 it/sec) -training >> step=7987600, episode=1332 reward=0.7815036 (476.89 it/sec) -training >> step=7987700, episode=1332 reward=0.7833326 (503.58 it/sec) -training >> step=7987800, episode=1332 reward=0.8044934 (506.00 it/sec) -training >> step=7987900, episode=1332 reward=0.7949155 (517.18 it/sec) -training >> step=7988000, episode=1332 reward=0.7999542 (547.25 it/sec) -training >> step=7988100, episode=1332 reward=0.7931558 (454.05 it/sec) -training >> step=7988200, episode=1332 reward=0.7766153 (529.22 it/sec) -training >> step=7988300, episode=1332 reward=0.7907344 (445.21 it/sec) -training >> step=7988400, episode=1332 reward=0.8042499 (423.34 it/sec) -training >> step=7988500, episode=1332 reward=0.7883152 (541.75 it/sec) -training >> step=7988600, episode=1332 reward=0.791352 (489.01 it/sec) -training >> step=7988700, episode=1332 reward=0.790176 (511.37 it/sec) -training >> step=7988800, episode=1332 reward=0.7810761 (500.02 it/sec) -training >> step=7988900, episode=1332 reward=0.7876912 (504.02 it/sec) -training >> step=7989000, episode=1332 reward=0.7766226 (479.01 it/sec) -training >> step=7989100, episode=1332 reward=0.776327 (511.03 it/sec) -training >> step=7989200, episode=1332 reward=0.8086043 (510.23 it/sec) -training >> step=7989300, episode=1332 reward=0.7876169 (521.26 it/sec) -training >> step=7989400, episode=1332 reward=0.7895504 (513.21 it/sec) -training >> step=7989500, episode=1332 reward=0.7993973 (491.30 it/sec) -training >> step=7989600, episode=1332 reward=0.7684353 (515.07 it/sec) -training >> step=7989700, episode=1332 reward=0.7969208 (487.58 it/sec) -training >> step=7989800, episode=1332 reward=0.7954363 (493.29 it/sec) -training >> step=7989900, episode=1332 reward=0.7867401 (479.50 it/sec) -training >> step=7990000, episode=1332 reward=0.7623979 (469.14 it/sec) -training >> step=7990100, episode=1332 reward=0.7792237 (460.31 it/sec) -training >> step=7990200, episode=1332 reward=0.7827802 (482.51 it/sec) -training >> step=7990300, episode=1332 reward=0.7808447 (473.00 it/sec) -training >> step=7990400, episode=1332 reward=0.7790241 (487.05 it/sec) -training >> step=7990500, episode=1332 reward=0.7646556 (515.30 it/sec) -training >> step=7990600, episode=1332 reward=0.79397 (503.95 it/sec) -training >> step=7990700, episode=1332 reward=0.7884132 (492.20 it/sec) -training >> step=7990800, episode=1332 reward=0.8004579 (509.77 it/sec) -training >> step=7990900, episode=1332 reward=0.7923085 (522.10 it/sec) -training >> step=7991000, episode=1332 reward=0.7637594 (522.04 it/sec) -training >> step=7991100, episode=1332 reward=0.7990154 (518.11 it/sec) -training >> step=7991200, episode=1332 reward=0.7655218 (502.63 it/sec) -training >> step=7991300, episode=1333 reward=0.7779432 (142.21 it/sec) -training >> step=7991400, episode=1333 reward=0.7781275 (417.67 it/sec) -training >> step=7991500, episode=1333 reward=0.789636 (496.01 it/sec) -training >> step=7991600, episode=1333 reward=0.7800785 (497.36 it/sec) -training >> step=7991700, episode=1333 reward=0.7844192 (496.69 it/sec) -training >> step=7991800, episode=1333 reward=0.782461 (507.63 it/sec) -training >> step=7991900, episode=1333 reward=0.7786673 (480.49 it/sec) -training >> step=7992000, episode=1333 reward=0.8124591 (504.72 it/sec) -training >> step=7992100, episode=1333 reward=0.7737776 (513.24 it/sec) -training >> step=7992200, episode=1333 reward=0.8017705 (517.91 it/sec) -training >> step=7992300, episode=1333 reward=0.7818027 (487.02 it/sec) -training >> step=7992400, episode=1333 reward=0.7825843 (514.76 it/sec) -training >> step=7992500, episode=1333 reward=0.7985471 (493.93 it/sec) -training >> step=7992600, episode=1333 reward=0.7932842 (538.95 it/sec) -training >> step=7992700, episode=1333 reward=0.8058118 (505.19 it/sec) -training >> step=7992800, episode=1333 reward=0.7887036 (514.96 it/sec) -training >> step=7992900, episode=1333 reward=0.7879882 (517.02 it/sec) -training >> step=7993000, episode=1333 reward=0.7804698 (538.87 it/sec) -training >> step=7993100, episode=1333 reward=0.8051594 (506.48 it/sec) -training >> step=7993200, episode=1333 reward=0.7903956 (510.29 it/sec) -training >> step=7993300, episode=1333 reward=0.7785672 (464.58 it/sec) -training >> step=7993400, episode=1333 reward=0.8082417 (499.57 it/sec) -training >> step=7993500, episode=1333 reward=0.7982762 (537.28 it/sec) -training >> step=7993600, episode=1333 reward=0.7713377 (496.69 it/sec) -training >> step=7993700, episode=1333 reward=0.7919244 (516.95 it/sec) -training >> step=7993800, episode=1333 reward=0.7747031 (470.51 it/sec) -training >> step=7993900, episode=1333 reward=0.7998151 (489.37 it/sec) -training >> step=7994000, episode=1333 reward=0.8036397 (524.93 it/sec) -training >> step=7994100, episode=1333 reward=0.8024428 (474.48 it/sec) -training >> step=7994200, episode=1333 reward=0.7788474 (463.23 it/sec) -training >> step=7994300, episode=1333 reward=0.790581 (510.51 it/sec) -training >> step=7994400, episode=1333 reward=0.799105 (535.76 it/sec) -training >> step=7994500, episode=1333 reward=0.7816125 (499.24 it/sec) -training >> step=7994600, episode=1333 reward=0.7702156 (516.07 it/sec) -training >> step=7994700, episode=1333 reward=0.7854132 (521.99 it/sec) -training >> step=7994800, episode=1333 reward=0.7698635 (533.10 it/sec) -training >> step=7994900, episode=1333 reward=0.7890067 (511.78 it/sec) -training >> step=7995000, episode=1333 reward=0.7601826 (497.53 it/sec) -training >> step=7995100, episode=1333 reward=0.7928877 (501.65 it/sec) -training >> step=7995200, episode=1333 reward=0.7899178 (459.34 it/sec) -training >> step=7995300, episode=1333 reward=0.7915674 (539.43 it/sec) -training >> step=7995400, episode=1333 reward=0.7926493 (500.86 it/sec) -training >> step=7995500, episode=1333 reward=0.7912025 (478.76 it/sec) -training >> step=7995600, episode=1333 reward=0.7768319 (488.96 it/sec) -training >> step=7995700, episode=1333 reward=0.8036638 (494.29 it/sec) -training >> step=7995800, episode=1333 reward=0.7791351 (477.46 it/sec) -training >> step=7995900, episode=1333 reward=0.7888421 (518.08 it/sec) -training >> step=7996000, episode=1333 reward=0.7830991 (510.72 it/sec) -training >> step=7996100, episode=1333 reward=0.7539459 (454.53 it/sec) -training >> step=7996200, episode=1333 reward=0.7918007 (519.68 it/sec) -training >> step=7996300, episode=1333 reward=0.7964307 (509.45 it/sec) -training >> step=7996400, episode=1333 reward=0.7778348 (510.79 it/sec) -training >> step=7996500, episode=1333 reward=0.7761337 (512.87 it/sec) -training >> step=7996600, episode=1333 reward=0.8017235 (485.86 it/sec) -training >> step=7996700, episode=1333 reward=0.804375 (461.16 it/sec) -training >> step=7996800, episode=1333 reward=0.788655 (461.32 it/sec) -training >> step=7996900, episode=1333 reward=0.7918757 (528.09 it/sec) -training >> step=7997000, episode=1333 reward=0.7954389 (484.03 it/sec) -training >> step=7997100, episode=1333 reward=0.7675101 (474.88 it/sec) -training >> step=7997200, episode=1333 reward=0.8009098 (505.87 it/sec) -training >> step=7997300, episode=1334 reward=0.801359 (132.04 it/sec) -training >> step=7997400, episode=1334 reward=0.7879809 (527.91 it/sec) -training >> step=7997500, episode=1334 reward=0.7819288 (387.22 it/sec) -training >> step=7997600, episode=1334 reward=0.8001122 (517.46 it/sec) -training >> step=7997700, episode=1334 reward=0.7921137 (510.62 it/sec) -training >> step=7997800, episode=1334 reward=0.7939968 (453.37 it/sec) -training >> step=7997900, episode=1334 reward=0.7955018 (501.61 it/sec) -training >> step=7998000, episode=1334 reward=0.778775 (509.75 it/sec) -training >> step=7998100, episode=1334 reward=0.786395 (463.53 it/sec) -training >> step=7998200, episode=1334 reward=0.7872619 (479.46 it/sec) -training >> step=7998300, episode=1334 reward=0.7553989 (473.82 it/sec) -training >> step=7998400, episode=1334 reward=0.8036266 (520.25 it/sec) -training >> step=7998500, episode=1334 reward=0.7939907 (488.54 it/sec) -training >> step=7998600, episode=1334 reward=0.7860842 (523.04 it/sec) -training >> step=7998700, episode=1334 reward=0.8019843 (475.94 it/sec) -training >> step=7998800, episode=1334 reward=0.8015862 (505.77 it/sec) -training >> step=7998900, episode=1334 reward=0.7938991 (511.32 it/sec) -training >> step=7999000, episode=1334 reward=0.7750742 (468.24 it/sec) -training >> step=7999100, episode=1334 reward=0.7807491 (544.08 it/sec) -training >> step=7999200, episode=1334 reward=0.7917638 (531.82 it/sec) -training >> step=7999300, episode=1334 reward=0.7914767 (484.02 it/sec) -training >> step=7999400, episode=1334 reward=0.7843587 (466.55 it/sec) -training >> step=7999500, episode=1334 reward=0.7851353 (542.65 it/sec) -training >> step=7999600, episode=1334 reward=0.7732582 (523.02 it/sec) -training >> step=7999700, episode=1334 reward=0.8073229 (512.38 it/sec) -training >> step=7999800, episode=1334 reward=0.7980464 (509.03 it/sec) -training >> step=7999900, episode=1334 reward=0.776861 (440.09 it/sec) -training >> step=8000000, episode=1334 reward=0.7889233 (481.57 it/sec) -training >> step=8000100, episode=1334 reward=0.7802943 (503.14 it/sec) -training >> step=8000200, episode=1334 reward=0.8072332 (532.64 it/sec) -training >> step=8000300, episode=1334 reward=0.7869059 (444.63 it/sec) -training >> step=8000400, episode=1334 reward=0.8025122 (472.41 it/sec) -training >> step=8000500, episode=1334 reward=0.7908584 (481.16 it/sec) -training >> step=8000600, episode=1334 reward=0.7713684 (527.45 it/sec) -training >> step=8000700, episode=1334 reward=0.8011084 (509.58 it/sec) -training >> step=8000800, episode=1334 reward=0.7778243 (508.17 it/sec) -training >> step=8000900, episode=1334 reward=0.7696136 (504.67 it/sec) -training >> step=8001000, episode=1334 reward=0.7730455 (503.18 it/sec) -training >> step=8001100, episode=1334 reward=0.7933992 (523.73 it/sec) -training >> step=8001200, episode=1334 reward=0.7820765 (524.18 it/sec) -training >> step=8001300, episode=1334 reward=0.783937 (534.18 it/sec) -training >> step=8001400, episode=1334 reward=0.7927226 (436.95 it/sec) -training >> step=8001500, episode=1334 reward=0.7949359 (399.46 it/sec) -training >> step=8001600, episode=1334 reward=0.7662759 (476.52 it/sec) -training >> step=8001700, episode=1334 reward=0.7859015 (462.80 it/sec) -training >> step=8001800, episode=1334 reward=0.7810459 (434.60 it/sec) -training >> step=8001900, episode=1334 reward=0.7877994 (466.81 it/sec) -training >> step=8002000, episode=1334 reward=0.785009 (507.84 it/sec) -training >> step=8002100, episode=1334 reward=0.7837132 (467.11 it/sec) -training >> step=8002200, episode=1334 reward=0.7842354 (491.46 it/sec) -training >> step=8002300, episode=1334 reward=0.7644606 (464.94 it/sec) -training >> step=8002400, episode=1334 reward=0.774905 (431.42 it/sec) -training >> step=8002500, episode=1334 reward=0.7874438 (459.84 it/sec) -training >> step=8002600, episode=1334 reward=0.7681978 (465.72 it/sec) -training >> step=8002700, episode=1334 reward=0.8093392 (448.07 it/sec) -training >> step=8002800, episode=1334 reward=0.796257 (435.55 it/sec) -training >> step=8002900, episode=1334 reward=0.7996337 (438.46 it/sec) -training >> step=8003000, episode=1334 reward=0.7954063 (399.75 it/sec) -training >> step=8003100, episode=1334 reward=0.7791262 (398.43 it/sec) -training >> step=8003200, episode=1334 reward=0.7953012 (390.56 it/sec) -training >> step=8003300, episode=1335 reward=0.7970949 (98.09 it/sec) -training >> step=8003400, episode=1335 reward=0.7860807 (506.70 it/sec) -training >> step=8003500, episode=1335 reward=0.8046994 (508.09 it/sec) -training >> step=8003600, episode=1335 reward=0.7829895 (336.39 it/sec) -training >> step=8003700, episode=1335 reward=0.7797285 (521.83 it/sec) -training >> step=8003800, episode=1335 reward=0.7721741 (490.29 it/sec) -training >> step=8003900, episode=1335 reward=0.7863855 (486.26 it/sec) -training >> step=8004000, episode=1335 reward=0.7797055 (452.59 it/sec) -training >> step=8004100, episode=1335 reward=0.7879057 (496.80 it/sec) -training >> step=8004200, episode=1335 reward=0.7963704 (472.95 it/sec) -training >> step=8004300, episode=1335 reward=0.77701 (444.88 it/sec) -training >> step=8004400, episode=1335 reward=0.7979193 (544.08 it/sec) -training >> step=8004500, episode=1335 reward=0.7896035 (477.48 it/sec) -training >> step=8004600, episode=1335 reward=0.7751035 (425.07 it/sec) -training >> step=8004700, episode=1335 reward=0.7735193 (448.98 it/sec) -training >> step=8004800, episode=1335 reward=0.7864039 (475.82 it/sec) -training >> step=8004900, episode=1335 reward=0.782231 (440.99 it/sec) -training >> step=8005000, episode=1335 reward=0.7920894 (433.53 it/sec) -training >> step=8005100, episode=1335 reward=0.7783279 (401.51 it/sec) -training >> step=8005200, episode=1335 reward=0.7838228 (492.38 it/sec) -training >> step=8005300, episode=1335 reward=0.7791061 (481.69 it/sec) -training >> step=8005400, episode=1335 reward=0.7796081 (414.05 it/sec) -training >> step=8005500, episode=1335 reward=0.7869167 (438.66 it/sec) -training >> step=8005600, episode=1335 reward=0.7839351 (448.57 it/sec) -training >> step=8005700, episode=1335 reward=0.7919029 (427.00 it/sec) -training >> step=8005800, episode=1335 reward=0.7898231 (439.51 it/sec) -training >> step=8005900, episode=1335 reward=0.7995164 (465.05 it/sec) -training >> step=8006000, episode=1335 reward=0.77799 (468.98 it/sec) -training >> step=8006100, episode=1335 reward=0.7873977 (433.30 it/sec) -training >> step=8006200, episode=1335 reward=0.7999426 (494.55 it/sec) -training >> step=8006300, episode=1335 reward=0.7787402 (497.44 it/sec) -training >> step=8006400, episode=1335 reward=0.7785273 (507.71 it/sec) -training >> step=8006500, episode=1335 reward=0.7779416 (470.64 it/sec) -training >> step=8006600, episode=1335 reward=0.7536875 (515.50 it/sec) -training >> step=8006700, episode=1335 reward=0.7988971 (487.02 it/sec) -training >> step=8006800, episode=1335 reward=0.760693 (455.07 it/sec) -training >> step=8006900, episode=1335 reward=0.7861784 (510.61 it/sec) -training >> step=8007000, episode=1335 reward=0.7881263 (522.81 it/sec) -training >> step=8007100, episode=1335 reward=0.8080169 (467.84 it/sec) -training >> step=8007200, episode=1335 reward=0.8099414 (467.32 it/sec) -training >> step=8007300, episode=1335 reward=0.7875963 (515.32 it/sec) -training >> step=8007400, episode=1335 reward=0.7896622 (509.52 it/sec) -training >> step=8007500, episode=1335 reward=0.7944242 (432.40 it/sec) -training >> step=8007600, episode=1335 reward=0.7955678 (481.30 it/sec) -training >> step=8007700, episode=1335 reward=0.7849604 (524.53 it/sec) -training >> step=8007800, episode=1335 reward=0.779637 (463.46 it/sec) -training >> step=8007900, episode=1335 reward=0.7883374 (465.92 it/sec) -training >> step=8008000, episode=1335 reward=0.7895882 (517.93 it/sec) -training >> step=8008100, episode=1335 reward=0.8105125 (512.08 it/sec) -training >> step=8008200, episode=1335 reward=0.7955232 (497.67 it/sec) -training >> step=8008300, episode=1335 reward=0.7955198 (483.01 it/sec) -training >> step=8008400, episode=1335 reward=0.7839305 (489.03 it/sec) -training >> step=8008500, episode=1335 reward=0.7853834 (480.57 it/sec) -training >> step=8008600, episode=1335 reward=0.7637339 (477.01 it/sec) -training >> step=8008700, episode=1335 reward=0.7801015 (481.85 it/sec) -training >> step=8008800, episode=1335 reward=0.7803759 (486.95 it/sec) -training >> step=8008900, episode=1335 reward=0.7946697 (477.32 it/sec) -training >> step=8009000, episode=1335 reward=0.7855968 (443.99 it/sec) -training >> step=8009100, episode=1335 reward=0.8037878 (450.93 it/sec) -training >> step=8009200, episode=1335 reward=0.8025343 (510.34 it/sec) -training >> step=8009300, episode=1336 reward=0.7765086 (66.94 it/sec) -training >> step=8009400, episode=1336 reward=0.7928724 (497.80 it/sec) -training >> step=8009500, episode=1336 reward=0.7849189 (494.19 it/sec) -training >> step=8009600, episode=1336 reward=0.7909249 (491.49 it/sec) -training >> step=8009700, episode=1336 reward=0.8137328 (440.61 it/sec) -training >> step=8009800, episode=1336 reward=0.779221 (491.90 it/sec) -training >> step=8009900, episode=1336 reward=0.7802752 (306.40 it/sec) -training >> step=8010000, episode=1336 reward=0.7864851 (428.01 it/sec) -training >> step=8010100, episode=1336 reward=0.8080748 (424.10 it/sec) -training >> step=8010200, episode=1336 reward=0.797288 (431.53 it/sec) -training >> step=8010300, episode=1336 reward=0.7895908 (457.00 it/sec) -training >> step=8010400, episode=1336 reward=0.7845293 (454.36 it/sec) -training >> step=8010500, episode=1336 reward=0.7649846 (459.19 it/sec) -training >> step=8010600, episode=1336 reward=0.7814823 (441.17 it/sec) -training >> step=8010700, episode=1336 reward=0.7749668 (538.15 it/sec) -training >> step=8010800, episode=1336 reward=0.7835466 (496.15 it/sec) -training >> step=8010900, episode=1336 reward=0.7819031 (463.91 it/sec) -training >> step=8011000, episode=1336 reward=0.7811283 (505.28 it/sec) -training >> step=8011100, episode=1336 reward=0.8026867 (528.17 it/sec) -training >> step=8011200, episode=1336 reward=0.7801663 (443.51 it/sec) -training >> step=8011300, episode=1336 reward=0.8111382 (482.71 it/sec) -training >> step=8011400, episode=1336 reward=0.8028167 (464.02 it/sec) -training >> step=8011500, episode=1336 reward=0.8104764 (540.30 it/sec) -training >> step=8011600, episode=1336 reward=0.778914 (515.09 it/sec) -training >> step=8011700, episode=1336 reward=0.7876415 (500.77 it/sec) -training >> step=8011800, episode=1336 reward=0.7759687 (479.46 it/sec) -training >> step=8011900, episode=1336 reward=0.759572 (493.15 it/sec) -training >> step=8012000, episode=1336 reward=0.7825451 (524.40 it/sec) -training >> step=8012100, episode=1336 reward=0.7731939 (503.47 it/sec) -training >> step=8012200, episode=1336 reward=0.7975302 (457.45 it/sec) -training >> step=8012300, episode=1336 reward=0.7743469 (466.92 it/sec) -training >> step=8012400, episode=1336 reward=0.7654212 (450.25 it/sec) -training >> step=8012500, episode=1336 reward=0.7727878 (436.04 it/sec) -training >> step=8012600, episode=1336 reward=0.7850093 (489.27 it/sec) -training >> step=8012700, episode=1336 reward=0.7904476 (459.95 it/sec) -training >> step=8012800, episode=1336 reward=0.7998927 (448.11 it/sec) -training >> step=8012900, episode=1336 reward=0.7860571 (485.73 it/sec) -training >> step=8013000, episode=1336 reward=0.8061804 (502.11 it/sec) -training >> step=8013100, episode=1336 reward=0.79564 (465.51 it/sec) -training >> step=8013200, episode=1336 reward=0.7754279 (533.13 it/sec) -training >> step=8013300, episode=1336 reward=0.7746903 (484.30 it/sec) -training >> step=8013400, episode=1336 reward=0.7951813 (512.67 it/sec) -training >> step=8013500, episode=1336 reward=0.7861633 (431.03 it/sec) -training >> step=8013600, episode=1336 reward=0.7824218 (509.29 it/sec) -training >> step=8013700, episode=1336 reward=0.7893379 (474.59 it/sec) -training >> step=8013800, episode=1336 reward=0.7995014 (508.75 it/sec) -training >> step=8013900, episode=1336 reward=0.7735732 (512.09 it/sec) -training >> step=8014000, episode=1336 reward=0.7607369 (519.07 it/sec) -training >> step=8014100, episode=1336 reward=0.8049553 (510.47 it/sec) -training >> step=8014200, episode=1336 reward=0.7891555 (466.33 it/sec) -training >> step=8014300, episode=1336 reward=0.7896092 (501.18 it/sec) -training >> step=8014400, episode=1336 reward=0.8079538 (464.02 it/sec) -training >> step=8014500, episode=1336 reward=0.7843208 (523.22 it/sec) -training >> step=8014600, episode=1336 reward=0.7797316 (512.40 it/sec) -training >> step=8014700, episode=1336 reward=0.7943406 (494.46 it/sec) -training >> step=8014800, episode=1336 reward=0.7835665 (507.72 it/sec) -training >> step=8014900, episode=1336 reward=0.7873566 (405.44 it/sec) -training >> step=8015000, episode=1336 reward=0.7770448 (504.65 it/sec) -training >> step=8015100, episode=1336 reward=0.7791891 (496.18 it/sec) -training >> step=8015200, episode=1336 reward=0.7938882 (507.48 it/sec) -training >> step=8015300, episode=1337 reward=0.8053105 (138.81 it/sec) -training >> step=8015400, episode=1337 reward=0.7935213 (505.37 it/sec) -training >> step=8015500, episode=1337 reward=0.7926583 (519.12 it/sec) -training >> step=8015600, episode=1337 reward=0.7802292 (504.46 it/sec) -training >> step=8015700, episode=1337 reward=0.7690242 (473.55 it/sec) -training >> step=8015800, episode=1337 reward=0.7779658 (541.04 it/sec) -training >> step=8015900, episode=1337 reward=0.7960148 (467.77 it/sec) -training >> step=8016000, episode=1337 reward=0.7788613 (468.67 it/sec) -training >> step=8016100, episode=1337 reward=0.7905971 (364.67 it/sec) -training >> step=8016200, episode=1337 reward=0.8096339 (481.72 it/sec) -training >> step=8016300, episode=1337 reward=0.7786145 (470.44 it/sec) -training >> step=8016400, episode=1337 reward=0.7823948 (508.30 it/sec) -training >> step=8016500, episode=1337 reward=0.7946154 (509.98 it/sec) -training >> step=8016600, episode=1337 reward=0.7730815 (471.45 it/sec) -training >> step=8016700, episode=1337 reward=0.7772895 (499.79 it/sec) -training >> step=8016800, episode=1337 reward=0.8163835 (515.57 it/sec) -training >> step=8016900, episode=1337 reward=0.7791008 (499.19 it/sec) -training >> step=8017000, episode=1337 reward=0.7861478 (519.98 it/sec) -training >> step=8017100, episode=1337 reward=0.7920184 (480.39 it/sec) -training >> step=8017200, episode=1337 reward=0.7874636 (549.30 it/sec) -training >> step=8017300, episode=1337 reward=0.7860484 (466.56 it/sec) -training >> step=8017400, episode=1337 reward=0.7976074 (480.34 it/sec) -training >> step=8017500, episode=1337 reward=0.7694115 (522.74 it/sec) -training >> step=8017600, episode=1337 reward=0.7936044 (499.37 it/sec) -training >> step=8017700, episode=1337 reward=0.7827026 (516.96 it/sec) -training >> step=8017800, episode=1337 reward=0.7889413 (484.85 it/sec) -training >> step=8017900, episode=1337 reward=0.7890158 (498.54 it/sec) -training >> step=8018000, episode=1337 reward=0.7801383 (471.41 it/sec) -training >> step=8018100, episode=1337 reward=0.7904742 (488.17 it/sec) -training >> step=8018200, episode=1337 reward=0.7792299 (530.14 it/sec) -training >> step=8018300, episode=1337 reward=0.7737886 (466.04 it/sec) -training >> step=8018400, episode=1337 reward=0.7923248 (480.48 it/sec) -training >> step=8018500, episode=1337 reward=0.7785807 (499.50 it/sec) -training >> step=8018600, episode=1337 reward=0.7958941 (522.55 it/sec) -training >> step=8018700, episode=1337 reward=0.7847877 (491.57 it/sec) -training >> step=8018800, episode=1337 reward=0.7807968 (456.32 it/sec) -training >> step=8018900, episode=1337 reward=0.7679812 (507.09 it/sec) -training >> step=8019000, episode=1337 reward=0.7889499 (554.42 it/sec) -training >> step=8019100, episode=1337 reward=0.7821995 (492.81 it/sec) -training >> step=8019200, episode=1337 reward=0.7865909 (489.38 it/sec) -training >> step=8019300, episode=1337 reward=0.7960098 (470.33 it/sec) -training >> step=8019400, episode=1337 reward=0.7629617 (500.48 it/sec) -training >> step=8019500, episode=1337 reward=0.8045126 (513.69 it/sec) -training >> step=8019600, episode=1337 reward=0.7930312 (498.31 it/sec) -training >> step=8019700, episode=1337 reward=0.7984905 (531.47 it/sec) -training >> step=8019800, episode=1337 reward=0.7960574 (482.20 it/sec) -training >> step=8019900, episode=1337 reward=0.7681321 (452.76 it/sec) -training >> step=8020000, episode=1337 reward=0.7841471 (456.39 it/sec) -training >> step=8020100, episode=1337 reward=0.7775919 (506.15 it/sec) -training >> step=8020200, episode=1337 reward=0.7633362 (463.34 it/sec) -training >> step=8020300, episode=1337 reward=0.8004851 (428.51 it/sec) -training >> step=8020400, episode=1337 reward=0.7959746 (507.41 it/sec) -training >> step=8020500, episode=1337 reward=0.7809288 (510.52 it/sec) -training >> step=8020600, episode=1337 reward=0.7838514 (527.91 it/sec) -training >> step=8020700, episode=1337 reward=0.7871622 (463.57 it/sec) -training >> step=8020800, episode=1337 reward=0.7842913 (514.86 it/sec) -training >> step=8020900, episode=1337 reward=0.7849822 (492.24 it/sec) -training >> step=8021000, episode=1337 reward=0.7922708 (519.79 it/sec) -training >> step=8021100, episode=1337 reward=0.775088 (487.43 it/sec) -training >> step=8021200, episode=1337 reward=0.7876754 (505.09 it/sec) -training >> step=8021300, episode=1338 reward=0.7735381 (112.44 it/sec) -training >> step=8021400, episode=1338 reward=0.8103513 (485.90 it/sec) -training >> step=8021500, episode=1338 reward=0.7935077 (437.04 it/sec) -training >> step=8021600, episode=1338 reward=0.7919539 (448.95 it/sec) -training >> step=8021700, episode=1338 reward=0.7865474 (433.09 it/sec) -training >> step=8021800, episode=1338 reward=0.8019293 (446.34 it/sec) -training >> step=8021900, episode=1338 reward=0.7954235 (487.02 it/sec) -training >> step=8022000, episode=1338 reward=0.7968609 (387.06 it/sec) -training >> step=8022100, episode=1338 reward=0.7889403 (437.44 it/sec) -training >> step=8022200, episode=1338 reward=0.7907623 (297.13 it/sec) -training >> step=8022300, episode=1338 reward=0.7767333 (506.38 it/sec) -training >> step=8022400, episode=1338 reward=0.7833465 (445.82 it/sec) -training >> step=8022500, episode=1338 reward=0.7815083 (492.12 it/sec) -training >> step=8022600, episode=1338 reward=0.783316 (405.82 it/sec) -training >> step=8022700, episode=1338 reward=0.7796398 (418.15 it/sec) -training >> step=8022800, episode=1338 reward=0.7795256 (426.11 it/sec) -training >> step=8022900, episode=1338 reward=0.8046373 (380.79 it/sec) -training >> step=8023000, episode=1338 reward=0.7990888 (430.56 it/sec) -training >> step=8023100, episode=1338 reward=0.7836598 (397.95 it/sec) -training >> step=8023200, episode=1338 reward=0.7817331 (398.66 it/sec) -training >> step=8023300, episode=1338 reward=0.7769023 (524.28 it/sec) -training >> step=8023400, episode=1338 reward=0.7726501 (506.35 it/sec) -training >> step=8023500, episode=1338 reward=0.7896946 (510.91 it/sec) -training >> step=8023600, episode=1338 reward=0.7875455 (490.04 it/sec) -training >> step=8023700, episode=1338 reward=0.8009133 (435.70 it/sec) -training >> step=8023800, episode=1338 reward=0.7900518 (511.75 it/sec) -training >> step=8023900, episode=1338 reward=0.7590232 (444.81 it/sec) -training >> step=8024000, episode=1338 reward=0.8015925 (484.85 it/sec) -training >> step=8024100, episode=1338 reward=0.8035823 (453.11 it/sec) -training >> step=8024200, episode=1338 reward=0.7962319 (411.56 it/sec) -training >> step=8024300, episode=1338 reward=0.7720978 (461.39 it/sec) -training >> step=8024400, episode=1338 reward=0.795842 (482.11 it/sec) -training >> step=8024500, episode=1338 reward=0.7698931 (448.59 it/sec) -training >> step=8024600, episode=1338 reward=0.7795861 (502.73 it/sec) -training >> step=8024700, episode=1338 reward=0.7921022 (499.55 it/sec) -training >> step=8024800, episode=1338 reward=0.7797654 (534.17 it/sec) -training >> step=8024900, episode=1338 reward=0.7977694 (508.21 it/sec) -training >> step=8025000, episode=1338 reward=0.793278 (448.04 it/sec) -training >> step=8025100, episode=1338 reward=0.7985914 (545.97 it/sec) -training >> step=8025200, episode=1338 reward=0.785286 (438.33 it/sec) -training >> step=8025300, episode=1338 reward=0.7797002 (514.64 it/sec) -training >> step=8025400, episode=1338 reward=0.7918637 (486.25 it/sec) -training >> step=8025500, episode=1338 reward=0.7869182 (498.24 it/sec) -training >> step=8025600, episode=1338 reward=0.7962663 (503.85 it/sec) -training >> step=8025700, episode=1338 reward=0.7623355 (450.83 it/sec) -training >> step=8025800, episode=1338 reward=0.7887857 (497.99 it/sec) -training >> step=8025900, episode=1338 reward=0.775872 (529.13 it/sec) -training >> step=8026000, episode=1338 reward=0.8093568 (485.80 it/sec) -training >> step=8026100, episode=1338 reward=0.7938513 (508.76 it/sec) -training >> step=8026200, episode=1338 reward=0.7866148 (468.21 it/sec) -training >> step=8026300, episode=1338 reward=0.7827544 (502.77 it/sec) -training >> step=8026400, episode=1338 reward=0.7753525 (499.34 it/sec) -training >> step=8026500, episode=1338 reward=0.7820286 (499.40 it/sec) -training >> step=8026600, episode=1338 reward=0.803202 (499.47 it/sec) -training >> step=8026700, episode=1338 reward=0.7774406 (491.33 it/sec) -training >> step=8026800, episode=1338 reward=0.8117982 (487.40 it/sec) -training >> step=8026900, episode=1338 reward=0.776825 (535.39 it/sec) -training >> step=8027000, episode=1338 reward=0.7752523 (472.66 it/sec) -training >> step=8027100, episode=1338 reward=0.7945815 (443.63 it/sec) -training >> step=8027200, episode=1338 reward=0.7914623 (446.03 it/sec) -training >> step=8027300, episode=1339 reward=0.7596104 (135.65 it/sec) -training >> step=8027400, episode=1339 reward=0.7893974 (453.25 it/sec) -training >> step=8027500, episode=1339 reward=0.800213 (518.30 it/sec) -training >> step=8027600, episode=1339 reward=0.7695876 (471.75 it/sec) -training >> step=8027700, episode=1339 reward=0.7863327 (524.88 it/sec) -training >> step=8027800, episode=1339 reward=0.8067179 (474.04 it/sec) -training >> step=8027900, episode=1339 reward=0.8011547 (454.22 it/sec) -training >> step=8028000, episode=1339 reward=0.8076679 (531.79 it/sec) -training >> step=8028100, episode=1339 reward=0.7831258 (499.38 it/sec) -training >> step=8028200, episode=1339 reward=0.7942717 (478.21 it/sec) -training >> step=8028300, episode=1339 reward=0.8043567 (481.03 it/sec) -training >> step=8028400, episode=1339 reward=0.8058648 (360.48 it/sec) -training >> step=8028500, episode=1339 reward=0.7942065 (450.90 it/sec) -training >> step=8028600, episode=1339 reward=0.7845923 (464.73 it/sec) -training >> step=8028700, episode=1339 reward=0.791328 (504.61 it/sec) -training >> step=8028800, episode=1339 reward=0.7664555 (436.65 it/sec) -training >> step=8028900, episode=1339 reward=0.7738129 (470.29 it/sec) -training >> step=8029000, episode=1339 reward=0.8030893 (498.96 it/sec) -training >> step=8029100, episode=1339 reward=0.792628 (506.31 it/sec) -training >> step=8029200, episode=1339 reward=0.7735456 (460.55 it/sec) -training >> step=8029300, episode=1339 reward=0.7747445 (458.82 it/sec) -training >> step=8029400, episode=1339 reward=0.8070006 (504.84 it/sec) -training >> step=8029500, episode=1339 reward=0.795601 (521.47 it/sec) -training >> step=8029600, episode=1339 reward=0.7799422 (512.85 it/sec) -training >> step=8029700, episode=1339 reward=0.7715894 (503.66 it/sec) -training >> step=8029800, episode=1339 reward=0.7951073 (480.74 it/sec) -training >> step=8029900, episode=1339 reward=0.8070059 (503.99 it/sec) -training >> step=8030000, episode=1339 reward=0.7693307 (485.28 it/sec) -training >> step=8030100, episode=1339 reward=0.7643234 (481.66 it/sec) -training >> step=8030200, episode=1339 reward=0.7778588 (473.12 it/sec) -training >> step=8030300, episode=1339 reward=0.7873433 (462.93 it/sec) -training >> step=8030400, episode=1339 reward=0.7698112 (496.21 it/sec) -training >> step=8030500, episode=1339 reward=0.771138 (510.90 it/sec) -training >> step=8030600, episode=1339 reward=0.7851478 (487.96 it/sec) -training >> step=8030700, episode=1339 reward=0.7713271 (526.51 it/sec) -training >> step=8030800, episode=1339 reward=0.7879364 (511.48 it/sec) -training >> step=8030900, episode=1339 reward=0.79899 (509.40 it/sec) -training >> step=8031000, episode=1339 reward=0.7899516 (507.13 it/sec) -training >> step=8031100, episode=1339 reward=0.7868136 (461.09 it/sec) -training >> step=8031200, episode=1339 reward=0.7742831 (483.45 it/sec) -training >> step=8031300, episode=1339 reward=0.7877101 (498.41 it/sec) -training >> step=8031400, episode=1339 reward=0.7973331 (498.53 it/sec) -training >> step=8031500, episode=1339 reward=0.792336 (507.83 it/sec) -training >> step=8031600, episode=1339 reward=0.789927 (532.28 it/sec) -training >> step=8031700, episode=1339 reward=0.7925566 (510.37 it/sec) -training >> step=8031800, episode=1339 reward=0.7883812 (434.99 it/sec) -training >> step=8031900, episode=1339 reward=0.7932834 (494.09 it/sec) -training >> step=8032000, episode=1339 reward=0.7873884 (492.27 it/sec) -training >> step=8032100, episode=1339 reward=0.8059658 (502.52 it/sec) -training >> step=8032200, episode=1339 reward=0.7827318 (497.97 it/sec) -training >> step=8032300, episode=1339 reward=0.7874638 (501.70 it/sec) -training >> step=8032400, episode=1339 reward=0.773639 (476.89 it/sec) -training >> step=8032500, episode=1339 reward=0.8001933 (497.94 it/sec) -training >> step=8032600, episode=1339 reward=0.8062261 (425.15 it/sec) -training >> step=8032700, episode=1339 reward=0.7882154 (463.10 it/sec) -training >> step=8032800, episode=1339 reward=0.7859588 (434.31 it/sec) -training >> step=8032900, episode=1339 reward=0.7914772 (442.36 it/sec) -training >> step=8033000, episode=1339 reward=0.7722158 (466.52 it/sec) -training >> step=8033100, episode=1339 reward=0.7853714 (460.74 it/sec) -training >> step=8033200, episode=1339 reward=0.786721 (405.22 it/sec) -training >> step=8033300, episode=1340 reward=0.8022896 (161.91 it/sec) -training >> step=8033400, episode=1340 reward=0.7795251 (487.85 it/sec) -training >> step=8033500, episode=1340 reward=0.7819792 (480.04 it/sec) -training >> step=8033600, episode=1340 reward=0.7753106 (462.31 it/sec) -training >> step=8033700, episode=1340 reward=0.8159847 (482.93 it/sec) -training >> step=8033800, episode=1340 reward=0.7901685 (497.76 it/sec) -training >> step=8033900, episode=1340 reward=0.7824762 (488.01 it/sec) -training >> step=8034000, episode=1340 reward=0.7873126 (474.05 it/sec) -training >> step=8034100, episode=1340 reward=0.7806953 (487.12 it/sec) -training >> step=8034200, episode=1340 reward=0.8009344 (467.21 it/sec) -training >> step=8034300, episode=1340 reward=0.8022788 (407.67 it/sec) -training >> step=8034400, episode=1340 reward=0.7864302 (479.07 it/sec) -training >> step=8034500, episode=1340 reward=0.7700408 (432.14 it/sec) -training >> step=8034600, episode=1340 reward=0.783491 (340.85 it/sec) -training >> step=8034700, episode=1340 reward=0.7680054 (409.94 it/sec) -training >> step=8034800, episode=1340 reward=0.7920769 (400.92 it/sec) -training >> step=8034900, episode=1340 reward=0.7729677 (364.19 it/sec) -training >> step=8035000, episode=1340 reward=0.7835596 (393.08 it/sec) -training >> step=8035100, episode=1340 reward=0.7999493 (419.54 it/sec) -training >> step=8035200, episode=1340 reward=0.8050205 (447.00 it/sec) -training >> step=8035300, episode=1340 reward=0.8077366 (512.45 it/sec) -training >> step=8035400, episode=1340 reward=0.7864683 (476.01 it/sec) -training >> step=8035500, episode=1340 reward=0.7984076 (444.18 it/sec) -training >> step=8035600, episode=1340 reward=0.7695058 (517.19 it/sec) -training >> step=8035700, episode=1340 reward=0.785788 (483.73 it/sec) -training >> step=8035800, episode=1340 reward=0.7977045 (458.90 it/sec) -training >> step=8035900, episode=1340 reward=0.7921161 (449.41 it/sec) -training >> step=8036000, episode=1340 reward=0.7670697 (534.31 it/sec) -training >> step=8036100, episode=1340 reward=0.7696355 (450.12 it/sec) -training >> step=8036200, episode=1340 reward=0.79168 (464.90 it/sec) -training >> step=8036300, episode=1340 reward=0.7834254 (464.59 it/sec) -training >> step=8036400, episode=1340 reward=0.7941356 (424.13 it/sec) -training >> step=8036500, episode=1340 reward=0.807833 (469.71 it/sec) -training >> step=8036600, episode=1340 reward=0.782651 (475.70 it/sec) -training >> step=8036700, episode=1340 reward=0.7698072 (453.84 it/sec) -training >> step=8036800, episode=1340 reward=0.7881159 (441.81 it/sec) -training >> step=8036900, episode=1340 reward=0.7859958 (434.29 it/sec) -training >> step=8037000, episode=1340 reward=0.7813019 (411.33 it/sec) -training >> step=8037100, episode=1340 reward=0.7802183 (492.19 it/sec) -training >> step=8037200, episode=1340 reward=0.7925401 (475.14 it/sec) -training >> step=8037300, episode=1340 reward=0.7809086 (467.15 it/sec) -training >> step=8037400, episode=1340 reward=0.7915937 (488.72 it/sec) -training >> step=8037500, episode=1340 reward=0.7977707 (488.45 it/sec) -training >> step=8037600, episode=1340 reward=0.7923107 (436.31 it/sec) -training >> step=8037700, episode=1340 reward=0.7883537 (458.31 it/sec) -training >> step=8037800, episode=1340 reward=0.8017006 (431.97 it/sec) -training >> step=8037900, episode=1340 reward=0.7761188 (479.69 it/sec) -training >> step=8038000, episode=1340 reward=0.7700939 (438.17 it/sec) -training >> step=8038100, episode=1340 reward=0.7764369 (538.69 it/sec) -training >> step=8038200, episode=1340 reward=0.7858098 (504.94 it/sec) -training >> step=8038300, episode=1340 reward=0.7881883 (499.27 it/sec) -training >> step=8038400, episode=1340 reward=0.7985015 (519.58 it/sec) -training >> step=8038500, episode=1340 reward=0.7942063 (490.62 it/sec) -training >> step=8038600, episode=1340 reward=0.779315 (500.95 it/sec) -training >> step=8038700, episode=1340 reward=0.7983546 (500.07 it/sec) -training >> step=8038800, episode=1340 reward=0.7794975 (507.80 it/sec) -training >> step=8038900, episode=1340 reward=0.7963374 (525.44 it/sec) -training >> step=8039000, episode=1340 reward=0.796497 (508.19 it/sec) -training >> step=8039100, episode=1340 reward=0.7871867 (463.75 it/sec) -training >> step=8039200, episode=1340 reward=0.7824901 (473.55 it/sec) -training >> step=8039300, episode=1341 reward=0.7929824 (141.25 it/sec) -training >> step=8039400, episode=1341 reward=0.7869908 (495.83 it/sec) -training >> step=8039500, episode=1341 reward=0.781653 (462.93 it/sec) -training >> step=8039600, episode=1341 reward=0.7875663 (473.16 it/sec) -training >> step=8039700, episode=1341 reward=0.7740097 (462.86 it/sec) -training >> step=8039800, episode=1341 reward=0.7912802 (429.60 it/sec) -training >> step=8039900, episode=1341 reward=0.7692451 (443.89 it/sec) -training >> step=8040000, episode=1341 reward=0.7873535 (413.66 it/sec) -training >> step=8040100, episode=1341 reward=0.7932762 (446.74 it/sec) -training >> step=8040200, episode=1341 reward=0.7713264 (450.73 it/sec) -training >> step=8040300, episode=1341 reward=0.7828236 (422.53 it/sec) -training >> step=8040400, episode=1341 reward=0.7842107 (419.44 it/sec) -training >> step=8040500, episode=1341 reward=0.786415 (416.56 it/sec) -training >> step=8040600, episode=1341 reward=0.7699848 (506.64 it/sec) -training >> step=8040700, episode=1341 reward=0.7866769 (349.84 it/sec) -training >> step=8040800, episode=1341 reward=0.7907071 (449.96 it/sec) -training >> step=8040900, episode=1341 reward=0.8035242 (479.90 it/sec) -training >> step=8041000, episode=1341 reward=0.7862793 (521.56 it/sec) -training >> step=8041100, episode=1341 reward=0.7821211 (477.91 it/sec) -training >> step=8041200, episode=1341 reward=0.7839834 (474.51 it/sec) -training >> step=8041300, episode=1341 reward=0.7959884 (482.95 it/sec) -training >> step=8041400, episode=1341 reward=0.768988 (460.76 it/sec) -training >> step=8041500, episode=1341 reward=0.7974456 (469.40 it/sec) -training >> step=8041600, episode=1341 reward=0.7919832 (507.63 it/sec) -training >> step=8041700, episode=1341 reward=0.7945824 (443.12 it/sec) -training >> step=8041800, episode=1341 reward=0.7802154 (499.51 it/sec) -training >> step=8041900, episode=1341 reward=0.781957 (422.21 it/sec) -training >> step=8042000, episode=1341 reward=0.7865438 (456.81 it/sec) -training >> step=8042100, episode=1341 reward=0.7593644 (451.33 it/sec) -training >> step=8042200, episode=1341 reward=0.7861975 (433.22 it/sec) -training >> step=8042300, episode=1341 reward=0.7949285 (508.40 it/sec) -training >> step=8042400, episode=1341 reward=0.7688808 (483.25 it/sec) -training >> step=8042500, episode=1341 reward=0.8064876 (501.68 it/sec) -training >> step=8042600, episode=1341 reward=0.771264 (513.63 it/sec) -training >> step=8042700, episode=1341 reward=0.7899161 (497.01 it/sec) -training >> step=8042800, episode=1341 reward=0.79747 (500.85 it/sec) -training >> step=8042900, episode=1341 reward=0.7767995 (479.62 it/sec) -training >> step=8043000, episode=1341 reward=0.7570177 (496.94 it/sec) -training >> step=8043100, episode=1341 reward=0.7736884 (479.80 it/sec) -training >> step=8043200, episode=1341 reward=0.7900933 (497.87 it/sec) -training >> step=8043300, episode=1341 reward=0.7853541 (486.74 it/sec) -training >> step=8043400, episode=1341 reward=0.7797347 (502.03 it/sec) -training >> step=8043500, episode=1341 reward=0.7859401 (450.22 it/sec) -training >> step=8043600, episode=1341 reward=0.8053173 (503.25 it/sec) -training >> step=8043700, episode=1341 reward=0.7912472 (444.69 it/sec) -training >> step=8043800, episode=1341 reward=0.7854925 (406.45 it/sec) -training >> step=8043900, episode=1341 reward=0.7912664 (448.31 it/sec) -training >> step=8044000, episode=1341 reward=0.7890823 (467.05 it/sec) -training >> step=8044100, episode=1341 reward=0.7941766 (499.53 it/sec) -training >> step=8044200, episode=1341 reward=0.7897176 (499.53 it/sec) -training >> step=8044300, episode=1341 reward=0.7965869 (525.50 it/sec) -training >> step=8044400, episode=1341 reward=0.7938776 (503.72 it/sec) -training >> step=8044500, episode=1341 reward=0.7887287 (497.60 it/sec) -training >> step=8044600, episode=1341 reward=0.8103687 (544.29 it/sec) -training >> step=8044700, episode=1341 reward=0.788463 (484.32 it/sec) -training >> step=8044800, episode=1341 reward=0.7807627 (418.60 it/sec) -training >> step=8044900, episode=1341 reward=0.7832428 (465.89 it/sec) -training >> step=8045000, episode=1341 reward=0.7859033 (505.49 it/sec) -training >> step=8045100, episode=1341 reward=0.8011456 (489.59 it/sec) -training >> step=8045200, episode=1341 reward=0.8059022 (487.07 it/sec) -training >> step=8045300, episode=1342 reward=0.7814049 (141.09 it/sec) -training >> step=8045400, episode=1342 reward=0.8035724 (526.13 it/sec) -training >> step=8045500, episode=1342 reward=0.7856776 (446.38 it/sec) -training >> step=8045600, episode=1342 reward=0.7858118 (471.02 it/sec) -training >> step=8045700, episode=1342 reward=0.7938819 (416.24 it/sec) -training >> step=8045800, episode=1342 reward=0.77456 (437.04 it/sec) -training >> step=8045900, episode=1342 reward=0.797133 (449.66 it/sec) -training >> step=8046000, episode=1342 reward=0.7929989 (467.53 it/sec) -training >> step=8046100, episode=1342 reward=0.8124219 (454.68 it/sec) -training >> step=8046200, episode=1342 reward=0.7845582 (408.36 it/sec) -training >> step=8046300, episode=1342 reward=0.7962896 (445.93 it/sec) -training >> step=8046400, episode=1342 reward=0.7726339 (446.87 it/sec) -training >> step=8046500, episode=1342 reward=0.8009695 (497.24 it/sec) -training >> step=8046600, episode=1342 reward=0.8139679 (501.74 it/sec) -training >> step=8046700, episode=1342 reward=0.8061827 (496.55 it/sec) -training >> step=8046800, episode=1342 reward=0.7808614 (358.22 it/sec) -training >> step=8046900, episode=1342 reward=0.778828 (437.18 it/sec) -training >> step=8047000, episode=1342 reward=0.7947639 (492.11 it/sec) -training >> step=8047100, episode=1342 reward=0.7854566 (440.26 it/sec) -training >> step=8047200, episode=1342 reward=0.810258 (437.08 it/sec) -training >> step=8047300, episode=1342 reward=0.815791 (443.67 it/sec) -training >> step=8047400, episode=1342 reward=0.7911714 (464.35 it/sec) -training >> step=8047500, episode=1342 reward=0.7778283 (443.73 it/sec) -training >> step=8047600, episode=1342 reward=0.773771 (470.56 it/sec) -training >> step=8047700, episode=1342 reward=0.7910928 (448.51 it/sec) -training >> step=8047800, episode=1342 reward=0.7902502 (452.01 it/sec) -training >> step=8047900, episode=1342 reward=0.7902049 (394.08 it/sec) -training >> step=8048000, episode=1342 reward=0.7666602 (447.04 it/sec) -training >> step=8048100, episode=1342 reward=0.7875457 (484.24 it/sec) -training >> step=8048200, episode=1342 reward=0.7805784 (425.39 it/sec) -training >> step=8048300, episode=1342 reward=0.7912748 (418.23 it/sec) -training >> step=8048400, episode=1342 reward=0.7784538 (422.09 it/sec) -training >> step=8048500, episode=1342 reward=0.7759812 (390.66 it/sec) -training >> step=8048600, episode=1342 reward=0.7873448 (401.80 it/sec) -training >> step=8048700, episode=1342 reward=0.7815176 (445.88 it/sec) -training >> step=8048800, episode=1342 reward=0.7833418 (437.20 it/sec) -training >> step=8048900, episode=1342 reward=0.7834582 (542.45 it/sec) -training >> step=8049000, episode=1342 reward=0.7826112 (510.51 it/sec) -training >> step=8049100, episode=1342 reward=0.769538 (511.33 it/sec) -training >> step=8049200, episode=1342 reward=0.793 (483.91 it/sec) -training >> step=8049300, episode=1342 reward=0.7702589 (514.03 it/sec) -training >> step=8049400, episode=1342 reward=0.7851719 (460.10 it/sec) -training >> step=8049500, episode=1342 reward=0.7985591 (510.61 it/sec) -training >> step=8049600, episode=1342 reward=0.7832682 (468.21 it/sec) -training >> step=8049700, episode=1342 reward=0.7980571 (476.91 it/sec) -training >> step=8049800, episode=1342 reward=0.7710345 (436.84 it/sec) -training >> step=8049900, episode=1342 reward=0.7795139 (417.89 it/sec) -training >> step=8050000, episode=1342 reward=0.7836173 (464.93 it/sec) -training >> step=8050100, episode=1342 reward=0.7912746 (425.05 it/sec) -training >> step=8050200, episode=1342 reward=0.7790244 (451.71 it/sec) -training >> step=8050300, episode=1342 reward=0.7938229 (463.07 it/sec) -training >> step=8050400, episode=1342 reward=0.7606007 (461.81 it/sec) -training >> step=8050500, episode=1342 reward=0.7953606 (466.09 it/sec) -training >> step=8050600, episode=1342 reward=0.7840021 (415.58 it/sec) -training >> step=8050700, episode=1342 reward=0.7826253 (460.16 it/sec) -training >> step=8050800, episode=1342 reward=0.7760729 (426.41 it/sec) -training >> step=8050900, episode=1342 reward=0.8005063 (445.56 it/sec) -training >> step=8051000, episode=1342 reward=0.7764297 (394.70 it/sec) -training >> step=8051100, episode=1342 reward=0.7848126 (364.35 it/sec) -training >> step=8051200, episode=1342 reward=0.7847964 (403.90 it/sec) -training >> step=8051300, episode=1343 reward=0.7951919 (71.43 it/sec) -training >> step=8051400, episode=1343 reward=0.7891063 (413.02 it/sec) -training >> step=8051500, episode=1343 reward=0.7887629 (417.59 it/sec) -training >> step=8051600, episode=1343 reward=0.7697688 (478.52 it/sec) -training >> step=8051700, episode=1343 reward=0.7836306 (476.01 it/sec) -training >> step=8051800, episode=1343 reward=0.7834265 (467.15 it/sec) -training >> step=8051900, episode=1343 reward=0.7636355 (465.65 it/sec) -training >> step=8052000, episode=1343 reward=0.7754965 (476.85 it/sec) -training >> step=8052100, episode=1343 reward=0.7947105 (460.04 it/sec) -training >> step=8052200, episode=1343 reward=0.771816 (446.98 it/sec) -training >> step=8052300, episode=1343 reward=0.7995686 (474.41 it/sec) -training >> step=8052400, episode=1343 reward=0.7907968 (484.69 it/sec) -training >> step=8052500, episode=1343 reward=0.7783382 (447.47 it/sec) -training >> step=8052600, episode=1343 reward=0.7738657 (464.01 it/sec) -training >> step=8052700, episode=1343 reward=0.7825386 (516.81 it/sec) -training >> step=8052800, episode=1343 reward=0.7942213 (432.83 it/sec) -training >> step=8052900, episode=1343 reward=0.7840681 (416.54 it/sec) -training >> step=8053000, episode=1343 reward=0.7946842 (275.30 it/sec) -training >> step=8053100, episode=1343 reward=0.7808676 (475.01 it/sec) -training >> step=8053200, episode=1343 reward=0.8030416 (439.79 it/sec) -training >> step=8053300, episode=1343 reward=0.7940812 (453.68 it/sec) -training >> step=8053400, episode=1343 reward=0.7861087 (445.80 it/sec) -training >> step=8053500, episode=1343 reward=0.8033052 (491.24 it/sec) -training >> step=8053600, episode=1343 reward=0.7895284 (454.22 it/sec) -training >> step=8053700, episode=1343 reward=0.7949292 (409.62 it/sec) -training >> step=8053800, episode=1343 reward=0.7955714 (446.11 it/sec) -training >> step=8053900, episode=1343 reward=0.8013572 (458.63 it/sec) -training >> step=8054000, episode=1343 reward=0.7948958 (489.44 it/sec) -training >> step=8054100, episode=1343 reward=0.8022153 (496.15 it/sec) -training >> step=8054200, episode=1343 reward=0.7852329 (471.33 it/sec) -training >> step=8054300, episode=1343 reward=0.7863344 (496.14 it/sec) -training >> step=8054400, episode=1343 reward=0.7982869 (536.78 it/sec) -training >> step=8054500, episode=1343 reward=0.775404 (515.54 it/sec) -training >> step=8054600, episode=1343 reward=0.7919584 (505.69 it/sec) -training >> step=8054700, episode=1343 reward=0.7806451 (532.72 it/sec) -training >> step=8054800, episode=1343 reward=0.784688 (531.09 it/sec) -training >> step=8054900, episode=1343 reward=0.8009596 (520.33 it/sec) -training >> step=8055000, episode=1343 reward=0.7865767 (478.97 it/sec) -training >> step=8055100, episode=1343 reward=0.7728092 (503.37 it/sec) -training >> step=8055200, episode=1343 reward=0.7928566 (469.52 it/sec) -training >> step=8055300, episode=1343 reward=0.7930288 (515.31 it/sec) -training >> step=8055400, episode=1343 reward=0.7745059 (512.25 it/sec) -training >> step=8055500, episode=1343 reward=0.7629923 (539.37 it/sec) -training >> step=8055600, episode=1343 reward=0.8020979 (504.73 it/sec) -training >> step=8055700, episode=1343 reward=0.783533 (465.93 it/sec) -training >> step=8055800, episode=1343 reward=0.7713929 (481.32 it/sec) -training >> step=8055900, episode=1343 reward=0.7907621 (521.53 it/sec) -training >> step=8056000, episode=1343 reward=0.7909086 (523.66 it/sec) -training >> step=8056100, episode=1343 reward=0.7975455 (538.19 it/sec) -training >> step=8056200, episode=1343 reward=0.7683029 (527.25 it/sec) -training >> step=8056300, episode=1343 reward=0.7637163 (477.01 it/sec) -training >> step=8056400, episode=1343 reward=0.7945955 (455.97 it/sec) -training >> step=8056500, episode=1343 reward=0.7832962 (467.75 it/sec) -training >> step=8056600, episode=1343 reward=0.7913263 (378.15 it/sec) -training >> step=8056700, episode=1343 reward=0.7949004 (373.67 it/sec) -training >> step=8056800, episode=1343 reward=0.7730692 (438.96 it/sec) -training >> step=8056900, episode=1343 reward=0.7845004 (486.27 it/sec) -training >> step=8057000, episode=1343 reward=0.809018 (480.17 it/sec) -training >> step=8057100, episode=1343 reward=0.7798159 (477.28 it/sec) -training >> step=8057200, episode=1343 reward=0.7703236 (481.56 it/sec) -training >> step=8057300, episode=1344 reward=0.7814092 (128.79 it/sec) -training >> step=8057400, episode=1344 reward=0.7584313 (526.44 it/sec) -training >> step=8057500, episode=1344 reward=0.7798491 (526.18 it/sec) -training >> step=8057600, episode=1344 reward=0.8071471 (554.23 it/sec) -training >> step=8057700, episode=1344 reward=0.8003389 (524.17 it/sec) -training >> step=8057800, episode=1344 reward=0.7798841 (544.87 it/sec) -training >> step=8057900, episode=1344 reward=0.7899942 (517.61 it/sec) -training >> step=8058000, episode=1344 reward=0.77539 (566.60 it/sec) -training >> step=8058100, episode=1344 reward=0.8030571 (541.08 it/sec) -training >> step=8058200, episode=1344 reward=0.7817909 (502.36 it/sec) -training >> step=8058300, episode=1344 reward=0.7918495 (575.55 it/sec) -training >> step=8058400, episode=1344 reward=0.8018089 (556.86 it/sec) -training >> step=8058500, episode=1344 reward=0.804684 (532.40 it/sec) -training >> step=8058600, episode=1344 reward=0.7717943 (494.07 it/sec) -training >> step=8058700, episode=1344 reward=0.7756122 (581.21 it/sec) -training >> step=8058800, episode=1344 reward=0.787743 (517.19 it/sec) -training >> step=8058900, episode=1344 reward=0.8040132 (499.54 it/sec) -training >> step=8059000, episode=1344 reward=0.794942 (517.67 it/sec) -training >> step=8059100, episode=1344 reward=0.7754586 (572.28 it/sec) -training >> step=8059200, episode=1344 reward=0.7878053 (352.23 it/sec) -training >> step=8059300, episode=1344 reward=0.79363 (500.14 it/sec) -training >> step=8059400, episode=1344 reward=0.791428 (533.68 it/sec) -training >> step=8059500, episode=1344 reward=0.8066767 (501.49 it/sec) -training >> step=8059600, episode=1344 reward=0.808802 (545.92 it/sec) -training >> step=8059700, episode=1344 reward=0.7932097 (490.38 it/sec) -training >> step=8059800, episode=1344 reward=0.7755563 (498.60 it/sec) -training >> step=8059900, episode=1344 reward=0.8010684 (487.75 it/sec) -training >> step=8060000, episode=1344 reward=0.7852307 (500.31 it/sec) -training >> step=8060100, episode=1344 reward=0.8005221 (503.02 it/sec) -training >> step=8060200, episode=1344 reward=0.7895287 (536.03 it/sec) -training >> step=8060300, episode=1344 reward=0.7846408 (477.40 it/sec) -training >> step=8060400, episode=1344 reward=0.7889394 (450.23 it/sec) -training >> step=8060500, episode=1344 reward=0.772291 (505.65 it/sec) -training >> step=8060600, episode=1344 reward=0.7892246 (469.09 it/sec) -training >> step=8060700, episode=1344 reward=0.7790142 (505.47 it/sec) -training >> step=8060800, episode=1344 reward=0.8048882 (480.61 it/sec) -training >> step=8060900, episode=1344 reward=0.782398 (541.59 it/sec) -training >> step=8061000, episode=1344 reward=0.7855338 (495.45 it/sec) -training >> step=8061100, episode=1344 reward=0.7891623 (491.32 it/sec) -training >> step=8061200, episode=1344 reward=0.7846859 (506.10 it/sec) -training >> step=8061300, episode=1344 reward=0.8014579 (492.72 it/sec) -training >> step=8061400, episode=1344 reward=0.7953409 (507.89 it/sec) -training >> step=8061500, episode=1344 reward=0.7824869 (506.04 it/sec) -training >> step=8061600, episode=1344 reward=0.7862516 (490.14 it/sec) -training >> step=8061700, episode=1344 reward=0.7887653 (447.27 it/sec) -training >> step=8061800, episode=1344 reward=0.7839512 (458.37 it/sec) -training >> step=8061900, episode=1344 reward=0.7861559 (484.25 it/sec) -training >> step=8062000, episode=1344 reward=0.7898277 (471.45 it/sec) -training >> step=8062100, episode=1344 reward=0.8032806 (477.34 it/sec) -training >> step=8062200, episode=1344 reward=0.7793567 (440.87 it/sec) -training >> step=8062300, episode=1344 reward=0.7922366 (506.27 it/sec) -training >> step=8062400, episode=1344 reward=0.7864172 (476.09 it/sec) -training >> step=8062500, episode=1344 reward=0.7892579 (500.68 it/sec) -training >> step=8062600, episode=1344 reward=0.7948268 (465.18 it/sec) -training >> step=8062700, episode=1344 reward=0.780268 (465.20 it/sec) -training >> step=8062800, episode=1344 reward=0.7764066 (414.41 it/sec) -training >> step=8062900, episode=1344 reward=0.7945229 (425.92 it/sec) -training >> step=8063000, episode=1344 reward=0.8032398 (487.28 it/sec) -training >> step=8063100, episode=1344 reward=0.7926181 (495.95 it/sec) -training >> step=8063200, episode=1344 reward=0.8054635 (503.56 it/sec) -training >> step=8063300, episode=1345 reward=0.772378 (113.79 it/sec) -training >> step=8063400, episode=1345 reward=0.7755926 (576.67 it/sec) -training >> step=8063500, episode=1345 reward=0.810955 (525.13 it/sec) -training >> step=8063600, episode=1345 reward=0.7901187 (558.97 it/sec) -training >> step=8063700, episode=1345 reward=0.7947658 (501.48 it/sec) -training >> step=8063800, episode=1345 reward=0.7701667 (559.62 it/sec) -training >> step=8063900, episode=1345 reward=0.785555 (544.69 it/sec) -training >> step=8064000, episode=1345 reward=0.7951857 (489.52 it/sec) -training >> step=8064100, episode=1345 reward=0.8124819 (546.96 it/sec) -training >> step=8064200, episode=1345 reward=0.7860422 (534.34 it/sec) -training >> step=8064300, episode=1345 reward=0.7979926 (523.54 it/sec) -training >> step=8064400, episode=1345 reward=0.7894991 (508.18 it/sec) -training >> step=8064500, episode=1345 reward=0.7976476 (547.25 it/sec) -training >> step=8064600, episode=1345 reward=0.7838883 (552.20 it/sec) -training >> step=8064700, episode=1345 reward=0.7885211 (530.01 it/sec) -training >> step=8064800, episode=1345 reward=0.8079161 (533.09 it/sec) -training >> step=8064900, episode=1345 reward=0.7931103 (527.47 it/sec) -training >> step=8065000, episode=1345 reward=0.7921154 (512.18 it/sec) -training >> step=8065100, episode=1345 reward=0.7793055 (529.06 it/sec) -training >> step=8065200, episode=1345 reward=0.7798382 (566.52 it/sec) -training >> step=8065300, episode=1345 reward=0.7781636 (350.90 it/sec) -training >> step=8065400, episode=1345 reward=0.8052761 (555.40 it/sec) -training >> step=8065500, episode=1345 reward=0.7834424 (531.94 it/sec) -training >> step=8065600, episode=1345 reward=0.7908556 (543.52 it/sec) -training >> step=8065700, episode=1345 reward=0.8013386 (539.64 it/sec) -training >> step=8065800, episode=1345 reward=0.7904401 (527.98 it/sec) -training >> step=8065900, episode=1345 reward=0.7793066 (538.80 it/sec) -training >> step=8066000, episode=1345 reward=0.7598358 (513.40 it/sec) -training >> step=8066100, episode=1345 reward=0.785192 (523.48 it/sec) -training >> step=8066200, episode=1345 reward=0.7818936 (526.19 it/sec) -training >> step=8066300, episode=1345 reward=0.7899321 (561.84 it/sec) -training >> step=8066400, episode=1345 reward=0.7801338 (486.34 it/sec) -training >> step=8066500, episode=1345 reward=0.7937914 (498.62 it/sec) -training >> step=8066600, episode=1345 reward=0.7860339 (472.46 it/sec) -training >> step=8066700, episode=1345 reward=0.8059046 (509.63 it/sec) -training >> step=8066800, episode=1345 reward=0.7939219 (524.49 it/sec) -training >> step=8066900, episode=1345 reward=0.8023909 (518.00 it/sec) -training >> step=8067000, episode=1345 reward=0.7952623 (552.92 it/sec) -training >> step=8067100, episode=1345 reward=0.7600193 (503.38 it/sec) -training >> step=8067200, episode=1345 reward=0.8036419 (523.98 it/sec) -training >> step=8067300, episode=1345 reward=0.7817756 (528.33 it/sec) -training >> step=8067400, episode=1345 reward=0.7868549 (560.43 it/sec) -training >> step=8067500, episode=1345 reward=0.77924 (505.16 it/sec) -training >> step=8067600, episode=1345 reward=0.7769165 (514.57 it/sec) -training >> step=8067700, episode=1345 reward=0.7879297 (556.06 it/sec) -training >> step=8067800, episode=1345 reward=0.774351 (533.23 it/sec) -training >> step=8067900, episode=1345 reward=0.7999272 (524.31 it/sec) -training >> step=8068000, episode=1345 reward=0.7986778 (520.76 it/sec) -training >> step=8068100, episode=1345 reward=0.8177372 (532.34 it/sec) -training >> step=8068200, episode=1345 reward=0.7938735 (480.66 it/sec) -training >> step=8068300, episode=1345 reward=0.7714362 (519.92 it/sec) -training >> step=8068400, episode=1345 reward=0.8074658 (517.14 it/sec) -training >> step=8068500, episode=1345 reward=0.7713802 (552.67 it/sec) -training >> step=8068600, episode=1345 reward=0.7836393 (495.15 it/sec) -training >> step=8068700, episode=1345 reward=0.7914255 (484.46 it/sec) -training >> step=8068800, episode=1345 reward=0.8028864 (548.01 it/sec) -training >> step=8068900, episode=1345 reward=0.7800491 (530.37 it/sec) -training >> step=8069000, episode=1345 reward=0.7920502 (514.26 it/sec) -training >> step=8069100, episode=1345 reward=0.7773899 (525.96 it/sec) -training >> step=8069200, episode=1345 reward=0.7708809 (524.41 it/sec) -training >> step=8069300, episode=1346 reward=0.7991847 (104.09 it/sec) -training >> step=8069400, episode=1346 reward=0.7894056 (551.52 it/sec) -training >> step=8069500, episode=1346 reward=0.7875755 (544.86 it/sec) -training >> step=8069600, episode=1346 reward=0.7802814 (566.99 it/sec) -training >> step=8069700, episode=1346 reward=0.7874328 (534.99 it/sec) -training >> step=8069800, episode=1346 reward=0.7937867 (520.82 it/sec) -training >> step=8069900, episode=1346 reward=0.798229 (531.43 it/sec) -training >> step=8070000, episode=1346 reward=0.787226 (547.88 it/sec) -training >> step=8070100, episode=1346 reward=0.7909757 (557.70 it/sec) -training >> step=8070200, episode=1346 reward=0.7987346 (548.57 it/sec) -training >> step=8070300, episode=1346 reward=0.8034759 (547.46 it/sec) -training >> step=8070400, episode=1346 reward=0.7964999 (526.74 it/sec) -training >> step=8070500, episode=1346 reward=0.7814243 (513.94 it/sec) -training >> step=8070600, episode=1346 reward=0.7924303 (553.35 it/sec) -training >> step=8070700, episode=1346 reward=0.795852 (538.65 it/sec) -training >> step=8070800, episode=1346 reward=0.7885368 (533.78 it/sec) -training >> step=8070900, episode=1346 reward=0.7716414 (513.79 it/sec) -training >> step=8071000, episode=1346 reward=0.8150819 (553.19 it/sec) -training >> step=8071100, episode=1346 reward=0.8020664 (491.27 it/sec) -training >> step=8071200, episode=1346 reward=0.7870489 (510.60 it/sec) -training >> step=8071300, episode=1346 reward=0.7773098 (535.68 it/sec) -training >> step=8071400, episode=1346 reward=0.7990237 (403.99 it/sec) -training >> step=8071500, episode=1346 reward=0.7818666 (486.92 it/sec) -training >> step=8071600, episode=1346 reward=0.7930149 (510.92 it/sec) -training >> step=8071700, episode=1346 reward=0.7918097 (554.91 it/sec) -training >> step=8071800, episode=1346 reward=0.7929739 (527.45 it/sec) -training >> step=8071900, episode=1346 reward=0.7955473 (502.15 it/sec) -training >> step=8072000, episode=1346 reward=0.798377 (523.83 it/sec) -training >> step=8072100, episode=1346 reward=0.7908555 (509.55 it/sec) -training >> step=8072200, episode=1346 reward=0.8081297 (525.61 it/sec) -training >> step=8072300, episode=1346 reward=0.8010299 (509.80 it/sec) -training >> step=8072400, episode=1346 reward=0.8045183 (510.21 it/sec) -training >> step=8072500, episode=1346 reward=0.7960542 (510.83 it/sec) -training >> step=8072600, episode=1346 reward=0.7744293 (511.62 it/sec) -training >> step=8072700, episode=1346 reward=0.7732648 (513.55 it/sec) -training >> step=8072800, episode=1346 reward=0.806944 (546.64 it/sec) -training >> step=8072900, episode=1346 reward=0.7601514 (482.76 it/sec) -training >> step=8073000, episode=1346 reward=0.7973439 (491.71 it/sec) -training >> step=8073100, episode=1346 reward=0.7700554 (495.94 it/sec) -training >> step=8073200, episode=1346 reward=0.7820141 (524.86 it/sec) -training >> step=8073300, episode=1346 reward=0.7585576 (531.01 it/sec) -training >> step=8073400, episode=1346 reward=0.7949566 (511.00 it/sec) -training >> step=8073500, episode=1346 reward=0.7910691 (512.74 it/sec) -training >> step=8073600, episode=1346 reward=0.784833 (490.58 it/sec) -training >> step=8073700, episode=1346 reward=0.7792715 (475.65 it/sec) -training >> step=8073800, episode=1346 reward=0.7770264 (483.58 it/sec) -training >> step=8073900, episode=1346 reward=0.795437 (514.08 it/sec) -training >> step=8074000, episode=1346 reward=0.8014987 (506.22 it/sec) -training >> step=8074100, episode=1346 reward=0.7675297 (435.96 it/sec) -training >> step=8074200, episode=1346 reward=0.8044733 (517.24 it/sec) -training >> step=8074300, episode=1346 reward=0.781307 (500.85 it/sec) -training >> step=8074400, episode=1346 reward=0.7985355 (454.21 it/sec) -training >> step=8074500, episode=1346 reward=0.7958401 (439.93 it/sec) -training >> step=8074600, episode=1346 reward=0.794356 (485.11 it/sec) -training >> step=8074700, episode=1346 reward=0.7818409 (468.95 it/sec) -training >> step=8074800, episode=1346 reward=0.7804913 (451.64 it/sec) -training >> step=8074900, episode=1346 reward=0.779618 (477.82 it/sec) -training >> step=8075000, episode=1346 reward=0.8015721 (507.56 it/sec) -training >> step=8075100, episode=1346 reward=0.8016533 (480.00 it/sec) -training >> step=8075200, episode=1346 reward=0.7831922 (508.04 it/sec) -training >> step=8075300, episode=1347 reward=0.7779079 (110.45 it/sec) -training >> step=8075400, episode=1347 reward=0.8010834 (527.59 it/sec) -training >> step=8075500, episode=1347 reward=0.7830871 (535.36 it/sec) -training >> step=8075600, episode=1347 reward=0.7744539 (514.20 it/sec) -training >> step=8075700, episode=1347 reward=0.7961946 (542.27 it/sec) -training >> step=8075800, episode=1347 reward=0.7837165 (515.80 it/sec) -training >> step=8075900, episode=1347 reward=0.7790557 (537.29 it/sec) -training >> step=8076000, episode=1347 reward=0.8061816 (553.92 it/sec) -training >> step=8076100, episode=1347 reward=0.7985722 (479.97 it/sec) -training >> step=8076200, episode=1347 reward=0.8034211 (420.29 it/sec) -training >> step=8076300, episode=1347 reward=0.8115081 (529.03 it/sec) -training >> step=8076400, episode=1347 reward=0.8108529 (528.40 it/sec) -training >> step=8076500, episode=1347 reward=0.7868243 (546.03 it/sec) -training >> step=8076600, episode=1347 reward=0.7929543 (544.39 it/sec) -training >> step=8076700, episode=1347 reward=0.7966515 (526.04 it/sec) -training >> step=8076800, episode=1347 reward=0.8047429 (532.77 it/sec) -training >> step=8076900, episode=1347 reward=0.8038139 (505.83 it/sec) -training >> step=8077000, episode=1347 reward=0.7920421 (520.51 it/sec) -training >> step=8077100, episode=1347 reward=0.7977316 (572.88 it/sec) -training >> step=8077200, episode=1347 reward=0.7980825 (490.48 it/sec) -training >> step=8077300, episode=1347 reward=0.809777 (517.20 it/sec) -training >> step=8077400, episode=1347 reward=0.780266 (514.95 it/sec) -training >> step=8077500, episode=1347 reward=0.7992142 (382.75 it/sec) -training >> step=8077600, episode=1347 reward=0.788242 (549.48 it/sec) -training >> step=8077700, episode=1347 reward=0.7765369 (536.18 it/sec) -training >> step=8077800, episode=1347 reward=0.781602 (542.06 it/sec) -training >> step=8077900, episode=1347 reward=0.7847509 (533.55 it/sec) -training >> step=8078000, episode=1347 reward=0.7748411 (506.88 it/sec) -training >> step=8078100, episode=1347 reward=0.7857081 (533.57 it/sec) -training >> step=8078200, episode=1347 reward=0.7938607 (508.02 it/sec) -training >> step=8078300, episode=1347 reward=0.7812372 (486.29 it/sec) -training >> step=8078400, episode=1347 reward=0.7749692 (483.46 it/sec) -training >> step=8078500, episode=1347 reward=0.7855737 (515.27 it/sec) -training >> step=8078600, episode=1347 reward=0.7882065 (520.82 it/sec) -training >> step=8078700, episode=1347 reward=0.7862377 (532.80 it/sec) -training >> step=8078800, episode=1347 reward=0.8122219 (478.59 it/sec) -training >> step=8078900, episode=1347 reward=0.7932273 (507.16 it/sec) -training >> step=8079000, episode=1347 reward=0.7901927 (472.10 it/sec) -training >> step=8079100, episode=1347 reward=0.7821922 (482.88 it/sec) -training >> step=8079200, episode=1347 reward=0.7850969 (516.88 it/sec) -training >> step=8079300, episode=1347 reward=0.8042885 (536.63 it/sec) -training >> step=8079400, episode=1347 reward=0.800102 (514.64 it/sec) -training >> step=8079500, episode=1347 reward=0.7923182 (501.27 it/sec) -training >> step=8079600, episode=1347 reward=0.7747354 (543.14 it/sec) -training >> step=8079700, episode=1347 reward=0.780534 (529.70 it/sec) -training >> step=8079800, episode=1347 reward=0.8003372 (525.52 it/sec) -training >> step=8079900, episode=1347 reward=0.7997608 (507.40 it/sec) -training >> step=8080000, episode=1347 reward=0.7836662 (515.69 it/sec) -training >> step=8080100, episode=1347 reward=0.8010886 (507.99 it/sec) -training >> step=8080200, episode=1347 reward=0.7670289 (530.61 it/sec) -training >> step=8080300, episode=1347 reward=0.7969374 (574.78 it/sec) -training >> step=8080400, episode=1347 reward=0.8224347 (511.64 it/sec) -training >> step=8080500, episode=1347 reward=0.7988731 (514.75 it/sec) -training >> step=8080600, episode=1347 reward=0.7913494 (497.64 it/sec) -training >> step=8080700, episode=1347 reward=0.7843301 (554.64 it/sec) -training >> step=8080800, episode=1347 reward=0.7693108 (521.97 it/sec) -training >> step=8080900, episode=1347 reward=0.7674139 (507.94 it/sec) -training >> step=8081000, episode=1347 reward=0.7825756 (518.44 it/sec) -training >> step=8081100, episode=1347 reward=0.7793529 (507.27 it/sec) -training >> step=8081200, episode=1347 reward=0.7678081 (513.99 it/sec) -training >> step=8081300, episode=1348 reward=0.7907816 (111.05 it/sec) -training >> step=8081400, episode=1348 reward=0.7697259 (537.50 it/sec) -training >> step=8081500, episode=1348 reward=0.7569383 (546.92 it/sec) -training >> step=8081600, episode=1348 reward=0.7832868 (525.83 it/sec) -training >> step=8081700, episode=1348 reward=0.7872217 (540.13 it/sec) -training >> step=8081800, episode=1348 reward=0.7695317 (555.73 it/sec) -training >> step=8081900, episode=1348 reward=0.7848503 (536.46 it/sec) -training >> step=8082000, episode=1348 reward=0.7891973 (542.62 it/sec) -training >> step=8082100, episode=1348 reward=0.7900625 (545.76 it/sec) -training >> step=8082200, episode=1348 reward=0.7880166 (539.25 it/sec) -training >> step=8082300, episode=1348 reward=0.7887209 (523.60 it/sec) -training >> step=8082400, episode=1348 reward=0.7820412 (527.12 it/sec) -training >> step=8082500, episode=1348 reward=0.7840065 (573.97 it/sec) -training >> step=8082600, episode=1348 reward=0.8010471 (522.04 it/sec) -training >> step=8082700, episode=1348 reward=0.7884055 (526.74 it/sec) -training >> step=8082800, episode=1348 reward=0.7794254 (533.09 it/sec) -training >> step=8082900, episode=1348 reward=0.7952852 (546.64 it/sec) -training >> step=8083000, episode=1348 reward=0.7780206 (539.01 it/sec) -training >> step=8083100, episode=1348 reward=0.7897896 (545.46 it/sec) -training >> step=8083200, episode=1348 reward=0.7805444 (538.04 it/sec) -training >> step=8083300, episode=1348 reward=0.8106865 (501.90 it/sec) -training >> step=8083400, episode=1348 reward=0.785089 (516.13 it/sec) -training >> step=8083500, episode=1348 reward=0.8022423 (511.30 it/sec) -training >> step=8083600, episode=1348 reward=0.7842007 (382.46 it/sec) -training >> step=8083700, episode=1348 reward=0.8064615 (542.26 it/sec) -training >> step=8083800, episode=1348 reward=0.7798883 (498.26 it/sec) -training >> step=8083900, episode=1348 reward=0.8116548 (525.63 it/sec) -training >> step=8084000, episode=1348 reward=0.7996728 (470.44 it/sec) -training >> step=8084100, episode=1348 reward=0.7954143 (525.33 it/sec) -training >> step=8084200, episode=1348 reward=0.7737175 (525.03 it/sec) -training >> step=8084300, episode=1348 reward=0.7933833 (547.42 it/sec) -training >> step=8084400, episode=1348 reward=0.783336 (479.83 it/sec) -training >> step=8084500, episode=1348 reward=0.7804309 (510.92 it/sec) -training >> step=8084600, episode=1348 reward=0.7832804 (524.69 it/sec) -training >> step=8084700, episode=1348 reward=0.7865176 (554.52 it/sec) -training >> step=8084800, episode=1348 reward=0.7803344 (520.54 it/sec) -training >> step=8084900, episode=1348 reward=0.7943886 (502.61 it/sec) -training >> step=8085000, episode=1348 reward=0.7917939 (499.20 it/sec) -training >> step=8085100, episode=1348 reward=0.8011643 (518.19 it/sec) -training >> step=8085200, episode=1348 reward=0.7984253 (507.67 it/sec) -training >> step=8085300, episode=1348 reward=0.8148333 (532.78 it/sec) -training >> step=8085400, episode=1348 reward=0.7902218 (487.01 it/sec) -training >> step=8085500, episode=1348 reward=0.7919604 (523.04 it/sec) -training >> step=8085600, episode=1348 reward=0.8015885 (481.15 it/sec) -training >> step=8085700, episode=1348 reward=0.7788143 (540.97 it/sec) -training >> step=8085800, episode=1348 reward=0.7662213 (541.67 it/sec) -training >> step=8085900, episode=1348 reward=0.7954293 (518.12 it/sec) -training >> step=8086000, episode=1348 reward=0.8266318 (504.79 it/sec) -training >> step=8086100, episode=1348 reward=0.7993049 (518.73 it/sec) -training >> step=8086200, episode=1348 reward=0.7915882 (522.26 it/sec) -training >> step=8086300, episode=1348 reward=0.7928762 (518.45 it/sec) -training >> step=8086400, episode=1348 reward=0.7850586 (522.92 it/sec) -training >> step=8086500, episode=1348 reward=0.791444 (497.24 it/sec) -training >> step=8086600, episode=1348 reward=0.799768 (456.74 it/sec) -training >> step=8086700, episode=1348 reward=0.7954452 (482.52 it/sec) -training >> step=8086800, episode=1348 reward=0.7787076 (522.38 it/sec) -training >> step=8086900, episode=1348 reward=0.7935075 (473.57 it/sec) -training >> step=8087000, episode=1348 reward=0.7812643 (468.85 it/sec) -training >> step=8087100, episode=1348 reward=0.788633 (439.63 it/sec) -training >> step=8087200, episode=1348 reward=0.7937199 (488.66 it/sec) -training >> step=8087300, episode=1349 reward=0.7942202 (112.77 it/sec) -training >> step=8087400, episode=1349 reward=0.7800214 (539.06 it/sec) -training >> step=8087500, episode=1349 reward=0.7812551 (556.57 it/sec) -training >> step=8087600, episode=1349 reward=0.7843689 (529.17 it/sec) -training >> step=8087700, episode=1349 reward=0.8055856 (530.41 it/sec) -training >> step=8087800, episode=1349 reward=0.7847233 (477.28 it/sec) -training >> step=8087900, episode=1349 reward=0.7937819 (536.60 it/sec) -training >> step=8088000, episode=1349 reward=0.7710016 (586.75 it/sec) -training >> step=8088100, episode=1349 reward=0.7780521 (537.11 it/sec) -training >> step=8088200, episode=1349 reward=0.8032995 (520.03 it/sec) -training >> step=8088300, episode=1349 reward=0.7985832 (558.83 it/sec) -training >> step=8088400, episode=1349 reward=0.7858885 (527.49 it/sec) -training >> step=8088500, episode=1349 reward=0.7868297 (524.64 it/sec) -training >> step=8088600, episode=1349 reward=0.8022097 (545.40 it/sec) -training >> step=8088700, episode=1349 reward=0.7961733 (541.92 it/sec) -training >> step=8088800, episode=1349 reward=0.7705026 (457.48 it/sec) -training >> step=8088900, episode=1349 reward=0.779775 (518.93 it/sec) -training >> step=8089000, episode=1349 reward=0.7929085 (516.62 it/sec) -training >> step=8089100, episode=1349 reward=0.8031936 (548.93 it/sec) -training >> step=8089200, episode=1349 reward=0.7907459 (517.45 it/sec) -training >> step=8089300, episode=1349 reward=0.7963006 (522.16 it/sec) -training >> step=8089400, episode=1349 reward=0.7849692 (530.68 it/sec) -training >> step=8089500, episode=1349 reward=0.7984979 (501.63 it/sec) -training >> step=8089600, episode=1349 reward=0.7875616 (528.01 it/sec) -training >> step=8089700, episode=1349 reward=0.8033217 (480.38 it/sec) -training >> step=8089800, episode=1349 reward=0.7660809 (390.93 it/sec) -training >> step=8089900, episode=1349 reward=0.7728495 (536.65 it/sec) -training >> step=8090000, episode=1349 reward=0.8063108 (509.37 it/sec) -training >> step=8090100, episode=1349 reward=0.75232 (550.57 it/sec) -training >> step=8090200, episode=1349 reward=0.7973683 (511.92 it/sec) -training >> step=8090300, episode=1349 reward=0.8000019 (485.74 it/sec) -training >> step=8090400, episode=1349 reward=0.7905103 (496.80 it/sec) -training >> step=8090500, episode=1349 reward=0.7820416 (500.14 it/sec) -training >> step=8090600, episode=1349 reward=0.8041693 (440.43 it/sec) -training >> step=8090700, episode=1349 reward=0.7912037 (442.60 it/sec) -training >> step=8090800, episode=1349 reward=0.7845976 (460.92 it/sec) -training >> step=8090900, episode=1349 reward=0.7768143 (481.48 it/sec) -training >> step=8091000, episode=1349 reward=0.7756421 (468.63 it/sec) -training >> step=8091100, episode=1349 reward=0.7843809 (515.12 it/sec) -training >> step=8091200, episode=1349 reward=0.8186284 (518.10 it/sec) -training >> step=8091300, episode=1349 reward=0.7909271 (512.95 it/sec) -training >> step=8091400, episode=1349 reward=0.8002803 (509.96 it/sec) -training >> step=8091500, episode=1349 reward=0.7713581 (487.93 it/sec) -training >> step=8091600, episode=1349 reward=0.7773064 (547.92 it/sec) -training >> step=8091700, episode=1349 reward=0.8100521 (513.30 it/sec) -training >> step=8091800, episode=1349 reward=0.8006858 (495.44 it/sec) -training >> step=8091900, episode=1349 reward=0.8008946 (518.98 it/sec) -training >> step=8092000, episode=1349 reward=0.7774307 (523.45 it/sec) -training >> step=8092100, episode=1349 reward=0.7794105 (520.16 it/sec) -training >> step=8092200, episode=1349 reward=0.8120798 (515.75 it/sec) -training >> step=8092300, episode=1349 reward=0.7932099 (524.05 it/sec) -training >> step=8092400, episode=1349 reward=0.7861812 (505.34 it/sec) -training >> step=8092500, episode=1349 reward=0.7776593 (510.66 it/sec) -training >> step=8092600, episode=1349 reward=0.7997234 (501.22 it/sec) -training >> step=8092700, episode=1349 reward=0.7849153 (527.46 it/sec) -training >> step=8092800, episode=1349 reward=0.782202 (514.28 it/sec) -training >> step=8092900, episode=1349 reward=0.7597662 (523.99 it/sec) -training >> step=8093000, episode=1349 reward=0.7803519 (549.36 it/sec) -training >> step=8093100, episode=1349 reward=0.7944313 (460.78 it/sec) -training >> step=8093200, episode=1349 reward=0.7750273 (514.42 it/sec) -training >> step=8093300, episode=1350 reward=0.7889041 (116.97 it/sec) -training >> step=8093400, episode=1350 reward=0.7846689 (573.91 it/sec) -training >> step=8093500, episode=1350 reward=0.7872434 (512.48 it/sec) -training >> step=8093600, episode=1350 reward=0.7831926 (525.40 it/sec) -training >> step=8093700, episode=1350 reward=0.7897468 (512.20 it/sec) -training >> step=8093800, episode=1350 reward=0.7652432 (513.19 it/sec) -training >> step=8093900, episode=1350 reward=0.7894628 (553.12 it/sec) -training >> step=8094000, episode=1350 reward=0.8065568 (542.10 it/sec) -training >> step=8094100, episode=1350 reward=0.7758304 (549.63 it/sec) -training >> step=8094200, episode=1350 reward=0.7867882 (536.82 it/sec) -training >> step=8094300, episode=1350 reward=0.8010086 (493.23 it/sec) -training >> step=8094400, episode=1350 reward=0.8063957 (560.80 it/sec) -training >> step=8094500, episode=1350 reward=0.7784485 (552.75 it/sec) -training >> step=8094600, episode=1350 reward=0.7962933 (532.29 it/sec) -training >> step=8094700, episode=1350 reward=0.7911449 (536.46 it/sec) -training >> step=8094800, episode=1350 reward=0.7927048 (525.49 it/sec) -training >> step=8094900, episode=1350 reward=0.7924321 (491.36 it/sec) -training >> step=8095000, episode=1350 reward=0.7850194 (569.75 it/sec) -training >> step=8095100, episode=1350 reward=0.7868351 (533.37 it/sec) -training >> step=8095200, episode=1350 reward=0.7832204 (553.57 it/sec) -training >> step=8095300, episode=1350 reward=0.7638333 (529.59 it/sec) -training >> step=8095400, episode=1350 reward=0.7928494 (547.35 it/sec) -training >> step=8095500, episode=1350 reward=0.7795485 (550.89 it/sec) -training >> step=8095600, episode=1350 reward=0.7849473 (545.02 it/sec) -training >> step=8095700, episode=1350 reward=0.7981691 (544.67 it/sec) -training >> step=8095800, episode=1350 reward=0.7865354 (536.40 it/sec) -training >> step=8095900, episode=1350 reward=0.7946154 (530.27 it/sec) -training >> step=8096000, episode=1350 reward=0.8001492 (477.18 it/sec) -training >> step=8096100, episode=1350 reward=0.7934267 (356.74 it/sec) -training >> step=8096200, episode=1350 reward=0.7833128 (529.96 it/sec) -training >> step=8096300, episode=1350 reward=0.7873186 (565.30 it/sec) -training >> step=8096400, episode=1350 reward=0.7833915 (520.00 it/sec) -training >> step=8096500, episode=1350 reward=0.7893323 (512.83 it/sec) -training >> step=8096600, episode=1350 reward=0.8083973 (537.08 it/sec) -training >> step=8096700, episode=1350 reward=0.7749119 (539.51 it/sec) -training >> step=8096800, episode=1350 reward=0.7629362 (526.14 it/sec) -training >> step=8096900, episode=1350 reward=0.7784517 (510.97 it/sec) -training >> step=8097000, episode=1350 reward=0.7804374 (551.00 it/sec) -training >> step=8097100, episode=1350 reward=0.7971118 (517.03 it/sec) -training >> step=8097200, episode=1350 reward=0.7823363 (487.59 it/sec) -training >> step=8097300, episode=1350 reward=0.7899868 (529.10 it/sec) -training >> step=8097400, episode=1350 reward=0.7915412 (526.13 it/sec) -training >> step=8097500, episode=1350 reward=0.7940517 (502.53 it/sec) -training >> step=8097600, episode=1350 reward=0.7942474 (507.11 it/sec) -training >> step=8097700, episode=1350 reward=0.7833306 (534.34 it/sec) -training >> step=8097800, episode=1350 reward=0.7745265 (509.70 it/sec) -training >> step=8097900, episode=1350 reward=0.7926095 (526.14 it/sec) -training >> step=8098000, episode=1350 reward=0.7919402 (494.72 it/sec) -training >> step=8098100, episode=1350 reward=0.7990226 (530.89 it/sec) -training >> step=8098200, episode=1350 reward=0.7810545 (540.38 it/sec) -training >> step=8098300, episode=1350 reward=0.78696 (525.09 it/sec) -training >> step=8098400, episode=1350 reward=0.8009673 (508.28 it/sec) -training >> step=8098500, episode=1350 reward=0.8121185 (508.53 it/sec) -training >> step=8098600, episode=1350 reward=0.806249 (411.27 it/sec) -training >> step=8098700, episode=1350 reward=0.7984366 (478.60 it/sec) -training >> step=8098800, episode=1350 reward=0.7704279 (479.41 it/sec) -training >> step=8098900, episode=1350 reward=0.7997267 (475.45 it/sec) -training >> step=8099000, episode=1350 reward=0.7674203 (445.12 it/sec) -training >> step=8099100, episode=1350 reward=0.7908488 (467.85 it/sec) -training >> step=8099200, episode=1350 reward=0.7766684 (482.44 it/sec) -training >> step=8099300, episode=1351 reward=0.78013 (97.93 it/sec) -training >> step=8099400, episode=1351 reward=0.7765665 (520.46 it/sec) -training >> step=8099500, episode=1351 reward=0.7747533 (551.50 it/sec) -training >> step=8099600, episode=1351 reward=0.7965031 (488.56 it/sec) -training >> step=8099700, episode=1351 reward=0.7818196 (526.34 it/sec) -training >> step=8099800, episode=1351 reward=0.7797629 (559.87 it/sec) -training >> step=8099900, episode=1351 reward=0.7604657 (532.75 it/sec) -training >> step=8100000, episode=1351 reward=0.7728854 (531.65 it/sec) -training >> step=8100100, episode=1351 reward=0.8065736 (529.39 it/sec) -training >> step=8100200, episode=1351 reward=0.7726386 (541.49 it/sec) -training >> step=8100300, episode=1351 reward=0.7877749 (500.02 it/sec) -training >> step=8100400, episode=1351 reward=0.7800725 (520.79 it/sec) -training >> step=8100500, episode=1351 reward=0.7636623 (537.98 it/sec) -training >> step=8100600, episode=1351 reward=0.7841508 (506.84 it/sec) -training >> step=8100700, episode=1351 reward=0.7824982 (483.77 it/sec) -training >> step=8100800, episode=1351 reward=0.7743825 (505.29 it/sec) -training >> step=8100900, episode=1351 reward=0.8096041 (498.91 it/sec) -training >> step=8101000, episode=1351 reward=0.7955062 (518.37 it/sec) -training >> step=8101100, episode=1351 reward=0.780888 (522.32 it/sec) -training >> step=8101200, episode=1351 reward=0.781469 (505.91 it/sec) -training >> step=8101300, episode=1351 reward=0.7950801 (526.32 it/sec) -training >> step=8101400, episode=1351 reward=0.7915806 (517.81 it/sec) -training >> step=8101500, episode=1351 reward=0.7959569 (532.59 it/sec) -training >> step=8101600, episode=1351 reward=0.7837917 (500.89 it/sec) -training >> step=8101700, episode=1351 reward=0.7947425 (503.45 it/sec) -training >> step=8101800, episode=1351 reward=0.8029745 (517.88 it/sec) -training >> step=8101900, episode=1351 reward=0.7785768 (530.72 it/sec) -training >> step=8102000, episode=1351 reward=0.7859811 (522.78 it/sec) -training >> step=8102100, episode=1351 reward=0.7882402 (510.03 it/sec) -training >> step=8102200, episode=1351 reward=0.7989263 (393.37 it/sec) -training >> step=8102300, episode=1351 reward=0.7838927 (514.30 it/sec) -training >> step=8102400, episode=1351 reward=0.7928405 (540.40 it/sec) -training >> step=8102500, episode=1351 reward=0.7953184 (529.01 it/sec) -training >> step=8102600, episode=1351 reward=0.793866 (547.48 it/sec) -training >> step=8102700, episode=1351 reward=0.7597773 (534.77 it/sec) -training >> step=8102800, episode=1351 reward=0.8002372 (497.48 it/sec) -training >> step=8102900, episode=1351 reward=0.7813582 (518.32 it/sec) -training >> step=8103000, episode=1351 reward=0.7945751 (516.59 it/sec) -training >> step=8103100, episode=1351 reward=0.7868596 (522.99 it/sec) -training >> step=8103200, episode=1351 reward=0.7882928 (522.86 it/sec) -training >> step=8103300, episode=1351 reward=0.7773263 (483.78 it/sec) -training >> step=8103400, episode=1351 reward=0.7953019 (469.48 it/sec) -training >> step=8103500, episode=1351 reward=0.794275 (491.76 it/sec) -training >> step=8103600, episode=1351 reward=0.7563709 (503.34 it/sec) -training >> step=8103700, episode=1351 reward=0.7877719 (539.98 it/sec) -training >> step=8103800, episode=1351 reward=0.7988538 (503.68 it/sec) -training >> step=8103900, episode=1351 reward=0.791934 (517.47 it/sec) -training >> step=8104000, episode=1351 reward=0.7822761 (520.71 it/sec) -training >> step=8104100, episode=1351 reward=0.7802132 (522.12 it/sec) -training >> step=8104200, episode=1351 reward=0.7906968 (521.64 it/sec) -training >> step=8104300, episode=1351 reward=0.8061225 (497.83 it/sec) -training >> step=8104400, episode=1351 reward=0.7868991 (542.82 it/sec) -training >> step=8104500, episode=1351 reward=0.7840728 (437.44 it/sec) -training >> step=8104600, episode=1351 reward=0.7686877 (478.20 it/sec) -training >> step=8104700, episode=1351 reward=0.7995912 (529.83 it/sec) -training >> step=8104800, episode=1351 reward=0.7847478 (486.35 it/sec) -training >> step=8104900, episode=1351 reward=0.791119 (478.35 it/sec) -training >> step=8105000, episode=1351 reward=0.7678818 (494.24 it/sec) -training >> step=8105100, episode=1351 reward=0.7939683 (531.31 it/sec) -training >> step=8105200, episode=1351 reward=0.7820188 (493.65 it/sec) -training >> step=8105300, episode=1352 reward=0.8002923 (109.54 it/sec) -training >> step=8105400, episode=1352 reward=0.789748 (542.30 it/sec) -training >> step=8105500, episode=1352 reward=0.7836445 (542.40 it/sec) -training >> step=8105600, episode=1352 reward=0.7882068 (534.06 it/sec) -training >> step=8105700, episode=1352 reward=0.780955 (509.74 it/sec) -training >> step=8105800, episode=1352 reward=0.7751301 (553.32 it/sec) -training >> step=8105900, episode=1352 reward=0.7832194 (543.67 it/sec) -training >> step=8106000, episode=1352 reward=0.7802063 (533.01 it/sec) -training >> step=8106100, episode=1352 reward=0.796367 (515.17 it/sec) -training >> step=8106200, episode=1352 reward=0.7933567 (562.30 it/sec) -training >> step=8106300, episode=1352 reward=0.7820006 (548.62 it/sec) -training >> step=8106400, episode=1352 reward=0.7889535 (517.33 it/sec) -training >> step=8106500, episode=1352 reward=0.7928072 (566.64 it/sec) -training >> step=8106600, episode=1352 reward=0.7854968 (514.67 it/sec) -training >> step=8106700, episode=1352 reward=0.7866737 (530.77 it/sec) -training >> step=8106800, episode=1352 reward=0.8071777 (509.56 it/sec) -training >> step=8106900, episode=1352 reward=0.7708595 (536.92 it/sec) -training >> step=8107000, episode=1352 reward=0.7955559 (492.36 it/sec) -training >> step=8107100, episode=1352 reward=0.808473 (533.14 it/sec) -training >> step=8107200, episode=1352 reward=0.7698599 (484.81 it/sec) -training >> step=8107300, episode=1352 reward=0.7926221 (553.90 it/sec) -training >> step=8107400, episode=1352 reward=0.7887988 (526.46 it/sec) -training >> step=8107500, episode=1352 reward=0.7899099 (515.08 it/sec) -training >> step=8107600, episode=1352 reward=0.7824492 (529.00 it/sec) -training >> step=8107700, episode=1352 reward=0.8019552 (520.56 it/sec) -training >> step=8107800, episode=1352 reward=0.7906157 (514.37 it/sec) -training >> step=8107900, episode=1352 reward=0.7803018 (513.97 it/sec) -training >> step=8108000, episode=1352 reward=0.7886835 (549.73 it/sec) -training >> step=8108100, episode=1352 reward=0.7801354 (496.80 it/sec) -training >> step=8108200, episode=1352 reward=0.7835378 (505.79 it/sec) -training >> step=8108300, episode=1352 reward=0.7736182 (474.36 it/sec) -training >> step=8108400, episode=1352 reward=0.7742799 (408.48 it/sec) -training >> step=8108500, episode=1352 reward=0.7888687 (536.67 it/sec) -training >> step=8108600, episode=1352 reward=0.7787263 (515.40 it/sec) -training >> step=8108700, episode=1352 reward=0.7624317 (557.71 it/sec) -training >> step=8108800, episode=1352 reward=0.8006377 (522.29 it/sec) -training >> step=8108900, episode=1352 reward=0.7768934 (500.30 it/sec) -training >> step=8109000, episode=1352 reward=0.7639441 (508.71 it/sec) -training >> step=8109100, episode=1352 reward=0.7701213 (517.37 it/sec) -training >> step=8109200, episode=1352 reward=0.7804133 (481.57 it/sec) -training >> step=8109300, episode=1352 reward=0.7891571 (474.66 it/sec) -training >> step=8109400, episode=1352 reward=0.8055822 (517.63 it/sec) -training >> step=8109500, episode=1352 reward=0.7988442 (513.48 it/sec) -training >> step=8109600, episode=1352 reward=0.7919461 (490.05 it/sec) -training >> step=8109700, episode=1352 reward=0.7816111 (510.19 it/sec) -training >> step=8109800, episode=1352 reward=0.7773296 (499.99 it/sec) -training >> step=8109900, episode=1352 reward=0.7708308 (474.14 it/sec) -training >> step=8110000, episode=1352 reward=0.7728513 (512.18 it/sec) -training >> step=8110100, episode=1352 reward=0.7918229 (513.76 it/sec) -training >> step=8110200, episode=1352 reward=0.787139 (561.32 it/sec) -training >> step=8110300, episode=1352 reward=0.7864836 (472.54 it/sec) -training >> step=8110400, episode=1352 reward=0.7971285 (466.03 it/sec) -training >> step=8110500, episode=1352 reward=0.7990807 (452.79 it/sec) -training >> step=8110600, episode=1352 reward=0.78302 (453.92 it/sec) -training >> step=8110700, episode=1352 reward=0.7714413 (482.90 it/sec) -training >> step=8110800, episode=1352 reward=0.7782544 (451.63 it/sec) -training >> step=8110900, episode=1352 reward=0.7634028 (516.59 it/sec) -training >> step=8111000, episode=1352 reward=0.7903033 (460.16 it/sec) -training >> step=8111100, episode=1352 reward=0.782631 (493.08 it/sec) -training >> step=8111200, episode=1352 reward=0.777186 (519.51 it/sec) -training >> step=8111300, episode=1353 reward=0.7883381 (119.20 it/sec) -training >> step=8111400, episode=1353 reward=0.7921112 (529.23 it/sec) -training >> step=8111500, episode=1353 reward=0.7939337 (511.47 it/sec) -training >> step=8111600, episode=1353 reward=0.7738674 (522.55 it/sec) -training >> step=8111700, episode=1353 reward=0.7817582 (527.96 it/sec) -training >> step=8111800, episode=1353 reward=0.7951911 (533.05 it/sec) -training >> step=8111900, episode=1353 reward=0.7793705 (557.82 it/sec) -training >> step=8112000, episode=1353 reward=0.7985237 (524.05 it/sec) -training >> step=8112100, episode=1353 reward=0.7728379 (550.34 it/sec) -training >> step=8112200, episode=1353 reward=0.8071036 (505.60 it/sec) -training >> step=8112300, episode=1353 reward=0.7923989 (548.52 it/sec) -training >> step=8112400, episode=1353 reward=0.7955725 (532.00 it/sec) -training >> step=8112500, episode=1353 reward=0.7881451 (539.69 it/sec) -training >> step=8112600, episode=1353 reward=0.7878494 (534.27 it/sec) -training >> step=8112700, episode=1353 reward=0.7995039 (566.25 it/sec) -training >> step=8112800, episode=1353 reward=0.7916489 (528.69 it/sec) -training >> step=8112900, episode=1353 reward=0.7855885 (468.74 it/sec) -training >> step=8113000, episode=1353 reward=0.767796 (436.20 it/sec) -training >> step=8113100, episode=1353 reward=0.7993031 (513.79 it/sec) -training >> step=8113200, episode=1353 reward=0.7892318 (524.09 it/sec) -training >> step=8113300, episode=1353 reward=0.7887664 (497.94 it/sec) -training >> step=8113400, episode=1353 reward=0.7996961 (540.05 it/sec) -training >> step=8113500, episode=1353 reward=0.770758 (556.35 it/sec) -training >> step=8113600, episode=1353 reward=0.8045419 (499.26 it/sec) -training >> step=8113700, episode=1353 reward=0.7849896 (542.95 it/sec) -training >> step=8113800, episode=1353 reward=0.7894888 (540.73 it/sec) -training >> step=8113900, episode=1353 reward=0.7941419 (509.27 it/sec) -training >> step=8114000, episode=1353 reward=0.7691244 (548.76 it/sec) -training >> step=8114100, episode=1353 reward=0.7942381 (534.13 it/sec) -training >> step=8114200, episode=1353 reward=0.7953436 (510.48 it/sec) -training >> step=8114300, episode=1353 reward=0.8023388 (504.12 it/sec) -training >> step=8114400, episode=1353 reward=0.77319 (390.42 it/sec) -training >> step=8114500, episode=1353 reward=0.7894506 (574.50 it/sec) -training >> step=8114600, episode=1353 reward=0.7788834 (515.13 it/sec) -training >> step=8114700, episode=1353 reward=0.787442 (529.49 it/sec) -training >> step=8114800, episode=1353 reward=0.7794386 (536.43 it/sec) -training >> step=8114900, episode=1353 reward=0.7802254 (549.67 it/sec) -training >> step=8115000, episode=1353 reward=0.7751541 (525.19 it/sec) -training >> step=8115100, episode=1353 reward=0.7992419 (503.05 it/sec) -training >> step=8115200, episode=1353 reward=0.7741112 (503.01 it/sec) -training >> step=8115300, episode=1353 reward=0.7872435 (541.10 it/sec) -training >> step=8115400, episode=1353 reward=0.780082 (516.42 it/sec) -training >> step=8115500, episode=1353 reward=0.7904174 (511.90 it/sec) -training >> step=8115600, episode=1353 reward=0.7845389 (564.84 it/sec) -training >> step=8115700, episode=1353 reward=0.7944819 (513.98 it/sec) -training >> step=8115800, episode=1353 reward=0.7936343 (504.57 it/sec) -training >> step=8115900, episode=1353 reward=0.7784432 (512.71 it/sec) -training >> step=8116000, episode=1353 reward=0.7706623 (525.88 it/sec) -training >> step=8116100, episode=1353 reward=0.7818801 (521.55 it/sec) -training >> step=8116200, episode=1353 reward=0.7983747 (529.62 it/sec) -training >> step=8116300, episode=1353 reward=0.7845286 (514.09 it/sec) -training >> step=8116400, episode=1353 reward=0.7900105 (514.78 it/sec) -training >> step=8116500, episode=1353 reward=0.7920818 (523.57 it/sec) -training >> step=8116600, episode=1353 reward=0.7920357 (525.17 it/sec) -training >> step=8116700, episode=1353 reward=0.7700772 (555.70 it/sec) -training >> step=8116800, episode=1353 reward=0.7846658 (507.98 it/sec) -training >> step=8116900, episode=1353 reward=0.7738659 (488.62 it/sec) -training >> step=8117000, episode=1353 reward=0.7805163 (484.28 it/sec) -training >> step=8117100, episode=1353 reward=0.7898296 (509.28 it/sec) -training >> step=8117200, episode=1353 reward=0.80151 (509.86 it/sec) -training >> step=8117300, episode=1354 reward=0.766317 (118.52 it/sec) -training >> step=8117400, episode=1354 reward=0.7939897 (542.35 it/sec) -training >> step=8117500, episode=1354 reward=0.7760097 (530.09 it/sec) -training >> step=8117600, episode=1354 reward=0.790968 (516.01 it/sec) -training >> step=8117700, episode=1354 reward=0.7657497 (522.82 it/sec) -training >> step=8117800, episode=1354 reward=0.7764838 (506.58 it/sec) -training >> step=8117900, episode=1354 reward=0.8018147 (492.89 it/sec) -training >> step=8118000, episode=1354 reward=0.7956502 (511.98 it/sec) -training >> step=8118100, episode=1354 reward=0.7765043 (553.12 it/sec) -training >> step=8118200, episode=1354 reward=0.8029572 (538.43 it/sec) -training >> step=8118300, episode=1354 reward=0.7856041 (539.71 it/sec) -training >> step=8118400, episode=1354 reward=0.7859839 (538.16 it/sec) -training >> step=8118500, episode=1354 reward=0.7920979 (556.87 it/sec) -training >> step=8118600, episode=1354 reward=0.7908038 (504.44 it/sec) -training >> step=8118700, episode=1354 reward=0.7949572 (543.44 it/sec) -training >> step=8118800, episode=1354 reward=0.7863057 (521.82 it/sec) -training >> step=8118900, episode=1354 reward=0.7671509 (566.72 it/sec) -training >> step=8119000, episode=1354 reward=0.7977685 (549.09 it/sec) -training >> step=8119100, episode=1354 reward=0.7941154 (518.26 it/sec) -training >> step=8119200, episode=1354 reward=0.8105059 (496.48 it/sec) -training >> step=8119300, episode=1354 reward=0.7859306 (528.94 it/sec) -training >> step=8119400, episode=1354 reward=0.7969649 (493.42 it/sec) -training >> step=8119500, episode=1354 reward=0.7882414 (494.56 it/sec) -training >> step=8119600, episode=1354 reward=0.7834462 (528.25 it/sec) -training >> step=8119700, episode=1354 reward=0.7913181 (479.71 it/sec) -training >> step=8119800, episode=1354 reward=0.7760016 (464.84 it/sec) -training >> step=8119900, episode=1354 reward=0.7960194 (443.48 it/sec) -training >> step=8120000, episode=1354 reward=0.7696999 (530.75 it/sec) -training >> step=8120100, episode=1354 reward=0.7621498 (515.82 it/sec) -training >> step=8120200, episode=1354 reward=0.7824091 (500.52 it/sec) -training >> step=8120300, episode=1354 reward=0.7752184 (519.08 it/sec) -training >> step=8120400, episode=1354 reward=0.7836831 (502.89 it/sec) -training >> step=8120500, episode=1354 reward=0.771732 (410.53 it/sec) -training >> step=8120600, episode=1354 reward=0.7918599 (540.99 it/sec) -training >> step=8120700, episode=1354 reward=0.7710665 (531.46 it/sec) -training >> step=8120800, episode=1354 reward=0.7840899 (515.19 it/sec) -training >> step=8120900, episode=1354 reward=0.7803557 (518.00 it/sec) -training >> step=8121000, episode=1354 reward=0.7864919 (515.45 it/sec) -training >> step=8121100, episode=1354 reward=0.8154333 (537.55 it/sec) -training >> step=8121200, episode=1354 reward=0.7801419 (504.92 it/sec) -training >> step=8121300, episode=1354 reward=0.8082346 (507.61 it/sec) -training >> step=8121400, episode=1354 reward=0.7840526 (523.37 it/sec) -training >> step=8121500, episode=1354 reward=0.7861878 (514.83 it/sec) -training >> step=8121600, episode=1354 reward=0.7973824 (533.83 it/sec) -training >> step=8121700, episode=1354 reward=0.7821204 (524.06 it/sec) -training >> step=8121800, episode=1354 reward=0.7808986 (524.88 it/sec) -training >> step=8121900, episode=1354 reward=0.7826658 (508.53 it/sec) -training >> step=8122000, episode=1354 reward=0.7807864 (509.37 it/sec) -training >> step=8122100, episode=1354 reward=0.7808213 (536.07 it/sec) -training >> step=8122200, episode=1354 reward=0.7975177 (520.32 it/sec) -training >> step=8122300, episode=1354 reward=0.7894208 (492.03 it/sec) -training >> step=8122400, episode=1354 reward=0.7740007 (529.26 it/sec) -training >> step=8122500, episode=1354 reward=0.7872232 (537.17 it/sec) -training >> step=8122600, episode=1354 reward=0.796369 (462.68 it/sec) -training >> step=8122700, episode=1354 reward=0.804934 (463.26 it/sec) -training >> step=8122800, episode=1354 reward=0.789241 (461.42 it/sec) -training >> step=8122900, episode=1354 reward=0.7922089 (502.40 it/sec) -training >> step=8123000, episode=1354 reward=0.7783474 (441.33 it/sec) -training >> step=8123100, episode=1354 reward=0.7869642 (430.82 it/sec) -training >> step=8123200, episode=1354 reward=0.7662539 (483.51 it/sec) -training >> step=8123300, episode=1355 reward=0.7829172 (118.19 it/sec) -training >> step=8123400, episode=1355 reward=0.7971896 (512.69 it/sec) -training >> step=8123500, episode=1355 reward=0.8002419 (532.11 it/sec) -training >> step=8123600, episode=1355 reward=0.7814528 (541.91 it/sec) -training >> step=8123700, episode=1355 reward=0.7989164 (542.99 it/sec) -training >> step=8123800, episode=1355 reward=0.7915109 (522.12 it/sec) -training >> step=8123900, episode=1355 reward=0.8019822 (540.98 it/sec) -training >> step=8124000, episode=1355 reward=0.7943918 (555.40 it/sec) -training >> step=8124100, episode=1355 reward=0.7979937 (518.65 it/sec) -training >> step=8124200, episode=1355 reward=0.7870017 (533.99 it/sec) -training >> step=8124300, episode=1355 reward=0.7886074 (542.20 it/sec) -training >> step=8124400, episode=1355 reward=0.7988344 (537.11 it/sec) -training >> step=8124500, episode=1355 reward=0.7947615 (536.46 it/sec) -training >> step=8124600, episode=1355 reward=0.7747589 (526.02 it/sec) -training >> step=8124700, episode=1355 reward=0.7983357 (559.00 it/sec) -training >> step=8124800, episode=1355 reward=0.7775854 (530.99 it/sec) -training >> step=8124900, episode=1355 reward=0.784082 (514.77 it/sec) -training >> step=8125000, episode=1355 reward=0.779758 (559.62 it/sec) -training >> step=8125100, episode=1355 reward=0.8018526 (552.84 it/sec) -training >> step=8125200, episode=1355 reward=0.7927941 (516.43 it/sec) -training >> step=8125300, episode=1355 reward=0.7766072 (523.84 it/sec) -training >> step=8125400, episode=1355 reward=0.7909222 (551.56 it/sec) -training >> step=8125500, episode=1355 reward=0.793205 (537.17 it/sec) -training >> step=8125600, episode=1355 reward=0.8109204 (544.69 it/sec) -training >> step=8125700, episode=1355 reward=0.7868116 (517.06 it/sec) -training >> step=8125800, episode=1355 reward=0.7826273 (558.93 it/sec) -training >> step=8125900, episode=1355 reward=0.7687396 (529.44 it/sec) -training >> step=8126000, episode=1355 reward=0.7897725 (531.94 it/sec) -training >> step=8126100, episode=1355 reward=0.7830186 (531.03 it/sec) -training >> step=8126200, episode=1355 reward=0.7815197 (546.53 it/sec) -training >> step=8126300, episode=1355 reward=0.7962064 (502.74 it/sec) -training >> step=8126400, episode=1355 reward=0.8050153 (496.45 it/sec) -training >> step=8126500, episode=1355 reward=0.7909623 (515.90 it/sec) -training >> step=8126600, episode=1355 reward=0.8155736 (408.10 it/sec) -training >> step=8126700, episode=1355 reward=0.7784024 (545.26 it/sec) -training >> step=8126800, episode=1355 reward=0.7919123 (527.98 it/sec) -training >> step=8126900, episode=1355 reward=0.7932658 (541.48 it/sec) -training >> step=8127000, episode=1355 reward=0.7861332 (505.60 it/sec) -training >> step=8127100, episode=1355 reward=0.7976703 (503.25 it/sec) -training >> step=8127200, episode=1355 reward=0.7813772 (554.14 it/sec) -training >> step=8127300, episode=1355 reward=0.7654929 (515.35 it/sec) -training >> step=8127400, episode=1355 reward=0.7866333 (517.43 it/sec) -training >> step=8127500, episode=1355 reward=0.7841846 (509.31 it/sec) -training >> step=8127600, episode=1355 reward=0.792582 (533.03 it/sec) -training >> step=8127700, episode=1355 reward=0.7797225 (506.82 it/sec) -training >> step=8127800, episode=1355 reward=0.7595239 (532.59 it/sec) -training >> step=8127900, episode=1355 reward=0.7784499 (456.43 it/sec) -training >> step=8128000, episode=1355 reward=0.7988753 (486.97 it/sec) -training >> step=8128100, episode=1355 reward=0.7898681 (502.20 it/sec) -training >> step=8128200, episode=1355 reward=0.7981974 (518.44 it/sec) -training >> step=8128300, episode=1355 reward=0.7881191 (530.51 it/sec) -training >> step=8128400, episode=1355 reward=0.7697302 (500.28 it/sec) -training >> step=8128500, episode=1355 reward=0.7886235 (497.55 it/sec) -training >> step=8128600, episode=1355 reward=0.7742959 (500.59 it/sec) -training >> step=8128700, episode=1355 reward=0.7885231 (543.94 it/sec) -training >> step=8128800, episode=1355 reward=0.7932819 (534.08 it/sec) -training >> step=8128900, episode=1355 reward=0.7758366 (475.21 it/sec) -training >> step=8129000, episode=1355 reward=0.8000904 (525.97 it/sec) -training >> step=8129100, episode=1355 reward=0.792696 (533.62 it/sec) -training >> step=8129200, episode=1355 reward=0.7912178 (522.51 it/sec) -training >> step=8129300, episode=1356 reward=0.7905923 (111.18 it/sec) -training >> step=8129400, episode=1356 reward=0.7833845 (564.57 it/sec) -training >> step=8129500, episode=1356 reward=0.7968028 (562.36 it/sec) -training >> step=8129600, episode=1356 reward=0.7890705 (529.06 it/sec) -training >> step=8129700, episode=1356 reward=0.7825727 (528.05 it/sec) -training >> step=8129800, episode=1356 reward=0.8064801 (542.38 it/sec) -training >> step=8129900, episode=1356 reward=0.7927285 (537.53 it/sec) -training >> step=8130000, episode=1356 reward=0.8085203 (517.99 it/sec) -training >> step=8130100, episode=1356 reward=0.7891676 (577.12 it/sec) -training >> step=8130200, episode=1356 reward=0.7904023 (510.79 it/sec) -training >> step=8130300, episode=1356 reward=0.7935196 (510.63 it/sec) -training >> step=8130400, episode=1356 reward=0.7693676 (516.01 it/sec) -training >> step=8130500, episode=1356 reward=0.8059998 (555.92 it/sec) -training >> step=8130600, episode=1356 reward=0.7947219 (540.05 it/sec) -training >> step=8130700, episode=1356 reward=0.7612785 (489.05 it/sec) -training >> step=8130800, episode=1356 reward=0.7878453 (548.30 it/sec) -training >> step=8130900, episode=1356 reward=0.7861344 (539.27 it/sec) -training >> step=8131000, episode=1356 reward=0.7832164 (524.86 it/sec) -training >> step=8131100, episode=1356 reward=0.7945368 (536.11 it/sec) -training >> step=8131200, episode=1356 reward=0.7906808 (580.65 it/sec) -training >> step=8131300, episode=1356 reward=0.7904568 (509.74 it/sec) -training >> step=8131400, episode=1356 reward=0.7776155 (532.53 it/sec) -training >> step=8131500, episode=1356 reward=0.7942443 (521.11 it/sec) -training >> step=8131600, episode=1356 reward=0.7902467 (569.77 it/sec) -training >> step=8131700, episode=1356 reward=0.7837763 (555.62 it/sec) -training >> step=8131800, episode=1356 reward=0.781786 (514.05 it/sec) -training >> step=8131900, episode=1356 reward=0.7913058 (561.06 it/sec) -training >> step=8132000, episode=1356 reward=0.8104621 (497.44 it/sec) -training >> step=8132100, episode=1356 reward=0.747866 (506.23 it/sec) -training >> step=8132200, episode=1356 reward=0.7918375 (524.38 it/sec) -training >> step=8132300, episode=1356 reward=0.7767996 (545.51 it/sec) -training >> step=8132400, episode=1356 reward=0.7752818 (482.30 it/sec) -training >> step=8132500, episode=1356 reward=0.7874829 (506.99 it/sec) -training >> step=8132600, episode=1356 reward=0.7931408 (371.52 it/sec) -training >> step=8132700, episode=1356 reward=0.7809066 (544.13 it/sec) -training >> step=8132800, episode=1356 reward=0.776798 (437.54 it/sec) -training >> step=8132900, episode=1356 reward=0.7800731 (475.11 it/sec) -training >> step=8133000, episode=1356 reward=0.7947284 (526.73 it/sec) -training >> step=8133100, episode=1356 reward=0.7804621 (509.06 it/sec) -training >> step=8133200, episode=1356 reward=0.7736012 (523.64 it/sec) -training >> step=8133300, episode=1356 reward=0.7847477 (525.67 it/sec) -training >> step=8133400, episode=1356 reward=0.7892565 (524.58 it/sec) -training >> step=8133500, episode=1356 reward=0.7897723 (486.57 it/sec) -training >> step=8133600, episode=1356 reward=0.7881889 (521.15 it/sec) -training >> step=8133700, episode=1356 reward=0.7873129 (485.44 it/sec) -training >> step=8133800, episode=1356 reward=0.7935503 (536.93 it/sec) -training >> step=8133900, episode=1356 reward=0.7774954 (502.80 it/sec) -training >> step=8134000, episode=1356 reward=0.7683154 (477.38 it/sec) -training >> step=8134100, episode=1356 reward=0.7808684 (503.12 it/sec) -training >> step=8134200, episode=1356 reward=0.7894402 (500.56 it/sec) -training >> step=8134300, episode=1356 reward=0.8097248 (478.47 it/sec) -training >> step=8134400, episode=1356 reward=0.7788651 (492.70 it/sec) -training >> step=8134500, episode=1356 reward=0.7962242 (545.71 it/sec) -training >> step=8134600, episode=1356 reward=0.790282 (443.63 it/sec) -training >> step=8134700, episode=1356 reward=0.7910748 (435.24 it/sec) -training >> step=8134800, episode=1356 reward=0.8012888 (488.15 it/sec) -training >> step=8134900, episode=1356 reward=0.7873541 (472.87 it/sec) -training >> step=8135000, episode=1356 reward=0.7987924 (474.93 it/sec) -training >> step=8135100, episode=1356 reward=0.7941695 (409.75 it/sec) -training >> step=8135200, episode=1356 reward=0.784978 (505.75 it/sec) -training >> step=8135300, episode=1357 reward=0.8032377 (110.62 it/sec) -training >> step=8135400, episode=1357 reward=0.7765288 (545.17 it/sec) -training >> step=8135500, episode=1357 reward=0.7918354 (517.96 it/sec) -training >> step=8135600, episode=1357 reward=0.7954876 (531.55 it/sec) -training >> step=8135700, episode=1357 reward=0.7724267 (528.88 it/sec) -training >> step=8135800, episode=1357 reward=0.7890936 (544.10 it/sec) -training >> step=8135900, episode=1357 reward=0.8003071 (547.40 it/sec) -training >> step=8136000, episode=1357 reward=0.7719241 (570.06 it/sec) -training >> step=8136100, episode=1357 reward=0.7814747 (528.88 it/sec) -training >> step=8136200, episode=1357 reward=0.7895879 (527.64 it/sec) -training >> step=8136300, episode=1357 reward=0.7935909 (516.47 it/sec) -training >> step=8136400, episode=1357 reward=0.7961459 (529.24 it/sec) -training >> step=8136500, episode=1357 reward=0.7899897 (542.81 it/sec) -training >> step=8136600, episode=1357 reward=0.8111982 (510.85 it/sec) -training >> step=8136700, episode=1357 reward=0.7926841 (561.59 it/sec) -training >> step=8136800, episode=1357 reward=0.7918919 (510.28 it/sec) -training >> step=8136900, episode=1357 reward=0.8077269 (518.22 it/sec) -training >> step=8137000, episode=1357 reward=0.7956259 (565.73 it/sec) -training >> step=8137100, episode=1357 reward=0.7913585 (550.98 it/sec) -training >> step=8137200, episode=1357 reward=0.770675 (508.51 it/sec) -training >> step=8137300, episode=1357 reward=0.7738536 (522.17 it/sec) -training >> step=8137400, episode=1357 reward=0.8069587 (516.59 it/sec) -training >> step=8137500, episode=1357 reward=0.7980136 (465.70 it/sec) -training >> step=8137600, episode=1357 reward=0.7768372 (498.78 it/sec) -training >> step=8137700, episode=1357 reward=0.7803005 (493.42 it/sec) -training >> step=8137800, episode=1357 reward=0.7906107 (522.19 it/sec) -training >> step=8137900, episode=1357 reward=0.793869 (489.28 it/sec) -training >> step=8138000, episode=1357 reward=0.7967678 (487.84 it/sec) -training >> step=8138100, episode=1357 reward=0.7867911 (553.24 it/sec) -training >> step=8138200, episode=1357 reward=0.7702328 (525.07 it/sec) -training >> step=8138300, episode=1357 reward=0.7969574 (509.13 it/sec) -training >> step=8138400, episode=1357 reward=0.7954882 (484.19 it/sec) -training >> step=8138500, episode=1357 reward=0.7946256 (548.37 it/sec) -training >> step=8138600, episode=1357 reward=0.7957407 (514.46 it/sec) -training >> step=8138700, episode=1357 reward=0.7959329 (422.17 it/sec) -training >> step=8138800, episode=1357 reward=0.8028863 (537.86 it/sec) -training >> step=8138900, episode=1357 reward=0.7896999 (539.37 it/sec) -training >> step=8139000, episode=1357 reward=0.786616 (516.81 it/sec) -training >> step=8139100, episode=1357 reward=0.795424 (523.28 it/sec) -training >> step=8139200, episode=1357 reward=0.7982034 (534.78 it/sec) -training >> step=8139300, episode=1357 reward=0.7862221 (509.97 it/sec) -training >> step=8139400, episode=1357 reward=0.7703692 (497.14 it/sec) -training >> step=8139500, episode=1357 reward=0.7965475 (492.97 it/sec) -training >> step=8139600, episode=1357 reward=0.79163 (547.98 it/sec) -training >> step=8139700, episode=1357 reward=0.7888821 (518.53 it/sec) -training >> step=8139800, episode=1357 reward=0.7842087 (513.04 it/sec) -training >> step=8139900, episode=1357 reward=0.7829523 (510.83 it/sec) -training >> step=8140000, episode=1357 reward=0.7987751 (538.15 it/sec) -training >> step=8140100, episode=1357 reward=0.7653872 (479.93 it/sec) -training >> step=8140200, episode=1357 reward=0.7698399 (498.43 it/sec) -training >> step=8140300, episode=1357 reward=0.7943959 (535.83 it/sec) -training >> step=8140400, episode=1357 reward=0.8076096 (520.02 it/sec) -training >> step=8140500, episode=1357 reward=0.7944742 (494.73 it/sec) -training >> step=8140600, episode=1357 reward=0.797915 (512.11 it/sec) -training >> step=8140700, episode=1357 reward=0.7964687 (554.77 it/sec) -training >> step=8140800, episode=1357 reward=0.7875373 (511.43 it/sec) -training >> step=8140900, episode=1357 reward=0.7893751 (485.55 it/sec) -training >> step=8141000, episode=1357 reward=0.7895305 (507.29 it/sec) -training >> step=8141100, episode=1357 reward=0.8024175 (527.15 it/sec) -training >> step=8141200, episode=1357 reward=0.7922659 (525.98 it/sec) -training >> step=8141300, episode=1358 reward=0.7993755 (112.66 it/sec) -training >> step=8141400, episode=1358 reward=0.781131 (566.34 it/sec) -training >> step=8141500, episode=1358 reward=0.7891409 (525.06 it/sec) -training >> step=8141600, episode=1358 reward=0.7977788 (496.33 it/sec) -training >> step=8141700, episode=1358 reward=0.7834805 (509.67 it/sec) -training >> step=8141800, episode=1358 reward=0.7965585 (558.58 it/sec) -training >> step=8141900, episode=1358 reward=0.7882547 (519.25 it/sec) -training >> step=8142000, episode=1358 reward=0.790208 (503.35 it/sec) -training >> step=8142100, episode=1358 reward=0.8040063 (558.03 it/sec) -training >> step=8142200, episode=1358 reward=0.7895045 (516.15 it/sec) -training >> step=8142300, episode=1358 reward=0.7996761 (503.11 it/sec) -training >> step=8142400, episode=1358 reward=0.7952954 (450.73 it/sec) -training >> step=8142500, episode=1358 reward=0.7904941 (560.45 it/sec) -training >> step=8142600, episode=1358 reward=0.796414 (492.58 it/sec) -training >> step=8142700, episode=1358 reward=0.7703878 (479.18 it/sec) -training >> step=8142800, episode=1358 reward=0.7772737 (521.46 it/sec) -training >> step=8142900, episode=1358 reward=0.8106351 (565.33 it/sec) -training >> step=8143000, episode=1358 reward=0.783599 (519.51 it/sec) -training >> step=8143100, episode=1358 reward=0.7727274 (515.43 it/sec) -training >> step=8143200, episode=1358 reward=0.7989069 (526.00 it/sec) -training >> step=8143300, episode=1358 reward=0.7734976 (531.51 it/sec) -training >> step=8143400, episode=1358 reward=0.7996916 (521.24 it/sec) -training >> step=8143500, episode=1358 reward=0.7769166 (517.69 it/sec) -training >> step=8143600, episode=1358 reward=0.7913017 (558.86 it/sec) -training >> step=8143700, episode=1358 reward=0.7838688 (495.00 it/sec) -training >> step=8143800, episode=1358 reward=0.792114 (529.09 it/sec) -training >> step=8143900, episode=1358 reward=0.7924615 (506.37 it/sec) -training >> step=8144000, episode=1358 reward=0.7845864 (583.86 it/sec) -training >> step=8144100, episode=1358 reward=0.7739353 (547.07 it/sec) -training >> step=8144200, episode=1358 reward=0.797237 (511.40 it/sec) -training >> step=8144300, episode=1358 reward=0.7714416 (554.81 it/sec) -training >> step=8144400, episode=1358 reward=0.7916076 (490.41 it/sec) -training >> step=8144500, episode=1358 reward=0.791047 (542.88 it/sec) -training >> step=8144600, episode=1358 reward=0.7940055 (526.01 it/sec) -training >> step=8144700, episode=1358 reward=0.7842797 (565.40 it/sec) -training >> step=8144800, episode=1358 reward=0.7925057 (505.72 it/sec) -training >> step=8144900, episode=1358 reward=0.7918512 (374.58 it/sec) -training >> step=8145000, episode=1358 reward=0.7900637 (502.38 it/sec) -training >> step=8145100, episode=1358 reward=0.8025622 (572.41 it/sec) -training >> step=8145200, episode=1358 reward=0.8025457 (550.08 it/sec) -training >> step=8145300, episode=1358 reward=0.7882341 (549.15 it/sec) -training >> step=8145400, episode=1358 reward=0.8050812 (543.54 it/sec) -training >> step=8145500, episode=1358 reward=0.7585908 (514.61 it/sec) -training >> step=8145600, episode=1358 reward=0.7934767 (543.87 it/sec) -training >> step=8145700, episode=1358 reward=0.8051419 (522.45 it/sec) -training >> step=8145800, episode=1358 reward=0.7851583 (553.79 it/sec) -training >> step=8145900, episode=1358 reward=0.7934561 (518.61 it/sec) -training >> step=8146000, episode=1358 reward=0.7955161 (467.13 it/sec) -training >> step=8146100, episode=1358 reward=0.8125509 (507.34 it/sec) -training >> step=8146200, episode=1358 reward=0.7925799 (532.07 it/sec) -training >> step=8146300, episode=1358 reward=0.780798 (525.72 it/sec) -training >> step=8146400, episode=1358 reward=0.7733465 (503.62 it/sec) -training >> step=8146500, episode=1358 reward=0.7868881 (535.06 it/sec) -training >> step=8146600, episode=1358 reward=0.7641698 (470.19 it/sec) -training >> step=8146700, episode=1358 reward=0.7742197 (524.81 it/sec) -training >> step=8146800, episode=1358 reward=0.7832297 (521.53 it/sec) -training >> step=8146900, episode=1358 reward=0.765319 (533.19 it/sec) -training >> step=8147000, episode=1358 reward=0.789602 (502.28 it/sec) -training >> step=8147100, episode=1358 reward=0.7765517 (475.44 it/sec) -training >> step=8147200, episode=1358 reward=0.7714672 (502.68 it/sec) -training >> step=8147300, episode=1359 reward=0.7803476 (96.99 it/sec) -training >> step=8147400, episode=1359 reward=0.7895562 (537.14 it/sec) -training >> step=8147500, episode=1359 reward=0.7719225 (516.45 it/sec) -training >> step=8147600, episode=1359 reward=0.7949566 (559.96 it/sec) -training >> step=8147700, episode=1359 reward=0.8019066 (494.82 it/sec) -training >> step=8147800, episode=1359 reward=0.7770087 (538.43 it/sec) -training >> step=8147900, episode=1359 reward=0.7745246 (542.34 it/sec) -training >> step=8148000, episode=1359 reward=0.7741914 (494.45 it/sec) -training >> step=8148100, episode=1359 reward=0.7962461 (499.78 it/sec) -training >> step=8148200, episode=1359 reward=0.7864989 (516.61 it/sec) -training >> step=8148300, episode=1359 reward=0.7827698 (486.74 it/sec) -training >> step=8148400, episode=1359 reward=0.7968943 (506.90 it/sec) -training >> step=8148500, episode=1359 reward=0.8207833 (499.16 it/sec) -training >> step=8148600, episode=1359 reward=0.7926348 (525.88 it/sec) -training >> step=8148700, episode=1359 reward=0.7887726 (507.86 it/sec) -training >> step=8148800, episode=1359 reward=0.7848006 (480.46 it/sec) -training >> step=8148900, episode=1359 reward=0.7862481 (468.55 it/sec) -training >> step=8149000, episode=1359 reward=0.789018 (468.45 it/sec) -training >> step=8149100, episode=1359 reward=0.8097538 (494.34 it/sec) -training >> step=8149200, episode=1359 reward=0.7968713 (499.42 it/sec) -training >> step=8149300, episode=1359 reward=0.7766308 (536.21 it/sec) -training >> step=8149400, episode=1359 reward=0.7970579 (495.98 it/sec) -training >> step=8149500, episode=1359 reward=0.7944372 (525.56 it/sec) -training >> step=8149600, episode=1359 reward=0.783268 (494.14 it/sec) -training >> step=8149700, episode=1359 reward=0.7912886 (545.10 it/sec) -training >> step=8149800, episode=1359 reward=0.7919415 (511.51 it/sec) -training >> step=8149900, episode=1359 reward=0.7788272 (494.81 it/sec) -training >> step=8150000, episode=1359 reward=0.7885138 (531.22 it/sec) -training >> step=8150100, episode=1359 reward=0.7678549 (532.39 it/sec) -training >> step=8150200, episode=1359 reward=0.766377 (509.40 it/sec) -training >> step=8150300, episode=1359 reward=0.7777383 (499.33 it/sec) -training >> step=8150400, episode=1359 reward=0.7834553 (546.64 it/sec) -training >> step=8150500, episode=1359 reward=0.7827216 (505.45 it/sec) -training >> step=8150600, episode=1359 reward=0.7739063 (530.45 it/sec) -training >> step=8150700, episode=1359 reward=0.7884928 (508.80 it/sec) -training >> step=8150800, episode=1359 reward=0.803553 (544.45 it/sec) -training >> step=8150900, episode=1359 reward=0.7965264 (532.90 it/sec) -training >> step=8151000, episode=1359 reward=0.7726385 (387.64 it/sec) -training >> step=8151100, episode=1359 reward=0.7912171 (582.05 it/sec) -training >> step=8151200, episode=1359 reward=0.7965544 (527.27 it/sec) -training >> step=8151300, episode=1359 reward=0.7856672 (544.88 it/sec) -training >> step=8151400, episode=1359 reward=0.789887 (504.14 it/sec) -training >> step=8151500, episode=1359 reward=0.7973131 (544.95 it/sec) -training >> step=8151600, episode=1359 reward=0.7785393 (509.73 it/sec) -training >> step=8151700, episode=1359 reward=0.7876634 (523.17 it/sec) -training >> step=8151800, episode=1359 reward=0.7873583 (506.00 it/sec) -training >> step=8151900, episode=1359 reward=0.7768261 (486.96 it/sec) -training >> step=8152000, episode=1359 reward=0.7931842 (497.83 it/sec) -training >> step=8152100, episode=1359 reward=0.7847536 (494.19 it/sec) -training >> step=8152200, episode=1359 reward=0.7809037 (516.05 it/sec) -training >> step=8152300, episode=1359 reward=0.7936316 (479.73 it/sec) -training >> step=8152400, episode=1359 reward=0.7911848 (485.43 it/sec) -training >> step=8152500, episode=1359 reward=0.7994135 (506.82 it/sec) -training >> step=8152600, episode=1359 reward=0.7867608 (545.43 it/sec) -training >> step=8152700, episode=1359 reward=0.812324 (512.18 it/sec) -training >> step=8152800, episode=1359 reward=0.7841046 (519.08 it/sec) -training >> step=8152900, episode=1359 reward=0.7850767 (489.24 it/sec) -training >> step=8153000, episode=1359 reward=0.7925205 (512.90 it/sec) -training >> step=8153100, episode=1359 reward=0.7887316 (508.83 it/sec) -training >> step=8153200, episode=1359 reward=0.7711455 (527.82 it/sec) -training >> step=8153300, episode=1360 reward=0.7756351 (111.94 it/sec) -training >> step=8153400, episode=1360 reward=0.791756 (523.58 it/sec) -training >> step=8153500, episode=1360 reward=0.7696037 (529.88 it/sec) -training >> step=8153600, episode=1360 reward=0.7781672 (496.48 it/sec) -training >> step=8153700, episode=1360 reward=0.7585245 (526.02 it/sec) -training >> step=8153800, episode=1360 reward=0.7898238 (555.12 it/sec) -training >> step=8153900, episode=1360 reward=0.8020525 (517.79 it/sec) -training >> step=8154000, episode=1360 reward=0.782055 (540.97 it/sec) -training >> step=8154100, episode=1360 reward=0.7933214 (511.45 it/sec) -training >> step=8154200, episode=1360 reward=0.8032395 (515.76 it/sec) -training >> step=8154300, episode=1360 reward=0.8129056 (527.27 it/sec) -training >> step=8154400, episode=1360 reward=0.7578838 (565.13 it/sec) -training >> step=8154500, episode=1360 reward=0.7988912 (484.02 it/sec) -training >> step=8154600, episode=1360 reward=0.7899458 (517.45 it/sec) -training >> step=8154700, episode=1360 reward=0.7887502 (504.62 it/sec) -training >> step=8154800, episode=1360 reward=0.7848372 (583.67 it/sec) -training >> step=8154900, episode=1360 reward=0.7695833 (549.96 it/sec) -training >> step=8155000, episode=1360 reward=0.7936894 (520.36 it/sec) -training >> step=8155100, episode=1360 reward=0.7833502 (539.59 it/sec) -training >> step=8155200, episode=1360 reward=0.7718658 (535.35 it/sec) -training >> step=8155300, episode=1360 reward=0.8130145 (523.27 it/sec) -training >> step=8155400, episode=1360 reward=0.7888734 (545.36 it/sec) -training >> step=8155500, episode=1360 reward=0.7916345 (571.81 it/sec) -training >> step=8155600, episode=1360 reward=0.7921695 (513.81 it/sec) -training >> step=8155700, episode=1360 reward=0.7854117 (522.68 it/sec) -training >> step=8155800, episode=1360 reward=0.7837947 (497.29 it/sec) -training >> step=8155900, episode=1360 reward=0.7875537 (559.92 it/sec) -training >> step=8156000, episode=1360 reward=0.787681 (541.35 it/sec) -training >> step=8156100, episode=1360 reward=0.7877864 (538.00 it/sec) -training >> step=8156200, episode=1360 reward=0.780319 (524.22 it/sec) -training >> step=8156300, episode=1360 reward=0.7835782 (525.68 it/sec) -training >> step=8156400, episode=1360 reward=0.7903671 (541.03 it/sec) -training >> step=8156500, episode=1360 reward=0.7958778 (532.88 it/sec) -training >> step=8156600, episode=1360 reward=0.7710084 (543.83 it/sec) -training >> step=8156700, episode=1360 reward=0.7958201 (519.01 it/sec) -training >> step=8156800, episode=1360 reward=0.7935572 (500.17 it/sec) -training >> step=8156900, episode=1360 reward=0.798184 (550.05 it/sec) -training >> step=8157000, episode=1360 reward=0.7950376 (543.67 it/sec) -training >> step=8157100, episode=1360 reward=0.7734288 (426.32 it/sec) -training >> step=8157200, episode=1360 reward=0.7808211 (512.03 it/sec) -training >> step=8157300, episode=1360 reward=0.7974944 (538.36 it/sec) -training >> step=8157400, episode=1360 reward=0.7921486 (490.85 it/sec) -training >> step=8157500, episode=1360 reward=0.790391 (520.50 it/sec) -training >> step=8157600, episode=1360 reward=0.7954792 (532.32 it/sec) -training >> step=8157700, episode=1360 reward=0.7968457 (568.85 it/sec) -training >> step=8157800, episode=1360 reward=0.7833096 (532.95 it/sec) -training >> step=8157900, episode=1360 reward=0.7759103 (541.40 it/sec) -training >> step=8158000, episode=1360 reward=0.8107497 (506.95 it/sec) -training >> step=8158100, episode=1360 reward=0.7932684 (550.85 it/sec) -training >> step=8158200, episode=1360 reward=0.8000402 (546.34 it/sec) -training >> step=8158300, episode=1360 reward=0.7936884 (525.31 it/sec) -training >> step=8158400, episode=1360 reward=0.7695074 (531.71 it/sec) -training >> step=8158500, episode=1360 reward=0.7764397 (433.56 it/sec) -training >> step=8158600, episode=1360 reward=0.8099247 (508.11 it/sec) -training >> step=8158700, episode=1360 reward=0.7823243 (532.00 it/sec) -training >> step=8158800, episode=1360 reward=0.788306 (543.74 it/sec) -training >> step=8158900, episode=1360 reward=0.7856925 (511.20 it/sec) -training >> step=8159000, episode=1360 reward=0.7833483 (514.70 it/sec) -training >> step=8159100, episode=1360 reward=0.7826328 (516.89 it/sec) -training >> step=8159200, episode=1360 reward=0.7711536 (477.56 it/sec) -training >> step=8159300, episode=1361 reward=0.7731647 (100.39 it/sec) -training >> step=8159400, episode=1361 reward=0.7801425 (542.68 it/sec) -training >> step=8159500, episode=1361 reward=0.7802947 (525.08 it/sec) -training >> step=8159600, episode=1361 reward=0.7853665 (486.08 it/sec) -training >> step=8159700, episode=1361 reward=0.7990202 (499.12 it/sec) -training >> step=8159800, episode=1361 reward=0.7910315 (531.29 it/sec) -training >> step=8159900, episode=1361 reward=0.7983802 (563.73 it/sec) -training >> step=8160000, episode=1361 reward=0.7824845 (554.67 it/sec) -training >> step=8160100, episode=1361 reward=0.7867298 (515.43 it/sec) -training >> step=8160200, episode=1361 reward=0.8044462 (522.90 it/sec) -training >> step=8160300, episode=1361 reward=0.7933183 (533.28 it/sec) -training >> step=8160400, episode=1361 reward=0.7762498 (530.70 it/sec) -training >> step=8160500, episode=1361 reward=0.7825537 (536.26 it/sec) -training >> step=8160600, episode=1361 reward=0.7821779 (539.92 it/sec) -training >> step=8160700, episode=1361 reward=0.8028774 (503.24 it/sec) -training >> step=8160800, episode=1361 reward=0.8004434 (488.44 it/sec) -training >> step=8160900, episode=1361 reward=0.7995737 (551.63 it/sec) -training >> step=8161000, episode=1361 reward=0.7902874 (559.30 it/sec) -training >> step=8161100, episode=1361 reward=0.795602 (516.13 it/sec) -training >> step=8161200, episode=1361 reward=0.7781216 (480.68 it/sec) -training >> step=8161300, episode=1361 reward=0.7913288 (539.22 it/sec) -training >> step=8161400, episode=1361 reward=0.7690298 (497.27 it/sec) -training >> step=8161500, episode=1361 reward=0.7695725 (536.50 it/sec) -training >> step=8161600, episode=1361 reward=0.7814751 (548.20 it/sec) -training >> step=8161700, episode=1361 reward=0.7817773 (539.28 it/sec) -training >> step=8161800, episode=1361 reward=0.798512 (516.43 it/sec) -training >> step=8161900, episode=1361 reward=0.7899199 (500.41 it/sec) -training >> step=8162000, episode=1361 reward=0.781131 (498.96 it/sec) -training >> step=8162100, episode=1361 reward=0.7947299 (516.55 it/sec) -training >> step=8162200, episode=1361 reward=0.781377 (496.62 it/sec) -training >> step=8162300, episode=1361 reward=0.7990304 (510.64 it/sec) -training >> step=8162400, episode=1361 reward=0.7952822 (525.28 it/sec) -training >> step=8162500, episode=1361 reward=0.7924641 (497.69 it/sec) -training >> step=8162600, episode=1361 reward=0.790291 (502.91 it/sec) -training >> step=8162700, episode=1361 reward=0.7745252 (542.62 it/sec) -training >> step=8162800, episode=1361 reward=0.7883889 (542.73 it/sec) -training >> step=8162900, episode=1361 reward=0.7863141 (514.37 it/sec) -training >> step=8163000, episode=1361 reward=0.7900367 (499.55 it/sec) -training >> step=8163100, episode=1361 reward=0.7853973 (378.95 it/sec) -training >> step=8163200, episode=1361 reward=0.784204 (526.40 it/sec) -training >> step=8163300, episode=1361 reward=0.7979546 (470.26 it/sec) -training >> step=8163400, episode=1361 reward=0.7793211 (499.26 it/sec) -training >> step=8163500, episode=1361 reward=0.7664556 (512.95 it/sec) -training >> step=8163600, episode=1361 reward=0.8014706 (420.94 it/sec) -training >> step=8163700, episode=1361 reward=0.8009048 (452.00 it/sec) -training >> step=8163800, episode=1361 reward=0.78426 (504.44 it/sec) -training >> step=8163900, episode=1361 reward=0.7901403 (513.07 it/sec) -training >> step=8164000, episode=1361 reward=0.7749913 (507.50 it/sec) -training >> step=8164100, episode=1361 reward=0.78319 (536.99 it/sec) -training >> step=8164200, episode=1361 reward=0.7747565 (535.88 it/sec) -training >> step=8164300, episode=1361 reward=0.787037 (501.49 it/sec) -training >> step=8164400, episode=1361 reward=0.778565 (516.83 it/sec) -training >> step=8164500, episode=1361 reward=0.7832488 (502.96 it/sec) -training >> step=8164600, episode=1361 reward=0.7844291 (489.15 it/sec) -training >> step=8164700, episode=1361 reward=0.7707658 (516.89 it/sec) -training >> step=8164800, episode=1361 reward=0.7933063 (504.48 it/sec) -training >> step=8164900, episode=1361 reward=0.8079637 (501.13 it/sec) -training >> step=8165000, episode=1361 reward=0.8081803 (543.29 it/sec) -training >> step=8165100, episode=1361 reward=0.7789823 (533.39 it/sec) -training >> step=8165200, episode=1361 reward=0.7919105 (522.01 it/sec) -training >> step=8165300, episode=1362 reward=0.7807736 (120.78 it/sec) -training >> step=8165400, episode=1362 reward=0.788163 (544.17 it/sec) -training >> step=8165500, episode=1362 reward=0.8073643 (526.73 it/sec) -training >> step=8165600, episode=1362 reward=0.7834843 (522.11 it/sec) -training >> step=8165700, episode=1362 reward=0.776652 (541.75 it/sec) -training >> step=8165800, episode=1362 reward=0.7805427 (547.17 it/sec) -training >> step=8165900, episode=1362 reward=0.7946271 (536.25 it/sec) -training >> step=8166000, episode=1362 reward=0.782687 (548.10 it/sec) -training >> step=8166100, episode=1362 reward=0.781001 (501.67 it/sec) -training >> step=8166200, episode=1362 reward=0.8081442 (512.15 it/sec) -training >> step=8166300, episode=1362 reward=0.8024336 (532.09 it/sec) -training >> step=8166400, episode=1362 reward=0.8025607 (529.71 it/sec) -training >> step=8166500, episode=1362 reward=0.77054 (553.89 it/sec) -training >> step=8166600, episode=1362 reward=0.781378 (497.40 it/sec) -training >> step=8166700, episode=1362 reward=0.8092451 (509.61 it/sec) -training >> step=8166800, episode=1362 reward=0.7940499 (514.90 it/sec) -training >> step=8166900, episode=1362 reward=0.793628 (519.67 it/sec) -training >> step=8167000, episode=1362 reward=0.7731003 (548.77 it/sec) -training >> step=8167100, episode=1362 reward=0.7975597 (558.59 it/sec) -training >> step=8167200, episode=1362 reward=0.7830698 (499.50 it/sec) -training >> step=8167300, episode=1362 reward=0.7945829 (543.21 it/sec) -training >> step=8167400, episode=1362 reward=0.797607 (540.43 it/sec) -training >> step=8167500, episode=1362 reward=0.8135422 (559.09 it/sec) -training >> step=8167600, episode=1362 reward=0.7943366 (539.50 it/sec) -training >> step=8167700, episode=1362 reward=0.7908779 (525.29 it/sec) -training >> step=8167800, episode=1362 reward=0.7929556 (498.48 it/sec) -training >> step=8167900, episode=1362 reward=0.7704197 (518.35 it/sec) -training >> step=8168000, episode=1362 reward=0.7820652 (532.37 it/sec) -training >> step=8168100, episode=1362 reward=0.793153 (507.74 it/sec) -training >> step=8168200, episode=1362 reward=0.7856764 (537.14 it/sec) -training >> step=8168300, episode=1362 reward=0.8038826 (512.17 it/sec) -training >> step=8168400, episode=1362 reward=0.7781329 (486.27 it/sec) -training >> step=8168500, episode=1362 reward=0.7880638 (526.73 it/sec) -training >> step=8168600, episode=1362 reward=0.7719323 (548.25 it/sec) -training >> step=8168700, episode=1362 reward=0.7701148 (521.79 it/sec) -training >> step=8168800, episode=1362 reward=0.771713 (544.40 it/sec) -training >> step=8168900, episode=1362 reward=0.7862763 (567.51 it/sec) -training >> step=8169000, episode=1362 reward=0.7796069 (522.36 it/sec) -training >> step=8169100, episode=1362 reward=0.7783855 (531.77 it/sec) -training >> step=8169200, episode=1362 reward=0.78237 (544.87 it/sec) -training >> step=8169300, episode=1362 reward=0.8021287 (396.57 it/sec) -training >> step=8169400, episode=1362 reward=0.7904388 (537.06 it/sec) -training >> step=8169500, episode=1362 reward=0.8105101 (516.06 it/sec) -training >> step=8169600, episode=1362 reward=0.7904064 (530.66 it/sec) -training >> step=8169700, episode=1362 reward=0.7933947 (548.39 it/sec) -training >> step=8169800, episode=1362 reward=0.7810964 (538.86 it/sec) -training >> step=8169900, episode=1362 reward=0.788847 (528.23 it/sec) -training >> step=8170000, episode=1362 reward=0.7719656 (551.21 it/sec) -training >> step=8170100, episode=1362 reward=0.8011815 (534.93 it/sec) -training >> step=8170200, episode=1362 reward=0.7790471 (529.47 it/sec) -training >> step=8170300, episode=1362 reward=0.8019369 (550.70 it/sec) -training >> step=8170400, episode=1362 reward=0.7947465 (507.58 it/sec) -training >> step=8170500, episode=1362 reward=0.8067384 (513.93 it/sec) -training >> step=8170600, episode=1362 reward=0.7869652 (513.09 it/sec) -training >> step=8170700, episode=1362 reward=0.7910342 (528.40 it/sec) -training >> step=8170800, episode=1362 reward=0.7749798 (534.62 it/sec) -training >> step=8170900, episode=1362 reward=0.8036582 (522.99 it/sec) -training >> step=8171000, episode=1362 reward=0.7846931 (513.59 it/sec) -training >> step=8171100, episode=1362 reward=0.7877457 (532.11 it/sec) -training >> step=8171200, episode=1362 reward=0.8013864 (452.49 it/sec) -training >> step=8171300, episode=1363 reward=0.7757674 (92.75 it/sec) -training >> step=8171400, episode=1363 reward=0.7844253 (491.89 it/sec) -training >> step=8171500, episode=1363 reward=0.7697556 (459.64 it/sec) -training >> step=8171600, episode=1363 reward=0.7878881 (464.42 it/sec) -training >> step=8171700, episode=1363 reward=0.789898 (485.29 it/sec) -training >> step=8171800, episode=1363 reward=0.7827576 (512.15 it/sec) -training >> step=8171900, episode=1363 reward=0.8184476 (504.43 it/sec) -training >> step=8172000, episode=1363 reward=0.7869422 (489.30 it/sec) -training >> step=8172100, episode=1363 reward=0.7614748 (520.16 it/sec) -training >> step=8172200, episode=1363 reward=0.7904843 (541.03 it/sec) -training >> step=8172300, episode=1363 reward=0.7907618 (517.37 it/sec) -training >> step=8172400, episode=1363 reward=0.7859278 (535.62 it/sec) -training >> step=8172500, episode=1363 reward=0.7861214 (555.41 it/sec) -training >> step=8172600, episode=1363 reward=0.786437 (499.23 it/sec) -training >> step=8172700, episode=1363 reward=0.7873683 (522.23 it/sec) -training >> step=8172800, episode=1363 reward=0.7678915 (514.27 it/sec) -training >> step=8172900, episode=1363 reward=0.8064455 (550.11 it/sec) -training >> step=8173000, episode=1363 reward=0.7797817 (516.48 it/sec) -training >> step=8173100, episode=1363 reward=0.7717969 (488.84 it/sec) -training >> step=8173200, episode=1363 reward=0.8029373 (528.82 it/sec) -training >> step=8173300, episode=1363 reward=0.7882434 (530.74 it/sec) -training >> step=8173400, episode=1363 reward=0.8024168 (527.64 it/sec) -training >> step=8173500, episode=1363 reward=0.7885841 (524.64 it/sec) -training >> step=8173600, episode=1363 reward=0.7944105 (530.21 it/sec) -training >> step=8173700, episode=1363 reward=0.7961078 (487.99 it/sec) -training >> step=8173800, episode=1363 reward=0.7873794 (518.23 it/sec) -training >> step=8173900, episode=1363 reward=0.786225 (530.91 it/sec) -training >> step=8174000, episode=1363 reward=0.772064 (562.12 it/sec) -training >> step=8174100, episode=1363 reward=0.8015701 (544.86 it/sec) -training >> step=8174200, episode=1363 reward=0.7701883 (503.75 it/sec) -training >> step=8174300, episode=1363 reward=0.7877173 (466.54 it/sec) -training >> step=8174400, episode=1363 reward=0.7924792 (502.05 it/sec) -training >> step=8174500, episode=1363 reward=0.790729 (528.62 it/sec) -training >> step=8174600, episode=1363 reward=0.7821792 (543.57 it/sec) -training >> step=8174700, episode=1363 reward=0.7921394 (527.70 it/sec) -training >> step=8174800, episode=1363 reward=0.7800282 (512.09 it/sec) -training >> step=8174900, episode=1363 reward=0.8028447 (486.49 it/sec) -training >> step=8175000, episode=1363 reward=0.7850289 (521.73 it/sec) -training >> step=8175100, episode=1363 reward=0.8031029 (562.34 it/sec) -training >> step=8175200, episode=1363 reward=0.7823573 (523.40 it/sec) -training >> step=8175300, episode=1363 reward=0.7890936 (499.31 it/sec) -training >> step=8175400, episode=1363 reward=0.7858186 (528.71 it/sec) -training >> step=8175500, episode=1363 reward=0.8002623 (498.77 it/sec) -training >> step=8175600, episode=1363 reward=0.7751473 (385.93 it/sec) -training >> step=8175700, episode=1363 reward=0.7762961 (517.56 it/sec) -training >> step=8175800, episode=1363 reward=0.7817686 (548.40 it/sec) -training >> step=8175900, episode=1363 reward=0.771156 (525.80 it/sec) -training >> step=8176000, episode=1363 reward=0.7906445 (537.33 it/sec) -training >> step=8176100, episode=1363 reward=0.806831 (549.08 it/sec) -training >> step=8176200, episode=1363 reward=0.772289 (559.38 it/sec) -training >> step=8176300, episode=1363 reward=0.7828237 (513.67 it/sec) -training >> step=8176400, episode=1363 reward=0.8074114 (520.40 it/sec) -training >> step=8176500, episode=1363 reward=0.7979099 (550.16 it/sec) -training >> step=8176600, episode=1363 reward=0.7781956 (528.30 it/sec) -training >> step=8176700, episode=1363 reward=0.7957821 (495.90 it/sec) -training >> step=8176800, episode=1363 reward=0.7977864 (497.32 it/sec) -training >> step=8176900, episode=1363 reward=0.7836795 (502.00 it/sec) -training >> step=8177000, episode=1363 reward=0.7683696 (517.32 it/sec) -training >> step=8177100, episode=1363 reward=0.7766767 (513.11 it/sec) -training >> step=8177200, episode=1363 reward=0.8023793 (556.92 it/sec) -training >> step=8177300, episode=1364 reward=0.7895918 (123.28 it/sec) -training >> step=8177400, episode=1364 reward=0.7772934 (514.97 it/sec) -training >> step=8177500, episode=1364 reward=0.7847679 (523.15 it/sec) -training >> step=8177600, episode=1364 reward=0.7929755 (523.65 it/sec) -training >> step=8177700, episode=1364 reward=0.7950176 (502.93 it/sec) -training >> step=8177800, episode=1364 reward=0.7803319 (519.35 it/sec) -training >> step=8177900, episode=1364 reward=0.7823614 (531.67 it/sec) -training >> step=8178000, episode=1364 reward=0.7833463 (518.88 it/sec) -training >> step=8178100, episode=1364 reward=0.7952448 (437.16 it/sec) -training >> step=8178200, episode=1364 reward=0.7817387 (532.75 it/sec) -training >> step=8178300, episode=1364 reward=0.8142673 (543.32 it/sec) -training >> step=8178400, episode=1364 reward=0.7818903 (541.21 it/sec) -training >> step=8178500, episode=1364 reward=0.7876814 (523.82 it/sec) -training >> step=8178600, episode=1364 reward=0.7809736 (538.91 it/sec) -training >> step=8178700, episode=1364 reward=0.777019 (532.59 it/sec) -training >> step=8178800, episode=1364 reward=0.7620094 (511.91 it/sec) -training >> step=8178900, episode=1364 reward=0.7838676 (542.95 it/sec) -training >> step=8179000, episode=1364 reward=0.790714 (571.71 it/sec) -training >> step=8179100, episode=1364 reward=0.7885094 (522.49 it/sec) -training >> step=8179200, episode=1364 reward=0.8004983 (485.57 it/sec) -training >> step=8179300, episode=1364 reward=0.7843713 (558.58 it/sec) -training >> step=8179400, episode=1364 reward=0.7732628 (547.33 it/sec) -training >> step=8179500, episode=1364 reward=0.7860367 (507.38 it/sec) -training >> step=8179600, episode=1364 reward=0.7741572 (560.76 it/sec) -training >> step=8179700, episode=1364 reward=0.7630037 (530.52 it/sec) -training >> step=8179800, episode=1364 reward=0.7952867 (484.48 it/sec) -training >> step=8179900, episode=1364 reward=0.7893703 (502.12 it/sec) -training >> step=8180000, episode=1364 reward=0.7863342 (516.52 it/sec) -training >> step=8180100, episode=1364 reward=0.7706543 (556.28 it/sec) -training >> step=8180200, episode=1364 reward=0.7692614 (547.57 it/sec) -training >> step=8180300, episode=1364 reward=0.8067757 (474.57 it/sec) -training >> step=8180400, episode=1364 reward=0.7889764 (534.60 it/sec) -training >> step=8180500, episode=1364 reward=0.7765483 (529.85 it/sec) -training >> step=8180600, episode=1364 reward=0.7772709 (556.78 it/sec) -training >> step=8180700, episode=1364 reward=0.7695672 (540.24 it/sec) -training >> step=8180800, episode=1364 reward=0.7889087 (518.46 it/sec) -training >> step=8180900, episode=1364 reward=0.8051909 (503.12 it/sec) -training >> step=8181000, episode=1364 reward=0.7925913 (512.71 it/sec) -training >> step=8181100, episode=1364 reward=0.8014922 (539.91 it/sec) -training >> step=8181200, episode=1364 reward=0.8084353 (542.03 it/sec) -training >> step=8181300, episode=1364 reward=0.7962448 (505.53 it/sec) -training >> step=8181400, episode=1364 reward=0.7932996 (492.79 it/sec) -training >> step=8181500, episode=1364 reward=0.7907283 (534.21 it/sec) -training >> step=8181600, episode=1364 reward=0.7865547 (460.54 it/sec) -training >> step=8181700, episode=1364 reward=0.7909819 (349.37 it/sec) -training >> step=8181800, episode=1364 reward=0.7783857 (464.80 it/sec) -training >> step=8181900, episode=1364 reward=0.7859663 (512.89 it/sec) -training >> step=8182000, episode=1364 reward=0.7920392 (505.50 it/sec) -training >> step=8182100, episode=1364 reward=0.7628812 (515.34 it/sec) -training >> step=8182200, episode=1364 reward=0.7882625 (560.11 it/sec) -training >> step=8182300, episode=1364 reward=0.7954876 (508.74 it/sec) -training >> step=8182400, episode=1364 reward=0.7848548 (512.18 it/sec) -training >> step=8182500, episode=1364 reward=0.8014939 (510.22 it/sec) -training >> step=8182600, episode=1364 reward=0.7690518 (517.30 it/sec) -training >> step=8182700, episode=1364 reward=0.7981223 (540.30 it/sec) -training >> step=8182800, episode=1364 reward=0.778906 (522.35 it/sec) -training >> step=8182900, episode=1364 reward=0.7829385 (500.70 it/sec) -training >> step=8183000, episode=1364 reward=0.772374 (519.96 it/sec) -training >> step=8183100, episode=1364 reward=0.7828748 (514.95 it/sec) -training >> step=8183200, episode=1364 reward=0.7999428 (551.96 it/sec) -training >> step=8183300, episode=1365 reward=0.796682 (116.26 it/sec) -training >> step=8183400, episode=1365 reward=0.7826325 (506.66 it/sec) -training >> step=8183500, episode=1365 reward=0.7890779 (522.23 it/sec) -training >> step=8183600, episode=1365 reward=0.8036584 (477.19 it/sec) -training >> step=8183700, episode=1365 reward=0.8012224 (542.04 it/sec) -training >> step=8183800, episode=1365 reward=0.8011237 (519.65 it/sec) -training >> step=8183900, episode=1365 reward=0.7939011 (499.77 it/sec) -training >> step=8184000, episode=1365 reward=0.775534 (561.59 it/sec) -training >> step=8184100, episode=1365 reward=0.8029941 (468.84 it/sec) -training >> step=8184200, episode=1365 reward=0.7847865 (512.11 it/sec) -training >> step=8184300, episode=1365 reward=0.7926451 (503.52 it/sec) -training >> step=8184400, episode=1365 reward=0.791079 (545.14 it/sec) -training >> step=8184500, episode=1365 reward=0.7933207 (540.69 it/sec) -training >> step=8184600, episode=1365 reward=0.7828292 (514.13 it/sec) -training >> step=8184700, episode=1365 reward=0.780795 (508.64 it/sec) -training >> step=8184800, episode=1365 reward=0.7961005 (519.47 it/sec) -training >> step=8184900, episode=1365 reward=0.7948812 (539.75 it/sec) -training >> step=8185000, episode=1365 reward=0.7806466 (511.97 it/sec) -training >> step=8185100, episode=1365 reward=0.7832362 (535.31 it/sec) -training >> step=8185200, episode=1365 reward=0.7955583 (497.42 it/sec) -training >> step=8185300, episode=1365 reward=0.8021025 (497.56 it/sec) -training >> step=8185400, episode=1365 reward=0.8016284 (515.44 it/sec) -training >> step=8185500, episode=1365 reward=0.7886015 (523.18 it/sec) -training >> step=8185600, episode=1365 reward=0.7833514 (505.85 it/sec) -training >> step=8185700, episode=1365 reward=0.8074371 (434.00 it/sec) -training >> step=8185800, episode=1365 reward=0.805146 (504.03 it/sec) -training >> step=8185900, episode=1365 reward=0.7806138 (506.17 it/sec) -training >> step=8186000, episode=1365 reward=0.8138561 (501.34 it/sec) -training >> step=8186100, episode=1365 reward=0.7595916 (448.44 it/sec) -training >> step=8186200, episode=1365 reward=0.795479 (477.64 it/sec) -training >> step=8186300, episode=1365 reward=0.7808849 (441.79 it/sec) -training >> step=8186400, episode=1365 reward=0.7750766 (401.63 it/sec) -training >> step=8186500, episode=1365 reward=0.8060902 (497.48 it/sec) -training >> step=8186600, episode=1365 reward=0.7719613 (418.98 it/sec) -training >> step=8186700, episode=1365 reward=0.7765736 (433.97 it/sec) -training >> step=8186800, episode=1365 reward=0.7808992 (475.25 it/sec) -training >> step=8186900, episode=1365 reward=0.781821 (496.14 it/sec) -training >> step=8187000, episode=1365 reward=0.7719505 (429.88 it/sec) -training >> step=8187100, episode=1365 reward=0.7812621 (471.31 it/sec) -training >> step=8187200, episode=1365 reward=0.7920034 (442.29 it/sec) -training >> step=8187300, episode=1365 reward=0.8063959 (498.83 it/sec) -training >> step=8187400, episode=1365 reward=0.7816405 (483.12 it/sec) -training >> step=8187500, episode=1365 reward=0.7898932 (462.77 it/sec) -training >> step=8187600, episode=1365 reward=0.804453 (438.61 it/sec) -training >> step=8187700, episode=1365 reward=0.7717775 (372.70 it/sec) -training >> step=8187800, episode=1365 reward=0.7763582 (466.34 it/sec) -training >> step=8187900, episode=1365 reward=0.7919623 (485.73 it/sec) -training >> step=8188000, episode=1365 reward=0.7811093 (466.85 it/sec) -training >> step=8188100, episode=1365 reward=0.7838862 (452.08 it/sec) -training >> step=8188200, episode=1365 reward=0.8040605 (482.70 it/sec) -training >> step=8188300, episode=1365 reward=0.7863873 (488.41 it/sec) -training >> step=8188400, episode=1365 reward=0.7970877 (490.72 it/sec) -training >> step=8188500, episode=1365 reward=0.7597015 (495.41 it/sec) -training >> step=8188600, episode=1365 reward=0.7845324 (470.04 it/sec) -training >> step=8188700, episode=1365 reward=0.7860273 (441.68 it/sec) -training >> step=8188800, episode=1365 reward=0.7758417 (486.38 it/sec) -training >> step=8188900, episode=1365 reward=0.7959144 (505.11 it/sec) -training >> step=8189000, episode=1365 reward=0.7813039 (465.15 it/sec) -training >> step=8189100, episode=1365 reward=0.8073989 (469.66 it/sec) -training >> step=8189200, episode=1365 reward=0.7988423 (464.83 it/sec) -training >> step=8189300, episode=1366 reward=0.781417 (115.92 it/sec) -training >> step=8189400, episode=1366 reward=0.7940935 (443.61 it/sec) -training >> step=8189500, episode=1366 reward=0.7711817 (465.33 it/sec) -training >> step=8189600, episode=1366 reward=0.7621883 (464.96 it/sec) -training >> step=8189700, episode=1366 reward=0.7975612 (492.98 it/sec) -training >> step=8189800, episode=1366 reward=0.8023774 (486.48 it/sec) -training >> step=8189900, episode=1366 reward=0.7960467 (483.23 it/sec) -training >> step=8190000, episode=1366 reward=0.7749222 (479.91 it/sec) -training >> step=8190100, episode=1366 reward=0.7915612 (493.02 it/sec) -training >> step=8190200, episode=1366 reward=0.7815942 (502.56 it/sec) -training >> step=8190300, episode=1366 reward=0.7803329 (441.03 it/sec) -training >> step=8190400, episode=1366 reward=0.7889612 (501.90 it/sec) -training >> step=8190500, episode=1366 reward=0.8004071 (502.61 it/sec) -training >> step=8190600, episode=1366 reward=0.7920207 (459.86 it/sec) -training >> step=8190700, episode=1366 reward=0.7971215 (442.63 it/sec) -training >> step=8190800, episode=1366 reward=0.7957203 (453.35 it/sec) -training >> step=8190900, episode=1366 reward=0.7744499 (500.37 it/sec) -training >> step=8191000, episode=1366 reward=0.7897199 (479.16 it/sec) -training >> step=8191100, episode=1366 reward=0.7958068 (490.04 it/sec) -training >> step=8191200, episode=1366 reward=0.7839448 (439.73 it/sec) -training >> step=8191300, episode=1366 reward=0.7752706 (469.59 it/sec) -training >> step=8191400, episode=1366 reward=0.7840582 (484.69 it/sec) -training >> step=8191500, episode=1366 reward=0.7793166 (494.47 it/sec) -training >> step=8191600, episode=1366 reward=0.7990556 (467.50 it/sec) -training >> step=8191700, episode=1366 reward=0.7889181 (417.00 it/sec) -training >> step=8191800, episode=1366 reward=0.769307 (439.86 it/sec) -training >> step=8191900, episode=1366 reward=0.7981809 (479.03 it/sec) -training >> step=8192000, episode=1366 reward=0.7779285 (431.92 it/sec) -training >> step=8192100, episode=1366 reward=0.7775472 (426.46 it/sec) -training >> step=8192200, episode=1366 reward=0.7997109 (432.45 it/sec) -training >> step=8192300, episode=1366 reward=0.7832387 (494.57 it/sec) -training >> step=8192400, episode=1366 reward=0.7765273 (488.49 it/sec) -training >> step=8192500, episode=1366 reward=0.8014033 (483.92 it/sec) -training >> step=8192600, episode=1366 reward=0.7906788 (472.54 it/sec) -training >> step=8192700, episode=1366 reward=0.7964271 (495.09 it/sec) -training >> step=8192800, episode=1366 reward=0.7959563 (479.92 it/sec) -training >> step=8192900, episode=1366 reward=0.7782932 (514.79 it/sec) -training >> step=8193000, episode=1366 reward=0.7715643 (470.80 it/sec) -training >> step=8193100, episode=1366 reward=0.7713051 (449.82 it/sec) -training >> step=8193200, episode=1366 reward=0.788894 (499.65 it/sec) -training >> step=8193300, episode=1366 reward=0.7664875 (473.57 it/sec) -training >> step=8193400, episode=1366 reward=0.7847686 (527.08 it/sec) -training >> step=8193500, episode=1366 reward=0.796456 (492.20 it/sec) -training >> step=8193600, episode=1366 reward=0.7892246 (456.46 it/sec) -training >> step=8193700, episode=1366 reward=0.7714972 (476.27 it/sec) -training >> step=8193800, episode=1366 reward=0.7908011 (507.31 it/sec) -training >> step=8193900, episode=1366 reward=0.7862621 (363.12 it/sec) -training >> step=8194000, episode=1366 reward=0.7826722 (465.01 it/sec) -training >> step=8194100, episode=1366 reward=0.7790242 (491.96 it/sec) -training >> step=8194200, episode=1366 reward=0.7655185 (486.11 it/sec) -training >> step=8194300, episode=1366 reward=0.7619197 (498.10 it/sec) -training >> step=8194400, episode=1366 reward=0.781842 (484.73 it/sec) -training >> step=8194500, episode=1366 reward=0.7944478 (518.40 it/sec) -training >> step=8194600, episode=1366 reward=0.7923577 (479.05 it/sec) -training >> step=8194700, episode=1366 reward=0.7831079 (468.72 it/sec) -training >> step=8194800, episode=1366 reward=0.801891 (495.06 it/sec) -training >> step=8194900, episode=1366 reward=0.7787325 (478.24 it/sec) -training >> step=8195000, episode=1366 reward=0.7960854 (418.36 it/sec) -training >> step=8195100, episode=1366 reward=0.7761107 (425.30 it/sec) -training >> step=8195200, episode=1366 reward=0.7639091 (513.78 it/sec) -training >> step=8195300, episode=1367 reward=0.7737819 (113.06 it/sec) -training >> step=8195400, episode=1367 reward=0.7990584 (451.83 it/sec) -training >> step=8195500, episode=1367 reward=0.7963145 (467.85 it/sec) -training >> step=8195600, episode=1367 reward=0.7900462 (484.36 it/sec) -training >> step=8195700, episode=1367 reward=0.7964311 (477.76 it/sec) -training >> step=8195800, episode=1367 reward=0.7834916 (496.64 it/sec) -training >> step=8195900, episode=1367 reward=0.7949997 (513.25 it/sec) -training >> step=8196000, episode=1367 reward=0.8026162 (490.14 it/sec) -training >> step=8196100, episode=1367 reward=0.8060272 (439.94 it/sec) -training >> step=8196200, episode=1367 reward=0.7957214 (487.49 it/sec) -training >> step=8196300, episode=1367 reward=0.7941374 (517.74 it/sec) -training >> step=8196400, episode=1367 reward=0.786657 (476.34 it/sec) -training >> step=8196500, episode=1367 reward=0.8031828 (468.26 it/sec) -training >> step=8196600, episode=1367 reward=0.7961878 (491.44 it/sec) -training >> step=8196700, episode=1367 reward=0.7923505 (452.25 it/sec) -training >> step=8196800, episode=1367 reward=0.7813384 (475.43 it/sec) -training >> step=8196900, episode=1367 reward=0.8122131 (497.45 it/sec) -training >> step=8197000, episode=1367 reward=0.7787283 (429.40 it/sec) -training >> step=8197100, episode=1367 reward=0.7821441 (480.04 it/sec) -training >> step=8197200, episode=1367 reward=0.7856193 (462.39 it/sec) -training >> step=8197300, episode=1367 reward=0.7898701 (466.23 it/sec) -training >> step=8197400, episode=1367 reward=0.7899804 (510.54 it/sec) -training >> step=8197500, episode=1367 reward=0.7873613 (494.13 it/sec) -training >> step=8197600, episode=1367 reward=0.7851665 (452.35 it/sec) -training >> step=8197700, episode=1367 reward=0.7870581 (507.00 it/sec) -training >> step=8197800, episode=1367 reward=0.7834176 (470.68 it/sec) -training >> step=8197900, episode=1367 reward=0.7646652 (486.07 it/sec) -training >> step=8198000, episode=1367 reward=0.7752259 (489.86 it/sec) -training >> step=8198100, episode=1367 reward=0.7939688 (494.52 it/sec) -training >> step=8198200, episode=1367 reward=0.7838026 (485.65 it/sec) -training >> step=8198300, episode=1367 reward=0.7857473 (427.28 it/sec) -training >> step=8198400, episode=1367 reward=0.7844392 (467.90 it/sec) -training >> step=8198500, episode=1367 reward=0.7870832 (470.66 it/sec) -training >> step=8198600, episode=1367 reward=0.7983269 (489.32 it/sec) -training >> step=8198700, episode=1367 reward=0.7866114 (491.39 it/sec) -training >> step=8198800, episode=1367 reward=0.7715257 (510.30 it/sec) -training >> step=8198900, episode=1367 reward=0.7928721 (441.34 it/sec) -training >> step=8199000, episode=1367 reward=0.7987559 (486.41 it/sec) -training >> step=8199100, episode=1367 reward=0.7954245 (507.12 it/sec) -training >> step=8199200, episode=1367 reward=0.7772284 (493.96 it/sec) -training >> step=8199300, episode=1367 reward=0.7745377 (488.91 it/sec) -training >> step=8199400, episode=1367 reward=0.7817207 (494.26 it/sec) -training >> step=8199500, episode=1367 reward=0.7841645 (459.39 it/sec) -training >> step=8199600, episode=1367 reward=0.773312 (460.03 it/sec) -training >> step=8199700, episode=1367 reward=0.7618518 (447.90 it/sec) -training >> step=8199800, episode=1367 reward=0.7786348 (487.70 it/sec) -training >> step=8199900, episode=1367 reward=0.7677778 (482.29 it/sec) -training >> step=8200000, episode=1367 reward=0.7890958 (326.21 it/sec) -training >> step=8200100, episode=1367 reward=0.7934015 (437.25 it/sec) -training >> step=8200200, episode=1367 reward=0.7972292 (492.00 it/sec) -training >> step=8200300, episode=1367 reward=0.7993019 (469.02 it/sec) -training >> step=8200400, episode=1367 reward=0.7742085 (531.36 it/sec) -training >> step=8200500, episode=1367 reward=0.7963036 (498.08 it/sec) -training >> step=8200600, episode=1367 reward=0.7854301 (492.90 it/sec) -training >> step=8200700, episode=1367 reward=0.7868707 (473.12 it/sec) -training >> step=8200800, episode=1367 reward=0.7652634 (483.34 it/sec) -training >> step=8200900, episode=1367 reward=0.7815548 (550.38 it/sec) -training >> step=8201000, episode=1367 reward=0.8038238 (464.17 it/sec) -training >> step=8201100, episode=1367 reward=0.775394 (381.62 it/sec) -training >> step=8201200, episode=1367 reward=0.787065 (493.59 it/sec) -training >> step=8201300, episode=1368 reward=0.7959154 (137.27 it/sec) -training >> step=8201400, episode=1368 reward=0.8100539 (469.09 it/sec) -training >> step=8201500, episode=1368 reward=0.8050138 (528.76 it/sec) -training >> step=8201600, episode=1368 reward=0.7957498 (486.50 it/sec) -training >> step=8201700, episode=1368 reward=0.8048452 (508.33 it/sec) -training >> step=8201800, episode=1368 reward=0.7870042 (476.29 it/sec) -training >> step=8201900, episode=1368 reward=0.8129526 (507.65 it/sec) -training >> step=8202000, episode=1368 reward=0.8040666 (522.13 it/sec) -training >> step=8202100, episode=1368 reward=0.7899436 (517.33 it/sec) -training >> step=8202200, episode=1368 reward=0.8176273 (476.37 it/sec) -training >> step=8202300, episode=1368 reward=0.791248 (439.02 it/sec) -training >> step=8202400, episode=1368 reward=0.7979226 (458.05 it/sec) -training >> step=8202500, episode=1368 reward=0.7976004 (515.73 it/sec) -training >> step=8202600, episode=1368 reward=0.8039026 (509.90 it/sec) -training >> step=8202700, episode=1368 reward=0.8044505 (516.82 it/sec) -training >> step=8202800, episode=1368 reward=0.7822565 (524.92 it/sec) -training >> step=8202900, episode=1368 reward=0.7981297 (495.93 it/sec) -training >> step=8203000, episode=1368 reward=0.7751231 (532.80 it/sec) -training >> step=8203100, episode=1368 reward=0.7819147 (576.65 it/sec) -training >> step=8203200, episode=1368 reward=0.7893898 (508.06 it/sec) -training >> step=8203300, episode=1368 reward=0.7992376 (518.20 it/sec) -training >> step=8203400, episode=1368 reward=0.7950155 (499.15 it/sec) -training >> step=8203500, episode=1368 reward=0.799173 (532.46 it/sec) -training >> step=8203600, episode=1368 reward=0.8041016 (518.41 it/sec) -training >> step=8203700, episode=1368 reward=0.7880107 (524.05 it/sec) -training >> step=8203800, episode=1368 reward=0.807034 (520.03 it/sec) -training >> step=8203900, episode=1368 reward=0.8032598 (496.37 it/sec) -training >> step=8204000, episode=1368 reward=0.8017761 (515.86 it/sec) -training >> step=8204100, episode=1368 reward=0.7984924 (544.09 it/sec) -training >> step=8204200, episode=1368 reward=0.7847739 (564.97 it/sec) -training >> step=8204300, episode=1368 reward=0.7821674 (511.73 it/sec) -training >> step=8204400, episode=1368 reward=0.8005903 (477.48 it/sec) -training >> step=8204500, episode=1368 reward=0.7694192 (509.36 it/sec) -training >> step=8204600, episode=1368 reward=0.7773827 (520.91 it/sec) -training >> step=8204700, episode=1368 reward=0.7851651 (462.59 it/sec) -training >> step=8204800, episode=1368 reward=0.7850519 (519.21 it/sec) -training >> step=8204900, episode=1368 reward=0.7648234 (557.36 it/sec) -training >> step=8205000, episode=1368 reward=0.8025584 (476.36 it/sec) -training >> step=8205100, episode=1368 reward=0.7920469 (527.55 it/sec) -training >> step=8205200, episode=1368 reward=0.7909173 (545.22 it/sec) -training >> step=8205300, episode=1368 reward=0.7905079 (526.87 it/sec) -training >> step=8205400, episode=1368 reward=0.7796904 (532.39 it/sec) -training >> step=8205500, episode=1368 reward=0.7744989 (482.60 it/sec) -training >> step=8205600, episode=1368 reward=0.8087718 (505.10 it/sec) -training >> step=8205700, episode=1368 reward=0.7817445 (532.18 it/sec) -training >> step=8205800, episode=1368 reward=0.7802563 (507.13 it/sec) -training >> step=8205900, episode=1368 reward=0.7638339 (498.95 it/sec) -training >> step=8206000, episode=1368 reward=0.7738945 (500.09 it/sec) -training >> step=8206100, episode=1368 reward=0.7757547 (368.94 it/sec) -training >> step=8206200, episode=1368 reward=0.7694525 (494.76 it/sec) -training >> step=8206300, episode=1368 reward=0.7765094 (486.61 it/sec) -training >> step=8206400, episode=1368 reward=0.7865213 (524.17 it/sec) -training >> step=8206500, episode=1368 reward=0.7906478 (484.42 it/sec) -training >> step=8206600, episode=1368 reward=0.7758392 (465.05 it/sec) -training >> step=8206700, episode=1368 reward=0.7845758 (469.15 it/sec) -training >> step=8206800, episode=1368 reward=0.7784261 (451.68 it/sec) -training >> step=8206900, episode=1368 reward=0.7826352 (517.06 it/sec) -training >> step=8207000, episode=1368 reward=0.7491522 (480.13 it/sec) -training >> step=8207100, episode=1368 reward=0.7713704 (512.84 it/sec) -training >> step=8207200, episode=1368 reward=0.780042 (535.82 it/sec) -training >> step=8207300, episode=1369 reward=0.790031 (125.82 it/sec) -training >> step=8207400, episode=1369 reward=0.7884595 (525.29 it/sec) -training >> step=8207500, episode=1369 reward=0.7761742 (539.10 it/sec) -training >> step=8207600, episode=1369 reward=0.8136345 (526.00 it/sec) -training >> step=8207700, episode=1369 reward=0.7836016 (511.87 it/sec) -training >> step=8207800, episode=1369 reward=0.7720262 (496.68 it/sec) -training >> step=8207900, episode=1369 reward=0.8022614 (521.86 it/sec) -training >> step=8208000, episode=1369 reward=0.8102788 (533.63 it/sec) -training >> step=8208100, episode=1369 reward=0.7783194 (516.95 it/sec) -training >> step=8208200, episode=1369 reward=0.7640547 (545.85 it/sec) -training >> step=8208300, episode=1369 reward=0.7959247 (478.52 it/sec) -training >> step=8208400, episode=1369 reward=0.7862723 (548.05 it/sec) -training >> step=8208500, episode=1369 reward=0.79138 (542.85 it/sec) -training >> step=8208600, episode=1369 reward=0.79283 (521.52 it/sec) -training >> step=8208700, episode=1369 reward=0.8111358 (538.85 it/sec) -training >> step=8208800, episode=1369 reward=0.8035653 (520.80 it/sec) -training >> step=8208900, episode=1369 reward=0.7866241 (516.47 it/sec) -training >> step=8209000, episode=1369 reward=0.7920172 (522.13 it/sec) -training >> step=8209100, episode=1369 reward=0.7849505 (517.31 it/sec) -training >> step=8209200, episode=1369 reward=0.7853715 (516.25 it/sec) -training >> step=8209300, episode=1369 reward=0.8074582 (564.61 it/sec) -training >> step=8209400, episode=1369 reward=0.7933612 (482.34 it/sec) -training >> step=8209500, episode=1369 reward=0.787668 (461.25 it/sec) -training >> step=8209600, episode=1369 reward=0.7856011 (488.94 it/sec) -training >> step=8209700, episode=1369 reward=0.7954149 (459.26 it/sec) -training >> step=8209800, episode=1369 reward=0.7598577 (523.85 it/sec) -training >> step=8209900, episode=1369 reward=0.7797945 (489.22 it/sec) -training >> step=8210000, episode=1369 reward=0.7801501 (551.96 it/sec) -training >> step=8210100, episode=1369 reward=0.7875546 (508.20 it/sec) -training >> step=8210200, episode=1369 reward=0.8059684 (498.73 it/sec) -training >> step=8210300, episode=1369 reward=0.8005602 (556.19 it/sec) -training >> step=8210400, episode=1369 reward=0.7824592 (469.66 it/sec) -training >> step=8210500, episode=1369 reward=0.8078793 (520.14 it/sec) -training >> step=8210600, episode=1369 reward=0.7771304 (529.37 it/sec) -training >> step=8210700, episode=1369 reward=0.7696711 (542.61 it/sec) -training >> step=8210800, episode=1369 reward=0.7708813 (515.89 it/sec) -training >> step=8210900, episode=1369 reward=0.7859547 (514.84 it/sec) -training >> step=8211000, episode=1369 reward=0.7673147 (500.49 it/sec) -training >> step=8211100, episode=1369 reward=0.8024898 (559.46 it/sec) -training >> step=8211200, episode=1369 reward=0.7922416 (536.20 it/sec) -training >> step=8211300, episode=1369 reward=0.7730722 (528.16 it/sec) -training >> step=8211400, episode=1369 reward=0.7823811 (544.95 it/sec) -training >> step=8211500, episode=1369 reward=0.770798 (452.25 it/sec) -training >> step=8211600, episode=1369 reward=0.7596065 (539.32 it/sec) -training >> step=8211700, episode=1369 reward=0.7875308 (520.26 it/sec) -training >> step=8211800, episode=1369 reward=0.7752475 (537.61 it/sec) -training >> step=8211900, episode=1369 reward=0.7897155 (539.18 it/sec) -training >> step=8212000, episode=1369 reward=0.7857479 (492.95 it/sec) -training >> step=8212100, episode=1369 reward=0.7852728 (507.28 it/sec) -training >> step=8212200, episode=1369 reward=0.7746419 (398.21 it/sec) -training >> step=8212300, episode=1369 reward=0.7772857 (512.28 it/sec) -training >> step=8212400, episode=1369 reward=0.7680705 (530.52 it/sec) -training >> step=8212500, episode=1369 reward=0.7747216 (522.04 it/sec) -training >> step=8212600, episode=1369 reward=0.7744339 (496.50 it/sec) -training >> step=8212700, episode=1369 reward=0.7864492 (483.50 it/sec) -training >> step=8212800, episode=1369 reward=0.7740463 (487.21 it/sec) -training >> step=8212900, episode=1369 reward=0.770741 (496.05 it/sec) -training >> step=8213000, episode=1369 reward=0.802399 (497.79 it/sec) -training >> step=8213100, episode=1369 reward=0.7925051 (499.44 it/sec) -training >> step=8213200, episode=1369 reward=0.7919544 (488.50 it/sec) -training >> step=8213300, episode=1370 reward=0.7677851 (129.89 it/sec) -training >> step=8213400, episode=1370 reward=0.7747541 (524.88 it/sec) -training >> step=8213500, episode=1370 reward=0.7661979 (516.82 it/sec) -training >> step=8213600, episode=1370 reward=0.779373 (525.32 it/sec) -training >> step=8213700, episode=1370 reward=0.7822618 (553.19 it/sec) -training >> step=8213800, episode=1370 reward=0.7946478 (487.11 it/sec) -training >> step=8213900, episode=1370 reward=0.8001306 (538.01 it/sec) -training >> step=8214000, episode=1370 reward=0.7711769 (553.33 it/sec) -training >> step=8214100, episode=1370 reward=0.7816522 (492.76 it/sec) -training >> step=8214200, episode=1370 reward=0.7769161 (541.62 it/sec) -training >> step=8214300, episode=1370 reward=0.7799506 (487.81 it/sec) -training >> step=8214400, episode=1370 reward=0.7989351 (448.14 it/sec) -training >> step=8214500, episode=1370 reward=0.8012511 (543.90 it/sec) -training >> step=8214600, episode=1370 reward=0.8023211 (483.76 it/sec) -training >> step=8214700, episode=1370 reward=0.79561 (520.35 it/sec) -training >> step=8214800, episode=1370 reward=0.7841792 (522.26 it/sec) -training >> step=8214900, episode=1370 reward=0.8103925 (497.75 it/sec) -training >> step=8215000, episode=1370 reward=0.7932936 (532.72 it/sec) -training >> step=8215100, episode=1370 reward=0.7660553 (530.69 it/sec) -training >> step=8215200, episode=1370 reward=0.7825119 (525.58 it/sec) -training >> step=8215300, episode=1370 reward=0.7850618 (540.24 it/sec) -training >> step=8215400, episode=1370 reward=0.7826921 (480.17 it/sec) -training >> step=8215500, episode=1370 reward=0.768323 (527.68 it/sec) -training >> step=8215600, episode=1370 reward=0.7841029 (494.00 it/sec) -training >> step=8215700, episode=1370 reward=0.7908993 (514.59 it/sec) -training >> step=8215800, episode=1370 reward=0.80229 (553.85 it/sec) -training >> step=8215900, episode=1370 reward=0.8034118 (489.05 it/sec) -training >> step=8216000, episode=1370 reward=0.7804571 (499.82 it/sec) -training >> step=8216100, episode=1370 reward=0.7889916 (529.36 it/sec) -training >> step=8216200, episode=1370 reward=0.7711702 (533.93 it/sec) -training >> step=8216300, episode=1370 reward=0.7791103 (527.90 it/sec) -training >> step=8216400, episode=1370 reward=0.7614602 (533.51 it/sec) -training >> step=8216500, episode=1370 reward=0.7765988 (501.71 it/sec) -training >> step=8216600, episode=1370 reward=0.7844818 (546.48 it/sec) -training >> step=8216700, episode=1370 reward=0.8021558 (514.53 it/sec) -training >> step=8216800, episode=1370 reward=0.7616853 (536.70 it/sec) -training >> step=8216900, episode=1370 reward=0.7881621 (526.27 it/sec) -training >> step=8217000, episode=1370 reward=0.781251 (486.23 it/sec) -training >> step=8217100, episode=1370 reward=0.7737215 (489.89 it/sec) -training >> step=8217200, episode=1370 reward=0.7808349 (483.67 it/sec) -training >> step=8217300, episode=1370 reward=0.7838204 (544.26 it/sec) -training >> step=8217400, episode=1370 reward=0.7749515 (516.81 it/sec) -training >> step=8217500, episode=1370 reward=0.7828087 (520.19 it/sec) -training >> step=8217600, episode=1370 reward=0.7648025 (512.72 it/sec) -training >> step=8217700, episode=1370 reward=0.7894253 (535.78 it/sec) -training >> step=8217800, episode=1370 reward=0.7839386 (522.51 it/sec) -training >> step=8217900, episode=1370 reward=0.7696527 (545.70 it/sec) -training >> step=8218000, episode=1370 reward=0.7829838 (521.26 it/sec) -training >> step=8218100, episode=1370 reward=0.7732477 (496.20 it/sec) -training >> step=8218200, episode=1370 reward=0.7633462 (544.27 it/sec) -training >> step=8218300, episode=1370 reward=0.7704594 (349.85 it/sec) -training >> step=8218400, episode=1370 reward=0.7732568 (482.52 it/sec) -training >> step=8218500, episode=1370 reward=0.7715966 (494.61 it/sec) -training >> step=8218600, episode=1370 reward=0.7774358 (450.65 it/sec) -training >> step=8218700, episode=1370 reward=0.7993444 (473.87 it/sec) -training >> step=8218800, episode=1370 reward=0.7608081 (531.53 it/sec) -training >> step=8218900, episode=1370 reward=0.7879978 (514.49 it/sec) -training >> step=8219000, episode=1370 reward=0.7766577 (551.31 it/sec) -training >> step=8219100, episode=1370 reward=0.7808216 (511.15 it/sec) -training >> step=8219200, episode=1370 reward=0.7843185 (527.80 it/sec) -training >> step=8219300, episode=1371 reward=0.769861 (129.95 it/sec) -training >> step=8219400, episode=1371 reward=0.778258 (516.71 it/sec) -training >> step=8219500, episode=1371 reward=0.8025992 (495.74 it/sec) -training >> step=8219600, episode=1371 reward=0.7839556 (518.30 it/sec) -training >> step=8219700, episode=1371 reward=0.7776437 (533.63 it/sec) -training >> step=8219800, episode=1371 reward=0.7719772 (485.23 it/sec) -training >> step=8219900, episode=1371 reward=0.7827874 (505.68 it/sec) -training >> step=8220000, episode=1371 reward=0.7881367 (481.25 it/sec) -training >> step=8220100, episode=1371 reward=0.7823485 (512.77 it/sec) -training >> step=8220200, episode=1371 reward=0.7876944 (530.41 it/sec) -training >> step=8220300, episode=1371 reward=0.8094475 (496.24 it/sec) -training >> step=8220400, episode=1371 reward=0.7997601 (516.32 it/sec) -training >> step=8220500, episode=1371 reward=0.7789053 (451.04 it/sec) -training >> step=8220600, episode=1371 reward=0.8005893 (501.64 it/sec) -training >> step=8220700, episode=1371 reward=0.794673 (521.80 it/sec) -training >> step=8220800, episode=1371 reward=0.7750825 (536.04 it/sec) -training >> step=8220900, episode=1371 reward=0.7820078 (503.43 it/sec) -training >> step=8221000, episode=1371 reward=0.7904437 (534.32 it/sec) -training >> step=8221100, episode=1371 reward=0.771143 (514.52 it/sec) -training >> step=8221200, episode=1371 reward=0.783621 (534.15 it/sec) -training >> step=8221300, episode=1371 reward=0.7917767 (517.47 it/sec) -training >> step=8221400, episode=1371 reward=0.7882674 (507.02 it/sec) -training >> step=8221500, episode=1371 reward=0.7873088 (517.98 it/sec) -training >> step=8221600, episode=1371 reward=0.8009971 (526.66 it/sec) -training >> step=8221700, episode=1371 reward=0.7723739 (534.40 it/sec) -training >> step=8221800, episode=1371 reward=0.7580509 (520.37 it/sec) -training >> step=8221900, episode=1371 reward=0.7829375 (519.90 it/sec) -training >> step=8222000, episode=1371 reward=0.7838309 (511.74 it/sec) -training >> step=8222100, episode=1371 reward=0.7666647 (498.01 it/sec) -training >> step=8222200, episode=1371 reward=0.7732488 (536.99 it/sec) -training >> step=8222300, episode=1371 reward=0.7813809 (498.16 it/sec) -training >> step=8222400, episode=1371 reward=0.7859383 (516.38 it/sec) -training >> step=8222500, episode=1371 reward=0.7787447 (495.12 it/sec) -training >> step=8222600, episode=1371 reward=0.7807667 (541.50 it/sec) -training >> step=8222700, episode=1371 reward=0.7890251 (517.99 it/sec) -training >> step=8222800, episode=1371 reward=0.7636289 (526.93 it/sec) -training >> step=8222900, episode=1371 reward=0.7701238 (520.42 it/sec) -training >> step=8223000, episode=1371 reward=0.7821414 (529.35 it/sec) -training >> step=8223100, episode=1371 reward=0.7924305 (509.94 it/sec) -training >> step=8223200, episode=1371 reward=0.7834023 (535.66 it/sec) -training >> step=8223300, episode=1371 reward=0.7655385 (514.89 it/sec) -training >> step=8223400, episode=1371 reward=0.7734861 (547.66 it/sec) -training >> step=8223500, episode=1371 reward=0.7678534 (498.97 it/sec) -training >> step=8223600, episode=1371 reward=0.7707538 (516.70 it/sec) -training >> step=8223700, episode=1371 reward=0.7846358 (502.22 it/sec) -training >> step=8223800, episode=1371 reward=0.78884 (533.93 it/sec) -training >> step=8223900, episode=1371 reward=0.7875705 (501.86 it/sec) -training >> step=8224000, episode=1371 reward=0.7891873 (493.56 it/sec) -training >> step=8224100, episode=1371 reward=0.7810947 (486.36 it/sec) -training >> step=8224200, episode=1371 reward=0.7840497 (521.19 it/sec) -training >> step=8224300, episode=1371 reward=0.7655069 (372.88 it/sec) -training >> step=8224400, episode=1371 reward=0.7895511 (542.44 it/sec) -training >> step=8224500, episode=1371 reward=0.7874984 (485.46 it/sec) -training >> step=8224600, episode=1371 reward=0.7804683 (526.35 it/sec) -training >> step=8224700, episode=1371 reward=0.7798892 (517.17 it/sec) -training >> step=8224800, episode=1371 reward=0.7858858 (535.40 it/sec) -training >> step=8224900, episode=1371 reward=0.7639104 (528.04 it/sec) -training >> step=8225000, episode=1371 reward=0.7823505 (517.64 it/sec) -training >> step=8225100, episode=1371 reward=0.7880699 (510.77 it/sec) -training >> step=8225200, episode=1371 reward=0.783499 (544.11 it/sec) -training >> step=8225300, episode=1372 reward=0.7722733 (140.72 it/sec) -training >> step=8225400, episode=1372 reward=0.7676467 (517.81 it/sec) -training >> step=8225500, episode=1372 reward=0.7741614 (518.24 it/sec) -training >> step=8225600, episode=1372 reward=0.7502939 (531.11 it/sec) -training >> step=8225700, episode=1372 reward=0.8031396 (511.28 it/sec) -training >> step=8225800, episode=1372 reward=0.7989136 (510.10 it/sec) -training >> step=8225900, episode=1372 reward=0.7898173 (509.48 it/sec) -training >> step=8226000, episode=1372 reward=0.7896891 (531.90 it/sec) -training >> step=8226100, episode=1372 reward=0.7830325 (499.55 it/sec) -training >> step=8226200, episode=1372 reward=0.7900728 (527.32 it/sec) -training >> step=8226300, episode=1372 reward=0.8116166 (527.61 it/sec) -training >> step=8226400, episode=1372 reward=0.7661829 (500.86 it/sec) -training >> step=8226500, episode=1372 reward=0.7860333 (507.66 it/sec) -training >> step=8226600, episode=1372 reward=0.7994714 (533.33 it/sec) -training >> step=8226700, episode=1372 reward=0.805696 (528.09 it/sec) -training >> step=8226800, episode=1372 reward=0.7944204 (468.95 it/sec) -training >> step=8226900, episode=1372 reward=0.8076097 (504.90 it/sec) -training >> step=8227000, episode=1372 reward=0.8054562 (514.15 it/sec) -training >> step=8227100, episode=1372 reward=0.7832884 (502.98 it/sec) -training >> step=8227200, episode=1372 reward=0.7987918 (520.82 it/sec) -training >> step=8227300, episode=1372 reward=0.7914377 (528.62 it/sec) -training >> step=8227400, episode=1372 reward=0.7839449 (484.03 it/sec) -training >> step=8227500, episode=1372 reward=0.8025234 (484.39 it/sec) -training >> step=8227600, episode=1372 reward=0.7963309 (528.85 it/sec) -training >> step=8227700, episode=1372 reward=0.7672156 (534.30 it/sec) -training >> step=8227800, episode=1372 reward=0.7979525 (521.67 it/sec) -training >> step=8227900, episode=1372 reward=0.7616803 (505.66 it/sec) -training >> step=8228000, episode=1372 reward=0.7866306 (523.31 it/sec) -training >> step=8228100, episode=1372 reward=0.7833261 (498.97 it/sec) -training >> step=8228200, episode=1372 reward=0.7718745 (542.76 it/sec) -training >> step=8228300, episode=1372 reward=0.7725032 (510.10 it/sec) -training >> step=8228400, episode=1372 reward=0.7728121 (535.90 it/sec) -training >> step=8228500, episode=1372 reward=0.7757036 (485.15 it/sec) -training >> step=8228600, episode=1372 reward=0.7891977 (501.29 it/sec) -training >> step=8228700, episode=1372 reward=0.7817712 (533.05 it/sec) -training >> step=8228800, episode=1372 reward=0.7754921 (534.75 it/sec) -training >> step=8228900, episode=1372 reward=0.7739986 (534.30 it/sec) -training >> step=8229000, episode=1372 reward=0.8002939 (504.30 it/sec) -training >> step=8229100, episode=1372 reward=0.7759578 (489.39 it/sec) -training >> step=8229200, episode=1372 reward=0.7890496 (435.91 it/sec) -training >> step=8229300, episode=1372 reward=0.783926 (510.39 it/sec) -training >> step=8229400, episode=1372 reward=0.7689109 (526.81 it/sec) -training >> step=8229500, episode=1372 reward=0.7832441 (512.43 it/sec) -training >> step=8229600, episode=1372 reward=0.7806829 (511.00 it/sec) -training >> step=8229700, episode=1372 reward=0.7987606 (508.31 it/sec) -training >> step=8229800, episode=1372 reward=0.777293 (545.62 it/sec) -training >> step=8229900, episode=1372 reward=0.7924285 (513.41 it/sec) -training >> step=8230000, episode=1372 reward=0.7989761 (525.11 it/sec) -training >> step=8230100, episode=1372 reward=0.7788243 (506.67 it/sec) -training >> step=8230200, episode=1372 reward=0.7781221 (509.37 it/sec) -training >> step=8230300, episode=1372 reward=0.7658225 (527.20 it/sec) -training >> step=8230400, episode=1372 reward=0.781065 (408.99 it/sec) -training >> step=8230500, episode=1372 reward=0.7777466 (572.70 it/sec) -training >> step=8230600, episode=1372 reward=0.7966749 (474.01 it/sec) -training >> step=8230700, episode=1372 reward=0.7843317 (446.14 it/sec) -training >> step=8230800, episode=1372 reward=0.7963359 (492.99 it/sec) -training >> step=8230900, episode=1372 reward=0.7494327 (508.60 it/sec) -training >> step=8231000, episode=1372 reward=0.7656916 (488.85 it/sec) -training >> step=8231100, episode=1372 reward=0.7628769 (481.67 it/sec) -training >> step=8231200, episode=1372 reward=0.7731606 (462.32 it/sec) -training >> step=8231300, episode=1373 reward=0.7703476 (144.86 it/sec) -training >> step=8231400, episode=1373 reward=0.792758 (509.87 it/sec) -training >> step=8231500, episode=1373 reward=0.7816905 (494.99 it/sec) -training >> step=8231600, episode=1373 reward=0.7902206 (533.09 it/sec) -training >> step=8231700, episode=1373 reward=0.7692733 (526.20 it/sec) -training >> step=8231800, episode=1373 reward=0.7849516 (503.07 it/sec) -training >> step=8231900, episode=1373 reward=0.7744879 (501.09 it/sec) -training >> step=8232000, episode=1373 reward=0.7598868 (525.10 it/sec) -training >> step=8232100, episode=1373 reward=0.8013751 (532.84 it/sec) -training >> step=8232200, episode=1373 reward=0.7800746 (511.79 it/sec) -training >> step=8232300, episode=1373 reward=0.7794856 (556.36 it/sec) -training >> step=8232400, episode=1373 reward=0.7960957 (506.42 it/sec) -training >> step=8232500, episode=1373 reward=0.8112016 (512.65 it/sec) -training >> step=8232600, episode=1373 reward=0.7838625 (531.37 it/sec) -training >> step=8232700, episode=1373 reward=0.7828525 (508.18 it/sec) -training >> step=8232800, episode=1373 reward=0.7794263 (547.05 it/sec) -training >> step=8232900, episode=1373 reward=0.7815118 (484.48 it/sec) -training >> step=8233000, episode=1373 reward=0.7633682 (513.15 it/sec) -training >> step=8233100, episode=1373 reward=0.7751622 (532.98 it/sec) -training >> step=8233200, episode=1373 reward=0.7740048 (518.15 it/sec) -training >> step=8233300, episode=1373 reward=0.7926392 (529.52 it/sec) -training >> step=8233400, episode=1373 reward=0.7868501 (485.22 it/sec) -training >> step=8233500, episode=1373 reward=0.8084738 (497.44 it/sec) -training >> step=8233600, episode=1373 reward=0.7592654 (530.77 it/sec) -training >> step=8233700, episode=1373 reward=0.7901307 (513.83 it/sec) -training >> step=8233800, episode=1373 reward=0.7800233 (537.01 it/sec) -training >> step=8233900, episode=1373 reward=0.7846125 (538.67 it/sec) -training >> step=8234000, episode=1373 reward=0.7845834 (468.33 it/sec) -training >> step=8234100, episode=1373 reward=0.7765545 (479.98 it/sec) -training >> step=8234200, episode=1373 reward=0.7928839 (513.08 it/sec) -training >> step=8234300, episode=1373 reward=0.7864603 (510.11 it/sec) -training >> step=8234400, episode=1373 reward=0.8114489 (509.07 it/sec) -training >> step=8234500, episode=1373 reward=0.7822977 (476.57 it/sec) -training >> step=8234600, episode=1373 reward=0.7927337 (496.41 it/sec) -training >> step=8234700, episode=1373 reward=0.793556 (495.33 it/sec) -training >> step=8234800, episode=1373 reward=0.7775036 (522.26 it/sec) -training >> step=8234900, episode=1373 reward=0.7784935 (532.02 it/sec) -training >> step=8235000, episode=1373 reward=0.7907346 (505.44 it/sec) -training >> step=8235100, episode=1373 reward=0.7762473 (479.94 it/sec) -training >> step=8235200, episode=1373 reward=0.786387 (523.20 it/sec) -training >> step=8235300, episode=1373 reward=0.7802502 (455.98 it/sec) -training >> step=8235400, episode=1373 reward=0.7922689 (440.45 it/sec) -training >> step=8235500, episode=1373 reward=0.7762939 (481.37 it/sec) -training >> step=8235600, episode=1373 reward=0.7846163 (502.03 it/sec) -training >> step=8235700, episode=1373 reward=0.8049904 (540.75 it/sec) -training >> step=8235800, episode=1373 reward=0.7510754 (526.70 it/sec) -training >> step=8235900, episode=1373 reward=0.7972016 (526.51 it/sec) -training >> step=8236000, episode=1373 reward=0.7941374 (488.78 it/sec) -training >> step=8236100, episode=1373 reward=0.7975688 (524.73 it/sec) -training >> step=8236200, episode=1373 reward=0.7810073 (530.54 it/sec) -training >> step=8236300, episode=1373 reward=0.7881302 (535.25 it/sec) -training >> step=8236400, episode=1373 reward=0.796228 (539.13 it/sec) -training >> step=8236500, episode=1373 reward=0.7870829 (537.17 it/sec) -training >> step=8236600, episode=1373 reward=0.7983336 (368.64 it/sec) -training >> step=8236700, episode=1373 reward=0.7556228 (515.03 it/sec) -training >> step=8236800, episode=1373 reward=0.7832004 (535.45 it/sec) -training >> step=8236900, episode=1373 reward=0.7718464 (536.10 it/sec) -training >> step=8237000, episode=1373 reward=0.7822552 (492.23 it/sec) -training >> step=8237100, episode=1373 reward=0.767287 (525.46 it/sec) -training >> step=8237200, episode=1373 reward=0.7685171 (519.04 it/sec) -training >> step=8237300, episode=1374 reward=0.7784157 (147.79 it/sec) -training >> step=8237400, episode=1374 reward=0.7939063 (485.75 it/sec) -training >> step=8237500, episode=1374 reward=0.7956874 (532.55 it/sec) -training >> step=8237600, episode=1374 reward=0.7853401 (530.87 it/sec) -training >> step=8237700, episode=1374 reward=0.791384 (530.35 it/sec) -training >> step=8237800, episode=1374 reward=0.8014386 (529.43 it/sec) -training >> step=8237900, episode=1374 reward=0.7926074 (522.78 it/sec) -training >> step=8238000, episode=1374 reward=0.7916238 (495.85 it/sec) -training >> step=8238100, episode=1374 reward=0.7826784 (514.34 it/sec) -training >> step=8238200, episode=1374 reward=0.7857563 (515.37 it/sec) -training >> step=8238300, episode=1374 reward=0.7859733 (513.94 it/sec) -training >> step=8238400, episode=1374 reward=0.8019444 (469.32 it/sec) -training >> step=8238500, episode=1374 reward=0.8098255 (485.99 it/sec) -training >> step=8238600, episode=1374 reward=0.7991316 (540.52 it/sec) -training >> step=8238700, episode=1374 reward=0.8049439 (534.41 it/sec) -training >> step=8238800, episode=1374 reward=0.8040571 (509.58 it/sec) -training >> step=8238900, episode=1374 reward=0.7975252 (486.61 it/sec) -training >> step=8239000, episode=1374 reward=0.7942746 (504.18 it/sec) -training >> step=8239100, episode=1374 reward=0.7975475 (503.57 it/sec) -training >> step=8239200, episode=1374 reward=0.8042614 (518.79 it/sec) -training >> step=8239300, episode=1374 reward=0.777626 (533.06 it/sec) -training >> step=8239400, episode=1374 reward=0.7959385 (505.80 it/sec) -training >> step=8239500, episode=1374 reward=0.7832677 (477.07 it/sec) -training >> step=8239600, episode=1374 reward=0.7864724 (491.82 it/sec) -training >> step=8239700, episode=1374 reward=0.7870794 (530.40 it/sec) -training >> step=8239800, episode=1374 reward=0.7889608 (540.69 it/sec) -training >> step=8239900, episode=1374 reward=0.7812999 (521.05 it/sec) -training >> step=8240000, episode=1374 reward=0.7632847 (517.65 it/sec) -training >> step=8240100, episode=1374 reward=0.7987966 (486.52 it/sec) -training >> step=8240200, episode=1374 reward=0.7834662 (516.70 it/sec) -training >> step=8240300, episode=1374 reward=0.7873532 (466.64 it/sec) -training >> step=8240400, episode=1374 reward=0.8000403 (537.27 it/sec) -training >> step=8240500, episode=1374 reward=0.7960843 (483.69 it/sec) -training >> step=8240600, episode=1374 reward=0.7929146 (496.39 it/sec) -training >> step=8240700, episode=1374 reward=0.7839102 (543.74 it/sec) -training >> step=8240800, episode=1374 reward=0.8044599 (536.76 it/sec) -training >> step=8240900, episode=1374 reward=0.7809886 (514.16 it/sec) -training >> step=8241000, episode=1374 reward=0.7944996 (518.00 it/sec) -training >> step=8241100, episode=1374 reward=0.7737441 (515.38 it/sec) -training >> step=8241200, episode=1374 reward=0.795449 (530.40 it/sec) -training >> step=8241300, episode=1374 reward=0.775808 (519.78 it/sec) -training >> step=8241400, episode=1374 reward=0.7953886 (547.88 it/sec) -training >> step=8241500, episode=1374 reward=0.7811611 (537.68 it/sec) -training >> step=8241600, episode=1374 reward=0.7738996 (516.47 it/sec) -training >> step=8241700, episode=1374 reward=0.7831621 (481.03 it/sec) -training >> step=8241800, episode=1374 reward=0.7925968 (524.70 it/sec) -training >> step=8241900, episode=1374 reward=0.771333 (516.29 it/sec) -training >> step=8242000, episode=1374 reward=0.7655026 (508.47 it/sec) -training >> step=8242100, episode=1374 reward=0.7866027 (503.40 it/sec) -training >> step=8242200, episode=1374 reward=0.7897983 (522.30 it/sec) -training >> step=8242300, episode=1374 reward=0.782036 (526.54 it/sec) -training >> step=8242400, episode=1374 reward=0.7878984 (545.25 it/sec) -training >> step=8242500, episode=1374 reward=0.7773656 (520.84 it/sec) -training >> step=8242600, episode=1374 reward=0.7819909 (401.52 it/sec) -training >> step=8242700, episode=1374 reward=0.7899979 (500.75 it/sec) -training >> step=8242800, episode=1374 reward=0.7794265 (520.57 it/sec) -training >> step=8242900, episode=1374 reward=0.8018972 (532.95 it/sec) -training >> step=8243000, episode=1374 reward=0.793554 (464.06 it/sec) -training >> step=8243100, episode=1374 reward=0.7876762 (466.62 it/sec) -training >> step=8243200, episode=1374 reward=0.7627827 (523.97 it/sec) -training >> step=8243300, episode=1375 reward=0.809581 (130.62 it/sec) -training >> step=8243400, episode=1375 reward=0.8061427 (505.17 it/sec) -training >> step=8243500, episode=1375 reward=0.7989052 (474.98 it/sec) -training >> step=8243600, episode=1375 reward=0.7795267 (530.03 it/sec) -training >> step=8243700, episode=1375 reward=0.7879948 (482.48 it/sec) -training >> step=8243800, episode=1375 reward=0.7983693 (494.01 it/sec) -training >> step=8243900, episode=1375 reward=0.7915644 (497.48 it/sec) -training >> step=8244000, episode=1375 reward=0.7927868 (514.68 it/sec) -training >> step=8244100, episode=1375 reward=0.7832521 (528.82 it/sec) -training >> step=8244200, episode=1375 reward=0.7730704 (537.32 it/sec) -training >> step=8244300, episode=1375 reward=0.786228 (525.06 it/sec) -training >> step=8244400, episode=1375 reward=0.8100487 (511.57 it/sec) -training >> step=8244500, episode=1375 reward=0.7770665 (482.85 it/sec) -training >> step=8244600, episode=1375 reward=0.7969505 (532.16 it/sec) -training >> step=8244700, episode=1375 reward=0.7827966 (549.98 it/sec) -training >> step=8244800, episode=1375 reward=0.8020463 (520.15 it/sec) -training >> step=8244900, episode=1375 reward=0.7670043 (496.53 it/sec) -training >> step=8245000, episode=1375 reward=0.7920215 (509.29 it/sec) -training >> step=8245100, episode=1375 reward=0.8013772 (504.40 it/sec) -training >> step=8245200, episode=1375 reward=0.7880992 (522.86 it/sec) -training >> step=8245300, episode=1375 reward=0.7888052 (522.09 it/sec) -training >> step=8245400, episode=1375 reward=0.7842695 (520.81 it/sec) -training >> step=8245500, episode=1375 reward=0.7841691 (489.66 it/sec) -training >> step=8245600, episode=1375 reward=0.7757947 (503.01 it/sec) -training >> step=8245700, episode=1375 reward=0.7859935 (526.74 it/sec) -training >> step=8245800, episode=1375 reward=0.7759742 (562.36 it/sec) -training >> step=8245900, episode=1375 reward=0.7856823 (512.93 it/sec) -training >> step=8246000, episode=1375 reward=0.7863557 (503.60 it/sec) -training >> step=8246100, episode=1375 reward=0.7767465 (485.95 it/sec) -training >> step=8246200, episode=1375 reward=0.7836097 (570.74 it/sec) -training >> step=8246300, episode=1375 reward=0.7915188 (530.62 it/sec) -training >> step=8246400, episode=1375 reward=0.781085 (532.71 it/sec) -training >> step=8246500, episode=1375 reward=0.7875336 (527.32 it/sec) -training >> step=8246600, episode=1375 reward=0.7758306 (515.73 it/sec) -training >> step=8246700, episode=1375 reward=0.7827132 (478.96 it/sec) -training >> step=8246800, episode=1375 reward=0.7867937 (519.69 it/sec) -training >> step=8246900, episode=1375 reward=0.7910021 (523.60 it/sec) -training >> step=8247000, episode=1375 reward=0.7893299 (535.68 it/sec) -training >> step=8247100, episode=1375 reward=0.801659 (522.82 it/sec) -training >> step=8247200, episode=1375 reward=0.7969716 (473.87 it/sec) -training >> step=8247300, episode=1375 reward=0.7938743 (540.86 it/sec) -training >> step=8247400, episode=1375 reward=0.7768641 (506.38 it/sec) -training >> step=8247500, episode=1375 reward=0.7861159 (509.27 it/sec) -training >> step=8247600, episode=1375 reward=0.7763903 (514.50 it/sec) -training >> step=8247700, episode=1375 reward=0.8016157 (493.80 it/sec) -training >> step=8247800, episode=1375 reward=0.7812491 (495.29 it/sec) -training >> step=8247900, episode=1375 reward=0.7910281 (532.79 it/sec) -training >> step=8248000, episode=1375 reward=0.7972093 (502.07 it/sec) -training >> step=8248100, episode=1375 reward=0.7803234 (495.21 it/sec) -training >> step=8248200, episode=1375 reward=0.7812988 (508.88 it/sec) -training >> step=8248300, episode=1375 reward=0.7846842 (517.60 it/sec) -training >> step=8248400, episode=1375 reward=0.7742584 (529.47 it/sec) -training >> step=8248500, episode=1375 reward=0.7649612 (500.78 it/sec) -training >> step=8248600, episode=1375 reward=0.7821328 (388.69 it/sec) -training >> step=8248700, episode=1375 reward=0.7856595 (534.01 it/sec) -training >> step=8248800, episode=1375 reward=0.7922454 (486.73 it/sec) -training >> step=8248900, episode=1375 reward=0.7993969 (496.85 it/sec) -training >> step=8249000, episode=1375 reward=0.7745984 (526.37 it/sec) -training >> step=8249100, episode=1375 reward=0.7773774 (530.76 it/sec) -training >> step=8249200, episode=1375 reward=0.7657371 (512.59 it/sec) -training >> step=8249300, episode=1376 reward=0.7808523 (139.35 it/sec) -training >> step=8249400, episode=1376 reward=0.7789719 (502.47 it/sec) -training >> step=8249500, episode=1376 reward=0.787131 (502.12 it/sec) -training >> step=8249600, episode=1376 reward=0.7889616 (497.32 it/sec) -training >> step=8249700, episode=1376 reward=0.779395 (486.29 it/sec) -training >> step=8249800, episode=1376 reward=0.7955002 (475.06 it/sec) -training >> step=8249900, episode=1376 reward=0.794422 (508.81 it/sec) -training >> step=8250000, episode=1376 reward=0.7999598 (516.90 it/sec) -training >> step=8250100, episode=1376 reward=0.7813812 (501.54 it/sec) -training >> step=8250200, episode=1376 reward=0.7803721 (521.11 it/sec) -training >> step=8250300, episode=1376 reward=0.7787831 (521.70 it/sec) -training >> step=8250400, episode=1376 reward=0.7832421 (516.96 it/sec) -training >> step=8250500, episode=1376 reward=0.7832891 (551.74 it/sec) -training >> step=8250600, episode=1376 reward=0.7922261 (498.27 it/sec) -training >> step=8250700, episode=1376 reward=0.8065377 (541.49 it/sec) -training >> step=8250800, episode=1376 reward=0.7943168 (513.67 it/sec) -training >> step=8250900, episode=1376 reward=0.7844225 (527.23 it/sec) -training >> step=8251000, episode=1376 reward=0.783558 (507.51 it/sec) -training >> step=8251100, episode=1376 reward=0.7932164 (489.02 it/sec) -training >> step=8251200, episode=1376 reward=0.7735472 (551.14 it/sec) -training >> step=8251300, episode=1376 reward=0.7868763 (493.39 it/sec) -training >> step=8251400, episode=1376 reward=0.784849 (505.16 it/sec) -training >> step=8251500, episode=1376 reward=0.7981206 (520.54 it/sec) -training >> step=8251600, episode=1376 reward=0.7808319 (494.01 it/sec) -training >> step=8251700, episode=1376 reward=0.7875155 (531.52 it/sec) -training >> step=8251800, episode=1376 reward=0.76449 (524.80 it/sec) -training >> step=8251900, episode=1376 reward=0.7851582 (531.11 it/sec) -training >> step=8252000, episode=1376 reward=0.7796289 (536.57 it/sec) -training >> step=8252100, episode=1376 reward=0.7919183 (520.97 it/sec) -training >> step=8252200, episode=1376 reward=0.7892144 (508.78 it/sec) -training >> step=8252300, episode=1376 reward=0.7993001 (535.99 it/sec) -training >> step=8252400, episode=1376 reward=0.7770553 (496.22 it/sec) -training >> step=8252500, episode=1376 reward=0.7956078 (504.69 it/sec) -training >> step=8252600, episode=1376 reward=0.7856395 (531.60 it/sec) -training >> step=8252700, episode=1376 reward=0.7678425 (497.01 it/sec) -training >> step=8252800, episode=1376 reward=0.7718487 (539.97 it/sec) -training >> step=8252900, episode=1376 reward=0.763756 (516.85 it/sec) -training >> step=8253000, episode=1376 reward=0.7713653 (534.60 it/sec) -training >> step=8253100, episode=1376 reward=0.7740045 (487.12 it/sec) -training >> step=8253200, episode=1376 reward=0.8029147 (493.50 it/sec) -training >> step=8253300, episode=1376 reward=0.816396 (530.30 it/sec) -training >> step=8253400, episode=1376 reward=0.7838063 (539.95 it/sec) -training >> step=8253500, episode=1376 reward=0.7829896 (540.58 it/sec) -training >> step=8253600, episode=1376 reward=0.8010489 (497.40 it/sec) -training >> step=8253700, episode=1376 reward=0.7904924 (490.38 it/sec) -training >> step=8253800, episode=1376 reward=0.7885773 (467.25 it/sec) -training >> step=8253900, episode=1376 reward=0.8026624 (525.43 it/sec) -training >> step=8254000, episode=1376 reward=0.7855166 (513.10 it/sec) -training >> step=8254100, episode=1376 reward=0.7669122 (551.82 it/sec) -training >> step=8254200, episode=1376 reward=0.7724806 (511.21 it/sec) -training >> step=8254300, episode=1376 reward=0.784389 (510.18 it/sec) -training >> step=8254400, episode=1376 reward=0.7643715 (545.06 it/sec) -training >> step=8254500, episode=1376 reward=0.7941311 (508.76 it/sec) -training >> step=8254600, episode=1376 reward=0.7936357 (532.84 it/sec) -training >> step=8254700, episode=1376 reward=0.7997866 (411.35 it/sec) -training >> step=8254800, episode=1376 reward=0.788607 (513.81 it/sec) -training >> step=8254900, episode=1376 reward=0.7836069 (523.97 it/sec) -training >> step=8255000, episode=1376 reward=0.8037305 (490.57 it/sec) -training >> step=8255100, episode=1376 reward=0.7887153 (498.81 it/sec) -training >> step=8255200, episode=1376 reward=0.7973413 (473.67 it/sec) -training >> step=8255300, episode=1377 reward=0.7992155 (109.90 it/sec) -training >> step=8255400, episode=1377 reward=0.7963549 (410.59 it/sec) -training >> step=8255500, episode=1377 reward=0.7820788 (447.77 it/sec) -training >> step=8255600, episode=1377 reward=0.7987401 (457.15 it/sec) -training >> step=8255700, episode=1377 reward=0.7824041 (467.27 it/sec) -training >> step=8255800, episode=1377 reward=0.7889251 (486.97 it/sec) -training >> step=8255900, episode=1377 reward=0.7742262 (483.22 it/sec) -training >> step=8256000, episode=1377 reward=0.7628991 (475.62 it/sec) -training >> step=8256100, episode=1377 reward=0.7727208 (476.09 it/sec) -training >> step=8256200, episode=1377 reward=0.7907769 (498.17 it/sec) -training >> step=8256300, episode=1377 reward=0.7862096 (478.21 it/sec) -training >> step=8256400, episode=1377 reward=0.7733819 (458.29 it/sec) -training >> step=8256500, episode=1377 reward=0.7785206 (458.86 it/sec) -training >> step=8256600, episode=1377 reward=0.7956457 (453.18 it/sec) -training >> step=8256700, episode=1377 reward=0.7675609 (499.77 it/sec) -training >> step=8256800, episode=1377 reward=0.7876067 (480.64 it/sec) -training >> step=8256900, episode=1377 reward=0.8008979 (507.51 it/sec) -training >> step=8257000, episode=1377 reward=0.7990524 (526.91 it/sec) -training >> step=8257100, episode=1377 reward=0.8036705 (516.57 it/sec) -training >> step=8257200, episode=1377 reward=0.8027881 (523.54 it/sec) -training >> step=8257300, episode=1377 reward=0.7812812 (538.12 it/sec) -training >> step=8257400, episode=1377 reward=0.7710614 (509.19 it/sec) -training >> step=8257500, episode=1377 reward=0.7921873 (517.26 it/sec) -training >> step=8257600, episode=1377 reward=0.7665988 (526.43 it/sec) -training >> step=8257700, episode=1377 reward=0.7802089 (531.61 it/sec) -training >> step=8257800, episode=1377 reward=0.7928017 (513.01 it/sec) -training >> step=8257900, episode=1377 reward=0.7876199 (515.41 it/sec) -training >> step=8258000, episode=1377 reward=0.8014793 (491.58 it/sec) -training >> step=8258100, episode=1377 reward=0.7889131 (559.00 it/sec) -training >> step=8258200, episode=1377 reward=0.7786416 (505.65 it/sec) -training >> step=8258300, episode=1377 reward=0.7887851 (488.54 it/sec) -training >> step=8258400, episode=1377 reward=0.766943 (474.31 it/sec) -training >> step=8258500, episode=1377 reward=0.7716237 (464.94 it/sec) -training >> step=8258600, episode=1377 reward=0.7851892 (528.80 it/sec) -training >> step=8258700, episode=1377 reward=0.8053724 (518.00 it/sec) -training >> step=8258800, episode=1377 reward=0.8006933 (549.15 it/sec) -training >> step=8258900, episode=1377 reward=0.7914924 (485.40 it/sec) -training >> step=8259000, episode=1377 reward=0.8137108 (507.36 it/sec) -training >> step=8259100, episode=1377 reward=0.7996942 (500.05 it/sec) -training >> step=8259200, episode=1377 reward=0.796424 (511.29 it/sec) -training >> step=8259300, episode=1377 reward=0.7680688 (545.18 it/sec) -training >> step=8259400, episode=1377 reward=0.7763336 (522.27 it/sec) -training >> step=8259500, episode=1377 reward=0.7839326 (507.71 it/sec) -training >> step=8259600, episode=1377 reward=0.7995598 (520.68 it/sec) -training >> step=8259700, episode=1377 reward=0.7873329 (474.04 it/sec) -training >> step=8259800, episode=1377 reward=0.7921394 (532.37 it/sec) -training >> step=8259900, episode=1377 reward=0.7906539 (540.82 it/sec) -training >> step=8260000, episode=1377 reward=0.7974272 (487.54 it/sec) -training >> step=8260100, episode=1377 reward=0.7805251 (510.97 it/sec) -training >> step=8260200, episode=1377 reward=0.7836381 (536.03 it/sec) -training >> step=8260300, episode=1377 reward=0.7639661 (536.73 it/sec) -training >> step=8260400, episode=1377 reward=0.7714038 (526.11 it/sec) -training >> step=8260500, episode=1377 reward=0.7866901 (501.81 it/sec) -training >> step=8260600, episode=1377 reward=0.7775306 (535.34 it/sec) -training >> step=8260700, episode=1377 reward=0.7793558 (506.50 it/sec) -training >> step=8260800, episode=1377 reward=0.7848995 (380.92 it/sec) -training >> step=8260900, episode=1377 reward=0.8067535 (530.36 it/sec) -training >> step=8261000, episode=1377 reward=0.7755571 (509.49 it/sec) -training >> step=8261100, episode=1377 reward=0.7949243 (524.78 it/sec) -training >> step=8261200, episode=1377 reward=0.8032624 (500.28 it/sec) -training >> step=8261300, episode=1378 reward=0.8060066 (142.28 it/sec) -training >> step=8261400, episode=1378 reward=0.7941799 (501.83 it/sec) -training >> step=8261500, episode=1378 reward=0.7805929 (526.76 it/sec) -training >> step=8261600, episode=1378 reward=0.8002966 (525.94 it/sec) -training >> step=8261700, episode=1378 reward=0.7836194 (558.12 it/sec) -training >> step=8261800, episode=1378 reward=0.7651238 (481.27 it/sec) -training >> step=8261900, episode=1378 reward=0.7757716 (498.81 it/sec) -training >> step=8262000, episode=1378 reward=0.7860813 (532.10 it/sec) -training >> step=8262100, episode=1378 reward=0.7818858 (514.00 it/sec) -training >> step=8262200, episode=1378 reward=0.7848364 (547.96 it/sec) -training >> step=8262300, episode=1378 reward=0.7901322 (516.12 it/sec) -training >> step=8262400, episode=1378 reward=0.7880788 (523.82 it/sec) -training >> step=8262500, episode=1378 reward=0.7743708 (513.90 it/sec) -training >> step=8262600, episode=1378 reward=0.7646879 (522.27 it/sec) -training >> step=8262700, episode=1378 reward=0.7743195 (527.80 it/sec) -training >> step=8262800, episode=1378 reward=0.7921259 (550.03 it/sec) -training >> step=8262900, episode=1378 reward=0.8031481 (486.78 it/sec) -training >> step=8263000, episode=1378 reward=0.7996295 (492.71 it/sec) -training >> step=8263100, episode=1378 reward=0.8007223 (513.20 it/sec) -training >> step=8263200, episode=1378 reward=0.794886 (534.12 it/sec) -training >> step=8263300, episode=1378 reward=0.7887424 (491.22 it/sec) -training >> step=8263400, episode=1378 reward=0.7663151 (497.71 it/sec) -training >> step=8263500, episode=1378 reward=0.8206725 (540.63 it/sec) -training >> step=8263600, episode=1378 reward=0.7739815 (538.80 it/sec) -training >> step=8263700, episode=1378 reward=0.7913232 (510.56 it/sec) -training >> step=8263800, episode=1378 reward=0.7875609 (509.60 it/sec) -training >> step=8263900, episode=1378 reward=0.7821192 (523.20 it/sec) -training >> step=8264000, episode=1378 reward=0.7726653 (509.97 it/sec) -training >> step=8264100, episode=1378 reward=0.7855705 (520.21 it/sec) -training >> step=8264200, episode=1378 reward=0.7785216 (517.12 it/sec) -training >> step=8264300, episode=1378 reward=0.7775494 (543.63 it/sec) -training >> step=8264400, episode=1378 reward=0.7943977 (411.22 it/sec) -training >> step=8264500, episode=1378 reward=0.7901819 (510.16 it/sec) -training >> step=8264600, episode=1378 reward=0.8061743 (518.18 it/sec) -training >> step=8264700, episode=1378 reward=0.7731785 (523.13 it/sec) -training >> step=8264800, episode=1378 reward=0.7834449 (542.09 it/sec) -training >> step=8264900, episode=1378 reward=0.7832548 (523.76 it/sec) -training >> step=8265000, episode=1378 reward=0.7832727 (502.95 it/sec) -training >> step=8265100, episode=1378 reward=0.7850724 (544.24 it/sec) -training >> step=8265200, episode=1378 reward=0.7899421 (510.47 it/sec) -training >> step=8265300, episode=1378 reward=0.7937872 (533.73 it/sec) -training >> step=8265400, episode=1378 reward=0.8036206 (557.36 it/sec) -training >> step=8265500, episode=1378 reward=0.7940233 (496.18 it/sec) -training >> step=8265600, episode=1378 reward=0.7809308 (521.33 it/sec) -training >> step=8265700, episode=1378 reward=0.8070884 (521.18 it/sec) -training >> step=8265800, episode=1378 reward=0.7727423 (501.76 it/sec) -training >> step=8265900, episode=1378 reward=0.7710486 (479.34 it/sec) -training >> step=8266000, episode=1378 reward=0.7844966 (510.19 it/sec) -training >> step=8266100, episode=1378 reward=0.7945863 (523.70 it/sec) -training >> step=8266200, episode=1378 reward=0.7668356 (507.29 it/sec) -training >> step=8266300, episode=1378 reward=0.7854245 (502.96 it/sec) -training >> step=8266400, episode=1378 reward=0.7763807 (542.66 it/sec) -training >> step=8266500, episode=1378 reward=0.7752682 (534.41 it/sec) -training >> step=8266600, episode=1378 reward=0.7924651 (520.52 it/sec) -training >> step=8266700, episode=1378 reward=0.793651 (499.64 it/sec) -training >> step=8266800, episode=1378 reward=0.773212 (538.93 it/sec) -training >> step=8266900, episode=1378 reward=0.7800928 (378.75 it/sec) -training >> step=8267000, episode=1378 reward=0.7971176 (535.17 it/sec) -training >> step=8267100, episode=1378 reward=0.7701955 (501.04 it/sec) -training >> step=8267200, episode=1378 reward=0.7855388 (541.88 it/sec) -training >> step=8267300, episode=1379 reward=0.7852309 (116.28 it/sec) -training >> step=8267400, episode=1379 reward=0.7823281 (514.17 it/sec) -training >> step=8267500, episode=1379 reward=0.7848685 (497.68 it/sec) -training >> step=8267600, episode=1379 reward=0.803916 (515.79 it/sec) -training >> step=8267700, episode=1379 reward=0.7921737 (537.37 it/sec) -training >> step=8267800, episode=1379 reward=0.7884693 (516.46 it/sec) -training >> step=8267900, episode=1379 reward=0.7929969 (524.77 it/sec) -training >> step=8268000, episode=1379 reward=0.7763489 (470.90 it/sec) -training >> step=8268100, episode=1379 reward=0.7859405 (497.30 it/sec) -training >> step=8268200, episode=1379 reward=0.7891349 (554.08 it/sec) -training >> step=8268300, episode=1379 reward=0.7892838 (499.80 it/sec) -training >> step=8268400, episode=1379 reward=0.8004022 (513.05 it/sec) -training >> step=8268500, episode=1379 reward=0.796094 (506.48 it/sec) -training >> step=8268600, episode=1379 reward=0.7732742 (523.12 it/sec) -training >> step=8268700, episode=1379 reward=0.8008711 (510.62 it/sec) -training >> step=8268800, episode=1379 reward=0.7880004 (512.03 it/sec) -training >> step=8268900, episode=1379 reward=0.7655771 (524.06 it/sec) -training >> step=8269000, episode=1379 reward=0.7909771 (552.97 it/sec) -training >> step=8269100, episode=1379 reward=0.795043 (513.34 it/sec) -training >> step=8269200, episode=1379 reward=0.7873889 (522.37 it/sec) -training >> step=8269300, episode=1379 reward=0.7892898 (528.00 it/sec) -training >> step=8269400, episode=1379 reward=0.7850673 (523.39 it/sec) -training >> step=8269500, episode=1379 reward=0.7857589 (493.56 it/sec) -training >> step=8269600, episode=1379 reward=0.7858893 (518.12 it/sec) -training >> step=8269700, episode=1379 reward=0.8142956 (507.35 it/sec) -training >> step=8269800, episode=1379 reward=0.7876064 (542.38 it/sec) -training >> step=8269900, episode=1379 reward=0.7988093 (495.31 it/sec) -training >> step=8270000, episode=1379 reward=0.7812777 (526.77 it/sec) -training >> step=8270100, episode=1379 reward=0.788313 (529.83 it/sec) -training >> step=8270200, episode=1379 reward=0.8044404 (516.81 it/sec) -training >> step=8270300, episode=1379 reward=0.7877838 (520.75 it/sec) -training >> step=8270400, episode=1379 reward=0.784126 (491.60 it/sec) -training >> step=8270500, episode=1379 reward=0.7756532 (529.64 it/sec) -training >> step=8270600, episode=1379 reward=0.7875381 (504.89 it/sec) -training >> step=8270700, episode=1379 reward=0.7849056 (518.26 it/sec) -training >> step=8270800, episode=1379 reward=0.7713522 (549.41 it/sec) -training >> step=8270900, episode=1379 reward=0.7897701 (509.29 it/sec) -training >> step=8271000, episode=1379 reward=0.7836046 (514.66 it/sec) -training >> step=8271100, episode=1379 reward=0.7854477 (519.45 it/sec) -training >> step=8271200, episode=1379 reward=0.7857412 (493.62 it/sec) -training >> step=8271300, episode=1379 reward=0.792887 (538.72 it/sec) -training >> step=8271400, episode=1379 reward=0.779391 (539.11 it/sec) -training >> step=8271500, episode=1379 reward=0.798658 (522.93 it/sec) -training >> step=8271600, episode=1379 reward=0.7885779 (534.95 it/sec) -training >> step=8271700, episode=1379 reward=0.7826572 (471.76 it/sec) -training >> step=8271800, episode=1379 reward=0.7766373 (537.35 it/sec) -training >> step=8271900, episode=1379 reward=0.8072167 (545.35 it/sec) -training >> step=8272000, episode=1379 reward=0.7713932 (504.44 it/sec) -training >> step=8272100, episode=1379 reward=0.7701839 (518.14 it/sec) -training >> step=8272200, episode=1379 reward=0.7829652 (557.55 it/sec) -training >> step=8272300, episode=1379 reward=0.7682674 (499.87 it/sec) -training >> step=8272400, episode=1379 reward=0.7595206 (535.79 it/sec) -training >> step=8272500, episode=1379 reward=0.7673851 (511.66 it/sec) -training >> step=8272600, episode=1379 reward=0.7986665 (543.63 it/sec) -training >> step=8272700, episode=1379 reward=0.7762617 (521.01 it/sec) -training >> step=8272800, episode=1379 reward=0.770826 (519.45 it/sec) -training >> step=8272900, episode=1379 reward=0.7876443 (531.43 it/sec) -training >> step=8273000, episode=1379 reward=0.797581 (501.87 it/sec) -training >> step=8273100, episode=1379 reward=0.7855144 (360.75 it/sec) -training >> step=8273200, episode=1379 reward=0.8000259 (493.79 it/sec) -training >> step=8273300, episode=1380 reward=0.7863554 (139.59 it/sec) -training >> step=8273400, episode=1380 reward=0.7816767 (554.70 it/sec) -training >> step=8273500, episode=1380 reward=0.7843996 (497.55 it/sec) -training >> step=8273600, episode=1380 reward=0.7859394 (520.49 it/sec) -training >> step=8273700, episode=1380 reward=0.7930791 (540.57 it/sec) -training >> step=8273800, episode=1380 reward=0.8112109 (522.57 it/sec) -training >> step=8273900, episode=1380 reward=0.7923973 (492.31 it/sec) -training >> step=8274000, episode=1380 reward=0.8031394 (489.35 it/sec) -training >> step=8274100, episode=1380 reward=0.7967662 (507.84 it/sec) -training >> step=8274200, episode=1380 reward=0.7864794 (543.92 it/sec) -training >> step=8274300, episode=1380 reward=0.7816148 (518.60 it/sec) -training >> step=8274400, episode=1380 reward=0.8020533 (509.07 it/sec) -training >> step=8274500, episode=1380 reward=0.7787145 (536.77 it/sec) -training >> step=8274600, episode=1380 reward=0.789655 (471.88 it/sec) -training >> step=8274700, episode=1380 reward=0.7886848 (530.76 it/sec) -training >> step=8274800, episode=1380 reward=0.8043161 (543.19 it/sec) -training >> step=8274900, episode=1380 reward=0.7825869 (516.48 it/sec) -training >> step=8275000, episode=1380 reward=0.8053671 (518.99 it/sec) -training >> step=8275100, episode=1380 reward=0.799215 (490.81 it/sec) -training >> step=8275200, episode=1380 reward=0.7951254 (516.96 it/sec) -training >> step=8275300, episode=1380 reward=0.7983601 (541.44 it/sec) -training >> step=8275400, episode=1380 reward=0.7800431 (489.34 it/sec) -training >> step=8275500, episode=1380 reward=0.791248 (521.80 it/sec) -training >> step=8275600, episode=1380 reward=0.7699624 (517.41 it/sec) -training >> step=8275700, episode=1380 reward=0.7863342 (498.67 it/sec) -training >> step=8275800, episode=1380 reward=0.7957139 (540.90 it/sec) -training >> step=8275900, episode=1380 reward=0.7738432 (529.43 it/sec) -training >> step=8276000, episode=1380 reward=0.7954615 (519.84 it/sec) -training >> step=8276100, episode=1380 reward=0.8084188 (508.02 it/sec) -training >> step=8276200, episode=1380 reward=0.7692427 (529.04 it/sec) -training >> step=8276300, episode=1380 reward=0.7812515 (513.66 it/sec) -training >> step=8276400, episode=1380 reward=0.7889333 (515.30 it/sec) -training >> step=8276500, episode=1380 reward=0.8029535 (502.73 it/sec) -training >> step=8276600, episode=1380 reward=0.7795528 (525.23 it/sec) -training >> step=8276700, episode=1380 reward=0.7670068 (506.67 it/sec) -training >> step=8276800, episode=1380 reward=0.7741872 (533.32 it/sec) -training >> step=8276900, episode=1380 reward=0.773506 (548.28 it/sec) -training >> step=8277000, episode=1380 reward=0.7881953 (492.20 it/sec) -training >> step=8277100, episode=1380 reward=0.7822063 (513.71 it/sec) -training >> step=8277200, episode=1380 reward=0.7992145 (511.47 it/sec) -training >> step=8277300, episode=1380 reward=0.7753096 (520.09 it/sec) -training >> step=8277400, episode=1380 reward=0.8011677 (487.24 it/sec) -training >> step=8277500, episode=1380 reward=0.7715395 (495.32 it/sec) -training >> step=8277600, episode=1380 reward=0.7821321 (497.69 it/sec) -training >> step=8277700, episode=1380 reward=0.7794398 (518.22 it/sec) -training >> step=8277800, episode=1380 reward=0.7695391 (499.90 it/sec) -training >> step=8277900, episode=1380 reward=0.7890499 (504.44 it/sec) -training >> step=8278000, episode=1380 reward=0.7656684 (542.59 it/sec) -training >> step=8278100, episode=1380 reward=0.774819 (486.41 it/sec) -training >> step=8278200, episode=1380 reward=0.80701 (463.41 it/sec) -training >> step=8278300, episode=1380 reward=0.7924235 (479.53 it/sec) -training >> step=8278400, episode=1380 reward=0.7809858 (473.75 it/sec) -training >> step=8278500, episode=1380 reward=0.7782447 (475.89 it/sec) -training >> step=8278600, episode=1380 reward=0.7927008 (472.93 it/sec) -training >> step=8278700, episode=1380 reward=0.7926977 (497.85 it/sec) -training >> step=8278800, episode=1380 reward=0.7672678 (445.75 it/sec) -training >> step=8278900, episode=1380 reward=0.7708641 (530.99 it/sec) -training >> step=8279000, episode=1380 reward=0.7823098 (485.81 it/sec) -training >> step=8279100, episode=1380 reward=0.7804251 (376.87 it/sec) -training >> step=8279200, episode=1380 reward=0.7745043 (291.74 it/sec) -training >> step=8279300, episode=1381 reward=0.7971541 (113.48 it/sec) -training >> step=8279400, episode=1381 reward=0.7911158 (471.17 it/sec) -training >> step=8279500, episode=1381 reward=0.7806737 (481.93 it/sec) -training >> step=8279600, episode=1381 reward=0.799894 (425.90 it/sec) -training >> step=8279700, episode=1381 reward=0.7924678 (466.24 it/sec) -training >> step=8279800, episode=1381 reward=0.7818498 (448.04 it/sec) -training >> step=8279900, episode=1381 reward=0.7677228 (484.30 it/sec) -training >> step=8280000, episode=1381 reward=0.8103787 (494.13 it/sec) -training >> step=8280100, episode=1381 reward=0.7794283 (483.90 it/sec) -training >> step=8280200, episode=1381 reward=0.7915841 (516.87 it/sec) -training >> step=8280300, episode=1381 reward=0.7743592 (500.01 it/sec) -training >> step=8280400, episode=1381 reward=0.7795796 (519.58 it/sec) -training >> step=8280500, episode=1381 reward=0.7923595 (449.42 it/sec) -training >> step=8280600, episode=1381 reward=0.7981187 (507.56 it/sec) -training >> step=8280700, episode=1381 reward=0.7835315 (498.63 it/sec) -training >> step=8280800, episode=1381 reward=0.7949848 (526.66 it/sec) -training >> step=8280900, episode=1381 reward=0.7927161 (528.54 it/sec) -training >> step=8281000, episode=1381 reward=0.7802971 (543.99 it/sec) -training >> step=8281100, episode=1381 reward=0.8118184 (523.42 it/sec) -training >> step=8281200, episode=1381 reward=0.7835354 (510.74 it/sec) -training >> step=8281300, episode=1381 reward=0.7780289 (512.51 it/sec) -training >> step=8281400, episode=1381 reward=0.8061706 (480.72 it/sec) -training >> step=8281500, episode=1381 reward=0.7883912 (550.43 it/sec) -training >> step=8281600, episode=1381 reward=0.7866609 (506.15 it/sec) -training >> step=8281700, episode=1381 reward=0.7852561 (491.97 it/sec) -training >> step=8281800, episode=1381 reward=0.7998422 (543.10 it/sec) -training >> step=8281900, episode=1381 reward=0.7790805 (525.28 it/sec) -training >> step=8282000, episode=1381 reward=0.7742634 (525.70 it/sec) -training >> step=8282100, episode=1381 reward=0.7866719 (537.03 it/sec) -training >> step=8282200, episode=1381 reward=0.7990187 (504.82 it/sec) -training >> step=8282300, episode=1381 reward=0.7983561 (460.11 it/sec) -training >> step=8282400, episode=1381 reward=0.7893716 (509.23 it/sec) -training >> step=8282500, episode=1381 reward=0.7772403 (502.96 it/sec) -training >> step=8282600, episode=1381 reward=0.7592306 (474.01 it/sec) -training >> step=8282700, episode=1381 reward=0.8019447 (427.40 it/sec) -training >> step=8282800, episode=1381 reward=0.7783861 (438.13 it/sec) -training >> step=8282900, episode=1381 reward=0.7700252 (438.29 it/sec) -training >> step=8283000, episode=1381 reward=0.7705722 (462.16 it/sec) -training >> step=8283100, episode=1381 reward=0.7959626 (482.10 it/sec) -training >> step=8283200, episode=1381 reward=0.7976673 (457.03 it/sec) -training >> step=8283300, episode=1381 reward=0.7802555 (500.29 it/sec) -training >> step=8283400, episode=1381 reward=0.7590299 (482.93 it/sec) -training >> step=8283500, episode=1381 reward=0.8039589 (438.03 it/sec) -training >> step=8283600, episode=1381 reward=0.7777696 (504.55 it/sec) -training >> step=8283700, episode=1381 reward=0.7876192 (506.75 it/sec) -training >> step=8283800, episode=1381 reward=0.7631959 (524.26 it/sec) -training >> step=8283900, episode=1381 reward=0.7765815 (494.91 it/sec) -training >> step=8284000, episode=1381 reward=0.7763726 (540.76 it/sec) -training >> step=8284100, episode=1381 reward=0.7776142 (489.98 it/sec) -training >> step=8284200, episode=1381 reward=0.8014942 (503.50 it/sec) -training >> step=8284300, episode=1381 reward=0.777917 (538.58 it/sec) -training >> step=8284400, episode=1381 reward=0.7644899 (555.21 it/sec) -training >> step=8284500, episode=1381 reward=0.7743003 (517.10 it/sec) -training >> step=8284600, episode=1381 reward=0.7919178 (519.49 it/sec) -training >> step=8284700, episode=1381 reward=0.785714 (535.59 it/sec) -training >> step=8284800, episode=1381 reward=0.8005137 (446.68 it/sec) -training >> step=8284900, episode=1381 reward=0.7875313 (475.91 it/sec) -training >> step=8285000, episode=1381 reward=0.7851315 (528.50 it/sec) -training >> step=8285100, episode=1381 reward=0.7778386 (529.50 it/sec) -training >> step=8285200, episode=1381 reward=0.7644127 (507.46 it/sec) -training >> step=8285300, episode=1382 reward=0.7886695 (116.97 it/sec) -training >> step=8285400, episode=1382 reward=0.7850155 (514.23 it/sec) -training >> step=8285500, episode=1382 reward=0.775501 (463.50 it/sec) -training >> step=8285600, episode=1382 reward=0.79223 (463.12 it/sec) -training >> step=8285700, episode=1382 reward=0.7968306 (462.40 it/sec) -training >> step=8285800, episode=1382 reward=0.8014016 (524.13 it/sec) -training >> step=8285900, episode=1382 reward=0.7722675 (480.40 it/sec) -training >> step=8286000, episode=1382 reward=0.7756893 (481.57 it/sec) -training >> step=8286100, episode=1382 reward=0.786023 (495.89 it/sec) -training >> step=8286200, episode=1382 reward=0.7773894 (495.27 it/sec) -training >> step=8286300, episode=1382 reward=0.7704893 (509.18 it/sec) -training >> step=8286400, episode=1382 reward=0.8007413 (476.57 it/sec) -training >> step=8286500, episode=1382 reward=0.7848299 (444.40 it/sec) -training >> step=8286600, episode=1382 reward=0.8071585 (477.77 it/sec) -training >> step=8286700, episode=1382 reward=0.7989826 (492.10 it/sec) -training >> step=8286800, episode=1382 reward=0.7826142 (523.09 it/sec) -training >> step=8286900, episode=1382 reward=0.7854157 (502.27 it/sec) -training >> step=8287000, episode=1382 reward=0.779973 (492.26 it/sec) -training >> step=8287100, episode=1382 reward=0.8014323 (477.82 it/sec) -training >> step=8287200, episode=1382 reward=0.7838796 (506.11 it/sec) -training >> step=8287300, episode=1382 reward=0.7719616 (392.20 it/sec) -training >> step=8287400, episode=1382 reward=0.7899861 (487.27 it/sec) -training >> step=8287500, episode=1382 reward=0.7786109 (489.30 it/sec) -training >> step=8287600, episode=1382 reward=0.8090808 (470.25 it/sec) -training >> step=8287700, episode=1382 reward=0.7683387 (490.90 it/sec) -training >> step=8287800, episode=1382 reward=0.8037081 (483.99 it/sec) -training >> step=8287900, episode=1382 reward=0.7812532 (512.44 it/sec) -training >> step=8288000, episode=1382 reward=0.7812381 (505.23 it/sec) -training >> step=8288100, episode=1382 reward=0.7746783 (433.68 it/sec) -training >> step=8288200, episode=1382 reward=0.7981839 (463.16 it/sec) -training >> step=8288300, episode=1382 reward=0.790535 (408.75 it/sec) -training >> step=8288400, episode=1382 reward=0.7732216 (489.11 it/sec) -training >> step=8288500, episode=1382 reward=0.8053415 (418.27 it/sec) -training >> step=8288600, episode=1382 reward=0.7947991 (389.37 it/sec) -training >> step=8288700, episode=1382 reward=0.7885919 (414.55 it/sec) -training >> step=8288800, episode=1382 reward=0.796603 (466.42 it/sec) -training >> step=8288900, episode=1382 reward=0.8001944 (452.47 it/sec) -training >> step=8289000, episode=1382 reward=0.7757394 (478.46 it/sec) -training >> step=8289100, episode=1382 reward=0.7837483 (445.83 it/sec) -training >> step=8289200, episode=1382 reward=0.7609866 (472.77 it/sec) -training >> step=8289300, episode=1382 reward=0.7823075 (483.15 it/sec) -training >> step=8289400, episode=1382 reward=0.788556 (486.11 it/sec) -training >> step=8289500, episode=1382 reward=0.7791015 (509.56 it/sec) -training >> step=8289600, episode=1382 reward=0.7740316 (468.91 it/sec) -training >> step=8289700, episode=1382 reward=0.7817425 (472.13 it/sec) -training >> step=8289800, episode=1382 reward=0.796781 (495.62 it/sec) -training >> step=8289900, episode=1382 reward=0.7994702 (447.98 it/sec) -training >> step=8290000, episode=1382 reward=0.7753955 (470.51 it/sec) -training >> step=8290100, episode=1382 reward=0.7805374 (511.25 it/sec) -training >> step=8290200, episode=1382 reward=0.7923021 (439.09 it/sec) -training >> step=8290300, episode=1382 reward=0.7855377 (458.64 it/sec) -training >> step=8290400, episode=1382 reward=0.7782667 (448.15 it/sec) -training >> step=8290500, episode=1382 reward=0.8124775 (476.54 it/sec) -training >> step=8290600, episode=1382 reward=0.7798323 (418.11 it/sec) -training >> step=8290700, episode=1382 reward=0.7767963 (389.87 it/sec) -training >> step=8290800, episode=1382 reward=0.7947372 (449.44 it/sec) -training >> step=8290900, episode=1382 reward=0.772105 (436.00 it/sec) -training >> step=8291000, episode=1382 reward=0.7804617 (465.66 it/sec) -training >> step=8291100, episode=1382 reward=0.788529 (424.84 it/sec) -training >> step=8291200, episode=1382 reward=0.7891631 (326.80 it/sec) -training >> step=8291300, episode=1383 reward=0.7780837 (58.23 it/sec) -training >> step=8291400, episode=1383 reward=0.789266 (423.96 it/sec) -training >> step=8291500, episode=1383 reward=0.7709641 (451.05 it/sec) -training >> step=8291600, episode=1383 reward=0.7852743 (422.69 it/sec) -training >> step=8291700, episode=1383 reward=0.7800434 (482.57 it/sec) -training >> step=8291800, episode=1383 reward=0.7990286 (498.10 it/sec) -training >> step=8291900, episode=1383 reward=0.7884772 (497.02 it/sec) -training >> step=8292000, episode=1383 reward=0.7826787 (512.50 it/sec) -training >> step=8292100, episode=1383 reward=0.8184262 (490.23 it/sec) -training >> step=8292200, episode=1383 reward=0.7922823 (514.70 it/sec) -training >> step=8292300, episode=1383 reward=0.7929079 (501.75 it/sec) -training >> step=8292400, episode=1383 reward=0.7790329 (523.23 it/sec) -training >> step=8292500, episode=1383 reward=0.8012899 (509.06 it/sec) -training >> step=8292600, episode=1383 reward=0.7710336 (507.94 it/sec) -training >> step=8292700, episode=1383 reward=0.7929226 (489.69 it/sec) -training >> step=8292800, episode=1383 reward=0.7953629 (525.67 it/sec) -training >> step=8292900, episode=1383 reward=0.7923289 (539.53 it/sec) -training >> step=8293000, episode=1383 reward=0.7842022 (492.36 it/sec) -training >> step=8293100, episode=1383 reward=0.7696645 (487.24 it/sec) -training >> step=8293200, episode=1383 reward=0.766378 (533.26 it/sec) -training >> step=8293300, episode=1383 reward=0.813917 (478.41 it/sec) -training >> step=8293400, episode=1383 reward=0.7760332 (537.01 it/sec) -training >> step=8293500, episode=1383 reward=0.7908688 (485.84 it/sec) -training >> step=8293600, episode=1383 reward=0.7721993 (519.54 it/sec) -training >> step=8293700, episode=1383 reward=0.8049715 (510.93 it/sec) -training >> step=8293800, episode=1383 reward=0.7867365 (504.13 it/sec) -training >> step=8293900, episode=1383 reward=0.7923474 (544.57 it/sec) -training >> step=8294000, episode=1383 reward=0.7864828 (532.30 it/sec) -training >> step=8294100, episode=1383 reward=0.8024376 (498.88 it/sec) -training >> step=8294200, episode=1383 reward=0.7977877 (496.63 it/sec) -training >> step=8294300, episode=1383 reward=0.7761008 (535.63 it/sec) -training >> step=8294400, episode=1383 reward=0.7864082 (514.86 it/sec) -training >> step=8294500, episode=1383 reward=0.796463 (534.19 it/sec) -training >> step=8294600, episode=1383 reward=0.7949006 (510.44 it/sec) -training >> step=8294700, episode=1383 reward=0.7959498 (513.78 it/sec) -training >> step=8294800, episode=1383 reward=0.7922983 (527.77 it/sec) -training >> step=8294900, episode=1383 reward=0.7814414 (479.06 it/sec) -training >> step=8295000, episode=1383 reward=0.7854375 (556.41 it/sec) -training >> step=8295100, episode=1383 reward=0.7929228 (511.25 it/sec) -training >> step=8295200, episode=1383 reward=0.7931722 (507.77 it/sec) -training >> step=8295300, episode=1383 reward=0.7707814 (483.49 it/sec) -training >> step=8295400, episode=1383 reward=0.793534 (539.19 it/sec) -training >> step=8295500, episode=1383 reward=0.7893324 (498.82 it/sec) -training >> step=8295600, episode=1383 reward=0.7915322 (488.85 it/sec) -training >> step=8295700, episode=1383 reward=0.7600022 (500.56 it/sec) -training >> step=8295800, episode=1383 reward=0.7667465 (549.51 it/sec) -training >> step=8295900, episode=1383 reward=0.7872227 (518.84 it/sec) -training >> step=8296000, episode=1383 reward=0.7743526 (535.29 it/sec) -training >> step=8296100, episode=1383 reward=0.7778152 (544.45 it/sec) -training >> step=8296200, episode=1383 reward=0.7756397 (437.96 it/sec) -training >> step=8296300, episode=1383 reward=0.7952393 (478.21 it/sec) -training >> step=8296400, episode=1383 reward=0.7924901 (459.57 it/sec) -training >> step=8296500, episode=1383 reward=0.7796808 (556.05 it/sec) -training >> step=8296600, episode=1383 reward=0.7805609 (544.45 it/sec) -training >> step=8296700, episode=1383 reward=0.7687167 (488.38 it/sec) -training >> step=8296800, episode=1383 reward=0.7515571 (494.02 it/sec) -training >> step=8296900, episode=1383 reward=0.7808009 (531.55 it/sec) -training >> step=8297000, episode=1383 reward=0.7671233 (529.42 it/sec) -training >> step=8297100, episode=1383 reward=0.7887512 (529.87 it/sec) -training >> step=8297200, episode=1383 reward=0.7892506 (515.19 it/sec) -training >> step=8297300, episode=1384 reward=0.7966949 (121.61 it/sec) -training >> step=8297400, episode=1384 reward=0.7930767 (499.27 it/sec) -training >> step=8297500, episode=1384 reward=0.7763443 (484.79 it/sec) -training >> step=8297600, episode=1384 reward=0.7942996 (468.63 it/sec) -training >> step=8297700, episode=1384 reward=0.8046098 (472.26 it/sec) -training >> step=8297800, episode=1384 reward=0.7896177 (460.21 it/sec) -training >> step=8297900, episode=1384 reward=0.7915702 (437.70 it/sec) -training >> step=8298000, episode=1384 reward=0.792493 (454.13 it/sec) -training >> step=8298100, episode=1384 reward=0.7924957 (452.07 it/sec) -training >> step=8298200, episode=1384 reward=0.7749143 (475.66 it/sec) -training >> step=8298300, episode=1384 reward=0.7934425 (498.72 it/sec) -training >> step=8298400, episode=1384 reward=0.798083 (467.14 it/sec) -training >> step=8298500, episode=1384 reward=0.7950898 (425.90 it/sec) -training >> step=8298600, episode=1384 reward=0.7897959 (473.25 it/sec) -training >> step=8298700, episode=1384 reward=0.7867926 (478.81 it/sec) -training >> step=8298800, episode=1384 reward=0.7840202 (458.25 it/sec) -training >> step=8298900, episode=1384 reward=0.7918524 (477.53 it/sec) -training >> step=8299000, episode=1384 reward=0.7620752 (476.51 it/sec) -training >> step=8299100, episode=1384 reward=0.7807858 (444.15 it/sec) -training >> step=8299200, episode=1384 reward=0.8133939 (459.65 it/sec) -training >> step=8299300, episode=1384 reward=0.8021528 (436.57 it/sec) -training >> step=8299400, episode=1384 reward=0.7911852 (485.06 it/sec) -training >> step=8299500, episode=1384 reward=0.792204 (449.88 it/sec) -training >> step=8299600, episode=1384 reward=0.8058026 (470.53 it/sec) -training >> step=8299700, episode=1384 reward=0.7873642 (478.89 it/sec) -training >> step=8299800, episode=1384 reward=0.7974271 (457.48 it/sec) -training >> step=8299900, episode=1384 reward=0.7684824 (459.45 it/sec) -training >> step=8300000, episode=1384 reward=0.8090538 (470.90 it/sec) -training >> step=8300100, episode=1384 reward=0.7694827 (470.87 it/sec) -training >> step=8300200, episode=1384 reward=0.7813923 (483.98 it/sec) -training >> step=8300300, episode=1384 reward=0.7687629 (439.33 it/sec) -training >> step=8300400, episode=1384 reward=0.785054 (464.27 it/sec) -training >> step=8300500, episode=1384 reward=0.7724971 (475.67 it/sec) -training >> step=8300600, episode=1384 reward=0.789946 (449.21 it/sec) -training >> step=8300700, episode=1384 reward=0.7853745 (412.11 it/sec) -training >> step=8300800, episode=1384 reward=0.7791172 (444.46 it/sec) -training >> step=8300900, episode=1384 reward=0.7755364 (421.56 it/sec) -training >> step=8301000, episode=1384 reward=0.7825537 (480.96 it/sec) -training >> step=8301100, episode=1384 reward=0.7996065 (519.86 it/sec) -training >> step=8301200, episode=1384 reward=0.7773727 (499.54 it/sec) -training >> step=8301300, episode=1384 reward=0.7999117 (471.02 it/sec) -training >> step=8301400, episode=1384 reward=0.7863635 (461.68 it/sec) -training >> step=8301500, episode=1384 reward=0.7594483 (456.90 it/sec) -training >> step=8301600, episode=1384 reward=0.7685018 (469.80 it/sec) -training >> step=8301700, episode=1384 reward=0.7799476 (456.87 it/sec) -training >> step=8301800, episode=1384 reward=0.7941658 (447.38 it/sec) -training >> step=8301900, episode=1384 reward=0.8050407 (413.66 it/sec) -training >> step=8302000, episode=1384 reward=0.7905737 (448.57 it/sec) -training >> step=8302100, episode=1384 reward=0.8019606 (442.90 it/sec) -training >> step=8302200, episode=1384 reward=0.7747895 (460.78 it/sec) -training >> step=8302300, episode=1384 reward=0.7821037 (422.28 it/sec) -training >> step=8302400, episode=1384 reward=0.7921624 (472.03 it/sec) -training >> step=8302500, episode=1384 reward=0.7866942 (500.90 it/sec) -training >> step=8302600, episode=1384 reward=0.7908678 (422.03 it/sec) -training >> step=8302700, episode=1384 reward=0.7633508 (491.25 it/sec) -training >> step=8302800, episode=1384 reward=0.7774763 (504.07 it/sec) -training >> step=8302900, episode=1384 reward=0.7842984 (496.37 it/sec) -training >> step=8303000, episode=1384 reward=0.7610206 (541.81 it/sec) -training >> step=8303100, episode=1384 reward=0.7808802 (498.07 it/sec) -training >> step=8303200, episode=1384 reward=0.7789562 (464.46 it/sec) -training >> step=8303300, episode=1385 reward=0.7800752 (94.16 it/sec) -training >> step=8303400, episode=1385 reward=0.7840562 (491.78 it/sec) -training >> step=8303500, episode=1385 reward=0.7851566 (488.25 it/sec) -training >> step=8303600, episode=1385 reward=0.7682433 (515.62 it/sec) -training >> step=8303700, episode=1385 reward=0.8002139 (521.18 it/sec) -training >> step=8303800, episode=1385 reward=0.7860304 (468.77 it/sec) -training >> step=8303900, episode=1385 reward=0.7968769 (469.15 it/sec) -training >> step=8304000, episode=1385 reward=0.7836422 (501.30 it/sec) -training >> step=8304100, episode=1385 reward=0.8003945 (510.32 it/sec) -training >> step=8304200, episode=1385 reward=0.7886313 (490.82 it/sec) -training >> step=8304300, episode=1385 reward=0.7895952 (467.33 it/sec) -training >> step=8304400, episode=1385 reward=0.7903056 (500.52 it/sec) -training >> step=8304500, episode=1385 reward=0.7661649 (476.96 it/sec) -training >> step=8304600, episode=1385 reward=0.7904037 (480.46 it/sec) -training >> step=8304700, episode=1385 reward=0.7833251 (445.52 it/sec) -training >> step=8304800, episode=1385 reward=0.8002974 (456.14 it/sec) -training >> step=8304900, episode=1385 reward=0.7844039 (409.99 it/sec) -training >> step=8305000, episode=1385 reward=0.7570854 (465.21 it/sec) -training >> step=8305100, episode=1385 reward=0.7827026 (452.48 it/sec) -training >> step=8305200, episode=1385 reward=0.7980588 (406.60 it/sec) -training >> step=8305300, episode=1385 reward=0.7767914 (418.62 it/sec) -training >> step=8305400, episode=1385 reward=0.7798961 (404.06 it/sec) -training >> step=8305500, episode=1385 reward=0.7984588 (444.47 it/sec) -training >> step=8305600, episode=1385 reward=0.7993003 (395.03 it/sec) -training >> step=8305700, episode=1385 reward=0.7932615 (452.16 it/sec) -training >> step=8305800, episode=1385 reward=0.8021348 (506.06 it/sec) -training >> step=8305900, episode=1385 reward=0.7765673 (520.65 it/sec) -training >> step=8306000, episode=1385 reward=0.7969507 (524.51 it/sec) -training >> step=8306100, episode=1385 reward=0.7921901 (480.45 it/sec) -training >> step=8306200, episode=1385 reward=0.7825357 (535.97 it/sec) -training >> step=8306300, episode=1385 reward=0.7905532 (472.00 it/sec) -training >> step=8306400, episode=1385 reward=0.8007923 (521.24 it/sec) -training >> step=8306500, episode=1385 reward=0.7888616 (533.27 it/sec) -training >> step=8306600, episode=1385 reward=0.7800081 (467.56 it/sec) -training >> step=8306700, episode=1385 reward=0.7914822 (469.80 it/sec) -training >> step=8306800, episode=1385 reward=0.789855 (483.59 it/sec) -training >> step=8306900, episode=1385 reward=0.784105 (485.04 it/sec) -training >> step=8307000, episode=1385 reward=0.7945256 (483.64 it/sec) -training >> step=8307100, episode=1385 reward=0.8188436 (491.77 it/sec) -training >> step=8307200, episode=1385 reward=0.7911176 (474.79 it/sec) -training >> step=8307300, episode=1385 reward=0.7889255 (468.99 it/sec) -training >> step=8307400, episode=1385 reward=0.779919 (523.17 it/sec) -training >> step=8307500, episode=1385 reward=0.7795319 (511.11 it/sec) -training >> step=8307600, episode=1385 reward=0.7748464 (525.04 it/sec) -training >> step=8307700, episode=1385 reward=0.784838 (508.00 it/sec) -training >> step=8307800, episode=1385 reward=0.8047727 (516.51 it/sec) -training >> step=8307900, episode=1385 reward=0.7765043 (516.30 it/sec) -training >> step=8308000, episode=1385 reward=0.7999185 (546.69 it/sec) -training >> step=8308100, episode=1385 reward=0.776698 (514.48 it/sec) -training >> step=8308200, episode=1385 reward=0.7735143 (489.63 it/sec) -training >> step=8308300, episode=1385 reward=0.7695003 (475.53 it/sec) -training >> step=8308400, episode=1385 reward=0.7694057 (525.90 it/sec) -training >> step=8308500, episode=1385 reward=0.7975085 (516.03 it/sec) -training >> step=8308600, episode=1385 reward=0.7995477 (500.97 it/sec) -training >> step=8308700, episode=1385 reward=0.7734205 (500.49 it/sec) -training >> step=8308800, episode=1385 reward=0.7960685 (497.73 it/sec) -training >> step=8308900, episode=1385 reward=0.7808186 (494.09 it/sec) -training >> step=8309000, episode=1385 reward=0.7862888 (507.32 it/sec) -training >> step=8309100, episode=1385 reward=0.789121 (506.26 it/sec) -training >> step=8309200, episode=1385 reward=0.7771322 (488.71 it/sec) -training >> step=8309300, episode=1386 reward=0.7741873 (88.85 it/sec) -training >> step=8309400, episode=1386 reward=0.7769849 (412.14 it/sec) -training >> step=8309500, episode=1386 reward=0.7713359 (458.21 it/sec) -training >> step=8309600, episode=1386 reward=0.7842034 (468.28 it/sec) -training >> step=8309700, episode=1386 reward=0.7825703 (456.14 it/sec) -training >> step=8309800, episode=1386 reward=0.7882373 (487.03 it/sec) -training >> step=8309900, episode=1386 reward=0.7923487 (439.16 it/sec) -training >> step=8310000, episode=1386 reward=0.7920125 (459.01 it/sec) -training >> step=8310100, episode=1386 reward=0.8096865 (462.80 it/sec) -training >> step=8310200, episode=1386 reward=0.7764462 (434.16 it/sec) -training >> step=8310300, episode=1386 reward=0.7889594 (441.94 it/sec) -training >> step=8310400, episode=1386 reward=0.7744879 (472.53 it/sec) -training >> step=8310500, episode=1386 reward=0.7828763 (455.63 it/sec) -training >> step=8310600, episode=1386 reward=0.8011771 (479.06 it/sec) -training >> step=8310700, episode=1386 reward=0.7924898 (476.34 it/sec) -training >> step=8310800, episode=1386 reward=0.791467 (462.25 it/sec) -training >> step=8310900, episode=1386 reward=0.7772458 (443.52 it/sec) -training >> step=8311000, episode=1386 reward=0.778327 (465.73 it/sec) -training >> step=8311100, episode=1386 reward=0.7958208 (456.73 it/sec) -training >> step=8311200, episode=1386 reward=0.7934021 (479.97 it/sec) -training >> step=8311300, episode=1386 reward=0.7736415 (468.34 it/sec) -training >> step=8311400, episode=1386 reward=0.7748956 (451.72 it/sec) -training >> step=8311500, episode=1386 reward=0.7649705 (472.25 it/sec) -training >> step=8311600, episode=1386 reward=0.797745 (482.63 it/sec) -training >> step=8311700, episode=1386 reward=0.7700331 (437.99 it/sec) -training >> step=8311800, episode=1386 reward=0.7695781 (475.57 it/sec) -training >> step=8311900, episode=1386 reward=0.8016148 (466.01 it/sec) -training >> step=8312000, episode=1386 reward=0.7738096 (454.57 it/sec) -training >> step=8312100, episode=1386 reward=0.7604869 (451.53 it/sec) -training >> step=8312200, episode=1386 reward=0.7847139 (416.54 it/sec) -training >> step=8312300, episode=1386 reward=0.8012585 (451.17 it/sec) -training >> step=8312400, episode=1386 reward=0.7916682 (387.63 it/sec) -training >> step=8312500, episode=1386 reward=0.7817008 (445.43 it/sec) -training >> step=8312600, episode=1386 reward=0.7907218 (420.72 it/sec) -training >> step=8312700, episode=1386 reward=0.797689 (384.50 it/sec) -training >> step=8312800, episode=1386 reward=0.7973033 (500.77 it/sec) -training >> step=8312900, episode=1386 reward=0.7792984 (422.33 it/sec) -training >> step=8313000, episode=1386 reward=0.7922236 (432.73 it/sec) -training >> step=8313100, episode=1386 reward=0.7948631 (447.36 it/sec) -training >> step=8313200, episode=1386 reward=0.7762573 (420.80 it/sec) -training >> step=8313300, episode=1386 reward=0.7835311 (454.27 it/sec) -training >> step=8313400, episode=1386 reward=0.7936852 (518.21 it/sec) -training >> step=8313500, episode=1386 reward=0.7939156 (459.47 it/sec) -training >> step=8313600, episode=1386 reward=0.7759954 (450.20 it/sec) -training >> step=8313700, episode=1386 reward=0.7750621 (452.75 it/sec) -training >> step=8313800, episode=1386 reward=0.7979401 (470.66 it/sec) -training >> step=8313900, episode=1386 reward=0.7674605 (421.66 it/sec) -training >> step=8314000, episode=1386 reward=0.7818797 (444.19 it/sec) -training >> step=8314100, episode=1386 reward=0.7882389 (497.51 it/sec) -training >> step=8314200, episode=1386 reward=0.7731314 (544.07 it/sec) -training >> step=8314300, episode=1386 reward=0.7985511 (527.83 it/sec) -training >> step=8314400, episode=1386 reward=0.7825797 (536.47 it/sec) -training >> step=8314500, episode=1386 reward=0.7880229 (428.75 it/sec) -training >> step=8314600, episode=1386 reward=0.7667559 (445.35 it/sec) -training >> step=8314700, episode=1386 reward=0.7753941 (520.38 it/sec) -training >> step=8314800, episode=1386 reward=0.7793268 (535.30 it/sec) -training >> step=8314900, episode=1386 reward=0.7739365 (516.88 it/sec) -training >> step=8315000, episode=1386 reward=0.7953098 (490.05 it/sec) -training >> step=8315100, episode=1386 reward=0.7942833 (509.15 it/sec) -training >> step=8315200, episode=1386 reward=0.7738901 (540.25 it/sec) -training >> step=8315300, episode=1387 reward=0.7707442 (105.09 it/sec) -training >> step=8315400, episode=1387 reward=0.8010108 (426.03 it/sec) -training >> step=8315500, episode=1387 reward=0.7677444 (503.07 it/sec) -training >> step=8315600, episode=1387 reward=0.7957373 (493.33 it/sec) -training >> step=8315700, episode=1387 reward=0.7915241 (483.93 it/sec) -training >> step=8315800, episode=1387 reward=0.7882607 (504.37 it/sec) -training >> step=8315900, episode=1387 reward=0.8110368 (470.32 it/sec) -training >> step=8316000, episode=1387 reward=0.7986143 (465.93 it/sec) -training >> step=8316100, episode=1387 reward=0.7703242 (440.79 it/sec) -training >> step=8316200, episode=1387 reward=0.7966353 (451.46 it/sec) -training >> step=8316300, episode=1387 reward=0.7970549 (491.76 it/sec) -training >> step=8316400, episode=1387 reward=0.7853621 (494.92 it/sec) -training >> step=8316500, episode=1387 reward=0.7905021 (459.34 it/sec) -training >> step=8316600, episode=1387 reward=0.7956206 (430.30 it/sec) -training >> step=8316700, episode=1387 reward=0.7907056 (431.79 it/sec) -training >> step=8316800, episode=1387 reward=0.790804 (480.93 it/sec) -training >> step=8316900, episode=1387 reward=0.7983705 (456.18 it/sec) -training >> step=8317000, episode=1387 reward=0.7974017 (469.32 it/sec) -training >> step=8317100, episode=1387 reward=0.7897748 (505.78 it/sec) -training >> step=8317200, episode=1387 reward=0.7859381 (525.85 it/sec) -training >> step=8317300, episode=1387 reward=0.7964055 (510.33 it/sec) -training >> step=8317400, episode=1387 reward=0.7997531 (527.24 it/sec) -training >> step=8317500, episode=1387 reward=0.7789078 (463.89 it/sec) -training >> step=8317600, episode=1387 reward=0.779439 (469.91 it/sec) -training >> step=8317700, episode=1387 reward=0.8003558 (512.65 it/sec) -training >> step=8317800, episode=1387 reward=0.784754 (520.66 it/sec) -training >> step=8317900, episode=1387 reward=0.7897558 (494.01 it/sec) -training >> step=8318000, episode=1387 reward=0.7809131 (469.64 it/sec) -training >> step=8318100, episode=1387 reward=0.7983851 (466.32 it/sec) -training >> step=8318200, episode=1387 reward=0.7761728 (511.58 it/sec) -training >> step=8318300, episode=1387 reward=0.7927958 (488.57 it/sec) -training >> step=8318400, episode=1387 reward=0.7824611 (456.24 it/sec) -training >> step=8318500, episode=1387 reward=0.7740976 (464.52 it/sec) -training >> step=8318600, episode=1387 reward=0.7914996 (437.05 it/sec) -training >> step=8318700, episode=1387 reward=0.7753859 (409.66 it/sec) -training >> step=8318800, episode=1387 reward=0.795234 (375.45 it/sec) -training >> step=8318900, episode=1387 reward=0.7829686 (433.14 it/sec) -training >> step=8319000, episode=1387 reward=0.7901439 (408.80 it/sec) -training >> step=8319100, episode=1387 reward=0.7739163 (444.25 it/sec) -training >> step=8319200, episode=1387 reward=0.7861192 (470.27 it/sec) -training >> step=8319300, episode=1387 reward=0.7757436 (430.55 it/sec) -training >> step=8319400, episode=1387 reward=0.7748645 (384.48 it/sec) -training >> step=8319500, episode=1387 reward=0.7825282 (414.68 it/sec) -training >> step=8319600, episode=1387 reward=0.7881013 (445.33 it/sec) -training >> step=8319700, episode=1387 reward=0.7941809 (407.02 it/sec) -training >> step=8319800, episode=1387 reward=0.7834346 (450.25 it/sec) -training >> step=8319900, episode=1387 reward=0.7870417 (473.77 it/sec) -training >> step=8320000, episode=1387 reward=0.7883531 (509.98 it/sec) -training >> step=8320100, episode=1387 reward=0.7965524 (507.92 it/sec) -training >> step=8320200, episode=1387 reward=0.7869266 (506.65 it/sec) -training >> step=8320300, episode=1387 reward=0.7641266 (538.90 it/sec) -training >> step=8320400, episode=1387 reward=0.7738154 (488.88 it/sec) -training >> step=8320500, episode=1387 reward=0.8006542 (480.12 it/sec) -training >> step=8320600, episode=1387 reward=0.7885259 (522.23 it/sec) -training >> step=8320700, episode=1387 reward=0.7734842 (526.46 it/sec) -training >> step=8320800, episode=1387 reward=0.8059143 (524.95 it/sec) -training >> step=8320900, episode=1387 reward=0.7793 (489.93 it/sec) -training >> step=8321000, episode=1387 reward=0.8014688 (496.49 it/sec) -training >> step=8321100, episode=1387 reward=0.7852319 (491.04 it/sec) -training >> step=8321200, episode=1387 reward=0.7746388 (509.86 it/sec) -training >> step=8321300, episode=1388 reward=0.812991 (97.52 it/sec) -training >> step=8321400, episode=1388 reward=0.8053304 (517.51 it/sec) -training >> step=8321500, episode=1388 reward=0.7876161 (497.86 it/sec) -training >> step=8321600, episode=1388 reward=0.7821381 (464.06 it/sec) -training >> step=8321700, episode=1388 reward=0.7929041 (513.31 it/sec) -training >> step=8321800, episode=1388 reward=0.7894729 (509.82 it/sec) -training >> step=8321900, episode=1388 reward=0.8019045 (500.23 it/sec) -training >> step=8322000, episode=1388 reward=0.7865235 (497.25 it/sec) -training >> step=8322100, episode=1388 reward=0.7965841 (502.76 it/sec) -training >> step=8322200, episode=1388 reward=0.7828631 (446.38 it/sec) -training >> step=8322300, episode=1388 reward=0.7826182 (470.22 it/sec) -training >> step=8322400, episode=1388 reward=0.7795815 (488.06 it/sec) -training >> step=8322500, episode=1388 reward=0.7755747 (437.29 it/sec) -training >> step=8322600, episode=1388 reward=0.7889354 (424.89 it/sec) -training >> step=8322700, episode=1388 reward=0.7924808 (467.41 it/sec) -training >> step=8322800, episode=1388 reward=0.7920116 (486.82 it/sec) -training >> step=8322900, episode=1388 reward=0.7845323 (464.38 it/sec) -training >> step=8323000, episode=1388 reward=0.7848359 (489.24 it/sec) -training >> step=8323100, episode=1388 reward=0.7849244 (462.45 it/sec) -training >> step=8323200, episode=1388 reward=0.7684569 (528.17 it/sec) -training >> step=8323300, episode=1388 reward=0.7846124 (480.80 it/sec) -training >> step=8323400, episode=1388 reward=0.7947945 (529.12 it/sec) -training >> step=8323500, episode=1388 reward=0.8046981 (478.45 it/sec) -training >> step=8323600, episode=1388 reward=0.790222 (456.10 it/sec) -training >> step=8323700, episode=1388 reward=0.7892078 (492.78 it/sec) -training >> step=8323800, episode=1388 reward=0.7800914 (467.97 it/sec) -training >> step=8323900, episode=1388 reward=0.7896413 (473.70 it/sec) -training >> step=8324000, episode=1388 reward=0.7912475 (486.15 it/sec) -training >> step=8324100, episode=1388 reward=0.7706538 (468.94 it/sec) -training >> step=8324200, episode=1388 reward=0.7855492 (488.75 it/sec) -training >> step=8324300, episode=1388 reward=0.7910309 (490.20 it/sec) -training >> step=8324400, episode=1388 reward=0.7972003 (472.63 it/sec) -training >> step=8324500, episode=1388 reward=0.7688227 (452.28 it/sec) -training >> step=8324600, episode=1388 reward=0.7951821 (459.19 it/sec) -training >> step=8324700, episode=1388 reward=0.7997588 (505.56 it/sec) -training >> step=8324800, episode=1388 reward=0.7652954 (468.54 it/sec) -training >> step=8324900, episode=1388 reward=0.7772327 (489.22 it/sec) -training >> step=8325000, episode=1388 reward=0.7933554 (487.64 it/sec) -training >> step=8325100, episode=1388 reward=0.809153 (465.65 it/sec) -training >> step=8325200, episode=1388 reward=0.7910993 (483.00 it/sec) -training >> step=8325300, episode=1388 reward=0.7949827 (490.66 it/sec) -training >> step=8325400, episode=1388 reward=0.7740262 (458.36 it/sec) -training >> step=8325500, episode=1388 reward=0.7803588 (504.50 it/sec) -training >> step=8325600, episode=1388 reward=0.7883894 (459.63 it/sec) -training >> step=8325700, episode=1388 reward=0.806908 (510.37 it/sec) -training >> step=8325800, episode=1388 reward=0.7919946 (511.37 it/sec) -training >> step=8325900, episode=1388 reward=0.7905165 (451.55 it/sec) -training >> step=8326000, episode=1388 reward=0.781743 (447.25 it/sec) -training >> step=8326100, episode=1388 reward=0.7693113 (458.08 it/sec) -training >> step=8326200, episode=1388 reward=0.7818795 (512.62 it/sec) -training >> step=8326300, episode=1388 reward=0.7822351 (481.39 it/sec) -training >> step=8326400, episode=1388 reward=0.7614467 (509.55 it/sec) -training >> step=8326500, episode=1388 reward=0.7841294 (453.89 it/sec) -training >> step=8326600, episode=1388 reward=0.7935771 (500.46 it/sec) -training >> step=8326700, episode=1388 reward=0.7605892 (526.07 it/sec) -training >> step=8326800, episode=1388 reward=0.7847115 (462.85 it/sec) -training >> step=8326900, episode=1388 reward=0.7885523 (485.80 it/sec) -training >> step=8327000, episode=1388 reward=0.7777061 (492.76 it/sec) -training >> step=8327100, episode=1388 reward=0.7792144 (512.03 it/sec) -training >> step=8327200, episode=1388 reward=0.7869654 (486.24 it/sec) -training >> step=8327300, episode=1389 reward=0.7606708 (87.13 it/sec) -training >> step=8327400, episode=1389 reward=0.7970136 (508.21 it/sec) -training >> step=8327500, episode=1389 reward=0.7954732 (475.54 it/sec) -training >> step=8327600, episode=1389 reward=0.7943689 (435.73 it/sec) -training >> step=8327700, episode=1389 reward=0.7678609 (488.44 it/sec) -training >> step=8327800, episode=1389 reward=0.7794669 (499.39 it/sec) -training >> step=8327900, episode=1389 reward=0.7836397 (489.08 it/sec) -training >> step=8328000, episode=1389 reward=0.777514 (517.31 it/sec) -training >> step=8328100, episode=1389 reward=0.794226 (496.44 it/sec) -training >> step=8328200, episode=1389 reward=0.785254 (508.05 it/sec) -training >> step=8328300, episode=1389 reward=0.797621 (517.98 it/sec) -training >> step=8328400, episode=1389 reward=0.7872967 (452.81 it/sec) -training >> step=8328500, episode=1389 reward=0.7821624 (521.51 it/sec) -training >> step=8328600, episode=1389 reward=0.768158 (487.40 it/sec) -training >> step=8328700, episode=1389 reward=0.7977505 (482.57 it/sec) -training >> step=8328800, episode=1389 reward=0.7783926 (526.17 it/sec) -training >> step=8328900, episode=1389 reward=0.8015828 (524.67 it/sec) -training >> step=8329000, episode=1389 reward=0.7696409 (498.61 it/sec) -training >> step=8329100, episode=1389 reward=0.7890512 (484.60 it/sec) -training >> step=8329200, episode=1389 reward=0.773415 (449.77 it/sec) -training >> step=8329300, episode=1389 reward=0.7928299 (470.89 it/sec) -training >> step=8329400, episode=1389 reward=0.7855862 (499.46 it/sec) -training >> step=8329500, episode=1389 reward=0.7794226 (475.39 it/sec) -training >> step=8329600, episode=1389 reward=0.7758169 (507.00 it/sec) -training >> step=8329700, episode=1389 reward=0.7807577 (461.21 it/sec) -training >> step=8329800, episode=1389 reward=0.8005217 (479.77 it/sec) -training >> step=8329900, episode=1389 reward=0.7826197 (475.86 it/sec) -training >> step=8330000, episode=1389 reward=0.7736545 (489.57 it/sec) -training >> step=8330100, episode=1389 reward=0.7823954 (480.81 it/sec) -training >> step=8330200, episode=1389 reward=0.7967571 (471.39 it/sec) -training >> step=8330300, episode=1389 reward=0.7926109 (514.34 it/sec) -training >> step=8330400, episode=1389 reward=0.7828435 (488.22 it/sec) -training >> step=8330500, episode=1389 reward=0.7700049 (475.09 it/sec) -training >> step=8330600, episode=1389 reward=0.8010995 (484.99 it/sec) -training >> step=8330700, episode=1389 reward=0.7671098 (485.17 it/sec) -training >> step=8330800, episode=1389 reward=0.8045325 (459.60 it/sec) -training >> step=8330900, episode=1389 reward=0.7882976 (508.94 it/sec) -training >> step=8331000, episode=1389 reward=0.8112257 (483.11 it/sec) -training >> step=8331100, episode=1389 reward=0.8093939 (501.78 it/sec) -training >> step=8331200, episode=1389 reward=0.7842489 (472.04 it/sec) -training >> step=8331300, episode=1389 reward=0.7704788 (481.91 it/sec) -training >> step=8331400, episode=1389 reward=0.7837338 (442.55 it/sec) -training >> step=8331500, episode=1389 reward=0.7874625 (474.70 it/sec) -training >> step=8331600, episode=1389 reward=0.765865 (452.99 it/sec) -training >> step=8331700, episode=1389 reward=0.7874351 (502.44 it/sec) -training >> step=8331800, episode=1389 reward=0.7842284 (491.60 it/sec) -training >> step=8331900, episode=1389 reward=0.7720745 (485.19 it/sec) -training >> step=8332000, episode=1389 reward=0.7688538 (506.02 it/sec) -training >> step=8332100, episode=1389 reward=0.7913008 (472.94 it/sec) -training >> step=8332200, episode=1389 reward=0.7598601 (486.56 it/sec) -training >> step=8332300, episode=1389 reward=0.7775292 (478.90 it/sec) -training >> step=8332400, episode=1389 reward=0.7727413 (505.54 it/sec) -training >> step=8332500, episode=1389 reward=0.773688 (482.48 it/sec) -training >> step=8332600, episode=1389 reward=0.7779374 (450.06 it/sec) -training >> step=8332700, episode=1389 reward=0.7766555 (455.08 it/sec) -training >> step=8332800, episode=1389 reward=0.7871765 (451.28 it/sec) -training >> step=8332900, episode=1389 reward=0.7855802 (496.16 it/sec) -training >> step=8333000, episode=1389 reward=0.7855421 (472.98 it/sec) -training >> step=8333100, episode=1389 reward=0.7897971 (474.11 it/sec) -training >> step=8333200, episode=1389 reward=0.7867679 (503.02 it/sec) -training >> step=8333300, episode=1390 reward=0.7702966 (90.94 it/sec) -training >> step=8333400, episode=1390 reward=0.7958772 (464.76 it/sec) -training >> step=8333500, episode=1390 reward=0.7798511 (474.55 it/sec) -training >> step=8333600, episode=1390 reward=0.7898687 (511.07 it/sec) -training >> step=8333700, episode=1390 reward=0.8073863 (465.38 it/sec) -training >> step=8333800, episode=1390 reward=0.7959208 (475.50 it/sec) -training >> step=8333900, episode=1390 reward=0.7867444 (466.49 it/sec) -training >> step=8334000, episode=1390 reward=0.7801707 (522.02 it/sec) -training >> step=8334100, episode=1390 reward=0.8104565 (502.19 it/sec) -training >> step=8334200, episode=1390 reward=0.7922679 (496.55 it/sec) -training >> step=8334300, episode=1390 reward=0.8019521 (470.86 it/sec) -training >> step=8334400, episode=1390 reward=0.8050325 (500.18 it/sec) -training >> step=8334500, episode=1390 reward=0.7808453 (472.06 it/sec) -training >> step=8334600, episode=1390 reward=0.8064647 (489.46 it/sec) -training >> step=8334700, episode=1390 reward=0.7934154 (449.21 it/sec) -training >> step=8334800, episode=1390 reward=0.8006259 (485.74 it/sec) -training >> step=8334900, episode=1390 reward=0.8016414 (443.92 it/sec) -training >> step=8335000, episode=1390 reward=0.7961531 (481.88 it/sec) -training >> step=8335100, episode=1390 reward=0.7971336 (493.23 it/sec) -training >> step=8335200, episode=1390 reward=0.7935094 (473.36 it/sec) -training >> step=8335300, episode=1390 reward=0.7933731 (481.81 it/sec) -training >> step=8335400, episode=1390 reward=0.7987692 (479.70 it/sec) -training >> step=8335500, episode=1390 reward=0.7869577 (445.95 it/sec) -training >> step=8335600, episode=1390 reward=0.7889788 (510.95 it/sec) -training >> step=8335700, episode=1390 reward=0.7775033 (488.47 it/sec) -training >> step=8335800, episode=1390 reward=0.7798252 (505.13 it/sec) -training >> step=8335900, episode=1390 reward=0.787553 (518.20 it/sec) -training >> step=8336000, episode=1390 reward=0.7743913 (490.08 it/sec) -training >> step=8336100, episode=1390 reward=0.8058714 (497.41 it/sec) -training >> step=8336200, episode=1390 reward=0.7872942 (489.45 it/sec) -training >> step=8336300, episode=1390 reward=0.7909492 (512.44 it/sec) -training >> step=8336400, episode=1390 reward=0.7809442 (514.61 it/sec) -training >> step=8336500, episode=1390 reward=0.7783519 (465.25 it/sec) -training >> step=8336600, episode=1390 reward=0.7842292 (439.84 it/sec) -training >> step=8336700, episode=1390 reward=0.771077 (543.71 it/sec) -training >> step=8336800, episode=1390 reward=0.771098 (503.14 it/sec) -training >> step=8336900, episode=1390 reward=0.7616584 (484.87 it/sec) -training >> step=8337000, episode=1390 reward=0.774671 (466.94 it/sec) -training >> step=8337100, episode=1390 reward=0.7744542 (522.95 it/sec) -training >> step=8337200, episode=1390 reward=0.7865006 (493.52 it/sec) -training >> step=8337300, episode=1390 reward=0.7964092 (474.01 it/sec) -training >> step=8337400, episode=1390 reward=0.7858651 (500.50 it/sec) -training >> step=8337500, episode=1390 reward=0.7886569 (498.11 it/sec) -training >> step=8337600, episode=1390 reward=0.7941857 (484.60 it/sec) -training >> step=8337700, episode=1390 reward=0.7818959 (484.89 it/sec) -training >> step=8337800, episode=1390 reward=0.8087952 (505.27 it/sec) -training >> step=8337900, episode=1390 reward=0.7701513 (507.27 it/sec) -training >> step=8338000, episode=1390 reward=0.7874784 (507.05 it/sec) -training >> step=8338100, episode=1390 reward=0.7641092 (492.50 it/sec) -training >> step=8338200, episode=1390 reward=0.784731 (482.16 it/sec) -training >> step=8338300, episode=1390 reward=0.7724015 (491.39 it/sec) -training >> step=8338400, episode=1390 reward=0.7673186 (478.80 it/sec) -training >> step=8338500, episode=1390 reward=0.7781981 (477.49 it/sec) -training >> step=8338600, episode=1390 reward=0.7794949 (480.48 it/sec) -training >> step=8338700, episode=1390 reward=0.7783247 (504.74 it/sec) -training >> step=8338800, episode=1390 reward=0.788966 (494.25 it/sec) -training >> step=8338900, episode=1390 reward=0.7910197 (499.87 it/sec) -training >> step=8339000, episode=1390 reward=0.768221 (481.74 it/sec) -training >> step=8339100, episode=1390 reward=0.7839758 (463.36 it/sec) -training >> step=8339200, episode=1390 reward=0.7701601 (521.46 it/sec) -training >> step=8339300, episode=1391 reward=0.7840197 (92.74 it/sec) -training >> step=8339400, episode=1391 reward=0.780497 (497.22 it/sec) -training >> step=8339500, episode=1391 reward=0.7678074 (480.48 it/sec) -training >> step=8339600, episode=1391 reward=0.7852793 (499.67 it/sec) -training >> step=8339700, episode=1391 reward=0.784461 (473.34 it/sec) -training >> step=8339800, episode=1391 reward=0.7938989 (472.95 it/sec) -training >> step=8339900, episode=1391 reward=0.7990295 (485.66 it/sec) -training >> step=8340000, episode=1391 reward=0.7949677 (522.00 it/sec) -training >> step=8340100, episode=1391 reward=0.7764044 (478.99 it/sec) -training >> step=8340200, episode=1391 reward=0.7914356 (491.91 it/sec) -training >> step=8340300, episode=1391 reward=0.7963701 (514.93 it/sec) -training >> step=8340400, episode=1391 reward=0.7864364 (498.74 it/sec) -training >> step=8340500, episode=1391 reward=0.8065689 (511.38 it/sec) -training >> step=8340600, episode=1391 reward=0.7921866 (479.15 it/sec) -training >> step=8340700, episode=1391 reward=0.7852889 (489.06 it/sec) -training >> step=8340800, episode=1391 reward=0.7742947 (473.44 it/sec) -training >> step=8340900, episode=1391 reward=0.7820225 (507.58 it/sec) -training >> step=8341000, episode=1391 reward=0.8023001 (479.20 it/sec) -training >> step=8341100, episode=1391 reward=0.8037775 (468.36 it/sec) -training >> step=8341200, episode=1391 reward=0.7918178 (484.63 it/sec) -training >> step=8341300, episode=1391 reward=0.7933953 (491.13 it/sec) -training >> step=8341400, episode=1391 reward=0.790399 (477.69 it/sec) -training >> step=8341500, episode=1391 reward=0.7787337 (479.52 it/sec) -training >> step=8341600, episode=1391 reward=0.7760277 (458.96 it/sec) -training >> step=8341700, episode=1391 reward=0.8018984 (491.67 it/sec) -training >> step=8341800, episode=1391 reward=0.7802517 (454.40 it/sec) -training >> step=8341900, episode=1391 reward=0.7734226 (461.23 it/sec) -training >> step=8342000, episode=1391 reward=0.7679589 (508.28 it/sec) -training >> step=8342100, episode=1391 reward=0.7984541 (473.09 it/sec) -training >> step=8342200, episode=1391 reward=0.7893077 (466.01 it/sec) -training >> step=8342300, episode=1391 reward=0.7870083 (484.84 it/sec) -training >> step=8342400, episode=1391 reward=0.8080872 (511.87 it/sec) -training >> step=8342500, episode=1391 reward=0.7708475 (511.14 it/sec) -training >> step=8342600, episode=1391 reward=0.797164 (493.71 it/sec) -training >> step=8342700, episode=1391 reward=0.7942443 (497.24 it/sec) -training >> step=8342800, episode=1391 reward=0.7807936 (501.48 it/sec) -training >> step=8342900, episode=1391 reward=0.7816843 (492.52 it/sec) -training >> step=8343000, episode=1391 reward=0.7854082 (513.68 it/sec) -training >> step=8343100, episode=1391 reward=0.7878726 (491.94 it/sec) -training >> step=8343200, episode=1391 reward=0.7936003 (488.35 it/sec) -training >> step=8343300, episode=1391 reward=0.7880057 (514.82 it/sec) -training >> step=8343400, episode=1391 reward=0.7926406 (491.68 it/sec) -training >> step=8343500, episode=1391 reward=0.7836749 (535.55 it/sec) -training >> step=8343600, episode=1391 reward=0.7812955 (489.96 it/sec) -training >> step=8343700, episode=1391 reward=0.7923481 (468.47 it/sec) -training >> step=8343800, episode=1391 reward=0.7747132 (483.93 it/sec) -training >> step=8343900, episode=1391 reward=0.7845916 (528.84 it/sec) -training >> step=8344000, episode=1391 reward=0.7794853 (492.54 it/sec) -training >> step=8344100, episode=1391 reward=0.7752166 (456.51 it/sec) -training >> step=8344200, episode=1391 reward=0.7732481 (510.32 it/sec) -training >> step=8344300, episode=1391 reward=0.7795607 (487.41 it/sec) -training >> step=8344400, episode=1391 reward=0.7804742 (503.68 it/sec) -training >> step=8344500, episode=1391 reward=0.787481 (473.79 it/sec) -training >> step=8344600, episode=1391 reward=0.7533576 (514.25 it/sec) -training >> step=8344700, episode=1391 reward=0.7807714 (501.79 it/sec) -training >> step=8344800, episode=1391 reward=0.8050566 (466.47 it/sec) -training >> step=8344900, episode=1391 reward=0.76203 (539.84 it/sec) -training >> step=8345000, episode=1391 reward=0.7925072 (489.79 it/sec) -training >> step=8345100, episode=1391 reward=0.8086572 (504.27 it/sec) -training >> step=8345200, episode=1391 reward=0.7623293 (473.99 it/sec) -training >> step=8345300, episode=1392 reward=0.7760266 (93.16 it/sec) -training >> step=8345400, episode=1392 reward=0.778333 (484.00 it/sec) -training >> step=8345500, episode=1392 reward=0.7778693 (472.17 it/sec) -training >> step=8345600, episode=1392 reward=0.7493123 (439.83 it/sec) -training >> step=8345700, episode=1392 reward=0.78656 (494.37 it/sec) -training >> step=8345800, episode=1392 reward=0.7923891 (503.91 it/sec) -training >> step=8345900, episode=1392 reward=0.7734253 (494.47 it/sec) -training >> step=8346000, episode=1392 reward=0.7924878 (504.10 it/sec) -training >> step=8346100, episode=1392 reward=0.7862967 (442.88 it/sec) -training >> step=8346200, episode=1392 reward=0.7879352 (372.02 it/sec) -training >> step=8346300, episode=1392 reward=0.7897174 (479.47 it/sec) -training >> step=8346400, episode=1392 reward=0.7862641 (451.76 it/sec) -training >> step=8346500, episode=1392 reward=0.7788412 (471.92 it/sec) -training >> step=8346600, episode=1392 reward=0.7829977 (506.41 it/sec) -training >> step=8346700, episode=1392 reward=0.7976274 (472.62 it/sec) -training >> step=8346800, episode=1392 reward=0.7766886 (471.36 it/sec) -training >> step=8346900, episode=1392 reward=0.8011332 (442.39 it/sec) -training >> step=8347000, episode=1392 reward=0.7977839 (485.25 it/sec) -training >> step=8347100, episode=1392 reward=0.7830927 (484.47 it/sec) -training >> step=8347200, episode=1392 reward=0.7984148 (456.37 it/sec) -training >> step=8347300, episode=1392 reward=0.7927573 (450.17 it/sec) -training >> step=8347400, episode=1392 reward=0.7835662 (518.13 it/sec) -training >> step=8347500, episode=1392 reward=0.784744 (488.57 it/sec) -training >> step=8347600, episode=1392 reward=0.7795058 (516.99 it/sec) -training >> step=8347700, episode=1392 reward=0.7930327 (523.67 it/sec) -training >> step=8347800, episode=1392 reward=0.7998483 (453.91 it/sec) -training >> step=8347900, episode=1392 reward=0.7769308 (487.50 it/sec) -training >> step=8348000, episode=1392 reward=0.7904097 (496.72 it/sec) -training >> step=8348100, episode=1392 reward=0.7898008 (518.60 it/sec) -training >> step=8348200, episode=1392 reward=0.7906018 (478.81 it/sec) -training >> step=8348300, episode=1392 reward=0.783576 (505.30 it/sec) -training >> step=8348400, episode=1392 reward=0.7898208 (500.99 it/sec) -training >> step=8348500, episode=1392 reward=0.7495196 (513.74 it/sec) -training >> step=8348600, episode=1392 reward=0.7957011 (466.42 it/sec) -training >> step=8348700, episode=1392 reward=0.7825372 (486.52 it/sec) -training >> step=8348800, episode=1392 reward=0.7863038 (497.53 it/sec) -training >> step=8348900, episode=1392 reward=0.7709661 (505.36 it/sec) -training >> step=8349000, episode=1392 reward=0.7838935 (471.70 it/sec) -training >> step=8349100, episode=1392 reward=0.7673904 (503.07 it/sec) -training >> step=8349200, episode=1392 reward=0.7786152 (489.78 it/sec) -training >> step=8349300, episode=1392 reward=0.7845874 (461.39 it/sec) -training >> step=8349400, episode=1392 reward=0.7996803 (451.05 it/sec) -training >> step=8349500, episode=1392 reward=0.7971454 (520.62 it/sec) -training >> step=8349600, episode=1392 reward=0.7823228 (468.30 it/sec) -training >> step=8349700, episode=1392 reward=0.7857333 (492.58 it/sec) -training >> step=8349800, episode=1392 reward=0.7674453 (504.08 it/sec) -training >> step=8349900, episode=1392 reward=0.7702191 (492.96 it/sec) -training >> step=8350000, episode=1392 reward=0.7603484 (507.82 it/sec) -training >> step=8350100, episode=1392 reward=0.7955369 (481.31 it/sec) -training >> step=8350200, episode=1392 reward=0.7906126 (468.78 it/sec) -training >> step=8350300, episode=1392 reward=0.7803204 (489.14 it/sec) -training >> step=8350400, episode=1392 reward=0.8012767 (447.82 it/sec) -training >> step=8350500, episode=1392 reward=0.780767 (449.36 it/sec) -training >> step=8350600, episode=1392 reward=0.781421 (498.77 it/sec) -training >> step=8350700, episode=1392 reward=0.7903177 (479.33 it/sec) -training >> step=8350800, episode=1392 reward=0.782831 (486.86 it/sec) -training >> step=8350900, episode=1392 reward=0.7722239 (479.99 it/sec) -training >> step=8351000, episode=1392 reward=0.7954753 (476.09 it/sec) -training >> step=8351100, episode=1392 reward=0.77895 (512.61 it/sec) -training >> step=8351200, episode=1392 reward=0.7940487 (478.43 it/sec) -training >> step=8351300, episode=1393 reward=0.7913461 (98.31 it/sec) -training >> step=8351400, episode=1393 reward=0.7765614 (480.65 it/sec) -training >> step=8351500, episode=1393 reward=0.7873378 (469.44 it/sec) -training >> step=8351600, episode=1393 reward=0.7761548 (462.26 it/sec) -training >> step=8351700, episode=1393 reward=0.784921 (509.95 it/sec) -training >> step=8351800, episode=1393 reward=0.7772998 (472.28 it/sec) -training >> step=8351900, episode=1393 reward=0.7795644 (446.89 it/sec) -training >> step=8352000, episode=1393 reward=0.7893475 (534.04 it/sec) -training >> step=8352100, episode=1393 reward=0.7939921 (508.51 it/sec) -training >> step=8352200, episode=1393 reward=0.7739152 (545.12 it/sec) -training >> step=8352300, episode=1393 reward=0.7726066 (482.60 it/sec) -training >> step=8352400, episode=1393 reward=0.7761686 (520.30 it/sec) -training >> step=8352500, episode=1393 reward=0.7748241 (502.87 it/sec) -training >> step=8352600, episode=1393 reward=0.7999598 (499.87 it/sec) -training >> step=8352700, episode=1393 reward=0.7932089 (484.76 it/sec) -training >> step=8352800, episode=1393 reward=0.7847375 (454.29 it/sec) -training >> step=8352900, episode=1393 reward=0.7926997 (546.17 it/sec) -training >> step=8353000, episode=1393 reward=0.7951485 (491.79 it/sec) -training >> step=8353100, episode=1393 reward=0.7986731 (465.33 it/sec) -training >> step=8353200, episode=1393 reward=0.7809624 (522.94 it/sec) -training >> step=8353300, episode=1393 reward=0.7875407 (460.36 it/sec) -training >> step=8353400, episode=1393 reward=0.7818658 (490.06 it/sec) -training >> step=8353500, episode=1393 reward=0.7976473 (459.64 it/sec) -training >> step=8353600, episode=1393 reward=0.8081208 (536.30 it/sec) -training >> step=8353700, episode=1393 reward=0.7810326 (468.27 it/sec) -training >> step=8353800, episode=1393 reward=0.8024033 (467.28 it/sec) -training >> step=8353900, episode=1393 reward=0.7924642 (486.43 it/sec) -training >> step=8354000, episode=1393 reward=0.7819883 (447.77 it/sec) -training >> step=8354100, episode=1393 reward=0.7904645 (511.32 it/sec) -training >> step=8354200, episode=1393 reward=0.780735 (451.69 it/sec) -training >> step=8354300, episode=1393 reward=0.7964991 (481.56 it/sec) -training >> step=8354400, episode=1393 reward=0.7831688 (489.47 it/sec) -training >> step=8354500, episode=1393 reward=0.8055547 (489.55 it/sec) -training >> step=8354600, episode=1393 reward=0.791981 (491.45 it/sec) -training >> step=8354700, episode=1393 reward=0.7776514 (513.70 it/sec) -training >> step=8354800, episode=1393 reward=0.7921898 (471.72 it/sec) -training >> step=8354900, episode=1393 reward=0.789185 (470.38 it/sec) -training >> step=8355000, episode=1393 reward=0.7762394 (453.01 it/sec) -training >> step=8355100, episode=1393 reward=0.8109222 (464.08 it/sec) -training >> step=8355200, episode=1393 reward=0.8014865 (447.53 it/sec) -training >> step=8355300, episode=1393 reward=0.7769509 (498.29 it/sec) -training >> step=8355400, episode=1393 reward=0.8088169 (483.47 it/sec) -training >> step=8355500, episode=1393 reward=0.7807893 (495.46 it/sec) -training >> step=8355600, episode=1393 reward=0.7967327 (505.17 it/sec) -training >> step=8355700, episode=1393 reward=0.7957157 (491.03 it/sec) -training >> step=8355800, episode=1393 reward=0.8035966 (482.54 it/sec) -training >> step=8355900, episode=1393 reward=0.7987771 (503.16 it/sec) -training >> step=8356000, episode=1393 reward=0.7642143 (472.34 it/sec) -training >> step=8356100, episode=1393 reward=0.7816354 (468.75 it/sec) -training >> step=8356200, episode=1393 reward=0.7617514 (469.33 it/sec) -training >> step=8356300, episode=1393 reward=0.7794665 (480.19 it/sec) -training >> step=8356400, episode=1393 reward=0.790366 (492.03 it/sec) -training >> step=8356500, episode=1393 reward=0.7739353 (513.12 it/sec) -training >> step=8356600, episode=1393 reward=0.7791079 (479.70 it/sec) -training >> step=8356700, episode=1393 reward=0.7630343 (498.85 it/sec) -training >> step=8356800, episode=1393 reward=0.7928876 (505.21 it/sec) -training >> step=8356900, episode=1393 reward=0.8088025 (526.12 it/sec) -training >> step=8357000, episode=1393 reward=0.7786841 (452.77 it/sec) -training >> step=8357100, episode=1393 reward=0.7783321 (497.18 it/sec) -training >> step=8357200, episode=1393 reward=0.7867911 (504.21 it/sec) -training >> step=8357300, episode=1394 reward=0.791318 (95.11 it/sec) -training >> step=8357400, episode=1394 reward=0.7768316 (458.04 it/sec) -training >> step=8357500, episode=1394 reward=0.7830815 (465.13 it/sec) -training >> step=8357600, episode=1394 reward=0.7999651 (484.49 it/sec) -training >> step=8357700, episode=1394 reward=0.7963486 (469.00 it/sec) -training >> step=8357800, episode=1394 reward=0.7774246 (475.94 it/sec) -training >> step=8357900, episode=1394 reward=0.7994899 (467.00 it/sec) -training >> step=8358000, episode=1394 reward=0.818077 (442.69 it/sec) -training >> step=8358100, episode=1394 reward=0.7663872 (468.59 it/sec) -training >> step=8358200, episode=1394 reward=0.7963579 (523.68 it/sec) -training >> step=8358300, episode=1394 reward=0.7957608 (494.87 it/sec) -training >> step=8358400, episode=1394 reward=0.7952145 (468.70 it/sec) -training >> step=8358500, episode=1394 reward=0.7695024 (487.86 it/sec) -training >> step=8358600, episode=1394 reward=0.8048329 (516.82 it/sec) -training >> step=8358700, episode=1394 reward=0.7921655 (487.08 it/sec) -training >> step=8358800, episode=1394 reward=0.801895 (496.05 it/sec) -training >> step=8358900, episode=1394 reward=0.7926115 (512.18 it/sec) -training >> step=8359000, episode=1394 reward=0.7744553 (475.56 it/sec) -training >> step=8359100, episode=1394 reward=0.7796605 (462.45 it/sec) -training >> step=8359200, episode=1394 reward=0.7881168 (504.18 it/sec) -training >> step=8359300, episode=1394 reward=0.7839541 (468.85 it/sec) -training >> step=8359400, episode=1394 reward=0.7912958 (466.52 it/sec) -training >> step=8359500, episode=1394 reward=0.7821226 (485.08 it/sec) -training >> step=8359600, episode=1394 reward=0.7827703 (521.30 it/sec) -training >> step=8359700, episode=1394 reward=0.7942554 (472.98 it/sec) -training >> step=8359800, episode=1394 reward=0.7825915 (444.67 it/sec) -training >> step=8359900, episode=1394 reward=0.7777073 (419.00 it/sec) -training >> step=8360000, episode=1394 reward=0.7939261 (482.99 it/sec) -training >> step=8360100, episode=1394 reward=0.7693568 (476.16 it/sec) -training >> step=8360200, episode=1394 reward=0.8002424 (478.30 it/sec) -training >> step=8360300, episode=1394 reward=0.7993921 (488.41 it/sec) -training >> step=8360400, episode=1394 reward=0.7880936 (501.90 it/sec) -training >> step=8360500, episode=1394 reward=0.7606887 (384.61 it/sec) -training >> step=8360600, episode=1394 reward=0.7762548 (480.79 it/sec) -training >> step=8360700, episode=1394 reward=0.7952225 (501.27 it/sec) -training >> step=8360800, episode=1394 reward=0.7902706 (471.59 it/sec) -training >> step=8360900, episode=1394 reward=0.77731 (493.39 it/sec) -training >> step=8361000, episode=1394 reward=0.7868529 (458.37 it/sec) -training >> step=8361100, episode=1394 reward=0.8001968 (518.03 it/sec) -training >> step=8361200, episode=1394 reward=0.7890915 (466.21 it/sec) -training >> step=8361300, episode=1394 reward=0.7946051 (471.70 it/sec) -training >> step=8361400, episode=1394 reward=0.7889925 (495.40 it/sec) -training >> step=8361500, episode=1394 reward=0.7634382 (507.82 it/sec) -training >> step=8361600, episode=1394 reward=0.7798006 (488.86 it/sec) -training >> step=8361700, episode=1394 reward=0.7942625 (485.78 it/sec) -training >> step=8361800, episode=1394 reward=0.7776691 (486.62 it/sec) -training >> step=8361900, episode=1394 reward=0.7852123 (483.88 it/sec) -training >> step=8362000, episode=1394 reward=0.7646842 (464.65 it/sec) -training >> step=8362100, episode=1394 reward=0.7848171 (517.59 it/sec) -training >> step=8362200, episode=1394 reward=0.780677 (521.69 it/sec) -training >> step=8362300, episode=1394 reward=0.7930272 (431.65 it/sec) -training >> step=8362400, episode=1394 reward=0.7876472 (517.65 it/sec) -training >> step=8362500, episode=1394 reward=0.8045473 (477.33 it/sec) -training >> step=8362600, episode=1394 reward=0.7911646 (495.73 it/sec) -training >> step=8362700, episode=1394 reward=0.7743537 (475.03 it/sec) -training >> step=8362800, episode=1394 reward=0.7973886 (471.99 it/sec) -training >> step=8362900, episode=1394 reward=0.7800697 (490.22 it/sec) -training >> step=8363000, episode=1394 reward=0.7855637 (490.99 it/sec) -training >> step=8363100, episode=1394 reward=0.7886085 (494.01 it/sec) -training >> step=8363200, episode=1394 reward=0.7902008 (492.80 it/sec) -training >> step=8363300, episode=1395 reward=0.7993342 (97.79 it/sec) -training >> step=8363400, episode=1395 reward=0.7836094 (472.05 it/sec) -training >> step=8363500, episode=1395 reward=0.7840124 (469.40 it/sec) -training >> step=8363600, episode=1395 reward=0.774628 (463.07 it/sec) -training >> step=8363700, episode=1395 reward=0.8080812 (461.49 it/sec) -training >> step=8363800, episode=1395 reward=0.7883984 (496.84 it/sec) -training >> step=8363900, episode=1395 reward=0.7970049 (501.24 it/sec) -training >> step=8364000, episode=1395 reward=0.8101015 (437.44 it/sec) -training >> step=8364100, episode=1395 reward=0.7990381 (464.49 it/sec) -training >> step=8364200, episode=1395 reward=0.8121306 (497.18 it/sec) -training >> step=8364300, episode=1395 reward=0.7898189 (510.68 it/sec) -training >> step=8364400, episode=1395 reward=0.7857277 (455.94 it/sec) -training >> step=8364500, episode=1395 reward=0.7910065 (507.37 it/sec) -training >> step=8364600, episode=1395 reward=0.7978364 (498.04 it/sec) -training >> step=8364700, episode=1395 reward=0.7868037 (544.48 it/sec) -training >> step=8364800, episode=1395 reward=0.7943611 (449.83 it/sec) -training >> step=8364900, episode=1395 reward=0.8032564 (495.22 it/sec) -training >> step=8365000, episode=1395 reward=0.8110428 (495.81 it/sec) -training >> step=8365100, episode=1395 reward=0.8106794 (472.63 it/sec) -training >> step=8365200, episode=1395 reward=0.7898532 (484.34 it/sec) -training >> step=8365300, episode=1395 reward=0.788286 (499.90 it/sec) -training >> step=8365400, episode=1395 reward=0.7809428 (481.94 it/sec) -training >> step=8365500, episode=1395 reward=0.7879087 (449.29 it/sec) -training >> step=8365600, episode=1395 reward=0.788792 (487.25 it/sec) -training >> step=8365700, episode=1395 reward=0.7877412 (498.43 it/sec) -training >> step=8365800, episode=1395 reward=0.7885747 (509.24 it/sec) -training >> step=8365900, episode=1395 reward=0.7906288 (482.79 it/sec) -training >> step=8366000, episode=1395 reward=0.8108428 (457.20 it/sec) -training >> step=8366100, episode=1395 reward=0.7714276 (498.73 it/sec) -training >> step=8366200, episode=1395 reward=0.7808309 (457.37 it/sec) -training >> step=8366300, episode=1395 reward=0.785857 (501.56 it/sec) -training >> step=8366400, episode=1395 reward=0.808453 (482.27 it/sec) -training >> step=8366500, episode=1395 reward=0.793565 (507.83 it/sec) -training >> step=8366600, episode=1395 reward=0.8026654 (481.09 it/sec) -training >> step=8366700, episode=1395 reward=0.7951291 (464.03 it/sec) -training >> step=8366800, episode=1395 reward=0.7714265 (489.06 it/sec) -training >> step=8366900, episode=1395 reward=0.7914032 (509.83 it/sec) -training >> step=8367000, episode=1395 reward=0.8230876 (486.25 it/sec) -training >> step=8367100, episode=1395 reward=0.7926342 (477.20 it/sec) -training >> step=8367200, episode=1395 reward=0.7719308 (525.60 it/sec) -training >> step=8367300, episode=1395 reward=0.7736894 (486.46 it/sec) -training >> step=8367400, episode=1395 reward=0.7831175 (479.89 it/sec) -training >> step=8367500, episode=1395 reward=0.7642039 (471.34 it/sec) -training >> step=8367600, episode=1395 reward=0.790616 (498.63 it/sec) -training >> step=8367700, episode=1395 reward=0.8007186 (482.19 it/sec) -training >> step=8367800, episode=1395 reward=0.7866184 (463.73 it/sec) -training >> step=8367900, episode=1395 reward=0.7993993 (491.81 it/sec) -training >> step=8368000, episode=1395 reward=0.7961897 (469.65 it/sec) -training >> step=8368100, episode=1395 reward=0.7964808 (520.64 it/sec) -training >> step=8368200, episode=1395 reward=0.7931051 (491.95 it/sec) -training >> step=8368300, episode=1395 reward=0.7849386 (531.88 it/sec) -training >> step=8368400, episode=1395 reward=0.7910077 (463.15 it/sec) -training >> step=8368500, episode=1395 reward=0.7965446 (500.34 it/sec) -training >> step=8368600, episode=1395 reward=0.7803007 (451.67 it/sec) -training >> step=8368700, episode=1395 reward=0.7842965 (547.13 it/sec) -training >> step=8368800, episode=1395 reward=0.7770764 (524.54 it/sec) -training >> step=8368900, episode=1395 reward=0.7764872 (476.01 it/sec) -training >> step=8369000, episode=1395 reward=0.7887555 (510.28 it/sec) -training >> step=8369100, episode=1395 reward=0.7789398 (431.28 it/sec) -training >> step=8369200, episode=1395 reward=0.7724862 (499.56 it/sec) -training >> step=8369300, episode=1396 reward=0.7843858 (85.95 it/sec) -training >> step=8369400, episode=1396 reward=0.7759825 (432.84 it/sec) -training >> step=8369500, episode=1396 reward=0.7996051 (465.68 it/sec) -training >> step=8369600, episode=1396 reward=0.7895889 (489.00 it/sec) -training >> step=8369700, episode=1396 reward=0.8069978 (514.15 it/sec) -training >> step=8369800, episode=1396 reward=0.7877046 (497.13 it/sec) -training >> step=8369900, episode=1396 reward=0.7786285 (498.70 it/sec) -training >> step=8370000, episode=1396 reward=0.7971108 (502.11 it/sec) -training >> step=8370100, episode=1396 reward=0.8074531 (491.21 it/sec) -training >> step=8370200, episode=1396 reward=0.8000721 (497.91 it/sec) -training >> step=8370300, episode=1396 reward=0.7993687 (536.62 it/sec) -training >> step=8370400, episode=1396 reward=0.780427 (476.30 it/sec) -training >> step=8370500, episode=1396 reward=0.7837186 (478.63 it/sec) -training >> step=8370600, episode=1396 reward=0.7875129 (498.54 it/sec) -training >> step=8370700, episode=1396 reward=0.7826015 (503.08 it/sec) -training >> step=8370800, episode=1396 reward=0.7865779 (507.48 it/sec) -training >> step=8370900, episode=1396 reward=0.7935857 (505.09 it/sec) -training >> step=8371000, episode=1396 reward=0.8011474 (466.60 it/sec) -training >> step=8371100, episode=1396 reward=0.8080648 (512.72 it/sec) -training >> step=8371200, episode=1396 reward=0.7631341 (487.13 it/sec) -training >> step=8371300, episode=1396 reward=0.7935683 (487.33 it/sec) -training >> step=8371400, episode=1396 reward=0.7958052 (536.74 it/sec) -training >> step=8371500, episode=1396 reward=0.7925528 (482.24 it/sec) -training >> step=8371600, episode=1396 reward=0.7811027 (454.48 it/sec) -training >> step=8371700, episode=1396 reward=0.8035645 (485.86 it/sec) -training >> step=8371800, episode=1396 reward=0.798077 (548.10 it/sec) -training >> step=8371900, episode=1396 reward=0.7987011 (496.82 it/sec) -training >> step=8372000, episode=1396 reward=0.7845234 (455.09 it/sec) -training >> step=8372100, episode=1396 reward=0.7780741 (495.73 it/sec) -training >> step=8372200, episode=1396 reward=0.7733245 (504.65 it/sec) -training >> step=8372300, episode=1396 reward=0.7882993 (491.06 it/sec) -training >> step=8372400, episode=1396 reward=0.7882235 (482.83 it/sec) -training >> step=8372500, episode=1396 reward=0.8107005 (448.71 it/sec) -training >> step=8372600, episode=1396 reward=0.810643 (501.25 it/sec) -training >> step=8372700, episode=1396 reward=0.7886218 (457.61 it/sec) -training >> step=8372800, episode=1396 reward=0.7781597 (508.04 it/sec) -training >> step=8372900, episode=1396 reward=0.7781889 (497.53 it/sec) -training >> step=8373000, episode=1396 reward=0.8034627 (470.57 it/sec) -training >> step=8373100, episode=1396 reward=0.7959823 (429.07 it/sec) -training >> step=8373200, episode=1396 reward=0.7831874 (499.32 it/sec) -training >> step=8373300, episode=1396 reward=0.8059049 (494.88 it/sec) -training >> step=8373400, episode=1396 reward=0.7790238 (481.69 it/sec) -training >> step=8373500, episode=1396 reward=0.818611 (439.56 it/sec) -training >> step=8373600, episode=1396 reward=0.7987651 (442.92 it/sec) -training >> step=8373700, episode=1396 reward=0.7995764 (462.38 it/sec) -training >> step=8373800, episode=1396 reward=0.8006801 (493.98 it/sec) -training >> step=8373900, episode=1396 reward=0.8044494 (480.01 it/sec) -training >> step=8374000, episode=1396 reward=0.7932463 (473.33 it/sec) -training >> step=8374100, episode=1396 reward=0.7824095 (496.33 it/sec) -training >> step=8374200, episode=1396 reward=0.7717786 (478.49 it/sec) -training >> step=8374300, episode=1396 reward=0.7782776 (538.40 it/sec) -training >> step=8374400, episode=1396 reward=0.7991767 (535.77 it/sec) -training >> step=8374500, episode=1396 reward=0.7945401 (455.47 it/sec) -training >> step=8374600, episode=1396 reward=0.7891787 (503.90 it/sec) -training >> step=8374700, episode=1396 reward=0.7814709 (500.62 it/sec) -training >> step=8374800, episode=1396 reward=0.807227 (498.19 it/sec) -training >> step=8374900, episode=1396 reward=0.7825237 (491.69 it/sec) -training >> step=8375000, episode=1396 reward=0.7924384 (456.89 it/sec) -training >> step=8375100, episode=1396 reward=0.8095894 (513.86 it/sec) -training >> step=8375200, episode=1396 reward=0.7865047 (515.36 it/sec) -training >> step=8375300, episode=1397 reward=0.7880368 (86.32 it/sec) -training >> step=8375400, episode=1397 reward=0.7905688 (473.53 it/sec) -training >> step=8375500, episode=1397 reward=0.7881902 (466.82 it/sec) -training >> step=8375600, episode=1397 reward=0.7728626 (459.76 it/sec) -training >> step=8375700, episode=1397 reward=0.7908483 (493.55 it/sec) -training >> step=8375800, episode=1397 reward=0.8061822 (508.14 it/sec) -training >> step=8375900, episode=1397 reward=0.7747284 (497.45 it/sec) -training >> step=8376000, episode=1397 reward=0.8116638 (456.13 it/sec) -training >> step=8376100, episode=1397 reward=0.7874621 (495.84 it/sec) -training >> step=8376200, episode=1397 reward=0.7859652 (482.11 it/sec) -training >> step=8376300, episode=1397 reward=0.8012246 (495.73 it/sec) -training >> step=8376400, episode=1397 reward=0.7909701 (503.46 it/sec) -training >> step=8376500, episode=1397 reward=0.7921468 (499.39 it/sec) -training >> step=8376600, episode=1397 reward=0.7954393 (473.29 it/sec) -training >> step=8376700, episode=1397 reward=0.7791411 (476.31 it/sec) -training >> step=8376800, episode=1397 reward=0.7777725 (488.94 it/sec) -training >> step=8376900, episode=1397 reward=0.8002797 (496.65 it/sec) -training >> step=8377000, episode=1397 reward=0.7827871 (492.48 it/sec) -training >> step=8377100, episode=1397 reward=0.7921208 (466.93 it/sec) -training >> step=8377200, episode=1397 reward=0.7784607 (475.97 it/sec) -training >> step=8377300, episode=1397 reward=0.7854844 (515.22 it/sec) -training >> step=8377400, episode=1397 reward=0.7827164 (477.18 it/sec) -training >> step=8377500, episode=1397 reward=0.7917422 (473.97 it/sec) -training >> step=8377600, episode=1397 reward=0.7772511 (500.08 it/sec) -training >> step=8377700, episode=1397 reward=0.7846482 (542.86 it/sec) -training >> step=8377800, episode=1397 reward=0.7925354 (468.99 it/sec) -training >> step=8377900, episode=1397 reward=0.773095 (501.53 it/sec) -training >> step=8378000, episode=1397 reward=0.8006386 (499.69 it/sec) -training >> step=8378100, episode=1397 reward=0.7786046 (474.30 it/sec) -training >> step=8378200, episode=1397 reward=0.7965378 (516.00 it/sec) -training >> step=8378300, episode=1397 reward=0.7991719 (504.78 it/sec) -training >> step=8378400, episode=1397 reward=0.7904594 (447.01 it/sec) -training >> step=8378500, episode=1397 reward=0.7926703 (515.33 it/sec) -training >> step=8378600, episode=1397 reward=0.7830037 (503.43 it/sec) -training >> step=8378700, episode=1397 reward=0.7870511 (497.79 it/sec) -training >> step=8378800, episode=1397 reward=0.7833375 (482.10 it/sec) -training >> step=8378900, episode=1397 reward=0.7907819 (504.91 it/sec) -training >> step=8379000, episode=1397 reward=0.778169 (472.11 it/sec) -training >> step=8379100, episode=1397 reward=0.7750772 (478.72 it/sec) -training >> step=8379200, episode=1397 reward=0.7883364 (505.99 it/sec) -training >> step=8379300, episode=1397 reward=0.7918462 (495.95 it/sec) -training >> step=8379400, episode=1397 reward=0.8012094 (492.24 it/sec) -training >> step=8379500, episode=1397 reward=0.8011497 (484.97 it/sec) -training >> step=8379600, episode=1397 reward=0.8065279 (508.24 it/sec) -training >> step=8379700, episode=1397 reward=0.7918396 (472.07 it/sec) -training >> step=8379800, episode=1397 reward=0.8033261 (497.71 it/sec) -training >> step=8379900, episode=1397 reward=0.797348 (491.86 it/sec) -training >> step=8380000, episode=1397 reward=0.7949144 (542.11 it/sec) -training >> step=8380100, episode=1397 reward=0.7761175 (482.26 it/sec) -training >> step=8380200, episode=1397 reward=0.7741601 (498.97 it/sec) -training >> step=8380300, episode=1397 reward=0.8088537 (512.97 it/sec) -training >> step=8380400, episode=1397 reward=0.7917752 (469.01 it/sec) -training >> step=8380500, episode=1397 reward=0.8018497 (498.48 it/sec) -training >> step=8380600, episode=1397 reward=0.7783405 (486.08 it/sec) -training >> step=8380700, episode=1397 reward=0.7786413 (482.89 it/sec) -training >> step=8380800, episode=1397 reward=0.7985905 (490.75 it/sec) -training >> step=8380900, episode=1397 reward=0.783776 (517.82 it/sec) -training >> step=8381000, episode=1397 reward=0.7813819 (491.65 it/sec) -training >> step=8381100, episode=1397 reward=0.8108505 (438.81 it/sec) -training >> step=8381200, episode=1397 reward=0.7889253 (434.47 it/sec) -training >> step=8381300, episode=1398 reward=0.7999562 (88.86 it/sec) -training >> step=8381400, episode=1398 reward=0.7742509 (486.77 it/sec) -training >> step=8381500, episode=1398 reward=0.7844232 (470.05 it/sec) -training >> step=8381600, episode=1398 reward=0.7750571 (463.83 it/sec) -training >> step=8381700, episode=1398 reward=0.7831048 (501.57 it/sec) -training >> step=8381800, episode=1398 reward=0.7945577 (506.41 it/sec) -training >> step=8381900, episode=1398 reward=0.7830285 (437.78 it/sec) -training >> step=8382000, episode=1398 reward=0.7931583 (477.69 it/sec) -training >> step=8382100, episode=1398 reward=0.7939719 (527.11 it/sec) -training >> step=8382200, episode=1398 reward=0.7913168 (511.13 it/sec) -training >> step=8382300, episode=1398 reward=0.7864621 (492.53 it/sec) -training >> step=8382400, episode=1398 reward=0.7798395 (541.71 it/sec) -training >> step=8382500, episode=1398 reward=0.7791486 (488.51 it/sec) -training >> step=8382600, episode=1398 reward=0.7987279 (481.28 it/sec) -training >> step=8382700, episode=1398 reward=0.7695808 (499.52 it/sec) -training >> step=8382800, episode=1398 reward=0.7986258 (530.40 it/sec) -training >> step=8382900, episode=1398 reward=0.7795191 (494.91 it/sec) -training >> step=8383000, episode=1398 reward=0.791781 (457.74 it/sec) -training >> step=8383100, episode=1398 reward=0.7900627 (512.73 it/sec) -training >> step=8383200, episode=1398 reward=0.7927045 (497.33 it/sec) -training >> step=8383300, episode=1398 reward=0.7874884 (520.97 it/sec) -training >> step=8383400, episode=1398 reward=0.7741966 (481.18 it/sec) -training >> step=8383500, episode=1398 reward=0.7831666 (503.19 it/sec) -training >> step=8383600, episode=1398 reward=0.7984923 (473.30 it/sec) -training >> step=8383700, episode=1398 reward=0.7939351 (455.53 it/sec) -training >> step=8383800, episode=1398 reward=0.7773266 (495.15 it/sec) -training >> step=8383900, episode=1398 reward=0.8076479 (511.21 it/sec) -training >> step=8384000, episode=1398 reward=0.7991068 (486.56 it/sec) -training >> step=8384100, episode=1398 reward=0.7775991 (488.45 it/sec) -training >> step=8384200, episode=1398 reward=0.7946897 (512.96 it/sec) -training >> step=8384300, episode=1398 reward=0.804469 (482.47 it/sec) -training >> step=8384400, episode=1398 reward=0.7895145 (488.48 it/sec) -training >> step=8384500, episode=1398 reward=0.7891855 (496.66 it/sec) -training >> step=8384600, episode=1398 reward=0.7952877 (506.59 it/sec) -training >> step=8384700, episode=1398 reward=0.7805982 (469.29 it/sec) -training >> step=8384800, episode=1398 reward=0.7879222 (495.86 it/sec) -training >> step=8384900, episode=1398 reward=0.7830203 (500.87 it/sec) -training >> step=8385000, episode=1398 reward=0.7929809 (533.98 it/sec) -training >> step=8385100, episode=1398 reward=0.8030791 (485.87 it/sec) -training >> step=8385200, episode=1398 reward=0.7854032 (475.31 it/sec) -training >> step=8385300, episode=1398 reward=0.790989 (492.65 it/sec) -training >> step=8385400, episode=1398 reward=0.7760255 (502.24 it/sec) -training >> step=8385500, episode=1398 reward=0.7748597 (445.88 it/sec) -training >> step=8385600, episode=1398 reward=0.7868479 (486.12 it/sec) -training >> step=8385700, episode=1398 reward=0.8004502 (500.19 it/sec) -training >> step=8385800, episode=1398 reward=0.7904337 (474.40 it/sec) -training >> step=8385900, episode=1398 reward=0.7854492 (506.41 it/sec) -training >> step=8386000, episode=1398 reward=0.8161588 (501.81 it/sec) -training >> step=8386100, episode=1398 reward=0.7916164 (513.88 it/sec) -training >> step=8386200, episode=1398 reward=0.785467 (493.24 it/sec) -training >> step=8386300, episode=1398 reward=0.8003658 (431.42 it/sec) -training >> step=8386400, episode=1398 reward=0.7821541 (512.44 it/sec) -training >> step=8386500, episode=1398 reward=0.7832249 (466.67 it/sec) -training >> step=8386600, episode=1398 reward=0.7938546 (457.39 it/sec) -training >> step=8386700, episode=1398 reward=0.7922174 (486.43 it/sec) -training >> step=8386800, episode=1398 reward=0.7846382 (505.84 it/sec) -training >> step=8386900, episode=1398 reward=0.8006405 (460.13 it/sec) -training >> step=8387000, episode=1398 reward=0.7899099 (479.61 it/sec) -training >> step=8387100, episode=1398 reward=0.7933393 (467.51 it/sec) -training >> step=8387200, episode=1398 reward=0.7805486 (472.85 it/sec) -training >> step=8387300, episode=1399 reward=0.7687902 (89.39 it/sec) -training >> step=8387400, episode=1399 reward=0.778723 (457.68 it/sec) -training >> step=8387500, episode=1399 reward=0.7831661 (428.47 it/sec) -training >> step=8387600, episode=1399 reward=0.7975018 (490.63 it/sec) -training >> step=8387700, episode=1399 reward=0.8138961 (506.03 it/sec) -training >> step=8387800, episode=1399 reward=0.7971842 (487.68 it/sec) -training >> step=8387900, episode=1399 reward=0.8074114 (488.50 it/sec) -training >> step=8388000, episode=1399 reward=0.8013027 (480.06 it/sec) -training >> step=8388100, episode=1399 reward=0.7803127 (449.29 it/sec) -training >> step=8388200, episode=1399 reward=0.7811458 (483.30 it/sec) -training >> step=8388300, episode=1399 reward=0.7987868 (419.50 it/sec) -training >> step=8388400, episode=1399 reward=0.8009461 (392.77 it/sec) -training >> step=8388500, episode=1399 reward=0.8007336 (501.33 it/sec) -training >> step=8388600, episode=1399 reward=0.7793477 (447.07 it/sec) -training >> step=8388700, episode=1399 reward=0.7941304 (469.99 it/sec) -training >> step=8388800, episode=1399 reward=0.7748736 (422.23 it/sec) -training >> step=8388900, episode=1399 reward=0.7968872 (519.66 it/sec) -training >> step=8389000, episode=1399 reward=0.7729536 (474.34 it/sec) -training >> step=8389100, episode=1399 reward=0.8102872 (503.13 it/sec) -training >> step=8389200, episode=1399 reward=0.7993273 (475.48 it/sec) -training >> step=8389300, episode=1399 reward=0.7951565 (499.97 it/sec) -training >> step=8389400, episode=1399 reward=0.7814168 (475.18 it/sec) -training >> step=8389500, episode=1399 reward=0.8090825 (512.70 it/sec) -training >> step=8389600, episode=1399 reward=0.8000913 (501.38 it/sec) -training >> step=8389700, episode=1399 reward=0.7876254 (443.22 it/sec) -training >> step=8389800, episode=1399 reward=0.8129523 (488.25 it/sec) -training >> step=8389900, episode=1399 reward=0.7838288 (527.84 it/sec) -training >> step=8390000, episode=1399 reward=0.7912171 (500.77 it/sec) -training >> step=8390100, episode=1399 reward=0.7914316 (507.50 it/sec) -training >> step=8390200, episode=1399 reward=0.8162336 (476.41 it/sec) -training >> step=8390300, episode=1399 reward=0.7782218 (476.44 it/sec) -training >> step=8390400, episode=1399 reward=0.7991925 (439.00 it/sec) -training >> step=8390500, episode=1399 reward=0.8024065 (473.70 it/sec) -training >> step=8390600, episode=1399 reward=0.8032184 (492.90 it/sec) -training >> step=8390700, episode=1399 reward=0.7810951 (502.03 it/sec) -training >> step=8390800, episode=1399 reward=0.7968302 (510.29 it/sec) -training >> step=8390900, episode=1399 reward=0.7931111 (447.68 it/sec) -training >> step=8391000, episode=1399 reward=0.8084446 (508.53 it/sec) -training >> step=8391100, episode=1399 reward=0.7971519 (533.49 it/sec) -training >> step=8391200, episode=1399 reward=0.7824826 (453.50 it/sec) -training >> step=8391300, episode=1399 reward=0.8009223 (434.57 it/sec) -training >> step=8391400, episode=1399 reward=0.7806926 (471.73 it/sec) -training >> step=8391500, episode=1399 reward=0.8042859 (455.82 it/sec) -training >> step=8391600, episode=1399 reward=0.7707798 (492.34 it/sec) -training >> step=8391700, episode=1399 reward=0.7883584 (490.61 it/sec) -training >> step=8391800, episode=1399 reward=0.7841326 (471.58 it/sec) -training >> step=8391900, episode=1399 reward=0.7694679 (485.36 it/sec) -training >> step=8392000, episode=1399 reward=0.7871785 (454.20 it/sec) -training >> step=8392100, episode=1399 reward=0.8029919 (486.17 it/sec) -training >> step=8392200, episode=1399 reward=0.7903546 (496.42 it/sec) -training >> step=8392300, episode=1399 reward=0.7906597 (422.93 it/sec) -training >> step=8392400, episode=1399 reward=0.7936727 (481.07 it/sec) -training >> step=8392500, episode=1399 reward=0.7953177 (511.36 it/sec) -training >> step=8392600, episode=1399 reward=0.7727402 (446.92 it/sec) -training >> step=8392700, episode=1399 reward=0.8099245 (494.71 it/sec) -training >> step=8392800, episode=1399 reward=0.7889255 (469.04 it/sec) -training >> step=8392900, episode=1399 reward=0.7884099 (486.92 it/sec) -training >> step=8393000, episode=1399 reward=0.7858195 (496.47 it/sec) -training >> step=8393100, episode=1399 reward=0.783102 (488.24 it/sec) -training >> step=8393200, episode=1399 reward=0.7975008 (520.47 it/sec) -training >> step=8393300, episode=1400 reward=0.8114884 (97.66 it/sec) -training >> step=8393400, episode=1400 reward=0.8063678 (466.19 it/sec) -training >> step=8393500, episode=1400 reward=0.7705379 (475.42 it/sec) -training >> step=8393600, episode=1400 reward=0.7778417 (492.40 it/sec) -training >> step=8393700, episode=1400 reward=0.7768682 (508.30 it/sec) -training >> step=8393800, episode=1400 reward=0.7793952 (475.43 it/sec) -training >> step=8393900, episode=1400 reward=0.7916051 (496.01 it/sec) -training >> step=8394000, episode=1400 reward=0.7788106 (490.40 it/sec) -training >> step=8394100, episode=1400 reward=0.7973183 (503.30 it/sec) -training >> step=8394200, episode=1400 reward=0.7923751 (478.84 it/sec) -training >> step=8394300, episode=1400 reward=0.7756812 (521.13 it/sec) -training >> step=8394400, episode=1400 reward=0.7788196 (492.38 it/sec) -training >> step=8394500, episode=1400 reward=0.7846014 (513.84 it/sec) -training >> step=8394600, episode=1400 reward=0.7846172 (460.45 it/sec) -training >> step=8394700, episode=1400 reward=0.809254 (476.70 it/sec) -training >> step=8394800, episode=1400 reward=0.7999523 (520.59 it/sec) -training >> step=8394900, episode=1400 reward=0.8048803 (484.83 it/sec) -training >> step=8395000, episode=1400 reward=0.8081186 (488.35 it/sec) -training >> step=8395100, episode=1400 reward=0.7991582 (524.52 it/sec) -training >> step=8395200, episode=1400 reward=0.781498 (485.08 it/sec) -training >> step=8395300, episode=1400 reward=0.7996895 (504.95 it/sec) -training >> step=8395400, episode=1400 reward=0.8050918 (498.07 it/sec) -training >> step=8395500, episode=1400 reward=0.7997556 (501.75 it/sec) -training >> step=8395600, episode=1400 reward=0.7904129 (470.20 it/sec) -training >> step=8395700, episode=1400 reward=0.7860216 (500.18 it/sec) -training >> step=8395800, episode=1400 reward=0.7840092 (486.94 it/sec) -training >> step=8395900, episode=1400 reward=0.7842107 (473.02 it/sec) -training >> step=8396000, episode=1400 reward=0.7920124 (456.01 it/sec) -training >> step=8396100, episode=1400 reward=0.7876863 (455.08 it/sec) -training >> step=8396200, episode=1400 reward=0.8076094 (479.39 it/sec) -training >> step=8396300, episode=1400 reward=0.7860569 (504.48 it/sec) -training >> step=8396400, episode=1400 reward=0.7795954 (479.14 it/sec) -training >> step=8396500, episode=1400 reward=0.7760171 (499.90 it/sec) -training >> step=8396600, episode=1400 reward=0.7868066 (489.55 it/sec) -training >> step=8396700, episode=1400 reward=0.7900789 (513.66 it/sec) -training >> step=8396800, episode=1400 reward=0.7868897 (486.08 it/sec) -training >> step=8396900, episode=1400 reward=0.7788988 (502.57 it/sec) -training >> step=8397000, episode=1400 reward=0.7823593 (506.00 it/sec) -training >> step=8397100, episode=1400 reward=0.7969372 (490.29 it/sec) -training >> step=8397200, episode=1400 reward=0.78321 (527.76 it/sec) -training >> step=8397300, episode=1400 reward=0.7906078 (479.05 it/sec) -training >> step=8397400, episode=1400 reward=0.8011432 (483.13 it/sec) -training >> step=8397500, episode=1400 reward=0.8062524 (476.73 it/sec) -training >> step=8397600, episode=1400 reward=0.7992876 (511.28 it/sec) -training >> step=8397700, episode=1400 reward=0.7976657 (466.21 it/sec) -training >> step=8397800, episode=1400 reward=0.7996137 (474.73 it/sec) -training >> step=8397900, episode=1400 reward=0.784003 (517.63 it/sec) -training >> step=8398000, episode=1400 reward=0.7947157 (494.29 it/sec) -training >> step=8398100, episode=1400 reward=0.7776591 (479.34 it/sec) -training >> step=8398200, episode=1400 reward=0.7901027 (518.34 it/sec) -training >> step=8398300, episode=1400 reward=0.7971612 (500.97 it/sec) -training >> step=8398400, episode=1400 reward=0.7999449 (493.41 it/sec) -training >> step=8398500, episode=1400 reward=0.7986544 (463.27 it/sec) -training >> step=8398600, episode=1400 reward=0.7942951 (490.52 it/sec) -training >> step=8398700, episode=1400 reward=0.7950481 (486.03 it/sec) -training >> step=8398800, episode=1400 reward=0.7861114 (486.49 it/sec) -training >> step=8398900, episode=1400 reward=0.7954615 (499.30 it/sec) -training >> step=8399000, episode=1400 reward=0.8038933 (490.63 it/sec) -training >> step=8399100, episode=1400 reward=0.794691 (493.81 it/sec) -training >> step=8399200, episode=1400 reward=0.7939189 (495.53 it/sec) -training >> step=8399300, episode=1401 reward=0.7697513 (88.87 it/sec) -training >> step=8399400, episode=1401 reward=0.7870849 (462.36 it/sec) -training >> step=8399500, episode=1401 reward=0.8047287 (504.80 it/sec) -training >> step=8399600, episode=1401 reward=0.7768481 (444.41 it/sec) -training >> step=8399700, episode=1401 reward=0.8017273 (480.19 it/sec) -training >> step=8399800, episode=1401 reward=0.7971216 (433.96 it/sec) -training >> step=8399900, episode=1401 reward=0.7828158 (468.23 it/sec) -training >> step=8400000, episode=1401 reward=0.782648 (463.63 it/sec) -training >> step=8400100, episode=1401 reward=0.7993841 (467.97 it/sec) -training >> step=8400200, episode=1401 reward=0.7895934 (478.87 it/sec) -training >> step=8400300, episode=1401 reward=0.795879 (451.46 it/sec) -training >> step=8400400, episode=1401 reward=0.7856021 (486.69 it/sec) -training >> step=8400500, episode=1401 reward=0.7884738 (432.07 it/sec) -training >> step=8400600, episode=1401 reward=0.7776401 (493.47 it/sec) -training >> step=8400700, episode=1401 reward=0.7982984 (461.03 it/sec) -training >> step=8400800, episode=1401 reward=0.7953236 (496.78 it/sec) -training >> step=8400900, episode=1401 reward=0.7863731 (473.42 it/sec) -training >> step=8401000, episode=1401 reward=0.7925171 (479.35 it/sec) -training >> step=8401100, episode=1401 reward=0.7856758 (499.56 it/sec) -training >> step=8401200, episode=1401 reward=0.7907261 (463.19 it/sec) -training >> step=8401300, episode=1401 reward=0.7878579 (474.02 it/sec) -training >> step=8401400, episode=1401 reward=0.8059963 (468.36 it/sec) -training >> step=8401500, episode=1401 reward=0.7997153 (474.06 it/sec) -training >> step=8401600, episode=1401 reward=0.795251 (523.15 it/sec) -training >> step=8401700, episode=1401 reward=0.7924087 (499.99 it/sec) -training >> step=8401800, episode=1401 reward=0.7966623 (478.64 it/sec) -training >> step=8401900, episode=1401 reward=0.7908573 (506.54 it/sec) -training >> step=8402000, episode=1401 reward=0.817524 (428.39 it/sec) -training >> step=8402100, episode=1401 reward=0.7753289 (483.29 it/sec) -training >> step=8402200, episode=1401 reward=0.7829349 (482.92 it/sec) -training >> step=8402300, episode=1401 reward=0.7927471 (524.39 it/sec) -training >> step=8402400, episode=1401 reward=0.7874868 (465.73 it/sec) -training >> step=8402500, episode=1401 reward=0.796349 (481.73 it/sec) -training >> step=8402600, episode=1401 reward=0.7819086 (465.90 it/sec) -training >> step=8402700, episode=1401 reward=0.7845174 (495.24 it/sec) -training >> step=8402800, episode=1401 reward=0.7935892 (476.21 it/sec) -training >> step=8402900, episode=1401 reward=0.7971238 (480.67 it/sec) -training >> step=8403000, episode=1401 reward=0.7733709 (466.83 it/sec) -training >> step=8403100, episode=1401 reward=0.7700382 (441.79 it/sec) -training >> step=8403200, episode=1401 reward=0.8087031 (461.81 it/sec) -training >> step=8403300, episode=1401 reward=0.7867741 (468.16 it/sec) -training >> step=8403400, episode=1401 reward=0.773613 (481.45 it/sec) -training >> step=8403500, episode=1401 reward=0.7774452 (465.34 it/sec) -training >> step=8403600, episode=1401 reward=0.789187 (498.03 it/sec) -training >> step=8403700, episode=1401 reward=0.7978338 (487.67 it/sec) -training >> step=8403800, episode=1401 reward=0.79892 (468.76 it/sec) -training >> step=8403900, episode=1401 reward=0.7875054 (491.46 it/sec) -training >> step=8404000, episode=1401 reward=0.7865253 (491.62 it/sec) -training >> step=8404100, episode=1401 reward=0.7983994 (495.67 it/sec) -training >> step=8404200, episode=1401 reward=0.7808068 (466.49 it/sec) -training >> step=8404300, episode=1401 reward=0.7755067 (493.91 it/sec) -training >> step=8404400, episode=1401 reward=0.7941696 (483.23 it/sec) -training >> step=8404500, episode=1401 reward=0.7936384 (544.78 it/sec) -training >> step=8404600, episode=1401 reward=0.8046137 (471.16 it/sec) -training >> step=8404700, episode=1401 reward=0.7848536 (485.64 it/sec) -training >> step=8404800, episode=1401 reward=0.7923349 (500.52 it/sec) -training >> step=8404900, episode=1401 reward=0.7933658 (469.64 it/sec) -training >> step=8405000, episode=1401 reward=0.7933747 (471.59 it/sec) -training >> step=8405100, episode=1401 reward=0.7883633 (505.39 it/sec) -training >> step=8405200, episode=1401 reward=0.7891036 (511.78 it/sec) -training >> step=8405300, episode=1402 reward=0.7813652 (97.71 it/sec) -training >> step=8405400, episode=1402 reward=0.7878957 (477.64 it/sec) -training >> step=8405500, episode=1402 reward=0.7933251 (506.45 it/sec) -training >> step=8405600, episode=1402 reward=0.7733374 (500.44 it/sec) -training >> step=8405700, episode=1402 reward=0.7721363 (502.90 it/sec) -training >> step=8405800, episode=1402 reward=0.7907143 (496.39 it/sec) -training >> step=8405900, episode=1402 reward=0.8065751 (524.84 it/sec) -training >> step=8406000, episode=1402 reward=0.7830062 (477.36 it/sec) -training >> step=8406100, episode=1402 reward=0.8100377 (477.22 it/sec) -training >> step=8406200, episode=1402 reward=0.793644 (522.64 it/sec) -training >> step=8406300, episode=1402 reward=0.778919 (487.42 it/sec) -training >> step=8406400, episode=1402 reward=0.7873268 (475.33 it/sec) -training >> step=8406500, episode=1402 reward=0.7817234 (517.46 it/sec) -training >> step=8406600, episode=1402 reward=0.7878354 (497.14 it/sec) -training >> step=8406700, episode=1402 reward=0.76515 (485.49 it/sec) -training >> step=8406800, episode=1402 reward=0.7813435 (482.91 it/sec) -training >> step=8406900, episode=1402 reward=0.7749532 (446.26 it/sec) -training >> step=8407000, episode=1402 reward=0.8172069 (468.04 it/sec) -training >> step=8407100, episode=1402 reward=0.7816682 (515.60 it/sec) -training >> step=8407200, episode=1402 reward=0.778192 (488.09 it/sec) -training >> step=8407300, episode=1402 reward=0.8056312 (458.92 it/sec) -training >> step=8407400, episode=1402 reward=0.8021389 (491.65 it/sec) -training >> step=8407500, episode=1402 reward=0.7981529 (502.95 it/sec) -training >> step=8407600, episode=1402 reward=0.8097097 (478.67 it/sec) -training >> step=8407700, episode=1402 reward=0.7828137 (525.62 it/sec) -training >> step=8407800, episode=1402 reward=0.8072184 (433.47 it/sec) -training >> step=8407900, episode=1402 reward=0.7705374 (515.60 it/sec) -training >> step=8408000, episode=1402 reward=0.7842271 (499.81 it/sec) -training >> step=8408100, episode=1402 reward=0.7789286 (505.75 it/sec) -training >> step=8408200, episode=1402 reward=0.7636072 (495.88 it/sec) -training >> step=8408300, episode=1402 reward=0.7893321 (480.41 it/sec) -training >> step=8408400, episode=1402 reward=0.8042298 (485.71 it/sec) -training >> step=8408500, episode=1402 reward=0.7848713 (448.15 it/sec) -training >> step=8408600, episode=1402 reward=0.7961379 (499.86 it/sec) -training >> step=8408700, episode=1402 reward=0.7983356 (481.01 it/sec) -training >> step=8408800, episode=1402 reward=0.7917567 (508.29 it/sec) -training >> step=8408900, episode=1402 reward=0.768074 (503.39 it/sec) -training >> step=8409000, episode=1402 reward=0.7906029 (526.62 it/sec) -training >> step=8409100, episode=1402 reward=0.7913588 (498.27 it/sec) -training >> step=8409200, episode=1402 reward=0.7815376 (484.87 it/sec) -training >> step=8409300, episode=1402 reward=0.7802246 (486.50 it/sec) -training >> step=8409400, episode=1402 reward=0.8143592 (451.44 it/sec) -training >> step=8409500, episode=1402 reward=0.7903957 (478.24 it/sec) -training >> step=8409600, episode=1402 reward=0.8000485 (498.58 it/sec) -training >> step=8409700, episode=1402 reward=0.7742294 (496.05 it/sec) -training >> step=8409800, episode=1402 reward=0.7836161 (497.34 it/sec) -training >> step=8409900, episode=1402 reward=0.7874547 (509.05 it/sec) -training >> step=8410000, episode=1402 reward=0.8051792 (505.52 it/sec) -training >> step=8410100, episode=1402 reward=0.7867872 (503.86 it/sec) -training >> step=8410200, episode=1402 reward=0.7955332 (510.44 it/sec) -training >> step=8410300, episode=1402 reward=0.8077655 (497.20 it/sec) -training >> step=8410400, episode=1402 reward=0.7701861 (499.65 it/sec) -training >> step=8410500, episode=1402 reward=0.7894015 (484.07 it/sec) -training >> step=8410600, episode=1402 reward=0.7764863 (504.53 it/sec) -training >> step=8410700, episode=1402 reward=0.7698063 (515.19 it/sec) -training >> step=8410800, episode=1402 reward=0.7860923 (530.16 it/sec) -training >> step=8410900, episode=1402 reward=0.7939051 (507.71 it/sec) -training >> step=8411000, episode=1402 reward=0.7918127 (461.35 it/sec) -training >> step=8411100, episode=1402 reward=0.7883039 (464.98 it/sec) -training >> step=8411200, episode=1402 reward=0.7886207 (533.49 it/sec) -training >> step=8411300, episode=1403 reward=0.7696697 (87.49 it/sec) -training >> step=8411400, episode=1403 reward=0.7912537 (479.00 it/sec) -training >> step=8411500, episode=1403 reward=0.7858241 (502.05 it/sec) -training >> step=8411600, episode=1403 reward=0.7794377 (466.36 it/sec) -training >> step=8411700, episode=1403 reward=0.7968354 (494.49 it/sec) -training >> step=8411800, episode=1403 reward=0.7762411 (542.81 it/sec) -training >> step=8411900, episode=1403 reward=0.7975485 (522.22 it/sec) -training >> step=8412000, episode=1403 reward=0.7996048 (464.96 it/sec) -training >> step=8412100, episode=1403 reward=0.7893966 (496.54 it/sec) -training >> step=8412200, episode=1403 reward=0.7891728 (462.52 it/sec) -training >> step=8412300, episode=1403 reward=0.8051243 (511.02 it/sec) -training >> step=8412400, episode=1403 reward=0.7898452 (521.20 it/sec) -training >> step=8412500, episode=1403 reward=0.8084961 (489.69 it/sec) -training >> step=8412600, episode=1403 reward=0.8054028 (469.02 it/sec) -training >> step=8412700, episode=1403 reward=0.7935032 (462.58 it/sec) -training >> step=8412800, episode=1403 reward=0.8028825 (507.64 it/sec) -training >> step=8412900, episode=1403 reward=0.800236 (509.46 it/sec) -training >> step=8413000, episode=1403 reward=0.7801656 (505.81 it/sec) -training >> step=8413100, episode=1403 reward=0.8081748 (494.56 it/sec) -training >> step=8413200, episode=1403 reward=0.7948038 (513.53 it/sec) -training >> step=8413300, episode=1403 reward=0.7965525 (503.60 it/sec) -training >> step=8413400, episode=1403 reward=0.7985603 (462.93 it/sec) -training >> step=8413500, episode=1403 reward=0.774014 (458.49 it/sec) -training >> step=8413600, episode=1403 reward=0.7835084 (505.94 it/sec) -training >> step=8413700, episode=1403 reward=0.8077989 (482.29 it/sec) -training >> step=8413800, episode=1403 reward=0.7839355 (484.46 it/sec) -training >> step=8413900, episode=1403 reward=0.8029141 (507.80 it/sec) -training >> step=8414000, episode=1403 reward=0.7883486 (468.54 it/sec) -training >> step=8414100, episode=1403 reward=0.7737291 (498.15 it/sec) -training >> step=8414200, episode=1403 reward=0.7742695 (414.50 it/sec) -training >> step=8414300, episode=1403 reward=0.7732944 (449.12 it/sec) -training >> step=8414400, episode=1403 reward=0.7913338 (487.86 it/sec) -training >> step=8414500, episode=1403 reward=0.8185473 (499.40 it/sec) -training >> step=8414600, episode=1403 reward=0.7874211 (472.69 it/sec) -training >> step=8414700, episode=1403 reward=0.7860423 (465.71 it/sec) -training >> step=8414800, episode=1403 reward=0.7982039 (492.93 it/sec) -training >> step=8414900, episode=1403 reward=0.7834441 (483.73 it/sec) -training >> step=8415000, episode=1403 reward=0.7918972 (540.44 it/sec) -training >> step=8415100, episode=1403 reward=0.7886337 (486.90 it/sec) -training >> step=8415200, episode=1403 reward=0.7916056 (439.69 it/sec) -training >> step=8415300, episode=1403 reward=0.7902027 (517.93 it/sec) -training >> step=8415400, episode=1403 reward=0.787053 (504.23 it/sec) -training >> step=8415500, episode=1403 reward=0.8122095 (483.71 it/sec) -training >> step=8415600, episode=1403 reward=0.787107 (452.49 it/sec) -training >> step=8415700, episode=1403 reward=0.7926569 (494.91 it/sec) -training >> step=8415800, episode=1403 reward=0.8096088 (320.40 it/sec) -training >> step=8415900, episode=1403 reward=0.808776 (467.44 it/sec) -training >> step=8416000, episode=1403 reward=0.7872955 (488.53 it/sec) -training >> step=8416100, episode=1403 reward=0.7887582 (501.85 it/sec) -training >> step=8416200, episode=1403 reward=0.7733909 (537.04 it/sec) -training >> step=8416300, episode=1403 reward=0.7815938 (489.19 it/sec) -training >> step=8416400, episode=1403 reward=0.7897155 (483.43 it/sec) -training >> step=8416500, episode=1403 reward=0.7886552 (506.18 it/sec) -training >> step=8416600, episode=1403 reward=0.7919601 (438.90 it/sec) -training >> step=8416700, episode=1403 reward=0.7916793 (467.92 it/sec) -training >> step=8416800, episode=1403 reward=0.8180413 (522.07 it/sec) -training >> step=8416900, episode=1403 reward=0.7819009 (500.55 it/sec) -training >> step=8417000, episode=1403 reward=0.7949005 (519.31 it/sec) -training >> step=8417100, episode=1403 reward=0.7957174 (465.69 it/sec) -training >> step=8417200, episode=1403 reward=0.7770583 (531.46 it/sec) -training >> step=8417300, episode=1404 reward=0.7851718 (119.64 it/sec) -training >> step=8417400, episode=1404 reward=0.7895491 (550.47 it/sec) -training >> step=8417500, episode=1404 reward=0.7791771 (524.68 it/sec) -training >> step=8417600, episode=1404 reward=0.8047478 (499.34 it/sec) -training >> step=8417700, episode=1404 reward=0.7763008 (532.95 it/sec) -training >> step=8417800, episode=1404 reward=0.7742263 (512.84 it/sec) -training >> step=8417900, episode=1404 reward=0.7795355 (521.81 it/sec) -training >> step=8418000, episode=1404 reward=0.7806029 (525.37 it/sec) -training >> step=8418100, episode=1404 reward=0.792323 (532.44 it/sec) -training >> step=8418200, episode=1404 reward=0.7726268 (483.34 it/sec) -training >> step=8418300, episode=1404 reward=0.7787985 (494.62 it/sec) -training >> step=8418400, episode=1404 reward=0.7991945 (519.33 it/sec) -training >> step=8418500, episode=1404 reward=0.8046599 (525.33 it/sec) -training >> step=8418600, episode=1404 reward=0.7806377 (484.91 it/sec) -training >> step=8418700, episode=1404 reward=0.7917154 (475.32 it/sec) -training >> step=8418800, episode=1404 reward=0.7989742 (513.71 it/sec) -training >> step=8418900, episode=1404 reward=0.7808563 (523.29 it/sec) -training >> step=8419000, episode=1404 reward=0.78602 (520.65 it/sec) -training >> step=8419100, episode=1404 reward=0.786653 (516.46 it/sec) -training >> step=8419200, episode=1404 reward=0.7937765 (515.66 it/sec) -training >> step=8419300, episode=1404 reward=0.7959336 (506.14 it/sec) -training >> step=8419400, episode=1404 reward=0.7873293 (490.84 it/sec) -training >> step=8419500, episode=1404 reward=0.8017812 (531.49 it/sec) -training >> step=8419600, episode=1404 reward=0.7946182 (520.76 it/sec) -training >> step=8419700, episode=1404 reward=0.797914 (492.72 it/sec) -training >> step=8419800, episode=1404 reward=0.7824148 (522.51 it/sec) -training >> step=8419900, episode=1404 reward=0.7909831 (448.45 it/sec) -training >> step=8420000, episode=1404 reward=0.7875792 (458.93 it/sec) -training >> step=8420100, episode=1404 reward=0.7707219 (490.02 it/sec) -training >> step=8420200, episode=1404 reward=0.7956251 (478.72 it/sec) -training >> step=8420300, episode=1404 reward=0.787021 (493.29 it/sec) -training >> step=8420400, episode=1404 reward=0.7972772 (457.82 it/sec) -training >> step=8420500, episode=1404 reward=0.8004575 (438.75 it/sec) -training >> step=8420600, episode=1404 reward=0.7775398 (495.95 it/sec) -training >> step=8420700, episode=1404 reward=0.7915883 (457.85 it/sec) -training >> step=8420800, episode=1404 reward=0.791075 (487.24 it/sec) -training >> step=8420900, episode=1404 reward=0.7825341 (485.92 it/sec) -training >> step=8421000, episode=1404 reward=0.7933815 (525.96 it/sec) -training >> step=8421100, episode=1404 reward=0.7772728 (436.28 it/sec) -training >> step=8421200, episode=1404 reward=0.7831571 (448.30 it/sec) -training >> step=8421300, episode=1404 reward=0.7747998 (475.73 it/sec) -training >> step=8421400, episode=1404 reward=0.7922741 (496.51 it/sec) -training >> step=8421500, episode=1404 reward=0.8047931 (457.16 it/sec) -training >> step=8421600, episode=1404 reward=0.7944971 (487.54 it/sec) -training >> step=8421700, episode=1404 reward=0.7984934 (474.11 it/sec) -training >> step=8421800, episode=1404 reward=0.7883936 (472.55 it/sec) -training >> step=8421900, episode=1404 reward=0.7915281 (504.09 it/sec) -training >> step=8422000, episode=1404 reward=0.8067729 (480.04 it/sec) -training >> step=8422100, episode=1404 reward=0.790818 (527.01 it/sec) -training >> step=8422200, episode=1404 reward=0.8006264 (488.66 it/sec) -training >> step=8422300, episode=1404 reward=0.7897239 (424.44 it/sec) -training >> step=8422400, episode=1404 reward=0.8009303 (488.35 it/sec) -training >> step=8422500, episode=1404 reward=0.8041311 (483.33 it/sec) -training >> step=8422600, episode=1404 reward=0.7740742 (446.04 it/sec) -training >> step=8422700, episode=1404 reward=0.7664147 (423.36 it/sec) -training >> step=8422800, episode=1404 reward=0.8145471 (471.51 it/sec) -training >> step=8422900, episode=1404 reward=0.7796104 (450.19 it/sec) -training >> step=8423000, episode=1404 reward=0.7899855 (495.44 it/sec) -training >> step=8423100, episode=1404 reward=0.7820777 (493.77 it/sec) -training >> step=8423200, episode=1404 reward=0.7845058 (481.48 it/sec) -training >> step=8423300, episode=1405 reward=0.7713836 (106.72 it/sec) -training >> step=8423400, episode=1405 reward=0.7997583 (339.61 it/sec) -training >> step=8423500, episode=1405 reward=0.7920845 (500.58 it/sec) -training >> step=8423600, episode=1405 reward=0.799799 (462.76 it/sec) -training >> step=8423700, episode=1405 reward=0.7971849 (488.57 it/sec) -training >> step=8423800, episode=1405 reward=0.8097317 (478.24 it/sec) -training >> step=8423900, episode=1405 reward=0.8031496 (492.51 it/sec) -training >> step=8424000, episode=1405 reward=0.7774898 (441.12 it/sec) -training >> step=8424100, episode=1405 reward=0.7887658 (466.75 it/sec) -training >> step=8424200, episode=1405 reward=0.8115553 (429.86 it/sec) -training >> step=8424300, episode=1405 reward=0.7905027 (478.09 it/sec) -training >> step=8424400, episode=1405 reward=0.7810877 (477.24 it/sec) -training >> step=8424500, episode=1405 reward=0.8020621 (492.94 it/sec) -training >> step=8424600, episode=1405 reward=0.7770498 (454.26 it/sec) -training >> step=8424700, episode=1405 reward=0.782849 (476.11 it/sec) -training >> step=8424800, episode=1405 reward=0.7893699 (466.72 it/sec) -training >> step=8424900, episode=1405 reward=0.7676474 (488.63 it/sec) -training >> step=8425000, episode=1405 reward=0.7973313 (445.20 it/sec) -training >> step=8425100, episode=1405 reward=0.8019779 (495.97 it/sec) -training >> step=8425200, episode=1405 reward=0.789947 (491.11 it/sec) -training >> step=8425300, episode=1405 reward=0.7884266 (479.74 it/sec) -training >> step=8425400, episode=1405 reward=0.804629 (456.28 it/sec) -training >> step=8425500, episode=1405 reward=0.7772734 (495.08 it/sec) -training >> step=8425600, episode=1405 reward=0.7918161 (506.46 it/sec) -training >> step=8425700, episode=1405 reward=0.7868629 (456.25 it/sec) -training >> step=8425800, episode=1405 reward=0.7844813 (461.68 it/sec) -training >> step=8425900, episode=1405 reward=0.7860965 (455.99 it/sec) -training >> step=8426000, episode=1405 reward=0.7913392 (459.69 it/sec) -training >> step=8426100, episode=1405 reward=0.7821553 (460.35 it/sec) -training >> step=8426200, episode=1405 reward=0.8055425 (435.78 it/sec) -training >> step=8426300, episode=1405 reward=0.8075079 (500.41 it/sec) -training >> step=8426400, episode=1405 reward=0.7945872 (503.66 it/sec) -training >> step=8426500, episode=1405 reward=0.8037421 (497.31 it/sec) -training >> step=8426600, episode=1405 reward=0.7884355 (468.43 it/sec) -training >> step=8426700, episode=1405 reward=0.79954 (470.20 it/sec) -training >> step=8426800, episode=1405 reward=0.7867104 (415.26 it/sec) -training >> step=8426900, episode=1405 reward=0.8058016 (454.62 it/sec) -training >> step=8427000, episode=1405 reward=0.8071229 (510.66 it/sec) -training >> step=8427100, episode=1405 reward=0.7748086 (487.59 it/sec) -training >> step=8427200, episode=1405 reward=0.7855964 (466.67 it/sec) -training >> step=8427300, episode=1405 reward=0.7745467 (480.45 it/sec) -training >> step=8427400, episode=1405 reward=0.8039618 (491.36 it/sec) -training >> step=8427500, episode=1405 reward=0.7743981 (483.59 it/sec) -training >> step=8427600, episode=1405 reward=0.7999043 (461.15 it/sec) -training >> step=8427700, episode=1405 reward=0.7739199 (476.75 it/sec) -training >> step=8427800, episode=1405 reward=0.8112324 (448.05 it/sec) -training >> step=8427900, episode=1405 reward=0.7984098 (453.01 it/sec) -training >> step=8428000, episode=1405 reward=0.7934178 (431.18 it/sec) -training >> step=8428100, episode=1405 reward=0.7816095 (465.67 it/sec) -training >> step=8428200, episode=1405 reward=0.7726124 (502.53 it/sec) -training >> step=8428300, episode=1405 reward=0.7782467 (468.37 it/sec) -training >> step=8428400, episode=1405 reward=0.8130128 (490.71 it/sec) -training >> step=8428500, episode=1405 reward=0.7842067 (472.40 it/sec) -training >> step=8428600, episode=1405 reward=0.7913928 (466.19 it/sec) -training >> step=8428700, episode=1405 reward=0.783055 (486.89 it/sec) -training >> step=8428800, episode=1405 reward=0.784419 (490.25 it/sec) -training >> step=8428900, episode=1405 reward=0.7833676 (447.17 it/sec) -training >> step=8429000, episode=1405 reward=0.7894465 (429.74 it/sec) -training >> step=8429100, episode=1405 reward=0.8041723 (486.98 it/sec) -training >> step=8429200, episode=1405 reward=0.7953997 (515.71 it/sec) -training >> step=8429300, episode=1406 reward=0.7912213 (128.05 it/sec) -training >> step=8429400, episode=1406 reward=0.7759046 (449.28 it/sec) -training >> step=8429500, episode=1406 reward=0.7787305 (463.24 it/sec) -training >> step=8429600, episode=1406 reward=0.770627 (496.69 it/sec) -training >> step=8429700, episode=1406 reward=0.7818227 (498.27 it/sec) -training >> step=8429800, episode=1406 reward=0.7836512 (480.37 it/sec) -training >> step=8429900, episode=1406 reward=0.7772591 (514.24 it/sec) -training >> step=8430000, episode=1406 reward=0.7778745 (427.23 it/sec) -training >> step=8430100, episode=1406 reward=0.7838927 (494.82 it/sec) -training >> step=8430200, episode=1406 reward=0.7847635 (490.58 it/sec) -training >> step=8430300, episode=1406 reward=0.7857994 (476.02 it/sec) -training >> step=8430400, episode=1406 reward=0.7782146 (476.75 it/sec) -training >> step=8430500, episode=1406 reward=0.7659072 (463.12 it/sec) -training >> step=8430600, episode=1406 reward=0.7881904 (529.62 it/sec) -training >> step=8430700, episode=1406 reward=0.7810665 (485.31 it/sec) -training >> step=8430800, episode=1406 reward=0.7878105 (452.02 it/sec) -training >> step=8430900, episode=1406 reward=0.802843 (480.06 it/sec) -training >> step=8431000, episode=1406 reward=0.7905017 (486.18 it/sec) -training >> step=8431100, episode=1406 reward=0.7944483 (462.57 it/sec) -training >> step=8431200, episode=1406 reward=0.7830378 (493.06 it/sec) -training >> step=8431300, episode=1406 reward=0.7778941 (428.74 it/sec) -training >> step=8431400, episode=1406 reward=0.7718459 (438.05 it/sec) -training >> step=8431500, episode=1406 reward=0.7841699 (494.67 it/sec) -training >> step=8431600, episode=1406 reward=0.801484 (489.13 it/sec) -training >> step=8431700, episode=1406 reward=0.7992709 (504.05 it/sec) -training >> step=8431800, episode=1406 reward=0.7925096 (433.15 it/sec) -training >> step=8431900, episode=1406 reward=0.7854396 (423.28 it/sec) -training >> step=8432000, episode=1406 reward=0.7973586 (522.77 it/sec) -training >> step=8432100, episode=1406 reward=0.7821944 (498.11 it/sec) -training >> step=8432200, episode=1406 reward=0.7761493 (481.38 it/sec) -training >> step=8432300, episode=1406 reward=0.8152035 (450.75 it/sec) -training >> step=8432400, episode=1406 reward=0.7829002 (503.05 it/sec) -training >> step=8432500, episode=1406 reward=0.7757851 (457.06 it/sec) -training >> step=8432600, episode=1406 reward=0.776121 (477.61 it/sec) -training >> step=8432700, episode=1406 reward=0.7831753 (479.88 it/sec) -training >> step=8432800, episode=1406 reward=0.7755657 (497.38 it/sec) -training >> step=8432900, episode=1406 reward=0.7819096 (452.14 it/sec) -training >> step=8433000, episode=1406 reward=0.8000946 (469.62 it/sec) -training >> step=8433100, episode=1406 reward=0.7880008 (513.00 it/sec) -training >> step=8433200, episode=1406 reward=0.8076714 (504.72 it/sec) -training >> step=8433300, episode=1406 reward=0.7883937 (484.33 it/sec) -training >> step=8433400, episode=1406 reward=0.7900333 (479.95 it/sec) -training >> step=8433500, episode=1406 reward=0.7962067 (528.97 it/sec) -training >> step=8433600, episode=1406 reward=0.7965104 (463.30 it/sec) -training >> step=8433700, episode=1406 reward=0.7968899 (491.82 it/sec) -training >> step=8433800, episode=1406 reward=0.8072276 (453.81 it/sec) -training >> step=8433900, episode=1406 reward=0.7863238 (469.58 it/sec) -training >> step=8434000, episode=1406 reward=0.7999129 (483.20 it/sec) -training >> step=8434100, episode=1406 reward=0.779136 (437.25 it/sec) -training >> step=8434200, episode=1406 reward=0.7855406 (495.41 it/sec) -training >> step=8434300, episode=1406 reward=0.7937599 (494.81 it/sec) -training >> step=8434400, episode=1406 reward=0.8064817 (429.54 it/sec) -training >> step=8434500, episode=1406 reward=0.7806813 (502.84 it/sec) -training >> step=8434600, episode=1406 reward=0.7933966 (500.44 it/sec) -training >> step=8434700, episode=1406 reward=0.7916607 (439.01 it/sec) -training >> step=8434800, episode=1406 reward=0.8074787 (448.63 it/sec) -training >> step=8434900, episode=1406 reward=0.7905646 (441.46 it/sec) -training >> step=8435000, episode=1406 reward=0.787042 (507.07 it/sec) -training >> step=8435100, episode=1406 reward=0.7826417 (485.10 it/sec) -training >> step=8435200, episode=1406 reward=0.7667792 (465.32 it/sec) -training >> step=8435300, episode=1407 reward=0.7960749 (144.59 it/sec) -training >> step=8435400, episode=1407 reward=0.7832255 (388.76 it/sec) -training >> step=8435500, episode=1407 reward=0.8004283 (453.96 it/sec) -training >> step=8435600, episode=1407 reward=0.7854668 (477.46 it/sec) -training >> step=8435700, episode=1407 reward=0.8095951 (489.89 it/sec) -training >> step=8435800, episode=1407 reward=0.7840288 (514.10 it/sec) -training >> step=8435900, episode=1407 reward=0.7895392 (421.15 it/sec) -training >> step=8436000, episode=1407 reward=0.7627243 (499.26 it/sec) -training >> step=8436100, episode=1407 reward=0.8092595 (503.79 it/sec) -training >> step=8436200, episode=1407 reward=0.7872948 (507.55 it/sec) -training >> step=8436300, episode=1407 reward=0.780539 (466.82 it/sec) -training >> step=8436400, episode=1407 reward=0.8184126 (484.86 it/sec) -training >> step=8436500, episode=1407 reward=0.7946977 (468.00 it/sec) -training >> step=8436600, episode=1407 reward=0.7826492 (487.08 it/sec) -training >> step=8436700, episode=1407 reward=0.7930846 (499.38 it/sec) -training >> step=8436800, episode=1407 reward=0.7802975 (494.23 it/sec) -training >> step=8436900, episode=1407 reward=0.8037786 (411.35 it/sec) -training >> step=8437000, episode=1407 reward=0.7825424 (446.43 it/sec) -training >> step=8437100, episode=1407 reward=0.7926598 (466.30 it/sec) -training >> step=8437200, episode=1407 reward=0.7904447 (457.09 it/sec) -training >> step=8437300, episode=1407 reward=0.8089955 (438.33 it/sec) -training >> step=8437400, episode=1407 reward=0.7973726 (461.36 it/sec) -training >> step=8437500, episode=1407 reward=0.7789559 (508.98 it/sec) -training >> step=8437600, episode=1407 reward=0.7963128 (483.36 it/sec) -training >> step=8437700, episode=1407 reward=0.7824654 (449.15 it/sec) -training >> step=8437800, episode=1407 reward=0.780155 (449.97 it/sec) -training >> step=8437900, episode=1407 reward=0.8018004 (485.34 it/sec) -training >> step=8438000, episode=1407 reward=0.7962271 (459.65 it/sec) -training >> step=8438100, episode=1407 reward=0.7840009 (430.51 it/sec) -training >> step=8438200, episode=1407 reward=0.7922103 (470.52 it/sec) -training >> step=8438300, episode=1407 reward=0.7987482 (490.61 it/sec) -training >> step=8438400, episode=1407 reward=0.7688987 (448.32 it/sec) -training >> step=8438500, episode=1407 reward=0.7934653 (434.21 it/sec) -training >> step=8438600, episode=1407 reward=0.7889397 (515.21 it/sec) -training >> step=8438700, episode=1407 reward=0.8246186 (486.34 it/sec) -training >> step=8438800, episode=1407 reward=0.776405 (463.67 it/sec) -training >> step=8438900, episode=1407 reward=0.7807359 (487.48 it/sec) -training >> step=8439000, episode=1407 reward=0.803537 (503.47 it/sec) -training >> step=8439100, episode=1407 reward=0.7913077 (466.63 it/sec) -training >> step=8439200, episode=1407 reward=0.7792386 (468.25 it/sec) -training >> step=8439300, episode=1407 reward=0.79071 (509.68 it/sec) -training >> step=8439400, episode=1407 reward=0.7729768 (479.18 it/sec) -training >> step=8439500, episode=1407 reward=0.8007727 (479.41 it/sec) -training >> step=8439600, episode=1407 reward=0.7895867 (473.27 it/sec) -training >> step=8439700, episode=1407 reward=0.7821231 (510.54 it/sec) -training >> step=8439800, episode=1407 reward=0.7848343 (473.00 it/sec) -training >> step=8439900, episode=1407 reward=0.7998877 (485.73 it/sec) -training >> step=8440000, episode=1407 reward=0.8063408 (482.23 it/sec) -training >> step=8440100, episode=1407 reward=0.7777144 (505.41 it/sec) -training >> step=8440200, episode=1407 reward=0.8006446 (457.40 it/sec) -training >> step=8440300, episode=1407 reward=0.7885519 (421.60 it/sec) -training >> step=8440400, episode=1407 reward=0.7812244 (518.78 it/sec) -training >> step=8440500, episode=1407 reward=0.7992262 (473.74 it/sec) -training >> step=8440600, episode=1407 reward=0.7842786 (456.99 it/sec) -training >> step=8440700, episode=1407 reward=0.7917064 (490.43 it/sec) -training >> step=8440800, episode=1407 reward=0.7847453 (511.20 it/sec) -training >> step=8440900, episode=1407 reward=0.7871366 (455.65 it/sec) -training >> step=8441000, episode=1407 reward=0.7977665 (459.11 it/sec) -training >> step=8441100, episode=1407 reward=0.7879359 (470.31 it/sec) -training >> step=8441200, episode=1407 reward=0.768182 (442.68 it/sec) -training >> step=8441300, episode=1408 reward=0.7982114 (126.77 it/sec) -training >> step=8441400, episode=1408 reward=0.7849179 (428.16 it/sec) -training >> step=8441500, episode=1408 reward=0.7800602 (308.81 it/sec) -training >> step=8441600, episode=1408 reward=0.7819555 (440.35 it/sec) -training >> step=8441700, episode=1408 reward=0.7967713 (458.10 it/sec) -training >> step=8441800, episode=1408 reward=0.8092449 (456.46 it/sec) -training >> step=8441900, episode=1408 reward=0.7848407 (468.64 it/sec) -training >> step=8442000, episode=1408 reward=0.7918467 (443.91 it/sec) -training >> step=8442100, episode=1408 reward=0.7999777 (478.20 it/sec) -training >> step=8442200, episode=1408 reward=0.7972466 (431.76 it/sec) -training >> step=8442300, episode=1408 reward=0.7954555 (494.04 it/sec) -training >> step=8442400, episode=1408 reward=0.7901327 (478.04 it/sec) -training >> step=8442500, episode=1408 reward=0.8058249 (420.20 it/sec) -training >> step=8442600, episode=1408 reward=0.8060423 (518.97 it/sec) -training >> step=8442700, episode=1408 reward=0.7841528 (491.61 it/sec) -training >> step=8442800, episode=1408 reward=0.7812257 (448.00 it/sec) -training >> step=8442900, episode=1408 reward=0.801993 (477.53 it/sec) -training >> step=8443000, episode=1408 reward=0.7946993 (524.98 it/sec) -training >> step=8443100, episode=1408 reward=0.8106882 (438.98 it/sec) -training >> step=8443200, episode=1408 reward=0.8031259 (459.12 it/sec) -training >> step=8443300, episode=1408 reward=0.7995481 (516.38 it/sec) -training >> step=8443400, episode=1408 reward=0.784299 (449.60 it/sec) -training >> step=8443500, episode=1408 reward=0.8023267 (420.88 it/sec) -training >> step=8443600, episode=1408 reward=0.774839 (478.79 it/sec) -training >> step=8443700, episode=1408 reward=0.7864323 (522.23 it/sec) -training >> step=8443800, episode=1408 reward=0.7883331 (473.98 it/sec) -training >> step=8443900, episode=1408 reward=0.7916441 (493.20 it/sec) -training >> step=8444000, episode=1408 reward=0.7882506 (461.44 it/sec) -training >> step=8444100, episode=1408 reward=0.7803203 (471.89 it/sec) -training >> step=8444200, episode=1408 reward=0.781678 (475.52 it/sec) -training >> step=8444300, episode=1408 reward=0.7983779 (471.77 it/sec) -training >> step=8444400, episode=1408 reward=0.7888212 (514.95 it/sec) -training >> step=8444500, episode=1408 reward=0.8063423 (414.76 it/sec) -training >> step=8444600, episode=1408 reward=0.7941692 (489.83 it/sec) -training >> step=8444700, episode=1408 reward=0.7875975 (477.41 it/sec) -training >> step=8444800, episode=1408 reward=0.7955979 (509.94 it/sec) -training >> step=8444900, episode=1408 reward=0.7760663 (468.22 it/sec) -training >> step=8445000, episode=1408 reward=0.8026069 (492.40 it/sec) -training >> step=8445100, episode=1408 reward=0.7872231 (519.32 it/sec) -training >> step=8445200, episode=1408 reward=0.7875651 (522.75 it/sec) -training >> step=8445300, episode=1408 reward=0.7837583 (474.84 it/sec) -training >> step=8445400, episode=1408 reward=0.7705007 (463.06 it/sec) -training >> step=8445500, episode=1408 reward=0.7956715 (464.92 it/sec) -training >> step=8445600, episode=1408 reward=0.797996 (467.15 it/sec) -training >> step=8445700, episode=1408 reward=0.8100734 (469.38 it/sec) -training >> step=8445800, episode=1408 reward=0.808588 (486.37 it/sec) -training >> step=8445900, episode=1408 reward=0.7911861 (499.32 it/sec) -training >> step=8446000, episode=1408 reward=0.7924939 (459.21 it/sec) -training >> step=8446100, episode=1408 reward=0.7905677 (474.49 it/sec) -training >> step=8446200, episode=1408 reward=0.7853078 (493.48 it/sec) -training >> step=8446300, episode=1408 reward=0.799741 (497.15 it/sec) -training >> step=8446400, episode=1408 reward=0.8025208 (482.29 it/sec) -training >> step=8446500, episode=1408 reward=0.7952601 (490.05 it/sec) -training >> step=8446600, episode=1408 reward=0.7938804 (467.01 it/sec) -training >> step=8446700, episode=1408 reward=0.7774572 (484.81 it/sec) -training >> step=8446800, episode=1408 reward=0.796415 (461.14 it/sec) -training >> step=8446900, episode=1408 reward=0.7837892 (482.23 it/sec) -training >> step=8447000, episode=1408 reward=0.7967751 (454.16 it/sec) -training >> step=8447100, episode=1408 reward=0.7877281 (477.89 it/sec) -training >> step=8447200, episode=1408 reward=0.7846764 (485.30 it/sec) -training >> step=8447300, episode=1409 reward=0.8034841 (144.37 it/sec) -training >> step=8447400, episode=1409 reward=0.7882172 (435.81 it/sec) -training >> step=8447500, episode=1409 reward=0.7765839 (458.71 it/sec) -training >> step=8447600, episode=1409 reward=0.7941341 (467.09 it/sec) -training >> step=8447700, episode=1409 reward=0.7879568 (507.42 it/sec) -training >> step=8447800, episode=1409 reward=0.7748691 (399.41 it/sec) -training >> step=8447900, episode=1409 reward=0.7993141 (485.47 it/sec) -training >> step=8448000, episode=1409 reward=0.7922416 (472.84 it/sec) -training >> step=8448100, episode=1409 reward=0.803229 (471.25 it/sec) -training >> step=8448200, episode=1409 reward=0.7925991 (443.98 it/sec) -training >> step=8448300, episode=1409 reward=0.8010602 (442.31 it/sec) -training >> step=8448400, episode=1409 reward=0.7730153 (442.32 it/sec) -training >> step=8448500, episode=1409 reward=0.7790237 (450.07 it/sec) -training >> step=8448600, episode=1409 reward=0.7682653 (462.15 it/sec) -training >> step=8448700, episode=1409 reward=0.8155943 (476.29 it/sec) -training >> step=8448800, episode=1409 reward=0.7920218 (470.83 it/sec) -training >> step=8448900, episode=1409 reward=0.7919981 (474.89 it/sec) -training >> step=8449000, episode=1409 reward=0.794634 (461.81 it/sec) -training >> step=8449100, episode=1409 reward=0.8020431 (521.42 it/sec) -training >> step=8449200, episode=1409 reward=0.8070714 (485.23 it/sec) -training >> step=8449300, episode=1409 reward=0.7781315 (447.94 it/sec) -training >> step=8449400, episode=1409 reward=0.7978497 (485.40 it/sec) -training >> step=8449500, episode=1409 reward=0.7990128 (457.44 it/sec) -training >> step=8449600, episode=1409 reward=0.7949495 (487.33 it/sec) -training >> step=8449700, episode=1409 reward=0.7584652 (490.28 it/sec) -training >> step=8449800, episode=1409 reward=0.7892771 (510.13 it/sec) -training >> step=8449900, episode=1409 reward=0.8041734 (529.11 it/sec) -training >> step=8450000, episode=1409 reward=0.7961561 (478.85 it/sec) -training >> step=8450100, episode=1409 reward=0.7888365 (489.20 it/sec) -training >> step=8450200, episode=1409 reward=0.7777418 (460.06 it/sec) -training >> step=8450300, episode=1409 reward=0.7898731 (476.23 it/sec) -training >> step=8450400, episode=1409 reward=0.8179917 (451.63 it/sec) -training >> step=8450500, episode=1409 reward=0.7816452 (443.66 it/sec) -training >> step=8450600, episode=1409 reward=0.7999191 (478.08 it/sec) -training >> step=8450700, episode=1409 reward=0.7729672 (477.48 it/sec) -training >> step=8450800, episode=1409 reward=0.7881816 (494.91 it/sec) -training >> step=8450900, episode=1409 reward=0.7813869 (471.14 it/sec) -training >> step=8451000, episode=1409 reward=0.8000346 (466.42 it/sec) -training >> step=8451100, episode=1409 reward=0.7750978 (489.01 it/sec) -training >> step=8451200, episode=1409 reward=0.7821832 (486.04 it/sec) -training >> step=8451300, episode=1409 reward=0.7888879 (506.95 it/sec) -training >> step=8451400, episode=1409 reward=0.7990029 (460.31 it/sec) -training >> step=8451500, episode=1409 reward=0.803394 (482.13 it/sec) -training >> step=8451600, episode=1409 reward=0.8009604 (490.42 it/sec) -training >> step=8451700, episode=1409 reward=0.8112448 (455.24 it/sec) -training >> step=8451800, episode=1409 reward=0.7920723 (477.93 it/sec) -training >> step=8451900, episode=1409 reward=0.7871763 (465.78 it/sec) -training >> step=8452000, episode=1409 reward=0.7892192 (522.25 it/sec) -training >> step=8452100, episode=1409 reward=0.8020116 (447.17 it/sec) -training >> step=8452200, episode=1409 reward=0.8030262 (473.53 it/sec) -training >> step=8452300, episode=1409 reward=0.7913457 (485.83 it/sec) -training >> step=8452400, episode=1409 reward=0.7890721 (493.07 it/sec) -training >> step=8452500, episode=1409 reward=0.7830326 (492.42 it/sec) -training >> step=8452600, episode=1409 reward=0.7872829 (467.83 it/sec) -training >> step=8452700, episode=1409 reward=0.8035926 (527.62 it/sec) -training >> step=8452800, episode=1409 reward=0.8077488 (505.19 it/sec) -training >> step=8452900, episode=1409 reward=0.789104 (444.60 it/sec) -training >> step=8453000, episode=1409 reward=0.7788693 (508.47 it/sec) -training >> step=8453100, episode=1409 reward=0.7873864 (456.80 it/sec) -training >> step=8453200, episode=1409 reward=0.7899098 (447.73 it/sec) -training >> step=8453300, episode=1410 reward=0.7744412 (132.13 it/sec) -training >> step=8453400, episode=1410 reward=0.7870892 (478.93 it/sec) -training >> step=8453500, episode=1410 reward=0.8055198 (511.48 it/sec) -training >> step=8453600, episode=1410 reward=0.7721634 (476.34 it/sec) -training >> step=8453700, episode=1410 reward=0.7976094 (472.67 it/sec) -training >> step=8453800, episode=1410 reward=0.8050997 (513.86 it/sec) -training >> step=8453900, episode=1410 reward=0.7608916 (487.31 it/sec) -training >> step=8454000, episode=1410 reward=0.773583 (381.64 it/sec) -training >> step=8454100, episode=1410 reward=0.766149 (501.06 it/sec) -training >> step=8454200, episode=1410 reward=0.7964876 (518.92 it/sec) -training >> step=8454300, episode=1410 reward=0.7840741 (473.94 it/sec) -training >> step=8454400, episode=1410 reward=0.7812385 (436.46 it/sec) -training >> step=8454500, episode=1410 reward=0.7684395 (499.25 it/sec) -training >> step=8454600, episode=1410 reward=0.7989845 (485.09 it/sec) -training >> step=8454700, episode=1410 reward=0.7929223 (485.00 it/sec) -training >> step=8454800, episode=1410 reward=0.8008682 (479.75 it/sec) -training >> step=8454900, episode=1410 reward=0.7870691 (503.78 it/sec) -training >> step=8455000, episode=1410 reward=0.7778389 (431.62 it/sec) -training >> step=8455100, episode=1410 reward=0.7836251 (398.89 it/sec) -training >> step=8455200, episode=1410 reward=0.7925 (482.24 it/sec) -training >> step=8455300, episode=1410 reward=0.7835315 (514.58 it/sec) -training >> step=8455400, episode=1410 reward=0.7908114 (465.85 it/sec) -training >> step=8455500, episode=1410 reward=0.7804527 (506.53 it/sec) -training >> step=8455600, episode=1410 reward=0.7994273 (491.06 it/sec) -training >> step=8455700, episode=1410 reward=0.7976019 (505.90 it/sec) -training >> step=8455800, episode=1410 reward=0.7905734 (462.59 it/sec) -training >> step=8455900, episode=1410 reward=0.8001898 (474.52 it/sec) -training >> step=8456000, episode=1410 reward=0.7798047 (526.05 it/sec) -training >> step=8456100, episode=1410 reward=0.7739385 (493.38 it/sec) -training >> step=8456200, episode=1410 reward=0.7977485 (457.73 it/sec) -training >> step=8456300, episode=1410 reward=0.7875069 (498.92 it/sec) -training >> step=8456400, episode=1410 reward=0.8171911 (448.25 it/sec) -training >> step=8456500, episode=1410 reward=0.7861444 (481.64 it/sec) -training >> step=8456600, episode=1410 reward=0.7973729 (487.88 it/sec) -training >> step=8456700, episode=1410 reward=0.7830465 (503.83 it/sec) -training >> step=8456800, episode=1410 reward=0.7859807 (485.72 it/sec) -training >> step=8456900, episode=1410 reward=0.8031741 (469.98 it/sec) -training >> step=8457000, episode=1410 reward=0.7948055 (493.62 it/sec) -training >> step=8457100, episode=1410 reward=0.8015733 (523.92 it/sec) -training >> step=8457200, episode=1410 reward=0.7898965 (456.99 it/sec) -training >> step=8457300, episode=1410 reward=0.8034579 (469.38 it/sec) -training >> step=8457400, episode=1410 reward=0.7906216 (511.59 it/sec) -training >> step=8457500, episode=1410 reward=0.7944468 (501.12 it/sec) -training >> step=8457600, episode=1410 reward=0.7956446 (434.43 it/sec) -training >> step=8457700, episode=1410 reward=0.7976567 (452.23 it/sec) -training >> step=8457800, episode=1410 reward=0.7767187 (499.95 it/sec) -training >> step=8457900, episode=1410 reward=0.7793156 (479.37 it/sec) -training >> step=8458000, episode=1410 reward=0.7899625 (469.54 it/sec) -training >> step=8458100, episode=1410 reward=0.7881902 (499.62 it/sec) -training >> step=8458200, episode=1410 reward=0.7991198 (510.63 it/sec) -training >> step=8458300, episode=1410 reward=0.7947481 (459.60 it/sec) -training >> step=8458400, episode=1410 reward=0.7881679 (474.58 it/sec) -training >> step=8458500, episode=1410 reward=0.7938566 (520.50 it/sec) -training >> step=8458600, episode=1410 reward=0.7900426 (464.02 it/sec) -training >> step=8458700, episode=1410 reward=0.7797811 (494.88 it/sec) -training >> step=8458800, episode=1410 reward=0.7928011 (478.53 it/sec) -training >> step=8458900, episode=1410 reward=0.7776413 (505.56 it/sec) -training >> step=8459000, episode=1410 reward=0.7745247 (466.38 it/sec) -training >> step=8459100, episode=1410 reward=0.7882985 (495.20 it/sec) -training >> step=8459200, episode=1410 reward=0.7900001 (517.10 it/sec) -training >> step=8459300, episode=1411 reward=0.8009273 (127.02 it/sec) -training >> step=8459400, episode=1411 reward=0.7992816 (488.55 it/sec) -training >> step=8459500, episode=1411 reward=0.7815915 (440.73 it/sec) -training >> step=8459600, episode=1411 reward=0.7793636 (388.11 it/sec) -training >> step=8459700, episode=1411 reward=0.8041915 (445.61 it/sec) -training >> step=8459800, episode=1411 reward=0.7787574 (471.68 it/sec) -training >> step=8459900, episode=1411 reward=0.7757712 (462.78 it/sec) -training >> step=8460000, episode=1411 reward=0.7875927 (449.68 it/sec) -training >> step=8460100, episode=1411 reward=0.7902149 (446.47 it/sec) -training >> step=8460200, episode=1411 reward=0.803108 (461.80 it/sec) -training >> step=8460300, episode=1411 reward=0.8039423 (344.62 it/sec) -training >> step=8460400, episode=1411 reward=0.8093333 (497.55 it/sec) -training >> step=8460500, episode=1411 reward=0.7966696 (480.93 it/sec) -training >> step=8460600, episode=1411 reward=0.7858032 (485.06 it/sec) -training >> step=8460700, episode=1411 reward=0.7775123 (508.99 it/sec) -training >> step=8460800, episode=1411 reward=0.8094881 (484.48 it/sec) -training >> step=8460900, episode=1411 reward=0.7737784 (492.37 it/sec) -training >> step=8461000, episode=1411 reward=0.7957134 (497.48 it/sec) -training >> step=8461100, episode=1411 reward=0.7929924 (503.55 it/sec) -training >> step=8461200, episode=1411 reward=0.7837417 (501.78 it/sec) -training >> step=8461300, episode=1411 reward=0.7858192 (472.89 it/sec) -training >> step=8461400, episode=1411 reward=0.7813814 (465.78 it/sec) -training >> step=8461500, episode=1411 reward=0.8063132 (452.50 it/sec) -training >> step=8461600, episode=1411 reward=0.791627 (453.53 it/sec) -training >> step=8461700, episode=1411 reward=0.7733051 (454.72 it/sec) -training >> step=8461800, episode=1411 reward=0.818373 (498.04 it/sec) -training >> step=8461900, episode=1411 reward=0.7877155 (491.20 it/sec) -training >> step=8462000, episode=1411 reward=0.7874323 (493.78 it/sec) -training >> step=8462100, episode=1411 reward=0.7970666 (491.27 it/sec) -training >> step=8462200, episode=1411 reward=0.7733082 (519.58 it/sec) -training >> step=8462300, episode=1411 reward=0.7860674 (497.89 it/sec) -training >> step=8462400, episode=1411 reward=0.7832795 (445.03 it/sec) -training >> step=8462500, episode=1411 reward=0.7946503 (486.27 it/sec) -training >> step=8462600, episode=1411 reward=0.7856866 (479.27 it/sec) -training >> step=8462700, episode=1411 reward=0.7897411 (495.51 it/sec) -training >> step=8462800, episode=1411 reward=0.7912009 (466.27 it/sec) -training >> step=8462900, episode=1411 reward=0.7982374 (493.36 it/sec) -training >> step=8463000, episode=1411 reward=0.7932439 (488.57 it/sec) -training >> step=8463100, episode=1411 reward=0.7841805 (486.99 it/sec) -training >> step=8463200, episode=1411 reward=0.8028057 (496.66 it/sec) -training >> step=8463300, episode=1411 reward=0.7634895 (486.51 it/sec) -training >> step=8463400, episode=1411 reward=0.7891827 (482.28 it/sec) -training >> step=8463500, episode=1411 reward=0.7920807 (470.74 it/sec) -training >> step=8463600, episode=1411 reward=0.7740905 (494.58 it/sec) -training >> step=8463700, episode=1411 reward=0.7916834 (482.92 it/sec) -training >> step=8463800, episode=1411 reward=0.7784056 (497.12 it/sec) -training >> step=8463900, episode=1411 reward=0.7945083 (473.53 it/sec) -training >> step=8464000, episode=1411 reward=0.8050363 (520.70 it/sec) -training >> step=8464100, episode=1411 reward=0.8022383 (486.03 it/sec) -training >> step=8464200, episode=1411 reward=0.7918998 (484.61 it/sec) -training >> step=8464300, episode=1411 reward=0.7808741 (491.49 it/sec) -training >> step=8464400, episode=1411 reward=0.7820232 (433.77 it/sec) -training >> step=8464500, episode=1411 reward=0.7995256 (453.38 it/sec) -training >> step=8464600, episode=1411 reward=0.7863291 (467.32 it/sec) -training >> step=8464700, episode=1411 reward=0.7781755 (477.62 it/sec) -training >> step=8464800, episode=1411 reward=0.7931269 (504.75 it/sec) -training >> step=8464900, episode=1411 reward=0.7903821 (495.87 it/sec) -training >> step=8465000, episode=1411 reward=0.781382 (516.60 it/sec) -training >> step=8465100, episode=1411 reward=0.7919556 (525.70 it/sec) -training >> step=8465200, episode=1411 reward=0.7833098 (435.55 it/sec) -training >> step=8465300, episode=1412 reward=0.7987971 (130.52 it/sec) -training >> step=8465400, episode=1412 reward=0.7746139 (476.82 it/sec) -training >> step=8465500, episode=1412 reward=0.785736 (447.86 it/sec) -training >> step=8465600, episode=1412 reward=0.7801859 (478.78 it/sec) -training >> step=8465700, episode=1412 reward=0.7843364 (494.21 it/sec) -training >> step=8465800, episode=1412 reward=0.7967603 (476.16 it/sec) -training >> step=8465900, episode=1412 reward=0.7595952 (528.83 it/sec) -training >> step=8466000, episode=1412 reward=0.8005169 (451.66 it/sec) -training >> step=8466100, episode=1412 reward=0.7813287 (482.79 it/sec) -training >> step=8466200, episode=1412 reward=0.8094812 (526.53 it/sec) -training >> step=8466300, episode=1412 reward=0.7911753 (459.20 it/sec) -training >> step=8466400, episode=1412 reward=0.7839025 (497.68 it/sec) -training >> step=8466500, episode=1412 reward=0.7889697 (407.04 it/sec) -training >> step=8466600, episode=1412 reward=0.7859971 (456.74 it/sec) -training >> step=8466700, episode=1412 reward=0.8055905 (471.73 it/sec) -training >> step=8466800, episode=1412 reward=0.7765962 (442.07 it/sec) -training >> step=8466900, episode=1412 reward=0.8101951 (503.39 it/sec) -training >> step=8467000, episode=1412 reward=0.8080615 (514.09 it/sec) -training >> step=8467100, episode=1412 reward=0.7954013 (434.87 it/sec) -training >> step=8467200, episode=1412 reward=0.7977264 (457.04 it/sec) -training >> step=8467300, episode=1412 reward=0.7950618 (464.54 it/sec) -training >> step=8467400, episode=1412 reward=0.8117937 (490.66 it/sec) -training >> step=8467500, episode=1412 reward=0.816003 (497.68 it/sec) -training >> step=8467600, episode=1412 reward=0.8119445 (505.48 it/sec) -training >> step=8467700, episode=1412 reward=0.7806858 (420.67 it/sec) -training >> step=8467800, episode=1412 reward=0.7836029 (416.03 it/sec) -training >> step=8467900, episode=1412 reward=0.8002405 (451.76 it/sec) -training >> step=8468000, episode=1412 reward=0.797776 (427.52 it/sec) -training >> step=8468100, episode=1412 reward=0.7926728 (471.08 it/sec) -training >> step=8468200, episode=1412 reward=0.7966977 (441.75 it/sec) -training >> step=8468300, episode=1412 reward=0.7868168 (499.27 it/sec) -training >> step=8468400, episode=1412 reward=0.8047581 (440.58 it/sec) -training >> step=8468500, episode=1412 reward=0.7889006 (493.34 it/sec) -training >> step=8468600, episode=1412 reward=0.7896212 (482.46 it/sec) -training >> step=8468700, episode=1412 reward=0.7600718 (487.60 it/sec) -training >> step=8468800, episode=1412 reward=0.7926274 (424.96 it/sec) -training >> step=8468900, episode=1412 reward=0.7910053 (414.85 it/sec) -training >> step=8469000, episode=1412 reward=0.8008175 (486.23 it/sec) -training >> step=8469100, episode=1412 reward=0.7873758 (496.50 it/sec) -training >> step=8469200, episode=1412 reward=0.8035172 (482.62 it/sec) -training >> step=8469300, episode=1412 reward=0.7909787 (454.96 it/sec) -training >> step=8469400, episode=1412 reward=0.7973827 (511.55 it/sec) -training >> step=8469500, episode=1412 reward=0.7913022 (468.09 it/sec) -training >> step=8469600, episode=1412 reward=0.776176 (482.83 it/sec) -training >> step=8469700, episode=1412 reward=0.789924 (478.08 it/sec) -training >> step=8469800, episode=1412 reward=0.7990763 (503.06 it/sec) -training >> step=8469900, episode=1412 reward=0.7945543 (487.43 it/sec) -training >> step=8470000, episode=1412 reward=0.7890515 (448.97 it/sec) -training >> step=8470100, episode=1412 reward=0.795365 (507.74 it/sec) -training >> step=8470200, episode=1412 reward=0.7743571 (467.02 it/sec) -training >> step=8470300, episode=1412 reward=0.7810883 (428.61 it/sec) -training >> step=8470400, episode=1412 reward=0.7873654 (466.63 it/sec) -training >> step=8470500, episode=1412 reward=0.781807 (482.85 it/sec) -training >> step=8470600, episode=1412 reward=0.7995253 (475.12 it/sec) -training >> step=8470700, episode=1412 reward=0.7742865 (457.32 it/sec) -training >> step=8470800, episode=1412 reward=0.7808073 (473.19 it/sec) -training >> step=8470900, episode=1412 reward=0.8022172 (497.72 it/sec) -training >> step=8471000, episode=1412 reward=0.7861767 (481.52 it/sec) -training >> step=8471100, episode=1412 reward=0.8090678 (493.95 it/sec) -training >> step=8471200, episode=1412 reward=0.7884212 (445.45 it/sec) -training >> step=8471300, episode=1413 reward=0.7749348 (114.73 it/sec) -training >> step=8471400, episode=1413 reward=0.7973118 (448.87 it/sec) -training >> step=8471500, episode=1413 reward=0.7760428 (486.69 it/sec) -training >> step=8471600, episode=1413 reward=0.80726 (483.08 it/sec) -training >> step=8471700, episode=1413 reward=0.7944215 (446.35 it/sec) -training >> step=8471800, episode=1413 reward=0.7705333 (466.91 it/sec) -training >> step=8471900, episode=1413 reward=0.7914187 (477.13 it/sec) -training >> step=8472000, episode=1413 reward=0.7788039 (521.70 it/sec) -training >> step=8472100, episode=1413 reward=0.7960923 (466.07 it/sec) -training >> step=8472200, episode=1413 reward=0.7856731 (459.51 it/sec) -training >> step=8472300, episode=1413 reward=0.7621764 (497.47 it/sec) -training >> step=8472400, episode=1413 reward=0.7835719 (482.33 it/sec) -training >> step=8472500, episode=1413 reward=0.794246 (450.72 it/sec) -training >> step=8472600, episode=1413 reward=0.7906271 (483.09 it/sec) -training >> step=8472700, episode=1413 reward=0.7993555 (368.32 it/sec) -training >> step=8472800, episode=1413 reward=0.7953724 (490.05 it/sec) -training >> step=8472900, episode=1413 reward=0.7916178 (478.65 it/sec) -training >> step=8473000, episode=1413 reward=0.8045838 (476.05 it/sec) -training >> step=8473100, episode=1413 reward=0.8017807 (505.70 it/sec) -training >> step=8473200, episode=1413 reward=0.7972188 (473.05 it/sec) -training >> step=8473300, episode=1413 reward=0.7934896 (454.42 it/sec) -training >> step=8473400, episode=1413 reward=0.7835351 (494.09 it/sec) -training >> step=8473500, episode=1413 reward=0.8096282 (481.94 it/sec) -training >> step=8473600, episode=1413 reward=0.797612 (492.89 it/sec) -training >> step=8473700, episode=1413 reward=0.7847332 (492.08 it/sec) -training >> step=8473800, episode=1413 reward=0.7864234 (496.48 it/sec) -training >> step=8473900, episode=1413 reward=0.8019843 (520.43 it/sec) -training >> step=8474000, episode=1413 reward=0.8000257 (473.44 it/sec) -training >> step=8474100, episode=1413 reward=0.7896739 (480.60 it/sec) -training >> step=8474200, episode=1413 reward=0.8004426 (477.65 it/sec) -training >> step=8474300, episode=1413 reward=0.8087359 (457.96 it/sec) -training >> step=8474400, episode=1413 reward=0.7847912 (495.61 it/sec) -training >> step=8474500, episode=1413 reward=0.7971213 (533.23 it/sec) -training >> step=8474600, episode=1413 reward=0.80415 (480.66 it/sec) -training >> step=8474700, episode=1413 reward=0.7929993 (488.30 it/sec) -training >> step=8474800, episode=1413 reward=0.8195212 (485.85 it/sec) -training >> step=8474900, episode=1413 reward=0.8105422 (463.74 it/sec) -training >> step=8475000, episode=1413 reward=0.7961808 (478.20 it/sec) -training >> step=8475100, episode=1413 reward=0.7767767 (484.50 it/sec) -training >> step=8475200, episode=1413 reward=0.7758856 (507.64 it/sec) -training >> step=8475300, episode=1413 reward=0.7788157 (458.96 it/sec) -training >> step=8475400, episode=1413 reward=0.7923524 (487.45 it/sec) -training >> step=8475500, episode=1413 reward=0.7887833 (503.57 it/sec) -training >> step=8475600, episode=1413 reward=0.7955357 (480.51 it/sec) -training >> step=8475700, episode=1413 reward=0.7742839 (456.24 it/sec) -training >> step=8475800, episode=1413 reward=0.8168846 (464.40 it/sec) -training >> step=8475900, episode=1413 reward=0.7734471 (488.22 it/sec) -training >> step=8476000, episode=1413 reward=0.7853678 (468.00 it/sec) -training >> step=8476100, episode=1413 reward=0.7907 (483.58 it/sec) -training >> step=8476200, episode=1413 reward=0.7855051 (489.03 it/sec) -training >> step=8476300, episode=1413 reward=0.7912037 (450.80 it/sec) -training >> step=8476400, episode=1413 reward=0.791396 (508.07 it/sec) -training >> step=8476500, episode=1413 reward=0.7808734 (490.06 it/sec) -training >> step=8476600, episode=1413 reward=0.7790216 (477.59 it/sec) -training >> step=8476700, episode=1413 reward=0.7643899 (485.19 it/sec) -training >> step=8476800, episode=1413 reward=0.813554 (501.58 it/sec) -training >> step=8476900, episode=1413 reward=0.8019514 (486.97 it/sec) -training >> step=8477000, episode=1413 reward=0.8006042 (509.52 it/sec) -training >> step=8477100, episode=1413 reward=0.784637 (458.26 it/sec) -training >> step=8477200, episode=1413 reward=0.804082 (398.15 it/sec) -training >> step=8477300, episode=1414 reward=0.7771907 (135.86 it/sec) -training >> step=8477400, episode=1414 reward=0.7781535 (474.31 it/sec) -training >> step=8477500, episode=1414 reward=0.7795238 (479.61 it/sec) -training >> step=8477600, episode=1414 reward=0.7999842 (495.12 it/sec) -training >> step=8477700, episode=1414 reward=0.797101 (487.37 it/sec) -training >> step=8477800, episode=1414 reward=0.7857237 (465.57 it/sec) -training >> step=8477900, episode=1414 reward=0.7811559 (469.06 it/sec) -training >> step=8478000, episode=1414 reward=0.7966442 (483.49 it/sec) -training >> step=8478100, episode=1414 reward=0.7588853 (511.94 it/sec) -training >> step=8478200, episode=1414 reward=0.791123 (490.91 it/sec) -training >> step=8478300, episode=1414 reward=0.7829888 (455.98 it/sec) -training >> step=8478400, episode=1414 reward=0.7951178 (463.33 it/sec) -training >> step=8478500, episode=1414 reward=0.7992409 (474.88 it/sec) -training >> step=8478600, episode=1414 reward=0.8017063 (480.42 it/sec) -training >> step=8478700, episode=1414 reward=0.8068709 (463.67 it/sec) -training >> step=8478800, episode=1414 reward=0.7801929 (376.08 it/sec) -training >> step=8478900, episode=1414 reward=0.7930303 (511.97 it/sec) -training >> step=8479000, episode=1414 reward=0.8020206 (497.27 it/sec) -training >> step=8479100, episode=1414 reward=0.795008 (477.54 it/sec) -training >> step=8479200, episode=1414 reward=0.795262 (500.32 it/sec) -training >> step=8479300, episode=1414 reward=0.7694292 (471.90 it/sec) -training >> step=8479400, episode=1414 reward=0.788256 (443.01 it/sec) -training >> step=8479500, episode=1414 reward=0.796005 (510.29 it/sec) -training >> step=8479600, episode=1414 reward=0.7961854 (484.01 it/sec) -training >> step=8479700, episode=1414 reward=0.7874051 (460.68 it/sec) -training >> step=8479800, episode=1414 reward=0.7875068 (459.87 it/sec) -training >> step=8479900, episode=1414 reward=0.7970937 (502.98 it/sec) -training >> step=8480000, episode=1414 reward=0.793952 (499.80 it/sec) -training >> step=8480100, episode=1414 reward=0.7988918 (493.82 it/sec) -training >> step=8480200, episode=1414 reward=0.8047976 (495.83 it/sec) -training >> step=8480300, episode=1414 reward=0.8044079 (450.29 it/sec) -training >> step=8480400, episode=1414 reward=0.7928525 (478.92 it/sec) -training >> step=8480500, episode=1414 reward=0.8107271 (483.05 it/sec) -training >> step=8480600, episode=1414 reward=0.7938186 (513.24 it/sec) -training >> step=8480700, episode=1414 reward=0.8000572 (504.81 it/sec) -training >> step=8480800, episode=1414 reward=0.7928613 (439.30 it/sec) -training >> step=8480900, episode=1414 reward=0.7706474 (478.12 it/sec) -training >> step=8481000, episode=1414 reward=0.7784613 (524.84 it/sec) -training >> step=8481100, episode=1414 reward=0.8103048 (494.44 it/sec) -training >> step=8481200, episode=1414 reward=0.7727932 (488.38 it/sec) -training >> step=8481300, episode=1414 reward=0.7983792 (493.23 it/sec) -training >> step=8481400, episode=1414 reward=0.7868736 (479.40 it/sec) -training >> step=8481500, episode=1414 reward=0.8213258 (498.06 it/sec) -training >> step=8481600, episode=1414 reward=0.8141103 (501.24 it/sec) -training >> step=8481700, episode=1414 reward=0.7771478 (504.12 it/sec) -training >> step=8481800, episode=1414 reward=0.7966828 (448.60 it/sec) -training >> step=8481900, episode=1414 reward=0.7800559 (467.35 it/sec) -training >> step=8482000, episode=1414 reward=0.7977089 (491.81 it/sec) -training >> step=8482100, episode=1414 reward=0.7937311 (484.37 it/sec) -training >> step=8482200, episode=1414 reward=0.7783153 (423.25 it/sec) -training >> step=8482300, episode=1414 reward=0.7881755 (499.55 it/sec) -training >> step=8482400, episode=1414 reward=0.7858384 (496.76 it/sec) -training >> step=8482500, episode=1414 reward=0.7775928 (445.63 it/sec) -training >> step=8482600, episode=1414 reward=0.7901559 (382.90 it/sec) -training >> step=8482700, episode=1414 reward=0.7880836 (439.41 it/sec) -training >> step=8482800, episode=1414 reward=0.7827079 (470.85 it/sec) -training >> step=8482900, episode=1414 reward=0.7893413 (484.93 it/sec) -training >> step=8483000, episode=1414 reward=0.7914079 (483.06 it/sec) -training >> step=8483100, episode=1414 reward=0.7788106 (496.86 it/sec) -training >> step=8483200, episode=1414 reward=0.7920151 (496.89 it/sec) -training >> step=8483300, episode=1415 reward=0.8049845 (130.05 it/sec) -training >> step=8483400, episode=1415 reward=0.7846857 (471.30 it/sec) -training >> step=8483500, episode=1415 reward=0.7810103 (499.62 it/sec) -training >> step=8483600, episode=1415 reward=0.8008043 (483.20 it/sec) -training >> step=8483700, episode=1415 reward=0.792523 (430.64 it/sec) -training >> step=8483800, episode=1415 reward=0.7951077 (470.16 it/sec) -training >> step=8483900, episode=1415 reward=0.7791586 (477.23 it/sec) -training >> step=8484000, episode=1415 reward=0.7969161 (503.15 it/sec) -training >> step=8484100, episode=1415 reward=0.80125 (469.76 it/sec) -training >> step=8484200, episode=1415 reward=0.7938538 (498.15 it/sec) -training >> step=8484300, episode=1415 reward=0.7962971 (475.04 it/sec) -training >> step=8484400, episode=1415 reward=0.8087849 (467.17 it/sec) -training >> step=8484500, episode=1415 reward=0.7632207 (507.30 it/sec) -training >> step=8484600, episode=1415 reward=0.8151925 (513.73 it/sec) -training >> step=8484700, episode=1415 reward=0.7874169 (469.24 it/sec) -training >> step=8484800, episode=1415 reward=0.7748744 (476.26 it/sec) -training >> step=8484900, episode=1415 reward=0.7840198 (360.64 it/sec) -training >> step=8485000, episode=1415 reward=0.7866564 (503.92 it/sec) -training >> step=8485100, episode=1415 reward=0.7922699 (477.93 it/sec) -training >> step=8485200, episode=1415 reward=0.7940332 (485.78 it/sec) -training >> step=8485300, episode=1415 reward=0.7831501 (485.13 it/sec) -training >> step=8485400, episode=1415 reward=0.7970588 (459.07 it/sec) -training >> step=8485500, episode=1415 reward=0.7878121 (472.36 it/sec) -training >> step=8485600, episode=1415 reward=0.7752097 (468.13 it/sec) -training >> step=8485700, episode=1415 reward=0.7949727 (480.26 it/sec) -training >> step=8485800, episode=1415 reward=0.7806115 (475.62 it/sec) -training >> step=8485900, episode=1415 reward=0.8020088 (470.79 it/sec) -training >> step=8486000, episode=1415 reward=0.7996757 (505.53 it/sec) -training >> step=8486100, episode=1415 reward=0.8088856 (471.63 it/sec) -training >> step=8486200, episode=1415 reward=0.7978864 (461.64 it/sec) -training >> step=8486300, episode=1415 reward=0.7897164 (418.56 it/sec) -training >> step=8486400, episode=1415 reward=0.7960496 (519.77 it/sec) -training >> step=8486500, episode=1415 reward=0.7866566 (510.10 it/sec) -training >> step=8486600, episode=1415 reward=0.7883168 (453.90 it/sec) -training >> step=8486700, episode=1415 reward=0.7956755 (459.08 it/sec) -training >> step=8486800, episode=1415 reward=0.8006 (507.00 it/sec) -training >> step=8486900, episode=1415 reward=0.7996963 (453.03 it/sec) -training >> step=8487000, episode=1415 reward=0.7933493 (501.30 it/sec) -training >> step=8487100, episode=1415 reward=0.7817944 (522.99 it/sec) -training >> step=8487200, episode=1415 reward=0.8058761 (436.18 it/sec) -training >> step=8487300, episode=1415 reward=0.8062047 (438.23 it/sec) -training >> step=8487400, episode=1415 reward=0.8089409 (488.21 it/sec) -training >> step=8487500, episode=1415 reward=0.7986504 (532.56 it/sec) -training >> step=8487600, episode=1415 reward=0.7946095 (501.25 it/sec) -training >> step=8487700, episode=1415 reward=0.804849 (507.47 it/sec) -training >> step=8487800, episode=1415 reward=0.7936041 (493.02 it/sec) -training >> step=8487900, episode=1415 reward=0.8044934 (468.61 it/sec) -training >> step=8488000, episode=1415 reward=0.8068351 (509.71 it/sec) -training >> step=8488100, episode=1415 reward=0.7789385 (463.76 it/sec) -training >> step=8488200, episode=1415 reward=0.7866056 (534.62 it/sec) -training >> step=8488300, episode=1415 reward=0.7920071 (476.34 it/sec) -training >> step=8488400, episode=1415 reward=0.7880746 (451.29 it/sec) -training >> step=8488500, episode=1415 reward=0.7893464 (477.68 it/sec) -training >> step=8488600, episode=1415 reward=0.7857462 (510.99 it/sec) -training >> step=8488700, episode=1415 reward=0.7689358 (499.97 it/sec) -training >> step=8488800, episode=1415 reward=0.7728909 (490.16 it/sec) -training >> step=8488900, episode=1415 reward=0.8059741 (522.60 it/sec) -training >> step=8489000, episode=1415 reward=0.7950966 (473.43 it/sec) -training >> step=8489100, episode=1415 reward=0.794007 (507.74 it/sec) -training >> step=8489200, episode=1415 reward=0.7874948 (479.35 it/sec) -training >> step=8489300, episode=1416 reward=0.7956354 (131.75 it/sec) -training >> step=8489400, episode=1416 reward=0.7831302 (491.69 it/sec) -training >> step=8489500, episode=1416 reward=0.7856516 (485.86 it/sec) -training >> step=8489600, episode=1416 reward=0.7981435 (460.40 it/sec) -training >> step=8489700, episode=1416 reward=0.7807906 (469.07 it/sec) -training >> step=8489800, episode=1416 reward=0.7959825 (474.94 it/sec) -training >> step=8489900, episode=1416 reward=0.7934464 (438.49 it/sec) -training >> step=8490000, episode=1416 reward=0.8043735 (458.11 it/sec) -training >> step=8490100, episode=1416 reward=0.7832943 (492.73 it/sec) -training >> step=8490200, episode=1416 reward=0.803304 (434.10 it/sec) -training >> step=8490300, episode=1416 reward=0.7763819 (452.82 it/sec) -training >> step=8490400, episode=1416 reward=0.7916647 (477.34 it/sec) -training >> step=8490500, episode=1416 reward=0.7819358 (480.82 it/sec) -training >> step=8490600, episode=1416 reward=0.8059779 (508.07 it/sec) -training >> step=8490700, episode=1416 reward=0.8055713 (501.91 it/sec) -training >> step=8490800, episode=1416 reward=0.7869607 (458.26 it/sec) -training >> step=8490900, episode=1416 reward=0.7871096 (496.02 it/sec) -training >> step=8491000, episode=1416 reward=0.7968845 (458.98 it/sec) -training >> step=8491100, episode=1416 reward=0.7982567 (379.97 it/sec) -training >> step=8491200, episode=1416 reward=0.774889 (527.50 it/sec) -training >> step=8491300, episode=1416 reward=0.7883154 (449.43 it/sec) -training >> step=8491400, episode=1416 reward=0.7765658 (508.71 it/sec) -training >> step=8491500, episode=1416 reward=0.8022047 (453.53 it/sec) -training >> step=8491600, episode=1416 reward=0.7715416 (497.68 it/sec) -training >> step=8491700, episode=1416 reward=0.7984011 (454.34 it/sec) -training >> step=8491800, episode=1416 reward=0.8047548 (475.02 it/sec) -training >> step=8491900, episode=1416 reward=0.7868631 (486.86 it/sec) -training >> step=8492000, episode=1416 reward=0.7971427 (483.93 it/sec) -training >> step=8492100, episode=1416 reward=0.7810001 (465.06 it/sec) -training >> step=8492200, episode=1416 reward=0.808297 (520.35 it/sec) -training >> step=8492300, episode=1416 reward=0.7785748 (468.06 it/sec) -training >> step=8492400, episode=1416 reward=0.7917065 (503.69 it/sec) -training >> step=8492500, episode=1416 reward=0.7922991 (489.41 it/sec) -training >> step=8492600, episode=1416 reward=0.7817894 (506.44 it/sec) -training >> step=8492700, episode=1416 reward=0.7905734 (507.42 it/sec) -training >> step=8492800, episode=1416 reward=0.7821465 (502.20 it/sec) -training >> step=8492900, episode=1416 reward=0.8056452 (497.69 it/sec) -training >> step=8493000, episode=1416 reward=0.7859666 (485.97 it/sec) -training >> step=8493100, episode=1416 reward=0.7743493 (497.45 it/sec) -training >> step=8493200, episode=1416 reward=0.8084733 (516.13 it/sec) -training >> step=8493300, episode=1416 reward=0.7962558 (487.86 it/sec) -training >> step=8493400, episode=1416 reward=0.8134884 (480.05 it/sec) -training >> step=8493500, episode=1416 reward=0.780248 (498.43 it/sec) -training >> step=8493600, episode=1416 reward=0.7728164 (547.65 it/sec) -training >> step=8493700, episode=1416 reward=0.8050845 (483.51 it/sec) -training >> step=8493800, episode=1416 reward=0.8028482 (504.51 it/sec) -training >> step=8493900, episode=1416 reward=0.791366 (497.86 it/sec) -training >> step=8494000, episode=1416 reward=0.7937768 (490.30 it/sec) -training >> step=8494100, episode=1416 reward=0.7873567 (468.82 it/sec) -training >> step=8494200, episode=1416 reward=0.8166991 (422.26 it/sec) -training >> step=8494300, episode=1416 reward=0.8120857 (471.19 it/sec) -training >> step=8494400, episode=1416 reward=0.7804983 (455.38 it/sec) -training >> step=8494500, episode=1416 reward=0.7907292 (454.30 it/sec) -training >> step=8494600, episode=1416 reward=0.7867666 (469.21 it/sec) -training >> step=8494700, episode=1416 reward=0.7867308 (476.10 it/sec) -training >> step=8494800, episode=1416 reward=0.7830409 (471.31 it/sec) -training >> step=8494900, episode=1416 reward=0.7795482 (469.98 it/sec) -training >> step=8495000, episode=1416 reward=0.7816261 (486.03 it/sec) -training >> step=8495100, episode=1416 reward=0.7790481 (488.07 it/sec) -training >> step=8495200, episode=1416 reward=0.7706349 (463.30 it/sec) -training >> step=8495300, episode=1417 reward=0.7878062 (133.52 it/sec) -training >> step=8495400, episode=1417 reward=0.8042242 (445.99 it/sec) -training >> step=8495500, episode=1417 reward=0.7774549 (444.92 it/sec) -training >> step=8495600, episode=1417 reward=0.785004 (501.72 it/sec) -training >> step=8495700, episode=1417 reward=0.78648 (468.33 it/sec) -training >> step=8495800, episode=1417 reward=0.817636 (432.98 it/sec) -training >> step=8495900, episode=1417 reward=0.7802641 (487.51 it/sec) -training >> step=8496000, episode=1417 reward=0.783006 (451.58 it/sec) -training >> step=8496100, episode=1417 reward=0.7910125 (443.99 it/sec) -training >> step=8496200, episode=1417 reward=0.8016322 (494.53 it/sec) -training >> step=8496300, episode=1417 reward=0.7906403 (424.45 it/sec) -training >> step=8496400, episode=1417 reward=0.7816491 (497.97 it/sec) -training >> step=8496500, episode=1417 reward=0.7873993 (423.51 it/sec) -training >> step=8496600, episode=1417 reward=0.7857757 (458.27 it/sec) -training >> step=8496700, episode=1417 reward=0.7924813 (497.37 it/sec) -training >> step=8496800, episode=1417 reward=0.7781461 (484.79 it/sec) -training >> step=8496900, episode=1417 reward=0.8040053 (481.59 it/sec) -training >> step=8497000, episode=1417 reward=0.7876467 (466.87 it/sec) -training >> step=8497100, episode=1417 reward=0.799172 (473.59 it/sec) -training >> step=8497200, episode=1417 reward=0.7797052 (487.25 it/sec) -training >> step=8497300, episode=1417 reward=0.8141025 (345.88 it/sec) -training >> step=8497400, episode=1417 reward=0.7941532 (470.28 it/sec) -training >> step=8497500, episode=1417 reward=0.7908053 (499.13 it/sec) -training >> step=8497600, episode=1417 reward=0.7914295 (500.15 it/sec) -training >> step=8497700, episode=1417 reward=0.7853982 (506.11 it/sec) -training >> step=8497800, episode=1417 reward=0.7716439 (473.55 it/sec) -training >> step=8497900, episode=1417 reward=0.7924948 (470.89 it/sec) -training >> step=8498000, episode=1417 reward=0.793623 (497.68 it/sec) -training >> step=8498100, episode=1417 reward=0.7991259 (440.14 it/sec) -training >> step=8498200, episode=1417 reward=0.7828012 (408.14 it/sec) -training >> step=8498300, episode=1417 reward=0.7841635 (469.84 it/sec) -training >> step=8498400, episode=1417 reward=0.7613495 (518.93 it/sec) -training >> step=8498500, episode=1417 reward=0.7732969 (456.66 it/sec) -training >> step=8498600, episode=1417 reward=0.8022013 (498.00 it/sec) -training >> step=8498700, episode=1417 reward=0.7971458 (503.81 it/sec) -training >> step=8498800, episode=1417 reward=0.792914 (499.23 it/sec) -training >> step=8498900, episode=1417 reward=0.7800596 (465.01 it/sec) -training >> step=8499000, episode=1417 reward=0.7843427 (473.60 it/sec) -training >> step=8499100, episode=1417 reward=0.7879457 (489.82 it/sec) -training >> step=8499200, episode=1417 reward=0.7746444 (506.20 it/sec) -training >> step=8499300, episode=1417 reward=0.8022299 (506.30 it/sec) -training >> step=8499400, episode=1417 reward=0.8016129 (517.10 it/sec) -training >> step=8499500, episode=1417 reward=0.788159 (484.35 it/sec) -training >> step=8499600, episode=1417 reward=0.8013078 (484.05 it/sec) -training >> step=8499700, episode=1417 reward=0.7896485 (530.68 it/sec) -training >> step=8499800, episode=1417 reward=0.7968827 (493.10 it/sec) -training >> step=8499900, episode=1417 reward=0.7853084 (477.12 it/sec) -training >> step=8500000, episode=1417 reward=0.7881704 (481.22 it/sec) -training >> step=8500100, episode=1417 reward=0.7966308 (472.18 it/sec) -training >> step=8500200, episode=1417 reward=0.7793534 (461.86 it/sec) -training >> step=8500300, episode=1417 reward=0.7854062 (472.29 it/sec) -training >> step=8500400, episode=1417 reward=0.7768651 (509.87 it/sec) -training >> step=8500500, episode=1417 reward=0.8029472 (520.10 it/sec) -training >> step=8500600, episode=1417 reward=0.7855957 (496.74 it/sec) -training >> step=8500700, episode=1417 reward=0.7856587 (506.68 it/sec) -training >> step=8500800, episode=1417 reward=0.7767493 (467.97 it/sec) -training >> step=8500900, episode=1417 reward=0.7739133 (505.06 it/sec) -training >> step=8501000, episode=1417 reward=0.7808395 (501.24 it/sec) -training >> step=8501100, episode=1417 reward=0.7890454 (464.32 it/sec) -training >> step=8501200, episode=1417 reward=0.791423 (492.49 it/sec) -training >> step=8501300, episode=1418 reward=0.8158931 (116.30 it/sec) -training >> step=8501400, episode=1418 reward=0.7870026 (468.60 it/sec) -training >> step=8501500, episode=1418 reward=0.7914783 (511.87 it/sec) -training >> step=8501600, episode=1418 reward=0.7988051 (505.19 it/sec) -training >> step=8501700, episode=1418 reward=0.7906536 (477.33 it/sec) -training >> step=8501800, episode=1418 reward=0.8073263 (456.93 it/sec) -training >> step=8501900, episode=1418 reward=0.7894627 (549.40 it/sec) -training >> step=8502000, episode=1418 reward=0.7804443 (487.07 it/sec) -training >> step=8502100, episode=1418 reward=0.7848249 (464.28 it/sec) -training >> step=8502200, episode=1418 reward=0.7905498 (466.77 it/sec) -training >> step=8502300, episode=1418 reward=0.7829438 (469.48 it/sec) -training >> step=8502400, episode=1418 reward=0.7568837 (485.40 it/sec) -training >> step=8502500, episode=1418 reward=0.7789328 (503.94 it/sec) -training >> step=8502600, episode=1418 reward=0.7855728 (480.43 it/sec) -training >> step=8502700, episode=1418 reward=0.7865832 (462.96 it/sec) -training >> step=8502800, episode=1418 reward=0.7984096 (489.65 it/sec) -training >> step=8502900, episode=1418 reward=0.8106099 (482.53 it/sec) -training >> step=8503000, episode=1418 reward=0.811003 (487.26 it/sec) -training >> step=8503100, episode=1418 reward=0.7684095 (494.90 it/sec) -training >> step=8503200, episode=1418 reward=0.7800422 (468.32 it/sec) -training >> step=8503300, episode=1418 reward=0.7764316 (501.95 it/sec) -training >> step=8503400, episode=1418 reward=0.781693 (372.78 it/sec) -training >> step=8503500, episode=1418 reward=0.7926525 (497.31 it/sec) -training >> step=8503600, episode=1418 reward=0.7759408 (459.85 it/sec) -training >> step=8503700, episode=1418 reward=0.7868928 (508.37 it/sec) -training >> step=8503800, episode=1418 reward=0.8028398 (461.24 it/sec) -training >> step=8503900, episode=1418 reward=0.7930757 (455.04 it/sec) -training >> step=8504000, episode=1418 reward=0.8097919 (493.66 it/sec) -training >> step=8504100, episode=1418 reward=0.7811484 (504.62 it/sec) -training >> step=8504200, episode=1418 reward=0.7895818 (455.71 it/sec) -training >> step=8504300, episode=1418 reward=0.802741 (478.58 it/sec) -training >> step=8504400, episode=1418 reward=0.7674876 (512.89 it/sec) -training >> step=8504500, episode=1418 reward=0.7877793 (476.50 it/sec) -training >> step=8504600, episode=1418 reward=0.7715681 (478.31 it/sec) -training >> step=8504700, episode=1418 reward=0.7979175 (426.38 it/sec) -training >> step=8504800, episode=1418 reward=0.7904087 (479.82 it/sec) -training >> step=8504900, episode=1418 reward=0.7922587 (483.71 it/sec) -training >> step=8505000, episode=1418 reward=0.8034706 (483.65 it/sec) -training >> step=8505100, episode=1418 reward=0.78329 (462.01 it/sec) -training >> step=8505200, episode=1418 reward=0.8023528 (450.21 it/sec) -training >> step=8505300, episode=1418 reward=0.7731407 (420.10 it/sec) -training >> step=8505400, episode=1418 reward=0.8074592 (483.70 it/sec) -training >> step=8505500, episode=1418 reward=0.8143874 (484.00 it/sec) -training >> step=8505600, episode=1418 reward=0.7959265 (423.65 it/sec) -training >> step=8505700, episode=1418 reward=0.7748203 (462.44 it/sec) -training >> step=8505800, episode=1418 reward=0.79497 (472.86 it/sec) -training >> step=8505900, episode=1418 reward=0.8191538 (442.91 it/sec) -training >> step=8506000, episode=1418 reward=0.8023449 (465.12 it/sec) -training >> step=8506100, episode=1418 reward=0.8013937 (479.78 it/sec) -training >> step=8506200, episode=1418 reward=0.7956671 (497.26 it/sec) -training >> step=8506300, episode=1418 reward=0.7916659 (493.62 it/sec) -training >> step=8506400, episode=1418 reward=0.7776157 (511.27 it/sec) -training >> step=8506500, episode=1418 reward=0.8120509 (437.38 it/sec) -training >> step=8506600, episode=1418 reward=0.778828 (506.72 it/sec) -training >> step=8506700, episode=1418 reward=0.8098265 (467.62 it/sec) -training >> step=8506800, episode=1418 reward=0.7725641 (491.08 it/sec) -training >> step=8506900, episode=1418 reward=0.792935 (492.24 it/sec) -training >> step=8507000, episode=1418 reward=0.7737355 (510.95 it/sec) -training >> step=8507100, episode=1418 reward=0.7816887 (507.44 it/sec) -training >> step=8507200, episode=1418 reward=0.7933684 (475.24 it/sec) -training >> step=8507300, episode=1419 reward=0.7727203 (135.66 it/sec) -training >> step=8507400, episode=1419 reward=0.7957411 (445.82 it/sec) -training >> step=8507500, episode=1419 reward=0.7757787 (484.48 it/sec) -training >> step=8507600, episode=1419 reward=0.7884849 (491.45 it/sec) -training >> step=8507700, episode=1419 reward=0.7917076 (450.58 it/sec) -training >> step=8507800, episode=1419 reward=0.7857109 (512.46 it/sec) -training >> step=8507900, episode=1419 reward=0.7749117 (440.79 it/sec) -training >> step=8508000, episode=1419 reward=0.7974191 (509.44 it/sec) -training >> step=8508100, episode=1419 reward=0.7960293 (446.62 it/sec) -training >> step=8508200, episode=1419 reward=0.8014663 (432.13 it/sec) -training >> step=8508300, episode=1419 reward=0.8079224 (477.62 it/sec) -training >> step=8508400, episode=1419 reward=0.7783696 (491.21 it/sec) -training >> step=8508500, episode=1419 reward=0.7796303 (468.84 it/sec) -training >> step=8508600, episode=1419 reward=0.8088571 (460.87 it/sec) -training >> step=8508700, episode=1419 reward=0.7697612 (465.07 it/sec) -training >> step=8508800, episode=1419 reward=0.7796795 (497.66 it/sec) -training >> step=8508900, episode=1419 reward=0.7794627 (456.99 it/sec) -training >> step=8509000, episode=1419 reward=0.8060164 (486.41 it/sec) -training >> step=8509100, episode=1419 reward=0.7823851 (487.88 it/sec) -training >> step=8509200, episode=1419 reward=0.7772387 (422.23 it/sec) -training >> step=8509300, episode=1419 reward=0.7997296 (428.88 it/sec) -training >> step=8509400, episode=1419 reward=0.7967743 (454.20 it/sec) -training >> step=8509500, episode=1419 reward=0.7910383 (366.52 it/sec) -training >> step=8509600, episode=1419 reward=0.795865 (490.22 it/sec) -training >> step=8509700, episode=1419 reward=0.7714284 (379.48 it/sec) -training >> step=8509800, episode=1419 reward=0.7696891 (392.37 it/sec) -training >> step=8509900, episode=1419 reward=0.8002416 (508.26 it/sec) -training >> step=8510000, episode=1419 reward=0.7979824 (458.90 it/sec) -training >> step=8510100, episode=1419 reward=0.8002267 (478.31 it/sec) -training >> step=8510200, episode=1419 reward=0.7844056 (449.10 it/sec) -training >> step=8510300, episode=1419 reward=0.7948014 (457.59 it/sec) -training >> step=8510400, episode=1419 reward=0.8162698 (463.10 it/sec) -training >> step=8510500, episode=1419 reward=0.7965086 (473.89 it/sec) -training >> step=8510600, episode=1419 reward=0.7805523 (510.34 it/sec) -training >> step=8510700, episode=1419 reward=0.7947839 (487.53 it/sec) -training >> step=8510800, episode=1419 reward=0.7906556 (442.63 it/sec) -training >> step=8510900, episode=1419 reward=0.8058711 (452.03 it/sec) -training >> step=8511000, episode=1419 reward=0.8000051 (394.02 it/sec) -training >> step=8511100, episode=1419 reward=0.7685155 (493.61 it/sec) -training >> step=8511200, episode=1419 reward=0.7714952 (475.52 it/sec) -training >> step=8511300, episode=1419 reward=0.7942284 (513.28 it/sec) -training >> step=8511400, episode=1419 reward=0.7856802 (482.09 it/sec) -training >> step=8511500, episode=1419 reward=0.7719242 (521.93 it/sec) -training >> step=8511600, episode=1419 reward=0.7964267 (547.06 it/sec) -training >> step=8511700, episode=1419 reward=0.7741001 (508.98 it/sec) -training >> step=8511800, episode=1419 reward=0.7795828 (534.24 it/sec) -training >> step=8511900, episode=1419 reward=0.8014764 (506.46 it/sec) -training >> step=8512000, episode=1419 reward=0.8019959 (495.61 it/sec) -training >> step=8512100, episode=1419 reward=0.7821643 (522.73 it/sec) -training >> step=8512200, episode=1419 reward=0.7961451 (425.14 it/sec) -training >> step=8512300, episode=1419 reward=0.7954515 (508.22 it/sec) -training >> step=8512400, episode=1419 reward=0.7850752 (528.63 it/sec) -training >> step=8512500, episode=1419 reward=0.7975534 (490.68 it/sec) -training >> step=8512600, episode=1419 reward=0.789137 (498.35 it/sec) -training >> step=8512700, episode=1419 reward=0.793072 (494.69 it/sec) -training >> step=8512800, episode=1419 reward=0.8103293 (501.22 it/sec) -training >> step=8512900, episode=1419 reward=0.79432 (538.88 it/sec) -training >> step=8513000, episode=1419 reward=0.7789565 (509.95 it/sec) -training >> step=8513100, episode=1419 reward=0.7901558 (527.52 it/sec) -training >> step=8513200, episode=1419 reward=0.7897028 (454.37 it/sec) -training >> step=8513300, episode=1420 reward=0.7821276 (112.86 it/sec) -training >> step=8513400, episode=1420 reward=0.7754064 (470.58 it/sec) -training >> step=8513500, episode=1420 reward=0.7935571 (502.47 it/sec) -training >> step=8513600, episode=1420 reward=0.7884052 (490.30 it/sec) -training >> step=8513700, episode=1420 reward=0.7955093 (509.30 it/sec) -training >> step=8513800, episode=1420 reward=0.7813502 (537.47 it/sec) -training >> step=8513900, episode=1420 reward=0.7978498 (470.06 it/sec) -training >> step=8514000, episode=1420 reward=0.7946598 (504.73 it/sec) -training >> step=8514100, episode=1420 reward=0.7995614 (475.93 it/sec) -training >> step=8514200, episode=1420 reward=0.7905102 (485.78 it/sec) -training >> step=8514300, episode=1420 reward=0.7857908 (489.74 it/sec) -training >> step=8514400, episode=1420 reward=0.7658493 (457.89 it/sec) -training >> step=8514500, episode=1420 reward=0.7800604 (497.48 it/sec) -training >> step=8514600, episode=1420 reward=0.7760141 (485.80 it/sec) -training >> step=8514700, episode=1420 reward=0.7977322 (433.89 it/sec) -training >> step=8514800, episode=1420 reward=0.7845876 (406.30 it/sec) -training >> step=8514900, episode=1420 reward=0.7936252 (483.35 it/sec) -training >> step=8515000, episode=1420 reward=0.775356 (529.31 it/sec) -training >> step=8515100, episode=1420 reward=0.7896523 (524.49 it/sec) -training >> step=8515200, episode=1420 reward=0.8062134 (522.74 it/sec) -training >> step=8515300, episode=1420 reward=0.7937943 (553.79 it/sec) -training >> step=8515400, episode=1420 reward=0.7880802 (479.37 it/sec) -training >> step=8515500, episode=1420 reward=0.7731049 (457.04 it/sec) -training >> step=8515600, episode=1420 reward=0.7663441 (338.84 it/sec) -training >> step=8515700, episode=1420 reward=0.7924596 (519.81 it/sec) -training >> step=8515800, episode=1420 reward=0.7728081 (493.22 it/sec) -training >> step=8515900, episode=1420 reward=0.7878867 (484.14 it/sec) -training >> step=8516000, episode=1420 reward=0.8088134 (517.09 it/sec) -training >> step=8516100, episode=1420 reward=0.7994777 (501.15 it/sec) -training >> step=8516200, episode=1420 reward=0.7876267 (488.47 it/sec) -training >> step=8516300, episode=1420 reward=0.7932379 (498.19 it/sec) -training >> step=8516400, episode=1420 reward=0.8044075 (506.75 it/sec) -training >> step=8516500, episode=1420 reward=0.7926041 (475.69 it/sec) -training >> step=8516600, episode=1420 reward=0.7834282 (438.23 it/sec) -training >> step=8516700, episode=1420 reward=0.8024727 (463.31 it/sec) -training >> step=8516800, episode=1420 reward=0.7830182 (482.39 it/sec) -training >> step=8516900, episode=1420 reward=0.7933131 (412.92 it/sec) -training >> step=8517000, episode=1420 reward=0.7946466 (468.97 it/sec) -training >> step=8517100, episode=1420 reward=0.7977186 (460.23 it/sec) -training >> step=8517200, episode=1420 reward=0.7941667 (456.04 it/sec) -training >> step=8517300, episode=1420 reward=0.7754014 (395.43 it/sec) -training >> step=8517400, episode=1420 reward=0.8057795 (493.41 it/sec) -training >> step=8517500, episode=1420 reward=0.7868342 (502.17 it/sec) -training >> step=8517600, episode=1420 reward=0.7915252 (488.36 it/sec) -training >> step=8517700, episode=1420 reward=0.805093 (505.24 it/sec) -training >> step=8517800, episode=1420 reward=0.7734392 (474.68 it/sec) -training >> step=8517900, episode=1420 reward=0.7909961 (487.78 it/sec) -training >> step=8518000, episode=1420 reward=0.785297 (499.98 it/sec) -training >> step=8518100, episode=1420 reward=0.8120223 (519.29 it/sec) -training >> step=8518200, episode=1420 reward=0.7879919 (531.42 it/sec) -training >> step=8518300, episode=1420 reward=0.7729818 (476.73 it/sec) -training >> step=8518400, episode=1420 reward=0.7806696 (499.71 it/sec) -training >> step=8518500, episode=1420 reward=0.7926549 (489.79 it/sec) -training >> step=8518600, episode=1420 reward=0.7955483 (495.59 it/sec) -training >> step=8518700, episode=1420 reward=0.78529 (520.84 it/sec) -training >> step=8518800, episode=1420 reward=0.7885022 (497.76 it/sec) -training >> step=8518900, episode=1420 reward=0.7910126 (507.66 it/sec) -training >> step=8519000, episode=1420 reward=0.7921379 (485.60 it/sec) -training >> step=8519100, episode=1420 reward=0.7822507 (469.86 it/sec) -training >> step=8519200, episode=1420 reward=0.78594 (519.81 it/sec) -training >> step=8519300, episode=1421 reward=0.8001423 (86.16 it/sec) -training >> step=8519400, episode=1421 reward=0.7868675 (485.17 it/sec) -training >> step=8519500, episode=1421 reward=0.7876966 (486.56 it/sec) -training >> step=8519600, episode=1421 reward=0.8020502 (504.52 it/sec) -training >> step=8519700, episode=1421 reward=0.7827299 (474.13 it/sec) -training >> step=8519800, episode=1421 reward=0.7812565 (499.26 it/sec) -training >> step=8519900, episode=1421 reward=0.796583 (474.27 it/sec) -training >> step=8520000, episode=1421 reward=0.792226 (521.82 it/sec) -training >> step=8520100, episode=1421 reward=0.7656168 (504.25 it/sec) -training >> step=8520200, episode=1421 reward=0.7925795 (470.92 it/sec) -training >> step=8520300, episode=1421 reward=0.8002614 (529.44 it/sec) -training >> step=8520400, episode=1421 reward=0.7828337 (483.57 it/sec) -training >> step=8520500, episode=1421 reward=0.8061499 (477.91 it/sec) -training >> step=8520600, episode=1421 reward=0.8115848 (501.95 it/sec) -training >> step=8520700, episode=1421 reward=0.8004994 (453.71 it/sec) -training >> step=8520800, episode=1421 reward=0.7607563 (497.22 it/sec) -training >> step=8520900, episode=1421 reward=0.794107 (454.32 it/sec) -training >> step=8521000, episode=1421 reward=0.7917139 (483.63 it/sec) -training >> step=8521100, episode=1421 reward=0.8135942 (507.54 it/sec) -training >> step=8521200, episode=1421 reward=0.7814986 (492.74 it/sec) -training >> step=8521300, episode=1421 reward=0.8047673 (512.22 it/sec) -training >> step=8521400, episode=1421 reward=0.79074 (447.57 it/sec) -training >> step=8521500, episode=1421 reward=0.8053258 (501.33 it/sec) -training >> step=8521600, episode=1421 reward=0.7951967 (474.00 it/sec) -training >> step=8521700, episode=1421 reward=0.7876102 (500.88 it/sec) -training >> step=8521800, episode=1421 reward=0.7905347 (390.90 it/sec) -training >> step=8521900, episode=1421 reward=0.7954454 (488.11 it/sec) -training >> step=8522000, episode=1421 reward=0.7840918 (471.22 it/sec) -training >> step=8522100, episode=1421 reward=0.8040204 (493.89 it/sec) -training >> step=8522200, episode=1421 reward=0.7965292 (492.40 it/sec) -training >> step=8522300, episode=1421 reward=0.8037442 (528.17 it/sec) -training >> step=8522400, episode=1421 reward=0.7859475 (454.12 it/sec) -training >> step=8522500, episode=1421 reward=0.7865026 (505.36 it/sec) -training >> step=8522600, episode=1421 reward=0.7752886 (478.31 it/sec) -training >> step=8522700, episode=1421 reward=0.8009756 (494.44 it/sec) -training >> step=8522800, episode=1421 reward=0.7884707 (490.82 it/sec) -training >> step=8522900, episode=1421 reward=0.7869486 (486.39 it/sec) -training >> step=8523000, episode=1421 reward=0.7710926 (492.38 it/sec) -training >> step=8523100, episode=1421 reward=0.8034397 (450.42 it/sec) -training >> step=8523200, episode=1421 reward=0.7946708 (447.77 it/sec) -training >> step=8523300, episode=1421 reward=0.7898897 (494.74 it/sec) -training >> step=8523400, episode=1421 reward=0.78817 (415.83 it/sec) -training >> step=8523500, episode=1421 reward=0.7980086 (402.84 it/sec) -training >> step=8523600, episode=1421 reward=0.8149529 (512.96 it/sec) -training >> step=8523700, episode=1421 reward=0.7970669 (472.57 it/sec) -training >> step=8523800, episode=1421 reward=0.7718778 (428.55 it/sec) -training >> step=8523900, episode=1421 reward=0.7918559 (465.88 it/sec) -training >> step=8524000, episode=1421 reward=0.7920651 (548.29 it/sec) -training >> step=8524100, episode=1421 reward=0.7803342 (486.31 it/sec) -training >> step=8524200, episode=1421 reward=0.7800148 (511.18 it/sec) -training >> step=8524300, episode=1421 reward=0.7959865 (490.15 it/sec) -training >> step=8524400, episode=1421 reward=0.7985186 (472.87 it/sec) -training >> step=8524500, episode=1421 reward=0.7973737 (484.78 it/sec) -training >> step=8524600, episode=1421 reward=0.7699081 (527.88 it/sec) -training >> step=8524700, episode=1421 reward=0.7976426 (537.09 it/sec) -training >> step=8524800, episode=1421 reward=0.7740936 (480.06 it/sec) -training >> step=8524900, episode=1421 reward=0.784315 (431.74 it/sec) -training >> step=8525000, episode=1421 reward=0.8053769 (530.28 it/sec) -training >> step=8525100, episode=1421 reward=0.7969855 (509.99 it/sec) -training >> step=8525200, episode=1421 reward=0.7959484 (511.69 it/sec) -training >> step=8525300, episode=1422 reward=0.8005134 (92.42 it/sec) -training >> step=8525400, episode=1422 reward=0.7735199 (507.41 it/sec) -training >> step=8525500, episode=1422 reward=0.7708426 (463.31 it/sec) -training >> step=8525600, episode=1422 reward=0.7901722 (507.71 it/sec) -training >> step=8525700, episode=1422 reward=0.7788581 (487.94 it/sec) -training >> step=8525800, episode=1422 reward=0.7789871 (493.85 it/sec) -training >> step=8525900, episode=1422 reward=0.7782566 (487.73 it/sec) -training >> step=8526000, episode=1422 reward=0.8110988 (504.78 it/sec) -training >> step=8526100, episode=1422 reward=0.775229 (474.97 it/sec) -training >> step=8526200, episode=1422 reward=0.7991074 (494.67 it/sec) -training >> step=8526300, episode=1422 reward=0.8015884 (514.37 it/sec) -training >> step=8526400, episode=1422 reward=0.7723679 (502.87 it/sec) -training >> step=8526500, episode=1422 reward=0.7941495 (485.75 it/sec) -training >> step=8526600, episode=1422 reward=0.7935536 (449.98 it/sec) -training >> step=8526700, episode=1422 reward=0.7845538 (504.92 it/sec) -training >> step=8526800, episode=1422 reward=0.7873039 (508.51 it/sec) -training >> step=8526900, episode=1422 reward=0.7815785 (488.37 it/sec) -training >> step=8527000, episode=1422 reward=0.7840318 (492.67 it/sec) -training >> step=8527100, episode=1422 reward=0.7922719 (522.11 it/sec) -training >> step=8527200, episode=1422 reward=0.8159854 (491.67 it/sec) -training >> step=8527300, episode=1422 reward=0.7857855 (496.38 it/sec) -training >> step=8527400, episode=1422 reward=0.8022271 (489.46 it/sec) -training >> step=8527500, episode=1422 reward=0.7944782 (473.25 it/sec) -training >> step=8527600, episode=1422 reward=0.8220136 (487.48 it/sec) -training >> step=8527700, episode=1422 reward=0.7788159 (467.06 it/sec) -training >> step=8527800, episode=1422 reward=0.7876354 (493.08 it/sec) -training >> step=8527900, episode=1422 reward=0.8011643 (297.07 it/sec) -training >> step=8528000, episode=1422 reward=0.803212 (453.90 it/sec) -training >> step=8528100, episode=1422 reward=0.7954356 (461.78 it/sec) -training >> step=8528200, episode=1422 reward=0.7963089 (442.26 it/sec) -training >> step=8528300, episode=1422 reward=0.7830762 (420.11 it/sec) -training >> step=8528400, episode=1422 reward=0.7856572 (361.35 it/sec) -training >> step=8528500, episode=1422 reward=0.7975572 (445.36 it/sec) -training >> step=8528600, episode=1422 reward=0.7818508 (483.20 it/sec) -training >> step=8528700, episode=1422 reward=0.7999429 (408.11 it/sec) -training >> step=8528800, episode=1422 reward=0.7961742 (466.54 it/sec) -training >> step=8528900, episode=1422 reward=0.8052968 (516.77 it/sec) -training >> step=8529000, episode=1422 reward=0.827284 (496.73 it/sec) -training >> step=8529100, episode=1422 reward=0.7879878 (453.60 it/sec) -training >> step=8529200, episode=1422 reward=0.7867104 (534.13 it/sec) -training >> step=8529300, episode=1422 reward=0.7901409 (488.71 it/sec) -training >> step=8529400, episode=1422 reward=0.7733527 (488.32 it/sec) -training >> step=8529500, episode=1422 reward=0.7689748 (478.92 it/sec) -training >> step=8529600, episode=1422 reward=0.7975377 (515.17 it/sec) -training >> step=8529700, episode=1422 reward=0.8092927 (479.87 it/sec) -training >> step=8529800, episode=1422 reward=0.7925659 (451.77 it/sec) -training >> step=8529900, episode=1422 reward=0.7908999 (463.55 it/sec) -training >> step=8530000, episode=1422 reward=0.7966918 (512.42 it/sec) -training >> step=8530100, episode=1422 reward=0.7743636 (464.69 it/sec) -training >> step=8530200, episode=1422 reward=0.8132542 (494.64 it/sec) -training >> step=8530300, episode=1422 reward=0.7896523 (524.85 it/sec) -training >> step=8530400, episode=1422 reward=0.7960842 (460.86 it/sec) -training >> step=8530500, episode=1422 reward=0.7646129 (479.64 it/sec) -training >> step=8530600, episode=1422 reward=0.7720207 (515.14 it/sec) -training >> step=8530700, episode=1422 reward=0.779703 (464.74 it/sec) -training >> step=8530800, episode=1422 reward=0.8009871 (423.30 it/sec) -training >> step=8530900, episode=1422 reward=0.784163 (471.19 it/sec) -training >> step=8531000, episode=1422 reward=0.7809857 (472.35 it/sec) -training >> step=8531100, episode=1422 reward=0.7835969 (505.50 it/sec) -training >> step=8531200, episode=1422 reward=0.8090538 (459.83 it/sec) -training >> step=8531300, episode=1423 reward=0.7904638 (98.47 it/sec) -training >> step=8531400, episode=1423 reward=0.784921 (466.01 it/sec) -training >> step=8531500, episode=1423 reward=0.7900497 (433.28 it/sec) -training >> step=8531600, episode=1423 reward=0.8003446 (444.04 it/sec) -training >> step=8531700, episode=1423 reward=0.7927186 (498.96 it/sec) -training >> step=8531800, episode=1423 reward=0.7799256 (465.20 it/sec) -training >> step=8531900, episode=1423 reward=0.7806835 (460.86 it/sec) -training >> step=8532000, episode=1423 reward=0.7967096 (427.58 it/sec) -training >> step=8532100, episode=1423 reward=0.7857134 (482.52 it/sec) -training >> step=8532200, episode=1423 reward=0.7768726 (445.64 it/sec) -training >> step=8532300, episode=1423 reward=0.7760496 (472.60 it/sec) -training >> step=8532400, episode=1423 reward=0.7722498 (497.19 it/sec) -training >> step=8532500, episode=1423 reward=0.7820931 (404.45 it/sec) -training >> step=8532600, episode=1423 reward=0.7714723 (460.83 it/sec) -training >> step=8532700, episode=1423 reward=0.7751698 (432.72 it/sec) -training >> step=8532800, episode=1423 reward=0.7859529 (455.21 it/sec) -training >> step=8532900, episode=1423 reward=0.790238 (474.84 it/sec) -training >> step=8533000, episode=1423 reward=0.7964231 (469.27 it/sec) -training >> step=8533100, episode=1423 reward=0.7850012 (462.72 it/sec) -training >> step=8533200, episode=1423 reward=0.788898 (419.82 it/sec) -training >> step=8533300, episode=1423 reward=0.798815 (443.65 it/sec) -training >> step=8533400, episode=1423 reward=0.7725036 (476.23 it/sec) -training >> step=8533500, episode=1423 reward=0.801994 (477.18 it/sec) -training >> step=8533600, episode=1423 reward=0.796374 (454.28 it/sec) -training >> step=8533700, episode=1423 reward=0.8043417 (444.24 it/sec) -training >> step=8533800, episode=1423 reward=0.7976718 (444.66 it/sec) -training >> step=8533900, episode=1423 reward=0.7676001 (436.55 it/sec) -training >> step=8534000, episode=1423 reward=0.7847482 (452.23 it/sec) -training >> step=8534100, episode=1423 reward=0.7799566 (339.97 it/sec) -training >> step=8534200, episode=1423 reward=0.8053385 (445.09 it/sec) -training >> step=8534300, episode=1423 reward=0.7924128 (445.98 it/sec) -training >> step=8534400, episode=1423 reward=0.7864789 (444.68 it/sec) -training >> step=8534500, episode=1423 reward=0.7911694 (503.92 it/sec) -training >> step=8534600, episode=1423 reward=0.764918 (454.88 it/sec) -training >> step=8534700, episode=1423 reward=0.8058031 (447.87 it/sec) -training >> step=8534800, episode=1423 reward=0.7859531 (444.72 it/sec) -training >> step=8534900, episode=1423 reward=0.7991796 (480.04 it/sec) -training >> step=8535000, episode=1423 reward=0.7809697 (442.72 it/sec) -training >> step=8535100, episode=1423 reward=0.7712924 (464.00 it/sec) -training >> step=8535200, episode=1423 reward=0.7895008 (498.32 it/sec) -training >> step=8535300, episode=1423 reward=0.8104031 (458.41 it/sec) -training >> step=8535400, episode=1423 reward=0.796145 (476.96 it/sec) -training >> step=8535500, episode=1423 reward=0.78973 (400.33 it/sec) -training >> step=8535600, episode=1423 reward=0.7819981 (491.45 it/sec) -training >> step=8535700, episode=1423 reward=0.7852283 (466.91 it/sec) -training >> step=8535800, episode=1423 reward=0.7715833 (453.06 it/sec) -training >> step=8535900, episode=1423 reward=0.7898075 (489.85 it/sec) -training >> step=8536000, episode=1423 reward=0.7927075 (425.15 it/sec) -training >> step=8536100, episode=1423 reward=0.773529 (422.36 it/sec) -training >> step=8536200, episode=1423 reward=0.7803114 (435.64 it/sec) -training >> step=8536300, episode=1423 reward=0.7977917 (466.19 it/sec) -training >> step=8536400, episode=1423 reward=0.7916601 (386.37 it/sec) -training >> step=8536500, episode=1423 reward=0.802278 (448.47 it/sec) -training >> step=8536600, episode=1423 reward=0.770197 (431.98 it/sec) -training >> step=8536700, episode=1423 reward=0.7994774 (412.52 it/sec) -training >> step=8536800, episode=1423 reward=0.7961887 (466.81 it/sec) -training >> step=8536900, episode=1423 reward=0.7923996 (431.63 it/sec) -training >> step=8537000, episode=1423 reward=0.7843567 (462.56 it/sec) -training >> step=8537100, episode=1423 reward=0.7712454 (413.51 it/sec) -training >> step=8537200, episode=1423 reward=0.7972468 (468.61 it/sec) -training >> step=8537300, episode=1424 reward=0.7871406 (86.86 it/sec) -training >> step=8537400, episode=1424 reward=0.799906 (470.01 it/sec) -training >> step=8537500, episode=1424 reward=0.7660222 (421.66 it/sec) -training >> step=8537600, episode=1424 reward=0.808807 (453.25 it/sec) -training >> step=8537700, episode=1424 reward=0.7875996 (500.54 it/sec) -training >> step=8537800, episode=1424 reward=0.8003853 (407.25 it/sec) -training >> step=8537900, episode=1424 reward=0.8021073 (470.70 it/sec) -training >> step=8538000, episode=1424 reward=0.7918437 (484.95 it/sec) -training >> step=8538100, episode=1424 reward=0.7932337 (413.78 it/sec) -training >> step=8538200, episode=1424 reward=0.7729875 (467.15 it/sec) -training >> step=8538300, episode=1424 reward=0.7936866 (428.29 it/sec) -training >> step=8538400, episode=1424 reward=0.7939438 (475.92 it/sec) -training >> step=8538500, episode=1424 reward=0.777477 (430.18 it/sec) -training >> step=8538600, episode=1424 reward=0.7870502 (416.16 it/sec) -training >> step=8538700, episode=1424 reward=0.7803906 (429.43 it/sec) -training >> step=8538800, episode=1424 reward=0.7937997 (430.30 it/sec) -training >> step=8538900, episode=1424 reward=0.8029081 (430.36 it/sec) -training >> step=8539000, episode=1424 reward=0.7872689 (450.89 it/sec) -training >> step=8539100, episode=1424 reward=0.7941527 (396.24 it/sec) -training >> step=8539200, episode=1424 reward=0.8036416 (438.35 it/sec) -training >> step=8539300, episode=1424 reward=0.7809886 (451.49 it/sec) -training >> step=8539400, episode=1424 reward=0.8054016 (453.74 it/sec) -training >> step=8539500, episode=1424 reward=0.7884387 (458.66 it/sec) -training >> step=8539600, episode=1424 reward=0.7983546 (477.09 it/sec) -training >> step=8539700, episode=1424 reward=0.7793489 (451.61 it/sec) -training >> step=8539800, episode=1424 reward=0.8041387 (423.58 it/sec) -training >> step=8539900, episode=1424 reward=0.7919766 (447.34 it/sec) -training >> step=8540000, episode=1424 reward=0.7877008 (452.53 it/sec) -training >> step=8540100, episode=1424 reward=0.7891442 (350.25 it/sec) -training >> step=8540200, episode=1424 reward=0.8005211 (461.56 it/sec) -training >> step=8540300, episode=1424 reward=0.7987533 (449.32 it/sec) -training >> step=8540400, episode=1424 reward=0.8096754 (459.66 it/sec) -training >> step=8540500, episode=1424 reward=0.7653162 (487.93 it/sec) -training >> step=8540600, episode=1424 reward=0.7891554 (451.99 it/sec) -training >> step=8540700, episode=1424 reward=0.7846835 (445.61 it/sec) -training >> step=8540800, episode=1424 reward=0.7887377 (452.53 it/sec) -training >> step=8540900, episode=1424 reward=0.7816963 (476.49 it/sec) -training >> step=8541000, episode=1424 reward=0.8141655 (435.34 it/sec) -training >> step=8541100, episode=1424 reward=0.7915943 (415.09 it/sec) -training >> step=8541200, episode=1424 reward=0.7780893 (417.32 it/sec) -training >> step=8541300, episode=1424 reward=0.7786556 (494.83 it/sec) -training >> step=8541400, episode=1424 reward=0.8007826 (477.70 it/sec) -training >> step=8541500, episode=1424 reward=0.8040815 (443.77 it/sec) -training >> step=8541600, episode=1424 reward=0.8119631 (472.57 it/sec) -training >> step=8541700, episode=1424 reward=0.7678382 (450.77 it/sec) -training >> step=8541800, episode=1424 reward=0.7864348 (448.47 it/sec) -training >> step=8541900, episode=1424 reward=0.7772275 (483.12 it/sec) -training >> step=8542000, episode=1424 reward=0.7917742 (447.04 it/sec) -training >> step=8542100, episode=1424 reward=0.8040587 (459.98 it/sec) -training >> step=8542200, episode=1424 reward=0.7768701 (444.09 it/sec) -training >> step=8542300, episode=1424 reward=0.7777115 (446.69 it/sec) -training >> step=8542400, episode=1424 reward=0.7906216 (442.83 it/sec) -training >> step=8542500, episode=1424 reward=0.7909904 (453.06 it/sec) -training >> step=8542600, episode=1424 reward=0.7974044 (456.59 it/sec) -training >> step=8542700, episode=1424 reward=0.7957293 (504.69 it/sec) -training >> step=8542800, episode=1424 reward=0.8059798 (458.70 it/sec) -training >> step=8542900, episode=1424 reward=0.796257 (432.55 it/sec) -training >> step=8543000, episode=1424 reward=0.7790743 (479.35 it/sec) -training >> step=8543100, episode=1424 reward=0.8058123 (457.60 it/sec) -training >> step=8543200, episode=1424 reward=0.7780276 (466.84 it/sec) -training >> step=8543300, episode=1425 reward=0.785893 (85.84 it/sec) -training >> step=8543400, episode=1425 reward=0.8043399 (446.57 it/sec) -training >> step=8543500, episode=1425 reward=0.8046036 (423.24 it/sec) -training >> step=8543600, episode=1425 reward=0.7871066 (445.32 it/sec) -training >> step=8543700, episode=1425 reward=0.8012964 (450.37 it/sec) -training >> step=8543800, episode=1425 reward=0.7881482 (456.43 it/sec) -training >> step=8543900, episode=1425 reward=0.7919798 (479.26 it/sec) -training >> step=8544000, episode=1425 reward=0.7763003 (482.56 it/sec) -training >> step=8544100, episode=1425 reward=0.7862074 (449.50 it/sec) -training >> step=8544200, episode=1425 reward=0.7776374 (465.19 it/sec) -training >> step=8544300, episode=1425 reward=0.7772287 (473.94 it/sec) -training >> step=8544400, episode=1425 reward=0.7963753 (493.57 it/sec) -training >> step=8544500, episode=1425 reward=0.7793367 (459.68 it/sec) -training >> step=8544600, episode=1425 reward=0.7996283 (423.17 it/sec) -training >> step=8544700, episode=1425 reward=0.8022226 (476.73 it/sec) -training >> step=8544800, episode=1425 reward=0.7974624 (433.05 it/sec) -training >> step=8544900, episode=1425 reward=0.7960896 (431.34 it/sec) -training >> step=8545000, episode=1425 reward=0.7873712 (403.56 it/sec) -training >> step=8545100, episode=1425 reward=0.8005617 (469.79 it/sec) -training >> step=8545200, episode=1425 reward=0.7876191 (453.26 it/sec) -training >> step=8545300, episode=1425 reward=0.7882284 (419.78 it/sec) -training >> step=8545400, episode=1425 reward=0.8031574 (468.50 it/sec) -training >> step=8545500, episode=1425 reward=0.7832426 (433.41 it/sec) -training >> step=8545600, episode=1425 reward=0.8101305 (481.36 it/sec) -training >> step=8545700, episode=1425 reward=0.7794631 (432.78 it/sec) -training >> step=8545800, episode=1425 reward=0.8168939 (418.48 it/sec) -training >> step=8545900, episode=1425 reward=0.7980825 (455.79 it/sec) -training >> step=8546000, episode=1425 reward=0.7917811 (475.40 it/sec) -training >> step=8546100, episode=1425 reward=0.7963194 (476.06 it/sec) -training >> step=8546200, episode=1425 reward=0.7919858 (469.87 it/sec) -training >> step=8546300, episode=1425 reward=0.7791888 (446.89 it/sec) -training >> step=8546400, episode=1425 reward=0.7892799 (295.36 it/sec) -training >> step=8546500, episode=1425 reward=0.8006161 (479.06 it/sec) -training >> step=8546600, episode=1425 reward=0.8003989 (463.15 it/sec) -training >> step=8546700, episode=1425 reward=0.7958711 (464.92 it/sec) -training >> step=8546800, episode=1425 reward=0.8140097 (421.87 it/sec) -training >> step=8546900, episode=1425 reward=0.8066939 (468.81 it/sec) -training >> step=8547000, episode=1425 reward=0.7921146 (451.81 it/sec) -training >> step=8547100, episode=1425 reward=0.7967303 (427.40 it/sec) -training >> step=8547200, episode=1425 reward=0.8041355 (403.25 it/sec) -training >> step=8547300, episode=1425 reward=0.7834565 (433.18 it/sec) -training >> step=8547400, episode=1425 reward=0.7966506 (465.20 it/sec) -training >> step=8547500, episode=1425 reward=0.7945747 (412.19 it/sec) -training >> step=8547600, episode=1425 reward=0.7943277 (456.10 it/sec) -training >> step=8547700, episode=1425 reward=0.7880216 (473.56 it/sec) -training >> step=8547800, episode=1425 reward=0.780382 (487.88 it/sec) -training >> step=8547900, episode=1425 reward=0.7854302 (464.61 it/sec) -training >> step=8548000, episode=1425 reward=0.773479 (457.79 it/sec) -training >> step=8548100, episode=1425 reward=0.7764787 (443.90 it/sec) -training >> step=8548200, episode=1425 reward=0.7953246 (456.14 it/sec) -training >> step=8548300, episode=1425 reward=0.7755011 (489.74 it/sec) -training >> step=8548400, episode=1425 reward=0.7872669 (463.32 it/sec) -training >> step=8548500, episode=1425 reward=0.8055133 (445.80 it/sec) -training >> step=8548600, episode=1425 reward=0.7970001 (444.39 it/sec) -training >> step=8548700, episode=1425 reward=0.7939614 (465.05 it/sec) -training >> step=8548800, episode=1425 reward=0.774905 (466.14 it/sec) -training >> step=8548900, episode=1425 reward=0.7841842 (456.87 it/sec) -training >> step=8549000, episode=1425 reward=0.787055 (421.90 it/sec) -training >> step=8549100, episode=1425 reward=0.7928658 (439.00 it/sec) -training >> step=8549200, episode=1425 reward=0.7960812 (367.59 it/sec) -training >> step=8549300, episode=1426 reward=0.7783538 (71.78 it/sec) -training >> step=8549400, episode=1426 reward=0.8067746 (378.39 it/sec) -training >> step=8549500, episode=1426 reward=0.7678816 (469.79 it/sec) -training >> step=8549600, episode=1426 reward=0.7822095 (455.96 it/sec) -training >> step=8549700, episode=1426 reward=0.7830585 (476.34 it/sec) -training >> step=8549800, episode=1426 reward=0.7674112 (396.77 it/sec) -training >> step=8549900, episode=1426 reward=0.7886954 (444.08 it/sec) -training >> step=8550000, episode=1426 reward=0.7817949 (459.75 it/sec) -training >> step=8550100, episode=1426 reward=0.7864336 (456.43 it/sec) -training >> step=8550200, episode=1426 reward=0.8029449 (459.34 it/sec) -training >> step=8550300, episode=1426 reward=0.7773568 (467.33 it/sec) -training >> step=8550400, episode=1426 reward=0.7937304 (469.54 it/sec) -training >> step=8550500, episode=1426 reward=0.7832757 (486.51 it/sec) -training >> step=8550600, episode=1426 reward=0.7868306 (471.59 it/sec) -training >> step=8550700, episode=1426 reward=0.8032259 (448.49 it/sec) -training >> step=8550800, episode=1426 reward=0.803657 (448.96 it/sec) -training >> step=8550900, episode=1426 reward=0.810748 (464.88 it/sec) -training >> step=8551000, episode=1426 reward=0.7932721 (499.14 it/sec) -training >> step=8551100, episode=1426 reward=0.7731933 (453.39 it/sec) -training >> step=8551200, episode=1426 reward=0.7782928 (446.92 it/sec) -training >> step=8551300, episode=1426 reward=0.7880121 (479.40 it/sec) -training >> step=8551400, episode=1426 reward=0.7926297 (442.43 it/sec) -training >> step=8551500, episode=1426 reward=0.797227 (530.16 it/sec) -training >> step=8551600, episode=1426 reward=0.8065521 (475.18 it/sec) -training >> step=8551700, episode=1426 reward=0.77531 (483.51 it/sec) -training >> step=8551800, episode=1426 reward=0.7824177 (456.36 it/sec) -training >> step=8551900, episode=1426 reward=0.7927925 (431.68 it/sec) -training >> step=8552000, episode=1426 reward=0.7835454 (489.50 it/sec) -training >> step=8552100, episode=1426 reward=0.7866522 (449.17 it/sec) -training >> step=8552200, episode=1426 reward=0.813483 (409.77 it/sec) -training >> step=8552300, episode=1426 reward=0.7981895 (425.15 it/sec) -training >> step=8552400, episode=1426 reward=0.7739877 (512.27 it/sec) -training >> step=8552500, episode=1426 reward=0.7991096 (343.10 it/sec) -training >> step=8552600, episode=1426 reward=0.7786291 (465.20 it/sec) -training >> step=8552700, episode=1426 reward=0.7828797 (455.86 it/sec) -training >> step=8552800, episode=1426 reward=0.7811375 (497.78 it/sec) -training >> step=8552900, episode=1426 reward=0.7645221 (450.82 it/sec) -training >> step=8553000, episode=1426 reward=0.7897958 (423.89 it/sec) -training >> step=8553100, episode=1426 reward=0.7924138 (489.56 it/sec) -training >> step=8553200, episode=1426 reward=0.8179296 (465.28 it/sec) -training >> step=8553300, episode=1426 reward=0.8079805 (487.05 it/sec) -training >> step=8553400, episode=1426 reward=0.7907108 (430.11 it/sec) -training >> step=8553500, episode=1426 reward=0.8091346 (451.71 it/sec) -training >> step=8553600, episode=1426 reward=0.8055862 (452.67 it/sec) -training >> step=8553700, episode=1426 reward=0.7802141 (446.76 it/sec) -training >> step=8553800, episode=1426 reward=0.8014085 (447.57 it/sec) -training >> step=8553900, episode=1426 reward=0.7939407 (457.82 it/sec) -training >> step=8554000, episode=1426 reward=0.7924309 (421.07 it/sec) -training >> step=8554100, episode=1426 reward=0.7972746 (418.06 it/sec) -training >> step=8554200, episode=1426 reward=0.8026042 (475.10 it/sec) -training >> step=8554300, episode=1426 reward=0.8052492 (489.11 it/sec) -training >> step=8554400, episode=1426 reward=0.7868567 (443.19 it/sec) -training >> step=8554500, episode=1426 reward=0.7913215 (461.82 it/sec) -training >> step=8554600, episode=1426 reward=0.7954417 (471.13 it/sec) -training >> step=8554700, episode=1426 reward=0.8161114 (456.62 it/sec) -training >> step=8554800, episode=1426 reward=0.7863777 (486.25 it/sec) -training >> step=8554900, episode=1426 reward=0.7755616 (423.89 it/sec) -training >> step=8555000, episode=1426 reward=0.8021998 (488.32 it/sec) -training >> step=8555100, episode=1426 reward=0.7788942 (433.47 it/sec) -training >> step=8555200, episode=1426 reward=0.8000636 (443.52 it/sec) -training >> step=8555300, episode=1427 reward=0.7861719 (85.66 it/sec) -training >> step=8555400, episode=1427 reward=0.7947496 (458.93 it/sec) -training >> step=8555500, episode=1427 reward=0.8108174 (452.77 it/sec) -training >> step=8555600, episode=1427 reward=0.7712259 (472.27 it/sec) -training >> step=8555700, episode=1427 reward=0.7992171 (516.66 it/sec) -training >> step=8555800, episode=1427 reward=0.7833272 (456.96 it/sec) -training >> step=8555900, episode=1427 reward=0.7929848 (442.91 it/sec) -training >> step=8556000, episode=1427 reward=0.7890118 (466.25 it/sec) -training >> step=8556100, episode=1427 reward=0.7866389 (450.70 it/sec) -training >> step=8556200, episode=1427 reward=0.7819132 (458.24 it/sec) -training >> step=8556300, episode=1427 reward=0.7976308 (473.22 it/sec) -training >> step=8556400, episode=1427 reward=0.7980911 (462.95 it/sec) -training >> step=8556500, episode=1427 reward=0.7968246 (451.12 it/sec) -training >> step=8556600, episode=1427 reward=0.7818151 (474.71 it/sec) -training >> step=8556700, episode=1427 reward=0.7846496 (464.29 it/sec) -training >> step=8556800, episode=1427 reward=0.7938626 (470.47 it/sec) -training >> step=8556900, episode=1427 reward=0.7804594 (469.64 it/sec) -training >> step=8557000, episode=1427 reward=0.771403 (442.95 it/sec) -training >> step=8557100, episode=1427 reward=0.8123143 (486.76 it/sec) -training >> step=8557200, episode=1427 reward=0.783099 (472.10 it/sec) -training >> step=8557300, episode=1427 reward=0.7922029 (450.80 it/sec) -training >> step=8557400, episode=1427 reward=0.794194 (444.08 it/sec) -training >> step=8557500, episode=1427 reward=0.7901254 (479.17 it/sec) -training >> step=8557600, episode=1427 reward=0.8017777 (467.78 it/sec) -training >> step=8557700, episode=1427 reward=0.790708 (422.79 it/sec) -training >> step=8557800, episode=1427 reward=0.7854383 (459.96 it/sec) -training >> step=8557900, episode=1427 reward=0.7862364 (451.63 it/sec) -training >> step=8558000, episode=1427 reward=0.8057702 (429.61 it/sec) -training >> step=8558100, episode=1427 reward=0.7839757 (449.64 it/sec) -training >> step=8558200, episode=1427 reward=0.767081 (509.49 it/sec) -training >> step=8558300, episode=1427 reward=0.7927804 (447.13 it/sec) -training >> step=8558400, episode=1427 reward=0.8088498 (456.71 it/sec) -training >> step=8558500, episode=1427 reward=0.789073 (484.38 it/sec) -training >> step=8558600, episode=1427 reward=0.7954165 (341.30 it/sec) -training >> step=8558700, episode=1427 reward=0.7835371 (479.08 it/sec) -training >> step=8558800, episode=1427 reward=0.7704741 (480.78 it/sec) -training >> step=8558900, episode=1427 reward=0.7910864 (483.62 it/sec) -training >> step=8559000, episode=1427 reward=0.7802864 (467.86 it/sec) -training >> step=8559100, episode=1427 reward=0.7771895 (460.04 it/sec) -training >> step=8559200, episode=1427 reward=0.788011 (431.90 it/sec) -training >> step=8559300, episode=1427 reward=0.7903119 (505.88 it/sec) -training >> step=8559400, episode=1427 reward=0.8016392 (386.73 it/sec) -training >> step=8559500, episode=1427 reward=0.764129 (449.50 it/sec) -training >> step=8559600, episode=1427 reward=0.7898651 (414.48 it/sec) -training >> step=8559700, episode=1427 reward=0.7992465 (429.61 it/sec) -training >> step=8559800, episode=1427 reward=0.7994776 (460.88 it/sec) -training >> step=8559900, episode=1427 reward=0.7976372 (443.78 it/sec) -training >> step=8560000, episode=1427 reward=0.7769333 (446.78 it/sec) -training >> step=8560100, episode=1427 reward=0.8004378 (438.74 it/sec) -training >> step=8560200, episode=1427 reward=0.8088145 (446.65 it/sec) -training >> step=8560300, episode=1427 reward=0.8040241 (491.69 it/sec) -training >> step=8560400, episode=1427 reward=0.7929739 (510.31 it/sec) -training >> step=8560500, episode=1427 reward=0.8014064 (447.85 it/sec) -training >> step=8560600, episode=1427 reward=0.7878104 (458.77 it/sec) -training >> step=8560700, episode=1427 reward=0.789307 (485.68 it/sec) -training >> step=8560800, episode=1427 reward=0.7973925 (444.93 it/sec) -training >> step=8560900, episode=1427 reward=0.7863617 (463.51 it/sec) -training >> step=8561000, episode=1427 reward=0.8098624 (479.63 it/sec) -training >> step=8561100, episode=1427 reward=0.7677046 (406.43 it/sec) -training >> step=8561200, episode=1427 reward=0.7904165 (449.28 it/sec) -training >> step=8561300, episode=1428 reward=0.8012805 (74.72 it/sec) -training >> step=8561400, episode=1428 reward=0.7676783 (408.04 it/sec) -training >> step=8561500, episode=1428 reward=0.7858063 (439.87 it/sec) -training >> step=8561600, episode=1428 reward=0.800248 (461.72 it/sec) -training >> step=8561700, episode=1428 reward=0.7751694 (486.98 it/sec) -training >> step=8561800, episode=1428 reward=0.7849669 (441.09 it/sec) -training >> step=8561900, episode=1428 reward=0.7880079 (443.30 it/sec) -training >> step=8562000, episode=1428 reward=0.7808543 (474.86 it/sec) -training >> step=8562100, episode=1428 reward=0.7721418 (426.63 it/sec) -training >> step=8562200, episode=1428 reward=0.7940409 (482.40 it/sec) -training >> step=8562300, episode=1428 reward=0.7807211 (469.73 it/sec) -training >> step=8562400, episode=1428 reward=0.7955511 (442.63 it/sec) -training >> step=8562500, episode=1428 reward=0.7804594 (474.37 it/sec) -training >> step=8562600, episode=1428 reward=0.7840652 (487.37 it/sec) -training >> step=8562700, episode=1428 reward=0.7946218 (442.17 it/sec) -training >> step=8562800, episode=1428 reward=0.8026248 (460.22 it/sec) -training >> step=8562900, episode=1428 reward=0.7932308 (476.72 it/sec) -training >> step=8563000, episode=1428 reward=0.7824773 (502.26 it/sec) -training >> step=8563100, episode=1428 reward=0.7931036 (427.27 it/sec) -training >> step=8563200, episode=1428 reward=0.8199732 (444.01 it/sec) -training >> step=8563300, episode=1428 reward=0.8088805 (453.57 it/sec) -training >> step=8563400, episode=1428 reward=0.7857288 (469.07 it/sec) -training >> step=8563500, episode=1428 reward=0.8091071 (460.94 it/sec) -training >> step=8563600, episode=1428 reward=0.7722695 (454.44 it/sec) -training >> step=8563700, episode=1428 reward=0.7927225 (517.62 it/sec) -training >> step=8563800, episode=1428 reward=0.8033711 (419.77 it/sec) -training >> step=8563900, episode=1428 reward=0.8025693 (406.09 it/sec) -training >> step=8564000, episode=1428 reward=0.8023236 (466.19 it/sec) -training >> step=8564100, episode=1428 reward=0.7821281 (464.26 it/sec) -training >> step=8564200, episode=1428 reward=0.7698352 (466.31 it/sec) -training >> step=8564300, episode=1428 reward=0.8026091 (450.68 it/sec) -training >> step=8564400, episode=1428 reward=0.7878949 (450.13 it/sec) -training >> step=8564500, episode=1428 reward=0.7823005 (466.42 it/sec) -training >> step=8564600, episode=1428 reward=0.7853083 (448.53 it/sec) -training >> step=8564700, episode=1428 reward=0.7773661 (446.80 it/sec) -training >> step=8564800, episode=1428 reward=0.7817672 (343.51 it/sec) -training >> step=8564900, episode=1428 reward=0.7766989 (439.83 it/sec) -training >> step=8565000, episode=1428 reward=0.787405 (476.78 it/sec) -training >> step=8565100, episode=1428 reward=0.7992381 (448.91 it/sec) -training >> step=8565200, episode=1428 reward=0.7748368 (450.13 it/sec) -training >> step=8565300, episode=1428 reward=0.7857013 (498.09 it/sec) -training >> step=8565400, episode=1428 reward=0.7896926 (472.96 it/sec) -training >> step=8565500, episode=1428 reward=0.7732043 (473.24 it/sec) -training >> step=8565600, episode=1428 reward=0.8160306 (466.54 it/sec) -training >> step=8565700, episode=1428 reward=0.7884338 (443.56 it/sec) -training >> step=8565800, episode=1428 reward=0.7852851 (459.33 it/sec) -training >> step=8565900, episode=1428 reward=0.7870013 (477.61 it/sec) -training >> step=8566000, episode=1428 reward=0.8072332 (470.77 it/sec) -training >> step=8566100, episode=1428 reward=0.7850617 (402.47 it/sec) -training >> step=8566200, episode=1428 reward=0.7939654 (479.21 it/sec) -training >> step=8566300, episode=1428 reward=0.810568 (429.23 it/sec) -training >> step=8566400, episode=1428 reward=0.7967138 (485.76 it/sec) -training >> step=8566500, episode=1428 reward=0.7738565 (456.70 it/sec) -training >> step=8566600, episode=1428 reward=0.790744 (437.25 it/sec) -training >> step=8566700, episode=1428 reward=0.7985371 (464.36 it/sec) -training >> step=8566800, episode=1428 reward=0.7763826 (471.52 it/sec) -training >> step=8566900, episode=1428 reward=0.791585 (478.18 it/sec) -training >> step=8567000, episode=1428 reward=0.7591273 (469.52 it/sec) -training >> step=8567100, episode=1428 reward=0.8111191 (448.29 it/sec) -training >> step=8567200, episode=1428 reward=0.780416 (468.28 it/sec) -training >> step=8567300, episode=1429 reward=0.7928741 (91.81 it/sec) -training >> step=8567400, episode=1429 reward=0.7868519 (482.13 it/sec) -training >> step=8567500, episode=1429 reward=0.7910895 (449.08 it/sec) -training >> step=8567600, episode=1429 reward=0.7752056 (479.47 it/sec) -training >> step=8567700, episode=1429 reward=0.7782491 (461.97 it/sec) -training >> step=8567800, episode=1429 reward=0.7806276 (460.65 it/sec) -training >> step=8567900, episode=1429 reward=0.7698359 (410.97 it/sec) -training >> step=8568000, episode=1429 reward=0.7666897 (473.39 it/sec) -training >> step=8568100, episode=1429 reward=0.7952654 (446.70 it/sec) -training >> step=8568200, episode=1429 reward=0.7881223 (480.45 it/sec) -training >> step=8568300, episode=1429 reward=0.8026201 (488.51 it/sec) -training >> step=8568400, episode=1429 reward=0.7924865 (455.22 it/sec) -training >> step=8568500, episode=1429 reward=0.7988191 (455.64 it/sec) -training >> step=8568600, episode=1429 reward=0.7980872 (449.56 it/sec) -training >> step=8568700, episode=1429 reward=0.7983087 (477.66 it/sec) -training >> step=8568800, episode=1429 reward=0.7953613 (445.09 it/sec) -training >> step=8568900, episode=1429 reward=0.7912098 (451.83 it/sec) -training >> step=8569000, episode=1429 reward=0.7930319 (471.29 it/sec) -training >> step=8569100, episode=1429 reward=0.7857711 (483.21 it/sec) -training >> step=8569200, episode=1429 reward=0.7831514 (462.71 it/sec) -training >> step=8569300, episode=1429 reward=0.7788652 (474.30 it/sec) -training >> step=8569400, episode=1429 reward=0.788286 (461.15 it/sec) -training >> step=8569500, episode=1429 reward=0.7853469 (461.55 it/sec) -training >> step=8569600, episode=1429 reward=0.7676171 (457.73 it/sec) -training >> step=8569700, episode=1429 reward=0.8036402 (445.16 it/sec) -training >> step=8569800, episode=1429 reward=0.7892909 (507.74 it/sec) -training >> step=8569900, episode=1429 reward=0.7936604 (420.45 it/sec) -training >> step=8570000, episode=1429 reward=0.7879104 (426.99 it/sec) -training >> step=8570100, episode=1429 reward=0.7913567 (437.49 it/sec) -training >> step=8570200, episode=1429 reward=0.8003877 (426.88 it/sec) -training >> step=8570300, episode=1429 reward=0.7784627 (408.07 it/sec) -training >> step=8570400, episode=1429 reward=0.7853647 (440.37 it/sec) -training >> step=8570500, episode=1429 reward=0.7868922 (478.15 it/sec) -training >> step=8570600, episode=1429 reward=0.7998027 (444.95 it/sec) -training >> step=8570700, episode=1429 reward=0.7825855 (436.45 it/sec) -training >> step=8570800, episode=1429 reward=0.7807491 (429.51 it/sec) -training >> step=8570900, episode=1429 reward=0.783763 (353.67 it/sec) -training >> step=8571000, episode=1429 reward=0.7799815 (439.61 it/sec) -training >> step=8571100, episode=1429 reward=0.8103562 (459.51 it/sec) -training >> step=8571200, episode=1429 reward=0.8071725 (461.19 it/sec) -training >> step=8571300, episode=1429 reward=0.8007128 (471.05 it/sec) -training >> step=8571400, episode=1429 reward=0.7908957 (476.88 it/sec) -training >> step=8571500, episode=1429 reward=0.7805728 (486.37 it/sec) -training >> step=8571600, episode=1429 reward=0.7936762 (497.45 it/sec) -training >> step=8571700, episode=1429 reward=0.7923886 (472.05 it/sec) -training >> step=8571800, episode=1429 reward=0.7966985 (466.64 it/sec) -training >> step=8571900, episode=1429 reward=0.805092 (459.75 it/sec) -training >> step=8572000, episode=1429 reward=0.7828205 (475.15 it/sec) -training >> step=8572100, episode=1429 reward=0.7753496 (489.65 it/sec) -training >> step=8572200, episode=1429 reward=0.8051829 (417.81 it/sec) -training >> step=8572300, episode=1429 reward=0.7886877 (469.07 it/sec) -training >> step=8572400, episode=1429 reward=0.7847747 (456.69 it/sec) -training >> step=8572500, episode=1429 reward=0.7941511 (425.50 it/sec) -training >> step=8572600, episode=1429 reward=0.780813 (500.40 it/sec) -training >> step=8572700, episode=1429 reward=0.7792304 (486.10 it/sec) -training >> step=8572800, episode=1429 reward=0.7794694 (445.81 it/sec) -training >> step=8572900, episode=1429 reward=0.7790703 (465.36 it/sec) -training >> step=8573000, episode=1429 reward=0.795172 (461.43 it/sec) -training >> step=8573100, episode=1429 reward=0.7845287 (445.84 it/sec) -training >> step=8573200, episode=1429 reward=0.7876716 (446.32 it/sec) -training >> step=8573300, episode=1430 reward=0.7855543 (71.34 it/sec) -training >> step=8573400, episode=1430 reward=0.77686 (489.03 it/sec) -training >> step=8573500, episode=1430 reward=0.7903091 (443.95 it/sec) -training >> step=8573600, episode=1430 reward=0.7855002 (457.74 it/sec) -training >> step=8573700, episode=1430 reward=0.7892022 (505.81 it/sec) -training >> step=8573800, episode=1430 reward=0.7961379 (482.51 it/sec) -training >> step=8573900, episode=1430 reward=0.7947371 (477.08 it/sec) -training >> step=8574000, episode=1430 reward=0.7853743 (447.93 it/sec) -training >> step=8574100, episode=1430 reward=0.7883913 (420.82 it/sec) -training >> step=8574200, episode=1430 reward=0.7821512 (431.11 it/sec) -training >> step=8574300, episode=1430 reward=0.8094648 (471.53 it/sec) -training >> step=8574400, episode=1430 reward=0.8005996 (496.34 it/sec) -training >> step=8574500, episode=1430 reward=0.7950969 (478.19 it/sec) -training >> step=8574600, episode=1430 reward=0.781852 (475.23 it/sec) -training >> step=8574700, episode=1430 reward=0.7937911 (484.90 it/sec) -training >> step=8574800, episode=1430 reward=0.7711875 (475.31 it/sec) -training >> step=8574900, episode=1430 reward=0.7964808 (486.97 it/sec) -training >> step=8575000, episode=1430 reward=0.7932273 (505.32 it/sec) -training >> step=8575100, episode=1430 reward=0.7523543 (534.76 it/sec) -training >> step=8575200, episode=1430 reward=0.7912835 (482.92 it/sec) -training >> step=8575300, episode=1430 reward=0.7925438 (512.02 it/sec) -training >> step=8575400, episode=1430 reward=0.7739858 (502.83 it/sec) -training >> step=8575500, episode=1430 reward=0.7670737 (495.45 it/sec) -training >> step=8575600, episode=1430 reward=0.7797529 (426.51 it/sec) -training >> step=8575700, episode=1430 reward=0.8052611 (491.66 it/sec) -training >> step=8575800, episode=1430 reward=0.7912576 (498.83 it/sec) -training >> step=8575900, episode=1430 reward=0.7927271 (520.11 it/sec) -training >> step=8576000, episode=1430 reward=0.7954835 (513.35 it/sec) -training >> step=8576100, episode=1430 reward=0.7815337 (514.29 it/sec) -training >> step=8576200, episode=1430 reward=0.7784562 (471.34 it/sec) -training >> step=8576300, episode=1430 reward=0.7846567 (468.08 it/sec) -training >> step=8576400, episode=1430 reward=0.7859695 (529.67 it/sec) -training >> step=8576500, episode=1430 reward=0.7807594 (483.54 it/sec) -training >> step=8576600, episode=1430 reward=0.7667489 (484.15 it/sec) -training >> step=8576700, episode=1430 reward=0.765404 (480.97 it/sec) -training >> step=8576800, episode=1430 reward=0.7610317 (494.70 it/sec) -training >> step=8576900, episode=1430 reward=0.7883306 (518.43 it/sec) -training >> step=8577000, episode=1430 reward=0.772321 (465.69 it/sec) -training >> step=8577100, episode=1430 reward=0.7886262 (489.22 it/sec) -training >> step=8577200, episode=1430 reward=0.7980084 (397.81 it/sec) -training >> step=8577300, episode=1430 reward=0.7930536 (509.79 it/sec) -training >> step=8577400, episode=1430 reward=0.7977492 (542.60 it/sec) -training >> step=8577500, episode=1430 reward=0.7950925 (533.98 it/sec) -training >> step=8577600, episode=1430 reward=0.7847864 (498.63 it/sec) -training >> step=8577700, episode=1430 reward=0.7877231 (461.07 it/sec) -training >> step=8577800, episode=1430 reward=0.7702681 (490.97 it/sec) -training >> step=8577900, episode=1430 reward=0.7824311 (544.59 it/sec) -training >> step=8578000, episode=1430 reward=0.7799429 (497.34 it/sec) -training >> step=8578100, episode=1430 reward=0.8014033 (506.34 it/sec) -training >> step=8578200, episode=1430 reward=0.8049119 (530.49 it/sec) -training >> step=8578300, episode=1430 reward=0.7732826 (435.75 it/sec) -training >> step=8578400, episode=1430 reward=0.7828312 (511.30 it/sec) -training >> step=8578500, episode=1430 reward=0.7884049 (484.01 it/sec) -training >> step=8578600, episode=1430 reward=0.7913172 (541.65 it/sec) -training >> step=8578700, episode=1430 reward=0.7962874 (484.64 it/sec) -training >> step=8578800, episode=1430 reward=0.800301 (462.57 it/sec) -training >> step=8578900, episode=1430 reward=0.7608658 (504.15 it/sec) -training >> step=8579000, episode=1430 reward=0.781992 (494.70 it/sec) -training >> step=8579100, episode=1430 reward=0.7866508 (511.09 it/sec) -training >> step=8579200, episode=1430 reward=0.7880552 (470.24 it/sec) -training >> step=8579300, episode=1431 reward=0.8129217 (123.15 it/sec) -training >> step=8579400, episode=1431 reward=0.7890612 (476.75 it/sec) -training >> step=8579500, episode=1431 reward=0.78162 (434.40 it/sec) -training >> step=8579600, episode=1431 reward=0.7770141 (522.78 it/sec) -training >> step=8579700, episode=1431 reward=0.7951293 (532.74 it/sec) -training >> step=8579800, episode=1431 reward=0.7862188 (495.30 it/sec) -training >> step=8579900, episode=1431 reward=0.777907 (480.62 it/sec) -training >> step=8580000, episode=1431 reward=0.7868951 (503.42 it/sec) -training >> step=8580100, episode=1431 reward=0.8124684 (509.42 it/sec) -training >> step=8580200, episode=1431 reward=0.7874069 (467.16 it/sec) -training >> step=8580300, episode=1431 reward=0.7782435 (502.74 it/sec) -training >> step=8580400, episode=1431 reward=0.7878672 (503.66 it/sec) -training >> step=8580500, episode=1431 reward=0.8007832 (474.30 it/sec) -training >> step=8580600, episode=1431 reward=0.7905021 (499.45 it/sec) -training >> step=8580700, episode=1431 reward=0.7829643 (452.59 it/sec) -training >> step=8580800, episode=1431 reward=0.7756773 (495.70 it/sec) -training >> step=8580900, episode=1431 reward=0.7951161 (467.04 it/sec) -training >> step=8581000, episode=1431 reward=0.790481 (475.65 it/sec) -training >> step=8581100, episode=1431 reward=0.7942675 (493.23 it/sec) -training >> step=8581200, episode=1431 reward=0.7845273 (459.70 it/sec) -training >> step=8581300, episode=1431 reward=0.7773513 (465.03 it/sec) -training >> step=8581400, episode=1431 reward=0.7741448 (433.33 it/sec) -training >> step=8581500, episode=1431 reward=0.7740523 (453.20 it/sec) -training >> step=8581600, episode=1431 reward=0.8065987 (445.19 it/sec) -training >> step=8581700, episode=1431 reward=0.7815132 (439.04 it/sec) -training >> step=8581800, episode=1431 reward=0.8012936 (530.43 it/sec) -training >> step=8581900, episode=1431 reward=0.8035012 (500.07 it/sec) -training >> step=8582000, episode=1431 reward=0.80343 (429.74 it/sec) -training >> step=8582100, episode=1431 reward=0.7893341 (509.49 it/sec) -training >> step=8582200, episode=1431 reward=0.7853881 (523.24 it/sec) -training >> step=8582300, episode=1431 reward=0.7794186 (467.75 it/sec) -training >> step=8582400, episode=1431 reward=0.800091 (497.77 it/sec) -training >> step=8582500, episode=1431 reward=0.8016288 (472.92 it/sec) -training >> step=8582600, episode=1431 reward=0.7745028 (492.70 it/sec) -training >> step=8582700, episode=1431 reward=0.8138806 (463.81 it/sec) -training >> step=8582800, episode=1431 reward=0.7764179 (481.57 it/sec) -training >> step=8582900, episode=1431 reward=0.7835631 (527.72 it/sec) -training >> step=8583000, episode=1431 reward=0.7511395 (476.84 it/sec) -training >> step=8583100, episode=1431 reward=0.7845914 (485.14 it/sec) -training >> step=8583200, episode=1431 reward=0.7885031 (510.39 it/sec) -training >> step=8583300, episode=1431 reward=0.7803929 (415.91 it/sec) -training >> step=8583400, episode=1431 reward=0.7900552 (491.09 it/sec) -training >> step=8583500, episode=1431 reward=0.7939319 (502.66 it/sec) -training >> step=8583600, episode=1431 reward=0.7816627 (559.57 it/sec) -training >> step=8583700, episode=1431 reward=0.778828 (496.35 it/sec) -training >> step=8583800, episode=1431 reward=0.7731969 (476.85 it/sec) -training >> step=8583900, episode=1431 reward=0.7971345 (460.14 it/sec) -training >> step=8584000, episode=1431 reward=0.7890616 (521.42 it/sec) -training >> step=8584100, episode=1431 reward=0.7950182 (499.67 it/sec) -training >> step=8584200, episode=1431 reward=0.7688442 (516.22 it/sec) -training >> step=8584300, episode=1431 reward=0.7916444 (494.77 it/sec) -training >> step=8584400, episode=1431 reward=0.7685223 (417.73 it/sec) -training >> step=8584500, episode=1431 reward=0.789997 (495.50 it/sec) -training >> step=8584600, episode=1431 reward=0.7838994 (520.06 it/sec) -training >> step=8584700, episode=1431 reward=0.7912478 (489.44 it/sec) -training >> step=8584800, episode=1431 reward=0.792356 (497.10 it/sec) -training >> step=8584900, episode=1431 reward=0.7830772 (465.67 it/sec) -training >> step=8585000, episode=1431 reward=0.7765167 (502.56 it/sec) -training >> step=8585100, episode=1431 reward=0.7900427 (512.31 it/sec) -training >> step=8585200, episode=1431 reward=0.7687895 (480.57 it/sec) -training >> step=8585300, episode=1432 reward=0.7993408 (96.39 it/sec) -training >> step=8585400, episode=1432 reward=0.764168 (511.97 it/sec) -training >> step=8585500, episode=1432 reward=0.7950226 (497.08 it/sec) -training >> step=8585600, episode=1432 reward=0.7897432 (507.25 it/sec) -training >> step=8585700, episode=1432 reward=0.7939306 (548.51 it/sec) -training >> step=8585800, episode=1432 reward=0.8055294 (519.77 it/sec) -training >> step=8585900, episode=1432 reward=0.7823889 (503.54 it/sec) -training >> step=8586000, episode=1432 reward=0.7925193 (508.77 it/sec) -training >> step=8586100, episode=1432 reward=0.7914767 (512.57 it/sec) -training >> step=8586200, episode=1432 reward=0.7901416 (512.13 it/sec) -training >> step=8586300, episode=1432 reward=0.78958 (518.61 it/sec) -training >> step=8586400, episode=1432 reward=0.7968864 (528.56 it/sec) -training >> step=8586500, episode=1432 reward=0.7930589 (523.07 it/sec) -training >> step=8586600, episode=1432 reward=0.8007253 (528.73 it/sec) -training >> step=8586700, episode=1432 reward=0.8130408 (511.67 it/sec) -training >> step=8586800, episode=1432 reward=0.7885593 (553.64 it/sec) -training >> step=8586900, episode=1432 reward=0.7807716 (497.62 it/sec) -training >> step=8587000, episode=1432 reward=0.7825449 (458.86 it/sec) -training >> step=8587100, episode=1432 reward=0.785363 (489.80 it/sec) -training >> step=8587200, episode=1432 reward=0.7968684 (518.28 it/sec) -training >> step=8587300, episode=1432 reward=0.794659 (482.52 it/sec) -training >> step=8587400, episode=1432 reward=0.7863073 (467.82 it/sec) -training >> step=8587500, episode=1432 reward=0.8016146 (518.25 it/sec) -training >> step=8587600, episode=1432 reward=0.7882434 (468.81 it/sec) -training >> step=8587700, episode=1432 reward=0.7810092 (470.70 it/sec) -training >> step=8587800, episode=1432 reward=0.7921486 (506.81 it/sec) -training >> step=8587900, episode=1432 reward=0.8007189 (506.22 it/sec) -training >> step=8588000, episode=1432 reward=0.8026227 (482.42 it/sec) -training >> step=8588100, episode=1432 reward=0.7955221 (415.72 it/sec) -training >> step=8588200, episode=1432 reward=0.7888934 (487.81 it/sec) -training >> step=8588300, episode=1432 reward=0.803045 (521.90 it/sec) -training >> step=8588400, episode=1432 reward=0.8015532 (476.99 it/sec) -training >> step=8588500, episode=1432 reward=0.7943726 (478.99 it/sec) -training >> step=8588600, episode=1432 reward=0.77697 (477.41 it/sec) -training >> step=8588700, episode=1432 reward=0.7932725 (508.84 it/sec) -training >> step=8588800, episode=1432 reward=0.7869994 (499.80 it/sec) -training >> step=8588900, episode=1432 reward=0.7890505 (475.17 it/sec) -training >> step=8589000, episode=1432 reward=0.7893445 (511.17 it/sec) -training >> step=8589100, episode=1432 reward=0.7853274 (466.42 it/sec) -training >> step=8589200, episode=1432 reward=0.7946581 (481.57 it/sec) -training >> step=8589300, episode=1432 reward=0.7836653 (504.11 it/sec) -training >> step=8589400, episode=1432 reward=0.7879485 (508.43 it/sec) -training >> step=8589500, episode=1432 reward=0.7909918 (382.87 it/sec) -training >> step=8589600, episode=1432 reward=0.7705533 (437.14 it/sec) -training >> step=8589700, episode=1432 reward=0.7925535 (530.37 it/sec) -training >> step=8589800, episode=1432 reward=0.7767553 (483.56 it/sec) -training >> step=8589900, episode=1432 reward=0.7844526 (489.19 it/sec) -training >> step=8590000, episode=1432 reward=0.7932141 (497.19 it/sec) -training >> step=8590100, episode=1432 reward=0.7596358 (544.88 it/sec) -training >> step=8590200, episode=1432 reward=0.7850834 (520.06 it/sec) -training >> step=8590300, episode=1432 reward=0.7817651 (495.43 it/sec) -training >> step=8590400, episode=1432 reward=0.7839411 (469.82 it/sec) -training >> step=8590500, episode=1432 reward=0.784704 (464.22 it/sec) -training >> step=8590600, episode=1432 reward=0.7726925 (444.32 it/sec) -training >> step=8590700, episode=1432 reward=0.8036562 (477.77 it/sec) -training >> step=8590800, episode=1432 reward=0.7817581 (473.79 it/sec) -training >> step=8590900, episode=1432 reward=0.8130474 (460.21 it/sec) -training >> step=8591000, episode=1432 reward=0.783219 (458.87 it/sec) -training >> step=8591100, episode=1432 reward=0.7890129 (438.12 it/sec) -training >> step=8591200, episode=1432 reward=0.8088289 (483.78 it/sec) -training >> step=8591300, episode=1433 reward=0.7899497 (97.62 it/sec) -training >> step=8591400, episode=1433 reward=0.776009 (445.04 it/sec) -training >> step=8591500, episode=1433 reward=0.7847844 (463.66 it/sec) -training >> step=8591600, episode=1433 reward=0.7795336 (465.89 it/sec) -training >> step=8591700, episode=1433 reward=0.7877063 (474.36 it/sec) -training >> step=8591800, episode=1433 reward=0.79061 (483.04 it/sec) -training >> step=8591900, episode=1433 reward=0.7885455 (418.02 it/sec) -training >> step=8592000, episode=1433 reward=0.8135352 (488.59 it/sec) -training >> step=8592100, episode=1433 reward=0.794221 (442.74 it/sec) -training >> step=8592200, episode=1433 reward=0.7821171 (493.32 it/sec) -training >> step=8592300, episode=1433 reward=0.7964808 (494.55 it/sec) -training >> step=8592400, episode=1433 reward=0.813737 (477.76 it/sec) -training >> step=8592500, episode=1433 reward=0.7844682 (444.91 it/sec) -training >> step=8592600, episode=1433 reward=0.783478 (400.07 it/sec) -training >> step=8592700, episode=1433 reward=0.7886011 (455.44 it/sec) -training >> step=8592800, episode=1433 reward=0.7993289 (474.52 it/sec) -training >> step=8592900, episode=1433 reward=0.7895017 (457.81 it/sec) -training >> step=8593000, episode=1433 reward=0.7908407 (493.22 it/sec) -training >> step=8593100, episode=1433 reward=0.7778669 (457.51 it/sec) -training >> step=8593200, episode=1433 reward=0.7882812 (457.01 it/sec) -training >> step=8593300, episode=1433 reward=0.7842038 (456.40 it/sec) -training >> step=8593400, episode=1433 reward=0.7931117 (454.51 it/sec) -training >> step=8593500, episode=1433 reward=0.780529 (449.55 it/sec) -training >> step=8593600, episode=1433 reward=0.7940261 (460.54 it/sec) -training >> step=8593700, episode=1433 reward=0.7799377 (508.46 it/sec) -training >> step=8593800, episode=1433 reward=0.7792512 (446.19 it/sec) -training >> step=8593900, episode=1433 reward=0.7890146 (476.63 it/sec) -training >> step=8594000, episode=1433 reward=0.8171567 (458.97 it/sec) -training >> step=8594100, episode=1433 reward=0.7832645 (494.13 it/sec) -training >> step=8594200, episode=1433 reward=0.801872 (463.27 it/sec) -training >> step=8594300, episode=1433 reward=0.7922145 (474.35 it/sec) -training >> step=8594400, episode=1433 reward=0.8018706 (469.72 it/sec) -training >> step=8594500, episode=1433 reward=0.823038 (492.48 it/sec) -training >> step=8594600, episode=1433 reward=0.8169405 (440.35 it/sec) -training >> step=8594700, episode=1433 reward=0.7872974 (430.06 it/sec) -training >> step=8594800, episode=1433 reward=0.7972641 (473.36 it/sec) -training >> step=8594900, episode=1433 reward=0.7848051 (479.78 it/sec) -training >> step=8595000, episode=1433 reward=0.7859903 (501.69 it/sec) -training >> step=8595100, episode=1433 reward=0.7817217 (461.74 it/sec) -training >> step=8595200, episode=1433 reward=0.8173749 (442.30 it/sec) -training >> step=8595300, episode=1433 reward=0.765604 (456.49 it/sec) -training >> step=8595400, episode=1433 reward=0.787918 (452.16 it/sec) -training >> step=8595500, episode=1433 reward=0.7935659 (463.22 it/sec) -training >> step=8595600, episode=1433 reward=0.7800763 (520.18 it/sec) -training >> step=8595700, episode=1433 reward=0.7937759 (324.35 it/sec) -training >> step=8595800, episode=1433 reward=0.7750798 (466.65 it/sec) -training >> step=8595900, episode=1433 reward=0.7769487 (464.22 it/sec) -training >> step=8596000, episode=1433 reward=0.797721 (460.64 it/sec) -training >> step=8596100, episode=1433 reward=0.7845808 (492.47 it/sec) -training >> step=8596200, episode=1433 reward=0.7911493 (427.28 it/sec) -training >> step=8596300, episode=1433 reward=0.7878214 (435.05 it/sec) -training >> step=8596400, episode=1433 reward=0.7864616 (495.70 it/sec) -training >> step=8596500, episode=1433 reward=0.7892377 (458.48 it/sec) -training >> step=8596600, episode=1433 reward=0.7708502 (502.87 it/sec) -training >> step=8596700, episode=1433 reward=0.7890578 (470.51 it/sec) -training >> step=8596800, episode=1433 reward=0.7707626 (450.68 it/sec) -training >> step=8596900, episode=1433 reward=0.806379 (462.05 it/sec) -training >> step=8597000, episode=1433 reward=0.7943704 (485.66 it/sec) -training >> step=8597100, episode=1433 reward=0.7823659 (469.17 it/sec) -training >> step=8597200, episode=1433 reward=0.7943445 (489.97 it/sec) -training >> step=8597300, episode=1434 reward=0.7993705 (93.93 it/sec) -training >> step=8597400, episode=1434 reward=0.7734159 (457.71 it/sec) -training >> step=8597500, episode=1434 reward=0.7778788 (421.61 it/sec) -training >> step=8597600, episode=1434 reward=0.7662772 (458.46 it/sec) -training >> step=8597700, episode=1434 reward=0.7908127 (480.78 it/sec) -training >> step=8597800, episode=1434 reward=0.7959307 (465.69 it/sec) -training >> step=8597900, episode=1434 reward=0.7945301 (477.36 it/sec) -training >> step=8598000, episode=1434 reward=0.7935115 (459.51 it/sec) -training >> step=8598100, episode=1434 reward=0.7863243 (481.72 it/sec) -training >> step=8598200, episode=1434 reward=0.7830863 (472.14 it/sec) -training >> step=8598300, episode=1434 reward=0.7939625 (500.75 it/sec) -training >> step=8598400, episode=1434 reward=0.7841264 (461.93 it/sec) -training >> step=8598500, episode=1434 reward=0.8040258 (442.54 it/sec) -training >> step=8598600, episode=1434 reward=0.7994102 (459.78 it/sec) -training >> step=8598700, episode=1434 reward=0.7847002 (492.20 it/sec) -training >> step=8598800, episode=1434 reward=0.7794335 (499.58 it/sec) -training >> step=8598900, episode=1434 reward=0.7861434 (466.22 it/sec) -training >> step=8599000, episode=1434 reward=0.8065899 (460.74 it/sec) -training >> step=8599100, episode=1434 reward=0.7853673 (469.81 it/sec) -training >> step=8599200, episode=1434 reward=0.8007815 (458.80 it/sec) -training >> step=8599300, episode=1434 reward=0.7911082 (464.11 it/sec) -training >> step=8599400, episode=1434 reward=0.7932183 (504.07 it/sec) -training >> step=8599500, episode=1434 reward=0.7847895 (454.25 it/sec) -training >> step=8599600, episode=1434 reward=0.7729802 (463.42 it/sec) -training >> step=8599700, episode=1434 reward=0.7967717 (499.94 it/sec) -training >> step=8599800, episode=1434 reward=0.785723 (470.54 it/sec) -training >> step=8599900, episode=1434 reward=0.7966629 (445.98 it/sec) -training >> step=8600000, episode=1434 reward=0.7768301 (463.29 it/sec) -training >> step=8600100, episode=1434 reward=0.7730962 (474.98 it/sec) -training >> step=8600200, episode=1434 reward=0.7960095 (478.42 it/sec) -training >> step=8600300, episode=1434 reward=0.7909384 (443.10 it/sec) -training >> step=8600400, episode=1434 reward=0.794666 (427.58 it/sec) -training >> step=8600500, episode=1434 reward=0.8095008 (477.47 it/sec) -training >> step=8600600, episode=1434 reward=0.7791524 (447.87 it/sec) -training >> step=8600700, episode=1434 reward=0.7947684 (459.88 it/sec) -training >> step=8600800, episode=1434 reward=0.7974246 (452.58 it/sec) -training >> step=8600900, episode=1434 reward=0.7763221 (416.29 it/sec) -training >> step=8601000, episode=1434 reward=0.7910016 (455.67 it/sec) -training >> step=8601100, episode=1434 reward=0.8225953 (473.01 it/sec) -training >> step=8601200, episode=1434 reward=0.7992928 (434.37 it/sec) -training >> step=8601300, episode=1434 reward=0.7879056 (401.37 it/sec) -training >> step=8601400, episode=1434 reward=0.7903439 (417.66 it/sec) -training >> step=8601500, episode=1434 reward=0.7948134 (455.30 it/sec) -training >> step=8601600, episode=1434 reward=0.7885184 (496.43 it/sec) -training >> step=8601700, episode=1434 reward=0.7903745 (454.70 it/sec) -training >> step=8601800, episode=1434 reward=0.805503 (472.85 it/sec) -training >> step=8601900, episode=1434 reward=0.7814711 (480.34 it/sec) -training >> step=8602000, episode=1434 reward=0.8050422 (462.92 it/sec) -training >> step=8602100, episode=1434 reward=0.7638599 (312.15 it/sec) -training >> step=8602200, episode=1434 reward=0.7758538 (469.46 it/sec) -training >> step=8602300, episode=1434 reward=0.8002346 (510.29 it/sec) -training >> step=8602400, episode=1434 reward=0.785315 (460.99 it/sec) -training >> step=8602500, episode=1434 reward=0.815508 (479.59 it/sec) -training >> step=8602600, episode=1434 reward=0.7752996 (466.56 it/sec) -training >> step=8602700, episode=1434 reward=0.7869943 (469.96 it/sec) -training >> step=8602800, episode=1434 reward=0.7987924 (469.58 it/sec) -training >> step=8602900, episode=1434 reward=0.7846994 (506.13 it/sec) -training >> step=8603000, episode=1434 reward=0.8197272 (512.08 it/sec) -training >> step=8603100, episode=1434 reward=0.7863019 (455.12 it/sec) -training >> step=8603200, episode=1434 reward=0.7786237 (468.75 it/sec) -training >> step=8603300, episode=1435 reward=0.8058716 (93.13 it/sec) -training >> step=8603400, episode=1435 reward=0.8002145 (282.90 it/sec) -training >> step=8603500, episode=1435 reward=0.7980067 (462.67 it/sec) -training >> step=8603600, episode=1435 reward=0.7880759 (401.50 it/sec) -training >> step=8603700, episode=1435 reward=0.8058552 (391.00 it/sec) -training >> step=8603800, episode=1435 reward=0.7798719 (456.80 it/sec) -training >> step=8603900, episode=1435 reward=0.7871909 (480.61 it/sec) -training >> step=8604000, episode=1435 reward=0.7986383 (465.11 it/sec) -training >> step=8604100, episode=1435 reward=0.7775605 (487.77 it/sec) -training >> step=8604200, episode=1435 reward=0.7879612 (492.80 it/sec) -training >> step=8604300, episode=1435 reward=0.7833688 (442.02 it/sec) -training >> step=8604400, episode=1435 reward=0.7874067 (462.34 it/sec) -training >> step=8604500, episode=1435 reward=0.7980181 (479.76 it/sec) -training >> step=8604600, episode=1435 reward=0.7840276 (415.09 it/sec) -training >> step=8604700, episode=1435 reward=0.7838876 (503.09 it/sec) -training >> step=8604800, episode=1435 reward=0.7920198 (439.17 it/sec) -training >> step=8604900, episode=1435 reward=0.7954066 (508.08 it/sec) -training >> step=8605000, episode=1435 reward=0.7824621 (449.96 it/sec) -training >> step=8605100, episode=1435 reward=0.7788959 (470.39 it/sec) -training >> step=8605200, episode=1435 reward=0.7794574 (511.90 it/sec) -training >> step=8605300, episode=1435 reward=0.7944996 (500.48 it/sec) -training >> step=8605400, episode=1435 reward=0.7908979 (508.21 it/sec) -training >> step=8605500, episode=1435 reward=0.775262 (454.41 it/sec) -training >> step=8605600, episode=1435 reward=0.7910932 (421.83 it/sec) -training >> step=8605700, episode=1435 reward=0.7917805 (466.59 it/sec) -training >> step=8605800, episode=1435 reward=0.7790208 (519.48 it/sec) -training >> step=8605900, episode=1435 reward=0.8058309 (470.42 it/sec) -training >> step=8606000, episode=1435 reward=0.8008091 (453.32 it/sec) -training >> step=8606100, episode=1435 reward=0.7791806 (488.14 it/sec) -training >> step=8606200, episode=1435 reward=0.7680836 (442.49 it/sec) -training >> step=8606300, episode=1435 reward=0.7891443 (460.47 it/sec) -training >> step=8606400, episode=1435 reward=0.7765049 (466.33 it/sec) -training >> step=8606500, episode=1435 reward=0.7996134 (450.00 it/sec) -training >> step=8606600, episode=1435 reward=0.7890483 (450.54 it/sec) -training >> step=8606700, episode=1435 reward=0.7975355 (471.90 it/sec) -training >> step=8606800, episode=1435 reward=0.803787 (499.87 it/sec) -training >> step=8606900, episode=1435 reward=0.7854152 (494.77 it/sec) -training >> step=8607000, episode=1435 reward=0.7895488 (417.55 it/sec) -training >> step=8607100, episode=1435 reward=0.7858467 (458.39 it/sec) -training >> step=8607200, episode=1435 reward=0.7730942 (472.88 it/sec) -training >> step=8607300, episode=1435 reward=0.789566 (479.02 it/sec) -training >> step=8607400, episode=1435 reward=0.7900044 (486.78 it/sec) -training >> step=8607500, episode=1435 reward=0.7925012 (479.18 it/sec) -training >> step=8607600, episode=1435 reward=0.7785802 (441.74 it/sec) -training >> step=8607700, episode=1435 reward=0.7658992 (452.38 it/sec) -training >> step=8607800, episode=1435 reward=0.7910437 (474.14 it/sec) -training >> step=8607900, episode=1435 reward=0.8057637 (466.95 it/sec) -training >> step=8608000, episode=1435 reward=0.799358 (439.82 it/sec) -training >> step=8608100, episode=1435 reward=0.7924728 (483.74 it/sec) -training >> step=8608200, episode=1435 reward=0.8024111 (490.77 it/sec) -training >> step=8608300, episode=1435 reward=0.7950001 (464.30 it/sec) -training >> step=8608400, episode=1435 reward=0.7768152 (335.62 it/sec) -training >> step=8608500, episode=1435 reward=0.7968687 (477.64 it/sec) -training >> step=8608600, episode=1435 reward=0.8011049 (460.89 it/sec) -training >> step=8608700, episode=1435 reward=0.7763503 (518.24 it/sec) -training >> step=8608800, episode=1435 reward=0.7924455 (474.95 it/sec) -training >> step=8608900, episode=1435 reward=0.7899987 (450.17 it/sec) -training >> step=8609000, episode=1435 reward=0.7714618 (496.06 it/sec) -training >> step=8609100, episode=1435 reward=0.8218908 (473.06 it/sec) -training >> step=8609200, episode=1435 reward=0.8090206 (514.68 it/sec) -training >> step=8609300, episode=1436 reward=0.7794288 (74.26 it/sec) -training >> step=8609400, episode=1436 reward=0.7831751 (443.64 it/sec) -training >> step=8609500, episode=1436 reward=0.7806551 (501.10 it/sec) -training >> step=8609600, episode=1436 reward=0.8047045 (488.83 it/sec) -training >> step=8609700, episode=1436 reward=0.7747673 (470.21 it/sec) -training >> step=8609800, episode=1436 reward=0.8049806 (499.66 it/sec) -training >> step=8609900, episode=1436 reward=0.7774242 (459.24 it/sec) -training >> step=8610000, episode=1436 reward=0.7793946 (482.21 it/sec) -training >> step=8610100, episode=1436 reward=0.809708 (502.90 it/sec) -training >> step=8610200, episode=1436 reward=0.7984565 (480.72 it/sec) -training >> step=8610300, episode=1436 reward=0.8037829 (488.52 it/sec) -training >> step=8610400, episode=1436 reward=0.7808166 (465.11 it/sec) -training >> step=8610500, episode=1436 reward=0.7807786 (443.40 it/sec) -training >> step=8610600, episode=1436 reward=0.7791709 (460.37 it/sec) -training >> step=8610700, episode=1436 reward=0.7770509 (517.80 it/sec) -training >> step=8610800, episode=1436 reward=0.7784588 (410.75 it/sec) -training >> step=8610900, episode=1436 reward=0.780098 (463.35 it/sec) -training >> step=8611000, episode=1436 reward=0.7998167 (504.98 it/sec) -training >> step=8611100, episode=1436 reward=0.7971115 (488.65 it/sec) -training >> step=8611200, episode=1436 reward=0.7974703 (454.14 it/sec) -training >> step=8611300, episode=1436 reward=0.7905074 (425.34 it/sec) -training >> step=8611400, episode=1436 reward=0.7911037 (469.17 it/sec) -training >> step=8611500, episode=1436 reward=0.8047141 (479.29 it/sec) -training >> step=8611600, episode=1436 reward=0.7931504 (456.69 it/sec) -training >> step=8611700, episode=1436 reward=0.8154811 (406.77 it/sec) -training >> step=8611800, episode=1436 reward=0.8158633 (480.15 it/sec) -training >> step=8611900, episode=1436 reward=0.7672149 (496.95 it/sec) -training >> step=8612000, episode=1436 reward=0.7769939 (451.00 it/sec) -training >> step=8612100, episode=1436 reward=0.7910199 (420.79 it/sec) -training >> step=8612200, episode=1436 reward=0.8000253 (433.29 it/sec) -training >> step=8612300, episode=1436 reward=0.7873724 (470.01 it/sec) -training >> step=8612400, episode=1436 reward=0.788689 (457.24 it/sec) -training >> step=8612500, episode=1436 reward=0.7942353 (473.70 it/sec) -training >> step=8612600, episode=1436 reward=0.7756548 (468.06 it/sec) -training >> step=8612700, episode=1436 reward=0.7957204 (455.26 it/sec) -training >> step=8612800, episode=1436 reward=0.7672098 (489.90 it/sec) -training >> step=8612900, episode=1436 reward=0.7853703 (460.23 it/sec) -training >> step=8613000, episode=1436 reward=0.7965015 (482.82 it/sec) -training >> step=8613100, episode=1436 reward=0.7987268 (452.37 it/sec) -training >> step=8613200, episode=1436 reward=0.7868753 (464.57 it/sec) -training >> step=8613300, episode=1436 reward=0.8068983 (462.31 it/sec) -training >> step=8613400, episode=1436 reward=0.7999024 (447.45 it/sec) -training >> step=8613500, episode=1436 reward=0.8129126 (431.60 it/sec) -training >> step=8613600, episode=1436 reward=0.7693188 (420.60 it/sec) -training >> step=8613700, episode=1436 reward=0.7696922 (466.43 it/sec) -training >> step=8613800, episode=1436 reward=0.7942018 (474.28 it/sec) -training >> step=8613900, episode=1436 reward=0.7907315 (433.08 it/sec) -training >> step=8614000, episode=1436 reward=0.8007765 (440.94 it/sec) -training >> step=8614100, episode=1436 reward=0.7928633 (391.97 it/sec) -training >> step=8614200, episode=1436 reward=0.7961272 (397.90 it/sec) -training >> step=8614300, episode=1436 reward=0.7735657 (413.87 it/sec) -training >> step=8614400, episode=1436 reward=0.801054 (413.95 it/sec) -training >> step=8614500, episode=1436 reward=0.8074576 (432.97 it/sec) -training >> step=8614600, episode=1436 reward=0.7964705 (336.47 it/sec) -training >> step=8614700, episode=1436 reward=0.7932246 (520.93 it/sec) -training >> step=8614800, episode=1436 reward=0.7861252 (470.64 it/sec) -training >> step=8614900, episode=1436 reward=0.7815895 (433.92 it/sec) -training >> step=8615000, episode=1436 reward=0.7928229 (436.88 it/sec) -training >> step=8615100, episode=1436 reward=0.7974579 (472.64 it/sec) -training >> step=8615200, episode=1436 reward=0.798648 (455.57 it/sec) -training >> step=8615300, episode=1437 reward=0.7947656 (92.56 it/sec) -training >> step=8615400, episode=1437 reward=0.7836977 (479.40 it/sec) -training >> step=8615500, episode=1437 reward=0.7928632 (450.52 it/sec) -training >> step=8615600, episode=1437 reward=0.7613762 (431.23 it/sec) -training >> step=8615700, episode=1437 reward=0.7817452 (556.40 it/sec) -training >> step=8615800, episode=1437 reward=0.784145 (535.82 it/sec) -training >> step=8615900, episode=1437 reward=0.7949033 (553.88 it/sec) -training >> step=8616000, episode=1437 reward=0.8082398 (516.98 it/sec) -training >> step=8616100, episode=1437 reward=0.7870868 (510.53 it/sec) -training >> step=8616200, episode=1437 reward=0.7853984 (522.71 it/sec) -training >> step=8616300, episode=1437 reward=0.8073754 (509.07 it/sec) -training >> step=8616400, episode=1437 reward=0.7984682 (564.50 it/sec) -training >> step=8616500, episode=1437 reward=0.7962087 (495.67 it/sec) -training >> step=8616600, episode=1437 reward=0.7814496 (478.91 it/sec) -training >> step=8616700, episode=1437 reward=0.7852496 (533.72 it/sec) -training >> step=8616800, episode=1437 reward=0.7813845 (471.92 it/sec) -training >> step=8616900, episode=1437 reward=0.7866573 (487.01 it/sec) -training >> step=8617000, episode=1437 reward=0.7960454 (459.18 it/sec) -training >> step=8617100, episode=1437 reward=0.793811 (514.28 it/sec) -training >> step=8617200, episode=1437 reward=0.7862037 (521.54 it/sec) -training >> step=8617300, episode=1437 reward=0.7767508 (509.34 it/sec) -training >> step=8617400, episode=1437 reward=0.7935072 (548.49 it/sec) -training >> step=8617500, episode=1437 reward=0.7894129 (520.59 it/sec) -training >> step=8617600, episode=1437 reward=0.8017886 (496.28 it/sec) -training >> step=8617700, episode=1437 reward=0.776047 (512.40 it/sec) -training >> step=8617800, episode=1437 reward=0.7727531 (536.74 it/sec) -training >> step=8617900, episode=1437 reward=0.8106661 (523.35 it/sec) -training >> step=8618000, episode=1437 reward=0.776193 (510.82 it/sec) -training >> step=8618100, episode=1437 reward=0.7713539 (508.20 it/sec) -training >> step=8618200, episode=1437 reward=0.8021304 (480.94 it/sec) -training >> step=8618300, episode=1437 reward=0.798751 (481.64 it/sec) -training >> step=8618400, episode=1437 reward=0.7783501 (536.18 it/sec) -training >> step=8618500, episode=1437 reward=0.7926089 (548.90 it/sec) -training >> step=8618600, episode=1437 reward=0.787948 (521.11 it/sec) -training >> step=8618700, episode=1437 reward=0.8138257 (512.01 it/sec) -training >> step=8618800, episode=1437 reward=0.7953054 (528.13 it/sec) -training >> step=8618900, episode=1437 reward=0.7809768 (524.60 it/sec) -training >> step=8619000, episode=1437 reward=0.7875487 (540.56 it/sec) -training >> step=8619100, episode=1437 reward=0.7959104 (519.77 it/sec) -training >> step=8619200, episode=1437 reward=0.7899486 (512.94 it/sec) -training >> step=8619300, episode=1437 reward=0.7860417 (525.90 it/sec) -training >> step=8619400, episode=1437 reward=0.7896966 (503.77 it/sec) -training >> step=8619500, episode=1437 reward=0.7939212 (555.34 it/sec) -training >> step=8619600, episode=1437 reward=0.7810575 (509.55 it/sec) -training >> step=8619700, episode=1437 reward=0.7753832 (463.64 it/sec) -training >> step=8619800, episode=1437 reward=0.8013381 (515.16 it/sec) -training >> step=8619900, episode=1437 reward=0.8028242 (546.99 it/sec) -training >> step=8620000, episode=1437 reward=0.7874229 (510.23 it/sec) -training >> step=8620100, episode=1437 reward=0.7970099 (551.77 it/sec) -training >> step=8620200, episode=1437 reward=0.7963431 (503.96 it/sec) -training >> step=8620300, episode=1437 reward=0.7781201 (536.25 it/sec) -training >> step=8620400, episode=1437 reward=0.8018795 (538.08 it/sec) -training >> step=8620500, episode=1437 reward=0.795541 (534.57 it/sec) -training >> step=8620600, episode=1437 reward=0.8053992 (576.97 it/sec) -training >> step=8620700, episode=1437 reward=0.7949772 (393.62 it/sec) -training >> step=8620800, episode=1437 reward=0.7983482 (526.03 it/sec) -training >> step=8620900, episode=1437 reward=0.7962306 (510.21 it/sec) -training >> step=8621000, episode=1437 reward=0.7880587 (535.83 it/sec) -training >> step=8621100, episode=1437 reward=0.7760235 (496.62 it/sec) -training >> step=8621200, episode=1437 reward=0.7905864 (529.98 it/sec) -training >> step=8621300, episode=1438 reward=0.7988623 (130.63 it/sec) -training >> step=8621400, episode=1438 reward=0.7864192 (532.98 it/sec) -training >> step=8621500, episode=1438 reward=0.77597 (533.57 it/sec) -training >> step=8621600, episode=1438 reward=0.7935658 (497.05 it/sec) -training >> step=8621700, episode=1438 reward=0.79811 (514.52 it/sec) -training >> step=8621800, episode=1438 reward=0.7913694 (540.16 it/sec) -training >> step=8621900, episode=1438 reward=0.8099753 (515.63 it/sec) -training >> step=8622000, episode=1438 reward=0.7866375 (539.10 it/sec) -training >> step=8622100, episode=1438 reward=0.7886022 (522.68 it/sec) -training >> step=8622200, episode=1438 reward=0.7748501 (506.43 it/sec) -training >> step=8622300, episode=1438 reward=0.7960816 (538.07 it/sec) -training >> step=8622400, episode=1438 reward=0.8039106 (536.15 it/sec) -training >> step=8622500, episode=1438 reward=0.7949257 (521.67 it/sec) -training >> step=8622600, episode=1438 reward=0.7901518 (517.59 it/sec) -training >> step=8622700, episode=1438 reward=0.7902565 (487.45 it/sec) -training >> step=8622800, episode=1438 reward=0.8082448 (482.46 it/sec) -training >> step=8622900, episode=1438 reward=0.7824669 (514.35 it/sec) -training >> step=8623000, episode=1438 reward=0.7830558 (521.97 it/sec) -training >> step=8623100, episode=1438 reward=0.7893651 (503.30 it/sec) -training >> step=8623200, episode=1438 reward=0.770929 (528.98 it/sec) -training >> step=8623300, episode=1438 reward=0.8075769 (532.79 it/sec) -training >> step=8623400, episode=1438 reward=0.7810647 (516.43 it/sec) -training >> step=8623500, episode=1438 reward=0.7933356 (556.06 it/sec) -training >> step=8623600, episode=1438 reward=0.7556925 (543.82 it/sec) -training >> step=8623700, episode=1438 reward=0.791856 (499.14 it/sec) -training >> step=8623800, episode=1438 reward=0.7981787 (524.18 it/sec) -training >> step=8623900, episode=1438 reward=0.7901243 (550.31 it/sec) -training >> step=8624000, episode=1438 reward=0.7806109 (490.32 it/sec) -training >> step=8624100, episode=1438 reward=0.7719882 (479.03 it/sec) -training >> step=8624200, episode=1438 reward=0.803501 (519.63 it/sec) -training >> step=8624300, episode=1438 reward=0.8027016 (474.35 it/sec) -training >> step=8624400, episode=1438 reward=0.7959929 (514.12 it/sec) -training >> step=8624500, episode=1438 reward=0.8088818 (499.16 it/sec) -training >> step=8624600, episode=1438 reward=0.7930949 (500.47 it/sec) -training >> step=8624700, episode=1438 reward=0.7929739 (481.49 it/sec) -training >> step=8624800, episode=1438 reward=0.7974607 (533.34 it/sec) -training >> step=8624900, episode=1438 reward=0.8049805 (501.67 it/sec) -training >> step=8625000, episode=1438 reward=0.8061429 (565.92 it/sec) -training >> step=8625100, episode=1438 reward=0.7862757 (535.23 it/sec) -training >> step=8625200, episode=1438 reward=0.7854252 (503.97 it/sec) -training >> step=8625300, episode=1438 reward=0.7937666 (494.83 it/sec) -training >> step=8625400, episode=1438 reward=0.7805966 (550.58 it/sec) -training >> step=8625500, episode=1438 reward=0.7960224 (543.95 it/sec) -training >> step=8625600, episode=1438 reward=0.8001704 (519.34 it/sec) -training >> step=8625700, episode=1438 reward=0.7711898 (566.33 it/sec) -training >> step=8625800, episode=1438 reward=0.7886173 (503.38 it/sec) -training >> step=8625900, episode=1438 reward=0.790441 (439.41 it/sec) -training >> step=8626000, episode=1438 reward=0.7841699 (516.63 it/sec) -training >> step=8626100, episode=1438 reward=0.7967786 (492.17 it/sec) -training >> step=8626200, episode=1438 reward=0.7863915 (474.42 it/sec) -training >> step=8626300, episode=1438 reward=0.7814173 (484.39 it/sec) -training >> step=8626400, episode=1438 reward=0.7829258 (478.83 it/sec) -training >> step=8626500, episode=1438 reward=0.7739722 (520.48 it/sec) -training >> step=8626600, episode=1438 reward=0.7978408 (514.92 it/sec) -training >> step=8626700, episode=1438 reward=0.7779661 (511.82 it/sec) -training >> step=8626800, episode=1438 reward=0.7867808 (449.63 it/sec) -training >> step=8626900, episode=1438 reward=0.7901158 (348.71 it/sec) -training >> step=8627000, episode=1438 reward=0.7749822 (514.96 it/sec) -training >> step=8627100, episode=1438 reward=0.7757586 (485.69 it/sec) -training >> step=8627200, episode=1438 reward=0.768219 (498.69 it/sec) -training >> step=8627300, episode=1439 reward=0.7963343 (85.50 it/sec) -training >> step=8627400, episode=1439 reward=0.7991417 (469.86 it/sec) -training >> step=8627500, episode=1439 reward=0.7849826 (457.87 it/sec) -training >> step=8627600, episode=1439 reward=0.7840846 (485.99 it/sec) -training >> step=8627700, episode=1439 reward=0.810237 (495.09 it/sec) -training >> step=8627800, episode=1439 reward=0.7899532 (462.05 it/sec) -training >> step=8627900, episode=1439 reward=0.7861614 (493.47 it/sec) -training >> step=8628000, episode=1439 reward=0.7976682 (418.96 it/sec) -training >> step=8628100, episode=1439 reward=0.7858509 (498.79 it/sec) -training >> step=8628200, episode=1439 reward=0.793936 (485.90 it/sec) -training >> step=8628300, episode=1439 reward=0.7931772 (452.27 it/sec) -training >> step=8628400, episode=1439 reward=0.7762133 (457.14 it/sec) -training >> step=8628500, episode=1439 reward=0.7976261 (457.30 it/sec) -training >> step=8628600, episode=1439 reward=0.7912248 (500.37 it/sec) -training >> step=8628700, episode=1439 reward=0.7926179 (412.11 it/sec) -training >> step=8628800, episode=1439 reward=0.7965537 (418.42 it/sec) -training >> step=8628900, episode=1439 reward=0.7998094 (456.76 it/sec) -training >> step=8629000, episode=1439 reward=0.8026153 (423.61 it/sec) -training >> step=8629100, episode=1439 reward=0.7953507 (341.21 it/sec) -training >> step=8629200, episode=1439 reward=0.7712651 (415.18 it/sec) -training >> step=8629300, episode=1439 reward=0.7899522 (414.68 it/sec) -training >> step=8629400, episode=1439 reward=0.8002609 (464.32 it/sec) -training >> step=8629500, episode=1439 reward=0.7725971 (472.51 it/sec) -training >> step=8629600, episode=1439 reward=0.802845 (454.60 it/sec) -training >> step=8629700, episode=1439 reward=0.7854073 (426.05 it/sec) -training >> step=8629800, episode=1439 reward=0.790437 (450.92 it/sec) -training >> step=8629900, episode=1439 reward=0.7947212 (438.49 it/sec) -training >> step=8630000, episode=1439 reward=0.797365 (424.38 it/sec) -training >> step=8630100, episode=1439 reward=0.8117619 (421.40 it/sec) -training >> step=8630200, episode=1439 reward=0.8073694 (421.13 it/sec) -training >> step=8630300, episode=1439 reward=0.7948791 (462.04 it/sec) -training >> step=8630400, episode=1439 reward=0.7941777 (455.10 it/sec) -training >> step=8630500, episode=1439 reward=0.7992791 (451.63 it/sec) -training >> step=8630600, episode=1439 reward=0.7965016 (439.92 it/sec) -training >> step=8630700, episode=1439 reward=0.77075 (430.74 it/sec) -training >> step=8630800, episode=1439 reward=0.7841384 (455.87 it/sec) -training >> step=8630900, episode=1439 reward=0.7881397 (456.22 it/sec) -training >> step=8631000, episode=1439 reward=0.8087021 (458.40 it/sec) -training >> step=8631100, episode=1439 reward=0.812652 (460.47 it/sec) -training >> step=8631200, episode=1439 reward=0.7781115 (484.90 it/sec) -training >> step=8631300, episode=1439 reward=0.7754508 (455.25 it/sec) -training >> step=8631400, episode=1439 reward=0.7984975 (421.09 it/sec) -training >> step=8631500, episode=1439 reward=0.795 (457.36 it/sec) -training >> step=8631600, episode=1439 reward=0.8038877 (472.58 it/sec) -training >> step=8631700, episode=1439 reward=0.8078428 (436.08 it/sec) -training >> step=8631800, episode=1439 reward=0.7894783 (441.77 it/sec) -training >> step=8631900, episode=1439 reward=0.81448 (457.28 it/sec) -training >> step=8632000, episode=1439 reward=0.7891456 (419.27 it/sec) -training >> step=8632100, episode=1439 reward=0.7854555 (452.49 it/sec) -training >> step=8632200, episode=1439 reward=0.7903361 (492.56 it/sec) -training >> step=8632300, episode=1439 reward=0.772669 (475.77 it/sec) -training >> step=8632400, episode=1439 reward=0.8043708 (433.13 it/sec) -training >> step=8632500, episode=1439 reward=0.8029701 (441.62 it/sec) -training >> step=8632600, episode=1439 reward=0.7880423 (449.95 it/sec) -training >> step=8632700, episode=1439 reward=0.7945111 (453.80 it/sec) -training >> step=8632800, episode=1439 reward=0.7816316 (441.44 it/sec) -training >> step=8632900, episode=1439 reward=0.8008728 (465.68 it/sec) -training >> step=8633000, episode=1439 reward=0.8031881 (343.45 it/sec) -training >> step=8633100, episode=1439 reward=0.7955948 (434.34 it/sec) -training >> step=8633200, episode=1439 reward=0.7727036 (477.84 it/sec) -training >> step=8633300, episode=1440 reward=0.8018051 (84.70 it/sec) -training >> step=8633400, episode=1440 reward=0.7971049 (464.91 it/sec) -training >> step=8633500, episode=1440 reward=0.7877496 (424.90 it/sec) -training >> step=8633600, episode=1440 reward=0.78847 (461.38 it/sec) -training >> step=8633700, episode=1440 reward=0.7876298 (457.91 it/sec) -training >> step=8633800, episode=1440 reward=0.8045256 (418.66 it/sec) -training >> step=8633900, episode=1440 reward=0.789212 (455.92 it/sec) -training >> step=8634000, episode=1440 reward=0.7777343 (451.79 it/sec) -training >> step=8634100, episode=1440 reward=0.7822744 (456.92 it/sec) -training >> step=8634200, episode=1440 reward=0.7911838 (454.58 it/sec) -training >> step=8634300, episode=1440 reward=0.7931482 (449.84 it/sec) -training >> step=8634400, episode=1440 reward=0.787369 (469.95 it/sec) -training >> step=8634500, episode=1440 reward=0.7761322 (478.67 it/sec) -training >> step=8634600, episode=1440 reward=0.7996364 (486.01 it/sec) -training >> step=8634700, episode=1440 reward=0.7831127 (454.49 it/sec) -training >> step=8634800, episode=1440 reward=0.7921137 (460.26 it/sec) -training >> step=8634900, episode=1440 reward=0.7868514 (467.16 it/sec) -training >> step=8635000, episode=1440 reward=0.7904342 (428.39 it/sec) -training >> step=8635100, episode=1440 reward=0.7947168 (446.88 it/sec) -training >> step=8635200, episode=1440 reward=0.7904115 (445.01 it/sec) -training >> step=8635300, episode=1440 reward=0.7983692 (461.62 it/sec) -training >> step=8635400, episode=1440 reward=0.7795529 (479.74 it/sec) -training >> step=8635500, episode=1440 reward=0.79906 (454.19 it/sec) -training >> step=8635600, episode=1440 reward=0.7939038 (444.96 it/sec) -training >> step=8635700, episode=1440 reward=0.8050672 (415.27 it/sec) -training >> step=8635800, episode=1440 reward=0.797036 (451.58 it/sec) -training >> step=8635900, episode=1440 reward=0.7777706 (488.01 it/sec) -training >> step=8636000, episode=1440 reward=0.7805057 (488.56 it/sec) -training >> step=8636100, episode=1440 reward=0.8070413 (476.19 it/sec) -training >> step=8636200, episode=1440 reward=0.7896678 (436.64 it/sec) -training >> step=8636300, episode=1440 reward=0.7666706 (479.96 it/sec) -training >> step=8636400, episode=1440 reward=0.811206 (448.27 it/sec) -training >> step=8636500, episode=1440 reward=0.7918622 (471.08 it/sec) -training >> step=8636600, episode=1440 reward=0.7903705 (471.66 it/sec) -training >> step=8636700, episode=1440 reward=0.7919062 (401.66 it/sec) -training >> step=8636800, episode=1440 reward=0.7855189 (533.07 it/sec) -training >> step=8636900, episode=1440 reward=0.7999722 (444.82 it/sec) -training >> step=8637000, episode=1440 reward=0.8027622 (493.38 it/sec) -training >> step=8637100, episode=1440 reward=0.8072342 (461.73 it/sec) -training >> step=8637200, episode=1440 reward=0.7902126 (482.27 it/sec) -training >> step=8637300, episode=1440 reward=0.7913621 (477.16 it/sec) -training >> step=8637400, episode=1440 reward=0.8001151 (472.87 it/sec) -training >> step=8637500, episode=1440 reward=0.8020155 (486.07 it/sec) -training >> step=8637600, episode=1440 reward=0.79804 (449.42 it/sec) -training >> step=8637700, episode=1440 reward=0.797722 (442.58 it/sec) -training >> step=8637800, episode=1440 reward=0.7905166 (461.91 it/sec) -training >> step=8637900, episode=1440 reward=0.7910005 (522.96 it/sec) -training >> step=8638000, episode=1440 reward=0.7983352 (492.22 it/sec) -training >> step=8638100, episode=1440 reward=0.7748339 (466.39 it/sec) -training >> step=8638200, episode=1440 reward=0.7878025 (494.75 it/sec) -training >> step=8638300, episode=1440 reward=0.7880123 (529.23 it/sec) -training >> step=8638400, episode=1440 reward=0.8110275 (498.77 it/sec) -training >> step=8638500, episode=1440 reward=0.7884707 (483.76 it/sec) -training >> step=8638600, episode=1440 reward=0.792097 (485.16 it/sec) -training >> step=8638700, episode=1440 reward=0.7850455 (463.20 it/sec) -training >> step=8638800, episode=1440 reward=0.7798446 (516.86 it/sec) -training >> step=8638900, episode=1440 reward=0.7849647 (499.69 it/sec) -training >> step=8639000, episode=1440 reward=0.7914328 (514.62 it/sec) -training >> step=8639100, episode=1440 reward=0.8205692 (338.70 it/sec) -training >> step=8639200, episode=1440 reward=0.7818485 (473.00 it/sec) -training >> step=8639300, episode=1441 reward=0.7853146 (75.74 it/sec) -training >> step=8639400, episode=1441 reward=0.7947733 (481.29 it/sec) -training >> step=8639500, episode=1441 reward=0.7664701 (461.85 it/sec) -training >> step=8639600, episode=1441 reward=0.7835061 (481.41 it/sec) -training >> step=8639700, episode=1441 reward=0.791387 (488.38 it/sec) -training >> step=8639800, episode=1441 reward=0.7807052 (467.66 it/sec) -training >> step=8639900, episode=1441 reward=0.7946001 (513.65 it/sec) -training >> step=8640000, episode=1441 reward=0.7675735 (461.47 it/sec) -training >> step=8640100, episode=1441 reward=0.792541 (468.00 it/sec) -training >> step=8640200, episode=1441 reward=0.7879583 (502.92 it/sec) -training >> step=8640300, episode=1441 reward=0.8059076 (483.84 it/sec) -training >> step=8640400, episode=1441 reward=0.7937803 (506.42 it/sec) -training >> step=8640500, episode=1441 reward=0.7968037 (458.83 it/sec) -training >> step=8640600, episode=1441 reward=0.7829079 (479.04 it/sec) -training >> step=8640700, episode=1441 reward=0.7657089 (440.92 it/sec) -training >> step=8640800, episode=1441 reward=0.7939157 (416.18 it/sec) -training >> step=8640900, episode=1441 reward=0.7961029 (458.08 it/sec) -training >> step=8641000, episode=1441 reward=0.7894071 (425.47 it/sec) -training >> step=8641100, episode=1441 reward=0.7953477 (483.42 it/sec) -training >> step=8641200, episode=1441 reward=0.7706813 (486.48 it/sec) -training >> step=8641300, episode=1441 reward=0.7711217 (466.97 it/sec) -training >> step=8641400, episode=1441 reward=0.7875115 (495.24 it/sec) -training >> step=8641500, episode=1441 reward=0.7745218 (434.73 it/sec) -training >> step=8641600, episode=1441 reward=0.7926496 (520.37 it/sec) -training >> step=8641700, episode=1441 reward=0.79549 (489.95 it/sec) -training >> step=8641800, episode=1441 reward=0.7871844 (448.51 it/sec) -training >> step=8641900, episode=1441 reward=0.8038301 (420.41 it/sec) -training >> step=8642000, episode=1441 reward=0.7908331 (489.95 it/sec) -training >> step=8642100, episode=1441 reward=0.7677933 (486.06 it/sec) -training >> step=8642200, episode=1441 reward=0.7767654 (489.20 it/sec) -training >> step=8642300, episode=1441 reward=0.8146665 (469.53 it/sec) -training >> step=8642400, episode=1441 reward=0.7966921 (456.21 it/sec) -training >> step=8642500, episode=1441 reward=0.8038533 (470.20 it/sec) -training >> step=8642600, episode=1441 reward=0.7848371 (468.50 it/sec) -training >> step=8642700, episode=1441 reward=0.7981052 (492.36 it/sec) -training >> step=8642800, episode=1441 reward=0.7724444 (501.86 it/sec) -training >> step=8642900, episode=1441 reward=0.7841067 (459.85 it/sec) -training >> step=8643000, episode=1441 reward=0.793386 (471.15 it/sec) -training >> step=8643100, episode=1441 reward=0.7843867 (477.49 it/sec) -training >> step=8643200, episode=1441 reward=0.7955116 (485.75 it/sec) -training >> step=8643300, episode=1441 reward=0.7903973 (489.29 it/sec) -training >> step=8643400, episode=1441 reward=0.7887776 (470.86 it/sec) -training >> step=8643500, episode=1441 reward=0.8014086 (487.37 it/sec) -training >> step=8643600, episode=1441 reward=0.7941814 (432.98 it/sec) -training >> step=8643700, episode=1441 reward=0.8040258 (456.70 it/sec) -training >> step=8643800, episode=1441 reward=0.7699037 (554.44 it/sec) -training >> step=8643900, episode=1441 reward=0.793057 (428.97 it/sec) -training >> step=8644000, episode=1441 reward=0.7815161 (462.16 it/sec) -training >> step=8644100, episode=1441 reward=0.8062136 (491.02 it/sec) -training >> step=8644200, episode=1441 reward=0.788746 (497.34 it/sec) -training >> step=8644300, episode=1441 reward=0.7927304 (490.18 it/sec) -training >> step=8644400, episode=1441 reward=0.7849064 (447.68 it/sec) -training >> step=8644500, episode=1441 reward=0.7795464 (478.39 it/sec) -training >> step=8644600, episode=1441 reward=0.8056334 (479.53 it/sec) -training >> step=8644700, episode=1441 reward=0.7677363 (479.83 it/sec) -training >> step=8644800, episode=1441 reward=0.7921529 (462.41 it/sec) -training >> step=8644900, episode=1441 reward=0.7834517 (497.76 it/sec) -training >> step=8645000, episode=1441 reward=0.7911001 (494.03 it/sec) -training >> step=8645100, episode=1441 reward=0.7813278 (406.64 it/sec) -training >> step=8645200, episode=1441 reward=0.8077118 (473.70 it/sec) -training >> step=8645300, episode=1442 reward=0.7996839 (88.38 it/sec) -training >> step=8645400, episode=1442 reward=0.7792749 (448.17 it/sec) -training >> step=8645500, episode=1442 reward=0.7773722 (477.02 it/sec) -training >> step=8645600, episode=1442 reward=0.7852324 (495.78 it/sec) -training >> step=8645700, episode=1442 reward=0.7837162 (464.48 it/sec) -training >> step=8645800, episode=1442 reward=0.7837088 (483.83 it/sec) -training >> step=8645900, episode=1442 reward=0.7874644 (456.09 it/sec) -training >> step=8646000, episode=1442 reward=0.7896488 (470.55 it/sec) -training >> step=8646100, episode=1442 reward=0.7988067 (493.85 it/sec) -training >> step=8646200, episode=1442 reward=0.7929462 (433.94 it/sec) -training >> step=8646300, episode=1442 reward=0.8084121 (488.62 it/sec) -training >> step=8646400, episode=1442 reward=0.7871773 (437.50 it/sec) -training >> step=8646500, episode=1442 reward=0.7866743 (470.00 it/sec) -training >> step=8646600, episode=1442 reward=0.7929673 (484.37 it/sec) -training >> step=8646700, episode=1442 reward=0.7728148 (470.90 it/sec) -training >> step=8646800, episode=1442 reward=0.8026057 (450.89 it/sec) -training >> step=8646900, episode=1442 reward=0.7882174 (484.84 it/sec) -training >> step=8647000, episode=1442 reward=0.8007556 (443.72 it/sec) -training >> step=8647100, episode=1442 reward=0.7971656 (461.70 it/sec) -training >> step=8647200, episode=1442 reward=0.7788362 (438.78 it/sec) -training >> step=8647300, episode=1442 reward=0.8193857 (419.41 it/sec) -training >> step=8647400, episode=1442 reward=0.791276 (457.39 it/sec) -training >> step=8647500, episode=1442 reward=0.7867267 (482.32 it/sec) -training >> step=8647600, episode=1442 reward=0.7944129 (428.10 it/sec) -training >> step=8647700, episode=1442 reward=0.7868903 (468.48 it/sec) -training >> step=8647800, episode=1442 reward=0.7927451 (451.41 it/sec) -training >> step=8647900, episode=1442 reward=0.8031333 (425.16 it/sec) -training >> step=8648000, episode=1442 reward=0.7934577 (452.62 it/sec) -training >> step=8648100, episode=1442 reward=0.8086007 (478.90 it/sec) -training >> step=8648200, episode=1442 reward=0.8122917 (442.01 it/sec) -training >> step=8648300, episode=1442 reward=0.7935804 (439.41 it/sec) -training >> step=8648400, episode=1442 reward=0.7864035 (481.47 it/sec) -training >> step=8648500, episode=1442 reward=0.7952565 (494.44 it/sec) -training >> step=8648600, episode=1442 reward=0.7840282 (473.05 it/sec) -training >> step=8648700, episode=1442 reward=0.7734592 (428.56 it/sec) -training >> step=8648800, episode=1442 reward=0.7877787 (509.75 it/sec) -training >> step=8648900, episode=1442 reward=0.7811441 (492.95 it/sec) -training >> step=8649000, episode=1442 reward=0.7695583 (507.85 it/sec) -training >> step=8649100, episode=1442 reward=0.7726397 (480.14 it/sec) -training >> step=8649200, episode=1442 reward=0.7840752 (454.34 it/sec) -training >> step=8649300, episode=1442 reward=0.7738839 (491.59 it/sec) -training >> step=8649400, episode=1442 reward=0.7696329 (455.02 it/sec) -training >> step=8649500, episode=1442 reward=0.7941824 (515.60 it/sec) -training >> step=8649600, episode=1442 reward=0.8096741 (512.46 it/sec) -training >> step=8649700, episode=1442 reward=0.8053913 (470.24 it/sec) -training >> step=8649800, episode=1442 reward=0.7902351 (464.39 it/sec) -training >> step=8649900, episode=1442 reward=0.7892323 (514.52 it/sec) -training >> step=8650000, episode=1442 reward=0.8152348 (462.62 it/sec) -training >> step=8650100, episode=1442 reward=0.7776014 (503.11 it/sec) -training >> step=8650200, episode=1442 reward=0.7819321 (470.02 it/sec) -training >> step=8650300, episode=1442 reward=0.791194 (487.98 it/sec) -training >> step=8650400, episode=1442 reward=0.7934404 (476.94 it/sec) -training >> step=8650500, episode=1442 reward=0.7801905 (487.58 it/sec) -training >> step=8650600, episode=1442 reward=0.7929102 (483.67 it/sec) -training >> step=8650700, episode=1442 reward=0.7996987 (454.63 it/sec) -training >> step=8650800, episode=1442 reward=0.7777318 (442.94 it/sec) -training >> step=8650900, episode=1442 reward=0.7800112 (445.75 it/sec) -training >> step=8651000, episode=1442 reward=0.8153595 (500.73 it/sec) -training >> step=8651100, episode=1442 reward=0.7902531 (486.48 it/sec) -training >> step=8651200, episode=1442 reward=0.7818675 (444.80 it/sec) -training >> step=8651300, episode=1443 reward=0.7817883 (74.34 it/sec) -training >> step=8651400, episode=1443 reward=0.7864544 (466.35 it/sec) -training >> step=8651500, episode=1443 reward=0.7937548 (500.55 it/sec) -training >> step=8651600, episode=1443 reward=0.7775781 (467.00 it/sec) -training >> step=8651700, episode=1443 reward=0.8016309 (445.90 it/sec) -training >> step=8651800, episode=1443 reward=0.7872511 (462.01 it/sec) -training >> step=8651900, episode=1443 reward=0.7646202 (496.10 it/sec) -training >> step=8652000, episode=1443 reward=0.7997091 (433.41 it/sec) -training >> step=8652100, episode=1443 reward=0.7979593 (435.28 it/sec) -training >> step=8652200, episode=1443 reward=0.7887161 (462.25 it/sec) -training >> step=8652300, episode=1443 reward=0.7994894 (478.82 it/sec) -training >> step=8652400, episode=1443 reward=0.8039178 (461.74 it/sec) -training >> step=8652500, episode=1443 reward=0.7809981 (522.92 it/sec) -training >> step=8652600, episode=1443 reward=0.7752939 (436.26 it/sec) -training >> step=8652700, episode=1443 reward=0.7943034 (478.20 it/sec) -training >> step=8652800, episode=1443 reward=0.8037891 (487.79 it/sec) -training >> step=8652900, episode=1443 reward=0.7899748 (511.18 it/sec) -training >> step=8653000, episode=1443 reward=0.7846198 (502.12 it/sec) -training >> step=8653100, episode=1443 reward=0.770958 (460.12 it/sec) -training >> step=8653200, episode=1443 reward=0.795962 (460.09 it/sec) -training >> step=8653300, episode=1443 reward=0.7827317 (507.64 it/sec) -training >> step=8653400, episode=1443 reward=0.8062194 (457.07 it/sec) -training >> step=8653500, episode=1443 reward=0.7794749 (399.99 it/sec) -training >> step=8653600, episode=1443 reward=0.7890064 (444.40 it/sec) -training >> step=8653700, episode=1443 reward=0.7882419 (502.60 it/sec) -training >> step=8653800, episode=1443 reward=0.7747133 (434.14 it/sec) -training >> step=8653900, episode=1443 reward=0.7754972 (384.91 it/sec) -training >> step=8654000, episode=1443 reward=0.7964815 (450.44 it/sec) -training >> step=8654100, episode=1443 reward=0.7889604 (458.92 it/sec) -training >> step=8654200, episode=1443 reward=0.7943305 (506.97 it/sec) -training >> step=8654300, episode=1443 reward=0.8140936 (503.89 it/sec) -training >> step=8654400, episode=1443 reward=0.7960208 (466.03 it/sec) -training >> step=8654500, episode=1443 reward=0.7892658 (463.77 it/sec) -training >> step=8654600, episode=1443 reward=0.810779 (454.98 it/sec) -training >> step=8654700, episode=1443 reward=0.8057904 (543.31 it/sec) -training >> step=8654800, episode=1443 reward=0.8064212 (489.53 it/sec) -training >> step=8654900, episode=1443 reward=0.7840925 (441.13 it/sec) -training >> step=8655000, episode=1443 reward=0.8003013 (423.40 it/sec) -training >> step=8655100, episode=1443 reward=0.7860478 (455.79 it/sec) -training >> step=8655200, episode=1443 reward=0.7923694 (483.16 it/sec) -training >> step=8655300, episode=1443 reward=0.7933547 (494.52 it/sec) -training >> step=8655400, episode=1443 reward=0.7933361 (486.50 it/sec) -training >> step=8655500, episode=1443 reward=0.7793668 (462.43 it/sec) -training >> step=8655600, episode=1443 reward=0.7865462 (458.22 it/sec) -training >> step=8655700, episode=1443 reward=0.7932754 (495.27 it/sec) -training >> step=8655800, episode=1443 reward=0.7757683 (493.55 it/sec) -training >> step=8655900, episode=1443 reward=0.7768123 (446.68 it/sec) -training >> step=8656000, episode=1443 reward=0.7941158 (446.40 it/sec) -training >> step=8656100, episode=1443 reward=0.7780529 (480.93 it/sec) -training >> step=8656200, episode=1443 reward=0.8104313 (487.81 it/sec) -training >> step=8656300, episode=1443 reward=0.7721901 (504.90 it/sec) -training >> step=8656400, episode=1443 reward=0.7901595 (474.44 it/sec) -training >> step=8656500, episode=1443 reward=0.7753921 (455.67 it/sec) -training >> step=8656600, episode=1443 reward=0.8037291 (494.77 it/sec) -training >> step=8656700, episode=1443 reward=0.7989138 (495.80 it/sec) -training >> step=8656800, episode=1443 reward=0.7920805 (494.74 it/sec) -training >> step=8656900, episode=1443 reward=0.8013321 (475.53 it/sec) -training >> step=8657000, episode=1443 reward=0.7934588 (459.53 it/sec) -training >> step=8657100, episode=1443 reward=0.7938502 (478.33 it/sec) -training >> step=8657200, episode=1443 reward=0.7862306 (501.98 it/sec) -training >> step=8657300, episode=1444 reward=0.7844599 (86.77 it/sec) -training >> step=8657400, episode=1444 reward=0.7991639 (457.43 it/sec) -training >> step=8657500, episode=1444 reward=0.7641319 (444.17 it/sec) -training >> step=8657600, episode=1444 reward=0.7818906 (477.70 it/sec) -training >> step=8657700, episode=1444 reward=0.7944001 (533.02 it/sec) -training >> step=8657800, episode=1444 reward=0.8013744 (488.25 it/sec) -training >> step=8657900, episode=1444 reward=0.7795856 (390.52 it/sec) -training >> step=8658000, episode=1444 reward=0.7935187 (458.48 it/sec) -training >> step=8658100, episode=1444 reward=0.7879493 (442.56 it/sec) -training >> step=8658200, episode=1444 reward=0.7663698 (472.35 it/sec) -training >> step=8658300, episode=1444 reward=0.7985395 (405.61 it/sec) -training >> step=8658400, episode=1444 reward=0.7967364 (435.14 it/sec) -training >> step=8658500, episode=1444 reward=0.788864 (441.70 it/sec) -training >> step=8658600, episode=1444 reward=0.7982407 (498.57 it/sec) -training >> step=8658700, episode=1444 reward=0.7887013 (450.28 it/sec) -training >> step=8658800, episode=1444 reward=0.7878539 (440.76 it/sec) -training >> step=8658900, episode=1444 reward=0.8055449 (433.15 it/sec) -training >> step=8659000, episode=1444 reward=0.7869598 (495.11 it/sec) -training >> step=8659100, episode=1444 reward=0.7893484 (481.81 it/sec) -training >> step=8659200, episode=1444 reward=0.7844482 (410.22 it/sec) -training >> step=8659300, episode=1444 reward=0.7842386 (389.89 it/sec) -training >> step=8659400, episode=1444 reward=0.7822891 (494.06 it/sec) -training >> step=8659500, episode=1444 reward=0.7655184 (507.28 it/sec) -training >> step=8659600, episode=1444 reward=0.7867869 (535.42 it/sec) -training >> step=8659700, episode=1444 reward=0.7945857 (504.30 it/sec) -training >> step=8659800, episode=1444 reward=0.7912167 (472.73 it/sec) -training >> step=8659900, episode=1444 reward=0.7934494 (455.04 it/sec) -training >> step=8660000, episode=1444 reward=0.7908291 (530.99 it/sec) -training >> step=8660100, episode=1444 reward=0.7940673 (504.67 it/sec) -training >> step=8660200, episode=1444 reward=0.7940928 (535.36 it/sec) -training >> step=8660300, episode=1444 reward=0.7978948 (455.04 it/sec) -training >> step=8660400, episode=1444 reward=0.7920056 (475.94 it/sec) -training >> step=8660500, episode=1444 reward=0.7859057 (484.92 it/sec) -training >> step=8660600, episode=1444 reward=0.7878586 (460.77 it/sec) -training >> step=8660700, episode=1444 reward=0.7996484 (503.46 it/sec) -training >> step=8660800, episode=1444 reward=0.7832352 (468.89 it/sec) -training >> step=8660900, episode=1444 reward=0.7712728 (459.61 it/sec) -training >> step=8661000, episode=1444 reward=0.7905509 (498.65 it/sec) -training >> step=8661100, episode=1444 reward=0.7740692 (494.61 it/sec) -training >> step=8661200, episode=1444 reward=0.8130093 (442.34 it/sec) -training >> step=8661300, episode=1444 reward=0.8005959 (449.86 it/sec) -training >> step=8661400, episode=1444 reward=0.7848187 (483.70 it/sec) -training >> step=8661500, episode=1444 reward=0.788062 (521.28 it/sec) -training >> step=8661600, episode=1444 reward=0.7819744 (467.60 it/sec) -training >> step=8661700, episode=1444 reward=0.8036848 (494.56 it/sec) -training >> step=8661800, episode=1444 reward=0.7925991 (460.72 it/sec) -training >> step=8661900, episode=1444 reward=0.7896526 (496.17 it/sec) -training >> step=8662000, episode=1444 reward=0.7987206 (501.08 it/sec) -training >> step=8662100, episode=1444 reward=0.7810948 (452.32 it/sec) -training >> step=8662200, episode=1444 reward=0.8025407 (491.65 it/sec) -training >> step=8662300, episode=1444 reward=0.7869854 (454.57 it/sec) -training >> step=8662400, episode=1444 reward=0.7936317 (459.72 it/sec) -training >> step=8662500, episode=1444 reward=0.7819749 (526.89 it/sec) -training >> step=8662600, episode=1444 reward=0.7787867 (489.77 it/sec) -training >> step=8662700, episode=1444 reward=0.8179875 (414.91 it/sec) -training >> step=8662800, episode=1444 reward=0.8019164 (452.16 it/sec) -training >> step=8662900, episode=1444 reward=0.7773046 (505.71 it/sec) -training >> step=8663000, episode=1444 reward=0.7759572 (474.80 it/sec) -training >> step=8663100, episode=1444 reward=0.7729933 (448.75 it/sec) -training >> step=8663200, episode=1444 reward=0.7914685 (468.52 it/sec) -training >> step=8663300, episode=1445 reward=0.7708216 (69.36 it/sec) -training >> step=8663400, episode=1445 reward=0.8029273 (448.22 it/sec) -training >> step=8663500, episode=1445 reward=0.7654038 (456.96 it/sec) -training >> step=8663600, episode=1445 reward=0.7879696 (480.54 it/sec) -training >> step=8663700, episode=1445 reward=0.7872767 (428.54 it/sec) -training >> step=8663800, episode=1445 reward=0.7851105 (509.13 it/sec) -training >> step=8663900, episode=1445 reward=0.7737833 (489.32 it/sec) -training >> step=8664000, episode=1445 reward=0.7970881 (490.44 it/sec) -training >> step=8664100, episode=1445 reward=0.7881795 (471.67 it/sec) -training >> step=8664200, episode=1445 reward=0.7884187 (473.10 it/sec) -training >> step=8664300, episode=1445 reward=0.7850549 (454.32 it/sec) -training >> step=8664400, episode=1445 reward=0.7812782 (485.52 it/sec) -training >> step=8664500, episode=1445 reward=0.8059674 (487.66 it/sec) -training >> step=8664600, episode=1445 reward=0.7905579 (492.03 it/sec) -training >> step=8664700, episode=1445 reward=0.7968356 (445.76 it/sec) -training >> step=8664800, episode=1445 reward=0.7954329 (477.77 it/sec) -training >> step=8664900, episode=1445 reward=0.785769 (483.46 it/sec) -training >> step=8665000, episode=1445 reward=0.7953843 (464.91 it/sec) -training >> step=8665100, episode=1445 reward=0.791274 (480.06 it/sec) -training >> step=8665200, episode=1445 reward=0.7872532 (475.82 it/sec) -training >> step=8665300, episode=1445 reward=0.7934055 (507.49 it/sec) -training >> step=8665400, episode=1445 reward=0.7876168 (471.75 it/sec) -training >> step=8665500, episode=1445 reward=0.79918 (484.79 it/sec) -training >> step=8665600, episode=1445 reward=0.7786853 (459.82 it/sec) -training >> step=8665700, episode=1445 reward=0.7865383 (453.48 it/sec) -training >> step=8665800, episode=1445 reward=0.8006417 (470.39 it/sec) -training >> step=8665900, episode=1445 reward=0.7925554 (519.17 it/sec) -training >> step=8666000, episode=1445 reward=0.7873688 (483.81 it/sec) -training >> step=8666100, episode=1445 reward=0.7836979 (449.32 it/sec) -training >> step=8666200, episode=1445 reward=0.7693657 (492.16 it/sec) -training >> step=8666300, episode=1445 reward=0.7809697 (487.02 it/sec) -training >> step=8666400, episode=1445 reward=0.803912 (481.90 it/sec) -training >> step=8666500, episode=1445 reward=0.792901 (469.38 it/sec) -training >> step=8666600, episode=1445 reward=0.80085 (458.13 it/sec) -training >> step=8666700, episode=1445 reward=0.7987288 (441.02 it/sec) -training >> step=8666800, episode=1445 reward=0.7853528 (485.03 it/sec) -training >> step=8666900, episode=1445 reward=0.7807195 (461.83 it/sec) -training >> step=8667000, episode=1445 reward=0.7961626 (422.87 it/sec) -training >> step=8667100, episode=1445 reward=0.7833565 (442.09 it/sec) -training >> step=8667200, episode=1445 reward=0.7857275 (483.34 it/sec) -training >> step=8667300, episode=1445 reward=0.7995058 (487.44 it/sec) -training >> step=8667400, episode=1445 reward=0.7878473 (466.93 it/sec) -training >> step=8667500, episode=1445 reward=0.795324 (441.27 it/sec) -training >> step=8667600, episode=1445 reward=0.8031758 (450.14 it/sec) -training >> step=8667700, episode=1445 reward=0.802721 (520.11 it/sec) -training >> step=8667800, episode=1445 reward=0.7972518 (477.21 it/sec) -training >> step=8667900, episode=1445 reward=0.7650874 (464.08 it/sec) -training >> step=8668000, episode=1445 reward=0.7883299 (490.22 it/sec) -training >> step=8668100, episode=1445 reward=0.7772129 (437.56 it/sec) -training >> step=8668200, episode=1445 reward=0.8089799 (437.91 it/sec) -training >> step=8668300, episode=1445 reward=0.8078904 (442.03 it/sec) -training >> step=8668400, episode=1445 reward=0.7875473 (467.52 it/sec) -training >> step=8668500, episode=1445 reward=0.7879466 (486.90 it/sec) -training >> step=8668600, episode=1445 reward=0.7927643 (432.60 it/sec) -training >> step=8668700, episode=1445 reward=0.7942685 (516.95 it/sec) -training >> step=8668800, episode=1445 reward=0.7938593 (504.99 it/sec) -training >> step=8668900, episode=1445 reward=0.7904432 (488.31 it/sec) -training >> step=8669000, episode=1445 reward=0.7966311 (383.94 it/sec) -training >> step=8669100, episode=1445 reward=0.8060942 (454.36 it/sec) -training >> step=8669200, episode=1445 reward=0.8029008 (415.23 it/sec) -training >> step=8669300, episode=1446 reward=0.7983725 (74.32 it/sec) -training >> step=8669400, episode=1446 reward=0.7911738 (465.97 it/sec) -training >> step=8669500, episode=1446 reward=0.767334 (461.25 it/sec) -training >> step=8669600, episode=1446 reward=0.7940613 (465.21 it/sec) -training >> step=8669700, episode=1446 reward=0.7927858 (506.75 it/sec) -training >> step=8669800, episode=1446 reward=0.7933907 (471.49 it/sec) -training >> step=8669900, episode=1446 reward=0.7809234 (496.89 it/sec) -training >> step=8670000, episode=1446 reward=0.7724907 (442.91 it/sec) -training >> step=8670100, episode=1446 reward=0.8043383 (503.50 it/sec) -training >> step=8670200, episode=1446 reward=0.7651836 (454.42 it/sec) -training >> step=8670300, episode=1446 reward=0.7685134 (486.51 it/sec) -training >> step=8670400, episode=1446 reward=0.7813941 (509.55 it/sec) -training >> step=8670500, episode=1446 reward=0.7960834 (463.42 it/sec) -training >> step=8670600, episode=1446 reward=0.7737872 (483.65 it/sec) -training >> step=8670700, episode=1446 reward=0.771922 (510.88 it/sec) -training >> step=8670800, episode=1446 reward=0.7772362 (484.30 it/sec) -training >> step=8670900, episode=1446 reward=0.7744671 (487.30 it/sec) -training >> step=8671000, episode=1446 reward=0.7887952 (477.16 it/sec) -training >> step=8671100, episode=1446 reward=0.7822345 (474.88 it/sec) -training >> step=8671200, episode=1446 reward=0.8069499 (442.53 it/sec) -training >> step=8671300, episode=1446 reward=0.7891415 (483.88 it/sec) -training >> step=8671400, episode=1446 reward=0.8021592 (511.19 it/sec) -training >> step=8671500, episode=1446 reward=0.7909046 (491.02 it/sec) -training >> step=8671600, episode=1446 reward=0.7877097 (458.31 it/sec) -training >> step=8671700, episode=1446 reward=0.7908666 (487.12 it/sec) -training >> step=8671800, episode=1446 reward=0.7880334 (509.50 it/sec) -training >> step=8671900, episode=1446 reward=0.800837 (504.61 it/sec) -training >> step=8672000, episode=1446 reward=0.8048918 (431.75 it/sec) -training >> step=8672100, episode=1446 reward=0.7977627 (519.11 it/sec) -training >> step=8672200, episode=1446 reward=0.7876713 (499.68 it/sec) -training >> step=8672300, episode=1446 reward=0.7895327 (498.56 it/sec) -training >> step=8672400, episode=1446 reward=0.7722339 (470.60 it/sec) -training >> step=8672500, episode=1446 reward=0.7930817 (422.71 it/sec) -training >> step=8672600, episode=1446 reward=0.7845351 (505.17 it/sec) -training >> step=8672700, episode=1446 reward=0.7753319 (498.04 it/sec) -training >> step=8672800, episode=1446 reward=0.7639891 (530.11 it/sec) -training >> step=8672900, episode=1446 reward=0.8075254 (463.63 it/sec) -training >> step=8673000, episode=1446 reward=0.771884 (473.31 it/sec) -training >> step=8673100, episode=1446 reward=0.7726737 (495.33 it/sec) -training >> step=8673200, episode=1446 reward=0.7940487 (517.18 it/sec) -training >> step=8673300, episode=1446 reward=0.7924395 (495.54 it/sec) -training >> step=8673400, episode=1446 reward=0.7739663 (480.69 it/sec) -training >> step=8673500, episode=1446 reward=0.7867486 (441.01 it/sec) -training >> step=8673600, episode=1446 reward=0.8147183 (507.12 it/sec) -training >> step=8673700, episode=1446 reward=0.7769094 (465.39 it/sec) -training >> step=8673800, episode=1446 reward=0.7829146 (498.66 it/sec) -training >> step=8673900, episode=1446 reward=0.788187 (534.59 it/sec) -training >> step=8674000, episode=1446 reward=0.7917007 (473.58 it/sec) -training >> step=8674100, episode=1446 reward=0.783947 (432.67 it/sec) -training >> step=8674200, episode=1446 reward=0.7891902 (509.49 it/sec) -training >> step=8674300, episode=1446 reward=0.7829324 (481.64 it/sec) -training >> step=8674400, episode=1446 reward=0.792596 (492.12 it/sec) -training >> step=8674500, episode=1446 reward=0.7764238 (487.81 it/sec) -training >> step=8674600, episode=1446 reward=0.7980939 (470.53 it/sec) -training >> step=8674700, episode=1446 reward=0.8091399 (454.26 it/sec) -training >> step=8674800, episode=1446 reward=0.7889274 (501.74 it/sec) -training >> step=8674900, episode=1446 reward=0.7892722 (492.94 it/sec) -training >> step=8675000, episode=1446 reward=0.7911404 (489.69 it/sec) -training >> step=8675100, episode=1446 reward=0.7953756 (469.35 it/sec) -training >> step=8675200, episode=1446 reward=0.7993957 (519.50 it/sec) -training >> step=8675300, episode=1447 reward=0.7892032 (115.80 it/sec) -training >> step=8675400, episode=1447 reward=0.7907093 (473.87 it/sec) -training >> step=8675500, episode=1447 reward=0.7796388 (488.66 it/sec) -training >> step=8675600, episode=1447 reward=0.7898217 (533.69 it/sec) -training >> step=8675700, episode=1447 reward=0.7879874 (458.12 it/sec) -training >> step=8675800, episode=1447 reward=0.7900602 (497.56 it/sec) -training >> step=8675900, episode=1447 reward=0.8002048 (463.23 it/sec) -training >> step=8676000, episode=1447 reward=0.787064 (457.66 it/sec) -training >> step=8676100, episode=1447 reward=0.7738084 (452.57 it/sec) -training >> step=8676200, episode=1447 reward=0.7954692 (440.69 it/sec) -training >> step=8676300, episode=1447 reward=0.7966663 (505.02 it/sec) -training >> step=8676400, episode=1447 reward=0.7925203 (464.73 it/sec) -training >> step=8676500, episode=1447 reward=0.7896698 (446.80 it/sec) -training >> step=8676600, episode=1447 reward=0.7910446 (464.64 it/sec) -training >> step=8676700, episode=1447 reward=0.7872397 (385.47 it/sec) -training >> step=8676800, episode=1447 reward=0.7908758 (457.03 it/sec) -training >> step=8676900, episode=1447 reward=0.7810928 (438.65 it/sec) -training >> step=8677000, episode=1447 reward=0.7942595 (463.15 it/sec) -training >> step=8677100, episode=1447 reward=0.7850217 (403.20 it/sec) -training >> step=8677200, episode=1447 reward=0.7974065 (405.73 it/sec) -training >> step=8677300, episode=1447 reward=0.779925 (490.88 it/sec) -training >> step=8677400, episode=1447 reward=0.8002666 (456.39 it/sec) -training >> step=8677500, episode=1447 reward=0.8006495 (428.67 it/sec) -training >> step=8677600, episode=1447 reward=0.7935299 (415.46 it/sec) -training >> step=8677700, episode=1447 reward=0.7999647 (482.52 it/sec) -training >> step=8677800, episode=1447 reward=0.778079 (427.06 it/sec) -training >> step=8677900, episode=1447 reward=0.7950718 (490.52 it/sec) -training >> step=8678000, episode=1447 reward=0.7691969 (448.38 it/sec) -training >> step=8678100, episode=1447 reward=0.7986466 (471.51 it/sec) -training >> step=8678200, episode=1447 reward=0.7804741 (461.64 it/sec) -training >> step=8678300, episode=1447 reward=0.8023812 (441.09 it/sec) -training >> step=8678400, episode=1447 reward=0.8100669 (444.80 it/sec) -training >> step=8678500, episode=1447 reward=0.7903006 (481.28 it/sec) -training >> step=8678600, episode=1447 reward=0.793658 (399.52 it/sec) -training >> step=8678700, episode=1447 reward=0.7871408 (427.39 it/sec) -training >> step=8678800, episode=1447 reward=0.7884991 (453.55 it/sec) -training >> step=8678900, episode=1447 reward=0.7990326 (435.31 it/sec) -training >> step=8679000, episode=1447 reward=0.7957063 (438.20 it/sec) -training >> step=8679100, episode=1447 reward=0.784187 (449.11 it/sec) -training >> step=8679200, episode=1447 reward=0.8009487 (474.21 it/sec) -training >> step=8679300, episode=1447 reward=0.7828405 (453.91 it/sec) -training >> step=8679400, episode=1447 reward=0.7771996 (490.84 it/sec) -training >> step=8679500, episode=1447 reward=0.8135217 (442.21 it/sec) -training >> step=8679600, episode=1447 reward=0.8018014 (455.71 it/sec) -training >> step=8679700, episode=1447 reward=0.8075913 (442.38 it/sec) -training >> step=8679800, episode=1447 reward=0.7977012 (423.26 it/sec) -training >> step=8679900, episode=1447 reward=0.7925903 (454.60 it/sec) -training >> step=8680000, episode=1447 reward=0.7866791 (408.90 it/sec) -training >> step=8680100, episode=1447 reward=0.7859022 (407.80 it/sec) -training >> step=8680200, episode=1447 reward=0.7816706 (393.99 it/sec) -training >> step=8680300, episode=1447 reward=0.8068932 (465.17 it/sec) -training >> step=8680400, episode=1447 reward=0.77131 (442.18 it/sec) -training >> step=8680500, episode=1447 reward=0.7916513 (430.56 it/sec) -training >> step=8680600, episode=1447 reward=0.7986622 (517.08 it/sec) -training >> step=8680700, episode=1447 reward=0.7844881 (475.79 it/sec) -training >> step=8680800, episode=1447 reward=0.7704638 (436.21 it/sec) -training >> step=8680900, episode=1447 reward=0.8015621 (406.76 it/sec) -training >> step=8681000, episode=1447 reward=0.7975976 (478.00 it/sec) -training >> step=8681100, episode=1447 reward=0.7935372 (486.20 it/sec) -training >> step=8681200, episode=1447 reward=0.7837959 (461.77 it/sec) -training >> step=8681300, episode=1448 reward=0.7873964 (72.74 it/sec) -training >> step=8681400, episode=1448 reward=0.796906 (433.38 it/sec) -training >> step=8681500, episode=1448 reward=0.7941872 (401.52 it/sec) -training >> step=8681600, episode=1448 reward=0.7860051 (481.60 it/sec) -training >> step=8681700, episode=1448 reward=0.7837057 (510.58 it/sec) -training >> step=8681800, episode=1448 reward=0.8030615 (451.10 it/sec) -training >> step=8681900, episode=1448 reward=0.7852935 (513.08 it/sec) -training >> step=8682000, episode=1448 reward=0.786446 (461.06 it/sec) -training >> step=8682100, episode=1448 reward=0.8050177 (420.78 it/sec) -training >> step=8682200, episode=1448 reward=0.8088291 (479.65 it/sec) -training >> step=8682300, episode=1448 reward=0.7843614 (467.54 it/sec) -training >> step=8682400, episode=1448 reward=0.766498 (493.92 it/sec) -training >> step=8682500, episode=1448 reward=0.7943898 (489.17 it/sec) -training >> step=8682600, episode=1448 reward=0.7843527 (454.15 it/sec) -training >> step=8682700, episode=1448 reward=0.7827813 (480.88 it/sec) -training >> step=8682800, episode=1448 reward=0.7920409 (441.51 it/sec) -training >> step=8682900, episode=1448 reward=0.7959662 (475.65 it/sec) -training >> step=8683000, episode=1448 reward=0.7984303 (426.43 it/sec) -training >> step=8683100, episode=1448 reward=0.7911794 (478.32 it/sec) -training >> step=8683200, episode=1448 reward=0.7849249 (468.89 it/sec) -training >> step=8683300, episode=1448 reward=0.7844805 (474.36 it/sec) -training >> step=8683400, episode=1448 reward=0.7842448 (451.96 it/sec) -training >> step=8683500, episode=1448 reward=0.8081474 (461.11 it/sec) -training >> step=8683600, episode=1448 reward=0.7860807 (469.66 it/sec) -training >> step=8683700, episode=1448 reward=0.7828093 (456.46 it/sec) -training >> step=8683800, episode=1448 reward=0.7888788 (480.13 it/sec) -training >> step=8683900, episode=1448 reward=0.7831339 (424.95 it/sec) -training >> step=8684000, episode=1448 reward=0.7894018 (456.51 it/sec) -training >> step=8684100, episode=1448 reward=0.8074526 (463.06 it/sec) -training >> step=8684200, episode=1448 reward=0.7879623 (413.83 it/sec) -training >> step=8684300, episode=1448 reward=0.7952738 (482.86 it/sec) -training >> step=8684400, episode=1448 reward=0.8012137 (428.83 it/sec) -training >> step=8684500, episode=1448 reward=0.7847562 (430.89 it/sec) -training >> step=8684600, episode=1448 reward=0.7960809 (431.55 it/sec) -training >> step=8684700, episode=1448 reward=0.7954131 (473.69 it/sec) -training >> step=8684800, episode=1448 reward=0.8111516 (389.46 it/sec) -training >> step=8684900, episode=1448 reward=0.7829106 (454.62 it/sec) -training >> step=8685000, episode=1448 reward=0.7998939 (488.66 it/sec) -training >> step=8685100, episode=1448 reward=0.8010595 (455.14 it/sec) -training >> step=8685200, episode=1448 reward=0.8043229 (476.55 it/sec) -training >> step=8685300, episode=1448 reward=0.7904068 (452.77 it/sec) -training >> step=8685400, episode=1448 reward=0.7783183 (484.62 it/sec) -training >> step=8685500, episode=1448 reward=0.8070266 (431.72 it/sec) -training >> step=8685600, episode=1448 reward=0.7778676 (453.23 it/sec) -training >> step=8685700, episode=1448 reward=0.8047922 (459.94 it/sec) -training >> step=8685800, episode=1448 reward=0.811857 (470.93 it/sec) -training >> step=8685900, episode=1448 reward=0.7694132 (488.56 it/sec) -training >> step=8686000, episode=1448 reward=0.7828044 (470.18 it/sec) -training >> step=8686100, episode=1448 reward=0.7887745 (511.10 it/sec) -training >> step=8686200, episode=1448 reward=0.8162013 (483.15 it/sec) -training >> step=8686300, episode=1448 reward=0.7882253 (431.00 it/sec) -training >> step=8686400, episode=1448 reward=0.7884174 (472.03 it/sec) -training >> step=8686500, episode=1448 reward=0.7953556 (466.67 it/sec) -training >> step=8686600, episode=1448 reward=0.794555 (477.78 it/sec) -training >> step=8686700, episode=1448 reward=0.7772803 (509.58 it/sec) -training >> step=8686800, episode=1448 reward=0.778424 (513.01 it/sec) -training >> step=8686900, episode=1448 reward=0.80367 (462.63 it/sec) -training >> step=8687000, episode=1448 reward=0.7889519 (429.01 it/sec) -training >> step=8687100, episode=1448 reward=0.8176408 (423.41 it/sec) -training >> step=8687200, episode=1448 reward=0.7746451 (516.46 it/sec) -training >> step=8687300, episode=1449 reward=0.7855187 (96.41 it/sec) -training >> step=8687400, episode=1449 reward=0.7847863 (486.35 it/sec) -training >> step=8687500, episode=1449 reward=0.7930984 (473.57 it/sec) -training >> step=8687600, episode=1449 reward=0.8029937 (441.13 it/sec) -training >> step=8687700, episode=1449 reward=0.7721288 (472.17 it/sec) -training >> step=8687800, episode=1449 reward=0.8006012 (478.56 it/sec) -training >> step=8687900, episode=1449 reward=0.7732405 (495.10 it/sec) -training >> step=8688000, episode=1449 reward=0.7998104 (499.59 it/sec) -training >> step=8688100, episode=1449 reward=0.7935132 (406.39 it/sec) -training >> step=8688200, episode=1449 reward=0.8090844 (455.49 it/sec) -training >> step=8688300, episode=1449 reward=0.794054 (447.82 it/sec) -training >> step=8688400, episode=1449 reward=0.7868447 (457.90 it/sec) -training >> step=8688500, episode=1449 reward=0.8009294 (452.56 it/sec) -training >> step=8688600, episode=1449 reward=0.8118382 (485.68 it/sec) -training >> step=8688700, episode=1449 reward=0.8062758 (468.05 it/sec) -training >> step=8688800, episode=1449 reward=0.7844601 (474.90 it/sec) -training >> step=8688900, episode=1449 reward=0.8012123 (472.33 it/sec) -training >> step=8689000, episode=1449 reward=0.7903523 (489.28 it/sec) -training >> step=8689100, episode=1449 reward=0.7915304 (463.45 it/sec) -training >> step=8689200, episode=1449 reward=0.8106167 (454.83 it/sec) -training >> step=8689300, episode=1449 reward=0.7825906 (488.44 it/sec) -training >> step=8689400, episode=1449 reward=0.8086363 (457.14 it/sec) -training >> step=8689500, episode=1449 reward=0.8195427 (469.98 it/sec) -training >> step=8689600, episode=1449 reward=0.8143643 (463.48 it/sec) -training >> step=8689700, episode=1449 reward=0.8136528 (507.75 it/sec) -training >> step=8689800, episode=1449 reward=0.7748914 (426.99 it/sec) -training >> step=8689900, episode=1449 reward=0.7733977 (435.12 it/sec) -training >> step=8690000, episode=1449 reward=0.7663557 (422.38 it/sec) -training >> step=8690100, episode=1449 reward=0.7839842 (465.23 it/sec) -training >> step=8690200, episode=1449 reward=0.7991783 (454.23 it/sec) -training >> step=8690300, episode=1449 reward=0.784574 (489.40 it/sec) -training >> step=8690400, episode=1449 reward=0.7673103 (426.22 it/sec) -training >> step=8690500, episode=1449 reward=0.7911936 (395.67 it/sec) -training >> step=8690600, episode=1449 reward=0.803137 (447.11 it/sec) -training >> step=8690700, episode=1449 reward=0.7689785 (396.55 it/sec) -training >> step=8690800, episode=1449 reward=0.7873746 (449.50 it/sec) -training >> step=8690900, episode=1449 reward=0.7962884 (408.72 it/sec) -training >> step=8691000, episode=1449 reward=0.7819141 (451.40 it/sec) -training >> step=8691100, episode=1449 reward=0.7844353 (505.60 it/sec) -training >> step=8691200, episode=1449 reward=0.7741481 (469.48 it/sec) -training >> step=8691300, episode=1449 reward=0.7823775 (487.11 it/sec) -training >> step=8691400, episode=1449 reward=0.8004349 (458.88 it/sec) -training >> step=8691500, episode=1449 reward=0.7736392 (479.56 it/sec) -training >> step=8691600, episode=1449 reward=0.7796767 (458.13 it/sec) -training >> step=8691700, episode=1449 reward=0.7959344 (465.95 it/sec) -training >> step=8691800, episode=1449 reward=0.7857965 (468.68 it/sec) -training >> step=8691900, episode=1449 reward=0.7870021 (506.20 it/sec) -training >> step=8692000, episode=1449 reward=0.7956944 (475.04 it/sec) -training >> step=8692100, episode=1449 reward=0.8006781 (442.30 it/sec) -training >> step=8692200, episode=1449 reward=0.8013958 (464.83 it/sec) -training >> step=8692300, episode=1449 reward=0.7737842 (421.78 it/sec) -training >> step=8692400, episode=1449 reward=0.7822898 (482.96 it/sec) -training >> step=8692500, episode=1449 reward=0.8081531 (466.36 it/sec) -training >> step=8692600, episode=1449 reward=0.7936541 (498.51 it/sec) -training >> step=8692700, episode=1449 reward=0.7838064 (415.09 it/sec) -training >> step=8692800, episode=1449 reward=0.7770544 (443.76 it/sec) -training >> step=8692900, episode=1449 reward=0.8148237 (500.15 it/sec) -training >> step=8693000, episode=1449 reward=0.8129866 (454.71 it/sec) -training >> step=8693100, episode=1449 reward=0.7953506 (451.89 it/sec) -training >> step=8693200, episode=1449 reward=0.8171828 (475.60 it/sec) -training >> step=8693300, episode=1450 reward=0.7786934 (86.23 it/sec) -training >> step=8693400, episode=1450 reward=0.8020746 (498.50 it/sec) -training >> step=8693500, episode=1450 reward=0.7836142 (501.81 it/sec) -training >> step=8693600, episode=1450 reward=0.793109 (490.20 it/sec) -training >> step=8693700, episode=1450 reward=0.789421 (511.32 it/sec) -training >> step=8693800, episode=1450 reward=0.795578 (486.41 it/sec) -training >> step=8693900, episode=1450 reward=0.8117936 (509.17 it/sec) -training >> step=8694000, episode=1450 reward=0.8043581 (502.58 it/sec) -training >> step=8694100, episode=1450 reward=0.8012922 (483.08 it/sec) -training >> step=8694200, episode=1450 reward=0.791967 (478.92 it/sec) -training >> step=8694300, episode=1450 reward=0.7804973 (487.99 it/sec) -training >> step=8694400, episode=1450 reward=0.7911355 (452.58 it/sec) -training >> step=8694500, episode=1450 reward=0.7867554 (488.14 it/sec) -training >> step=8694600, episode=1450 reward=0.7868614 (477.64 it/sec) -training >> step=8694700, episode=1450 reward=0.7753349 (494.69 it/sec) -training >> step=8694800, episode=1450 reward=0.7969018 (459.48 it/sec) -training >> step=8694900, episode=1450 reward=0.7877718 (499.44 it/sec) -training >> step=8695000, episode=1450 reward=0.7938832 (510.71 it/sec) -training >> step=8695100, episode=1450 reward=0.7986696 (467.32 it/sec) -training >> step=8695200, episode=1450 reward=0.7963628 (460.47 it/sec) -training >> step=8695300, episode=1450 reward=0.7905009 (445.43 it/sec) -training >> step=8695400, episode=1450 reward=0.8042497 (451.15 it/sec) -training >> step=8695500, episode=1450 reward=0.7894843 (446.91 it/sec) -training >> step=8695600, episode=1450 reward=0.8052201 (451.33 it/sec) -training >> step=8695700, episode=1450 reward=0.7929684 (448.12 it/sec) -training >> step=8695800, episode=1450 reward=0.8005105 (465.84 it/sec) -training >> step=8695900, episode=1450 reward=0.7968047 (471.93 it/sec) -training >> step=8696000, episode=1450 reward=0.794028 (430.86 it/sec) -training >> step=8696100, episode=1450 reward=0.7829526 (449.63 it/sec) -training >> step=8696200, episode=1450 reward=0.7791346 (429.15 it/sec) -training >> step=8696300, episode=1450 reward=0.7933732 (428.81 it/sec) -training >> step=8696400, episode=1450 reward=0.7937996 (456.30 it/sec) -training >> step=8696500, episode=1450 reward=0.7786148 (432.03 it/sec) -training >> step=8696600, episode=1450 reward=0.8014944 (477.56 it/sec) -training >> step=8696700, episode=1450 reward=0.7801772 (417.64 it/sec) -training >> step=8696800, episode=1450 reward=0.7906501 (465.30 it/sec) -training >> step=8696900, episode=1450 reward=0.7848277 (488.24 it/sec) -training >> step=8697000, episode=1450 reward=0.7836978 (445.06 it/sec) -training >> step=8697100, episode=1450 reward=0.7761877 (435.58 it/sec) -training >> step=8697200, episode=1450 reward=0.7801195 (473.45 it/sec) -training >> step=8697300, episode=1450 reward=0.8049477 (465.56 it/sec) -training >> step=8697400, episode=1450 reward=0.7912309 (467.47 it/sec) -training >> step=8697500, episode=1450 reward=0.7942799 (478.51 it/sec) -training >> step=8697600, episode=1450 reward=0.8076061 (472.52 it/sec) -training >> step=8697700, episode=1450 reward=0.8020651 (434.09 it/sec) -training >> step=8697800, episode=1450 reward=0.7739166 (423.77 it/sec) -training >> step=8697900, episode=1450 reward=0.7975773 (487.22 it/sec) -training >> step=8698000, episode=1450 reward=0.8017066 (475.78 it/sec) -training >> step=8698100, episode=1450 reward=0.7942865 (435.94 it/sec) -training >> step=8698200, episode=1450 reward=0.8042492 (482.91 it/sec) -training >> step=8698300, episode=1450 reward=0.7903633 (486.20 it/sec) -training >> step=8698400, episode=1450 reward=0.8034548 (432.03 it/sec) -training >> step=8698500, episode=1450 reward=0.7947771 (466.76 it/sec) -training >> step=8698600, episode=1450 reward=0.7886674 (401.97 it/sec) -training >> step=8698700, episode=1450 reward=0.7884714 (443.97 it/sec) -training >> step=8698800, episode=1450 reward=0.7774063 (445.02 it/sec) -training >> step=8698900, episode=1450 reward=0.7869664 (464.73 it/sec) -training >> step=8699000, episode=1450 reward=0.7869422 (454.30 it/sec) -training >> step=8699100, episode=1450 reward=0.8115765 (469.96 it/sec) -training >> step=8699200, episode=1450 reward=0.8009911 (408.75 it/sec) -training >> step=8699300, episode=1451 reward=0.7979209 (86.93 it/sec) -training >> step=8699400, episode=1451 reward=0.780919 (481.54 it/sec) -training >> step=8699500, episode=1451 reward=0.778605 (410.55 it/sec) -training >> step=8699600, episode=1451 reward=0.7731277 (409.44 it/sec) -training >> step=8699700, episode=1451 reward=0.7765433 (483.85 it/sec) -training >> step=8699800, episode=1451 reward=0.8027654 (460.56 it/sec) -training >> step=8699900, episode=1451 reward=0.801967 (437.37 it/sec) -training >> step=8700000, episode=1451 reward=0.7935518 (460.40 it/sec) -training >> step=8700100, episode=1451 reward=0.8028201 (469.90 it/sec) -training >> step=8700200, episode=1451 reward=0.781683 (403.14 it/sec) -training >> step=8700300, episode=1451 reward=0.8054841 (441.60 it/sec) -training >> step=8700400, episode=1451 reward=0.8127526 (424.58 it/sec) -training >> step=8700500, episode=1451 reward=0.7843851 (467.20 it/sec) -training >> step=8700600, episode=1451 reward=0.7821012 (439.71 it/sec) -training >> step=8700700, episode=1451 reward=0.7657614 (432.46 it/sec) -training >> step=8700800, episode=1451 reward=0.7964836 (486.93 it/sec) -training >> step=8700900, episode=1451 reward=0.7924808 (447.21 it/sec) -training >> step=8701000, episode=1451 reward=0.7843965 (477.78 it/sec) -training >> step=8701100, episode=1451 reward=0.7939786 (401.43 it/sec) -training >> step=8701200, episode=1451 reward=0.7862312 (436.33 it/sec) -training >> step=8701300, episode=1451 reward=0.785341 (439.79 it/sec) -training >> step=8701400, episode=1451 reward=0.800758 (408.18 it/sec) -training >> step=8701500, episode=1451 reward=0.8013073 (460.98 it/sec) -training >> step=8701600, episode=1451 reward=0.7908141 (485.97 it/sec) -training >> step=8701700, episode=1451 reward=0.7955614 (409.62 it/sec) -training >> step=8701800, episode=1451 reward=0.8068959 (373.12 it/sec) -training >> step=8701900, episode=1451 reward=0.7874698 (474.62 it/sec) -training >> step=8702000, episode=1451 reward=0.78328 (439.44 it/sec) -training >> step=8702100, episode=1451 reward=0.7884083 (457.52 it/sec) -training >> step=8702200, episode=1451 reward=0.7964926 (461.63 it/sec) -training >> step=8702300, episode=1451 reward=0.8069232 (483.08 it/sec) -training >> step=8702400, episode=1451 reward=0.7967209 (461.44 it/sec) -training >> step=8702500, episode=1451 reward=0.7823223 (444.05 it/sec) -training >> step=8702600, episode=1451 reward=0.7841498 (436.93 it/sec) -training >> step=8702700, episode=1451 reward=0.7833803 (460.23 it/sec) -training >> step=8702800, episode=1451 reward=0.8196069 (409.24 it/sec) -training >> step=8702900, episode=1451 reward=0.7941346 (453.21 it/sec) -training >> step=8703000, episode=1451 reward=0.7876636 (474.14 it/sec) -training >> step=8703100, episode=1451 reward=0.7801009 (451.16 it/sec) -training >> step=8703200, episode=1451 reward=0.8045843 (443.55 it/sec) -training >> step=8703300, episode=1451 reward=0.8056369 (446.42 it/sec) -training >> step=8703400, episode=1451 reward=0.7942716 (483.01 it/sec) -training >> step=8703500, episode=1451 reward=0.7813957 (460.31 it/sec) -training >> step=8703600, episode=1451 reward=0.7959278 (433.75 it/sec) -training >> step=8703700, episode=1451 reward=0.786817 (497.10 it/sec) -training >> step=8703800, episode=1451 reward=0.8107893 (498.30 it/sec) -training >> step=8703900, episode=1451 reward=0.8040158 (465.46 it/sec) -training >> step=8704000, episode=1451 reward=0.80142 (483.98 it/sec) -training >> step=8704100, episode=1451 reward=0.7960939 (484.51 it/sec) -training >> step=8704200, episode=1451 reward=0.8018825 (426.98 it/sec) -training >> step=8704300, episode=1451 reward=0.7787215 (493.87 it/sec) -training >> step=8704400, episode=1451 reward=0.7811453 (434.68 it/sec) -training >> step=8704500, episode=1451 reward=0.7923257 (501.72 it/sec) -training >> step=8704600, episode=1451 reward=0.8042101 (448.36 it/sec) -training >> step=8704700, episode=1451 reward=0.8174477 (453.73 it/sec) -training >> step=8704800, episode=1451 reward=0.8018877 (458.89 it/sec) -training >> step=8704900, episode=1451 reward=0.7949528 (494.81 it/sec) -training >> step=8705000, episode=1451 reward=0.7794312 (408.07 it/sec) -training >> step=8705100, episode=1451 reward=0.8015827 (387.43 it/sec) -training >> step=8705200, episode=1451 reward=0.7812429 (443.33 it/sec) -training >> step=8705300, episode=1452 reward=0.7800364 (85.40 it/sec) -training >> step=8705400, episode=1452 reward=0.7955326 (349.38 it/sec) -training >> step=8705500, episode=1452 reward=0.7894551 (431.26 it/sec) -training >> step=8705600, episode=1452 reward=0.7868344 (464.49 it/sec) -training >> step=8705700, episode=1452 reward=0.7889338 (460.33 it/sec) -training >> step=8705800, episode=1452 reward=0.7854741 (491.57 it/sec) -training >> step=8705900, episode=1452 reward=0.7937468 (568.71 it/sec) -training >> step=8706000, episode=1452 reward=0.7862412 (522.27 it/sec) -training >> step=8706100, episode=1452 reward=0.784662 (559.73 it/sec) -training >> step=8706200, episode=1452 reward=0.8004431 (537.32 it/sec) -training >> step=8706300, episode=1452 reward=0.7784234 (547.63 it/sec) -training >> step=8706400, episode=1452 reward=0.8056174 (557.53 it/sec) -training >> step=8706500, episode=1452 reward=0.787625 (496.81 it/sec) -training >> step=8706600, episode=1452 reward=0.7974987 (495.73 it/sec) -training >> step=8706700, episode=1452 reward=0.7818368 (547.76 it/sec) -training >> step=8706800, episode=1452 reward=0.7976689 (526.92 it/sec) -training >> step=8706900, episode=1452 reward=0.7947267 (483.34 it/sec) -training >> step=8707000, episode=1452 reward=0.7625431 (514.49 it/sec) -training >> step=8707100, episode=1452 reward=0.8117316 (412.62 it/sec) -training >> step=8707200, episode=1452 reward=0.8010368 (552.29 it/sec) -training >> step=8707300, episode=1452 reward=0.7789372 (519.38 it/sec) -training >> step=8707400, episode=1452 reward=0.7898918 (569.84 it/sec) -training >> step=8707500, episode=1452 reward=0.785176 (540.98 it/sec) -training >> step=8707600, episode=1452 reward=0.7802477 (499.71 it/sec) -training >> step=8707700, episode=1452 reward=0.7880612 (501.46 it/sec) -training >> step=8707800, episode=1452 reward=0.7858717 (521.47 it/sec) -training >> step=8707900, episode=1452 reward=0.7839616 (521.00 it/sec) -training >> step=8708000, episode=1452 reward=0.7875503 (551.09 it/sec) -training >> step=8708100, episode=1452 reward=0.7971499 (562.98 it/sec) -training >> step=8708200, episode=1452 reward=0.8170857 (501.61 it/sec) -training >> step=8708300, episode=1452 reward=0.7969405 (498.33 it/sec) -training >> step=8708400, episode=1452 reward=0.7809002 (544.60 it/sec) -training >> step=8708500, episode=1452 reward=0.8049089 (551.86 it/sec) -training >> step=8708600, episode=1452 reward=0.8073955 (546.01 it/sec) -training >> step=8708700, episode=1452 reward=0.8023227 (518.06 it/sec) -training >> step=8708800, episode=1452 reward=0.7855421 (576.32 it/sec) -training >> step=8708900, episode=1452 reward=0.7874024 (480.98 it/sec) -training >> step=8709000, episode=1452 reward=0.7887776 (538.21 it/sec) -training >> step=8709100, episode=1452 reward=0.8229468 (551.89 it/sec) -training >> step=8709200, episode=1452 reward=0.7930837 (554.28 it/sec) -training >> step=8709300, episode=1452 reward=0.7882808 (507.75 it/sec) -training >> step=8709400, episode=1452 reward=0.7938967 (521.30 it/sec) -training >> step=8709500, episode=1452 reward=0.7704791 (548.20 it/sec) -training >> step=8709600, episode=1452 reward=0.8004663 (541.24 it/sec) -training >> step=8709700, episode=1452 reward=0.7941279 (522.44 it/sec) -training >> step=8709800, episode=1452 reward=0.7793702 (483.18 it/sec) -training >> step=8709900, episode=1452 reward=0.8025674 (570.27 it/sec) -training >> step=8710000, episode=1452 reward=0.7834979 (488.61 it/sec) -training >> step=8710100, episode=1452 reward=0.7836074 (538.36 it/sec) -training >> step=8710200, episode=1452 reward=0.7746049 (515.79 it/sec) -training >> step=8710300, episode=1452 reward=0.7872542 (530.23 it/sec) -training >> step=8710400, episode=1452 reward=0.786853 (483.81 it/sec) -training >> step=8710500, episode=1452 reward=0.8002954 (474.51 it/sec) -training >> step=8710600, episode=1452 reward=0.8004236 (519.27 it/sec) -training >> step=8710700, episode=1452 reward=0.7768239 (551.69 it/sec) -training >> step=8710800, episode=1452 reward=0.7985405 (530.97 it/sec) -training >> step=8710900, episode=1452 reward=0.7696106 (505.96 it/sec) -training >> step=8711000, episode=1452 reward=0.8189284 (585.56 it/sec) -training >> step=8711100, episode=1452 reward=0.7875894 (537.42 it/sec) -training >> step=8711200, episode=1452 reward=0.7649026 (521.77 it/sec) -training >> step=8711300, episode=1453 reward=0.7891738 (125.70 it/sec) -training >> step=8711400, episode=1453 reward=0.7912542 (539.42 it/sec) -training >> step=8711500, episode=1453 reward=0.7976356 (530.06 it/sec) -training >> step=8711600, episode=1453 reward=0.7633046 (524.17 it/sec) -training >> step=8711700, episode=1453 reward=0.7736592 (514.51 it/sec) -training >> step=8711800, episode=1453 reward=0.799244 (502.22 it/sec) -training >> step=8711900, episode=1453 reward=0.7812533 (543.46 it/sec) -training >> step=8712000, episode=1453 reward=0.7715132 (477.17 it/sec) -training >> step=8712100, episode=1453 reward=0.7895826 (569.52 it/sec) -training >> step=8712200, episode=1453 reward=0.8102414 (489.59 it/sec) -training >> step=8712300, episode=1453 reward=0.7792986 (542.72 it/sec) -training >> step=8712400, episode=1453 reward=0.8049651 (522.04 it/sec) -training >> step=8712500, episode=1453 reward=0.7835382 (553.34 it/sec) -training >> step=8712600, episode=1453 reward=0.7912076 (537.13 it/sec) -training >> step=8712700, episode=1453 reward=0.792129 (516.22 it/sec) -training >> step=8712800, episode=1453 reward=0.7954454 (453.39 it/sec) -training >> step=8712900, episode=1453 reward=0.7900766 (461.05 it/sec) -training >> step=8713000, episode=1453 reward=0.7831053 (447.90 it/sec) -training >> step=8713100, episode=1453 reward=0.7875512 (523.81 it/sec) -training >> step=8713200, episode=1453 reward=0.796202 (513.83 it/sec) -training >> step=8713300, episode=1453 reward=0.811071 (495.54 it/sec) -training >> step=8713400, episode=1453 reward=0.7764594 (538.91 it/sec) -training >> step=8713500, episode=1453 reward=0.8017526 (568.85 it/sec) -training >> step=8713600, episode=1453 reward=0.8010055 (525.20 it/sec) -training >> step=8713700, episode=1453 reward=0.8031192 (542.00 it/sec) -training >> step=8713800, episode=1453 reward=0.7853956 (483.76 it/sec) -training >> step=8713900, episode=1453 reward=0.781231 (524.91 it/sec) -training >> step=8714000, episode=1453 reward=0.7888334 (520.20 it/sec) -training >> step=8714100, episode=1453 reward=0.7743772 (542.16 it/sec) -training >> step=8714200, episode=1453 reward=0.7947174 (536.11 it/sec) -training >> step=8714300, episode=1453 reward=0.7946109 (534.63 it/sec) -training >> step=8714400, episode=1453 reward=0.7794458 (454.59 it/sec) -training >> step=8714500, episode=1453 reward=0.7935679 (529.77 it/sec) -training >> step=8714600, episode=1453 reward=0.7824023 (549.77 it/sec) -training >> step=8714700, episode=1453 reward=0.7950264 (542.76 it/sec) -training >> step=8714800, episode=1453 reward=0.7817435 (493.62 it/sec) -training >> step=8714900, episode=1453 reward=0.7871776 (512.41 it/sec) -training >> step=8715000, episode=1453 reward=0.7925836 (528.83 it/sec) -training >> step=8715100, episode=1453 reward=0.8020501 (544.36 it/sec) -training >> step=8715200, episode=1453 reward=0.7809542 (550.03 it/sec) -training >> step=8715300, episode=1453 reward=0.7983186 (539.65 it/sec) -training >> step=8715400, episode=1453 reward=0.805111 (517.09 it/sec) -training >> step=8715500, episode=1453 reward=0.7690778 (493.50 it/sec) -training >> step=8715600, episode=1453 reward=0.8082822 (548.37 it/sec) -training >> step=8715700, episode=1453 reward=0.7884241 (580.27 it/sec) -training >> step=8715800, episode=1453 reward=0.7906097 (535.36 it/sec) -training >> step=8715900, episode=1453 reward=0.8013332 (528.81 it/sec) -training >> step=8716000, episode=1453 reward=0.8193135 (509.68 it/sec) -training >> step=8716100, episode=1453 reward=0.7977234 (517.75 it/sec) -training >> step=8716200, episode=1453 reward=0.7972184 (505.09 it/sec) -training >> step=8716300, episode=1453 reward=0.8152696 (537.43 it/sec) -training >> step=8716400, episode=1453 reward=0.8025391 (529.55 it/sec) -training >> step=8716500, episode=1453 reward=0.8014593 (549.44 it/sec) -training >> step=8716600, episode=1453 reward=0.7642819 (438.23 it/sec) -training >> step=8716700, episode=1453 reward=0.7872111 (498.13 it/sec) -training >> step=8716800, episode=1453 reward=0.7962113 (536.94 it/sec) -training >> step=8716900, episode=1453 reward=0.7870791 (496.83 it/sec) -training >> step=8717000, episode=1453 reward=0.8034642 (504.92 it/sec) -training >> step=8717100, episode=1453 reward=0.7754519 (486.68 it/sec) -training >> step=8717200, episode=1453 reward=0.7980975 (437.83 it/sec) -training >> step=8717300, episode=1454 reward=0.7949286 (93.73 it/sec) -training >> step=8717400, episode=1454 reward=0.7852321 (523.00 it/sec) -training >> step=8717500, episode=1454 reward=0.7739138 (502.02 it/sec) -training >> step=8717600, episode=1454 reward=0.801048 (515.06 it/sec) -training >> step=8717700, episode=1454 reward=0.7987069 (438.72 it/sec) -training >> step=8717800, episode=1454 reward=0.7742662 (543.20 it/sec) -training >> step=8717900, episode=1454 reward=0.8097405 (487.24 it/sec) -training >> step=8718000, episode=1454 reward=0.79312 (496.99 it/sec) -training >> step=8718100, episode=1454 reward=0.7947845 (507.44 it/sec) -training >> step=8718200, episode=1454 reward=0.7828445 (444.95 it/sec) -training >> step=8718300, episode=1454 reward=0.7817531 (502.66 it/sec) -training >> step=8718400, episode=1454 reward=0.8102783 (534.90 it/sec) -training >> step=8718500, episode=1454 reward=0.778516 (466.03 it/sec) -training >> step=8718600, episode=1454 reward=0.8031337 (492.77 it/sec) -training >> step=8718700, episode=1454 reward=0.8022016 (445.54 it/sec) -training >> step=8718800, episode=1454 reward=0.8149154 (521.55 it/sec) -training >> step=8718900, episode=1454 reward=0.7828491 (494.13 it/sec) -training >> step=8719000, episode=1454 reward=0.8079129 (420.30 it/sec) -training >> step=8719100, episode=1454 reward=0.8004687 (507.62 it/sec) -training >> step=8719200, episode=1454 reward=0.7926385 (452.42 it/sec) -training >> step=8719300, episode=1454 reward=0.8048286 (497.91 it/sec) -training >> step=8719400, episode=1454 reward=0.790546 (466.69 it/sec) -training >> step=8719500, episode=1454 reward=0.7642814 (496.11 it/sec) -training >> step=8719600, episode=1454 reward=0.7993686 (465.88 it/sec) -training >> step=8719700, episode=1454 reward=0.8121404 (443.39 it/sec) -training >> step=8719800, episode=1454 reward=0.8037586 (510.35 it/sec) -training >> step=8719900, episode=1454 reward=0.7914435 (481.31 it/sec) -training >> step=8720000, episode=1454 reward=0.7817796 (410.74 it/sec) -training >> step=8720100, episode=1454 reward=0.8004736 (531.30 it/sec) -training >> step=8720200, episode=1454 reward=0.7841042 (467.65 it/sec) -training >> step=8720300, episode=1454 reward=0.778201 (471.49 it/sec) -training >> step=8720400, episode=1454 reward=0.8072029 (474.54 it/sec) -training >> step=8720500, episode=1454 reward=0.8054572 (473.67 it/sec) -training >> step=8720600, episode=1454 reward=0.786999 (522.21 it/sec) -training >> step=8720700, episode=1454 reward=0.8010794 (496.18 it/sec) -training >> step=8720800, episode=1454 reward=0.7871138 (466.11 it/sec) -training >> step=8720900, episode=1454 reward=0.7832289 (498.58 it/sec) -training >> step=8721000, episode=1454 reward=0.7919973 (492.42 it/sec) -training >> step=8721100, episode=1454 reward=0.7610881 (479.49 it/sec) -training >> step=8721200, episode=1454 reward=0.8051181 (505.75 it/sec) -training >> step=8721300, episode=1454 reward=0.7857097 (498.24 it/sec) -training >> step=8721400, episode=1454 reward=0.7849723 (464.93 it/sec) -training >> step=8721500, episode=1454 reward=0.8078693 (515.68 it/sec) -training >> step=8721600, episode=1454 reward=0.7736865 (456.51 it/sec) -training >> step=8721700, episode=1454 reward=0.792038 (516.82 it/sec) -training >> step=8721800, episode=1454 reward=0.7852407 (412.35 it/sec) -training >> step=8721900, episode=1454 reward=0.7834035 (490.58 it/sec) -training >> step=8722000, episode=1454 reward=0.8072796 (513.49 it/sec) -training >> step=8722100, episode=1454 reward=0.7819991 (501.99 it/sec) -training >> step=8722200, episode=1454 reward=0.79457 (492.83 it/sec) -training >> step=8722300, episode=1454 reward=0.7762185 (446.34 it/sec) -training >> step=8722400, episode=1454 reward=0.7653117 (527.56 it/sec) -training >> step=8722500, episode=1454 reward=0.7855284 (512.51 it/sec) -training >> step=8722600, episode=1454 reward=0.7880069 (475.68 it/sec) -training >> step=8722700, episode=1454 reward=0.8122076 (509.20 it/sec) -training >> step=8722800, episode=1454 reward=0.7667808 (458.70 it/sec) -training >> step=8722900, episode=1454 reward=0.7945946 (497.81 it/sec) -training >> step=8723000, episode=1454 reward=0.7814705 (506.97 it/sec) -training >> step=8723100, episode=1454 reward=0.7944262 (524.68 it/sec) -training >> step=8723200, episode=1454 reward=0.7923471 (466.64 it/sec) -training >> step=8723300, episode=1455 reward=0.7879841 (94.87 it/sec) -training >> step=8723400, episode=1455 reward=0.7900347 (442.43 it/sec) -training >> step=8723500, episode=1455 reward=0.8006729 (489.27 it/sec) -training >> step=8723600, episode=1455 reward=0.7640021 (414.73 it/sec) -training >> step=8723700, episode=1455 reward=0.7646864 (483.29 it/sec) -training >> step=8723800, episode=1455 reward=0.8006241 (507.07 it/sec) -training >> step=8723900, episode=1455 reward=0.7988069 (480.71 it/sec) -training >> step=8724000, episode=1455 reward=0.7854 (468.24 it/sec) -training >> step=8724100, episode=1455 reward=0.785692 (464.50 it/sec) -training >> step=8724200, episode=1455 reward=0.7961001 (438.97 it/sec) -training >> step=8724300, episode=1455 reward=0.800599 (450.11 it/sec) -training >> step=8724400, episode=1455 reward=0.7817146 (434.75 it/sec) -training >> step=8724500, episode=1455 reward=0.7952079 (463.66 it/sec) -training >> step=8724600, episode=1455 reward=0.7861322 (490.26 it/sec) -training >> step=8724700, episode=1455 reward=0.801607 (452.00 it/sec) -training >> step=8724800, episode=1455 reward=0.7934782 (483.50 it/sec) -training >> step=8724900, episode=1455 reward=0.7792527 (468.05 it/sec) -training >> step=8725000, episode=1455 reward=0.8191187 (472.87 it/sec) -training >> step=8725100, episode=1455 reward=0.7880995 (497.14 it/sec) -training >> step=8725200, episode=1455 reward=0.7848231 (515.11 it/sec) -training >> step=8725300, episode=1455 reward=0.7752616 (457.23 it/sec) -training >> step=8725400, episode=1455 reward=0.7633965 (494.14 it/sec) -training >> step=8725500, episode=1455 reward=0.7804371 (532.42 it/sec) -training >> step=8725600, episode=1455 reward=0.796662 (501.88 it/sec) -training >> step=8725700, episode=1455 reward=0.7870882 (506.04 it/sec) -training >> step=8725800, episode=1455 reward=0.7743685 (473.74 it/sec) -training >> step=8725900, episode=1455 reward=0.7833769 (457.41 it/sec) -training >> step=8726000, episode=1455 reward=0.7996696 (478.66 it/sec) -training >> step=8726100, episode=1455 reward=0.7898112 (454.93 it/sec) -training >> step=8726200, episode=1455 reward=0.793029 (479.82 it/sec) -training >> step=8726300, episode=1455 reward=0.7824636 (479.24 it/sec) -training >> step=8726400, episode=1455 reward=0.7732194 (467.14 it/sec) -training >> step=8726500, episode=1455 reward=0.7990454 (497.21 it/sec) -training >> step=8726600, episode=1455 reward=0.7639843 (499.85 it/sec) -training >> step=8726700, episode=1455 reward=0.7885457 (517.27 it/sec) -training >> step=8726800, episode=1455 reward=0.7773799 (452.38 it/sec) -training >> step=8726900, episode=1455 reward=0.7720465 (505.09 it/sec) -training >> step=8727000, episode=1455 reward=0.7845862 (506.92 it/sec) -training >> step=8727100, episode=1455 reward=0.7737842 (478.76 it/sec) -training >> step=8727200, episode=1455 reward=0.8116909 (486.23 it/sec) -training >> step=8727300, episode=1455 reward=0.7848379 (423.56 it/sec) -training >> step=8727400, episode=1455 reward=0.7884479 (482.94 it/sec) -training >> step=8727500, episode=1455 reward=0.7876458 (486.72 it/sec) -training >> step=8727600, episode=1455 reward=0.7946073 (507.11 it/sec) -training >> step=8727700, episode=1455 reward=0.8086842 (538.22 it/sec) -training >> step=8727800, episode=1455 reward=0.7777044 (431.41 it/sec) -training >> step=8727900, episode=1455 reward=0.7762645 (465.60 it/sec) -training >> step=8728000, episode=1455 reward=0.7787698 (494.08 it/sec) -training >> step=8728100, episode=1455 reward=0.8180887 (506.23 it/sec) -training >> step=8728200, episode=1455 reward=0.7907133 (490.98 it/sec) -training >> step=8728300, episode=1455 reward=0.7994543 (460.69 it/sec) -training >> step=8728400, episode=1455 reward=0.7752045 (478.19 it/sec) -training >> step=8728500, episode=1455 reward=0.7997659 (432.92 it/sec) -training >> step=8728600, episode=1455 reward=0.7853233 (499.57 it/sec) -training >> step=8728700, episode=1455 reward=0.78189 (505.20 it/sec) -training >> step=8728800, episode=1455 reward=0.7681184 (433.16 it/sec) -training >> step=8728900, episode=1455 reward=0.7807218 (489.49 it/sec) -training >> step=8729000, episode=1455 reward=0.7914681 (483.04 it/sec) -training >> step=8729100, episode=1455 reward=0.7831039 (513.12 it/sec) -training >> step=8729200, episode=1455 reward=0.8001817 (502.23 it/sec) -training >> step=8729300, episode=1456 reward=0.7721706 (95.63 it/sec) -training >> step=8729400, episode=1456 reward=0.7799628 (445.06 it/sec) -training >> step=8729500, episode=1456 reward=0.7834826 (500.97 it/sec) -training >> step=8729600, episode=1456 reward=0.7861372 (497.82 it/sec) -training >> step=8729700, episode=1456 reward=0.7992922 (521.90 it/sec) -training >> step=8729800, episode=1456 reward=0.7982469 (515.33 it/sec) -training >> step=8729900, episode=1456 reward=0.7877439 (445.23 it/sec) -training >> step=8730000, episode=1456 reward=0.785831 (487.26 it/sec) -training >> step=8730100, episode=1456 reward=0.8062957 (478.45 it/sec) -training >> step=8730200, episode=1456 reward=0.7755346 (532.10 it/sec) -training >> step=8730300, episode=1456 reward=0.7806826 (552.31 it/sec) -training >> step=8730400, episode=1456 reward=0.7715305 (430.72 it/sec) -training >> step=8730500, episode=1456 reward=0.7973447 (410.47 it/sec) -training >> step=8730600, episode=1456 reward=0.7760464 (488.08 it/sec) -training >> step=8730700, episode=1456 reward=0.7899839 (432.73 it/sec) -training >> step=8730800, episode=1456 reward=0.7969849 (489.43 it/sec) -training >> step=8730900, episode=1456 reward=0.7936559 (489.80 it/sec) -training >> step=8731000, episode=1456 reward=0.7850316 (524.76 it/sec) -training >> step=8731100, episode=1456 reward=0.7825635 (459.12 it/sec) -training >> step=8731200, episode=1456 reward=0.785412 (500.56 it/sec) -training >> step=8731300, episode=1456 reward=0.7887066 (469.77 it/sec) -training >> step=8731400, episode=1456 reward=0.8037251 (438.75 it/sec) -training >> step=8731500, episode=1456 reward=0.7977588 (494.25 it/sec) -training >> step=8731600, episode=1456 reward=0.7920573 (502.33 it/sec) -training >> step=8731700, episode=1456 reward=0.7792255 (517.54 it/sec) -training >> step=8731800, episode=1456 reward=0.7714735 (477.76 it/sec) -training >> step=8731900, episode=1456 reward=0.7858825 (433.41 it/sec) -training >> step=8732000, episode=1456 reward=0.7830845 (518.51 it/sec) -training >> step=8732100, episode=1456 reward=0.7798519 (529.48 it/sec) -training >> step=8732200, episode=1456 reward=0.7561784 (496.47 it/sec) -training >> step=8732300, episode=1456 reward=0.785926 (442.30 it/sec) -training >> step=8732400, episode=1456 reward=0.815266 (477.86 it/sec) -training >> step=8732500, episode=1456 reward=0.7781176 (480.71 it/sec) -training >> step=8732600, episode=1456 reward=0.7725849 (520.44 it/sec) -training >> step=8732700, episode=1456 reward=0.7878694 (491.85 it/sec) -training >> step=8732800, episode=1456 reward=0.8132868 (513.23 it/sec) -training >> step=8732900, episode=1456 reward=0.7909713 (459.48 it/sec) -training >> step=8733000, episode=1456 reward=0.7939271 (464.60 it/sec) -training >> step=8733100, episode=1456 reward=0.8047953 (471.56 it/sec) -training >> step=8733200, episode=1456 reward=0.8170822 (512.12 it/sec) -training >> step=8733300, episode=1456 reward=0.8048326 (472.89 it/sec) -training >> step=8733400, episode=1456 reward=0.784582 (463.17 it/sec) -training >> step=8733500, episode=1456 reward=0.7729262 (491.42 it/sec) -training >> step=8733600, episode=1456 reward=0.7807562 (401.38 it/sec) -training >> step=8733700, episode=1456 reward=0.7857514 (464.73 it/sec) -training >> step=8733800, episode=1456 reward=0.7786409 (478.50 it/sec) -training >> step=8733900, episode=1456 reward=0.8077404 (505.49 it/sec) -training >> step=8734000, episode=1456 reward=0.7834327 (476.61 it/sec) -training >> step=8734100, episode=1456 reward=0.8052021 (503.53 it/sec) -training >> step=8734200, episode=1456 reward=0.7805759 (495.41 it/sec) -training >> step=8734300, episode=1456 reward=0.8046775 (488.95 it/sec) -training >> step=8734400, episode=1456 reward=0.792317 (465.98 it/sec) -training >> step=8734500, episode=1456 reward=0.8043348 (499.76 it/sec) -training >> step=8734600, episode=1456 reward=0.8030743 (528.61 it/sec) -training >> step=8734700, episode=1456 reward=0.7910432 (472.77 it/sec) -training >> step=8734800, episode=1456 reward=0.7993215 (473.30 it/sec) -training >> step=8734900, episode=1456 reward=0.7682908 (487.66 it/sec) -training >> step=8735000, episode=1456 reward=0.77946 (512.48 it/sec) -training >> step=8735100, episode=1456 reward=0.7965067 (495.03 it/sec) -training >> step=8735200, episode=1456 reward=0.7959819 (423.10 it/sec) -training >> step=8735300, episode=1457 reward=0.8030027 (89.20 it/sec) -training >> step=8735400, episode=1457 reward=0.7975461 (429.01 it/sec) -training >> step=8735500, episode=1457 reward=0.7850993 (469.79 it/sec) -training >> step=8735600, episode=1457 reward=0.759862 (537.89 it/sec) -training >> step=8735700, episode=1457 reward=0.7868567 (516.44 it/sec) -training >> step=8735800, episode=1457 reward=0.7799985 (491.78 it/sec) -training >> step=8735900, episode=1457 reward=0.788551 (436.73 it/sec) -training >> step=8736000, episode=1457 reward=0.7854437 (472.90 it/sec) -training >> step=8736100, episode=1457 reward=0.8106931 (490.03 it/sec) -training >> step=8736200, episode=1457 reward=0.7758421 (517.49 it/sec) -training >> step=8736300, episode=1457 reward=0.7884874 (511.65 it/sec) -training >> step=8736400, episode=1457 reward=0.7909222 (446.23 it/sec) -training >> step=8736500, episode=1457 reward=0.788408 (466.18 it/sec) -training >> step=8736600, episode=1457 reward=0.8048806 (496.13 it/sec) -training >> step=8736700, episode=1457 reward=0.7732587 (494.03 it/sec) -training >> step=8736800, episode=1457 reward=0.7891027 (494.48 it/sec) -training >> step=8736900, episode=1457 reward=0.8097332 (458.39 it/sec) -training >> step=8737000, episode=1457 reward=0.7985439 (511.61 it/sec) -training >> step=8737100, episode=1457 reward=0.7959628 (490.52 it/sec) -training >> step=8737200, episode=1457 reward=0.7989575 (492.61 it/sec) -training >> step=8737300, episode=1457 reward=0.7998434 (494.70 it/sec) -training >> step=8737400, episode=1457 reward=0.7972215 (471.23 it/sec) -training >> step=8737500, episode=1457 reward=0.7988062 (493.48 it/sec) -training >> step=8737600, episode=1457 reward=0.7857905 (470.58 it/sec) -training >> step=8737700, episode=1457 reward=0.7911995 (500.89 it/sec) -training >> step=8737800, episode=1457 reward=0.789224 (494.99 it/sec) -training >> step=8737900, episode=1457 reward=0.7932639 (452.35 it/sec) -training >> step=8738000, episode=1457 reward=0.7964396 (496.66 it/sec) -training >> step=8738100, episode=1457 reward=0.7933078 (503.39 it/sec) -training >> step=8738200, episode=1457 reward=0.780169 (493.89 it/sec) -training >> step=8738300, episode=1457 reward=0.7962466 (475.12 it/sec) -training >> step=8738400, episode=1457 reward=0.7784916 (443.41 it/sec) -training >> step=8738500, episode=1457 reward=0.7894574 (517.95 it/sec) -training >> step=8738600, episode=1457 reward=0.8135846 (520.71 it/sec) -training >> step=8738700, episode=1457 reward=0.7863343 (502.24 it/sec) -training >> step=8738800, episode=1457 reward=0.7825843 (499.23 it/sec) -training >> step=8738900, episode=1457 reward=0.7919986 (440.60 it/sec) -training >> step=8739000, episode=1457 reward=0.8112078 (502.58 it/sec) -training >> step=8739100, episode=1457 reward=0.7854875 (506.54 it/sec) -training >> step=8739200, episode=1457 reward=0.8000093 (522.09 it/sec) -training >> step=8739300, episode=1457 reward=0.8009346 (489.95 it/sec) -training >> step=8739400, episode=1457 reward=0.7857125 (439.72 it/sec) -training >> step=8739500, episode=1457 reward=0.7938692 (478.45 it/sec) -training >> step=8739600, episode=1457 reward=0.7706077 (461.99 it/sec) -training >> step=8739700, episode=1457 reward=0.7872127 (484.92 it/sec) -training >> step=8739800, episode=1457 reward=0.8092496 (430.23 it/sec) -training >> step=8739900, episode=1457 reward=0.7877241 (487.25 it/sec) -training >> step=8740000, episode=1457 reward=0.8057535 (480.75 it/sec) -training >> step=8740100, episode=1457 reward=0.7908742 (462.53 it/sec) -training >> step=8740200, episode=1457 reward=0.8104451 (497.52 it/sec) -training >> step=8740300, episode=1457 reward=0.7832731 (494.38 it/sec) -training >> step=8740400, episode=1457 reward=0.7933148 (434.33 it/sec) -training >> step=8740500, episode=1457 reward=0.786301 (437.34 it/sec) -training >> step=8740600, episode=1457 reward=0.8003892 (519.46 it/sec) -training >> step=8740700, episode=1457 reward=0.7612742 (492.09 it/sec) -training >> step=8740800, episode=1457 reward=0.7950251 (460.73 it/sec) -training >> step=8740900, episode=1457 reward=0.7804314 (484.34 it/sec) -training >> step=8741000, episode=1457 reward=0.7968202 (526.33 it/sec) -training >> step=8741100, episode=1457 reward=0.7972108 (484.45 it/sec) -training >> step=8741200, episode=1457 reward=0.8078154 (477.78 it/sec) -training >> step=8741300, episode=1458 reward=0.7889842 (97.45 it/sec) -training >> step=8741400, episode=1458 reward=0.7962784 (466.47 it/sec) -training >> step=8741500, episode=1458 reward=0.7761865 (464.03 it/sec) -training >> step=8741600, episode=1458 reward=0.7888303 (524.87 it/sec) -training >> step=8741700, episode=1458 reward=0.7874919 (540.58 it/sec) -training >> step=8741800, episode=1458 reward=0.7934051 (472.98 it/sec) -training >> step=8741900, episode=1458 reward=0.7964813 (479.58 it/sec) -training >> step=8742000, episode=1458 reward=0.807006 (464.65 it/sec) -training >> step=8742100, episode=1458 reward=0.794724 (488.41 it/sec) -training >> step=8742200, episode=1458 reward=0.7820492 (522.34 it/sec) -training >> step=8742300, episode=1458 reward=0.8001106 (505.32 it/sec) -training >> step=8742400, episode=1458 reward=0.7949635 (473.51 it/sec) -training >> step=8742500, episode=1458 reward=0.7960081 (422.85 it/sec) -training >> step=8742600, episode=1458 reward=0.7834651 (495.84 it/sec) -training >> step=8742700, episode=1458 reward=0.7732335 (497.11 it/sec) -training >> step=8742800, episode=1458 reward=0.8000085 (501.84 it/sec) -training >> step=8742900, episode=1458 reward=0.783687 (460.97 it/sec) -training >> step=8743000, episode=1458 reward=0.7697582 (465.27 it/sec) -training >> step=8743100, episode=1458 reward=0.7945668 (464.63 it/sec) -training >> step=8743200, episode=1458 reward=0.8045555 (484.79 it/sec) -training >> step=8743300, episode=1458 reward=0.7669517 (535.81 it/sec) -training >> step=8743400, episode=1458 reward=0.7926756 (524.69 it/sec) -training >> step=8743500, episode=1458 reward=0.772624 (462.06 it/sec) -training >> step=8743600, episode=1458 reward=0.7913926 (535.50 it/sec) -training >> step=8743700, episode=1458 reward=0.7691621 (542.81 it/sec) -training >> step=8743800, episode=1458 reward=0.790678 (547.34 it/sec) -training >> step=8743900, episode=1458 reward=0.7786081 (522.71 it/sec) -training >> step=8744000, episode=1458 reward=0.7955357 (495.92 it/sec) -training >> step=8744100, episode=1458 reward=0.7834705 (546.76 it/sec) -training >> step=8744200, episode=1458 reward=0.7872266 (504.31 it/sec) -training >> step=8744300, episode=1458 reward=0.8094536 (459.15 it/sec) -training >> step=8744400, episode=1458 reward=0.7976828 (501.48 it/sec) -training >> step=8744500, episode=1458 reward=0.7848381 (443.83 it/sec) -training >> step=8744600, episode=1458 reward=0.7758141 (509.07 it/sec) -training >> step=8744700, episode=1458 reward=0.7946056 (518.79 it/sec) -training >> step=8744800, episode=1458 reward=0.7697657 (570.15 it/sec) -training >> step=8744900, episode=1458 reward=0.7883492 (484.49 it/sec) -training >> step=8745000, episode=1458 reward=0.7842976 (489.73 it/sec) -training >> step=8745100, episode=1458 reward=0.7930218 (496.83 it/sec) -training >> step=8745200, episode=1458 reward=0.7860175 (534.57 it/sec) -training >> step=8745300, episode=1458 reward=0.7795656 (546.23 it/sec) -training >> step=8745400, episode=1458 reward=0.7857935 (519.50 it/sec) -training >> step=8745500, episode=1458 reward=0.8108044 (493.27 it/sec) -training >> step=8745600, episode=1458 reward=0.7958596 (482.95 it/sec) -training >> step=8745700, episode=1458 reward=0.7977142 (514.60 it/sec) -training >> step=8745800, episode=1458 reward=0.7822776 (544.14 it/sec) -training >> step=8745900, episode=1458 reward=0.7820519 (554.74 it/sec) -training >> step=8746000, episode=1458 reward=0.7945896 (508.59 it/sec) -training >> step=8746100, episode=1458 reward=0.7954789 (489.55 it/sec) -training >> step=8746200, episode=1458 reward=0.804854 (553.12 it/sec) -training >> step=8746300, episode=1458 reward=0.776824 (552.87 it/sec) -training >> step=8746400, episode=1458 reward=0.7930509 (536.39 it/sec) -training >> step=8746500, episode=1458 reward=0.7959672 (526.42 it/sec) -training >> step=8746600, episode=1458 reward=0.7900672 (527.48 it/sec) -training >> step=8746700, episode=1458 reward=0.7844751 (476.30 it/sec) -training >> step=8746800, episode=1458 reward=0.7681594 (484.40 it/sec) -training >> step=8746900, episode=1458 reward=0.7957741 (528.23 it/sec) -training >> step=8747000, episode=1458 reward=0.7955869 (514.00 it/sec) -training >> step=8747100, episode=1458 reward=0.7894903 (440.91 it/sec) -training >> step=8747200, episode=1458 reward=0.7658815 (501.77 it/sec) -training >> step=8747300, episode=1459 reward=0.7968668 (121.40 it/sec) -training >> step=8747400, episode=1459 reward=0.7927456 (525.71 it/sec) -training >> step=8747500, episode=1459 reward=0.7985725 (539.42 it/sec) -training >> step=8747600, episode=1459 reward=0.7742957 (520.08 it/sec) -training >> step=8747700, episode=1459 reward=0.7583138 (569.42 it/sec) -training >> step=8747800, episode=1459 reward=0.7943137 (494.68 it/sec) -training >> step=8747900, episode=1459 reward=0.8040414 (471.20 it/sec) -training >> step=8748000, episode=1459 reward=0.8055742 (547.11 it/sec) -training >> step=8748100, episode=1459 reward=0.793706 (538.26 it/sec) -training >> step=8748200, episode=1459 reward=0.7964215 (509.75 it/sec) -training >> step=8748300, episode=1459 reward=0.7793625 (523.85 it/sec) -training >> step=8748400, episode=1459 reward=0.8014868 (530.60 it/sec) -training >> step=8748500, episode=1459 reward=0.8024703 (455.80 it/sec) -training >> step=8748600, episode=1459 reward=0.7681249 (524.20 it/sec) -training >> step=8748700, episode=1459 reward=0.8101782 (536.03 it/sec) -training >> step=8748800, episode=1459 reward=0.7858862 (525.78 it/sec) -training >> step=8748900, episode=1459 reward=0.7869265 (517.12 it/sec) -training >> step=8749000, episode=1459 reward=0.8019779 (493.47 it/sec) -training >> step=8749100, episode=1459 reward=0.7892334 (493.86 it/sec) -training >> step=8749200, episode=1459 reward=0.7977645 (527.44 it/sec) -training >> step=8749300, episode=1459 reward=0.7910993 (527.06 it/sec) -training >> step=8749400, episode=1459 reward=0.8219318 (480.15 it/sec) -training >> step=8749500, episode=1459 reward=0.7945188 (520.19 it/sec) -training >> step=8749600, episode=1459 reward=0.8120762 (530.37 it/sec) -training >> step=8749700, episode=1459 reward=0.7992321 (515.12 it/sec) -training >> step=8749800, episode=1459 reward=0.7875626 (539.65 it/sec) -training >> step=8749900, episode=1459 reward=0.7767093 (529.06 it/sec) -training >> step=8750000, episode=1459 reward=0.7930954 (479.45 it/sec) -training >> step=8750100, episode=1459 reward=0.7931761 (505.50 it/sec) -training >> step=8750200, episode=1459 reward=0.7902881 (539.88 it/sec) -training >> step=8750300, episode=1459 reward=0.7779609 (484.67 it/sec) -training >> step=8750400, episode=1459 reward=0.7880969 (530.40 it/sec) -training >> step=8750500, episode=1459 reward=0.7748903 (493.84 it/sec) -training >> step=8750600, episode=1459 reward=0.7827256 (530.33 it/sec) -training >> step=8750700, episode=1459 reward=0.7962345 (522.00 it/sec) -training >> step=8750800, episode=1459 reward=0.7800379 (539.33 it/sec) -training >> step=8750900, episode=1459 reward=0.7946773 (551.47 it/sec) -training >> step=8751000, episode=1459 reward=0.7891594 (487.82 it/sec) -training >> step=8751100, episode=1459 reward=0.8022936 (483.51 it/sec) -training >> step=8751200, episode=1459 reward=0.7796401 (547.53 it/sec) -training >> step=8751300, episode=1459 reward=0.7851897 (585.72 it/sec) -training >> step=8751400, episode=1459 reward=0.7900769 (538.46 it/sec) -training >> step=8751500, episode=1459 reward=0.7958221 (529.48 it/sec) -training >> step=8751600, episode=1459 reward=0.7939367 (494.32 it/sec) -training >> step=8751700, episode=1459 reward=0.8021148 (521.34 it/sec) -training >> step=8751800, episode=1459 reward=0.7998346 (539.37 it/sec) -training >> step=8751900, episode=1459 reward=0.8161827 (526.77 it/sec) -training >> step=8752000, episode=1459 reward=0.8004386 (558.02 it/sec) -training >> step=8752100, episode=1459 reward=0.7994835 (491.37 it/sec) -training >> step=8752200, episode=1459 reward=0.7859499 (482.12 it/sec) -training >> step=8752300, episode=1459 reward=0.7737026 (543.74 it/sec) -training >> step=8752400, episode=1459 reward=0.793389 (565.67 it/sec) -training >> step=8752500, episode=1459 reward=0.7835268 (516.25 it/sec) -training >> step=8752600, episode=1459 reward=0.8104469 (527.68 it/sec) -training >> step=8752700, episode=1459 reward=0.7769221 (456.09 it/sec) -training >> step=8752800, episode=1459 reward=0.78086 (523.45 it/sec) -training >> step=8752900, episode=1459 reward=0.7777012 (532.60 it/sec) -training >> step=8753000, episode=1459 reward=0.8039117 (507.59 it/sec) -training >> step=8753100, episode=1459 reward=0.8011715 (491.93 it/sec) -training >> step=8753200, episode=1459 reward=0.788654 (446.80 it/sec) -training >> step=8753300, episode=1460 reward=0.7929935 (95.18 it/sec) -training >> step=8753400, episode=1460 reward=0.8079961 (501.66 it/sec) -training >> step=8753500, episode=1460 reward=0.7729145 (495.69 it/sec) -training >> step=8753600, episode=1460 reward=0.7922994 (529.55 it/sec) -training >> step=8753700, episode=1460 reward=0.7756435 (481.22 it/sec) -training >> step=8753800, episode=1460 reward=0.7884783 (430.13 it/sec) -training >> step=8753900, episode=1460 reward=0.7963846 (457.84 it/sec) -training >> step=8754000, episode=1460 reward=0.7794757 (494.76 it/sec) -training >> step=8754100, episode=1460 reward=0.7881374 (533.01 it/sec) -training >> step=8754200, episode=1460 reward=0.7933038 (489.77 it/sec) -training >> step=8754300, episode=1460 reward=0.7947602 (448.96 it/sec) -training >> step=8754400, episode=1460 reward=0.8035471 (505.94 it/sec) -training >> step=8754500, episode=1460 reward=0.7791911 (523.47 it/sec) -training >> step=8754600, episode=1460 reward=0.7890543 (504.71 it/sec) -training >> step=8754700, episode=1460 reward=0.7892165 (488.23 it/sec) -training >> step=8754800, episode=1460 reward=0.8094386 (466.04 it/sec) -training >> step=8754900, episode=1460 reward=0.7797933 (483.20 it/sec) -training >> step=8755000, episode=1460 reward=0.7868115 (462.55 it/sec) -training >> step=8755100, episode=1460 reward=0.8052952 (517.19 it/sec) -training >> step=8755200, episode=1460 reward=0.7940395 (479.38 it/sec) -training >> step=8755300, episode=1460 reward=0.7742095 (409.36 it/sec) -training >> step=8755400, episode=1460 reward=0.8000427 (495.83 it/sec) -training >> step=8755500, episode=1460 reward=0.7919293 (493.76 it/sec) -training >> step=8755600, episode=1460 reward=0.776997 (452.37 it/sec) -training >> step=8755700, episode=1460 reward=0.7694502 (507.76 it/sec) -training >> step=8755800, episode=1460 reward=0.7770256 (448.24 it/sec) -training >> step=8755900, episode=1460 reward=0.7905357 (520.67 it/sec) -training >> step=8756000, episode=1460 reward=0.8097211 (428.94 it/sec) -training >> step=8756100, episode=1460 reward=0.7955351 (509.38 it/sec) -training >> step=8756200, episode=1460 reward=0.7822087 (472.13 it/sec) -training >> step=8756300, episode=1460 reward=0.7763729 (450.19 it/sec) -training >> step=8756400, episode=1460 reward=0.779453 (473.07 it/sec) -training >> step=8756500, episode=1460 reward=0.7921838 (490.81 it/sec) -training >> step=8756600, episode=1460 reward=0.7768067 (503.92 it/sec) -training >> step=8756700, episode=1460 reward=0.799931 (483.55 it/sec) -training >> step=8756800, episode=1460 reward=0.7976633 (412.05 it/sec) -training >> step=8756900, episode=1460 reward=0.7762511 (486.28 it/sec) -training >> step=8757000, episode=1460 reward=0.7856499 (481.76 it/sec) -training >> step=8757100, episode=1460 reward=0.809554 (466.76 it/sec) -training >> step=8757200, episode=1460 reward=0.8078622 (455.82 it/sec) -training >> step=8757300, episode=1460 reward=0.7930824 (450.28 it/sec) -training >> step=8757400, episode=1460 reward=0.8022147 (431.24 it/sec) -training >> step=8757500, episode=1460 reward=0.7722243 (464.93 it/sec) -training >> step=8757600, episode=1460 reward=0.7956479 (472.51 it/sec) -training >> step=8757700, episode=1460 reward=0.7938352 (477.22 it/sec) -training >> step=8757800, episode=1460 reward=0.7563115 (432.75 it/sec) -training >> step=8757900, episode=1460 reward=0.7873141 (463.67 it/sec) -training >> step=8758000, episode=1460 reward=0.7900743 (469.89 it/sec) -training >> step=8758100, episode=1460 reward=0.7997541 (432.08 it/sec) -training >> step=8758200, episode=1460 reward=0.7697142 (434.45 it/sec) -training >> step=8758300, episode=1460 reward=0.7904713 (455.90 it/sec) -training >> step=8758400, episode=1460 reward=0.7783217 (486.88 it/sec) -training >> step=8758500, episode=1460 reward=0.7894539 (448.59 it/sec) -training >> step=8758600, episode=1460 reward=0.8005829 (450.54 it/sec) -training >> step=8758700, episode=1460 reward=0.7893731 (432.94 it/sec) -training >> step=8758800, episode=1460 reward=0.7866709 (443.31 it/sec) -training >> step=8758900, episode=1460 reward=0.7791749 (423.62 it/sec) -training >> step=8759000, episode=1460 reward=0.7727919 (434.04 it/sec) -training >> step=8759100, episode=1460 reward=0.8038123 (380.42 it/sec) -training >> step=8759200, episode=1460 reward=0.8103621 (446.15 it/sec) -training >> step=8759300, episode=1461 reward=0.7799744 (96.69 it/sec) -training >> step=8759400, episode=1461 reward=0.7913975 (545.18 it/sec) -training >> step=8759500, episode=1461 reward=0.7871827 (466.88 it/sec) -training >> step=8759600, episode=1461 reward=0.8009143 (499.45 it/sec) -training >> step=8759700, episode=1461 reward=0.8000368 (413.05 it/sec) -training >> step=8759800, episode=1461 reward=0.7927525 (493.19 it/sec) -training >> step=8759900, episode=1461 reward=0.7943792 (481.78 it/sec) -training >> step=8760000, episode=1461 reward=0.7935427 (478.99 it/sec) -training >> step=8760100, episode=1461 reward=0.7879446 (487.98 it/sec) -training >> step=8760200, episode=1461 reward=0.8166166 (450.75 it/sec) -training >> step=8760300, episode=1461 reward=0.7805777 (462.60 it/sec) -training >> step=8760400, episode=1461 reward=0.7943519 (436.50 it/sec) -training >> step=8760500, episode=1461 reward=0.7993195 (477.77 it/sec) -training >> step=8760600, episode=1461 reward=0.801377 (432.54 it/sec) -training >> step=8760700, episode=1461 reward=0.7857593 (432.80 it/sec) -training >> step=8760800, episode=1461 reward=0.7782368 (456.16 it/sec) -training >> step=8760900, episode=1461 reward=0.7924761 (448.84 it/sec) -training >> step=8761000, episode=1461 reward=0.7823476 (408.59 it/sec) -training >> step=8761100, episode=1461 reward=0.7894163 (444.08 it/sec) -training >> step=8761200, episode=1461 reward=0.7802206 (397.52 it/sec) -training >> step=8761300, episode=1461 reward=0.8139343 (481.53 it/sec) -training >> step=8761400, episode=1461 reward=0.7828905 (458.54 it/sec) -training >> step=8761500, episode=1461 reward=0.8001873 (470.24 it/sec) -training >> step=8761600, episode=1461 reward=0.7713739 (452.14 it/sec) -training >> step=8761700, episode=1461 reward=0.7825733 (431.49 it/sec) -training >> step=8761800, episode=1461 reward=0.7706174 (468.08 it/sec) -training >> step=8761900, episode=1461 reward=0.7839212 (465.07 it/sec) -training >> step=8762000, episode=1461 reward=0.7791668 (478.76 it/sec) -training >> step=8762100, episode=1461 reward=0.7793348 (459.76 it/sec) -training >> step=8762200, episode=1461 reward=0.7926775 (472.36 it/sec) -training >> step=8762300, episode=1461 reward=0.7793851 (438.95 it/sec) -training >> step=8762400, episode=1461 reward=0.7794 (460.53 it/sec) -training >> step=8762500, episode=1461 reward=0.8038145 (439.78 it/sec) -training >> step=8762600, episode=1461 reward=0.8120943 (427.67 it/sec) -training >> step=8762700, episode=1461 reward=0.8239917 (482.58 it/sec) -training >> step=8762800, episode=1461 reward=0.7895817 (424.12 it/sec) -training >> step=8762900, episode=1461 reward=0.8049568 (435.62 it/sec) -training >> step=8763000, episode=1461 reward=0.7638632 (439.75 it/sec) -training >> step=8763100, episode=1461 reward=0.7922927 (461.52 it/sec) -training >> step=8763200, episode=1461 reward=0.7866843 (497.03 it/sec) -training >> step=8763300, episode=1461 reward=0.7802096 (440.92 it/sec) -training >> step=8763400, episode=1461 reward=0.8107066 (497.67 it/sec) -training >> step=8763500, episode=1461 reward=0.7712421 (418.45 it/sec) -training >> step=8763600, episode=1461 reward=0.7949836 (466.35 it/sec) -training >> step=8763700, episode=1461 reward=0.7874601 (499.54 it/sec) -training >> step=8763800, episode=1461 reward=0.7840139 (475.93 it/sec) -training >> step=8763900, episode=1461 reward=0.778051 (459.43 it/sec) -training >> step=8764000, episode=1461 reward=0.8084507 (434.81 it/sec) -training >> step=8764100, episode=1461 reward=0.7963436 (483.96 it/sec) -training >> step=8764200, episode=1461 reward=0.7980587 (457.54 it/sec) -training >> step=8764300, episode=1461 reward=0.8025336 (489.79 it/sec) -training >> step=8764400, episode=1461 reward=0.7964241 (461.03 it/sec) -training >> step=8764500, episode=1461 reward=0.7855518 (472.01 it/sec) -training >> step=8764600, episode=1461 reward=0.8037433 (468.55 it/sec) -training >> step=8764700, episode=1461 reward=0.8025685 (426.54 it/sec) -training >> step=8764800, episode=1461 reward=0.7857583 (485.83 it/sec) -training >> step=8764900, episode=1461 reward=0.8102535 (509.26 it/sec) -training >> step=8765000, episode=1461 reward=0.8017642 (421.35 it/sec) -training >> step=8765100, episode=1461 reward=0.7718781 (448.59 it/sec) -training >> step=8765200, episode=1461 reward=0.7665403 (494.20 it/sec) -training >> step=8765300, episode=1462 reward=0.7930763 (97.67 it/sec) -training >> step=8765400, episode=1462 reward=0.8036023 (345.26 it/sec) -training >> step=8765500, episode=1462 reward=0.7972156 (473.27 it/sec) -training >> step=8765600, episode=1462 reward=0.7879542 (528.21 it/sec) -training >> step=8765700, episode=1462 reward=0.7943124 (476.76 it/sec) -training >> step=8765800, episode=1462 reward=0.7919477 (488.81 it/sec) -training >> step=8765900, episode=1462 reward=0.7907596 (509.96 it/sec) -training >> step=8766000, episode=1462 reward=0.7971674 (438.28 it/sec) -training >> step=8766100, episode=1462 reward=0.7829015 (491.34 it/sec) -training >> step=8766200, episode=1462 reward=0.7964334 (523.48 it/sec) -training >> step=8766300, episode=1462 reward=0.7992934 (493.70 it/sec) -training >> step=8766400, episode=1462 reward=0.7743832 (491.59 it/sec) -training >> step=8766500, episode=1462 reward=0.7924955 (467.62 it/sec) -training >> step=8766600, episode=1462 reward=0.7801394 (462.72 it/sec) -training >> step=8766700, episode=1462 reward=0.7911512 (475.77 it/sec) -training >> step=8766800, episode=1462 reward=0.7630897 (485.26 it/sec) -training >> step=8766900, episode=1462 reward=0.7745628 (473.70 it/sec) -training >> step=8767000, episode=1462 reward=0.7907664 (460.62 it/sec) -training >> step=8767100, episode=1462 reward=0.7916279 (429.25 it/sec) -training >> step=8767200, episode=1462 reward=0.7889946 (431.98 it/sec) -training >> step=8767300, episode=1462 reward=0.7776451 (481.12 it/sec) -training >> step=8767400, episode=1462 reward=0.8013776 (441.08 it/sec) -training >> step=8767500, episode=1462 reward=0.8125852 (435.58 it/sec) -training >> step=8767600, episode=1462 reward=0.8006638 (424.83 it/sec) -training >> step=8767700, episode=1462 reward=0.799537 (441.32 it/sec) -training >> step=8767800, episode=1462 reward=0.8007989 (467.82 it/sec) -training >> step=8767900, episode=1462 reward=0.8013384 (503.88 it/sec) -training >> step=8768000, episode=1462 reward=0.7785863 (442.59 it/sec) -training >> step=8768100, episode=1462 reward=0.7809124 (465.65 it/sec) -training >> step=8768200, episode=1462 reward=0.7906294 (475.31 it/sec) -training >> step=8768300, episode=1462 reward=0.786674 (478.43 it/sec) -training >> step=8768400, episode=1462 reward=0.7868773 (484.72 it/sec) -training >> step=8768500, episode=1462 reward=0.7875089 (412.46 it/sec) -training >> step=8768600, episode=1462 reward=0.7771733 (492.39 it/sec) -training >> step=8768700, episode=1462 reward=0.7861825 (472.73 it/sec) -training >> step=8768800, episode=1462 reward=0.78409 (421.94 it/sec) -training >> step=8768900, episode=1462 reward=0.7969243 (489.91 it/sec) -training >> step=8769000, episode=1462 reward=0.7835025 (494.54 it/sec) -training >> step=8769100, episode=1462 reward=0.7798517 (481.99 it/sec) -training >> step=8769200, episode=1462 reward=0.7899699 (507.61 it/sec) -training >> step=8769300, episode=1462 reward=0.776376 (504.82 it/sec) -training >> step=8769400, episode=1462 reward=0.7843243 (549.78 it/sec) -training >> step=8769500, episode=1462 reward=0.8151618 (487.93 it/sec) -training >> step=8769600, episode=1462 reward=0.7880994 (489.90 it/sec) -training >> step=8769700, episode=1462 reward=0.7775145 (531.90 it/sec) -training >> step=8769800, episode=1462 reward=0.7972165 (508.68 it/sec) -training >> step=8769900, episode=1462 reward=0.7818121 (504.56 it/sec) -training >> step=8770000, episode=1462 reward=0.7856102 (411.44 it/sec) -training >> step=8770100, episode=1462 reward=0.7799218 (477.77 it/sec) -training >> step=8770200, episode=1462 reward=0.8102811 (498.05 it/sec) -training >> step=8770300, episode=1462 reward=0.7983802 (502.32 it/sec) -training >> step=8770400, episode=1462 reward=0.7896154 (547.47 it/sec) -training >> step=8770500, episode=1462 reward=0.8107368 (486.30 it/sec) -training >> step=8770600, episode=1462 reward=0.7730359 (493.90 it/sec) -training >> step=8770700, episode=1462 reward=0.7964858 (522.12 it/sec) -training >> step=8770800, episode=1462 reward=0.8112784 (540.65 it/sec) -training >> step=8770900, episode=1462 reward=0.7877382 (506.16 it/sec) -training >> step=8771000, episode=1462 reward=0.7965708 (506.73 it/sec) -training >> step=8771100, episode=1462 reward=0.7894226 (510.95 it/sec) -training >> step=8771200, episode=1462 reward=0.793161 (531.89 it/sec) -training >> step=8771300, episode=1463 reward=0.7765303 (123.70 it/sec) -training >> step=8771400, episode=1463 reward=0.7965017 (502.98 it/sec) -training >> step=8771500, episode=1463 reward=0.7915503 (530.53 it/sec) -training >> step=8771600, episode=1463 reward=0.7960062 (484.78 it/sec) -training >> step=8771700, episode=1463 reward=0.7775998 (491.22 it/sec) -training >> step=8771800, episode=1463 reward=0.8102167 (485.74 it/sec) -training >> step=8771900, episode=1463 reward=0.8017083 (537.19 it/sec) -training >> step=8772000, episode=1463 reward=0.8039061 (482.63 it/sec) -training >> step=8772100, episode=1463 reward=0.7814762 (512.96 it/sec) -training >> step=8772200, episode=1463 reward=0.8110629 (522.47 it/sec) -training >> step=8772300, episode=1463 reward=0.7854615 (533.36 it/sec) -training >> step=8772400, episode=1463 reward=0.7997985 (512.40 it/sec) -training >> step=8772500, episode=1463 reward=0.8014359 (517.61 it/sec) -training >> step=8772600, episode=1463 reward=0.7980233 (476.40 it/sec) -training >> step=8772700, episode=1463 reward=0.7797099 (545.78 it/sec) -training >> step=8772800, episode=1463 reward=0.791344 (477.38 it/sec) -training >> step=8772900, episode=1463 reward=0.7935433 (535.49 it/sec) -training >> step=8773000, episode=1463 reward=0.8033354 (523.07 it/sec) -training >> step=8773100, episode=1463 reward=0.7984025 (487.40 it/sec) -training >> step=8773200, episode=1463 reward=0.7985353 (492.49 it/sec) -training >> step=8773300, episode=1463 reward=0.7849575 (542.09 it/sec) -training >> step=8773400, episode=1463 reward=0.7952859 (498.77 it/sec) -training >> step=8773500, episode=1463 reward=0.7714107 (527.37 it/sec) -training >> step=8773600, episode=1463 reward=0.7842764 (508.29 it/sec) -training >> step=8773700, episode=1463 reward=0.7965322 (517.05 it/sec) -training >> step=8773800, episode=1463 reward=0.7824103 (515.25 it/sec) -training >> step=8773900, episode=1463 reward=0.8024519 (428.55 it/sec) -training >> step=8774000, episode=1463 reward=0.8105139 (473.11 it/sec) -training >> step=8774100, episode=1463 reward=0.7930283 (429.89 it/sec) -training >> step=8774200, episode=1463 reward=0.8009447 (462.93 it/sec) -training >> step=8774300, episode=1463 reward=0.7914392 (434.12 it/sec) -training >> step=8774400, episode=1463 reward=0.8155601 (474.26 it/sec) -training >> step=8774500, episode=1463 reward=0.7832024 (437.98 it/sec) -training >> step=8774600, episode=1463 reward=0.7726121 (445.03 it/sec) -training >> step=8774700, episode=1463 reward=0.7743757 (467.30 it/sec) -training >> step=8774800, episode=1463 reward=0.7854453 (492.59 it/sec) -training >> step=8774900, episode=1463 reward=0.796895 (455.14 it/sec) -training >> step=8775000, episode=1463 reward=0.7951949 (466.77 it/sec) -training >> step=8775100, episode=1463 reward=0.7887275 (455.16 it/sec) -training >> step=8775200, episode=1463 reward=0.8042836 (484.86 it/sec) -training >> step=8775300, episode=1463 reward=0.8104764 (434.21 it/sec) -training >> step=8775400, episode=1463 reward=0.8064644 (402.19 it/sec) -training >> step=8775500, episode=1463 reward=0.8165812 (429.77 it/sec) -training >> step=8775600, episode=1463 reward=0.8038689 (458.72 it/sec) -training >> step=8775700, episode=1463 reward=0.8000339 (422.51 it/sec) -training >> step=8775800, episode=1463 reward=0.7928061 (460.17 it/sec) -training >> step=8775900, episode=1463 reward=0.7903717 (520.44 it/sec) -training >> step=8776000, episode=1463 reward=0.7905322 (459.30 it/sec) -training >> step=8776100, episode=1463 reward=0.7995501 (460.32 it/sec) -training >> step=8776200, episode=1463 reward=0.7862737 (451.31 it/sec) -training >> step=8776300, episode=1463 reward=0.7931234 (455.65 it/sec) -training >> step=8776400, episode=1463 reward=0.8021239 (462.39 it/sec) -training >> step=8776500, episode=1463 reward=0.7849555 (450.55 it/sec) -training >> step=8776600, episode=1463 reward=0.7811778 (442.61 it/sec) -training >> step=8776700, episode=1463 reward=0.7786298 (424.32 it/sec) -training >> step=8776800, episode=1463 reward=0.773553 (450.35 it/sec) -training >> step=8776900, episode=1463 reward=0.7881108 (472.92 it/sec) -training >> step=8777000, episode=1463 reward=0.7658693 (485.83 it/sec) -training >> step=8777100, episode=1463 reward=0.7861671 (493.79 it/sec) -training >> step=8777200, episode=1463 reward=0.7829886 (481.20 it/sec) -training >> step=8777300, episode=1464 reward=0.7761503 (99.43 it/sec) -training >> step=8777400, episode=1464 reward=0.7717547 (498.11 it/sec) -training >> step=8777500, episode=1464 reward=0.7647161 (465.74 it/sec) -training >> step=8777600, episode=1464 reward=0.7817596 (527.50 it/sec) -training >> step=8777700, episode=1464 reward=0.7803933 (486.77 it/sec) -training >> step=8777800, episode=1464 reward=0.7749888 (472.59 it/sec) -training >> step=8777900, episode=1464 reward=0.7815755 (498.60 it/sec) -training >> step=8778000, episode=1464 reward=0.7904211 (507.04 it/sec) -training >> step=8778100, episode=1464 reward=0.7899356 (502.10 it/sec) -training >> step=8778200, episode=1464 reward=0.8098865 (465.86 it/sec) -training >> step=8778300, episode=1464 reward=0.7940325 (488.22 it/sec) -training >> step=8778400, episode=1464 reward=0.7986844 (450.44 it/sec) -training >> step=8778500, episode=1464 reward=0.7966734 (420.45 it/sec) -training >> step=8778600, episode=1464 reward=0.8012158 (507.64 it/sec) -training >> step=8778700, episode=1464 reward=0.8041232 (545.40 it/sec) -training >> step=8778800, episode=1464 reward=0.7912468 (490.25 it/sec) -training >> step=8778900, episode=1464 reward=0.7855127 (454.51 it/sec) -training >> step=8779000, episode=1464 reward=0.7738931 (504.77 it/sec) -training >> step=8779100, episode=1464 reward=0.7916446 (488.56 it/sec) -training >> step=8779200, episode=1464 reward=0.7664264 (484.99 it/sec) -training >> step=8779300, episode=1464 reward=0.7877269 (480.21 it/sec) -training >> step=8779400, episode=1464 reward=0.8051783 (563.61 it/sec) -training >> step=8779500, episode=1464 reward=0.7901438 (499.58 it/sec) -training >> step=8779600, episode=1464 reward=0.7883906 (531.99 it/sec) -training >> step=8779700, episode=1464 reward=0.8014597 (520.60 it/sec) -training >> step=8779800, episode=1464 reward=0.7739127 (524.67 it/sec) -training >> step=8779900, episode=1464 reward=0.7778531 (506.65 it/sec) -training >> step=8780000, episode=1464 reward=0.7878689 (528.72 it/sec) -training >> step=8780100, episode=1464 reward=0.7810109 (549.64 it/sec) -training >> step=8780200, episode=1464 reward=0.7726273 (497.23 it/sec) -training >> step=8780300, episode=1464 reward=0.7950034 (442.64 it/sec) -training >> step=8780400, episode=1464 reward=0.7722543 (536.37 it/sec) -training >> step=8780500, episode=1464 reward=0.7671376 (521.44 it/sec) -training >> step=8780600, episode=1464 reward=0.7860852 (540.05 it/sec) -training >> step=8780700, episode=1464 reward=0.7987522 (521.68 it/sec) -training >> step=8780800, episode=1464 reward=0.7766299 (509.53 it/sec) -training >> step=8780900, episode=1464 reward=0.7990783 (467.03 it/sec) -training >> step=8781000, episode=1464 reward=0.7752773 (471.64 it/sec) -training >> step=8781100, episode=1464 reward=0.7951602 (488.79 it/sec) -training >> step=8781200, episode=1464 reward=0.7934748 (485.42 it/sec) -training >> step=8781300, episode=1464 reward=0.7741997 (501.82 it/sec) -training >> step=8781400, episode=1464 reward=0.7969627 (432.04 it/sec) -training >> step=8781500, episode=1464 reward=0.7701942 (476.11 it/sec) -training >> step=8781600, episode=1464 reward=0.7792439 (484.79 it/sec) -training >> step=8781700, episode=1464 reward=0.7897361 (515.80 it/sec) -training >> step=8781800, episode=1464 reward=0.776337 (505.95 it/sec) -training >> step=8781900, episode=1464 reward=0.795217 (513.02 it/sec) -training >> step=8782000, episode=1464 reward=0.7931558 (530.17 it/sec) -training >> step=8782100, episode=1464 reward=0.7875883 (523.17 it/sec) -training >> step=8782200, episode=1464 reward=0.7817007 (543.63 it/sec) -training >> step=8782300, episode=1464 reward=0.7996337 (516.17 it/sec) -training >> step=8782400, episode=1464 reward=0.7835882 (521.85 it/sec) -training >> step=8782500, episode=1464 reward=0.7869578 (520.58 it/sec) -training >> step=8782600, episode=1464 reward=0.7875744 (543.29 it/sec) -training >> step=8782700, episode=1464 reward=0.7982782 (507.80 it/sec) -training >> step=8782800, episode=1464 reward=0.7967401 (506.24 it/sec) -training >> step=8782900, episode=1464 reward=0.7974045 (492.32 it/sec) -training >> step=8783000, episode=1464 reward=0.8076364 (545.44 it/sec) -training >> step=8783100, episode=1464 reward=0.8014585 (514.64 it/sec) -training >> step=8783200, episode=1464 reward=0.7880181 (502.00 it/sec) -training >> step=8783300, episode=1465 reward=0.7956856 (131.28 it/sec) -training >> step=8783400, episode=1465 reward=0.7842255 (518.97 it/sec) -training >> step=8783500, episode=1465 reward=0.803943 (488.98 it/sec) -training >> step=8783600, episode=1465 reward=0.7678679 (472.02 it/sec) -training >> step=8783700, episode=1465 reward=0.7897565 (477.34 it/sec) -training >> step=8783800, episode=1465 reward=0.8011574 (485.27 it/sec) -training >> step=8783900, episode=1465 reward=0.7882224 (513.49 it/sec) -training >> step=8784000, episode=1465 reward=0.7911599 (475.88 it/sec) -training >> step=8784100, episode=1465 reward=0.806973 (491.47 it/sec) -training >> step=8784200, episode=1465 reward=0.7764516 (473.87 it/sec) -training >> step=8784300, episode=1465 reward=0.7827926 (489.44 it/sec) -training >> step=8784400, episode=1465 reward=0.7751257 (521.33 it/sec) -training >> step=8784500, episode=1465 reward=0.7903413 (521.13 it/sec) -training >> step=8784600, episode=1465 reward=0.7869419 (509.74 it/sec) -training >> step=8784700, episode=1465 reward=0.791208 (520.82 it/sec) -training >> step=8784800, episode=1465 reward=0.7888564 (540.88 it/sec) -training >> step=8784900, episode=1465 reward=0.7833281 (510.35 it/sec) -training >> step=8785000, episode=1465 reward=0.7813103 (469.79 it/sec) -training >> step=8785100, episode=1465 reward=0.80067 (532.31 it/sec) -training >> step=8785200, episode=1465 reward=0.817096 (505.78 it/sec) -training >> step=8785300, episode=1465 reward=0.7803695 (487.96 it/sec) -training >> step=8785400, episode=1465 reward=0.7768286 (524.43 it/sec) -training >> step=8785500, episode=1465 reward=0.7845138 (492.00 it/sec) -training >> step=8785600, episode=1465 reward=0.7769956 (502.86 it/sec) -training >> step=8785700, episode=1465 reward=0.8096158 (488.65 it/sec) -training >> step=8785800, episode=1465 reward=0.8093051 (472.04 it/sec) -training >> step=8785900, episode=1465 reward=0.7924896 (523.46 it/sec) -training >> step=8786000, episode=1465 reward=0.7818316 (517.81 it/sec) -training >> step=8786100, episode=1465 reward=0.8101858 (534.54 it/sec) -training >> step=8786200, episode=1465 reward=0.787663 (421.44 it/sec) -training >> step=8786300, episode=1465 reward=0.7928855 (502.77 it/sec) -training >> step=8786400, episode=1465 reward=0.7731869 (520.48 it/sec) -training >> step=8786500, episode=1465 reward=0.7786169 (493.86 it/sec) -training >> step=8786600, episode=1465 reward=0.7754675 (560.58 it/sec) -training >> step=8786700, episode=1465 reward=0.7814902 (523.99 it/sec) -training >> step=8786800, episode=1465 reward=0.7932035 (498.98 it/sec) -training >> step=8786900, episode=1465 reward=0.8036326 (519.67 it/sec) -training >> step=8787000, episode=1465 reward=0.7939733 (533.11 it/sec) -training >> step=8787100, episode=1465 reward=0.7953751 (510.98 it/sec) -training >> step=8787200, episode=1465 reward=0.8078672 (491.85 it/sec) -training >> step=8787300, episode=1465 reward=0.8107557 (534.12 it/sec) -training >> step=8787400, episode=1465 reward=0.8157459 (465.13 it/sec) -training >> step=8787500, episode=1465 reward=0.8001423 (534.97 it/sec) -training >> step=8787600, episode=1465 reward=0.7906917 (487.42 it/sec) -training >> step=8787700, episode=1465 reward=0.796446 (539.58 it/sec) -training >> step=8787800, episode=1465 reward=0.7954686 (518.99 it/sec) -training >> step=8787900, episode=1465 reward=0.7790965 (549.19 it/sec) -training >> step=8788000, episode=1465 reward=0.7902119 (481.95 it/sec) -training >> step=8788100, episode=1465 reward=0.8009648 (464.21 it/sec) -training >> step=8788200, episode=1465 reward=0.7966856 (500.07 it/sec) -training >> step=8788300, episode=1465 reward=0.7773397 (428.58 it/sec) -training >> step=8788400, episode=1465 reward=0.7939954 (469.90 it/sec) -training >> step=8788500, episode=1465 reward=0.768953 (465.70 it/sec) -training >> step=8788600, episode=1465 reward=0.8041735 (401.98 it/sec) -training >> step=8788700, episode=1465 reward=0.7800195 (454.18 it/sec) -training >> step=8788800, episode=1465 reward=0.7969276 (471.75 it/sec) -training >> step=8788900, episode=1465 reward=0.7993798 (483.72 it/sec) -training >> step=8789000, episode=1465 reward=0.797645 (500.36 it/sec) -training >> step=8789100, episode=1465 reward=0.8011239 (500.35 it/sec) -training >> step=8789200, episode=1465 reward=0.7900393 (511.14 it/sec) -training >> step=8789300, episode=1466 reward=0.7754622 (114.31 it/sec) -training >> step=8789400, episode=1466 reward=0.7865176 (476.45 it/sec) -training >> step=8789500, episode=1466 reward=0.794871 (483.42 it/sec) -training >> step=8789600, episode=1466 reward=0.7932959 (454.81 it/sec) -training >> step=8789700, episode=1466 reward=0.7851324 (454.26 it/sec) -training >> step=8789800, episode=1466 reward=0.7667525 (418.33 it/sec) -training >> step=8789900, episode=1466 reward=0.8018091 (463.00 it/sec) -training >> step=8790000, episode=1466 reward=0.7845992 (482.04 it/sec) -training >> step=8790100, episode=1466 reward=0.7876775 (500.60 it/sec) -training >> step=8790200, episode=1466 reward=0.7872815 (481.79 it/sec) -training >> step=8790300, episode=1466 reward=0.7734733 (462.99 it/sec) -training >> step=8790400, episode=1466 reward=0.7883466 (438.60 it/sec) -training >> step=8790500, episode=1466 reward=0.7814146 (447.49 it/sec) -training >> step=8790600, episode=1466 reward=0.7977442 (484.79 it/sec) -training >> step=8790700, episode=1466 reward=0.7857023 (450.48 it/sec) -training >> step=8790800, episode=1466 reward=0.787753 (410.46 it/sec) -training >> step=8790900, episode=1466 reward=0.787473 (440.61 it/sec) -training >> step=8791000, episode=1466 reward=0.8018328 (469.08 it/sec) -training >> step=8791100, episode=1466 reward=0.7800715 (399.42 it/sec) -training >> step=8791200, episode=1466 reward=0.7976668 (435.35 it/sec) -training >> step=8791300, episode=1466 reward=0.7815723 (462.75 it/sec) -training >> step=8791400, episode=1466 reward=0.7912197 (428.30 it/sec) -training >> step=8791500, episode=1466 reward=0.7833666 (412.68 it/sec) -training >> step=8791600, episode=1466 reward=0.7771887 (456.58 it/sec) -training >> step=8791700, episode=1466 reward=0.7798173 (456.54 it/sec) -training >> step=8791800, episode=1466 reward=0.8001317 (418.76 it/sec) -training >> step=8791900, episode=1466 reward=0.7787668 (470.79 it/sec) -training >> step=8792000, episode=1466 reward=0.7864449 (430.30 it/sec) -training >> step=8792100, episode=1466 reward=0.8133029 (491.77 it/sec) -training >> step=8792200, episode=1466 reward=0.7950603 (479.67 it/sec) -training >> step=8792300, episode=1466 reward=0.7945202 (462.92 it/sec) -training >> step=8792400, episode=1466 reward=0.7816508 (434.30 it/sec) -training >> step=8792500, episode=1466 reward=0.7769699 (434.73 it/sec) -training >> step=8792600, episode=1466 reward=0.7678368 (403.59 it/sec) -training >> step=8792700, episode=1466 reward=0.7931031 (387.06 it/sec) -training >> step=8792800, episode=1466 reward=0.794954 (457.27 it/sec) -training >> step=8792900, episode=1466 reward=0.7874642 (407.00 it/sec) -training >> step=8793000, episode=1466 reward=0.8019254 (356.35 it/sec) -training >> step=8793100, episode=1466 reward=0.7896241 (422.21 it/sec) -training >> step=8793200, episode=1466 reward=0.803912 (433.04 it/sec) -training >> step=8793300, episode=1466 reward=0.7889591 (491.83 it/sec) -training >> step=8793400, episode=1466 reward=0.795084 (467.23 it/sec) -training >> step=8793500, episode=1466 reward=0.8151264 (478.11 it/sec) -training >> step=8793600, episode=1466 reward=0.8031341 (441.41 it/sec) -training >> step=8793700, episode=1466 reward=0.8032983 (447.68 it/sec) -training >> step=8793800, episode=1466 reward=0.7916939 (461.23 it/sec) -training >> step=8793900, episode=1466 reward=0.8042136 (485.68 it/sec) -training >> step=8794000, episode=1466 reward=0.7913044 (461.38 it/sec) -training >> step=8794100, episode=1466 reward=0.7928454 (439.35 it/sec) -training >> step=8794200, episode=1466 reward=0.8045348 (471.75 it/sec) -training >> step=8794300, episode=1466 reward=0.8004741 (462.71 it/sec) -training >> step=8794400, episode=1466 reward=0.805378 (460.49 it/sec) -training >> step=8794500, episode=1466 reward=0.7783275 (470.77 it/sec) -training >> step=8794600, episode=1466 reward=0.8078151 (482.68 it/sec) -training >> step=8794700, episode=1466 reward=0.7823287 (412.88 it/sec) -training >> step=8794800, episode=1466 reward=0.7841038 (417.75 it/sec) -training >> step=8794900, episode=1466 reward=0.7826789 (482.68 it/sec) -training >> step=8795000, episode=1466 reward=0.7817591 (449.25 it/sec) -training >> step=8795100, episode=1466 reward=0.7854005 (453.63 it/sec) -training >> step=8795200, episode=1466 reward=0.7916913 (461.80 it/sec) -training >> step=8795300, episode=1467 reward=0.7866572 (89.44 it/sec) -training >> step=8795400, episode=1467 reward=0.778166 (396.23 it/sec) -training >> step=8795500, episode=1467 reward=0.7992157 (427.39 it/sec) -training >> step=8795600, episode=1467 reward=0.7933243 (474.42 it/sec) -training >> step=8795700, episode=1467 reward=0.7743197 (492.22 it/sec) -training >> step=8795800, episode=1467 reward=0.7752379 (490.99 it/sec) -training >> step=8795900, episode=1467 reward=0.7890879 (459.51 it/sec) -training >> step=8796000, episode=1467 reward=0.7823632 (453.48 it/sec) -training >> step=8796100, episode=1467 reward=0.797632 (483.75 it/sec) -training >> step=8796200, episode=1467 reward=0.7781108 (467.16 it/sec) -training >> step=8796300, episode=1467 reward=0.7909442 (461.38 it/sec) -training >> step=8796400, episode=1467 reward=0.8011596 (505.95 it/sec) -training >> step=8796500, episode=1467 reward=0.785567 (430.29 it/sec) -training >> step=8796600, episode=1467 reward=0.7838227 (459.83 it/sec) -training >> step=8796700, episode=1467 reward=0.7846471 (411.39 it/sec) -training >> step=8796800, episode=1467 reward=0.7888579 (459.08 it/sec) -training >> step=8796900, episode=1467 reward=0.7605994 (432.27 it/sec) -training >> step=8797000, episode=1467 reward=0.7975699 (433.83 it/sec) -training >> step=8797100, episode=1467 reward=0.804833 (426.94 it/sec) -training >> step=8797200, episode=1467 reward=0.7864631 (445.74 it/sec) -training >> step=8797300, episode=1467 reward=0.7738745 (436.46 it/sec) -training >> step=8797400, episode=1467 reward=0.7804189 (435.63 it/sec) -training >> step=8797500, episode=1467 reward=0.7876537 (471.86 it/sec) -training >> step=8797600, episode=1467 reward=0.7993345 (465.32 it/sec) -training >> step=8797700, episode=1467 reward=0.7852967 (457.60 it/sec) -training >> step=8797800, episode=1467 reward=0.7938821 (499.46 it/sec) -training >> step=8797900, episode=1467 reward=0.8085129 (447.29 it/sec) -training >> step=8798000, episode=1467 reward=0.7790809 (452.10 it/sec) -training >> step=8798100, episode=1467 reward=0.7868878 (437.92 it/sec) -training >> step=8798200, episode=1467 reward=0.8009857 (476.85 it/sec) -training >> step=8798300, episode=1467 reward=0.7812467 (427.96 it/sec) -training >> step=8798400, episode=1467 reward=0.7740281 (444.89 it/sec) -training >> step=8798500, episode=1467 reward=0.7937128 (465.44 it/sec) -training >> step=8798600, episode=1467 reward=0.8111693 (476.11 it/sec) -training >> step=8798700, episode=1467 reward=0.8054343 (472.80 it/sec) -training >> step=8798800, episode=1467 reward=0.8114185 (453.80 it/sec) -training >> step=8798900, episode=1467 reward=0.7875132 (467.86 it/sec) -training >> step=8799000, episode=1467 reward=0.7854373 (422.89 it/sec) -training >> step=8799100, episode=1467 reward=0.8035576 (436.13 it/sec) -training >> step=8799200, episode=1467 reward=0.7762195 (457.35 it/sec) -training >> step=8799300, episode=1467 reward=0.7775053 (439.49 it/sec) -training >> step=8799400, episode=1467 reward=0.7871082 (483.21 it/sec) -training >> step=8799500, episode=1467 reward=0.7830909 (451.32 it/sec) -training >> step=8799600, episode=1467 reward=0.7961904 (463.84 it/sec) -training >> step=8799700, episode=1467 reward=0.8043447 (450.99 it/sec) -training >> step=8799800, episode=1467 reward=0.7804928 (482.10 it/sec) -training >> step=8799900, episode=1467 reward=0.7754476 (479.75 it/sec) -training >> step=8800000, episode=1467 reward=0.7814425 (464.77 it/sec) -training >> step=8800100, episode=1467 reward=0.7906117 (435.23 it/sec) -training >> step=8800200, episode=1467 reward=0.8107915 (461.51 it/sec) -training >> step=8800300, episode=1467 reward=0.811115 (452.82 it/sec) -training >> step=8800400, episode=1467 reward=0.7868005 (452.92 it/sec) -training >> step=8800500, episode=1467 reward=0.7953452 (494.63 it/sec) -training >> step=8800600, episode=1467 reward=0.800092 (474.49 it/sec) -training >> step=8800700, episode=1467 reward=0.7960368 (442.68 it/sec) -training >> step=8800800, episode=1467 reward=0.7732136 (482.25 it/sec) -training >> step=8800900, episode=1467 reward=0.7854661 (447.80 it/sec) -training >> step=8801000, episode=1467 reward=0.7838639 (455.58 it/sec) -training >> step=8801100, episode=1467 reward=0.7904419 (462.76 it/sec) -training >> step=8801200, episode=1467 reward=0.7991891 (432.45 it/sec) -training >> step=8801300, episode=1468 reward=0.7930613 (66.39 it/sec) -training >> step=8801400, episode=1468 reward=0.7723467 (467.15 it/sec) -training >> step=8801500, episode=1468 reward=0.7954621 (472.87 it/sec) -training >> step=8801600, episode=1468 reward=0.7991502 (469.14 it/sec) -training >> step=8801700, episode=1468 reward=0.7854735 (429.54 it/sec) -training >> step=8801800, episode=1468 reward=0.7779727 (472.05 it/sec) -training >> step=8801900, episode=1468 reward=0.7974074 (428.80 it/sec) -training >> step=8802000, episode=1468 reward=0.7798812 (447.27 it/sec) -training >> step=8802100, episode=1468 reward=0.7930971 (460.31 it/sec) -training >> step=8802200, episode=1468 reward=0.7824007 (487.28 it/sec) -training >> step=8802300, episode=1468 reward=0.7847039 (496.17 it/sec) -training >> step=8802400, episode=1468 reward=0.7752207 (445.54 it/sec) -training >> step=8802500, episode=1468 reward=0.7912428 (449.89 it/sec) -training >> step=8802600, episode=1468 reward=0.7876329 (461.33 it/sec) -training >> step=8802700, episode=1468 reward=0.7722331 (470.47 it/sec) -training >> step=8802800, episode=1468 reward=0.7844593 (447.57 it/sec) -training >> step=8802900, episode=1468 reward=0.7850769 (453.51 it/sec) -training >> step=8803000, episode=1468 reward=0.7725423 (438.17 it/sec) -training >> step=8803100, episode=1468 reward=0.790734 (372.49 it/sec) -training >> step=8803200, episode=1468 reward=0.8012466 (449.37 it/sec) -training >> step=8803300, episode=1468 reward=0.7897784 (423.88 it/sec) -training >> step=8803400, episode=1468 reward=0.7729857 (410.17 it/sec) -training >> step=8803500, episode=1468 reward=0.8082892 (439.59 it/sec) -training >> step=8803600, episode=1468 reward=0.8044106 (426.01 it/sec) -training >> step=8803700, episode=1468 reward=0.7930019 (438.56 it/sec) -training >> step=8803800, episode=1468 reward=0.795465 (467.11 it/sec) -training >> step=8803900, episode=1468 reward=0.7785331 (462.80 it/sec) -training >> step=8804000, episode=1468 reward=0.8030807 (468.76 it/sec) -training >> step=8804100, episode=1468 reward=0.7930811 (456.93 it/sec) -training >> step=8804200, episode=1468 reward=0.7911716 (453.76 it/sec) -training >> step=8804300, episode=1468 reward=0.7874302 (494.69 it/sec) -training >> step=8804400, episode=1468 reward=0.779393 (465.83 it/sec) -training >> step=8804500, episode=1468 reward=0.7954832 (464.92 it/sec) -training >> step=8804600, episode=1468 reward=0.7876056 (485.21 it/sec) -training >> step=8804700, episode=1468 reward=0.7934586 (480.92 it/sec) -training >> step=8804800, episode=1468 reward=0.7864206 (462.16 it/sec) -training >> step=8804900, episode=1468 reward=0.7792634 (481.11 it/sec) -training >> step=8805000, episode=1468 reward=0.7765561 (436.58 it/sec) -training >> step=8805100, episode=1468 reward=0.7725834 (465.35 it/sec) -training >> step=8805200, episode=1468 reward=0.7887827 (459.18 it/sec) -training >> step=8805300, episode=1468 reward=0.7961209 (458.28 it/sec) -training >> step=8805400, episode=1468 reward=0.7840451 (487.04 it/sec) -training >> step=8805500, episode=1468 reward=0.7953604 (425.72 it/sec) -training >> step=8805600, episode=1468 reward=0.7816969 (467.09 it/sec) -training >> step=8805700, episode=1468 reward=0.779048 (444.66 it/sec) -training >> step=8805800, episode=1468 reward=0.7809113 (455.87 it/sec) -training >> step=8805900, episode=1468 reward=0.8086337 (477.14 it/sec) -training >> step=8806000, episode=1468 reward=0.7903044 (492.33 it/sec) -training >> step=8806100, episode=1468 reward=0.7742345 (445.50 it/sec) -training >> step=8806200, episode=1468 reward=0.7980788 (491.85 it/sec) -training >> step=8806300, episode=1468 reward=0.7925611 (449.20 it/sec) -training >> step=8806400, episode=1468 reward=0.7892901 (434.28 it/sec) -training >> step=8806500, episode=1468 reward=0.7846637 (487.01 it/sec) -training >> step=8806600, episode=1468 reward=0.787416 (517.06 it/sec) -training >> step=8806700, episode=1468 reward=0.8046798 (420.59 it/sec) -training >> step=8806800, episode=1468 reward=0.7954797 (372.55 it/sec) -training >> step=8806900, episode=1468 reward=0.7955047 (473.68 it/sec) -training >> step=8807000, episode=1468 reward=0.8009458 (465.00 it/sec) -training >> step=8807100, episode=1468 reward=0.7924032 (492.00 it/sec) -training >> step=8807200, episode=1468 reward=0.7928028 (404.03 it/sec) -training >> step=8807300, episode=1469 reward=0.7861745 (91.11 it/sec) -training >> step=8807400, episode=1469 reward=0.7661003 (460.37 it/sec) -training >> step=8807500, episode=1469 reward=0.7815152 (495.29 it/sec) -training >> step=8807600, episode=1469 reward=0.7966371 (494.93 it/sec) -training >> step=8807700, episode=1469 reward=0.7843169 (485.36 it/sec) -training >> step=8807800, episode=1469 reward=0.793677 (482.41 it/sec) -training >> step=8807900, episode=1469 reward=0.8002813 (463.62 it/sec) -training >> step=8808000, episode=1469 reward=0.7972865 (473.89 it/sec) -training >> step=8808100, episode=1469 reward=0.7906842 (473.85 it/sec) -training >> step=8808200, episode=1469 reward=0.7829743 (463.07 it/sec) -training >> step=8808300, episode=1469 reward=0.7885581 (444.80 it/sec) -training >> step=8808400, episode=1469 reward=0.7979332 (478.80 it/sec) -training >> step=8808500, episode=1469 reward=0.8048852 (446.78 it/sec) -training >> step=8808600, episode=1469 reward=0.7991434 (483.70 it/sec) -training >> step=8808700, episode=1469 reward=0.7864521 (428.91 it/sec) -training >> step=8808800, episode=1469 reward=0.7683193 (460.37 it/sec) -training >> step=8808900, episode=1469 reward=0.7857116 (489.09 it/sec) -training >> step=8809000, episode=1469 reward=0.7848749 (497.29 it/sec) -training >> step=8809100, episode=1469 reward=0.7751358 (448.13 it/sec) -training >> step=8809200, episode=1469 reward=0.7838714 (476.89 it/sec) -training >> step=8809300, episode=1469 reward=0.7865064 (466.34 it/sec) -training >> step=8809400, episode=1469 reward=0.8020036 (490.18 it/sec) -training >> step=8809500, episode=1469 reward=0.8056865 (483.23 it/sec) -training >> step=8809600, episode=1469 reward=0.7780093 (443.48 it/sec) -training >> step=8809700, episode=1469 reward=0.7793785 (450.49 it/sec) -training >> step=8809800, episode=1469 reward=0.7925217 (453.83 it/sec) -training >> step=8809900, episode=1469 reward=0.8026761 (476.34 it/sec) -training >> step=8810000, episode=1469 reward=0.7993317 (459.38 it/sec) -training >> step=8810100, episode=1469 reward=0.7946892 (492.44 it/sec) -training >> step=8810200, episode=1469 reward=0.7728373 (472.42 it/sec) -training >> step=8810300, episode=1469 reward=0.7970099 (428.01 it/sec) -training >> step=8810400, episode=1469 reward=0.7816299 (454.22 it/sec) -training >> step=8810500, episode=1469 reward=0.7809458 (463.27 it/sec) -training >> step=8810600, episode=1469 reward=0.7827786 (461.32 it/sec) -training >> step=8810700, episode=1469 reward=0.8041459 (452.40 it/sec) -training >> step=8810800, episode=1469 reward=0.7865288 (501.96 it/sec) -training >> step=8810900, episode=1469 reward=0.7913672 (472.91 it/sec) -training >> step=8811000, episode=1469 reward=0.7809278 (456.96 it/sec) -training >> step=8811100, episode=1469 reward=0.80431 (474.17 it/sec) -training >> step=8811200, episode=1469 reward=0.808346 (486.69 it/sec) -training >> step=8811300, episode=1469 reward=0.7713589 (484.48 it/sec) -training >> step=8811400, episode=1469 reward=0.7941797 (460.41 it/sec) -training >> step=8811500, episode=1469 reward=0.8065143 (501.44 it/sec) -training >> step=8811600, episode=1469 reward=0.789162 (460.65 it/sec) -training >> step=8811700, episode=1469 reward=0.8110917 (471.58 it/sec) -training >> step=8811800, episode=1469 reward=0.7898 (431.56 it/sec) -training >> step=8811900, episode=1469 reward=0.7870668 (474.79 it/sec) -training >> step=8812000, episode=1469 reward=0.7813198 (426.94 it/sec) -training >> step=8812100, episode=1469 reward=0.7923976 (455.63 it/sec) -training >> step=8812200, episode=1469 reward=0.7901291 (463.82 it/sec) -training >> step=8812300, episode=1469 reward=0.7917184 (512.11 it/sec) -training >> step=8812400, episode=1469 reward=0.7989258 (458.49 it/sec) -training >> step=8812500, episode=1469 reward=0.7733971 (427.77 it/sec) -training >> step=8812600, episode=1469 reward=0.7949919 (483.55 it/sec) -training >> step=8812700, episode=1469 reward=0.7982989 (423.86 it/sec) -training >> step=8812800, episode=1469 reward=0.7879749 (449.64 it/sec) -training >> step=8812900, episode=1469 reward=0.8001252 (473.13 it/sec) -training >> step=8813000, episode=1469 reward=0.7928268 (485.23 it/sec) -training >> step=8813100, episode=1469 reward=0.7758208 (473.62 it/sec) -training >> step=8813200, episode=1469 reward=0.7857895 (450.15 it/sec) -training >> step=8813300, episode=1470 reward=0.8002763 (73.51 it/sec) -training >> step=8813400, episode=1470 reward=0.7868826 (357.14 it/sec) -training >> step=8813500, episode=1470 reward=0.8002162 (457.87 it/sec) -training >> step=8813600, episode=1470 reward=0.7864447 (427.42 it/sec) -training >> step=8813700, episode=1470 reward=0.7860904 (383.32 it/sec) -training >> step=8813800, episode=1470 reward=0.7879903 (398.27 it/sec) -training >> step=8813900, episode=1470 reward=0.7796944 (493.10 it/sec) -training >> step=8814000, episode=1470 reward=0.7782992 (478.19 it/sec) -training >> step=8814100, episode=1470 reward=0.76041 (452.28 it/sec) -training >> step=8814200, episode=1470 reward=0.7840129 (461.11 it/sec) -training >> step=8814300, episode=1470 reward=0.8058195 (474.16 it/sec) -training >> step=8814400, episode=1470 reward=0.7860616 (463.47 it/sec) -training >> step=8814500, episode=1470 reward=0.7693866 (488.96 it/sec) -training >> step=8814600, episode=1470 reward=0.7939534 (495.08 it/sec) -training >> step=8814700, episode=1470 reward=0.800009 (482.99 it/sec) -training >> step=8814800, episode=1470 reward=0.7765275 (464.84 it/sec) -training >> step=8814900, episode=1470 reward=0.7885773 (506.35 it/sec) -training >> step=8815000, episode=1470 reward=0.7918692 (520.21 it/sec) -training >> step=8815100, episode=1470 reward=0.8057118 (479.41 it/sec) -training >> step=8815200, episode=1470 reward=0.8074269 (490.66 it/sec) -training >> step=8815300, episode=1470 reward=0.812895 (498.25 it/sec) -training >> step=8815400, episode=1470 reward=0.7842503 (448.49 it/sec) -training >> step=8815500, episode=1470 reward=0.7858394 (507.57 it/sec) -training >> step=8815600, episode=1470 reward=0.7922413 (503.27 it/sec) -training >> step=8815700, episode=1470 reward=0.7935877 (481.40 it/sec) -training >> step=8815800, episode=1470 reward=0.7876624 (446.10 it/sec) -training >> step=8815900, episode=1470 reward=0.7511631 (399.32 it/sec) -training >> step=8816000, episode=1470 reward=0.7876473 (482.77 it/sec) -training >> step=8816100, episode=1470 reward=0.7823736 (491.25 it/sec) -training >> step=8816200, episode=1470 reward=0.7784308 (436.02 it/sec) -training >> step=8816300, episode=1470 reward=0.8021575 (507.02 it/sec) -training >> step=8816400, episode=1470 reward=0.7970122 (438.27 it/sec) -training >> step=8816500, episode=1470 reward=0.7887351 (447.90 it/sec) -training >> step=8816600, episode=1470 reward=0.7934762 (459.14 it/sec) -training >> step=8816700, episode=1470 reward=0.7891845 (488.99 it/sec) -training >> step=8816800, episode=1470 reward=0.7874054 (482.58 it/sec) -training >> step=8816900, episode=1470 reward=0.7951748 (495.89 it/sec) -training >> step=8817000, episode=1470 reward=0.7984315 (484.04 it/sec) -training >> step=8817100, episode=1470 reward=0.797983 (497.18 it/sec) -training >> step=8817200, episode=1470 reward=0.8028509 (450.46 it/sec) -training >> step=8817300, episode=1470 reward=0.7642898 (502.65 it/sec) -training >> step=8817400, episode=1470 reward=0.7819847 (449.25 it/sec) -training >> step=8817500, episode=1470 reward=0.8032244 (436.37 it/sec) -training >> step=8817600, episode=1470 reward=0.7943156 (477.60 it/sec) -training >> step=8817700, episode=1470 reward=0.7893794 (458.25 it/sec) -training >> step=8817800, episode=1470 reward=0.781302 (477.41 it/sec) -training >> step=8817900, episode=1470 reward=0.7899708 (458.39 it/sec) -training >> step=8818000, episode=1470 reward=0.792937 (474.03 it/sec) -training >> step=8818100, episode=1470 reward=0.7965209 (481.84 it/sec) -training >> step=8818200, episode=1470 reward=0.7870103 (481.56 it/sec) -training >> step=8818300, episode=1470 reward=0.7839402 (441.02 it/sec) -training >> step=8818400, episode=1470 reward=0.7946726 (476.40 it/sec) -training >> step=8818500, episode=1470 reward=0.7939823 (506.87 it/sec) -training >> step=8818600, episode=1470 reward=0.7781946 (427.14 it/sec) -training >> step=8818700, episode=1470 reward=0.8095664 (453.41 it/sec) -training >> step=8818800, episode=1470 reward=0.7973645 (456.36 it/sec) -training >> step=8818900, episode=1470 reward=0.7864622 (466.78 it/sec) -training >> step=8819000, episode=1470 reward=0.7992221 (488.55 it/sec) -training >> step=8819100, episode=1470 reward=0.7917715 (457.69 it/sec) -training >> step=8819200, episode=1470 reward=0.7906245 (402.55 it/sec) -training >> step=8819300, episode=1471 reward=0.7926471 (95.32 it/sec) -training >> step=8819400, episode=1471 reward=0.7707515 (225.81 it/sec) -training >> step=8819500, episode=1471 reward=0.7873494 (359.94 it/sec) -training >> step=8819600, episode=1471 reward=0.8104764 (444.11 it/sec) -training >> step=8819700, episode=1471 reward=0.7887525 (487.22 it/sec) -training >> step=8819800, episode=1471 reward=0.7846058 (464.53 it/sec) -training >> step=8819900, episode=1471 reward=0.7917473 (442.18 it/sec) -training >> step=8820000, episode=1471 reward=0.7921633 (445.17 it/sec) -training >> step=8820100, episode=1471 reward=0.8104628 (480.31 it/sec) -training >> step=8820200, episode=1471 reward=0.8021585 (458.51 it/sec) -training >> step=8820300, episode=1471 reward=0.7897684 (456.79 it/sec) -training >> step=8820400, episode=1471 reward=0.7994884 (451.87 it/sec) -training >> step=8820500, episode=1471 reward=0.8060808 (447.97 it/sec) -training >> step=8820600, episode=1471 reward=0.8068471 (485.02 it/sec) -training >> step=8820700, episode=1471 reward=0.7828836 (473.04 it/sec) -training >> step=8820800, episode=1471 reward=0.7939251 (423.24 it/sec) -training >> step=8820900, episode=1471 reward=0.7815716 (467.13 it/sec) -training >> step=8821000, episode=1471 reward=0.7940943 (494.03 it/sec) -training >> step=8821100, episode=1471 reward=0.7905014 (441.71 it/sec) -training >> step=8821200, episode=1471 reward=0.7883344 (489.44 it/sec) -training >> step=8821300, episode=1471 reward=0.7732657 (455.08 it/sec) -training >> step=8821400, episode=1471 reward=0.7961395 (473.85 it/sec) -training >> step=8821500, episode=1471 reward=0.7875479 (450.08 it/sec) -training >> step=8821600, episode=1471 reward=0.7842652 (482.09 it/sec) -training >> step=8821700, episode=1471 reward=0.77773 (455.22 it/sec) -training >> step=8821800, episode=1471 reward=0.8106731 (498.18 it/sec) -training >> step=8821900, episode=1471 reward=0.7942991 (472.53 it/sec) -training >> step=8822000, episode=1471 reward=0.7896937 (468.41 it/sec) -training >> step=8822100, episode=1471 reward=0.8015453 (457.80 it/sec) -training >> step=8822200, episode=1471 reward=0.7885669 (466.29 it/sec) -training >> step=8822300, episode=1471 reward=0.786669 (474.44 it/sec) -training >> step=8822400, episode=1471 reward=0.8092386 (473.81 it/sec) -training >> step=8822500, episode=1471 reward=0.7927863 (502.87 it/sec) -training >> step=8822600, episode=1471 reward=0.7663139 (455.82 it/sec) -training >> step=8822700, episode=1471 reward=0.8005031 (480.01 it/sec) -training >> step=8822800, episode=1471 reward=0.7995781 (433.60 it/sec) -training >> step=8822900, episode=1471 reward=0.7937843 (517.47 it/sec) -training >> step=8823000, episode=1471 reward=0.7917461 (486.25 it/sec) -training >> step=8823100, episode=1471 reward=0.8076633 (472.36 it/sec) -training >> step=8823200, episode=1471 reward=0.7894047 (482.60 it/sec) -training >> step=8823300, episode=1471 reward=0.8083465 (492.46 it/sec) -training >> step=8823400, episode=1471 reward=0.7746306 (465.14 it/sec) -training >> step=8823500, episode=1471 reward=0.808756 (501.78 it/sec) -training >> step=8823600, episode=1471 reward=0.7931612 (516.22 it/sec) -training >> step=8823700, episode=1471 reward=0.7714378 (482.25 it/sec) -training >> step=8823800, episode=1471 reward=0.7822698 (464.26 it/sec) -training >> step=8823900, episode=1471 reward=0.8066039 (489.32 it/sec) -training >> step=8824000, episode=1471 reward=0.7953503 (482.98 it/sec) -training >> step=8824100, episode=1471 reward=0.7834355 (471.58 it/sec) -training >> step=8824200, episode=1471 reward=0.7905262 (467.33 it/sec) -training >> step=8824300, episode=1471 reward=0.8028448 (487.19 it/sec) -training >> step=8824400, episode=1471 reward=0.7885821 (443.41 it/sec) -training >> step=8824500, episode=1471 reward=0.7952981 (477.93 it/sec) -training >> step=8824600, episode=1471 reward=0.7767838 (497.66 it/sec) -training >> step=8824700, episode=1471 reward=0.7856342 (418.97 it/sec) -training >> step=8824800, episode=1471 reward=0.8010393 (425.11 it/sec) -training >> step=8824900, episode=1471 reward=0.7990015 (445.41 it/sec) -training >> step=8825000, episode=1471 reward=0.7833597 (470.33 it/sec) -training >> step=8825100, episode=1471 reward=0.7955832 (439.20 it/sec) -training >> step=8825200, episode=1471 reward=0.787844 (502.67 it/sec) -training >> step=8825300, episode=1472 reward=0.8024102 (84.37 it/sec) -training >> step=8825400, episode=1472 reward=0.7997106 (482.89 it/sec) -training >> step=8825500, episode=1472 reward=0.7861907 (440.85 it/sec) -training >> step=8825600, episode=1472 reward=0.797825 (466.09 it/sec) -training >> step=8825700, episode=1472 reward=0.7850464 (352.71 it/sec) -training >> step=8825800, episode=1472 reward=0.7805921 (488.90 it/sec) -training >> step=8825900, episode=1472 reward=0.7989054 (487.98 it/sec) -training >> step=8826000, episode=1472 reward=0.8022946 (472.25 it/sec) -training >> step=8826100, episode=1472 reward=0.7925943 (457.10 it/sec) -training >> step=8826200, episode=1472 reward=0.7974659 (470.54 it/sec) -training >> step=8826300, episode=1472 reward=0.7927626 (433.12 it/sec) -training >> step=8826400, episode=1472 reward=0.7868336 (480.44 it/sec) -training >> step=8826500, episode=1472 reward=0.791776 (477.18 it/sec) -training >> step=8826600, episode=1472 reward=0.7867486 (487.84 it/sec) -training >> step=8826700, episode=1472 reward=0.7899159 (446.40 it/sec) -training >> step=8826800, episode=1472 reward=0.7951114 (441.82 it/sec) -training >> step=8826900, episode=1472 reward=0.8107078 (470.91 it/sec) -training >> step=8827000, episode=1472 reward=0.7979373 (495.59 it/sec) -training >> step=8827100, episode=1472 reward=0.7977148 (472.11 it/sec) -training >> step=8827200, episode=1472 reward=0.796958 (490.26 it/sec) -training >> step=8827300, episode=1472 reward=0.8014897 (445.49 it/sec) -training >> step=8827400, episode=1472 reward=0.8064477 (480.75 it/sec) -training >> step=8827500, episode=1472 reward=0.7861378 (437.77 it/sec) -training >> step=8827600, episode=1472 reward=0.7776481 (458.19 it/sec) -training >> step=8827700, episode=1472 reward=0.7928227 (447.02 it/sec) -training >> step=8827800, episode=1472 reward=0.7897778 (479.53 it/sec) -training >> step=8827900, episode=1472 reward=0.7906633 (482.96 it/sec) -training >> step=8828000, episode=1472 reward=0.7776563 (487.74 it/sec) -training >> step=8828100, episode=1472 reward=0.7932023 (452.70 it/sec) -training >> step=8828200, episode=1472 reward=0.7766102 (480.85 it/sec) -training >> step=8828300, episode=1472 reward=0.7837471 (477.73 it/sec) -training >> step=8828400, episode=1472 reward=0.8013752 (432.29 it/sec) -training >> step=8828500, episode=1472 reward=0.8038282 (482.00 it/sec) -training >> step=8828600, episode=1472 reward=0.8033189 (519.43 it/sec) -training >> step=8828700, episode=1472 reward=0.7997532 (459.52 it/sec) -training >> step=8828800, episode=1472 reward=0.7859629 (456.05 it/sec) -training >> step=8828900, episode=1472 reward=0.8017657 (453.04 it/sec) -training >> step=8829000, episode=1472 reward=0.7804651 (440.35 it/sec) -training >> step=8829100, episode=1472 reward=0.783568 (458.24 it/sec) -training >> step=8829200, episode=1472 reward=0.7940347 (499.87 it/sec) -training >> step=8829300, episode=1472 reward=0.786462 (485.98 it/sec) -training >> step=8829400, episode=1472 reward=0.7874916 (443.97 it/sec) -training >> step=8829500, episode=1472 reward=0.7900891 (479.71 it/sec) -training >> step=8829600, episode=1472 reward=0.7987377 (439.00 it/sec) -training >> step=8829700, episode=1472 reward=0.8077753 (473.05 it/sec) -training >> step=8829800, episode=1472 reward=0.7826325 (501.59 it/sec) -training >> step=8829900, episode=1472 reward=0.7976655 (493.50 it/sec) -training >> step=8830000, episode=1472 reward=0.7777799 (461.58 it/sec) -training >> step=8830100, episode=1472 reward=0.788513 (442.36 it/sec) -training >> step=8830200, episode=1472 reward=0.8054414 (478.05 it/sec) -training >> step=8830300, episode=1472 reward=0.7904336 (486.43 it/sec) -training >> step=8830400, episode=1472 reward=0.7940423 (479.54 it/sec) -training >> step=8830500, episode=1472 reward=0.7907434 (492.55 it/sec) -training >> step=8830600, episode=1472 reward=0.7993683 (465.59 it/sec) -training >> step=8830700, episode=1472 reward=0.7819367 (469.45 it/sec) -training >> step=8830800, episode=1472 reward=0.7777755 (428.86 it/sec) -training >> step=8830900, episode=1472 reward=0.78585 (447.48 it/sec) -training >> step=8831000, episode=1472 reward=0.78216 (494.18 it/sec) -training >> step=8831100, episode=1472 reward=0.7834998 (435.66 it/sec) -training >> step=8831200, episode=1472 reward=0.7965128 (507.67 it/sec) -training >> step=8831300, episode=1473 reward=0.7670355 (85.51 it/sec) -training >> step=8831400, episode=1473 reward=0.7927838 (467.81 it/sec) -training >> step=8831500, episode=1473 reward=0.7832903 (425.74 it/sec) -training >> step=8831600, episode=1473 reward=0.7874575 (465.86 it/sec) -training >> step=8831700, episode=1473 reward=0.7722707 (437.18 it/sec) -training >> step=8831800, episode=1473 reward=0.7874126 (420.67 it/sec) -training >> step=8831900, episode=1473 reward=0.7805459 (376.84 it/sec) -training >> step=8832000, episode=1473 reward=0.7913806 (456.98 it/sec) -training >> step=8832100, episode=1473 reward=0.7962943 (481.12 it/sec) -training >> step=8832200, episode=1473 reward=0.7833347 (434.84 it/sec) -training >> step=8832300, episode=1473 reward=0.8062314 (473.11 it/sec) -training >> step=8832400, episode=1473 reward=0.777787 (461.90 it/sec) -training >> step=8832500, episode=1473 reward=0.7994524 (464.77 it/sec) -training >> step=8832600, episode=1473 reward=0.799951 (499.37 it/sec) -training >> step=8832700, episode=1473 reward=0.7627878 (487.06 it/sec) -training >> step=8832800, episode=1473 reward=0.8160114 (487.52 it/sec) -training >> step=8832900, episode=1473 reward=0.7961575 (452.01 it/sec) -training >> step=8833000, episode=1473 reward=0.7693185 (480.66 it/sec) -training >> step=8833100, episode=1473 reward=0.7980043 (468.57 it/sec) -training >> step=8833200, episode=1473 reward=0.7961004 (422.50 it/sec) -training >> step=8833300, episode=1473 reward=0.7807648 (441.50 it/sec) -training >> step=8833400, episode=1473 reward=0.788516 (467.17 it/sec) -training >> step=8833500, episode=1473 reward=0.7847645 (417.00 it/sec) -training >> step=8833600, episode=1473 reward=0.7804708 (437.36 it/sec) -training >> step=8833700, episode=1473 reward=0.8103507 (502.84 it/sec) -training >> step=8833800, episode=1473 reward=0.809129 (441.04 it/sec) -training >> step=8833900, episode=1473 reward=0.7871041 (421.45 it/sec) -training >> step=8834000, episode=1473 reward=0.8024806 (406.29 it/sec) -training >> step=8834100, episode=1473 reward=0.7924407 (501.51 it/sec) -training >> step=8834200, episode=1473 reward=0.7829693 (444.28 it/sec) -training >> step=8834300, episode=1473 reward=0.8036269 (494.69 it/sec) -training >> step=8834400, episode=1473 reward=0.7755796 (436.91 it/sec) -training >> step=8834500, episode=1473 reward=0.7937343 (488.89 it/sec) -training >> step=8834600, episode=1473 reward=0.7791788 (453.59 it/sec) -training >> step=8834700, episode=1473 reward=0.7917128 (440.03 it/sec) -training >> step=8834800, episode=1473 reward=0.8125292 (448.55 it/sec) -training >> step=8834900, episode=1473 reward=0.7998422 (462.82 it/sec) -training >> step=8835000, episode=1473 reward=0.8014259 (422.68 it/sec) -training >> step=8835100, episode=1473 reward=0.7835668 (466.05 it/sec) -training >> step=8835200, episode=1473 reward=0.8120262 (457.70 it/sec) -training >> step=8835300, episode=1473 reward=0.801097 (461.03 it/sec) -training >> step=8835400, episode=1473 reward=0.7945757 (484.76 it/sec) -training >> step=8835500, episode=1473 reward=0.7937127 (481.74 it/sec) -training >> step=8835600, episode=1473 reward=0.7864159 (459.19 it/sec) -training >> step=8835700, episode=1473 reward=0.7812876 (429.08 it/sec) -training >> step=8835800, episode=1473 reward=0.7990467 (446.52 it/sec) -training >> step=8835900, episode=1473 reward=0.8038731 (509.06 it/sec) -training >> step=8836000, episode=1473 reward=0.7905411 (509.59 it/sec) -training >> step=8836100, episode=1473 reward=0.7661208 (449.03 it/sec) -training >> step=8836200, episode=1473 reward=0.7883787 (448.50 it/sec) -training >> step=8836300, episode=1473 reward=0.7826049 (451.79 it/sec) -training >> step=8836400, episode=1473 reward=0.7882746 (469.40 it/sec) -training >> step=8836500, episode=1473 reward=0.8026115 (495.72 it/sec) -training >> step=8836600, episode=1473 reward=0.7962387 (473.73 it/sec) -training >> step=8836700, episode=1473 reward=0.7811539 (486.13 it/sec) -training >> step=8836800, episode=1473 reward=0.7954134 (461.71 it/sec) -training >> step=8836900, episode=1473 reward=0.8022991 (470.53 it/sec) -training >> step=8837000, episode=1473 reward=0.7801374 (499.90 it/sec) -training >> step=8837100, episode=1473 reward=0.7928948 (518.94 it/sec) -training >> step=8837200, episode=1473 reward=0.7927454 (472.23 it/sec) -training >> step=8837300, episode=1474 reward=0.7926908 (95.87 it/sec) -training >> step=8837400, episode=1474 reward=0.7937344 (344.17 it/sec) -training >> step=8837500, episode=1474 reward=0.8052521 (455.95 it/sec) -training >> step=8837600, episode=1474 reward=0.7818182 (368.61 it/sec) -training >> step=8837700, episode=1474 reward=0.796186 (495.52 it/sec) -training >> step=8837800, episode=1474 reward=0.7706309 (460.96 it/sec) -training >> step=8837900, episode=1474 reward=0.7987827 (446.47 it/sec) -training >> step=8838000, episode=1474 reward=0.7710421 (481.03 it/sec) -training >> step=8838100, episode=1474 reward=0.7912936 (363.09 it/sec) -training >> step=8838200, episode=1474 reward=0.7726334 (453.14 it/sec) -training >> step=8838300, episode=1474 reward=0.7930929 (505.41 it/sec) -training >> step=8838400, episode=1474 reward=0.8046514 (486.15 it/sec) -training >> step=8838500, episode=1474 reward=0.8131891 (481.38 it/sec) -training >> step=8838600, episode=1474 reward=0.7961839 (467.62 it/sec) -training >> step=8838700, episode=1474 reward=0.7935483 (465.11 it/sec) -training >> step=8838800, episode=1474 reward=0.7985176 (520.41 it/sec) -training >> step=8838900, episode=1474 reward=0.812373 (467.41 it/sec) -training >> step=8839000, episode=1474 reward=0.8006838 (497.44 it/sec) -training >> step=8839100, episode=1474 reward=0.7709678 (440.82 it/sec) -training >> step=8839200, episode=1474 reward=0.7781681 (428.09 it/sec) -training >> step=8839300, episode=1474 reward=0.7898616 (487.54 it/sec) -training >> step=8839400, episode=1474 reward=0.7769197 (480.59 it/sec) -training >> step=8839500, episode=1474 reward=0.784472 (473.15 it/sec) -training >> step=8839600, episode=1474 reward=0.7901365 (483.73 it/sec) -training >> step=8839700, episode=1474 reward=0.7868747 (404.44 it/sec) -training >> step=8839800, episode=1474 reward=0.7808093 (416.35 it/sec) -training >> step=8839900, episode=1474 reward=0.7894278 (488.69 it/sec) -training >> step=8840000, episode=1474 reward=0.8002121 (511.44 it/sec) -training >> step=8840100, episode=1474 reward=0.7789035 (515.16 it/sec) -training >> step=8840200, episode=1474 reward=0.7801177 (501.51 it/sec) -training >> step=8840300, episode=1474 reward=0.7842976 (469.09 it/sec) -training >> step=8840400, episode=1474 reward=0.7893909 (489.92 it/sec) -training >> step=8840500, episode=1474 reward=0.7609148 (463.86 it/sec) -training >> step=8840600, episode=1474 reward=0.7772021 (509.49 it/sec) -training >> step=8840700, episode=1474 reward=0.8141619 (440.35 it/sec) -training >> step=8840800, episode=1474 reward=0.7993007 (464.97 it/sec) -training >> step=8840900, episode=1474 reward=0.7930105 (531.08 it/sec) -training >> step=8841000, episode=1474 reward=0.7931938 (477.99 it/sec) -training >> step=8841100, episode=1474 reward=0.7921177 (505.17 it/sec) -training >> step=8841200, episode=1474 reward=0.7851545 (464.50 it/sec) -training >> step=8841300, episode=1474 reward=0.8016488 (513.87 it/sec) -training >> step=8841400, episode=1474 reward=0.7961825 (501.29 it/sec) -training >> step=8841500, episode=1474 reward=0.7858481 (464.31 it/sec) -training >> step=8841600, episode=1474 reward=0.8000737 (520.48 it/sec) -training >> step=8841700, episode=1474 reward=0.8015596 (489.13 it/sec) -training >> step=8841800, episode=1474 reward=0.8003976 (498.20 it/sec) -training >> step=8841900, episode=1474 reward=0.7929094 (493.56 it/sec) -training >> step=8842000, episode=1474 reward=0.7857078 (491.45 it/sec) -training >> step=8842100, episode=1474 reward=0.8125609 (465.88 it/sec) -training >> step=8842200, episode=1474 reward=0.7859681 (444.58 it/sec) -training >> step=8842300, episode=1474 reward=0.792635 (461.68 it/sec) -training >> step=8842400, episode=1474 reward=0.7946184 (513.96 it/sec) -training >> step=8842500, episode=1474 reward=0.7979892 (473.00 it/sec) -training >> step=8842600, episode=1474 reward=0.8001266 (499.24 it/sec) -training >> step=8842700, episode=1474 reward=0.7956426 (467.75 it/sec) -training >> step=8842800, episode=1474 reward=0.7976832 (468.74 it/sec) -training >> step=8842900, episode=1474 reward=0.788514 (447.08 it/sec) -training >> step=8843000, episode=1474 reward=0.7907932 (491.01 it/sec) -training >> step=8843100, episode=1474 reward=0.7813879 (511.92 it/sec) -training >> step=8843200, episode=1474 reward=0.7947971 (454.91 it/sec) -training >> step=8843300, episode=1475 reward=0.7938715 (93.86 it/sec) -training >> step=8843400, episode=1475 reward=0.7866825 (499.13 it/sec) -training >> step=8843500, episode=1475 reward=0.7918681 (467.79 it/sec) -training >> step=8843600, episode=1475 reward=0.7892272 (495.18 it/sec) -training >> step=8843700, episode=1475 reward=0.7855681 (474.58 it/sec) -training >> step=8843800, episode=1475 reward=0.782313 (494.41 it/sec) -training >> step=8843900, episode=1475 reward=0.7926136 (492.89 it/sec) -training >> step=8844000, episode=1475 reward=0.7828079 (497.19 it/sec) -training >> step=8844100, episode=1475 reward=0.8012873 (481.37 it/sec) -training >> step=8844200, episode=1475 reward=0.7787346 (336.31 it/sec) -training >> step=8844300, episode=1475 reward=0.8035924 (491.42 it/sec) -training >> step=8844400, episode=1475 reward=0.794436 (497.48 it/sec) -training >> step=8844500, episode=1475 reward=0.8097339 (481.77 it/sec) -training >> step=8844600, episode=1475 reward=0.8121125 (505.73 it/sec) -training >> step=8844700, episode=1475 reward=0.7849756 (483.23 it/sec) -training >> step=8844800, episode=1475 reward=0.7746225 (522.65 it/sec) -training >> step=8844900, episode=1475 reward=0.7884643 (503.23 it/sec) -training >> step=8845000, episode=1475 reward=0.7923667 (492.71 it/sec) -training >> step=8845100, episode=1475 reward=0.7692591 (482.75 it/sec) -training >> step=8845200, episode=1475 reward=0.7957716 (480.54 it/sec) -training >> step=8845300, episode=1475 reward=0.7903006 (495.70 it/sec) -training >> step=8845400, episode=1475 reward=0.7991863 (481.17 it/sec) -training >> step=8845500, episode=1475 reward=0.7926199 (505.66 it/sec) -training >> step=8845600, episode=1475 reward=0.8015363 (455.61 it/sec) -training >> step=8845700, episode=1475 reward=0.7913609 (465.65 it/sec) -training >> step=8845800, episode=1475 reward=0.7868224 (515.36 it/sec) -training >> step=8845900, episode=1475 reward=0.7957246 (512.16 it/sec) -training >> step=8846000, episode=1475 reward=0.7969148 (486.39 it/sec) -training >> step=8846100, episode=1475 reward=0.7863063 (508.74 it/sec) -training >> step=8846200, episode=1475 reward=0.7891214 (483.43 it/sec) -training >> step=8846300, episode=1475 reward=0.7923307 (454.10 it/sec) -training >> step=8846400, episode=1475 reward=0.7980182 (447.26 it/sec) -training >> step=8846500, episode=1475 reward=0.7957637 (450.59 it/sec) -training >> step=8846600, episode=1475 reward=0.7859709 (426.47 it/sec) -training >> step=8846700, episode=1475 reward=0.8046776 (370.19 it/sec) -training >> step=8846800, episode=1475 reward=0.8281628 (425.36 it/sec) -training >> step=8846900, episode=1475 reward=0.7892587 (445.44 it/sec) -training >> step=8847000, episode=1475 reward=0.8206475 (461.06 it/sec) -training >> step=8847100, episode=1475 reward=0.7647055 (502.46 it/sec) -training >> step=8847200, episode=1475 reward=0.7804174 (425.37 it/sec) -training >> step=8847300, episode=1475 reward=0.8265067 (499.52 it/sec) -training >> step=8847400, episode=1475 reward=0.8038047 (482.57 it/sec) -training >> step=8847500, episode=1475 reward=0.7850163 (417.23 it/sec) -training >> step=8847600, episode=1475 reward=0.7838586 (475.85 it/sec) -training >> step=8847700, episode=1475 reward=0.7973671 (507.06 it/sec) -training >> step=8847800, episode=1475 reward=0.7952403 (499.31 it/sec) -training >> step=8847900, episode=1475 reward=0.7934145 (474.13 it/sec) -training >> step=8848000, episode=1475 reward=0.7925556 (523.57 it/sec) -training >> step=8848100, episode=1475 reward=0.8087062 (439.48 it/sec) -training >> step=8848200, episode=1475 reward=0.7842696 (494.88 it/sec) -training >> step=8848300, episode=1475 reward=0.7945264 (486.98 it/sec) -training >> step=8848400, episode=1475 reward=0.7926176 (500.93 it/sec) -training >> step=8848500, episode=1475 reward=0.8087788 (466.14 it/sec) -training >> step=8848600, episode=1475 reward=0.8054411 (437.26 it/sec) -training >> step=8848700, episode=1475 reward=0.7996085 (505.66 it/sec) -training >> step=8848800, episode=1475 reward=0.7901247 (509.60 it/sec) -training >> step=8848900, episode=1475 reward=0.7955028 (480.17 it/sec) -training >> step=8849000, episode=1475 reward=0.7785209 (485.24 it/sec) -training >> step=8849100, episode=1475 reward=0.7943347 (478.82 it/sec) -training >> step=8849200, episode=1475 reward=0.7817829 (492.61 it/sec) -training >> step=8849300, episode=1476 reward=0.8127986 (87.76 it/sec) -training >> step=8849400, episode=1476 reward=0.7903356 (491.74 it/sec) -training >> step=8849500, episode=1476 reward=0.7890623 (468.89 it/sec) -training >> step=8849600, episode=1476 reward=0.7849898 (488.58 it/sec) -training >> step=8849700, episode=1476 reward=0.7997427 (467.59 it/sec) -training >> step=8849800, episode=1476 reward=0.8008083 (471.15 it/sec) -training >> step=8849900, episode=1476 reward=0.7802338 (497.04 it/sec) -training >> step=8850000, episode=1476 reward=0.7701896 (467.33 it/sec) -training >> step=8850100, episode=1476 reward=0.7877814 (538.02 it/sec) -training >> step=8850200, episode=1476 reward=0.7954361 (489.83 it/sec) -training >> step=8850300, episode=1476 reward=0.7910936 (479.81 it/sec) -training >> step=8850400, episode=1476 reward=0.7986668 (368.64 it/sec) -training >> step=8850500, episode=1476 reward=0.7957194 (467.30 it/sec) -training >> step=8850600, episode=1476 reward=0.7979295 (444.67 it/sec) -training >> step=8850700, episode=1476 reward=0.7961542 (502.91 it/sec) -training >> step=8850800, episode=1476 reward=0.7900668 (492.22 it/sec) -training >> step=8850900, episode=1476 reward=0.7848936 (419.55 it/sec) -training >> step=8851000, episode=1476 reward=0.7910757 (489.34 it/sec) -training >> step=8851100, episode=1476 reward=0.7957214 (448.67 it/sec) -training >> step=8851200, episode=1476 reward=0.7749697 (510.13 it/sec) -training >> step=8851300, episode=1476 reward=0.7904373 (511.28 it/sec) -training >> step=8851400, episode=1476 reward=0.7882415 (461.56 it/sec) -training >> step=8851500, episode=1476 reward=0.8070903 (507.43 it/sec) -training >> step=8851600, episode=1476 reward=0.806954 (431.04 it/sec) -training >> step=8851700, episode=1476 reward=0.8010663 (485.71 it/sec) -training >> step=8851800, episode=1476 reward=0.7940077 (520.68 it/sec) -training >> step=8851900, episode=1476 reward=0.7831144 (506.73 it/sec) -training >> step=8852000, episode=1476 reward=0.8083014 (484.19 it/sec) -training >> step=8852100, episode=1476 reward=0.7923629 (454.62 it/sec) -training >> step=8852200, episode=1476 reward=0.7704324 (492.18 it/sec) -training >> step=8852300, episode=1476 reward=0.799395 (491.04 it/sec) -training >> step=8852400, episode=1476 reward=0.8133646 (450.84 it/sec) -training >> step=8852500, episode=1476 reward=0.8006394 (479.43 it/sec) -training >> step=8852600, episode=1476 reward=0.7978665 (476.35 it/sec) -training >> step=8852700, episode=1476 reward=0.7941322 (448.30 it/sec) -training >> step=8852800, episode=1476 reward=0.7832561 (456.08 it/sec) -training >> step=8852900, episode=1476 reward=0.8094929 (473.16 it/sec) -training >> step=8853000, episode=1476 reward=0.8244514 (483.36 it/sec) -training >> step=8853100, episode=1476 reward=0.7813093 (490.35 it/sec) -training >> step=8853200, episode=1476 reward=0.7919773 (410.14 it/sec) -training >> step=8853300, episode=1476 reward=0.7729114 (449.60 it/sec) -training >> step=8853400, episode=1476 reward=0.8046967 (449.79 it/sec) -training >> step=8853500, episode=1476 reward=0.7875037 (468.03 it/sec) -training >> step=8853600, episode=1476 reward=0.7927927 (480.50 it/sec) -training >> step=8853700, episode=1476 reward=0.7967478 (493.91 it/sec) -training >> step=8853800, episode=1476 reward=0.7681947 (459.96 it/sec) -training >> step=8853900, episode=1476 reward=0.7908565 (479.83 it/sec) -training >> step=8854000, episode=1476 reward=0.791351 (461.64 it/sec) -training >> step=8854100, episode=1476 reward=0.7769185 (471.83 it/sec) -training >> step=8854200, episode=1476 reward=0.7849639 (503.70 it/sec) -training >> step=8854300, episode=1476 reward=0.7896779 (463.84 it/sec) -training >> step=8854400, episode=1476 reward=0.7862669 (448.63 it/sec) -training >> step=8854500, episode=1476 reward=0.8085006 (447.92 it/sec) -training >> step=8854600, episode=1476 reward=0.7865033 (485.73 it/sec) -training >> step=8854700, episode=1476 reward=0.8030882 (509.06 it/sec) -training >> step=8854800, episode=1476 reward=0.7965788 (464.54 it/sec) -training >> step=8854900, episode=1476 reward=0.8009654 (470.20 it/sec) -training >> step=8855000, episode=1476 reward=0.7936168 (460.13 it/sec) -training >> step=8855100, episode=1476 reward=0.7886287 (502.36 it/sec) -training >> step=8855200, episode=1476 reward=0.7996793 (491.82 it/sec) -training >> step=8855300, episode=1477 reward=0.7767934 (92.60 it/sec) -training >> step=8855400, episode=1477 reward=0.7806158 (485.84 it/sec) -training >> step=8855500, episode=1477 reward=0.8000163 (466.15 it/sec) -training >> step=8855600, episode=1477 reward=0.7815949 (474.10 it/sec) -training >> step=8855700, episode=1477 reward=0.8055953 (475.04 it/sec) -training >> step=8855800, episode=1477 reward=0.7813919 (515.78 it/sec) -training >> step=8855900, episode=1477 reward=0.8040665 (468.11 it/sec) -training >> step=8856000, episode=1477 reward=0.7781187 (484.05 it/sec) -training >> step=8856100, episode=1477 reward=0.7984167 (489.69 it/sec) -training >> step=8856200, episode=1477 reward=0.7920637 (479.41 it/sec) -training >> step=8856300, episode=1477 reward=0.7943301 (495.94 it/sec) -training >> step=8856400, episode=1477 reward=0.7981195 (365.15 it/sec) -training >> step=8856500, episode=1477 reward=0.8133451 (463.99 it/sec) -training >> step=8856600, episode=1477 reward=0.8008506 (483.82 it/sec) -training >> step=8856700, episode=1477 reward=0.7799829 (470.62 it/sec) -training >> step=8856800, episode=1477 reward=0.797954 (457.23 it/sec) -training >> step=8856900, episode=1477 reward=0.7822068 (473.15 it/sec) -training >> step=8857000, episode=1477 reward=0.7760075 (449.61 it/sec) -training >> step=8857100, episode=1477 reward=0.8061883 (499.25 it/sec) -training >> step=8857200, episode=1477 reward=0.7929571 (468.23 it/sec) -training >> step=8857300, episode=1477 reward=0.7982099 (416.65 it/sec) -training >> step=8857400, episode=1477 reward=0.7954311 (471.63 it/sec) -training >> step=8857500, episode=1477 reward=0.7945295 (447.38 it/sec) -training >> step=8857600, episode=1477 reward=0.8107397 (435.74 it/sec) -training >> step=8857700, episode=1477 reward=0.7931249 (463.16 it/sec) -training >> step=8857800, episode=1477 reward=0.787806 (457.94 it/sec) -training >> step=8857900, episode=1477 reward=0.795739 (494.55 it/sec) -training >> step=8858000, episode=1477 reward=0.7815673 (474.27 it/sec) -training >> step=8858100, episode=1477 reward=0.7738169 (475.26 it/sec) -training >> step=8858200, episode=1477 reward=0.7605446 (510.96 it/sec) -training >> step=8858300, episode=1477 reward=0.8186334 (471.99 it/sec) -training >> step=8858400, episode=1477 reward=0.794676 (459.16 it/sec) -training >> step=8858500, episode=1477 reward=0.775811 (466.24 it/sec) -training >> step=8858600, episode=1477 reward=0.7926768 (486.10 it/sec) -training >> step=8858700, episode=1477 reward=0.7851219 (500.71 it/sec) -training >> step=8858800, episode=1477 reward=0.7887486 (421.88 it/sec) -training >> step=8858900, episode=1477 reward=0.8021156 (489.22 it/sec) -training >> step=8859000, episode=1477 reward=0.7833306 (485.97 it/sec) -training >> step=8859100, episode=1477 reward=0.8040912 (479.54 it/sec) -training >> step=8859200, episode=1477 reward=0.7746598 (452.39 it/sec) -training >> step=8859300, episode=1477 reward=0.7987341 (499.13 it/sec) -training >> step=8859400, episode=1477 reward=0.7974565 (443.39 it/sec) -training >> step=8859500, episode=1477 reward=0.7834935 (484.10 it/sec) -training >> step=8859600, episode=1477 reward=0.797934 (493.61 it/sec) -training >> step=8859700, episode=1477 reward=0.7981396 (488.04 it/sec) -training >> step=8859800, episode=1477 reward=0.780257 (473.87 it/sec) -training >> step=8859900, episode=1477 reward=0.7867851 (470.20 it/sec) -training >> step=8860000, episode=1477 reward=0.7939008 (452.92 it/sec) -training >> step=8860100, episode=1477 reward=0.7938251 (500.08 it/sec) -training >> step=8860200, episode=1477 reward=0.7859573 (457.44 it/sec) -training >> step=8860300, episode=1477 reward=0.7933236 (489.06 it/sec) -training >> step=8860400, episode=1477 reward=0.8002478 (394.09 it/sec) -training >> step=8860500, episode=1477 reward=0.7968892 (444.30 it/sec) -training >> step=8860600, episode=1477 reward=0.7835965 (464.65 it/sec) -training >> step=8860700, episode=1477 reward=0.8014983 (492.88 it/sec) -training >> step=8860800, episode=1477 reward=0.7763785 (515.27 it/sec) -training >> step=8860900, episode=1477 reward=0.7901295 (480.92 it/sec) -training >> step=8861000, episode=1477 reward=0.788875 (481.82 it/sec) -training >> step=8861100, episode=1477 reward=0.7796128 (506.69 it/sec) -training >> step=8861200, episode=1477 reward=0.7712957 (471.99 it/sec) -training >> step=8861300, episode=1478 reward=0.7991304 (95.81 it/sec) -training >> step=8861400, episode=1478 reward=0.7804039 (497.02 it/sec) -training >> step=8861500, episode=1478 reward=0.7982088 (469.34 it/sec) -training >> step=8861600, episode=1478 reward=0.7654714 (485.08 it/sec) -training >> step=8861700, episode=1478 reward=0.7863768 (471.36 it/sec) -training >> step=8861800, episode=1478 reward=0.8032126 (496.96 it/sec) -training >> step=8861900, episode=1478 reward=0.8028002 (460.89 it/sec) -training >> step=8862000, episode=1478 reward=0.803266 (473.33 it/sec) -training >> step=8862100, episode=1478 reward=0.7824053 (454.96 it/sec) -training >> step=8862200, episode=1478 reward=0.815564 (485.44 it/sec) -training >> step=8862300, episode=1478 reward=0.8005384 (472.96 it/sec) -training >> step=8862400, episode=1478 reward=0.7811615 (471.12 it/sec) -training >> step=8862500, episode=1478 reward=0.7818252 (491.65 it/sec) -training >> step=8862600, episode=1478 reward=0.7669193 (474.90 it/sec) -training >> step=8862700, episode=1478 reward=0.7952808 (361.06 it/sec) -training >> step=8862800, episode=1478 reward=0.7942467 (484.17 it/sec) -training >> step=8862900, episode=1478 reward=0.7925003 (490.92 it/sec) -training >> step=8863000, episode=1478 reward=0.8038144 (452.61 it/sec) -training >> step=8863100, episode=1478 reward=0.7846548 (483.27 it/sec) -training >> step=8863200, episode=1478 reward=0.7983272 (489.13 it/sec) -training >> step=8863300, episode=1478 reward=0.8054051 (504.69 it/sec) -training >> step=8863400, episode=1478 reward=0.7878207 (452.10 it/sec) -training >> step=8863500, episode=1478 reward=0.7903159 (509.93 it/sec) -training >> step=8863600, episode=1478 reward=0.7863529 (507.38 it/sec) -training >> step=8863700, episode=1478 reward=0.7727271 (468.49 it/sec) -training >> step=8863800, episode=1478 reward=0.7924889 (484.71 it/sec) -training >> step=8863900, episode=1478 reward=0.7756832 (451.47 it/sec) -training >> step=8864000, episode=1478 reward=0.8054745 (487.24 it/sec) -training >> step=8864100, episode=1478 reward=0.7866599 (497.19 it/sec) -training >> step=8864200, episode=1478 reward=0.7799066 (471.26 it/sec) -training >> step=8864300, episode=1478 reward=0.8107452 (442.53 it/sec) -training >> step=8864400, episode=1478 reward=0.7964366 (422.10 it/sec) -training >> step=8864500, episode=1478 reward=0.7857338 (459.31 it/sec) -training >> step=8864600, episode=1478 reward=0.799246 (484.14 it/sec) -training >> step=8864700, episode=1478 reward=0.7755119 (473.89 it/sec) -training >> step=8864800, episode=1478 reward=0.7644033 (467.97 it/sec) -training >> step=8864900, episode=1478 reward=0.7958053 (477.67 it/sec) -training >> step=8865000, episode=1478 reward=0.7870575 (468.75 it/sec) -training >> step=8865100, episode=1478 reward=0.7894518 (454.07 it/sec) -training >> step=8865200, episode=1478 reward=0.7867255 (372.22 it/sec) -training >> step=8865300, episode=1478 reward=0.8117098 (402.48 it/sec) -training >> step=8865400, episode=1478 reward=0.7907718 (371.95 it/sec) -training >> step=8865500, episode=1478 reward=0.7969186 (418.28 it/sec) -training >> step=8865600, episode=1478 reward=0.7996182 (435.74 it/sec) -training >> step=8865700, episode=1478 reward=0.7919289 (453.20 it/sec) -training >> step=8865800, episode=1478 reward=0.7844071 (489.33 it/sec) -training >> step=8865900, episode=1478 reward=0.8027649 (485.25 it/sec) -training >> step=8866000, episode=1478 reward=0.8136288 (466.05 it/sec) -training >> step=8866100, episode=1478 reward=0.8094233 (457.11 it/sec) -training >> step=8866200, episode=1478 reward=0.7874257 (462.35 it/sec) -training >> step=8866300, episode=1478 reward=0.7868047 (495.94 it/sec) -training >> step=8866400, episode=1478 reward=0.8045884 (409.90 it/sec) -training >> step=8866500, episode=1478 reward=0.7864495 (416.72 it/sec) -training >> step=8866600, episode=1478 reward=0.7932884 (415.84 it/sec) -training >> step=8866700, episode=1478 reward=0.7927763 (447.53 it/sec) -training >> step=8866800, episode=1478 reward=0.7880536 (500.16 it/sec) -training >> step=8866900, episode=1478 reward=0.7762164 (473.19 it/sec) -training >> step=8867000, episode=1478 reward=0.8005775 (445.40 it/sec) -training >> step=8867100, episode=1478 reward=0.7967917 (451.50 it/sec) -training >> step=8867200, episode=1478 reward=0.7979605 (468.31 it/sec) -training >> step=8867300, episode=1479 reward=0.7929167 (76.36 it/sec) -training >> step=8867400, episode=1479 reward=0.7781565 (475.51 it/sec) -training >> step=8867500, episode=1479 reward=0.7881359 (501.22 it/sec) -training >> step=8867600, episode=1479 reward=0.7819602 (461.68 it/sec) -training >> step=8867700, episode=1479 reward=0.7745157 (479.78 it/sec) -training >> step=8867800, episode=1479 reward=0.7768211 (487.98 it/sec) -training >> step=8867900, episode=1479 reward=0.7972326 (458.76 it/sec) -training >> step=8868000, episode=1479 reward=0.7741091 (489.48 it/sec) -training >> step=8868100, episode=1479 reward=0.7823412 (433.91 it/sec) -training >> step=8868200, episode=1479 reward=0.7991475 (460.37 it/sec) -training >> step=8868300, episode=1479 reward=0.7937087 (440.77 it/sec) -training >> step=8868400, episode=1479 reward=0.7848501 (406.63 it/sec) -training >> step=8868500, episode=1479 reward=0.7774264 (434.74 it/sec) -training >> step=8868600, episode=1479 reward=0.7930071 (439.72 it/sec) -training >> step=8868700, episode=1479 reward=0.8176643 (450.67 it/sec) -training >> step=8868800, episode=1479 reward=0.7996362 (462.30 it/sec) -training >> step=8868900, episode=1479 reward=0.798178 (312.10 it/sec) -training >> step=8869000, episode=1479 reward=0.8113911 (505.63 it/sec) -training >> step=8869100, episode=1479 reward=0.7888651 (430.33 it/sec) -training >> step=8869200, episode=1479 reward=0.8161951 (436.83 it/sec) -training >> step=8869300, episode=1479 reward=0.8007506 (475.18 it/sec) -training >> step=8869400, episode=1479 reward=0.7897731 (489.59 it/sec) -training >> step=8869500, episode=1479 reward=0.7903869 (475.93 it/sec) -training >> step=8869600, episode=1479 reward=0.7721934 (486.71 it/sec) -training >> step=8869700, episode=1479 reward=0.7955555 (496.03 it/sec) -training >> step=8869800, episode=1479 reward=0.7863049 (458.20 it/sec) -training >> step=8869900, episode=1479 reward=0.7916023 (460.82 it/sec) -training >> step=8870000, episode=1479 reward=0.7790908 (478.00 it/sec) -training >> step=8870100, episode=1479 reward=0.7781124 (507.25 it/sec) -training >> step=8870200, episode=1479 reward=0.7987562 (476.33 it/sec) -training >> step=8870300, episode=1479 reward=0.7865195 (467.60 it/sec) -training >> step=8870400, episode=1479 reward=0.7903047 (436.83 it/sec) -training >> step=8870500, episode=1479 reward=0.7773936 (467.34 it/sec) -training >> step=8870600, episode=1479 reward=0.7737665 (464.95 it/sec) -training >> step=8870700, episode=1479 reward=0.7940471 (487.18 it/sec) -training >> step=8870800, episode=1479 reward=0.7886942 (488.19 it/sec) -training >> step=8870900, episode=1479 reward=0.8076034 (475.01 it/sec) -training >> step=8871000, episode=1479 reward=0.798394 (464.25 it/sec) -training >> step=8871100, episode=1479 reward=0.7981363 (474.53 it/sec) -training >> step=8871200, episode=1479 reward=0.7803891 (467.44 it/sec) -training >> step=8871300, episode=1479 reward=0.801768 (454.50 it/sec) -training >> step=8871400, episode=1479 reward=0.7969489 (456.61 it/sec) -training >> step=8871500, episode=1479 reward=0.7695552 (474.30 it/sec) -training >> step=8871600, episode=1479 reward=0.7912402 (455.48 it/sec) -training >> step=8871700, episode=1479 reward=0.7864335 (399.00 it/sec) -training >> step=8871800, episode=1479 reward=0.7948361 (489.01 it/sec) -training >> step=8871900, episode=1479 reward=0.793765 (464.26 it/sec) -training >> step=8872000, episode=1479 reward=0.8011273 (476.83 it/sec) -training >> step=8872100, episode=1479 reward=0.7905566 (481.02 it/sec) -training >> step=8872200, episode=1479 reward=0.7831168 (467.32 it/sec) -training >> step=8872300, episode=1479 reward=0.784951 (459.69 it/sec) -training >> step=8872400, episode=1479 reward=0.7837896 (423.54 it/sec) -training >> step=8872500, episode=1479 reward=0.7902272 (503.62 it/sec) -training >> step=8872600, episode=1479 reward=0.8011192 (472.91 it/sec) -training >> step=8872700, episode=1479 reward=0.7845699 (446.04 it/sec) -training >> step=8872800, episode=1479 reward=0.7825812 (484.88 it/sec) -training >> step=8872900, episode=1479 reward=0.8011554 (482.98 it/sec) -training >> step=8873000, episode=1479 reward=0.8045927 (465.36 it/sec) -training >> step=8873100, episode=1479 reward=0.804589 (480.53 it/sec) -training >> step=8873200, episode=1479 reward=0.7803401 (462.13 it/sec) -training >> step=8873300, episode=1480 reward=0.7968866 (100.73 it/sec) -training >> step=8873400, episode=1480 reward=0.7761247 (347.28 it/sec) -training >> step=8873500, episode=1480 reward=0.775556 (486.37 it/sec) -training >> step=8873600, episode=1480 reward=0.7843969 (495.84 it/sec) -training >> step=8873700, episode=1480 reward=0.7818265 (459.36 it/sec) -training >> step=8873800, episode=1480 reward=0.7768266 (456.37 it/sec) -training >> step=8873900, episode=1480 reward=0.777256 (455.04 it/sec) -training >> step=8874000, episode=1480 reward=0.8077239 (494.68 it/sec) -training >> step=8874100, episode=1480 reward=0.7936485 (503.51 it/sec) -training >> step=8874200, episode=1480 reward=0.7995982 (480.02 it/sec) -training >> step=8874300, episode=1480 reward=0.7720531 (495.38 it/sec) -training >> step=8874400, episode=1480 reward=0.7844931 (464.92 it/sec) -training >> step=8874500, episode=1480 reward=0.7813854 (486.71 it/sec) -training >> step=8874600, episode=1480 reward=0.7746366 (482.64 it/sec) -training >> step=8874700, episode=1480 reward=0.7844893 (444.04 it/sec) -training >> step=8874800, episode=1480 reward=0.7762759 (452.34 it/sec) -training >> step=8874900, episode=1480 reward=0.7976623 (500.03 it/sec) -training >> step=8875000, episode=1480 reward=0.789898 (483.66 it/sec) -training >> step=8875100, episode=1480 reward=0.7986911 (327.82 it/sec) -training >> step=8875200, episode=1480 reward=0.7987718 (507.62 it/sec) -training >> step=8875300, episode=1480 reward=0.7739081 (492.35 it/sec) -training >> step=8875400, episode=1480 reward=0.7807078 (490.21 it/sec) -training >> step=8875500, episode=1480 reward=0.7885639 (501.36 it/sec) -training >> step=8875600, episode=1480 reward=0.7917386 (510.99 it/sec) -training >> step=8875700, episode=1480 reward=0.8049333 (469.89 it/sec) -training >> step=8875800, episode=1480 reward=0.8040662 (452.21 it/sec) -training >> step=8875900, episode=1480 reward=0.7840721 (469.10 it/sec) -training >> step=8876000, episode=1480 reward=0.7863416 (524.97 it/sec) -training >> step=8876100, episode=1480 reward=0.8032345 (448.12 it/sec) -training >> step=8876200, episode=1480 reward=0.8016459 (445.07 it/sec) -training >> step=8876300, episode=1480 reward=0.7737474 (498.25 it/sec) -training >> step=8876400, episode=1480 reward=0.7779177 (481.80 it/sec) -training >> step=8876500, episode=1480 reward=0.8036271 (477.72 it/sec) -training >> step=8876600, episode=1480 reward=0.7820548 (485.77 it/sec) -training >> step=8876700, episode=1480 reward=0.8053978 (480.49 it/sec) -training >> step=8876800, episode=1480 reward=0.7878996 (468.08 it/sec) -training >> step=8876900, episode=1480 reward=0.7892886 (496.23 it/sec) -training >> step=8877000, episode=1480 reward=0.7980961 (505.80 it/sec) -training >> step=8877100, episode=1480 reward=0.7987955 (505.72 it/sec) -training >> step=8877200, episode=1480 reward=0.7891434 (437.82 it/sec) -training >> step=8877300, episode=1480 reward=0.7830533 (435.26 it/sec) -training >> step=8877400, episode=1480 reward=0.8048458 (401.87 it/sec) -training >> step=8877500, episode=1480 reward=0.8022289 (497.99 it/sec) -training >> step=8877600, episode=1480 reward=0.8055301 (474.70 it/sec) -training >> step=8877700, episode=1480 reward=0.7934611 (482.02 it/sec) -training >> step=8877800, episode=1480 reward=0.7814437 (439.60 it/sec) -training >> step=8877900, episode=1480 reward=0.8147974 (451.62 it/sec) -training >> step=8878000, episode=1480 reward=0.8032381 (468.90 it/sec) -training >> step=8878100, episode=1480 reward=0.7905877 (506.15 it/sec) -training >> step=8878200, episode=1480 reward=0.8068544 (467.34 it/sec) -training >> step=8878300, episode=1480 reward=0.7955462 (462.16 it/sec) -training >> step=8878400, episode=1480 reward=0.803659 (502.62 it/sec) -training >> step=8878500, episode=1480 reward=0.7797448 (459.13 it/sec) -training >> step=8878600, episode=1480 reward=0.7858363 (470.73 it/sec) -training >> step=8878700, episode=1480 reward=0.7864184 (453.71 it/sec) -training >> step=8878800, episode=1480 reward=0.7860537 (450.72 it/sec) -training >> step=8878900, episode=1480 reward=0.7887078 (504.64 it/sec) -training >> step=8879000, episode=1480 reward=0.7895337 (478.54 it/sec) -training >> step=8879100, episode=1480 reward=0.7703867 (463.88 it/sec) -training >> step=8879200, episode=1480 reward=0.7903426 (437.31 it/sec) -training >> step=8879300, episode=1481 reward=0.7785494 (90.23 it/sec) -training >> step=8879400, episode=1481 reward=0.7873319 (485.48 it/sec) -training >> step=8879500, episode=1481 reward=0.779134 (476.43 it/sec) -training >> step=8879600, episode=1481 reward=0.7881505 (499.96 it/sec) -training >> step=8879700, episode=1481 reward=0.7728109 (418.21 it/sec) -training >> step=8879800, episode=1481 reward=0.779492 (483.96 it/sec) -training >> step=8879900, episode=1481 reward=0.7776079 (501.03 it/sec) -training >> step=8880000, episode=1481 reward=0.7807123 (463.51 it/sec) -training >> step=8880100, episode=1481 reward=0.7921785 (488.54 it/sec) -training >> step=8880200, episode=1481 reward=0.7942163 (453.55 it/sec) -training >> step=8880300, episode=1481 reward=0.8038248 (513.50 it/sec) -training >> step=8880400, episode=1481 reward=0.7772689 (492.04 it/sec) -training >> step=8880500, episode=1481 reward=0.7875273 (499.03 it/sec) -training >> step=8880600, episode=1481 reward=0.7769732 (470.66 it/sec) -training >> step=8880700, episode=1481 reward=0.7945486 (483.58 it/sec) -training >> step=8880800, episode=1481 reward=0.7735771 (465.72 it/sec) -training >> step=8880900, episode=1481 reward=0.8010826 (478.04 it/sec) -training >> step=8881000, episode=1481 reward=0.7947338 (509.68 it/sec) -training >> step=8881100, episode=1481 reward=0.7794673 (482.53 it/sec) -training >> step=8881200, episode=1481 reward=0.7986625 (445.77 it/sec) -training >> step=8881300, episode=1481 reward=0.7918822 (349.32 it/sec) -training >> step=8881400, episode=1481 reward=0.8037626 (487.58 it/sec) -training >> step=8881500, episode=1481 reward=0.7804742 (441.81 it/sec) -training >> step=8881600, episode=1481 reward=0.8007624 (473.81 it/sec) -training >> step=8881700, episode=1481 reward=0.8026588 (470.61 it/sec) -training >> step=8881800, episode=1481 reward=0.7824363 (467.76 it/sec) -training >> step=8881900, episode=1481 reward=0.7895328 (490.70 it/sec) -training >> step=8882000, episode=1481 reward=0.7655405 (468.98 it/sec) -training >> step=8882100, episode=1481 reward=0.7910314 (514.99 it/sec) -training >> step=8882200, episode=1481 reward=0.7939625 (497.82 it/sec) -training >> step=8882300, episode=1481 reward=0.7991773 (459.88 it/sec) -training >> step=8882400, episode=1481 reward=0.7905178 (460.31 it/sec) -training >> step=8882500, episode=1481 reward=0.7847487 (496.43 it/sec) -training >> step=8882600, episode=1481 reward=0.7891938 (480.31 it/sec) -training >> step=8882700, episode=1481 reward=0.7786728 (469.57 it/sec) -training >> step=8882800, episode=1481 reward=0.8026423 (485.48 it/sec) -training >> step=8882900, episode=1481 reward=0.8024738 (509.34 it/sec) -training >> step=8883000, episode=1481 reward=0.784878 (501.18 it/sec) -training >> step=8883100, episode=1481 reward=0.7954176 (462.91 it/sec) -training >> step=8883200, episode=1481 reward=0.7934777 (494.34 it/sec) -training >> step=8883300, episode=1481 reward=0.8017195 (490.16 it/sec) -training >> step=8883400, episode=1481 reward=0.7948982 (508.44 it/sec) -training >> step=8883500, episode=1481 reward=0.8039176 (487.10 it/sec) -training >> step=8883600, episode=1481 reward=0.7823437 (513.85 it/sec) -training >> step=8883700, episode=1481 reward=0.7847064 (454.38 it/sec) -training >> step=8883800, episode=1481 reward=0.7901612 (462.28 it/sec) -training >> step=8883900, episode=1481 reward=0.7834392 (486.22 it/sec) -training >> step=8884000, episode=1481 reward=0.7685412 (495.12 it/sec) -training >> step=8884100, episode=1481 reward=0.7936605 (491.10 it/sec) -training >> step=8884200, episode=1481 reward=0.7916347 (496.54 it/sec) -training >> step=8884300, episode=1481 reward=0.8034358 (440.82 it/sec) -training >> step=8884400, episode=1481 reward=0.7837293 (498.81 it/sec) -training >> step=8884500, episode=1481 reward=0.7707031 (454.56 it/sec) -training >> step=8884600, episode=1481 reward=0.8056895 (425.90 it/sec) -training >> step=8884700, episode=1481 reward=0.7766783 (500.50 it/sec) -training >> step=8884800, episode=1481 reward=0.7835023 (503.64 it/sec) -training >> step=8884900, episode=1481 reward=0.7927175 (478.82 it/sec) -training >> step=8885000, episode=1481 reward=0.8097914 (494.01 it/sec) -training >> step=8885100, episode=1481 reward=0.7859084 (499.92 it/sec) -training >> step=8885200, episode=1481 reward=0.7920198 (461.40 it/sec) -training >> step=8885300, episode=1482 reward=0.7794582 (110.85 it/sec) -training >> step=8885400, episode=1482 reward=0.7868568 (485.58 it/sec) -training >> step=8885500, episode=1482 reward=0.7694144 (494.35 it/sec) -training >> step=8885600, episode=1482 reward=0.7569356 (464.95 it/sec) -training >> step=8885700, episode=1482 reward=0.7734087 (504.07 it/sec) -training >> step=8885800, episode=1482 reward=0.7973984 (460.02 it/sec) -training >> step=8885900, episode=1482 reward=0.8065763 (465.34 it/sec) -training >> step=8886000, episode=1482 reward=0.7826977 (498.48 it/sec) -training >> step=8886100, episode=1482 reward=0.7777321 (485.78 it/sec) -training >> step=8886200, episode=1482 reward=0.7863585 (439.41 it/sec) -training >> step=8886300, episode=1482 reward=0.8159929 (439.17 it/sec) -training >> step=8886400, episode=1482 reward=0.8018207 (469.58 it/sec) -training >> step=8886500, episode=1482 reward=0.7717375 (442.42 it/sec) -training >> step=8886600, episode=1482 reward=0.7844731 (474.80 it/sec) -training >> step=8886700, episode=1482 reward=0.782194 (456.41 it/sec) -training >> step=8886800, episode=1482 reward=0.8087635 (463.90 it/sec) -training >> step=8886900, episode=1482 reward=0.8008763 (494.60 it/sec) -training >> step=8887000, episode=1482 reward=0.7715767 (488.02 it/sec) -training >> step=8887100, episode=1482 reward=0.8038619 (489.58 it/sec) -training >> step=8887200, episode=1482 reward=0.790754 (477.29 it/sec) -training >> step=8887300, episode=1482 reward=0.7893898 (448.38 it/sec) -training >> step=8887400, episode=1482 reward=0.7888356 (482.91 it/sec) -training >> step=8887500, episode=1482 reward=0.7913221 (343.14 it/sec) -training >> step=8887600, episode=1482 reward=0.8029987 (470.22 it/sec) -training >> step=8887700, episode=1482 reward=0.7845174 (457.50 it/sec) -training >> step=8887800, episode=1482 reward=0.8061528 (491.69 it/sec) -training >> step=8887900, episode=1482 reward=0.799745 (513.32 it/sec) -training >> step=8888000, episode=1482 reward=0.7900419 (430.04 it/sec) -training >> step=8888100, episode=1482 reward=0.7935137 (493.36 it/sec) -training >> step=8888200, episode=1482 reward=0.7975569 (494.06 it/sec) -training >> step=8888300, episode=1482 reward=0.7950667 (529.78 it/sec) -training >> step=8888400, episode=1482 reward=0.7849973 (466.86 it/sec) -training >> step=8888500, episode=1482 reward=0.7975327 (458.53 it/sec) -training >> step=8888600, episode=1482 reward=0.8130367 (498.28 it/sec) -training >> step=8888700, episode=1482 reward=0.8037506 (490.89 it/sec) -training >> step=8888800, episode=1482 reward=0.7890952 (462.52 it/sec) -training >> step=8888900, episode=1482 reward=0.8032246 (492.10 it/sec) -training >> step=8889000, episode=1482 reward=0.7988457 (507.53 it/sec) -training >> step=8889100, episode=1482 reward=0.7928503 (468.26 it/sec) -training >> step=8889200, episode=1482 reward=0.7861084 (450.96 it/sec) -training >> step=8889300, episode=1482 reward=0.8031094 (499.15 it/sec) -training >> step=8889400, episode=1482 reward=0.7782954 (489.47 it/sec) -training >> step=8889500, episode=1482 reward=0.7956558 (497.32 it/sec) -training >> step=8889600, episode=1482 reward=0.7813077 (469.97 it/sec) -training >> step=8889700, episode=1482 reward=0.7935914 (506.13 it/sec) -training >> step=8889800, episode=1482 reward=0.7912807 (503.36 it/sec) -training >> step=8889900, episode=1482 reward=0.788466 (503.53 it/sec) -training >> step=8890000, episode=1482 reward=0.7888183 (473.16 it/sec) -training >> step=8890100, episode=1482 reward=0.8061968 (508.17 it/sec) -training >> step=8890200, episode=1482 reward=0.7901286 (477.85 it/sec) -training >> step=8890300, episode=1482 reward=0.7772749 (501.08 it/sec) -training >> step=8890400, episode=1482 reward=0.7802278 (469.05 it/sec) -training >> step=8890500, episode=1482 reward=0.8039454 (453.72 it/sec) -training >> step=8890600, episode=1482 reward=0.7983049 (456.05 it/sec) -training >> step=8890700, episode=1482 reward=0.7903308 (430.38 it/sec) -training >> step=8890800, episode=1482 reward=0.8258566 (470.66 it/sec) -training >> step=8890900, episode=1482 reward=0.8225607 (422.11 it/sec) -training >> step=8891000, episode=1482 reward=0.8039567 (470.37 it/sec) -training >> step=8891100, episode=1482 reward=0.7956058 (472.41 it/sec) -training >> step=8891200, episode=1482 reward=0.7962286 (507.17 it/sec) -training >> step=8891300, episode=1483 reward=0.7604161 (85.88 it/sec) -training >> step=8891400, episode=1483 reward=0.7862715 (424.56 it/sec) -training >> step=8891500, episode=1483 reward=0.7845178 (416.21 it/sec) -training >> step=8891600, episode=1483 reward=0.76916 (457.77 it/sec) -training >> step=8891700, episode=1483 reward=0.7934253 (447.96 it/sec) -training >> step=8891800, episode=1483 reward=0.7987159 (478.98 it/sec) -training >> step=8891900, episode=1483 reward=0.7737405 (525.16 it/sec) -training >> step=8892000, episode=1483 reward=0.7702284 (500.37 it/sec) -training >> step=8892100, episode=1483 reward=0.7927998 (508.30 it/sec) -training >> step=8892200, episode=1483 reward=0.7985436 (481.74 it/sec) -training >> step=8892300, episode=1483 reward=0.7994373 (509.72 it/sec) -training >> step=8892400, episode=1483 reward=0.7936693 (447.38 it/sec) -training >> step=8892500, episode=1483 reward=0.7900105 (419.49 it/sec) -training >> step=8892600, episode=1483 reward=0.7778103 (415.91 it/sec) -training >> step=8892700, episode=1483 reward=0.8041616 (510.04 it/sec) -training >> step=8892800, episode=1483 reward=0.7919019 (512.77 it/sec) -training >> step=8892900, episode=1483 reward=0.7888079 (507.79 it/sec) -training >> step=8893000, episode=1483 reward=0.790036 (523.01 it/sec) -training >> step=8893100, episode=1483 reward=0.7880942 (456.83 it/sec) -training >> step=8893200, episode=1483 reward=0.8003711 (520.63 it/sec) -training >> step=8893300, episode=1483 reward=0.8041492 (503.55 it/sec) -training >> step=8893400, episode=1483 reward=0.7686062 (433.78 it/sec) -training >> step=8893500, episode=1483 reward=0.7793021 (474.73 it/sec) -training >> step=8893600, episode=1483 reward=0.796768 (476.99 it/sec) -training >> step=8893700, episode=1483 reward=0.8005416 (348.10 it/sec) -training >> step=8893800, episode=1483 reward=0.7866833 (473.60 it/sec) -training >> step=8893900, episode=1483 reward=0.7805483 (475.55 it/sec) -training >> step=8894000, episode=1483 reward=0.783646 (494.44 it/sec) -training >> step=8894100, episode=1483 reward=0.7835532 (437.98 it/sec) -training >> step=8894200, episode=1483 reward=0.7961617 (455.05 it/sec) -training >> step=8894300, episode=1483 reward=0.8052904 (491.00 it/sec) -training >> step=8894400, episode=1483 reward=0.7703457 (480.78 it/sec) -training >> step=8894500, episode=1483 reward=0.8062403 (490.36 it/sec) -training >> step=8894600, episode=1483 reward=0.7992823 (472.28 it/sec) -training >> step=8894700, episode=1483 reward=0.7795122 (480.80 it/sec) -training >> step=8894800, episode=1483 reward=0.7822421 (457.54 it/sec) -training >> step=8894900, episode=1483 reward=0.7884408 (474.06 it/sec) -training >> step=8895000, episode=1483 reward=0.7647125 (470.39 it/sec) -training >> step=8895100, episode=1483 reward=0.793518 (486.70 it/sec) -training >> step=8895200, episode=1483 reward=0.7680147 (437.50 it/sec) -training >> step=8895300, episode=1483 reward=0.7843105 (475.69 it/sec) -training >> step=8895400, episode=1483 reward=0.7937468 (509.76 it/sec) -training >> step=8895500, episode=1483 reward=0.7680672 (482.63 it/sec) -training >> step=8895600, episode=1483 reward=0.7888878 (483.97 it/sec) -training >> step=8895700, episode=1483 reward=0.8026801 (508.88 it/sec) -training >> step=8895800, episode=1483 reward=0.7975817 (513.53 it/sec) -training >> step=8895900, episode=1483 reward=0.7987804 (487.59 it/sec) -training >> step=8896000, episode=1483 reward=0.8045962 (461.99 it/sec) -training >> step=8896100, episode=1483 reward=0.7877557 (494.82 it/sec) -training >> step=8896200, episode=1483 reward=0.7919199 (494.61 it/sec) -training >> step=8896300, episode=1483 reward=0.8056864 (485.89 it/sec) -training >> step=8896400, episode=1483 reward=0.7841626 (480.69 it/sec) -training >> step=8896500, episode=1483 reward=0.7953389 (509.83 it/sec) -training >> step=8896600, episode=1483 reward=0.8013747 (465.93 it/sec) -training >> step=8896700, episode=1483 reward=0.8083821 (507.41 it/sec) -training >> step=8896800, episode=1483 reward=0.7924113 (472.68 it/sec) -training >> step=8896900, episode=1483 reward=0.7957936 (518.68 it/sec) -training >> step=8897000, episode=1483 reward=0.7948899 (471.05 it/sec) -training >> step=8897100, episode=1483 reward=0.7851932 (468.35 it/sec) -training >> step=8897200, episode=1483 reward=0.7683999 (531.50 it/sec) -training >> step=8897300, episode=1484 reward=0.8072132 (72.05 it/sec) -training >> step=8897400, episode=1484 reward=0.7938234 (475.84 it/sec) -training >> step=8897500, episode=1484 reward=0.7950085 (510.31 it/sec) -training >> step=8897600, episode=1484 reward=0.7813016 (511.84 it/sec) -training >> step=8897700, episode=1484 reward=0.801189 (479.60 it/sec) -training >> step=8897800, episode=1484 reward=0.7910246 (489.62 it/sec) -training >> step=8897900, episode=1484 reward=0.7995976 (463.48 it/sec) -training >> step=8898000, episode=1484 reward=0.7925633 (481.58 it/sec) -training >> step=8898100, episode=1484 reward=0.7809259 (493.75 it/sec) -training >> step=8898200, episode=1484 reward=0.784139 (502.42 it/sec) -training >> step=8898300, episode=1484 reward=0.7850153 (495.75 it/sec) -training >> step=8898400, episode=1484 reward=0.7983578 (490.60 it/sec) -training >> step=8898500, episode=1484 reward=0.7858382 (516.40 it/sec) -training >> step=8898600, episode=1484 reward=0.7653923 (483.26 it/sec) -training >> step=8898700, episode=1484 reward=0.8148646 (481.53 it/sec) -training >> step=8898800, episode=1484 reward=0.7815817 (466.69 it/sec) -training >> step=8898900, episode=1484 reward=0.7808549 (472.80 it/sec) -training >> step=8899000, episode=1484 reward=0.7972178 (465.35 it/sec) -training >> step=8899100, episode=1484 reward=0.8017989 (496.90 it/sec) -training >> step=8899200, episode=1484 reward=0.7841358 (530.86 it/sec) -training >> step=8899300, episode=1484 reward=0.7897954 (481.82 it/sec) -training >> step=8899400, episode=1484 reward=0.8013464 (468.09 it/sec) -training >> step=8899500, episode=1484 reward=0.7903641 (463.77 it/sec) -training >> step=8899600, episode=1484 reward=0.7749634 (503.53 it/sec) -training >> step=8899700, episode=1484 reward=0.7886772 (459.83 it/sec) -training >> step=8899800, episode=1484 reward=0.7980081 (467.78 it/sec) -training >> step=8899900, episode=1484 reward=0.7956519 (330.15 it/sec) -training >> step=8900000, episode=1484 reward=0.789328 (479.66 it/sec) -training >> step=8900100, episode=1484 reward=0.778224 (479.55 it/sec) -training >> step=8900200, episode=1484 reward=0.7983539 (484.07 it/sec) -training >> step=8900300, episode=1484 reward=0.803704 (523.34 it/sec) -training >> step=8900400, episode=1484 reward=0.8119525 (365.78 it/sec) -training >> step=8900500, episode=1484 reward=0.7858037 (416.57 it/sec) -training >> step=8900600, episode=1484 reward=0.7877089 (379.78 it/sec) -training >> step=8900700, episode=1484 reward=0.8059484 (477.06 it/sec) -training >> step=8900800, episode=1484 reward=0.7837321 (492.03 it/sec) -training >> step=8900900, episode=1484 reward=0.7763072 (525.83 it/sec) -training >> step=8901000, episode=1484 reward=0.7884697 (548.77 it/sec) -training >> step=8901100, episode=1484 reward=0.791682 (471.43 it/sec) -training >> step=8901200, episode=1484 reward=0.7770466 (468.01 it/sec) -training >> step=8901300, episode=1484 reward=0.7796233 (403.40 it/sec) -training >> step=8901400, episode=1484 reward=0.8070283 (469.39 it/sec) -training >> step=8901500, episode=1484 reward=0.7838881 (487.51 it/sec) -training >> step=8901600, episode=1484 reward=0.7875416 (440.19 it/sec) -training >> step=8901700, episode=1484 reward=0.7850116 (533.28 it/sec) -training >> step=8901800, episode=1484 reward=0.78044 (506.30 it/sec) -training >> step=8901900, episode=1484 reward=0.7897689 (517.34 it/sec) -training >> step=8902000, episode=1484 reward=0.8091359 (524.81 it/sec) -training >> step=8902100, episode=1484 reward=0.7835414 (559.21 it/sec) -training >> step=8902200, episode=1484 reward=0.7965388 (435.89 it/sec) -training >> step=8902300, episode=1484 reward=0.7883906 (491.80 it/sec) -training >> step=8902400, episode=1484 reward=0.7978146 (483.14 it/sec) -training >> step=8902500, episode=1484 reward=0.8064081 (558.59 it/sec) -training >> step=8902600, episode=1484 reward=0.7781388 (565.05 it/sec) -training >> step=8902700, episode=1484 reward=0.7825283 (512.88 it/sec) -training >> step=8902800, episode=1484 reward=0.8006573 (536.43 it/sec) -training >> step=8902900, episode=1484 reward=0.7768852 (491.20 it/sec) -training >> step=8903000, episode=1484 reward=0.7946224 (550.62 it/sec) -training >> step=8903100, episode=1484 reward=0.7853745 (503.51 it/sec) -training >> step=8903200, episode=1484 reward=0.7834699 (546.93 it/sec) -training >> step=8903300, episode=1485 reward=0.8078045 (136.22 it/sec) -training >> step=8903400, episode=1485 reward=0.8025466 (512.54 it/sec) -training >> step=8903500, episode=1485 reward=0.786684 (546.34 it/sec) -training >> step=8903600, episode=1485 reward=0.7780385 (532.69 it/sec) -training >> step=8903700, episode=1485 reward=0.7893953 (514.09 it/sec) -training >> step=8903800, episode=1485 reward=0.7737878 (545.91 it/sec) -training >> step=8903900, episode=1485 reward=0.7757388 (568.04 it/sec) -training >> step=8904000, episode=1485 reward=0.7759733 (499.76 it/sec) -training >> step=8904100, episode=1485 reward=0.7987186 (509.00 it/sec) -training >> step=8904200, episode=1485 reward=0.7877355 (521.36 it/sec) -training >> step=8904300, episode=1485 reward=0.7849163 (536.84 it/sec) -training >> step=8904400, episode=1485 reward=0.7920092 (519.49 it/sec) -training >> step=8904500, episode=1485 reward=0.7917633 (551.09 it/sec) -training >> step=8904600, episode=1485 reward=0.7955318 (519.72 it/sec) -training >> step=8904700, episode=1485 reward=0.7812004 (547.99 it/sec) -training >> step=8904800, episode=1485 reward=0.8013031 (545.90 it/sec) -training >> step=8904900, episode=1485 reward=0.7837885 (530.30 it/sec) -training >> step=8905000, episode=1485 reward=0.8094258 (538.77 it/sec) -training >> step=8905100, episode=1485 reward=0.8006993 (470.47 it/sec) -training >> step=8905200, episode=1485 reward=0.7713814 (525.24 it/sec) -training >> step=8905300, episode=1485 reward=0.7772002 (505.39 it/sec) -training >> step=8905400, episode=1485 reward=0.78083 (570.17 it/sec) -training >> step=8905500, episode=1485 reward=0.7758269 (554.91 it/sec) -training >> step=8905600, episode=1485 reward=0.7926061 (484.62 it/sec) -training >> step=8905700, episode=1485 reward=0.7831262 (490.80 it/sec) -training >> step=8905800, episode=1485 reward=0.7873284 (483.86 it/sec) -training >> step=8905900, episode=1485 reward=0.8023108 (490.87 it/sec) -training >> step=8906000, episode=1485 reward=0.7733956 (533.80 it/sec) -training >> step=8906100, episode=1485 reward=0.7719719 (569.61 it/sec) -training >> step=8906200, episode=1485 reward=0.7914271 (350.03 it/sec) -training >> step=8906300, episode=1485 reward=0.7879105 (448.58 it/sec) -training >> step=8906400, episode=1485 reward=0.7888673 (539.61 it/sec) -training >> step=8906500, episode=1485 reward=0.7990732 (500.49 it/sec) -training >> step=8906600, episode=1485 reward=0.8090602 (537.34 it/sec) -training >> step=8906700, episode=1485 reward=0.7893127 (538.11 it/sec) -training >> step=8906800, episode=1485 reward=0.7889581 (542.39 it/sec) -training >> step=8906900, episode=1485 reward=0.78814 (507.05 it/sec) -training >> step=8907000, episode=1485 reward=0.7808921 (564.41 it/sec) -training >> step=8907100, episode=1485 reward=0.800393 (558.55 it/sec) -training >> step=8907200, episode=1485 reward=0.7912889 (541.36 it/sec) -training >> step=8907300, episode=1485 reward=0.784981 (543.40 it/sec) -training >> step=8907400, episode=1485 reward=0.7885942 (514.52 it/sec) -training >> step=8907500, episode=1485 reward=0.7782253 (520.74 it/sec) -training >> step=8907600, episode=1485 reward=0.7820312 (519.83 it/sec) -training >> step=8907700, episode=1485 reward=0.7763668 (533.31 it/sec) -training >> step=8907800, episode=1485 reward=0.787077 (538.90 it/sec) -training >> step=8907900, episode=1485 reward=0.7933175 (543.01 it/sec) -training >> step=8908000, episode=1485 reward=0.7900353 (507.23 it/sec) -training >> step=8908100, episode=1485 reward=0.775903 (419.41 it/sec) -training >> step=8908200, episode=1485 reward=0.768603 (500.90 it/sec) -training >> step=8908300, episode=1485 reward=0.7884318 (481.11 it/sec) -training >> step=8908400, episode=1485 reward=0.7975082 (442.30 it/sec) -training >> step=8908500, episode=1485 reward=0.8033858 (467.73 it/sec) -training >> step=8908600, episode=1485 reward=0.7910675 (477.89 it/sec) -training >> step=8908700, episode=1485 reward=0.7733117 (447.00 it/sec) -training >> step=8908800, episode=1485 reward=0.7620862 (474.51 it/sec) -training >> step=8908900, episode=1485 reward=0.7865424 (465.15 it/sec) -training >> step=8909000, episode=1485 reward=0.7920683 (496.77 it/sec) -training >> step=8909100, episode=1485 reward=0.7979582 (448.61 it/sec) -training >> step=8909200, episode=1485 reward=0.7843369 (455.22 it/sec) -training >> step=8909300, episode=1486 reward=0.8076935 (96.40 it/sec) -training >> step=8909400, episode=1486 reward=0.7879193 (361.19 it/sec) -training >> step=8909500, episode=1486 reward=0.7780905 (436.62 it/sec) -training >> step=8909600, episode=1486 reward=0.7778026 (430.01 it/sec) -training >> step=8909700, episode=1486 reward=0.7862328 (375.01 it/sec) -training >> step=8909800, episode=1486 reward=0.7855781 (439.99 it/sec) -training >> step=8909900, episode=1486 reward=0.80071 (515.31 it/sec) -training >> step=8910000, episode=1486 reward=0.7789621 (489.21 it/sec) -training >> step=8910100, episode=1486 reward=0.8122937 (500.68 it/sec) -training >> step=8910200, episode=1486 reward=0.7865573 (498.81 it/sec) -training >> step=8910300, episode=1486 reward=0.7889008 (519.15 it/sec) -training >> step=8910400, episode=1486 reward=0.795772 (496.27 it/sec) -training >> step=8910500, episode=1486 reward=0.8091781 (543.34 it/sec) -training >> step=8910600, episode=1486 reward=0.7966642 (526.77 it/sec) -training >> step=8910700, episode=1486 reward=0.798662 (517.10 it/sec) -training >> step=8910800, episode=1486 reward=0.8098384 (475.56 it/sec) -training >> step=8910900, episode=1486 reward=0.7916803 (491.88 it/sec) -training >> step=8911000, episode=1486 reward=0.8013086 (505.30 it/sec) -training >> step=8911100, episode=1486 reward=0.7719623 (449.37 it/sec) -training >> step=8911200, episode=1486 reward=0.7963448 (517.14 it/sec) -training >> step=8911300, episode=1486 reward=0.8073233 (516.84 it/sec) -training >> step=8911400, episode=1486 reward=0.7565655 (505.68 it/sec) -training >> step=8911500, episode=1486 reward=0.7844486 (484.00 it/sec) -training >> step=8911600, episode=1486 reward=0.8003826 (468.23 it/sec) -training >> step=8911700, episode=1486 reward=0.7926518 (487.43 it/sec) -training >> step=8911800, episode=1486 reward=0.7962711 (455.72 it/sec) -training >> step=8911900, episode=1486 reward=0.7964026 (464.49 it/sec) -training >> step=8912000, episode=1486 reward=0.7970238 (519.56 it/sec) -training >> step=8912100, episode=1486 reward=0.7956231 (487.29 it/sec) -training >> step=8912200, episode=1486 reward=0.7866116 (438.51 it/sec) -training >> step=8912300, episode=1486 reward=0.7969994 (477.24 it/sec) -training >> step=8912400, episode=1486 reward=0.8210204 (377.48 it/sec) -training >> step=8912500, episode=1486 reward=0.8015354 (457.40 it/sec) -training >> step=8912600, episode=1486 reward=0.7802569 (475.86 it/sec) -training >> step=8912700, episode=1486 reward=0.7874192 (470.47 it/sec) -training >> step=8912800, episode=1486 reward=0.8066376 (463.63 it/sec) -training >> step=8912900, episode=1486 reward=0.8028166 (442.53 it/sec) -training >> step=8913000, episode=1486 reward=0.8072158 (445.76 it/sec) -training >> step=8913100, episode=1486 reward=0.7695757 (509.01 it/sec) -training >> step=8913200, episode=1486 reward=0.8002841 (435.05 it/sec) -training >> step=8913300, episode=1486 reward=0.7875112 (445.84 it/sec) -training >> step=8913400, episode=1486 reward=0.8102285 (501.90 it/sec) -training >> step=8913500, episode=1486 reward=0.7934084 (487.53 it/sec) -training >> step=8913600, episode=1486 reward=0.794477 (485.04 it/sec) -training >> step=8913700, episode=1486 reward=0.7961529 (474.15 it/sec) -training >> step=8913800, episode=1486 reward=0.8009436 (520.95 it/sec) -training >> step=8913900, episode=1486 reward=0.7964944 (469.94 it/sec) -training >> step=8914000, episode=1486 reward=0.7923109 (475.77 it/sec) -training >> step=8914100, episode=1486 reward=0.7951043 (452.53 it/sec) -training >> step=8914200, episode=1486 reward=0.7729842 (465.65 it/sec) -training >> step=8914300, episode=1486 reward=0.7721142 (468.20 it/sec) -training >> step=8914400, episode=1486 reward=0.7986068 (469.77 it/sec) -training >> step=8914500, episode=1486 reward=0.8134489 (516.24 it/sec) -training >> step=8914600, episode=1486 reward=0.7907516 (461.43 it/sec) -training >> step=8914700, episode=1486 reward=0.7828376 (434.95 it/sec) -training >> step=8914800, episode=1486 reward=0.78915 (435.21 it/sec) -training >> step=8914900, episode=1486 reward=0.8015411 (497.34 it/sec) -training >> step=8915000, episode=1486 reward=0.7919282 (463.27 it/sec) -training >> step=8915100, episode=1486 reward=0.7797019 (445.52 it/sec) -training >> step=8915200, episode=1486 reward=0.7833551 (488.20 it/sec) -training >> step=8915300, episode=1487 reward=0.7853073 (96.56 it/sec) -training >> step=8915400, episode=1487 reward=0.7678567 (478.17 it/sec) -training >> step=8915500, episode=1487 reward=0.793142 (456.89 it/sec) -training >> step=8915600, episode=1487 reward=0.774505 (492.19 it/sec) -training >> step=8915700, episode=1487 reward=0.8027787 (416.39 it/sec) -training >> step=8915800, episode=1487 reward=0.7908776 (400.01 it/sec) -training >> step=8915900, episode=1487 reward=0.789539 (469.43 it/sec) -training >> step=8916000, episode=1487 reward=0.7950725 (482.01 it/sec) -training >> step=8916100, episode=1487 reward=0.7941349 (389.69 it/sec) -training >> step=8916200, episode=1487 reward=0.7970114 (442.03 it/sec) -training >> step=8916300, episode=1487 reward=0.7990453 (478.69 it/sec) -training >> step=8916400, episode=1487 reward=0.7989986 (414.28 it/sec) -training >> step=8916500, episode=1487 reward=0.7902814 (460.45 it/sec) -training >> step=8916600, episode=1487 reward=0.7906258 (486.09 it/sec) -training >> step=8916700, episode=1487 reward=0.7864255 (496.36 it/sec) -training >> step=8916800, episode=1487 reward=0.7996781 (492.82 it/sec) -training >> step=8916900, episode=1487 reward=0.7843147 (489.03 it/sec) -training >> step=8917000, episode=1487 reward=0.7913416 (471.81 it/sec) -training >> step=8917100, episode=1487 reward=0.7989091 (448.49 it/sec) -training >> step=8917200, episode=1487 reward=0.7768304 (473.92 it/sec) -training >> step=8917300, episode=1487 reward=0.7971833 (534.69 it/sec) -training >> step=8917400, episode=1487 reward=0.7902764 (503.47 it/sec) -training >> step=8917500, episode=1487 reward=0.7883512 (488.18 it/sec) -training >> step=8917600, episode=1487 reward=0.7994383 (514.99 it/sec) -training >> step=8917700, episode=1487 reward=0.7866037 (429.20 it/sec) -training >> step=8917800, episode=1487 reward=0.7916666 (461.25 it/sec) -training >> step=8917900, episode=1487 reward=0.7973461 (487.16 it/sec) -training >> step=8918000, episode=1487 reward=0.7760448 (527.78 it/sec) -training >> step=8918100, episode=1487 reward=0.8080013 (429.54 it/sec) -training >> step=8918200, episode=1487 reward=0.7882596 (506.39 it/sec) -training >> step=8918300, episode=1487 reward=0.7974137 (479.57 it/sec) -training >> step=8918400, episode=1487 reward=0.791567 (466.46 it/sec) -training >> step=8918500, episode=1487 reward=0.7884939 (372.64 it/sec) -training >> step=8918600, episode=1487 reward=0.7670624 (470.23 it/sec) -training >> step=8918700, episode=1487 reward=0.7827395 (432.58 it/sec) -training >> step=8918800, episode=1487 reward=0.7916477 (486.29 it/sec) -training >> step=8918900, episode=1487 reward=0.7996351 (499.53 it/sec) -training >> step=8919000, episode=1487 reward=0.7844466 (418.60 it/sec) -training >> step=8919100, episode=1487 reward=0.7928304 (500.29 it/sec) -training >> step=8919200, episode=1487 reward=0.8068572 (435.60 it/sec) -training >> step=8919300, episode=1487 reward=0.7868762 (468.77 it/sec) -training >> step=8919400, episode=1487 reward=0.7833499 (496.19 it/sec) -training >> step=8919500, episode=1487 reward=0.790781 (468.83 it/sec) -training >> step=8919600, episode=1487 reward=0.7727897 (411.67 it/sec) -training >> step=8919700, episode=1487 reward=0.7850434 (498.81 it/sec) -training >> step=8919800, episode=1487 reward=0.81357 (514.08 it/sec) -training >> step=8919900, episode=1487 reward=0.7669778 (489.92 it/sec) -training >> step=8920000, episode=1487 reward=0.7947426 (471.64 it/sec) -training >> step=8920100, episode=1487 reward=0.7786913 (481.80 it/sec) -training >> step=8920200, episode=1487 reward=0.7706113 (530.62 it/sec) -training >> step=8920300, episode=1487 reward=0.7998058 (508.50 it/sec) -training >> step=8920400, episode=1487 reward=0.7738151 (482.43 it/sec) -training >> step=8920500, episode=1487 reward=0.7719381 (428.34 it/sec) -training >> step=8920600, episode=1487 reward=0.8160894 (526.52 it/sec) -training >> step=8920700, episode=1487 reward=0.7773118 (458.18 it/sec) -training >> step=8920800, episode=1487 reward=0.7977834 (465.03 it/sec) -training >> step=8920900, episode=1487 reward=0.7908337 (485.24 it/sec) -training >> step=8921000, episode=1487 reward=0.7726297 (452.59 it/sec) -training >> step=8921100, episode=1487 reward=0.7998129 (490.72 it/sec) -training >> step=8921200, episode=1487 reward=0.7904607 (494.81 it/sec) -training >> step=8921300, episode=1488 reward=0.8033243 (89.24 it/sec) -training >> step=8921400, episode=1488 reward=0.7846568 (547.17 it/sec) -training >> step=8921500, episode=1488 reward=0.8139885 (519.82 it/sec) -training >> step=8921600, episode=1488 reward=0.7788081 (556.64 it/sec) -training >> step=8921700, episode=1488 reward=0.7968208 (542.53 it/sec) -training >> step=8921800, episode=1488 reward=0.7667142 (498.62 it/sec) -training >> step=8921900, episode=1488 reward=0.8064194 (559.87 it/sec) -training >> step=8922000, episode=1488 reward=0.796463 (567.55 it/sec) -training >> step=8922100, episode=1488 reward=0.7826206 (553.23 it/sec) -training >> step=8922200, episode=1488 reward=0.7918383 (510.95 it/sec) -training >> step=8922300, episode=1488 reward=0.8047274 (500.96 it/sec) -training >> step=8922400, episode=1488 reward=0.7925571 (492.75 it/sec) -training >> step=8922500, episode=1488 reward=0.7917315 (548.49 it/sec) -training >> step=8922600, episode=1488 reward=0.7791245 (564.56 it/sec) -training >> step=8922700, episode=1488 reward=0.7945296 (505.96 it/sec) -training >> step=8922800, episode=1488 reward=0.7790387 (549.26 it/sec) -training >> step=8922900, episode=1488 reward=0.8060797 (504.06 it/sec) -training >> step=8923000, episode=1488 reward=0.8124619 (575.65 it/sec) -training >> step=8923100, episode=1488 reward=0.7585137 (525.16 it/sec) -training >> step=8923200, episode=1488 reward=0.7755577 (539.36 it/sec) -training >> step=8923300, episode=1488 reward=0.7985148 (512.85 it/sec) -training >> step=8923400, episode=1488 reward=0.7781825 (559.03 it/sec) -training >> step=8923500, episode=1488 reward=0.7886665 (504.44 it/sec) -training >> step=8923600, episode=1488 reward=0.7993541 (544.27 it/sec) -training >> step=8923700, episode=1488 reward=0.7942225 (531.71 it/sec) -training >> step=8923800, episode=1488 reward=0.7907813 (498.07 it/sec) -training >> step=8923900, episode=1488 reward=0.7656285 (518.38 it/sec) -training >> step=8924000, episode=1488 reward=0.7831864 (528.07 it/sec) -training >> step=8924100, episode=1488 reward=0.7964885 (553.21 it/sec) -training >> step=8924200, episode=1488 reward=0.8093325 (452.45 it/sec) -training >> step=8924300, episode=1488 reward=0.7739477 (508.84 it/sec) -training >> step=8924400, episode=1488 reward=0.791546 (493.80 it/sec) -training >> step=8924500, episode=1488 reward=0.7979884 (436.54 it/sec) -training >> step=8924600, episode=1488 reward=0.7920955 (556.28 it/sec) -training >> step=8924700, episode=1488 reward=0.7686531 (521.44 it/sec) -training >> step=8924800, episode=1488 reward=0.796551 (554.90 it/sec) -training >> step=8924900, episode=1488 reward=0.7863628 (501.17 it/sec) -training >> step=8925000, episode=1488 reward=0.7949892 (513.48 it/sec) -training >> step=8925100, episode=1488 reward=0.7989326 (475.73 it/sec) -training >> step=8925200, episode=1488 reward=0.780518 (582.63 it/sec) -training >> step=8925300, episode=1488 reward=0.7965791 (524.36 it/sec) -training >> step=8925400, episode=1488 reward=0.8116928 (549.28 it/sec) -training >> step=8925500, episode=1488 reward=0.7812421 (545.61 it/sec) -training >> step=8925600, episode=1488 reward=0.7818883 (564.40 it/sec) -training >> step=8925700, episode=1488 reward=0.7780897 (557.59 it/sec) -training >> step=8925800, episode=1488 reward=0.8093764 (554.19 it/sec) -training >> step=8925900, episode=1488 reward=0.7796199 (538.94 it/sec) -training >> step=8926000, episode=1488 reward=0.7903267 (492.02 it/sec) -training >> step=8926100, episode=1488 reward=0.801247 (480.17 it/sec) -training >> step=8926200, episode=1488 reward=0.7887578 (549.25 it/sec) -training >> step=8926300, episode=1488 reward=0.7804633 (583.22 it/sec) -training >> step=8926400, episode=1488 reward=0.7984837 (528.81 it/sec) -training >> step=8926500, episode=1488 reward=0.809974 (473.05 it/sec) -training >> step=8926600, episode=1488 reward=0.7844468 (530.75 it/sec) -training >> step=8926700, episode=1488 reward=0.7758158 (565.72 it/sec) -training >> step=8926800, episode=1488 reward=0.7935878 (564.22 it/sec) -training >> step=8926900, episode=1488 reward=0.7630083 (557.73 it/sec) -training >> step=8927000, episode=1488 reward=0.8020186 (564.47 it/sec) -training >> step=8927100, episode=1488 reward=0.7936088 (531.09 it/sec) -training >> step=8927200, episode=1488 reward=0.7846864 (556.87 it/sec) -training >> step=8927300, episode=1489 reward=0.7811461 (139.11 it/sec) -training >> step=8927400, episode=1489 reward=0.787157 (530.91 it/sec) -training >> step=8927500, episode=1489 reward=0.7932783 (514.98 it/sec) -training >> step=8927600, episode=1489 reward=0.7671898 (509.07 it/sec) -training >> step=8927700, episode=1489 reward=0.7781861 (559.67 it/sec) -training >> step=8927800, episode=1489 reward=0.7786565 (532.68 it/sec) -training >> step=8927900, episode=1489 reward=0.785373 (488.24 it/sec) -training >> step=8928000, episode=1489 reward=0.7971244 (513.62 it/sec) -training >> step=8928100, episode=1489 reward=0.7810786 (566.86 it/sec) -training >> step=8928200, episode=1489 reward=0.8035557 (517.08 it/sec) -training >> step=8928300, episode=1489 reward=0.7875885 (515.23 it/sec) -training >> step=8928400, episode=1489 reward=0.7903318 (532.34 it/sec) -training >> step=8928500, episode=1489 reward=0.7943117 (477.44 it/sec) -training >> step=8928600, episode=1489 reward=0.7882988 (519.83 it/sec) -training >> step=8928700, episode=1489 reward=0.7857041 (557.63 it/sec) -training >> step=8928800, episode=1489 reward=0.7838483 (574.01 it/sec) -training >> step=8928900, episode=1489 reward=0.8031135 (475.98 it/sec) -training >> step=8929000, episode=1489 reward=0.7883835 (513.79 it/sec) -training >> step=8929100, episode=1489 reward=0.7977911 (500.80 it/sec) -training >> step=8929200, episode=1489 reward=0.7992922 (573.50 it/sec) -training >> step=8929300, episode=1489 reward=0.7866668 (534.93 it/sec) -training >> step=8929400, episode=1489 reward=0.7925429 (529.24 it/sec) -training >> step=8929500, episode=1489 reward=0.7904816 (583.11 it/sec) -training >> step=8929600, episode=1489 reward=0.7997941 (492.53 it/sec) -training >> step=8929700, episode=1489 reward=0.7917239 (452.57 it/sec) -training >> step=8929800, episode=1489 reward=0.7943664 (542.29 it/sec) -training >> step=8929900, episode=1489 reward=0.7827045 (559.23 it/sec) -training >> step=8930000, episode=1489 reward=0.783132 (523.63 it/sec) -training >> step=8930100, episode=1489 reward=0.8061334 (520.66 it/sec) -training >> step=8930200, episode=1489 reward=0.7765664 (558.85 it/sec) -training >> step=8930300, episode=1489 reward=0.7961104 (503.20 it/sec) -training >> step=8930400, episode=1489 reward=0.800275 (509.57 it/sec) -training >> step=8930500, episode=1489 reward=0.8171288 (494.21 it/sec) -training >> step=8930600, episode=1489 reward=0.7945661 (554.05 it/sec) -training >> step=8930700, episode=1489 reward=0.7949051 (385.79 it/sec) -training >> step=8930800, episode=1489 reward=0.8019587 (538.42 it/sec) -training >> step=8930900, episode=1489 reward=0.7910931 (555.54 it/sec) -training >> step=8931000, episode=1489 reward=0.7905257 (538.58 it/sec) -training >> step=8931100, episode=1489 reward=0.796524 (515.64 it/sec) -training >> step=8931200, episode=1489 reward=0.7941419 (491.23 it/sec) -training >> step=8931300, episode=1489 reward=0.7941629 (575.34 it/sec) -training >> step=8931400, episode=1489 reward=0.8122152 (528.02 it/sec) -training >> step=8931500, episode=1489 reward=0.7952981 (508.77 it/sec) -training >> step=8931600, episode=1489 reward=0.7890614 (534.69 it/sec) -training >> step=8931700, episode=1489 reward=0.7885743 (558.47 it/sec) -training >> step=8931800, episode=1489 reward=0.7895225 (518.64 it/sec) -training >> step=8931900, episode=1489 reward=0.7808003 (552.18 it/sec) -training >> step=8932000, episode=1489 reward=0.7948791 (516.13 it/sec) -training >> step=8932100, episode=1489 reward=0.7900845 (564.67 it/sec) -training >> step=8932200, episode=1489 reward=0.7820241 (499.31 it/sec) -training >> step=8932300, episode=1489 reward=0.7907383 (530.33 it/sec) -training >> step=8932400, episode=1489 reward=0.8003994 (547.21 it/sec) -training >> step=8932500, episode=1489 reward=0.7757045 (509.17 it/sec) -training >> step=8932600, episode=1489 reward=0.7893957 (432.17 it/sec) -training >> step=8932700, episode=1489 reward=0.7962208 (512.73 it/sec) -training >> step=8932800, episode=1489 reward=0.7934293 (517.62 it/sec) -training >> step=8932900, episode=1489 reward=0.7694948 (542.52 it/sec) -training >> step=8933000, episode=1489 reward=0.7712842 (513.80 it/sec) -training >> step=8933100, episode=1489 reward=0.7862441 (534.91 it/sec) -training >> step=8933200, episode=1489 reward=0.7778298 (510.95 it/sec) -training >> step=8933300, episode=1490 reward=0.806449 (126.33 it/sec) -training >> step=8933400, episode=1490 reward=0.7723353 (501.23 it/sec) -training >> step=8933500, episode=1490 reward=0.7868253 (462.41 it/sec) -training >> step=8933600, episode=1490 reward=0.7799728 (479.61 it/sec) -training >> step=8933700, episode=1490 reward=0.7756081 (518.96 it/sec) -training >> step=8933800, episode=1490 reward=0.7991139 (490.93 it/sec) -training >> step=8933900, episode=1490 reward=0.7881694 (501.70 it/sec) -training >> step=8934000, episode=1490 reward=0.7721273 (515.67 it/sec) -training >> step=8934100, episode=1490 reward=0.800716 (491.49 it/sec) -training >> step=8934200, episode=1490 reward=0.8034896 (490.54 it/sec) -training >> step=8934300, episode=1490 reward=0.8044324 (480.93 it/sec) -training >> step=8934400, episode=1490 reward=0.7738881 (536.38 it/sec) -training >> step=8934500, episode=1490 reward=0.78297 (502.86 it/sec) -training >> step=8934600, episode=1490 reward=0.7884117 (494.70 it/sec) -training >> step=8934700, episode=1490 reward=0.7967239 (544.65 it/sec) -training >> step=8934800, episode=1490 reward=0.7899059 (489.41 it/sec) -training >> step=8934900, episode=1490 reward=0.808212 (495.83 it/sec) -training >> step=8935000, episode=1490 reward=0.7999066 (537.90 it/sec) -training >> step=8935100, episode=1490 reward=0.7903366 (520.84 it/sec) -training >> step=8935200, episode=1490 reward=0.8024866 (543.94 it/sec) -training >> step=8935300, episode=1490 reward=0.7871198 (553.15 it/sec) -training >> step=8935400, episode=1490 reward=0.7889497 (501.12 it/sec) -training >> step=8935500, episode=1490 reward=0.7801064 (538.30 it/sec) -training >> step=8935600, episode=1490 reward=0.7941628 (475.85 it/sec) -training >> step=8935700, episode=1490 reward=0.7739867 (528.37 it/sec) -training >> step=8935800, episode=1490 reward=0.7825394 (487.80 it/sec) -training >> step=8935900, episode=1490 reward=0.7842067 (465.56 it/sec) -training >> step=8936000, episode=1490 reward=0.7890434 (526.82 it/sec) -training >> step=8936100, episode=1490 reward=0.7754507 (508.57 it/sec) -training >> step=8936200, episode=1490 reward=0.8033297 (443.04 it/sec) -training >> step=8936300, episode=1490 reward=0.777574 (473.28 it/sec) -training >> step=8936400, episode=1490 reward=0.8098705 (477.28 it/sec) -training >> step=8936500, episode=1490 reward=0.7842707 (409.30 it/sec) -training >> step=8936600, episode=1490 reward=0.807221 (416.91 it/sec) -training >> step=8936700, episode=1490 reward=0.7990185 (505.72 it/sec) -training >> step=8936800, episode=1490 reward=0.7962195 (429.70 it/sec) -training >> step=8936900, episode=1490 reward=0.7897193 (312.52 it/sec) -training >> step=8937000, episode=1490 reward=0.7738451 (454.01 it/sec) -training >> step=8937100, episode=1490 reward=0.7920288 (471.58 it/sec) -training >> step=8937200, episode=1490 reward=0.8006513 (447.91 it/sec) -training >> step=8937300, episode=1490 reward=0.80946 (498.35 it/sec) -training >> step=8937400, episode=1490 reward=0.7864606 (506.80 it/sec) -training >> step=8937500, episode=1490 reward=0.7767306 (389.88 it/sec) -training >> step=8937600, episode=1490 reward=0.7966942 (433.26 it/sec) -training >> step=8937700, episode=1490 reward=0.7669821 (472.81 it/sec) -training >> step=8937800, episode=1490 reward=0.8032627 (495.39 it/sec) -training >> step=8937900, episode=1490 reward=0.7688448 (492.88 it/sec) -training >> step=8938000, episode=1490 reward=0.7800493 (449.00 it/sec) -training >> step=8938100, episode=1490 reward=0.7909659 (490.31 it/sec) -training >> step=8938200, episode=1490 reward=0.7956886 (486.18 it/sec) -training >> step=8938300, episode=1490 reward=0.7878185 (452.31 it/sec) -training >> step=8938400, episode=1490 reward=0.7933699 (444.15 it/sec) -training >> step=8938500, episode=1490 reward=0.7900995 (453.19 it/sec) -training >> step=8938600, episode=1490 reward=0.8038598 (398.61 it/sec) -training >> step=8938700, episode=1490 reward=0.807992 (374.44 it/sec) -training >> step=8938800, episode=1490 reward=0.7939628 (443.84 it/sec) -training >> step=8938900, episode=1490 reward=0.8069588 (475.00 it/sec) -training >> step=8939000, episode=1490 reward=0.7970066 (460.30 it/sec) -training >> step=8939100, episode=1490 reward=0.7801089 (461.83 it/sec) -training >> step=8939200, episode=1490 reward=0.7900023 (480.99 it/sec) -training >> step=8939300, episode=1491 reward=0.7828717 (85.65 it/sec) -training >> step=8939400, episode=1491 reward=0.7762093 (447.70 it/sec) -training >> step=8939500, episode=1491 reward=0.7685875 (503.88 it/sec) -training >> step=8939600, episode=1491 reward=0.8044325 (484.31 it/sec) -training >> step=8939700, episode=1491 reward=0.7859747 (480.23 it/sec) -training >> step=8939800, episode=1491 reward=0.7909148 (504.76 it/sec) -training >> step=8939900, episode=1491 reward=0.7904362 (474.24 it/sec) -training >> step=8940000, episode=1491 reward=0.7994218 (436.68 it/sec) -training >> step=8940100, episode=1491 reward=0.8050755 (502.24 it/sec) -training >> step=8940200, episode=1491 reward=0.7826202 (475.12 it/sec) -training >> step=8940300, episode=1491 reward=0.7889738 (492.34 it/sec) -training >> step=8940400, episode=1491 reward=0.8109417 (449.59 it/sec) -training >> step=8940500, episode=1491 reward=0.7920283 (434.61 it/sec) -training >> step=8940600, episode=1491 reward=0.8062809 (435.99 it/sec) -training >> step=8940700, episode=1491 reward=0.8023308 (425.39 it/sec) -training >> step=8940800, episode=1491 reward=0.8014544 (473.29 it/sec) -training >> step=8940900, episode=1491 reward=0.8016756 (461.75 it/sec) -training >> step=8941000, episode=1491 reward=0.7636529 (506.94 it/sec) -training >> step=8941100, episode=1491 reward=0.8005946 (488.73 it/sec) -training >> step=8941200, episode=1491 reward=0.7970122 (433.07 it/sec) -training >> step=8941300, episode=1491 reward=0.7667626 (374.93 it/sec) -training >> step=8941400, episode=1491 reward=0.8001103 (441.32 it/sec) -training >> step=8941500, episode=1491 reward=0.7933514 (412.50 it/sec) -training >> step=8941600, episode=1491 reward=0.7929208 (467.31 it/sec) -training >> step=8941700, episode=1491 reward=0.7961338 (419.64 it/sec) -training >> step=8941800, episode=1491 reward=0.8005654 (410.14 it/sec) -training >> step=8941900, episode=1491 reward=0.7863274 (397.37 it/sec) -training >> step=8942000, episode=1491 reward=0.7891442 (393.88 it/sec) -training >> step=8942100, episode=1491 reward=0.7832822 (369.70 it/sec) -training >> step=8942200, episode=1491 reward=0.7889879 (332.09 it/sec) -training >> step=8942300, episode=1491 reward=0.7873853 (373.01 it/sec) -training >> step=8942400, episode=1491 reward=0.7982659 (355.19 it/sec) -training >> step=8942500, episode=1491 reward=0.7837878 (373.87 it/sec) -training >> step=8942600, episode=1491 reward=0.8023307 (426.51 it/sec) -training >> step=8942700, episode=1491 reward=0.8044463 (512.82 it/sec) -training >> step=8942800, episode=1491 reward=0.8123205 (525.38 it/sec) -training >> step=8942900, episode=1491 reward=0.8190351 (472.14 it/sec) -training >> step=8943000, episode=1491 reward=0.7594538 (482.54 it/sec) -training >> step=8943100, episode=1491 reward=0.7818033 (536.72 it/sec) -training >> step=8943200, episode=1491 reward=0.7969471 (368.63 it/sec) -training >> step=8943300, episode=1491 reward=0.7873861 (458.45 it/sec) -training >> step=8943400, episode=1491 reward=0.7698725 (428.81 it/sec) -training >> step=8943500, episode=1491 reward=0.7797791 (469.51 it/sec) -training >> step=8943600, episode=1491 reward=0.7989067 (438.39 it/sec) -training >> step=8943700, episode=1491 reward=0.8067278 (468.13 it/sec) -training >> step=8943800, episode=1491 reward=0.8029207 (455.18 it/sec) -training >> step=8943900, episode=1491 reward=0.7627382 (490.75 it/sec) -training >> step=8944000, episode=1491 reward=0.7836015 (463.80 it/sec) -training >> step=8944100, episode=1491 reward=0.7935941 (401.73 it/sec) -training >> step=8944200, episode=1491 reward=0.7782205 (400.84 it/sec) -training >> step=8944300, episode=1491 reward=0.8068119 (460.54 it/sec) -training >> step=8944400, episode=1491 reward=0.8036214 (446.45 it/sec) -training >> step=8944500, episode=1491 reward=0.789007 (482.93 it/sec) -training >> step=8944600, episode=1491 reward=0.7928942 (498.15 it/sec) -training >> step=8944700, episode=1491 reward=0.7832354 (464.79 it/sec) -training >> step=8944800, episode=1491 reward=0.7747118 (397.16 it/sec) -training >> step=8944900, episode=1491 reward=0.7739466 (438.71 it/sec) -training >> step=8945000, episode=1491 reward=0.7726075 (473.98 it/sec) -training >> step=8945100, episode=1491 reward=0.7962323 (454.04 it/sec) -training >> step=8945200, episode=1491 reward=0.7903171 (429.05 it/sec) -training >> step=8945300, episode=1492 reward=0.8005675 (113.61 it/sec) -training >> step=8945400, episode=1492 reward=0.7831774 (414.03 it/sec) -training >> step=8945500, episode=1492 reward=0.7748759 (419.32 it/sec) -training >> step=8945600, episode=1492 reward=0.7758164 (468.14 it/sec) -training >> step=8945700, episode=1492 reward=0.763701 (485.46 it/sec) -training >> step=8945800, episode=1492 reward=0.7988862 (421.22 it/sec) -training >> step=8945900, episode=1492 reward=0.7820304 (399.76 it/sec) -training >> step=8946000, episode=1492 reward=0.7936853 (469.40 it/sec) -training >> step=8946100, episode=1492 reward=0.774472 (461.99 it/sec) -training >> step=8946200, episode=1492 reward=0.7979091 (456.49 it/sec) -training >> step=8946300, episode=1492 reward=0.7846051 (464.65 it/sec) -training >> step=8946400, episode=1492 reward=0.8001586 (454.32 it/sec) -training >> step=8946500, episode=1492 reward=0.7938502 (454.05 it/sec) -training >> step=8946600, episode=1492 reward=0.7931713 (416.46 it/sec) -training >> step=8946700, episode=1492 reward=0.7789781 (443.11 it/sec) -training >> step=8946800, episode=1492 reward=0.7956672 (407.16 it/sec) -training >> step=8946900, episode=1492 reward=0.785556 (443.07 it/sec) -training >> step=8947000, episode=1492 reward=0.7808829 (423.38 it/sec) -training >> step=8947100, episode=1492 reward=0.7747808 (422.08 it/sec) -training >> step=8947200, episode=1492 reward=0.7978269 (395.65 it/sec) -training >> step=8947300, episode=1492 reward=0.775502 (454.93 it/sec) -training >> step=8947400, episode=1492 reward=0.81056 (449.11 it/sec) -training >> step=8947500, episode=1492 reward=0.7878158 (481.55 it/sec) -training >> step=8947600, episode=1492 reward=0.7819639 (452.72 it/sec) -training >> step=8947700, episode=1492 reward=0.7859199 (464.64 it/sec) -training >> step=8947800, episode=1492 reward=0.7904976 (471.77 it/sec) -training >> step=8947900, episode=1492 reward=0.7777301 (469.40 it/sec) -training >> step=8948000, episode=1492 reward=0.7944462 (478.86 it/sec) -training >> step=8948100, episode=1492 reward=0.7923592 (478.46 it/sec) -training >> step=8948200, episode=1492 reward=0.7720257 (523.51 it/sec) -training >> step=8948300, episode=1492 reward=0.7879432 (456.44 it/sec) -training >> step=8948400, episode=1492 reward=0.8003176 (445.04 it/sec) -training >> step=8948500, episode=1492 reward=0.7879309 (481.93 it/sec) -training >> step=8948600, episode=1492 reward=0.7807904 (506.44 it/sec) -training >> step=8948700, episode=1492 reward=0.7929249 (483.92 it/sec) -training >> step=8948800, episode=1492 reward=0.8029228 (493.49 it/sec) -training >> step=8948900, episode=1492 reward=0.7936712 (493.08 it/sec) -training >> step=8949000, episode=1492 reward=0.8004802 (462.55 it/sec) -training >> step=8949100, episode=1492 reward=0.8000942 (510.52 it/sec) -training >> step=8949200, episode=1492 reward=0.7886912 (489.51 it/sec) -training >> step=8949300, episode=1492 reward=0.7987354 (490.49 it/sec) -training >> step=8949400, episode=1492 reward=0.800073 (462.47 it/sec) -training >> step=8949500, episode=1492 reward=0.800122 (365.92 it/sec) -training >> step=8949600, episode=1492 reward=0.7903958 (502.47 it/sec) -training >> step=8949700, episode=1492 reward=0.7889016 (505.81 it/sec) -training >> step=8949800, episode=1492 reward=0.7972348 (506.38 it/sec) -training >> step=8949900, episode=1492 reward=0.7924557 (499.26 it/sec) -training >> step=8950000, episode=1492 reward=0.7949465 (495.91 it/sec) -training >> step=8950100, episode=1492 reward=0.7925609 (449.87 it/sec) -training >> step=8950200, episode=1492 reward=0.7776654 (497.60 it/sec) -training >> step=8950300, episode=1492 reward=0.7729737 (490.81 it/sec) -training >> step=8950400, episode=1492 reward=0.7941079 (523.68 it/sec) -training >> step=8950500, episode=1492 reward=0.7895907 (521.68 it/sec) -training >> step=8950600, episode=1492 reward=0.8125916 (506.59 it/sec) -training >> step=8950700, episode=1492 reward=0.7813223 (423.31 it/sec) -training >> step=8950800, episode=1492 reward=0.8077774 (422.43 it/sec) -training >> step=8950900, episode=1492 reward=0.7995231 (477.84 it/sec) -training >> step=8951000, episode=1492 reward=0.7793436 (465.31 it/sec) -training >> step=8951100, episode=1492 reward=0.7919688 (482.94 it/sec) -training >> step=8951200, episode=1492 reward=0.7866176 (434.39 it/sec) -training >> step=8951300, episode=1493 reward=0.7709966 (83.52 it/sec) -training >> step=8951400, episode=1493 reward=0.7898605 (472.08 it/sec) -training >> step=8951500, episode=1493 reward=0.7947852 (425.17 it/sec) -training >> step=8951600, episode=1493 reward=0.7711591 (490.49 it/sec) -training >> step=8951700, episode=1493 reward=0.7904232 (484.80 it/sec) -training >> step=8951800, episode=1493 reward=0.789984 (518.74 it/sec) -training >> step=8951900, episode=1493 reward=0.810599 (493.15 it/sec) -training >> step=8952000, episode=1493 reward=0.7794161 (465.23 it/sec) -training >> step=8952100, episode=1493 reward=0.7771969 (488.07 it/sec) -training >> step=8952200, episode=1493 reward=0.790629 (505.75 it/sec) -training >> step=8952300, episode=1493 reward=0.7844211 (443.87 it/sec) -training >> step=8952400, episode=1493 reward=0.802123 (498.37 it/sec) -training >> step=8952500, episode=1493 reward=0.7878393 (453.83 it/sec) -training >> step=8952600, episode=1493 reward=0.7986366 (441.20 it/sec) -training >> step=8952700, episode=1493 reward=0.7917292 (440.19 it/sec) -training >> step=8952800, episode=1493 reward=0.7921849 (457.78 it/sec) -training >> step=8952900, episode=1493 reward=0.7984847 (457.24 it/sec) -training >> step=8953000, episode=1493 reward=0.8083664 (476.83 it/sec) -training >> step=8953100, episode=1493 reward=0.8031108 (409.80 it/sec) -training >> step=8953200, episode=1493 reward=0.7718948 (436.07 it/sec) -training >> step=8953300, episode=1493 reward=0.7966153 (422.05 it/sec) -training >> step=8953400, episode=1493 reward=0.7842845 (485.58 it/sec) -training >> step=8953500, episode=1493 reward=0.8044986 (475.31 it/sec) -training >> step=8953600, episode=1493 reward=0.7923815 (422.15 it/sec) -training >> step=8953700, episode=1493 reward=0.8022054 (485.24 it/sec) -training >> step=8953800, episode=1493 reward=0.7906959 (486.61 it/sec) -training >> step=8953900, episode=1493 reward=0.7966782 (438.32 it/sec) -training >> step=8954000, episode=1493 reward=0.7953587 (440.64 it/sec) -training >> step=8954100, episode=1493 reward=0.7839319 (484.78 it/sec) -training >> step=8954200, episode=1493 reward=0.7898597 (445.13 it/sec) -training >> step=8954300, episode=1493 reward=0.8043033 (386.52 it/sec) -training >> step=8954400, episode=1493 reward=0.783556 (465.49 it/sec) -training >> step=8954500, episode=1493 reward=0.7917078 (470.29 it/sec) -training >> step=8954600, episode=1493 reward=0.7794114 (461.95 it/sec) -training >> step=8954700, episode=1493 reward=0.789336 (447.99 it/sec) -training >> step=8954800, episode=1493 reward=0.8043332 (469.33 it/sec) -training >> step=8954900, episode=1493 reward=0.8094503 (468.69 it/sec) -training >> step=8955000, episode=1493 reward=0.7538828 (436.66 it/sec) -training >> step=8955100, episode=1493 reward=0.773024 (432.13 it/sec) -training >> step=8955200, episode=1493 reward=0.7836676 (496.92 it/sec) -training >> step=8955300, episode=1493 reward=0.7960553 (498.98 it/sec) -training >> step=8955400, episode=1493 reward=0.7840734 (493.83 it/sec) -training >> step=8955500, episode=1493 reward=0.7811724 (499.33 it/sec) -training >> step=8955600, episode=1493 reward=0.7929186 (503.09 it/sec) -training >> step=8955700, episode=1493 reward=0.7769177 (510.49 it/sec) -training >> step=8955800, episode=1493 reward=0.7891159 (373.07 it/sec) -training >> step=8955900, episode=1493 reward=0.7631873 (504.92 it/sec) -training >> step=8956000, episode=1493 reward=0.7835953 (489.44 it/sec) -training >> step=8956100, episode=1493 reward=0.7986354 (457.49 it/sec) -training >> step=8956200, episode=1493 reward=0.7679396 (516.47 it/sec) -training >> step=8956300, episode=1493 reward=0.793846 (442.85 it/sec) -training >> step=8956400, episode=1493 reward=0.8157097 (421.76 it/sec) -training >> step=8956500, episode=1493 reward=0.7719484 (466.86 it/sec) -training >> step=8956600, episode=1493 reward=0.7956037 (508.72 it/sec) -training >> step=8956700, episode=1493 reward=0.7864618 (485.17 it/sec) -training >> step=8956800, episode=1493 reward=0.7801152 (454.54 it/sec) -training >> step=8956900, episode=1493 reward=0.7834769 (442.77 it/sec) -training >> step=8957000, episode=1493 reward=0.790045 (472.60 it/sec) -training >> step=8957100, episode=1493 reward=0.788556 (502.63 it/sec) -training >> step=8957200, episode=1493 reward=0.7877999 (514.00 it/sec) -training >> step=8957300, episode=1494 reward=0.7566459 (121.14 it/sec) -training >> step=8957400, episode=1494 reward=0.7956287 (440.11 it/sec) -training >> step=8957500, episode=1494 reward=0.7651342 (446.79 it/sec) -training >> step=8957600, episode=1494 reward=0.7723434 (383.08 it/sec) -training >> step=8957700, episode=1494 reward=0.7871704 (487.77 it/sec) -training >> step=8957800, episode=1494 reward=0.8018317 (436.38 it/sec) -training >> step=8957900, episode=1494 reward=0.784512 (417.90 it/sec) -training >> step=8958000, episode=1494 reward=0.8101571 (494.20 it/sec) -training >> step=8958100, episode=1494 reward=0.7881599 (437.83 it/sec) -training >> step=8958200, episode=1494 reward=0.7840379 (486.13 it/sec) -training >> step=8958300, episode=1494 reward=0.811569 (478.61 it/sec) -training >> step=8958400, episode=1494 reward=0.8024908 (506.63 it/sec) -training >> step=8958500, episode=1494 reward=0.8080367 (473.68 it/sec) -training >> step=8958600, episode=1494 reward=0.7906681 (454.19 it/sec) -training >> step=8958700, episode=1494 reward=0.7895873 (472.18 it/sec) -training >> step=8958800, episode=1494 reward=0.8007522 (512.34 it/sec) -training >> step=8958900, episode=1494 reward=0.7898954 (505.27 it/sec) -training >> step=8959000, episode=1494 reward=0.7919757 (475.19 it/sec) -training >> step=8959100, episode=1494 reward=0.8100066 (455.14 it/sec) -training >> step=8959200, episode=1494 reward=0.8039912 (470.40 it/sec) -training >> step=8959300, episode=1494 reward=0.7987592 (485.18 it/sec) -training >> step=8959400, episode=1494 reward=0.8097048 (439.90 it/sec) -training >> step=8959500, episode=1494 reward=0.7841837 (514.46 it/sec) -training >> step=8959600, episode=1494 reward=0.7891464 (422.07 it/sec) -training >> step=8959700, episode=1494 reward=0.7833865 (460.03 it/sec) -training >> step=8959800, episode=1494 reward=0.7761033 (472.38 it/sec) -training >> step=8959900, episode=1494 reward=0.7696894 (482.56 it/sec) -training >> step=8960000, episode=1494 reward=0.7684229 (477.00 it/sec) -training >> step=8960100, episode=1494 reward=0.8026067 (457.54 it/sec) -training >> step=8960200, episode=1494 reward=0.7811102 (431.23 it/sec) -training >> step=8960300, episode=1494 reward=0.7969633 (419.64 it/sec) -training >> step=8960400, episode=1494 reward=0.7761185 (453.34 it/sec) -training >> step=8960500, episode=1494 reward=0.8085392 (480.11 it/sec) -training >> step=8960600, episode=1494 reward=0.7720971 (506.74 it/sec) -training >> step=8960700, episode=1494 reward=0.8047586 (473.74 it/sec) -training >> step=8960800, episode=1494 reward=0.7741243 (449.96 it/sec) -training >> step=8960900, episode=1494 reward=0.8048327 (516.73 it/sec) -training >> step=8961000, episode=1494 reward=0.8219756 (495.77 it/sec) -training >> step=8961100, episode=1494 reward=0.7887079 (478.12 it/sec) -training >> step=8961200, episode=1494 reward=0.7957574 (499.75 it/sec) -training >> step=8961300, episode=1494 reward=0.7652234 (478.51 it/sec) -training >> step=8961400, episode=1494 reward=0.7814723 (476.64 it/sec) -training >> step=8961500, episode=1494 reward=0.7868235 (484.17 it/sec) -training >> step=8961600, episode=1494 reward=0.8172344 (519.50 it/sec) -training >> step=8961700, episode=1494 reward=0.7857319 (501.29 it/sec) -training >> step=8961800, episode=1494 reward=0.7933875 (454.69 it/sec) -training >> step=8961900, episode=1494 reward=0.7880839 (504.28 it/sec) -training >> step=8962000, episode=1494 reward=0.8034903 (478.60 it/sec) -training >> step=8962100, episode=1494 reward=0.7939897 (391.65 it/sec) -training >> step=8962200, episode=1494 reward=0.7916217 (480.76 it/sec) -training >> step=8962300, episode=1494 reward=0.7843428 (454.71 it/sec) -training >> step=8962400, episode=1494 reward=0.7786344 (526.39 it/sec) -training >> step=8962500, episode=1494 reward=0.8024085 (475.96 it/sec) -training >> step=8962600, episode=1494 reward=0.7879949 (495.08 it/sec) -training >> step=8962700, episode=1494 reward=0.8019683 (467.07 it/sec) -training >> step=8962800, episode=1494 reward=0.7855613 (520.43 it/sec) -training >> step=8962900, episode=1494 reward=0.7835259 (486.85 it/sec) -training >> step=8963000, episode=1494 reward=0.7921047 (485.20 it/sec) -training >> step=8963100, episode=1494 reward=0.7870032 (537.93 it/sec) -training >> step=8963200, episode=1494 reward=0.7847162 (492.79 it/sec) -training >> step=8963300, episode=1495 reward=0.7836636 (63.93 it/sec) -training >> step=8963400, episode=1495 reward=0.7933583 (501.84 it/sec) -training >> step=8963500, episode=1495 reward=0.7772255 (491.87 it/sec) -training >> step=8963600, episode=1495 reward=0.7899238 (509.18 it/sec) -training >> step=8963700, episode=1495 reward=0.8057194 (519.86 it/sec) -training >> step=8963800, episode=1495 reward=0.7904724 (484.22 it/sec) -training >> step=8963900, episode=1495 reward=0.777122 (528.77 it/sec) -training >> step=8964000, episode=1495 reward=0.8029733 (524.69 it/sec) -training >> step=8964100, episode=1495 reward=0.7949989 (507.08 it/sec) -training >> step=8964200, episode=1495 reward=0.781041 (522.09 it/sec) -training >> step=8964300, episode=1495 reward=0.7750252 (529.68 it/sec) -training >> step=8964400, episode=1495 reward=0.8039032 (467.80 it/sec) -training >> step=8964500, episode=1495 reward=0.7882948 (532.71 it/sec) -training >> step=8964600, episode=1495 reward=0.7864428 (534.14 it/sec) -training >> step=8964700, episode=1495 reward=0.7853461 (514.41 it/sec) -training >> step=8964800, episode=1495 reward=0.8025436 (518.82 it/sec) -training >> step=8964900, episode=1495 reward=0.8133186 (490.97 it/sec) -training >> step=8965000, episode=1495 reward=0.7918935 (553.38 it/sec) -training >> step=8965100, episode=1495 reward=0.7893347 (473.07 it/sec) -training >> step=8965200, episode=1495 reward=0.7750893 (514.35 it/sec) -training >> step=8965300, episode=1495 reward=0.793459 (528.59 it/sec) -training >> step=8965400, episode=1495 reward=0.8008883 (459.45 it/sec) -training >> step=8965500, episode=1495 reward=0.7789905 (505.19 it/sec) -training >> step=8965600, episode=1495 reward=0.7821016 (523.67 it/sec) -training >> step=8965700, episode=1495 reward=0.7738251 (463.35 it/sec) -training >> step=8965800, episode=1495 reward=0.7925654 (489.18 it/sec) -training >> step=8965900, episode=1495 reward=0.8009042 (472.76 it/sec) -training >> step=8966000, episode=1495 reward=0.7803887 (530.21 it/sec) -training >> step=8966100, episode=1495 reward=0.7958184 (514.78 it/sec) -training >> step=8966200, episode=1495 reward=0.7883542 (492.52 it/sec) -training >> step=8966300, episode=1495 reward=0.7993965 (489.54 it/sec) -training >> step=8966400, episode=1495 reward=0.7897046 (471.73 it/sec) -training >> step=8966500, episode=1495 reward=0.8064825 (484.71 it/sec) -training >> step=8966600, episode=1495 reward=0.8082711 (479.33 it/sec) -training >> step=8966700, episode=1495 reward=0.7991996 (556.19 it/sec) -training >> step=8966800, episode=1495 reward=0.7819746 (491.15 it/sec) -training >> step=8966900, episode=1495 reward=0.795041 (506.98 it/sec) -training >> step=8967000, episode=1495 reward=0.7704866 (485.70 it/sec) -training >> step=8967100, episode=1495 reward=0.7867078 (539.75 it/sec) -training >> step=8967200, episode=1495 reward=0.7888883 (501.15 it/sec) -training >> step=8967300, episode=1495 reward=0.7784048 (543.28 it/sec) -training >> step=8967400, episode=1495 reward=0.803252 (518.84 it/sec) -training >> step=8967500, episode=1495 reward=0.7766387 (491.81 it/sec) -training >> step=8967600, episode=1495 reward=0.7745266 (538.06 it/sec) -training >> step=8967700, episode=1495 reward=0.8011971 (519.86 it/sec) -training >> step=8967800, episode=1495 reward=0.7931299 (508.57 it/sec) -training >> step=8967900, episode=1495 reward=0.7697283 (468.52 it/sec) -training >> step=8968000, episode=1495 reward=0.7903494 (496.36 it/sec) -training >> step=8968100, episode=1495 reward=0.7797262 (545.54 it/sec) -training >> step=8968200, episode=1495 reward=0.7772737 (509.32 it/sec) -training >> step=8968300, episode=1495 reward=0.7955123 (511.06 it/sec) -training >> step=8968400, episode=1495 reward=0.7954144 (367.39 it/sec) -training >> step=8968500, episode=1495 reward=0.7855253 (534.17 it/sec) -training >> step=8968600, episode=1495 reward=0.790708 (476.85 it/sec) -training >> step=8968700, episode=1495 reward=0.8059818 (500.71 it/sec) -training >> step=8968800, episode=1495 reward=0.7911944 (503.92 it/sec) -training >> step=8968900, episode=1495 reward=0.7890625 (490.54 it/sec) -training >> step=8969000, episode=1495 reward=0.7649233 (430.52 it/sec) -training >> step=8969100, episode=1495 reward=0.7815738 (404.82 it/sec) -training >> step=8969200, episode=1495 reward=0.80865 (437.99 it/sec) -training >> step=8969300, episode=1496 reward=0.794436 (124.70 it/sec) -training >> step=8969400, episode=1496 reward=0.7908683 (468.47 it/sec) -training >> step=8969500, episode=1496 reward=0.7745803 (444.85 it/sec) -training >> step=8969600, episode=1496 reward=0.7988871 (484.26 it/sec) -training >> step=8969700, episode=1496 reward=0.7917457 (449.50 it/sec) -training >> step=8969800, episode=1496 reward=0.7967538 (464.63 it/sec) -training >> step=8969900, episode=1496 reward=0.8006478 (503.15 it/sec) -training >> step=8970000, episode=1496 reward=0.7761816 (483.76 it/sec) -training >> step=8970100, episode=1496 reward=0.7981248 (469.36 it/sec) -training >> step=8970200, episode=1496 reward=0.7827257 (511.21 it/sec) -training >> step=8970300, episode=1496 reward=0.7924945 (508.79 it/sec) -training >> step=8970400, episode=1496 reward=0.8059655 (498.63 it/sec) -training >> step=8970500, episode=1496 reward=0.803897 (452.26 it/sec) -training >> step=8970600, episode=1496 reward=0.7930567 (496.00 it/sec) -training >> step=8970700, episode=1496 reward=0.782939 (499.08 it/sec) -training >> step=8970800, episode=1496 reward=0.7830833 (383.12 it/sec) -training >> step=8970900, episode=1496 reward=0.7942184 (396.84 it/sec) -training >> step=8971000, episode=1496 reward=0.7910828 (410.97 it/sec) -training >> step=8971100, episode=1496 reward=0.7741445 (382.33 it/sec) -training >> step=8971200, episode=1496 reward=0.7908384 (383.60 it/sec) -training >> step=8971300, episode=1496 reward=0.787286 (488.47 it/sec) -training >> step=8971400, episode=1496 reward=0.7941263 (547.66 it/sec) -training >> step=8971500, episode=1496 reward=0.78909 (495.83 it/sec) -training >> step=8971600, episode=1496 reward=0.7883926 (513.06 it/sec) -training >> step=8971700, episode=1496 reward=0.7806255 (554.11 it/sec) -training >> step=8971800, episode=1496 reward=0.774919 (516.17 it/sec) -training >> step=8971900, episode=1496 reward=0.8003148 (503.69 it/sec) -training >> step=8972000, episode=1496 reward=0.790819 (523.22 it/sec) -training >> step=8972100, episode=1496 reward=0.7820442 (479.84 it/sec) -training >> step=8972200, episode=1496 reward=0.8056996 (464.94 it/sec) -training >> step=8972300, episode=1496 reward=0.7992674 (453.85 it/sec) -training >> step=8972400, episode=1496 reward=0.7966546 (419.78 it/sec) -training >> step=8972500, episode=1496 reward=0.799109 (441.09 it/sec) -training >> step=8972600, episode=1496 reward=0.7840258 (481.68 it/sec) -training >> step=8972700, episode=1496 reward=0.8030125 (490.72 it/sec) -training >> step=8972800, episode=1496 reward=0.8012351 (544.15 it/sec) -training >> step=8972900, episode=1496 reward=0.7815717 (532.02 it/sec) -training >> step=8973000, episode=1496 reward=0.7857906 (480.41 it/sec) -training >> step=8973100, episode=1496 reward=0.8069097 (504.41 it/sec) -training >> step=8973200, episode=1496 reward=0.7815225 (530.85 it/sec) -training >> step=8973300, episode=1496 reward=0.796537 (537.64 it/sec) -training >> step=8973400, episode=1496 reward=0.7978133 (492.49 it/sec) -training >> step=8973500, episode=1496 reward=0.7856055 (558.14 it/sec) -training >> step=8973600, episode=1496 reward=0.7983266 (487.86 it/sec) -training >> step=8973700, episode=1496 reward=0.8057879 (497.35 it/sec) -training >> step=8973800, episode=1496 reward=0.8105195 (456.38 it/sec) -training >> step=8973900, episode=1496 reward=0.7748106 (525.74 it/sec) -training >> step=8974000, episode=1496 reward=0.7854216 (502.02 it/sec) -training >> step=8974100, episode=1496 reward=0.7776174 (488.86 it/sec) -training >> step=8974200, episode=1496 reward=0.8083195 (521.78 it/sec) -training >> step=8974300, episode=1496 reward=0.7879463 (501.91 it/sec) -training >> step=8974400, episode=1496 reward=0.7690816 (499.59 it/sec) -training >> step=8974500, episode=1496 reward=0.7818133 (524.39 it/sec) -training >> step=8974600, episode=1496 reward=0.7976525 (536.66 it/sec) -training >> step=8974700, episode=1496 reward=0.7912161 (429.41 it/sec) -training >> step=8974800, episode=1496 reward=0.7887471 (496.35 it/sec) -training >> step=8974900, episode=1496 reward=0.7817408 (550.99 it/sec) -training >> step=8975000, episode=1496 reward=0.7825502 (530.10 it/sec) -training >> step=8975100, episode=1496 reward=0.7859969 (517.18 it/sec) -training >> step=8975200, episode=1496 reward=0.7873034 (511.25 it/sec) -training >> step=8975300, episode=1497 reward=0.789163 (124.00 it/sec) -training >> step=8975400, episode=1497 reward=0.7941299 (477.43 it/sec) -training >> step=8975500, episode=1497 reward=0.7900516 (501.69 it/sec) -training >> step=8975600, episode=1497 reward=0.7986024 (479.37 it/sec) -training >> step=8975700, episode=1497 reward=0.7743571 (525.36 it/sec) -training >> step=8975800, episode=1497 reward=0.7988725 (503.48 it/sec) -training >> step=8975900, episode=1497 reward=0.8034692 (419.13 it/sec) -training >> step=8976000, episode=1497 reward=0.7730284 (519.07 it/sec) -training >> step=8976100, episode=1497 reward=0.7753319 (490.18 it/sec) -training >> step=8976200, episode=1497 reward=0.7897432 (494.23 it/sec) -training >> step=8976300, episode=1497 reward=0.813388 (451.62 it/sec) -training >> step=8976400, episode=1497 reward=0.7964704 (456.46 it/sec) -training >> step=8976500, episode=1497 reward=0.793798 (486.07 it/sec) -training >> step=8976600, episode=1497 reward=0.7952142 (456.34 it/sec) -training >> step=8976700, episode=1497 reward=0.8007048 (400.69 it/sec) -training >> step=8976800, episode=1497 reward=0.7852315 (453.68 it/sec) -training >> step=8976900, episode=1497 reward=0.7791383 (470.61 it/sec) -training >> step=8977000, episode=1497 reward=0.7987302 (486.28 it/sec) -training >> step=8977100, episode=1497 reward=0.7966713 (513.98 it/sec) -training >> step=8977200, episode=1497 reward=0.7836676 (433.85 it/sec) -training >> step=8977300, episode=1497 reward=0.7850178 (487.76 it/sec) -training >> step=8977400, episode=1497 reward=0.7990673 (490.57 it/sec) -training >> step=8977500, episode=1497 reward=0.7815124 (471.67 it/sec) -training >> step=8977600, episode=1497 reward=0.7953054 (492.01 it/sec) -training >> step=8977700, episode=1497 reward=0.7693127 (450.09 it/sec) -training >> step=8977800, episode=1497 reward=0.7895693 (468.13 it/sec) -training >> step=8977900, episode=1497 reward=0.7908144 (488.04 it/sec) -training >> step=8978000, episode=1497 reward=0.8047873 (466.55 it/sec) -training >> step=8978100, episode=1497 reward=0.7706853 (433.39 it/sec) -training >> step=8978200, episode=1497 reward=0.7818617 (473.91 it/sec) -training >> step=8978300, episode=1497 reward=0.7801912 (448.44 it/sec) -training >> step=8978400, episode=1497 reward=0.7753618 (433.16 it/sec) -training >> step=8978500, episode=1497 reward=0.805993 (489.29 it/sec) -training >> step=8978600, episode=1497 reward=0.7824894 (459.49 it/sec) -training >> step=8978700, episode=1497 reward=0.7952251 (463.73 it/sec) -training >> step=8978800, episode=1497 reward=0.7876938 (479.45 it/sec) -training >> step=8978900, episode=1497 reward=0.7905431 (507.36 it/sec) -training >> step=8979000, episode=1497 reward=0.7899874 (464.65 it/sec) -training >> step=8979100, episode=1497 reward=0.7837974 (461.65 it/sec) -training >> step=8979200, episode=1497 reward=0.7929195 (468.78 it/sec) -training >> step=8979300, episode=1497 reward=0.8064045 (499.64 it/sec) -training >> step=8979400, episode=1497 reward=0.7827719 (480.32 it/sec) -training >> step=8979500, episode=1497 reward=0.7996204 (452.46 it/sec) -training >> step=8979600, episode=1497 reward=0.7972298 (505.08 it/sec) -training >> step=8979700, episode=1497 reward=0.7677748 (425.83 it/sec) -training >> step=8979800, episode=1497 reward=0.791752 (436.43 it/sec) -training >> step=8979900, episode=1497 reward=0.7708935 (467.09 it/sec) -training >> step=8980000, episode=1497 reward=0.7797508 (485.73 it/sec) -training >> step=8980100, episode=1497 reward=0.7775019 (439.62 it/sec) -training >> step=8980200, episode=1497 reward=0.8050743 (455.92 it/sec) -training >> step=8980300, episode=1497 reward=0.7871149 (496.31 it/sec) -training >> step=8980400, episode=1497 reward=0.7704201 (481.04 it/sec) -training >> step=8980500, episode=1497 reward=0.7950682 (470.96 it/sec) -training >> step=8980600, episode=1497 reward=0.8010649 (391.62 it/sec) -training >> step=8980700, episode=1497 reward=0.8011627 (427.11 it/sec) -training >> step=8980800, episode=1497 reward=0.8006635 (310.18 it/sec) -training >> step=8980900, episode=1497 reward=0.8105345 (410.38 it/sec) -training >> step=8981000, episode=1497 reward=0.7810384 (432.86 it/sec) -training >> step=8981100, episode=1497 reward=0.7800777 (465.49 it/sec) -training >> step=8981200, episode=1497 reward=0.7745786 (479.25 it/sec) -training >> step=8981300, episode=1498 reward=0.7974963 (91.01 it/sec) -training >> step=8981400, episode=1498 reward=0.7830642 (478.61 it/sec) -training >> step=8981500, episode=1498 reward=0.7829212 (486.49 it/sec) -training >> step=8981600, episode=1498 reward=0.7849827 (466.57 it/sec) -training >> step=8981700, episode=1498 reward=0.784643 (455.50 it/sec) -training >> step=8981800, episode=1498 reward=0.7849768 (453.44 it/sec) -training >> step=8981900, episode=1498 reward=0.780749 (455.40 it/sec) -training >> step=8982000, episode=1498 reward=0.7898647 (513.56 it/sec) -training >> step=8982100, episode=1498 reward=0.7892579 (484.15 it/sec) -training >> step=8982200, episode=1498 reward=0.7850137 (510.46 it/sec) -training >> step=8982300, episode=1498 reward=0.7781935 (503.73 it/sec) -training >> step=8982400, episode=1498 reward=0.7941377 (513.05 it/sec) -training >> step=8982500, episode=1498 reward=0.795614 (453.91 it/sec) -training >> step=8982600, episode=1498 reward=0.7706013 (445.00 it/sec) -training >> step=8982700, episode=1498 reward=0.8009815 (534.95 it/sec) -training >> step=8982800, episode=1498 reward=0.7995736 (503.02 it/sec) -training >> step=8982900, episode=1498 reward=0.7912299 (528.60 it/sec) -training >> step=8983000, episode=1498 reward=0.7806329 (534.97 it/sec) -training >> step=8983100, episode=1498 reward=0.7965672 (549.83 it/sec) -training >> step=8983200, episode=1498 reward=0.7778177 (489.10 it/sec) -training >> step=8983300, episode=1498 reward=0.7985615 (507.54 it/sec) -training >> step=8983400, episode=1498 reward=0.7991306 (527.70 it/sec) -training >> step=8983500, episode=1498 reward=0.7732603 (526.65 it/sec) -training >> step=8983600, episode=1498 reward=0.7873837 (519.44 it/sec) -training >> step=8983700, episode=1498 reward=0.778491 (477.40 it/sec) -training >> step=8983800, episode=1498 reward=0.8096616 (532.28 it/sec) -training >> step=8983900, episode=1498 reward=0.7905468 (501.29 it/sec) -training >> step=8984000, episode=1498 reward=0.7966059 (498.25 it/sec) -training >> step=8984100, episode=1498 reward=0.7940899 (459.17 it/sec) -training >> step=8984200, episode=1498 reward=0.7934955 (528.02 it/sec) -training >> step=8984300, episode=1498 reward=0.7720017 (491.55 it/sec) -training >> step=8984400, episode=1498 reward=0.7866044 (476.31 it/sec) -training >> step=8984500, episode=1498 reward=0.772124 (526.94 it/sec) -training >> step=8984600, episode=1498 reward=0.7787765 (513.87 it/sec) -training >> step=8984700, episode=1498 reward=0.7711544 (507.75 it/sec) -training >> step=8984800, episode=1498 reward=0.8095682 (529.79 it/sec) -training >> step=8984900, episode=1498 reward=0.7797743 (548.03 it/sec) -training >> step=8985000, episode=1498 reward=0.7948863 (482.59 it/sec) -training >> step=8985100, episode=1498 reward=0.8054991 (521.88 it/sec) -training >> step=8985200, episode=1498 reward=0.7967794 (492.93 it/sec) -training >> step=8985300, episode=1498 reward=0.7827304 (486.88 it/sec) -training >> step=8985400, episode=1498 reward=0.7933174 (514.93 it/sec) -training >> step=8985500, episode=1498 reward=0.809598 (502.92 it/sec) -training >> step=8985600, episode=1498 reward=0.7834898 (464.92 it/sec) -training >> step=8985700, episode=1498 reward=0.7851366 (451.52 it/sec) -training >> step=8985800, episode=1498 reward=0.7843915 (474.99 it/sec) -training >> step=8985900, episode=1498 reward=0.7887062 (488.66 it/sec) -training >> step=8986000, episode=1498 reward=0.7981429 (499.88 it/sec) -training >> step=8986100, episode=1498 reward=0.7793864 (433.27 it/sec) -training >> step=8986200, episode=1498 reward=0.8071349 (442.01 it/sec) -training >> step=8986300, episode=1498 reward=0.776368 (495.07 it/sec) -training >> step=8986400, episode=1498 reward=0.7614298 (478.90 it/sec) -training >> step=8986500, episode=1498 reward=0.790978 (470.54 it/sec) -training >> step=8986600, episode=1498 reward=0.7983665 (454.78 it/sec) -training >> step=8986700, episode=1498 reward=0.789105 (448.05 it/sec) -training >> step=8986800, episode=1498 reward=0.8028067 (434.40 it/sec) -training >> step=8986900, episode=1498 reward=0.7949058 (486.48 it/sec) -training >> step=8987000, episode=1498 reward=0.8006691 (329.55 it/sec) -training >> step=8987100, episode=1498 reward=0.7977924 (464.30 it/sec) -training >> step=8987200, episode=1498 reward=0.7874723 (464.68 it/sec) -training >> step=8987300, episode=1499 reward=0.7817241 (89.74 it/sec) -training >> step=8987400, episode=1499 reward=0.8031088 (420.40 it/sec) -training >> step=8987500, episode=1499 reward=0.7881179 (471.70 it/sec) -training >> step=8987600, episode=1499 reward=0.7883477 (475.45 it/sec) -training >> step=8987700, episode=1499 reward=0.8059343 (463.94 it/sec) -training >> step=8987800, episode=1499 reward=0.7829332 (516.50 it/sec) -training >> step=8987900, episode=1499 reward=0.7909005 (490.37 it/sec) -training >> step=8988000, episode=1499 reward=0.7909381 (436.63 it/sec) -training >> step=8988100, episode=1499 reward=0.790854 (486.30 it/sec) -training >> step=8988200, episode=1499 reward=0.7794943 (508.06 it/sec) -training >> step=8988300, episode=1499 reward=0.792192 (523.75 it/sec) -training >> step=8988400, episode=1499 reward=0.7890751 (509.89 it/sec) -training >> step=8988500, episode=1499 reward=0.7876374 (539.84 it/sec) -training >> step=8988600, episode=1499 reward=0.7776583 (502.39 it/sec) -training >> step=8988700, episode=1499 reward=0.7696913 (503.48 it/sec) -training >> step=8988800, episode=1499 reward=0.7954097 (526.63 it/sec) -training >> step=8988900, episode=1499 reward=0.7637804 (488.88 it/sec) -training >> step=8989000, episode=1499 reward=0.7942609 (492.08 it/sec) -training >> step=8989100, episode=1499 reward=0.7950718 (506.97 it/sec) -training >> step=8989200, episode=1499 reward=0.7996722 (488.39 it/sec) -training >> step=8989300, episode=1499 reward=0.7982321 (540.99 it/sec) -training >> step=8989400, episode=1499 reward=0.7739303 (502.61 it/sec) -training >> step=8989500, episode=1499 reward=0.7907832 (500.31 it/sec) -training >> step=8989600, episode=1499 reward=0.81607 (488.76 it/sec) -training >> step=8989700, episode=1499 reward=0.7861824 (480.72 it/sec) -training >> step=8989800, episode=1499 reward=0.7853091 (477.73 it/sec) -training >> step=8989900, episode=1499 reward=0.792197 (514.47 it/sec) -training >> step=8990000, episode=1499 reward=0.8083758 (512.35 it/sec) -training >> step=8990100, episode=1499 reward=0.7826313 (497.77 it/sec) -training >> step=8990200, episode=1499 reward=0.7952045 (485.81 it/sec) -training >> step=8990300, episode=1499 reward=0.7817469 (512.73 it/sec) -training >> step=8990400, episode=1499 reward=0.784104 (449.69 it/sec) -training >> step=8990500, episode=1499 reward=0.7928755 (423.86 it/sec) -training >> step=8990600, episode=1499 reward=0.7740878 (502.42 it/sec) -training >> step=8990700, episode=1499 reward=0.7989148 (503.32 it/sec) -training >> step=8990800, episode=1499 reward=0.7698417 (479.98 it/sec) -training >> step=8990900, episode=1499 reward=0.7872241 (468.50 it/sec) -training >> step=8991000, episode=1499 reward=0.7989716 (506.23 it/sec) -training >> step=8991100, episode=1499 reward=0.7847599 (494.11 it/sec) -training >> step=8991200, episode=1499 reward=0.7929636 (505.81 it/sec) -training >> step=8991300, episode=1499 reward=0.8024737 (502.21 it/sec) -training >> step=8991400, episode=1499 reward=0.7858335 (503.81 it/sec) -training >> step=8991500, episode=1499 reward=0.7923656 (507.23 it/sec) -training >> step=8991600, episode=1499 reward=0.8096138 (434.83 it/sec) -training >> step=8991700, episode=1499 reward=0.7864221 (521.57 it/sec) -training >> step=8991800, episode=1499 reward=0.7892835 (486.57 it/sec) -training >> step=8991900, episode=1499 reward=0.7638274 (460.68 it/sec) -training >> step=8992000, episode=1499 reward=0.7989031 (477.92 it/sec) -training >> step=8992100, episode=1499 reward=0.7967674 (478.76 it/sec) -training >> step=8992200, episode=1499 reward=0.7813172 (444.46 it/sec) -training >> step=8992300, episode=1499 reward=0.7849759 (472.25 it/sec) -training >> step=8992400, episode=1499 reward=0.8052787 (455.69 it/sec) -training >> step=8992500, episode=1499 reward=0.7846435 (535.29 it/sec) -training >> step=8992600, episode=1499 reward=0.7895484 (502.27 it/sec) -training >> step=8992700, episode=1499 reward=0.7697579 (509.36 it/sec) -training >> step=8992800, episode=1499 reward=0.787358 (504.51 it/sec) -training >> step=8992900, episode=1499 reward=0.8024046 (462.66 it/sec) -training >> step=8993000, episode=1499 reward=0.7689015 (473.22 it/sec) -training >> step=8993100, episode=1499 reward=0.7750977 (456.90 it/sec) -training >> step=8993200, episode=1499 reward=0.7913858 (387.09 it/sec) -training >> step=8993300, episode=1500 reward=0.7856359 (71.03 it/sec) -training >> step=8993400, episode=1500 reward=0.7795562 (529.53 it/sec) -training >> step=8993500, episode=1500 reward=0.7829992 (433.64 it/sec) -training >> step=8993600, episode=1500 reward=0.7854499 (385.98 it/sec) -training >> step=8993700, episode=1500 reward=0.7801855 (429.99 it/sec) -training >> step=8993800, episode=1500 reward=0.7893495 (482.86 it/sec) -training >> step=8993900, episode=1500 reward=0.8134864 (447.92 it/sec) -training >> step=8994000, episode=1500 reward=0.7994729 (421.13 it/sec) -training >> step=8994100, episode=1500 reward=0.7784548 (435.82 it/sec) -training >> step=8994200, episode=1500 reward=0.7798381 (508.28 it/sec) -training >> step=8994300, episode=1500 reward=0.7964259 (443.13 it/sec) -training >> step=8994400, episode=1500 reward=0.7955188 (509.74 it/sec) -training >> step=8994500, episode=1500 reward=0.8018235 (508.87 it/sec) -training >> step=8994600, episode=1500 reward=0.7875234 (456.50 it/sec) -training >> step=8994700, episode=1500 reward=0.7908914 (484.90 it/sec) -training >> step=8994800, episode=1500 reward=0.8134035 (523.00 it/sec) -training >> step=8994900, episode=1500 reward=0.8114827 (503.55 it/sec) -training >> step=8995000, episode=1500 reward=0.7840266 (492.79 it/sec) -training >> step=8995100, episode=1500 reward=0.7774773 (519.69 it/sec) -training >> step=8995200, episode=1500 reward=0.8001574 (508.91 it/sec) -training >> step=8995300, episode=1500 reward=0.7791699 (529.37 it/sec) -training >> step=8995400, episode=1500 reward=0.8002036 (409.73 it/sec) -training >> step=8995500, episode=1500 reward=0.7815176 (495.83 it/sec) -training >> step=8995600, episode=1500 reward=0.7775201 (463.84 it/sec) -training >> step=8995700, episode=1500 reward=0.80309 (476.38 it/sec) -training >> step=8995800, episode=1500 reward=0.8074095 (527.76 it/sec) -training >> step=8995900, episode=1500 reward=0.8028707 (470.27 it/sec) -training >> step=8996000, episode=1500 reward=0.7784914 (505.90 it/sec) -training >> step=8996100, episode=1500 reward=0.7925953 (498.14 it/sec) -training >> step=8996200, episode=1500 reward=0.8028979 (483.71 it/sec) -training >> step=8996300, episode=1500 reward=0.7873135 (458.45 it/sec) -training >> step=8996400, episode=1500 reward=0.7883298 (494.75 it/sec) -training >> step=8996500, episode=1500 reward=0.7692691 (481.31 it/sec) -training >> step=8996600, episode=1500 reward=0.7794783 (526.41 it/sec) -training >> step=8996700, episode=1500 reward=0.7918289 (498.13 it/sec) -training >> step=8996800, episode=1500 reward=0.7879452 (515.28 it/sec) -training >> step=8996900, episode=1500 reward=0.7971349 (491.19 it/sec) -training >> step=8997000, episode=1500 reward=0.7943202 (507.01 it/sec) -training >> step=8997100, episode=1500 reward=0.7942194 (489.95 it/sec) -training >> step=8997200, episode=1500 reward=0.7838147 (491.78 it/sec) -training >> step=8997300, episode=1500 reward=0.8008164 (515.52 it/sec) -training >> step=8997400, episode=1500 reward=0.7918776 (510.00 it/sec) -training >> step=8997500, episode=1500 reward=0.779852 (503.23 it/sec) -training >> step=8997600, episode=1500 reward=0.7867081 (509.42 it/sec) -training >> step=8997700, episode=1500 reward=0.7868532 (489.39 it/sec) -training >> step=8997800, episode=1500 reward=0.7887972 (522.69 it/sec) -training >> step=8997900, episode=1500 reward=0.7913871 (522.43 it/sec) -training >> step=8998000, episode=1500 reward=0.7914602 (493.88 it/sec) -training >> step=8998100, episode=1500 reward=0.7901926 (458.66 it/sec) -training >> step=8998200, episode=1500 reward=0.7918851 (494.44 it/sec) -training >> step=8998300, episode=1500 reward=0.7992435 (527.42 it/sec) -training >> step=8998400, episode=1500 reward=0.7923157 (472.27 it/sec) -training >> step=8998500, episode=1500 reward=0.7954916 (510.17 it/sec) -training >> step=8998600, episode=1500 reward=0.7732076 (528.12 it/sec) -training >> step=8998700, episode=1500 reward=0.7947497 (489.08 it/sec) -training >> step=8998800, episode=1500 reward=0.7956505 (522.38 it/sec) -training >> step=8998900, episode=1500 reward=0.7780932 (473.94 it/sec) -training >> step=8999000, episode=1500 reward=0.7884616 (479.68 it/sec) -training >> step=8999100, episode=1500 reward=0.7944497 (505.99 it/sec) -training >> step=8999200, episode=1500 reward=0.7960143 (533.83 it/sec) -training >> step=8999300, episode=1501 reward=0.8098386 (98.63 it/sec) -training >> step=8999400, episode=1501 reward=0.7815217 (460.66 it/sec) -training >> step=8999500, episode=1501 reward=0.7703902 (500.23 it/sec) -training >> step=8999600, episode=1501 reward=0.7832031 (512.78 it/sec) -training >> step=8999700, episode=1501 reward=0.7836369 (498.22 it/sec) -training >> step=8999800, episode=1501 reward=0.7953539 (508.92 it/sec) -training >> step=8999900, episode=1501 reward=0.7797399 (516.52 it/sec) -training >> step=9000000, episode=1501 reward=0.7927509 (494.99 it/sec) -training >> step=9000100, episode=1501 reward=0.7797581 (486.34 it/sec) -training >> step=9000200, episode=1501 reward=0.817404 (514.93 it/sec) -training >> step=9000300, episode=1501 reward=0.8001884 (506.21 it/sec) -training >> step=9000400, episode=1501 reward=0.7953647 (490.09 it/sec) -training >> step=9000500, episode=1501 reward=0.7731246 (499.63 it/sec) -training >> step=9000600, episode=1501 reward=0.8143187 (486.62 it/sec) -training >> step=9000700, episode=1501 reward=0.7842305 (486.35 it/sec) -training >> step=9000800, episode=1501 reward=0.7850521 (484.44 it/sec) -training >> step=9000900, episode=1501 reward=0.8002213 (533.22 it/sec) -training >> step=9001000, episode=1501 reward=0.7912206 (505.34 it/sec) -training >> step=9001100, episode=1501 reward=0.7855059 (456.10 it/sec) -training >> step=9001200, episode=1501 reward=0.7946566 (505.74 it/sec) -training >> step=9001300, episode=1501 reward=0.7869383 (498.66 it/sec) -training >> step=9001400, episode=1501 reward=0.8017693 (434.60 it/sec) -training >> step=9001500, episode=1501 reward=0.7915034 (523.51 it/sec) -training >> step=9001600, episode=1501 reward=0.7996465 (522.70 it/sec) -training >> step=9001700, episode=1501 reward=0.7810527 (458.59 it/sec) -training >> step=9001800, episode=1501 reward=0.777092 (512.14 it/sec) -training >> step=9001900, episode=1501 reward=0.7817342 (489.07 it/sec) -training >> step=9002000, episode=1501 reward=0.7686433 (522.29 it/sec) -training >> step=9002100, episode=1501 reward=0.7812057 (475.98 it/sec) -training >> step=9002200, episode=1501 reward=0.8009565 (503.11 it/sec) -training >> step=9002300, episode=1501 reward=0.7837791 (496.64 it/sec) -training >> step=9002400, episode=1501 reward=0.7874778 (461.86 it/sec) -training >> step=9002500, episode=1501 reward=0.7910586 (495.68 it/sec) -training >> step=9002600, episode=1501 reward=0.8035872 (535.66 it/sec) -training >> step=9002700, episode=1501 reward=0.7851261 (512.31 it/sec) -training >> step=9002800, episode=1501 reward=0.7784119 (477.51 it/sec) -training >> step=9002900, episode=1501 reward=0.7895924 (474.28 it/sec) -training >> step=9003000, episode=1501 reward=0.7822712 (483.92 it/sec) -training >> step=9003100, episode=1501 reward=0.7899348 (450.52 it/sec) -training >> step=9003200, episode=1501 reward=0.7836434 (496.38 it/sec) -training >> step=9003300, episode=1501 reward=0.782194 (456.56 it/sec) -training >> step=9003400, episode=1501 reward=0.7898375 (512.22 it/sec) -training >> step=9003500, episode=1501 reward=0.7954065 (499.05 it/sec) -training >> step=9003600, episode=1501 reward=0.7859073 (492.10 it/sec) -training >> step=9003700, episode=1501 reward=0.7667074 (509.30 it/sec) -training >> step=9003800, episode=1501 reward=0.7807376 (499.20 it/sec) -training >> step=9003900, episode=1501 reward=0.7918571 (495.73 it/sec) -training >> step=9004000, episode=1501 reward=0.7842267 (529.30 it/sec) -training >> step=9004100, episode=1501 reward=0.7774352 (509.83 it/sec) -training >> step=9004200, episode=1501 reward=0.7955992 (445.75 it/sec) -training >> step=9004300, episode=1501 reward=0.7971219 (504.99 it/sec) -training >> step=9004400, episode=1501 reward=0.7811463 (492.54 it/sec) -training >> step=9004500, episode=1501 reward=0.7744184 (485.58 it/sec) -training >> step=9004600, episode=1501 reward=0.7829558 (512.44 it/sec) -training >> step=9004700, episode=1501 reward=0.7878475 (549.62 it/sec) -training >> step=9004800, episode=1501 reward=0.7762407 (515.84 it/sec) -training >> step=9004900, episode=1501 reward=0.7673137 (490.42 it/sec) -training >> step=9005000, episode=1501 reward=0.7793417 (532.47 it/sec) -training >> step=9005100, episode=1501 reward=0.787706 (523.79 it/sec) -training >> step=9005200, episode=1501 reward=0.7779145 (518.14 it/sec) -training >> step=9005300, episode=1502 reward=0.8143457 (88.99 it/sec) -training >> step=9005400, episode=1502 reward=0.7742027 (549.11 it/sec) -training >> step=9005500, episode=1502 reward=0.7820963 (506.72 it/sec) -training >> step=9005600, episode=1502 reward=0.7754788 (506.94 it/sec) -training >> step=9005700, episode=1502 reward=0.78382 (512.97 it/sec) -training >> step=9005800, episode=1502 reward=0.7860456 (475.70 it/sec) -training >> step=9005900, episode=1502 reward=0.8048609 (476.91 it/sec) -training >> step=9006000, episode=1502 reward=0.7902302 (500.48 it/sec) -training >> step=9006100, episode=1502 reward=0.7817008 (484.24 it/sec) -training >> step=9006200, episode=1502 reward=0.8057791 (512.42 it/sec) -training >> step=9006300, episode=1502 reward=0.8001078 (488.69 it/sec) -training >> step=9006400, episode=1502 reward=0.7963346 (386.94 it/sec) -training >> step=9006500, episode=1502 reward=0.7993757 (441.78 it/sec) -training >> step=9006600, episode=1502 reward=0.8053097 (429.25 it/sec) -training >> step=9006700, episode=1502 reward=0.8129689 (460.15 it/sec) -training >> step=9006800, episode=1502 reward=0.7792814 (443.01 it/sec) -training >> step=9006900, episode=1502 reward=0.7900264 (421.85 it/sec) -training >> step=9007000, episode=1502 reward=0.7969034 (444.75 it/sec) -training >> step=9007100, episode=1502 reward=0.7775468 (459.76 it/sec) -training >> step=9007200, episode=1502 reward=0.7921351 (473.03 it/sec) -training >> step=9007300, episode=1502 reward=0.7881015 (454.31 it/sec) -training >> step=9007400, episode=1502 reward=0.7970877 (455.22 it/sec) -training >> step=9007500, episode=1502 reward=0.7805693 (427.70 it/sec) -training >> step=9007600, episode=1502 reward=0.7925673 (433.21 it/sec) -training >> step=9007700, episode=1502 reward=0.7970396 (417.40 it/sec) -training >> step=9007800, episode=1502 reward=0.7805054 (369.79 it/sec) -training >> step=9007900, episode=1502 reward=0.8040416 (410.13 it/sec) -training >> step=9008000, episode=1502 reward=0.8022816 (442.29 it/sec) -training >> step=9008100, episode=1502 reward=0.7974412 (496.46 it/sec) -training >> step=9008200, episode=1502 reward=0.7853227 (461.12 it/sec) -training >> step=9008300, episode=1502 reward=0.7939234 (492.06 it/sec) -training >> step=9008400, episode=1502 reward=0.7683672 (478.72 it/sec) -training >> step=9008500, episode=1502 reward=0.7933589 (512.49 it/sec) -training >> step=9008600, episode=1502 reward=0.7803156 (490.40 it/sec) -training >> step=9008700, episode=1502 reward=0.7895005 (503.24 it/sec) -training >> step=9008800, episode=1502 reward=0.7992247 (530.80 it/sec) -training >> step=9008900, episode=1502 reward=0.7873822 (531.16 it/sec) -training >> step=9009000, episode=1502 reward=0.7950798 (504.33 it/sec) -training >> step=9009100, episode=1502 reward=0.7911928 (513.82 it/sec) -training >> step=9009200, episode=1502 reward=0.7790656 (485.95 it/sec) -training >> step=9009300, episode=1502 reward=0.7881365 (414.19 it/sec) -training >> step=9009400, episode=1502 reward=0.7951258 (472.70 it/sec) -training >> step=9009500, episode=1502 reward=0.7713969 (494.87 it/sec) -training >> step=9009600, episode=1502 reward=0.8009327 (469.23 it/sec) -training >> step=9009700, episode=1502 reward=0.8008835 (481.82 it/sec) -training >> step=9009800, episode=1502 reward=0.8030955 (468.47 it/sec) -training >> step=9009900, episode=1502 reward=0.7790227 (498.28 it/sec) -training >> step=9010000, episode=1502 reward=0.7881538 (421.43 it/sec) -training >> step=9010100, episode=1502 reward=0.7894075 (391.56 it/sec) -training >> step=9010200, episode=1502 reward=0.7879452 (367.01 it/sec) -training >> step=9010300, episode=1502 reward=0.7983025 (382.65 it/sec) -training >> step=9010400, episode=1502 reward=0.8040705 (375.06 it/sec) -training >> step=9010500, episode=1502 reward=0.7979814 (366.08 it/sec) -training >> step=9010600, episode=1502 reward=0.7845787 (330.11 it/sec) -training >> step=9010700, episode=1502 reward=0.7791486 (442.51 it/sec) -training >> step=9010800, episode=1502 reward=0.8005972 (372.17 it/sec) -training >> step=9010900, episode=1502 reward=0.7712537 (432.53 it/sec) -training >> step=9011000, episode=1502 reward=0.7834069 (469.43 it/sec) -training >> step=9011100, episode=1502 reward=0.7849051 (456.51 it/sec) -training >> step=9011200, episode=1502 reward=0.8062581 (489.15 it/sec) -training >> step=9011300, episode=1503 reward=0.7785305 (68.93 it/sec) -training >> step=9011400, episode=1503 reward=0.7921368 (446.95 it/sec) -training >> step=9011500, episode=1503 reward=0.7823337 (466.43 it/sec) -training >> step=9011600, episode=1503 reward=0.8030718 (469.24 it/sec) -training >> step=9011700, episode=1503 reward=0.7828187 (480.91 it/sec) -training >> step=9011800, episode=1503 reward=0.7956904 (490.17 it/sec) -training >> step=9011900, episode=1503 reward=0.7982439 (484.89 it/sec) -training >> step=9012000, episode=1503 reward=0.7917613 (496.83 it/sec) -training >> step=9012100, episode=1503 reward=0.7876812 (494.47 it/sec) -training >> step=9012200, episode=1503 reward=0.7875333 (465.82 it/sec) -training >> step=9012300, episode=1503 reward=0.7885907 (484.88 it/sec) -training >> step=9012400, episode=1503 reward=0.7770067 (469.21 it/sec) -training >> step=9012500, episode=1503 reward=0.7716916 (441.88 it/sec) -training >> step=9012600, episode=1503 reward=0.7932655 (506.37 it/sec) -training >> step=9012700, episode=1503 reward=0.7847078 (473.52 it/sec) -training >> step=9012800, episode=1503 reward=0.7976173 (508.36 it/sec) -training >> step=9012900, episode=1503 reward=0.7854477 (486.29 it/sec) -training >> step=9013000, episode=1503 reward=0.8113554 (481.21 it/sec) -training >> step=9013100, episode=1503 reward=0.7864046 (493.85 it/sec) -training >> step=9013200, episode=1503 reward=0.7823368 (501.95 it/sec) -training >> step=9013300, episode=1503 reward=0.7731628 (526.21 it/sec) -training >> step=9013400, episode=1503 reward=0.7808722 (492.84 it/sec) -training >> step=9013500, episode=1503 reward=0.7857209 (432.47 it/sec) -training >> step=9013600, episode=1503 reward=0.7932404 (440.40 it/sec) -training >> step=9013700, episode=1503 reward=0.7956807 (423.38 it/sec) -training >> step=9013800, episode=1503 reward=0.7853991 (444.98 it/sec) -training >> step=9013900, episode=1503 reward=0.7867196 (468.28 it/sec) -training >> step=9014000, episode=1503 reward=0.778753 (448.34 it/sec) -training >> step=9014100, episode=1503 reward=0.7933961 (462.39 it/sec) -training >> step=9014200, episode=1503 reward=0.8026518 (470.31 it/sec) -training >> step=9014300, episode=1503 reward=0.7824406 (450.34 it/sec) -training >> step=9014400, episode=1503 reward=0.7977021 (488.64 it/sec) -training >> step=9014500, episode=1503 reward=0.8096089 (501.58 it/sec) -training >> step=9014600, episode=1503 reward=0.7774981 (463.77 it/sec) -training >> step=9014700, episode=1503 reward=0.7825075 (511.38 it/sec) -training >> step=9014800, episode=1503 reward=0.8131984 (480.45 it/sec) -training >> step=9014900, episode=1503 reward=0.784976 (480.12 it/sec) -training >> step=9015000, episode=1503 reward=0.8012589 (498.30 it/sec) -training >> step=9015100, episode=1503 reward=0.7776738 (500.87 it/sec) -training >> step=9015200, episode=1503 reward=0.808056 (480.63 it/sec) -training >> step=9015300, episode=1503 reward=0.7827533 (477.03 it/sec) -training >> step=9015400, episode=1503 reward=0.7907445 (498.07 it/sec) -training >> step=9015500, episode=1503 reward=0.7919673 (524.32 it/sec) -training >> step=9015600, episode=1503 reward=0.7871985 (499.54 it/sec) -training >> step=9015700, episode=1503 reward=0.7792 (492.86 it/sec) -training >> step=9015800, episode=1503 reward=0.8063863 (497.83 it/sec) -training >> step=9015900, episode=1503 reward=0.7879292 (477.88 it/sec) -training >> step=9016000, episode=1503 reward=0.7741617 (489.70 it/sec) -training >> step=9016100, episode=1503 reward=0.7769762 (496.82 it/sec) -training >> step=9016200, episode=1503 reward=0.8050119 (543.30 it/sec) -training >> step=9016300, episode=1503 reward=0.8027817 (503.67 it/sec) -training >> step=9016400, episode=1503 reward=0.7870144 (483.31 it/sec) -training >> step=9016500, episode=1503 reward=0.7798529 (499.98 it/sec) -training >> step=9016600, episode=1503 reward=0.8084025 (489.33 it/sec) -training >> step=9016700, episode=1503 reward=0.7980869 (507.62 it/sec) -training >> step=9016800, episode=1503 reward=0.7999058 (501.05 it/sec) -training >> step=9016900, episode=1503 reward=0.793082 (514.53 it/sec) -training >> step=9017000, episode=1503 reward=0.803784 (481.69 it/sec) -training >> step=9017100, episode=1503 reward=0.7712271 (481.51 it/sec) -training >> step=9017200, episode=1503 reward=0.800469 (486.09 it/sec) -training >> step=9017300, episode=1504 reward=0.7834317 (97.08 it/sec) -training >> step=9017400, episode=1504 reward=0.7866599 (483.29 it/sec) -training >> step=9017500, episode=1504 reward=0.7795382 (502.54 it/sec) -training >> step=9017600, episode=1504 reward=0.796404 (488.47 it/sec) -training >> step=9017700, episode=1504 reward=0.7942401 (493.56 it/sec) -training >> step=9017800, episode=1504 reward=0.8016061 (490.90 it/sec) -training >> step=9017900, episode=1504 reward=0.7851988 (515.31 it/sec) -training >> step=9018000, episode=1504 reward=0.7899609 (471.15 it/sec) -training >> step=9018100, episode=1504 reward=0.7745209 (482.43 it/sec) -training >> step=9018200, episode=1504 reward=0.7602862 (507.18 it/sec) -training >> step=9018300, episode=1504 reward=0.8013179 (476.83 it/sec) -training >> step=9018400, episode=1504 reward=0.8077009 (505.63 it/sec) -training >> step=9018500, episode=1504 reward=0.7809545 (476.91 it/sec) -training >> step=9018600, episode=1504 reward=0.7992498 (540.58 it/sec) -training >> step=9018700, episode=1504 reward=0.7806181 (473.54 it/sec) -training >> step=9018800, episode=1504 reward=0.7749828 (477.21 it/sec) -training >> step=9018900, episode=1504 reward=0.7880583 (503.15 it/sec) -training >> step=9019000, episode=1504 reward=0.7914361 (515.51 it/sec) -training >> step=9019100, episode=1504 reward=0.7960453 (499.18 it/sec) -training >> step=9019200, episode=1504 reward=0.793676 (477.85 it/sec) -training >> step=9019300, episode=1504 reward=0.8045555 (497.16 it/sec) -training >> step=9019400, episode=1504 reward=0.8099755 (496.40 it/sec) -training >> step=9019500, episode=1504 reward=0.7943005 (486.76 it/sec) -training >> step=9019600, episode=1504 reward=0.7761543 (468.24 it/sec) -training >> step=9019700, episode=1504 reward=0.7876209 (498.54 it/sec) -training >> step=9019800, episode=1504 reward=0.7908475 (488.74 it/sec) -training >> step=9019900, episode=1504 reward=0.7857234 (511.01 it/sec) -training >> step=9020000, episode=1504 reward=0.7725524 (484.95 it/sec) -training >> step=9020100, episode=1504 reward=0.7986167 (490.63 it/sec) -training >> step=9020200, episode=1504 reward=0.7840853 (483.77 it/sec) -training >> step=9020300, episode=1504 reward=0.7976476 (434.82 it/sec) -training >> step=9020400, episode=1504 reward=0.8014161 (503.18 it/sec) -training >> step=9020500, episode=1504 reward=0.7935705 (497.09 it/sec) -training >> step=9020600, episode=1504 reward=0.7791365 (484.13 it/sec) -training >> step=9020700, episode=1504 reward=0.8035353 (474.75 it/sec) -training >> step=9020800, episode=1504 reward=0.7857851 (477.64 it/sec) -training >> step=9020900, episode=1504 reward=0.7835794 (506.04 it/sec) -training >> step=9021000, episode=1504 reward=0.7807829 (450.74 it/sec) -training >> step=9021100, episode=1504 reward=0.8027829 (481.95 it/sec) -training >> step=9021200, episode=1504 reward=0.7932889 (475.08 it/sec) -training >> step=9021300, episode=1504 reward=0.7792985 (503.86 it/sec) -training >> step=9021400, episode=1504 reward=0.775741 (490.28 it/sec) -training >> step=9021500, episode=1504 reward=0.7696042 (505.08 it/sec) -training >> step=9021600, episode=1504 reward=0.7850461 (487.94 it/sec) -training >> step=9021700, episode=1504 reward=0.7863904 (487.80 it/sec) -training >> step=9021800, episode=1504 reward=0.7636786 (490.24 it/sec) -training >> step=9021900, episode=1504 reward=0.7942755 (444.20 it/sec) -training >> step=9022000, episode=1504 reward=0.7951052 (461.03 it/sec) -training >> step=9022100, episode=1504 reward=0.7811977 (495.38 it/sec) -training >> step=9022200, episode=1504 reward=0.7760608 (502.27 it/sec) -training >> step=9022300, episode=1504 reward=0.7899904 (460.50 it/sec) -training >> step=9022400, episode=1504 reward=0.7872072 (514.93 it/sec) -training >> step=9022500, episode=1504 reward=0.7948429 (491.40 it/sec) -training >> step=9022600, episode=1504 reward=0.7805527 (524.56 it/sec) -training >> step=9022700, episode=1504 reward=0.7858739 (505.43 it/sec) -training >> step=9022800, episode=1504 reward=0.7850788 (436.34 it/sec) -training >> step=9022900, episode=1504 reward=0.7843254 (494.56 it/sec) -training >> step=9023000, episode=1504 reward=0.7974664 (474.95 it/sec) -training >> step=9023100, episode=1504 reward=0.8048692 (524.26 it/sec) -training >> step=9023200, episode=1504 reward=0.7782789 (459.09 it/sec) -training >> step=9023300, episode=1505 reward=0.8055786 (107.19 it/sec) -training >> step=9023400, episode=1505 reward=0.7753849 (462.63 it/sec) -training >> step=9023500, episode=1505 reward=0.7812536 (489.11 it/sec) -training >> step=9023600, episode=1505 reward=0.7777779 (496.50 it/sec) -training >> step=9023700, episode=1505 reward=0.7947225 (506.65 it/sec) -training >> step=9023800, episode=1505 reward=0.7890342 (527.53 it/sec) -training >> step=9023900, episode=1505 reward=0.8036822 (482.30 it/sec) -training >> step=9024000, episode=1505 reward=0.7887639 (500.12 it/sec) -training >> step=9024100, episode=1505 reward=0.7974083 (521.91 it/sec) -training >> step=9024200, episode=1505 reward=0.7999296 (535.91 it/sec) -training >> step=9024300, episode=1505 reward=0.7925647 (503.71 it/sec) -training >> step=9024400, episode=1505 reward=0.769536 (468.24 it/sec) -training >> step=9024500, episode=1505 reward=0.8081391 (531.65 it/sec) -training >> step=9024600, episode=1505 reward=0.7804275 (474.47 it/sec) -training >> step=9024700, episode=1505 reward=0.795114 (508.08 it/sec) -training >> step=9024800, episode=1505 reward=0.79338 (490.94 it/sec) -training >> step=9024900, episode=1505 reward=0.791576 (493.64 it/sec) -training >> step=9025000, episode=1505 reward=0.7912443 (483.95 it/sec) -training >> step=9025100, episode=1505 reward=0.7796072 (466.78 it/sec) -training >> step=9025200, episode=1505 reward=0.7999973 (475.25 it/sec) -training >> step=9025300, episode=1505 reward=0.791713 (483.09 it/sec) -training >> step=9025400, episode=1505 reward=0.7904655 (426.88 it/sec) -training >> step=9025500, episode=1505 reward=0.7941379 (460.33 it/sec) -training >> step=9025600, episode=1505 reward=0.7896342 (487.49 it/sec) -training >> step=9025700, episode=1505 reward=0.7746801 (496.33 it/sec) -training >> step=9025800, episode=1505 reward=0.7991735 (479.43 it/sec) -training >> step=9025900, episode=1505 reward=0.7866877 (499.09 it/sec) -training >> step=9026000, episode=1505 reward=0.7982144 (517.76 it/sec) -training >> step=9026100, episode=1505 reward=0.7937382 (491.82 it/sec) -training >> step=9026200, episode=1505 reward=0.7836788 (518.90 it/sec) -training >> step=9026300, episode=1505 reward=0.8117747 (496.73 it/sec) -training >> step=9026400, episode=1505 reward=0.7918225 (429.52 it/sec) -training >> step=9026500, episode=1505 reward=0.7905161 (462.62 it/sec) -training >> step=9026600, episode=1505 reward=0.7745556 (479.51 it/sec) -training >> step=9026700, episode=1505 reward=0.7945361 (494.11 it/sec) -training >> step=9026800, episode=1505 reward=0.8099179 (486.73 it/sec) -training >> step=9026900, episode=1505 reward=0.805681 (478.85 it/sec) -training >> step=9027000, episode=1505 reward=0.7938588 (510.73 it/sec) -training >> step=9027100, episode=1505 reward=0.7883865 (480.59 it/sec) -training >> step=9027200, episode=1505 reward=0.7777275 (495.79 it/sec) -training >> step=9027300, episode=1505 reward=0.7877601 (509.19 it/sec) -training >> step=9027400, episode=1505 reward=0.7727357 (497.79 it/sec) -training >> step=9027500, episode=1505 reward=0.7883028 (527.36 it/sec) -training >> step=9027600, episode=1505 reward=0.7893436 (420.00 it/sec) -training >> step=9027700, episode=1505 reward=0.7937241 (482.52 it/sec) -training >> step=9027800, episode=1505 reward=0.7782789 (481.04 it/sec) -training >> step=9027900, episode=1505 reward=0.7770476 (481.03 it/sec) -training >> step=9028000, episode=1505 reward=0.7915929 (532.58 it/sec) -training >> step=9028100, episode=1505 reward=0.7705774 (505.66 it/sec) -training >> step=9028200, episode=1505 reward=0.8044457 (471.32 it/sec) -training >> step=9028300, episode=1505 reward=0.7817854 (495.12 it/sec) -training >> step=9028400, episode=1505 reward=0.7895902 (473.72 it/sec) -training >> step=9028500, episode=1505 reward=0.8077644 (543.77 it/sec) -training >> step=9028600, episode=1505 reward=0.7690212 (509.73 it/sec) -training >> step=9028700, episode=1505 reward=0.7814071 (484.28 it/sec) -training >> step=9028800, episode=1505 reward=0.787245 (493.42 it/sec) -training >> step=9028900, episode=1505 reward=0.7911033 (455.24 it/sec) -training >> step=9029000, episode=1505 reward=0.7812888 (496.84 it/sec) -training >> step=9029100, episode=1505 reward=0.8109043 (483.54 it/sec) -training >> step=9029200, episode=1505 reward=0.7902885 (502.28 it/sec) -training >> step=9029300, episode=1506 reward=0.7823814 (88.03 it/sec) -training >> step=9029400, episode=1506 reward=0.8014959 (473.86 it/sec) -training >> step=9029500, episode=1506 reward=0.7934282 (504.07 it/sec) -training >> step=9029600, episode=1506 reward=0.7792375 (465.68 it/sec) -training >> step=9029700, episode=1506 reward=0.7870659 (389.79 it/sec) -training >> step=9029800, episode=1506 reward=0.8084151 (496.37 it/sec) -training >> step=9029900, episode=1506 reward=0.7916359 (487.73 it/sec) -training >> step=9030000, episode=1506 reward=0.787473 (518.63 it/sec) -training >> step=9030100, episode=1506 reward=0.7973781 (499.00 it/sec) -training >> step=9030200, episode=1506 reward=0.7902127 (504.42 it/sec) -training >> step=9030300, episode=1506 reward=0.7950136 (500.57 it/sec) -training >> step=9030400, episode=1506 reward=0.7896724 (465.70 it/sec) -training >> step=9030500, episode=1506 reward=0.7772301 (496.41 it/sec) -training >> step=9030600, episode=1506 reward=0.8027003 (516.10 it/sec) -training >> step=9030700, episode=1506 reward=0.7957105 (509.71 it/sec) -training >> step=9030800, episode=1506 reward=0.7836581 (500.28 it/sec) -training >> step=9030900, episode=1506 reward=0.7885616 (467.03 it/sec) -training >> step=9031000, episode=1506 reward=0.7931114 (442.17 it/sec) -training >> step=9031100, episode=1506 reward=0.8069624 (489.45 it/sec) -training >> step=9031200, episode=1506 reward=0.7909452 (521.62 it/sec) -training >> step=9031300, episode=1506 reward=0.7783368 (519.28 it/sec) -training >> step=9031400, episode=1506 reward=0.8021328 (461.81 it/sec) -training >> step=9031500, episode=1506 reward=0.7831895 (483.98 it/sec) -training >> step=9031600, episode=1506 reward=0.7878879 (524.29 it/sec) -training >> step=9031700, episode=1506 reward=0.7824665 (480.29 it/sec) -training >> step=9031800, episode=1506 reward=0.7958362 (413.22 it/sec) -training >> step=9031900, episode=1506 reward=0.7959048 (469.26 it/sec) -training >> step=9032000, episode=1506 reward=0.8063805 (529.14 it/sec) -training >> step=9032100, episode=1506 reward=0.7985323 (475.13 it/sec) -training >> step=9032200, episode=1506 reward=0.7855659 (489.76 it/sec) -training >> step=9032300, episode=1506 reward=0.8074961 (523.79 it/sec) -training >> step=9032400, episode=1506 reward=0.7861358 (474.34 it/sec) -training >> step=9032500, episode=1506 reward=0.7819695 (470.43 it/sec) -training >> step=9032600, episode=1506 reward=0.7752396 (494.11 it/sec) -training >> step=9032700, episode=1506 reward=0.7914129 (475.77 it/sec) -training >> step=9032800, episode=1506 reward=0.7903775 (503.13 it/sec) -training >> step=9032900, episode=1506 reward=0.7798126 (475.79 it/sec) -training >> step=9033000, episode=1506 reward=0.7861307 (498.40 it/sec) -training >> step=9033100, episode=1506 reward=0.7983022 (537.73 it/sec) -training >> step=9033200, episode=1506 reward=0.7732905 (487.87 it/sec) -training >> step=9033300, episode=1506 reward=0.8023176 (505.76 it/sec) -training >> step=9033400, episode=1506 reward=0.7916748 (475.89 it/sec) -training >> step=9033500, episode=1506 reward=0.7997646 (472.88 it/sec) -training >> step=9033600, episode=1506 reward=0.7966557 (523.73 it/sec) -training >> step=9033700, episode=1506 reward=0.7990177 (497.71 it/sec) -training >> step=9033800, episode=1506 reward=0.7722117 (513.92 it/sec) -training >> step=9033900, episode=1506 reward=0.7985765 (482.74 it/sec) -training >> step=9034000, episode=1506 reward=0.7647464 (470.44 it/sec) -training >> step=9034100, episode=1506 reward=0.7819036 (533.01 it/sec) -training >> step=9034200, episode=1506 reward=0.7924086 (495.50 it/sec) -training >> step=9034300, episode=1506 reward=0.8069683 (488.30 it/sec) -training >> step=9034400, episode=1506 reward=0.792431 (477.80 it/sec) -training >> step=9034500, episode=1506 reward=0.7870069 (477.64 it/sec) -training >> step=9034600, episode=1506 reward=0.7856433 (484.41 it/sec) -training >> step=9034700, episode=1506 reward=0.7973659 (480.11 it/sec) -training >> step=9034800, episode=1506 reward=0.7937361 (461.67 it/sec) -training >> step=9034900, episode=1506 reward=0.7816491 (506.62 it/sec) -training >> step=9035000, episode=1506 reward=0.7887695 (484.44 it/sec) -training >> step=9035100, episode=1506 reward=0.7618814 (502.17 it/sec) -training >> step=9035200, episode=1506 reward=0.7943388 (504.77 it/sec) -training >> step=9035300, episode=1507 reward=0.7984701 (92.02 it/sec) -training >> step=9035400, episode=1507 reward=0.7798957 (440.69 it/sec) -training >> step=9035500, episode=1507 reward=0.798182 (485.63 it/sec) -training >> step=9035600, episode=1507 reward=0.790716 (536.88 it/sec) -training >> step=9035700, episode=1507 reward=0.7911233 (482.87 it/sec) -training >> step=9035800, episode=1507 reward=0.8031893 (465.22 it/sec) -training >> step=9035900, episode=1507 reward=0.7876401 (475.90 it/sec) -training >> step=9036000, episode=1507 reward=0.7937289 (466.59 it/sec) -training >> step=9036100, episode=1507 reward=0.7824762 (497.84 it/sec) -training >> step=9036200, episode=1507 reward=0.7778521 (506.42 it/sec) -training >> step=9036300, episode=1507 reward=0.7975634 (494.87 it/sec) -training >> step=9036400, episode=1507 reward=0.7857408 (489.59 it/sec) -training >> step=9036500, episode=1507 reward=0.7798764 (486.06 it/sec) -training >> step=9036600, episode=1507 reward=0.7786263 (499.22 it/sec) -training >> step=9036700, episode=1507 reward=0.7886596 (468.83 it/sec) -training >> step=9036800, episode=1507 reward=0.7946958 (435.23 it/sec) -training >> step=9036900, episode=1507 reward=0.7948123 (492.02 it/sec) -training >> step=9037000, episode=1507 reward=0.8126692 (459.84 it/sec) -training >> step=9037100, episode=1507 reward=0.7946457 (469.38 it/sec) -training >> step=9037200, episode=1507 reward=0.7905958 (504.84 it/sec) -training >> step=9037300, episode=1507 reward=0.8123342 (490.57 it/sec) -training >> step=9037400, episode=1507 reward=0.7939149 (481.77 it/sec) -training >> step=9037500, episode=1507 reward=0.7860913 (491.89 it/sec) -training >> step=9037600, episode=1507 reward=0.7654001 (518.69 it/sec) -training >> step=9037700, episode=1507 reward=0.7898667 (445.45 it/sec) -training >> step=9037800, episode=1507 reward=0.7940351 (477.14 it/sec) -training >> step=9037900, episode=1507 reward=0.8024095 (431.81 it/sec) -training >> step=9038000, episode=1507 reward=0.7964395 (484.03 it/sec) -training >> step=9038100, episode=1507 reward=0.7992786 (460.56 it/sec) -training >> step=9038200, episode=1507 reward=0.8041027 (448.45 it/sec) -training >> step=9038300, episode=1507 reward=0.7842039 (507.49 it/sec) -training >> step=9038400, episode=1507 reward=0.7903562 (484.41 it/sec) -training >> step=9038500, episode=1507 reward=0.7987415 (479.71 it/sec) -training >> step=9038600, episode=1507 reward=0.8005456 (496.19 it/sec) -training >> step=9038700, episode=1507 reward=0.7875607 (487.47 it/sec) -training >> step=9038800, episode=1507 reward=0.7891901 (460.35 it/sec) -training >> step=9038900, episode=1507 reward=0.7890028 (493.89 it/sec) -training >> step=9039000, episode=1507 reward=0.7799602 (526.66 it/sec) -training >> step=9039100, episode=1507 reward=0.8111368 (514.42 it/sec) -training >> step=9039200, episode=1507 reward=0.7820485 (500.75 it/sec) -training >> step=9039300, episode=1507 reward=0.8005878 (490.38 it/sec) -training >> step=9039400, episode=1507 reward=0.7863812 (510.20 it/sec) -training >> step=9039500, episode=1507 reward=0.7995593 (478.66 it/sec) -training >> step=9039600, episode=1507 reward=0.7843568 (488.60 it/sec) -training >> step=9039700, episode=1507 reward=0.7851712 (485.37 it/sec) -training >> step=9039800, episode=1507 reward=0.803277 (486.30 it/sec) -training >> step=9039900, episode=1507 reward=0.7864283 (462.89 it/sec) -training >> step=9040000, episode=1507 reward=0.784557 (495.36 it/sec) -training >> step=9040100, episode=1507 reward=0.7980829 (486.19 it/sec) -training >> step=9040200, episode=1507 reward=0.7868174 (462.42 it/sec) -training >> step=9040300, episode=1507 reward=0.7821591 (467.38 it/sec) -training >> step=9040400, episode=1507 reward=0.7969896 (474.33 it/sec) -training >> step=9040500, episode=1507 reward=0.7907782 (476.51 it/sec) -training >> step=9040600, episode=1507 reward=0.7698041 (497.58 it/sec) -training >> step=9040700, episode=1507 reward=0.7878987 (489.93 it/sec) -training >> step=9040800, episode=1507 reward=0.7996591 (500.34 it/sec) -training >> step=9040900, episode=1507 reward=0.7865158 (507.04 it/sec) -training >> step=9041000, episode=1507 reward=0.7794089 (480.58 it/sec) -training >> step=9041100, episode=1507 reward=0.7749516 (485.36 it/sec) -training >> step=9041200, episode=1507 reward=0.7854605 (460.38 it/sec) -training >> step=9041300, episode=1508 reward=0.7991025 (89.45 it/sec) -training >> step=9041400, episode=1508 reward=0.7909456 (459.99 it/sec) -training >> step=9041500, episode=1508 reward=0.8098028 (443.95 it/sec) -training >> step=9041600, episode=1508 reward=0.7708322 (524.36 it/sec) -training >> step=9041700, episode=1508 reward=0.7911075 (467.38 it/sec) -training >> step=9041800, episode=1508 reward=0.8027722 (481.14 it/sec) -training >> step=9041900, episode=1508 reward=0.7859117 (474.89 it/sec) -training >> step=9042000, episode=1508 reward=0.8003912 (508.47 it/sec) -training >> step=9042100, episode=1508 reward=0.7882288 (486.72 it/sec) -training >> step=9042200, episode=1508 reward=0.7909343 (467.52 it/sec) -training >> step=9042300, episode=1508 reward=0.7805611 (470.76 it/sec) -training >> step=9042400, episode=1508 reward=0.7677658 (517.70 it/sec) -training >> step=9042500, episode=1508 reward=0.7978349 (478.16 it/sec) -training >> step=9042600, episode=1508 reward=0.795498 (516.54 it/sec) -training >> step=9042700, episode=1508 reward=0.7895847 (478.51 it/sec) -training >> step=9042800, episode=1508 reward=0.7831735 (513.90 it/sec) -training >> step=9042900, episode=1508 reward=0.7888546 (501.73 it/sec) -training >> step=9043000, episode=1508 reward=0.8043773 (475.69 it/sec) -training >> step=9043100, episode=1508 reward=0.7964506 (526.90 it/sec) -training >> step=9043200, episode=1508 reward=0.7642995 (477.14 it/sec) -training >> step=9043300, episode=1508 reward=0.798656 (462.48 it/sec) -training >> step=9043400, episode=1508 reward=0.791694 (513.97 it/sec) -training >> step=9043500, episode=1508 reward=0.7963887 (470.30 it/sec) -training >> step=9043600, episode=1508 reward=0.7856649 (435.66 it/sec) -training >> step=9043700, episode=1508 reward=0.7652898 (431.40 it/sec) -training >> step=9043800, episode=1508 reward=0.8078934 (484.13 it/sec) -training >> step=9043900, episode=1508 reward=0.7911331 (429.72 it/sec) -training >> step=9044000, episode=1508 reward=0.7755662 (518.29 it/sec) -training >> step=9044100, episode=1508 reward=0.7752172 (533.61 it/sec) -training >> step=9044200, episode=1508 reward=0.7949255 (518.99 it/sec) -training >> step=9044300, episode=1508 reward=0.7955053 (470.20 it/sec) -training >> step=9044400, episode=1508 reward=0.7920023 (421.14 it/sec) -training >> step=9044500, episode=1508 reward=0.7904251 (442.16 it/sec) -training >> step=9044600, episode=1508 reward=0.8002382 (459.50 it/sec) -training >> step=9044700, episode=1508 reward=0.813898 (478.97 it/sec) -training >> step=9044800, episode=1508 reward=0.7952383 (506.21 it/sec) -training >> step=9044900, episode=1508 reward=0.7747952 (524.14 it/sec) -training >> step=9045000, episode=1508 reward=0.783981 (499.40 it/sec) -training >> step=9045100, episode=1508 reward=0.8102107 (477.66 it/sec) -training >> step=9045200, episode=1508 reward=0.7614893 (517.34 it/sec) -training >> step=9045300, episode=1508 reward=0.7958633 (466.55 it/sec) -training >> step=9045400, episode=1508 reward=0.7914898 (459.43 it/sec) -training >> step=9045500, episode=1508 reward=0.8131211 (481.55 it/sec) -training >> step=9045600, episode=1508 reward=0.7925303 (534.99 it/sec) -training >> step=9045700, episode=1508 reward=0.8017987 (486.78 it/sec) -training >> step=9045800, episode=1508 reward=0.7739069 (527.67 it/sec) -training >> step=9045900, episode=1508 reward=0.8012874 (479.85 it/sec) -training >> step=9046000, episode=1508 reward=0.7844054 (518.50 it/sec) -training >> step=9046100, episode=1508 reward=0.8031766 (457.70 it/sec) -training >> step=9046200, episode=1508 reward=0.7908627 (487.90 it/sec) -training >> step=9046300, episode=1508 reward=0.7953459 (509.04 it/sec) -training >> step=9046400, episode=1508 reward=0.800867 (496.19 it/sec) -training >> step=9046500, episode=1508 reward=0.7710881 (489.72 it/sec) -training >> step=9046600, episode=1508 reward=0.7858561 (476.56 it/sec) -training >> step=9046700, episode=1508 reward=0.7839369 (524.17 it/sec) -training >> step=9046800, episode=1508 reward=0.7716515 (517.92 it/sec) -training >> step=9046900, episode=1508 reward=0.7976231 (469.86 it/sec) -training >> step=9047000, episode=1508 reward=0.7759988 (456.81 it/sec) -training >> step=9047100, episode=1508 reward=0.7760547 (508.55 it/sec) -training >> step=9047200, episode=1508 reward=0.7856991 (478.22 it/sec) -training >> step=9047300, episode=1509 reward=0.8049105 (53.08 it/sec) -training >> step=9047400, episode=1509 reward=0.7850413 (388.99 it/sec) -training >> step=9047500, episode=1509 reward=0.7726139 (438.74 it/sec) -training >> step=9047600, episode=1509 reward=0.7955978 (470.08 it/sec) -training >> step=9047700, episode=1509 reward=0.7906942 (487.09 it/sec) -training >> step=9047800, episode=1509 reward=0.787095 (434.07 it/sec) -training >> step=9047900, episode=1509 reward=0.7777556 (367.42 it/sec) -training >> step=9048000, episode=1509 reward=0.773615 (410.15 it/sec) -training >> step=9048100, episode=1509 reward=0.7926565 (307.56 it/sec) -training >> step=9048200, episode=1509 reward=0.7877843 (380.63 it/sec) -training >> step=9048300, episode=1509 reward=0.8025464 (508.62 it/sec) -training >> step=9048400, episode=1509 reward=0.8098975 (460.60 it/sec) -training >> step=9048500, episode=1509 reward=0.7843603 (438.08 it/sec) -training >> step=9048600, episode=1509 reward=0.7969862 (464.65 it/sec) -training >> step=9048700, episode=1509 reward=0.7924742 (446.03 it/sec) -training >> step=9048800, episode=1509 reward=0.7819188 (459.14 it/sec) -training >> step=9048900, episode=1509 reward=0.7715565 (455.52 it/sec) -training >> step=9049000, episode=1509 reward=0.8001195 (454.26 it/sec) -training >> step=9049100, episode=1509 reward=0.8055071 (444.72 it/sec) -training >> step=9049200, episode=1509 reward=0.7872945 (456.38 it/sec) -training >> step=9049300, episode=1509 reward=0.7705027 (486.51 it/sec) -training >> step=9049400, episode=1509 reward=0.809737 (477.82 it/sec) -training >> step=9049500, episode=1509 reward=0.7952228 (465.01 it/sec) -training >> step=9049600, episode=1509 reward=0.790122 (464.34 it/sec) -training >> step=9049700, episode=1509 reward=0.7904313 (476.98 it/sec) -training >> step=9049800, episode=1509 reward=0.7889083 (470.15 it/sec) -training >> step=9049900, episode=1509 reward=0.7953867 (451.03 it/sec) -training >> step=9050000, episode=1509 reward=0.8005604 (456.15 it/sec) -training >> step=9050100, episode=1509 reward=0.7745261 (499.57 it/sec) -training >> step=9050200, episode=1509 reward=0.803921 (518.11 it/sec) -training >> step=9050300, episode=1509 reward=0.7791255 (487.60 it/sec) -training >> step=9050400, episode=1509 reward=0.7878039 (548.47 it/sec) -training >> step=9050500, episode=1509 reward=0.7809464 (494.01 it/sec) -training >> step=9050600, episode=1509 reward=0.7904457 (479.95 it/sec) -training >> step=9050700, episode=1509 reward=0.7834886 (459.40 it/sec) -training >> step=9050800, episode=1509 reward=0.790738 (506.14 it/sec) -training >> step=9050900, episode=1509 reward=0.7970589 (437.68 it/sec) -training >> step=9051000, episode=1509 reward=0.7968851 (489.42 it/sec) -training >> step=9051100, episode=1509 reward=0.7916893 (500.26 it/sec) -training >> step=9051200, episode=1509 reward=0.7943008 (480.13 it/sec) -training >> step=9051300, episode=1509 reward=0.7984321 (488.45 it/sec) -training >> step=9051400, episode=1509 reward=0.7961468 (462.97 it/sec) -training >> step=9051500, episode=1509 reward=0.7894791 (516.27 it/sec) -training >> step=9051600, episode=1509 reward=0.780481 (474.44 it/sec) -training >> step=9051700, episode=1509 reward=0.7964615 (468.11 it/sec) -training >> step=9051800, episode=1509 reward=0.7969765 (521.23 it/sec) -training >> step=9051900, episode=1509 reward=0.7851906 (483.12 it/sec) -training >> step=9052000, episode=1509 reward=0.7854626 (509.19 it/sec) -training >> step=9052100, episode=1509 reward=0.7654211 (464.02 it/sec) -training >> step=9052200, episode=1509 reward=0.7814103 (466.17 it/sec) -training >> step=9052300, episode=1509 reward=0.7852424 (493.95 it/sec) -training >> step=9052400, episode=1509 reward=0.7969378 (485.91 it/sec) -training >> step=9052500, episode=1509 reward=0.8089023 (529.30 it/sec) -training >> step=9052600, episode=1509 reward=0.7890104 (510.27 it/sec) -training >> step=9052700, episode=1509 reward=0.8008044 (473.80 it/sec) -training >> step=9052800, episode=1509 reward=0.7855921 (513.98 it/sec) -training >> step=9052900, episode=1509 reward=0.8208708 (515.18 it/sec) -training >> step=9053000, episode=1509 reward=0.7949683 (473.18 it/sec) -training >> step=9053100, episode=1509 reward=0.7820587 (453.06 it/sec) -training >> step=9053200, episode=1509 reward=0.7893187 (535.18 it/sec) -training >> step=9053300, episode=1510 reward=0.7852716 (42.38 it/sec) -training >> step=9053400, episode=1510 reward=0.7927173 (474.79 it/sec) -training >> step=9053500, episode=1510 reward=0.7832254 (491.57 it/sec) -training >> step=9053600, episode=1510 reward=0.8010424 (479.88 it/sec) -training >> step=9053700, episode=1510 reward=0.789737 (522.20 it/sec) -training >> step=9053800, episode=1510 reward=0.7929321 (503.48 it/sec) -training >> step=9053900, episode=1510 reward=0.7737063 (483.05 it/sec) -training >> step=9054000, episode=1510 reward=0.8000838 (480.28 it/sec) -training >> step=9054100, episode=1510 reward=0.7966704 (487.95 it/sec) -training >> step=9054200, episode=1510 reward=0.7873768 (410.34 it/sec) -training >> step=9054300, episode=1510 reward=0.7758397 (458.13 it/sec) -training >> step=9054400, episode=1510 reward=0.8020805 (461.57 it/sec) -training >> step=9054500, episode=1510 reward=0.8017049 (497.10 it/sec) -training >> step=9054600, episode=1510 reward=0.7915178 (506.08 it/sec) -training >> step=9054700, episode=1510 reward=0.798414 (505.38 it/sec) -training >> step=9054800, episode=1510 reward=0.8022225 (438.98 it/sec) -training >> step=9054900, episode=1510 reward=0.7723368 (469.68 it/sec) -training >> step=9055000, episode=1510 reward=0.7856095 (505.66 it/sec) -training >> step=9055100, episode=1510 reward=0.8025029 (508.10 it/sec) -training >> step=9055200, episode=1510 reward=0.7717947 (537.65 it/sec) -training >> step=9055300, episode=1510 reward=0.8004095 (459.33 it/sec) -training >> step=9055400, episode=1510 reward=0.784574 (452.23 it/sec) -training >> step=9055500, episode=1510 reward=0.8025969 (471.51 it/sec) -training >> step=9055600, episode=1510 reward=0.7874957 (442.96 it/sec) -training >> step=9055700, episode=1510 reward=0.8002455 (445.59 it/sec) -training >> step=9055800, episode=1510 reward=0.778061 (471.62 it/sec) -training >> step=9055900, episode=1510 reward=0.7719765 (439.85 it/sec) -training >> step=9056000, episode=1510 reward=0.7882664 (477.16 it/sec) -training >> step=9056100, episode=1510 reward=0.7990471 (487.07 it/sec) -training >> step=9056200, episode=1510 reward=0.7911322 (432.73 it/sec) -training >> step=9056300, episode=1510 reward=0.7885327 (535.80 it/sec) -training >> step=9056400, episode=1510 reward=0.7901428 (449.05 it/sec) -training >> step=9056500, episode=1510 reward=0.7935331 (468.92 it/sec) -training >> step=9056600, episode=1510 reward=0.7830254 (523.12 it/sec) -training >> step=9056700, episode=1510 reward=0.7844714 (471.80 it/sec) -training >> step=9056800, episode=1510 reward=0.8071885 (494.50 it/sec) -training >> step=9056900, episode=1510 reward=0.8132565 (486.05 it/sec) -training >> step=9057000, episode=1510 reward=0.7821922 (501.50 it/sec) -training >> step=9057100, episode=1510 reward=0.7959162 (461.10 it/sec) -training >> step=9057200, episode=1510 reward=0.7970892 (434.39 it/sec) -training >> step=9057300, episode=1510 reward=0.7937908 (518.26 it/sec) -training >> step=9057400, episode=1510 reward=0.791407 (495.98 it/sec) -training >> step=9057500, episode=1510 reward=0.7948649 (463.24 it/sec) -training >> step=9057600, episode=1510 reward=0.8122396 (412.53 it/sec) -training >> step=9057700, episode=1510 reward=0.7809857 (427.86 it/sec) -training >> step=9057800, episode=1510 reward=0.773138 (409.81 it/sec) -training >> step=9057900, episode=1510 reward=0.810004 (408.38 it/sec) -training >> step=9058000, episode=1510 reward=0.7820837 (456.74 it/sec) -training >> step=9058100, episode=1510 reward=0.8097584 (460.70 it/sec) -training >> step=9058200, episode=1510 reward=0.7930226 (415.15 it/sec) -training >> step=9058300, episode=1510 reward=0.7838134 (480.25 it/sec) -training >> step=9058400, episode=1510 reward=0.7780244 (522.22 it/sec) -training >> step=9058500, episode=1510 reward=0.7901368 (531.85 it/sec) -training >> step=9058600, episode=1510 reward=0.7933043 (490.44 it/sec) -training >> step=9058700, episode=1510 reward=0.7904571 (475.72 it/sec) -training >> step=9058800, episode=1510 reward=0.7800929 (453.68 it/sec) -training >> step=9058900, episode=1510 reward=0.7991927 (494.73 it/sec) -training >> step=9059000, episode=1510 reward=0.8001826 (527.66 it/sec) -training >> step=9059100, episode=1510 reward=0.7980095 (488.01 it/sec) -training >> step=9059200, episode=1510 reward=0.7763225 (544.16 it/sec) -training >> step=9059300, episode=1511 reward=0.7907597 (70.73 it/sec) -training >> step=9059400, episode=1511 reward=0.7921948 (541.34 it/sec) -training >> step=9059500, episode=1511 reward=0.7853941 (494.47 it/sec) -training >> step=9059600, episode=1511 reward=0.7699047 (457.80 it/sec) -training >> step=9059700, episode=1511 reward=0.7739027 (467.24 it/sec) -training >> step=9059800, episode=1511 reward=0.7940434 (514.00 it/sec) -training >> step=9059900, episode=1511 reward=0.7950125 (518.32 it/sec) -training >> step=9060000, episode=1511 reward=0.790641 (495.86 it/sec) -training >> step=9060100, episode=1511 reward=0.8050506 (498.66 it/sec) -training >> step=9060200, episode=1511 reward=0.7932101 (513.74 it/sec) -training >> step=9060300, episode=1511 reward=0.7811339 (468.46 it/sec) -training >> step=9060400, episode=1511 reward=0.7942899 (438.94 it/sec) -training >> step=9060500, episode=1511 reward=0.815051 (416.36 it/sec) -training >> step=9060600, episode=1511 reward=0.8106443 (407.47 it/sec) -training >> step=9060700, episode=1511 reward=0.7940705 (405.58 it/sec) -training >> step=9060800, episode=1511 reward=0.8103139 (494.59 it/sec) -training >> step=9060900, episode=1511 reward=0.8003201 (489.01 it/sec) -training >> step=9061000, episode=1511 reward=0.7942447 (512.10 it/sec) -training >> step=9061100, episode=1511 reward=0.7805966 (542.87 it/sec) -training >> step=9061200, episode=1511 reward=0.7912439 (507.03 it/sec) -training >> step=9061300, episode=1511 reward=0.7894678 (435.24 it/sec) -training >> step=9061400, episode=1511 reward=0.7886972 (458.17 it/sec) -training >> step=9061500, episode=1511 reward=0.7812577 (521.74 it/sec) -training >> step=9061600, episode=1511 reward=0.8104 (469.19 it/sec) -training >> step=9061700, episode=1511 reward=0.8002068 (467.02 it/sec) -training >> step=9061800, episode=1511 reward=0.8095319 (497.88 it/sec) -training >> step=9061900, episode=1511 reward=0.7781624 (466.51 it/sec) -training >> step=9062000, episode=1511 reward=0.8053758 (494.35 it/sec) -training >> step=9062100, episode=1511 reward=0.7985862 (484.15 it/sec) -training >> step=9062200, episode=1511 reward=0.7778478 (493.34 it/sec) -training >> step=9062300, episode=1511 reward=0.8000519 (526.72 it/sec) -training >> step=9062400, episode=1511 reward=0.7723864 (472.32 it/sec) -training >> step=9062500, episode=1511 reward=0.794167 (477.63 it/sec) -training >> step=9062600, episode=1511 reward=0.802718 (461.29 it/sec) -training >> step=9062700, episode=1511 reward=0.7897028 (454.81 it/sec) -training >> step=9062800, episode=1511 reward=0.7856541 (491.37 it/sec) -training >> step=9062900, episode=1511 reward=0.7947144 (497.31 it/sec) -training >> step=9063000, episode=1511 reward=0.7959127 (492.63 it/sec) -training >> step=9063100, episode=1511 reward=0.7923654 (402.31 it/sec) -training >> step=9063200, episode=1511 reward=0.7837039 (509.63 it/sec) -training >> step=9063300, episode=1511 reward=0.7980622 (494.82 it/sec) -training >> step=9063400, episode=1511 reward=0.7660181 (472.17 it/sec) -training >> step=9063500, episode=1511 reward=0.7950176 (493.13 it/sec) -training >> step=9063600, episode=1511 reward=0.7769265 (506.42 it/sec) -training >> step=9063700, episode=1511 reward=0.7910023 (435.09 it/sec) -training >> step=9063800, episode=1511 reward=0.790785 (494.79 it/sec) -training >> step=9063900, episode=1511 reward=0.8043464 (479.66 it/sec) -training >> step=9064000, episode=1511 reward=0.8054404 (513.60 it/sec) -training >> step=9064100, episode=1511 reward=0.7933686 (448.34 it/sec) -training >> step=9064200, episode=1511 reward=0.7833664 (473.15 it/sec) -training >> step=9064300, episode=1511 reward=0.7824557 (495.69 it/sec) -training >> step=9064400, episode=1511 reward=0.7974635 (460.81 it/sec) -training >> step=9064500, episode=1511 reward=0.7676848 (475.86 it/sec) -training >> step=9064600, episode=1511 reward=0.7875838 (460.35 it/sec) -training >> step=9064700, episode=1511 reward=0.8042201 (498.60 it/sec) -training >> step=9064800, episode=1511 reward=0.7692438 (450.28 it/sec) -training >> step=9064900, episode=1511 reward=0.8111888 (469.81 it/sec) -training >> step=9065000, episode=1511 reward=0.7791598 (468.46 it/sec) -training >> step=9065100, episode=1511 reward=0.8068093 (485.06 it/sec) -training >> step=9065200, episode=1511 reward=0.7809237 (485.86 it/sec) -training >> step=9065300, episode=1512 reward=0.7934469 (98.73 it/sec) -training >> step=9065400, episode=1512 reward=0.8082741 (486.19 it/sec) -training >> step=9065500, episode=1512 reward=0.7847824 (434.28 it/sec) -training >> step=9065600, episode=1512 reward=0.7884672 (473.67 it/sec) -training >> step=9065700, episode=1512 reward=0.7908508 (493.77 it/sec) -training >> step=9065800, episode=1512 reward=0.8094491 (448.75 it/sec) -training >> step=9065900, episode=1512 reward=0.7945486 (493.79 it/sec) -training >> step=9066000, episode=1512 reward=0.775314 (486.45 it/sec) -training >> step=9066100, episode=1512 reward=0.7939059 (478.94 it/sec) -training >> step=9066200, episode=1512 reward=0.8051891 (444.92 it/sec) -training >> step=9066300, episode=1512 reward=0.7848176 (465.14 it/sec) -training >> step=9066400, episode=1512 reward=0.7800472 (490.85 it/sec) -training >> step=9066500, episode=1512 reward=0.770834 (473.14 it/sec) -training >> step=9066600, episode=1512 reward=0.8124177 (481.97 it/sec) -training >> step=9066700, episode=1512 reward=0.7489724 (458.07 it/sec) -training >> step=9066800, episode=1512 reward=0.7811016 (427.88 it/sec) -training >> step=9066900, episode=1512 reward=0.7797596 (481.04 it/sec) -training >> step=9067000, episode=1512 reward=0.7804367 (448.66 it/sec) -training >> step=9067100, episode=1512 reward=0.7594729 (450.08 it/sec) -training >> step=9067200, episode=1512 reward=0.7954102 (484.93 it/sec) -training >> step=9067300, episode=1512 reward=0.7721714 (473.66 it/sec) -training >> step=9067400, episode=1512 reward=0.7852952 (506.13 it/sec) -training >> step=9067500, episode=1512 reward=0.8098519 (446.14 it/sec) -training >> step=9067600, episode=1512 reward=0.7940779 (458.32 it/sec) -training >> step=9067700, episode=1512 reward=0.7731217 (463.76 it/sec) -training >> step=9067800, episode=1512 reward=0.7776246 (495.50 it/sec) -training >> step=9067900, episode=1512 reward=0.795315 (460.57 it/sec) -training >> step=9068000, episode=1512 reward=0.7820672 (445.28 it/sec) -training >> step=9068100, episode=1512 reward=0.8077966 (472.91 it/sec) -training >> step=9068200, episode=1512 reward=0.7797796 (482.96 it/sec) -training >> step=9068300, episode=1512 reward=0.8053916 (449.79 it/sec) -training >> step=9068400, episode=1512 reward=0.7817976 (482.77 it/sec) -training >> step=9068500, episode=1512 reward=0.7952309 (478.98 it/sec) -training >> step=9068600, episode=1512 reward=0.7678626 (475.81 it/sec) -training >> step=9068700, episode=1512 reward=0.789796 (494.73 it/sec) -training >> step=9068800, episode=1512 reward=0.8013245 (469.04 it/sec) -training >> step=9068900, episode=1512 reward=0.7858073 (472.68 it/sec) -training >> step=9069000, episode=1512 reward=0.7866566 (436.08 it/sec) -training >> step=9069100, episode=1512 reward=0.80213 (418.80 it/sec) -training >> step=9069200, episode=1512 reward=0.7948358 (493.48 it/sec) -training >> step=9069300, episode=1512 reward=0.7988157 (487.33 it/sec) -training >> step=9069400, episode=1512 reward=0.7956281 (486.91 it/sec) -training >> step=9069500, episode=1512 reward=0.7812765 (476.51 it/sec) -training >> step=9069600, episode=1512 reward=0.7836743 (474.36 it/sec) -training >> step=9069700, episode=1512 reward=0.7770389 (496.77 it/sec) -training >> step=9069800, episode=1512 reward=0.7964014 (492.46 it/sec) -training >> step=9069900, episode=1512 reward=0.7965578 (493.69 it/sec) -training >> step=9070000, episode=1512 reward=0.8050742 (497.16 it/sec) -training >> step=9070100, episode=1512 reward=0.7851372 (492.90 it/sec) -training >> step=9070200, episode=1512 reward=0.7780218 (501.32 it/sec) -training >> step=9070300, episode=1512 reward=0.7905527 (546.69 it/sec) -training >> step=9070400, episode=1512 reward=0.78821 (514.16 it/sec) -training >> step=9070500, episode=1512 reward=0.8095377 (501.49 it/sec) -training >> step=9070600, episode=1512 reward=0.7831677 (484.76 it/sec) -training >> step=9070700, episode=1512 reward=0.776989 (547.79 it/sec) -training >> step=9070800, episode=1512 reward=0.7958879 (507.75 it/sec) -training >> step=9070900, episode=1512 reward=0.7906168 (510.78 it/sec) -training >> step=9071000, episode=1512 reward=0.7876608 (477.07 it/sec) -training >> step=9071100, episode=1512 reward=0.7792491 (498.13 it/sec) -training >> step=9071200, episode=1512 reward=0.8032089 (519.55 it/sec) -training >> step=9071300, episode=1513 reward=0.7855263 (101.93 it/sec) -training >> step=9071400, episode=1513 reward=0.7927778 (543.93 it/sec) -training >> step=9071500, episode=1513 reward=0.7856884 (492.77 it/sec) -training >> step=9071600, episode=1513 reward=0.8088241 (493.58 it/sec) -training >> step=9071700, episode=1513 reward=0.7805901 (515.55 it/sec) -training >> step=9071800, episode=1513 reward=0.7863737 (505.72 it/sec) -training >> step=9071900, episode=1513 reward=0.7952083 (508.12 it/sec) -training >> step=9072000, episode=1513 reward=0.7943786 (482.76 it/sec) -training >> step=9072100, episode=1513 reward=0.7894038 (506.57 it/sec) -training >> step=9072200, episode=1513 reward=0.7964931 (522.88 it/sec) -training >> step=9072300, episode=1513 reward=0.7909758 (509.51 it/sec) -training >> step=9072400, episode=1513 reward=0.7857911 (534.49 it/sec) -training >> step=9072500, episode=1513 reward=0.7961827 (522.21 it/sec) -training >> step=9072600, episode=1513 reward=0.7797177 (525.76 it/sec) -training >> step=9072700, episode=1513 reward=0.787122 (540.63 it/sec) -training >> step=9072800, episode=1513 reward=0.7920329 (503.15 it/sec) -training >> step=9072900, episode=1513 reward=0.7944015 (519.92 it/sec) -training >> step=9073000, episode=1513 reward=0.8005721 (525.95 it/sec) -training >> step=9073100, episode=1513 reward=0.7899808 (509.16 it/sec) -training >> step=9073200, episode=1513 reward=0.7852383 (515.00 it/sec) -training >> step=9073300, episode=1513 reward=0.7925864 (472.47 it/sec) -training >> step=9073400, episode=1513 reward=0.7803242 (546.40 it/sec) -training >> step=9073500, episode=1513 reward=0.7910837 (517.18 it/sec) -training >> step=9073600, episode=1513 reward=0.8001589 (530.86 it/sec) -training >> step=9073700, episode=1513 reward=0.793929 (486.91 it/sec) -training >> step=9073800, episode=1513 reward=0.8012537 (541.81 it/sec) -training >> step=9073900, episode=1513 reward=0.7785243 (490.03 it/sec) -training >> step=9074000, episode=1513 reward=0.7820939 (470.27 it/sec) -training >> step=9074100, episode=1513 reward=0.7979344 (424.60 it/sec) -training >> step=9074200, episode=1513 reward=0.7849553 (419.91 it/sec) -training >> step=9074300, episode=1513 reward=0.7933045 (414.25 it/sec) -training >> step=9074400, episode=1513 reward=0.7932893 (401.91 it/sec) -training >> step=9074500, episode=1513 reward=0.8025445 (501.73 it/sec) -training >> step=9074600, episode=1513 reward=0.7805552 (522.39 it/sec) -training >> step=9074700, episode=1513 reward=0.7799524 (463.76 it/sec) -training >> step=9074800, episode=1513 reward=0.8049825 (534.82 it/sec) -training >> step=9074900, episode=1513 reward=0.7965972 (528.24 it/sec) -training >> step=9075000, episode=1513 reward=0.7905586 (485.88 it/sec) -training >> step=9075100, episode=1513 reward=0.7862948 (449.33 it/sec) -training >> step=9075200, episode=1513 reward=0.7667646 (458.65 it/sec) -training >> step=9075300, episode=1513 reward=0.7913394 (485.69 it/sec) -training >> step=9075400, episode=1513 reward=0.7909245 (532.89 it/sec) -training >> step=9075500, episode=1513 reward=0.7941214 (526.19 it/sec) -training >> step=9075600, episode=1513 reward=0.7830167 (529.81 it/sec) -training >> step=9075700, episode=1513 reward=0.8051178 (502.60 it/sec) -training >> step=9075800, episode=1513 reward=0.8009005 (510.90 it/sec) -training >> step=9075900, episode=1513 reward=0.79245 (530.11 it/sec) -training >> step=9076000, episode=1513 reward=0.7750925 (537.80 it/sec) -training >> step=9076100, episode=1513 reward=0.7803894 (517.29 it/sec) -training >> step=9076200, episode=1513 reward=0.7944192 (505.55 it/sec) -training >> step=9076300, episode=1513 reward=0.7871106 (536.27 it/sec) -training >> step=9076400, episode=1513 reward=0.7890897 (477.71 it/sec) -training >> step=9076500, episode=1513 reward=0.791266 (516.57 it/sec) -training >> step=9076600, episode=1513 reward=0.8023579 (533.48 it/sec) -training >> step=9076700, episode=1513 reward=0.7898777 (565.17 it/sec) -training >> step=9076800, episode=1513 reward=0.7989259 (516.42 it/sec) -training >> step=9076900, episode=1513 reward=0.7918872 (501.38 it/sec) -training >> step=9077000, episode=1513 reward=0.7860045 (533.71 it/sec) -training >> step=9077100, episode=1513 reward=0.7803329 (522.38 it/sec) -training >> step=9077200, episode=1513 reward=0.7797915 (514.27 it/sec) -training >> step=9077300, episode=1514 reward=0.7813727 (120.70 it/sec) -training >> step=9077400, episode=1514 reward=0.778239 (535.43 it/sec) -training >> step=9077500, episode=1514 reward=0.7808464 (511.41 it/sec) -training >> step=9077600, episode=1514 reward=0.7772104 (512.71 it/sec) -training >> step=9077700, episode=1514 reward=0.7829267 (506.69 it/sec) -training >> step=9077800, episode=1514 reward=0.80358 (544.83 it/sec) -training >> step=9077900, episode=1514 reward=0.7987437 (496.46 it/sec) -training >> step=9078000, episode=1514 reward=0.7893322 (498.68 it/sec) -training >> step=9078100, episode=1514 reward=0.7989573 (529.43 it/sec) -training >> step=9078200, episode=1514 reward=0.8069289 (515.86 it/sec) -training >> step=9078300, episode=1514 reward=0.7767244 (483.86 it/sec) -training >> step=9078400, episode=1514 reward=0.7943968 (534.37 it/sec) -training >> step=9078500, episode=1514 reward=0.7852835 (539.61 it/sec) -training >> step=9078600, episode=1514 reward=0.7939521 (472.60 it/sec) -training >> step=9078700, episode=1514 reward=0.7769318 (513.22 it/sec) -training >> step=9078800, episode=1514 reward=0.8031004 (525.39 it/sec) -training >> step=9078900, episode=1514 reward=0.7839364 (526.05 it/sec) -training >> step=9079000, episode=1514 reward=0.7673616 (512.50 it/sec) -training >> step=9079100, episode=1514 reward=0.7971733 (527.77 it/sec) -training >> step=9079200, episode=1514 reward=0.7647749 (539.36 it/sec) -training >> step=9079300, episode=1514 reward=0.7812379 (497.00 it/sec) -training >> step=9079400, episode=1514 reward=0.7893345 (513.16 it/sec) -training >> step=9079500, episode=1514 reward=0.7975851 (534.69 it/sec) -training >> step=9079600, episode=1514 reward=0.8032197 (531.51 it/sec) -training >> step=9079700, episode=1514 reward=0.7952501 (513.07 it/sec) -training >> step=9079800, episode=1514 reward=0.80185 (501.58 it/sec) -training >> step=9079900, episode=1514 reward=0.7960725 (534.05 it/sec) -training >> step=9080000, episode=1514 reward=0.789197 (443.82 it/sec) -training >> step=9080100, episode=1514 reward=0.7716821 (490.15 it/sec) -training >> step=9080200, episode=1514 reward=0.8018473 (516.15 it/sec) -training >> step=9080300, episode=1514 reward=0.8034316 (536.78 it/sec) -training >> step=9080400, episode=1514 reward=0.8030286 (533.96 it/sec) -training >> step=9080500, episode=1514 reward=0.8130515 (486.78 it/sec) -training >> step=9080600, episode=1514 reward=0.7987358 (529.86 it/sec) -training >> step=9080700, episode=1514 reward=0.8087482 (494.52 it/sec) -training >> step=9080800, episode=1514 reward=0.7909321 (429.33 it/sec) -training >> step=9080900, episode=1514 reward=0.7794338 (477.15 it/sec) -training >> step=9081000, episode=1514 reward=0.7914319 (549.06 it/sec) -training >> step=9081100, episode=1514 reward=0.8008219 (478.39 it/sec) -training >> step=9081200, episode=1514 reward=0.8104662 (487.70 it/sec) -training >> step=9081300, episode=1514 reward=0.7825969 (503.53 it/sec) -training >> step=9081400, episode=1514 reward=0.7978546 (542.25 it/sec) -training >> step=9081500, episode=1514 reward=0.7898161 (551.18 it/sec) -training >> step=9081600, episode=1514 reward=0.8041328 (529.79 it/sec) -training >> step=9081700, episode=1514 reward=0.7712071 (516.84 it/sec) -training >> step=9081800, episode=1514 reward=0.7763448 (536.23 it/sec) -training >> step=9081900, episode=1514 reward=0.7946576 (513.90 it/sec) -training >> step=9082000, episode=1514 reward=0.7770824 (520.93 it/sec) -training >> step=9082100, episode=1514 reward=0.8013805 (528.54 it/sec) -training >> step=9082200, episode=1514 reward=0.8113909 (520.66 it/sec) -training >> step=9082300, episode=1514 reward=0.7870104 (519.33 it/sec) -training >> step=9082400, episode=1514 reward=0.7742258 (535.33 it/sec) -training >> step=9082500, episode=1514 reward=0.7943185 (567.63 it/sec) -training >> step=9082600, episode=1514 reward=0.7783858 (527.58 it/sec) -training >> step=9082700, episode=1514 reward=0.795677 (526.27 it/sec) -training >> step=9082800, episode=1514 reward=0.7714416 (523.22 it/sec) -training >> step=9082900, episode=1514 reward=0.7880104 (500.26 it/sec) -training >> step=9083000, episode=1514 reward=0.7883164 (509.87 it/sec) -training >> step=9083100, episode=1514 reward=0.7731166 (522.58 it/sec) -training >> step=9083200, episode=1514 reward=0.8037184 (564.74 it/sec) -training >> step=9083300, episode=1515 reward=0.7915214 (95.75 it/sec) -training >> step=9083400, episode=1515 reward=0.7891386 (519.18 it/sec) -training >> step=9083500, episode=1515 reward=0.7849995 (538.93 it/sec) -training >> step=9083600, episode=1515 reward=0.7916995 (506.36 it/sec) -training >> step=9083700, episode=1515 reward=0.7840367 (525.52 it/sec) -training >> step=9083800, episode=1515 reward=0.7948304 (545.02 it/sec) -training >> step=9083900, episode=1515 reward=0.7813051 (511.59 it/sec) -training >> step=9084000, episode=1515 reward=0.7966163 (497.84 it/sec) -training >> step=9084100, episode=1515 reward=0.779851 (506.69 it/sec) -training >> step=9084200, episode=1515 reward=0.7836088 (552.37 it/sec) -training >> step=9084300, episode=1515 reward=0.79562 (525.12 it/sec) -training >> step=9084400, episode=1515 reward=0.7957819 (506.04 it/sec) -training >> step=9084500, episode=1515 reward=0.7759863 (548.60 it/sec) -training >> step=9084600, episode=1515 reward=0.801555 (515.69 it/sec) -training >> step=9084700, episode=1515 reward=0.7936226 (474.49 it/sec) -training >> step=9084800, episode=1515 reward=0.7849653 (489.09 it/sec) -training >> step=9084900, episode=1515 reward=0.7829189 (540.96 it/sec) -training >> step=9085000, episode=1515 reward=0.7731807 (484.81 it/sec) -training >> step=9085100, episode=1515 reward=0.7826167 (507.28 it/sec) -training >> step=9085200, episode=1515 reward=0.8071659 (476.70 it/sec) -training >> step=9085300, episode=1515 reward=0.7789444 (512.27 it/sec) -training >> step=9085400, episode=1515 reward=0.8014591 (533.98 it/sec) -training >> step=9085500, episode=1515 reward=0.8049501 (514.57 it/sec) -training >> step=9085600, episode=1515 reward=0.7993352 (537.44 it/sec) -training >> step=9085700, episode=1515 reward=0.7894888 (492.91 it/sec) -training >> step=9085800, episode=1515 reward=0.8107664 (508.28 it/sec) -training >> step=9085900, episode=1515 reward=0.7824671 (531.84 it/sec) -training >> step=9086000, episode=1515 reward=0.7985795 (534.92 it/sec) -training >> step=9086100, episode=1515 reward=0.7920905 (498.04 it/sec) -training >> step=9086200, episode=1515 reward=0.7640686 (528.27 it/sec) -training >> step=9086300, episode=1515 reward=0.7930083 (488.36 it/sec) -training >> step=9086400, episode=1515 reward=0.7714862 (544.33 it/sec) -training >> step=9086500, episode=1515 reward=0.7731915 (510.52 it/sec) -training >> step=9086600, episode=1515 reward=0.7848695 (549.39 it/sec) -training >> step=9086700, episode=1515 reward=0.8013293 (524.91 it/sec) -training >> step=9086800, episode=1515 reward=0.8057128 (516.71 it/sec) -training >> step=9086900, episode=1515 reward=0.7937689 (517.09 it/sec) -training >> step=9087000, episode=1515 reward=0.789905 (539.30 it/sec) -training >> step=9087100, episode=1515 reward=0.7837198 (521.57 it/sec) -training >> step=9087200, episode=1515 reward=0.7958468 (499.83 it/sec) -training >> step=9087300, episode=1515 reward=0.7991155 (523.45 it/sec) -training >> step=9087400, episode=1515 reward=0.7978442 (495.78 it/sec) -training >> step=9087500, episode=1515 reward=0.8005757 (530.14 it/sec) -training >> step=9087600, episode=1515 reward=0.7956325 (512.01 it/sec) -training >> step=9087700, episode=1515 reward=0.7994742 (501.58 it/sec) -training >> step=9087800, episode=1515 reward=0.7903763 (462.85 it/sec) -training >> step=9087900, episode=1515 reward=0.7793002 (511.07 it/sec) -training >> step=9088000, episode=1515 reward=0.7911893 (540.92 it/sec) -training >> step=9088100, episode=1515 reward=0.7863644 (541.42 it/sec) -training >> step=9088200, episode=1515 reward=0.7782663 (512.66 it/sec) -training >> step=9088300, episode=1515 reward=0.7893043 (519.96 it/sec) -training >> step=9088400, episode=1515 reward=0.7709566 (509.96 it/sec) -training >> step=9088500, episode=1515 reward=0.7811903 (518.36 it/sec) -training >> step=9088600, episode=1515 reward=0.7690562 (475.36 it/sec) -training >> step=9088700, episode=1515 reward=0.7936954 (456.82 it/sec) -training >> step=9088800, episode=1515 reward=0.8041137 (407.77 it/sec) -training >> step=9088900, episode=1515 reward=0.7977154 (412.18 it/sec) -training >> step=9089000, episode=1515 reward=0.7746795 (485.01 it/sec) -training >> step=9089100, episode=1515 reward=0.7912167 (515.72 it/sec) -training >> step=9089200, episode=1515 reward=0.7950217 (491.60 it/sec) -training >> step=9089300, episode=1516 reward=0.7678428 (115.00 it/sec) -training >> step=9089400, episode=1516 reward=0.790541 (498.35 it/sec) -training >> step=9089500, episode=1516 reward=0.8046395 (463.58 it/sec) -training >> step=9089600, episode=1516 reward=0.7903789 (516.93 it/sec) -training >> step=9089700, episode=1516 reward=0.8020024 (560.30 it/sec) -training >> step=9089800, episode=1516 reward=0.7782661 (498.25 it/sec) -training >> step=9089900, episode=1516 reward=0.8044143 (474.34 it/sec) -training >> step=9090000, episode=1516 reward=0.775589 (555.61 it/sec) -training >> step=9090100, episode=1516 reward=0.7771413 (513.54 it/sec) -training >> step=9090200, episode=1516 reward=0.7932101 (548.66 it/sec) -training >> step=9090300, episode=1516 reward=0.7925045 (528.78 it/sec) -training >> step=9090400, episode=1516 reward=0.7869843 (542.75 it/sec) -training >> step=9090500, episode=1516 reward=0.8102376 (488.06 it/sec) -training >> step=9090600, episode=1516 reward=0.7915499 (498.06 it/sec) -training >> step=9090700, episode=1516 reward=0.7844319 (532.83 it/sec) -training >> step=9090800, episode=1516 reward=0.8069025 (552.15 it/sec) -training >> step=9090900, episode=1516 reward=0.802935 (518.59 it/sec) -training >> step=9091000, episode=1516 reward=0.7965804 (503.56 it/sec) -training >> step=9091100, episode=1516 reward=0.7850615 (535.65 it/sec) -training >> step=9091200, episode=1516 reward=0.7871301 (501.93 it/sec) -training >> step=9091300, episode=1516 reward=0.7979482 (564.53 it/sec) -training >> step=9091400, episode=1516 reward=0.7852315 (506.46 it/sec) -training >> step=9091500, episode=1516 reward=0.782816 (518.53 it/sec) -training >> step=9091600, episode=1516 reward=0.7896723 (487.64 it/sec) -training >> step=9091700, episode=1516 reward=0.8015516 (515.84 it/sec) -training >> step=9091800, episode=1516 reward=0.8018557 (538.93 it/sec) -training >> step=9091900, episode=1516 reward=0.7929964 (544.52 it/sec) -training >> step=9092000, episode=1516 reward=0.8215253 (519.52 it/sec) -training >> step=9092100, episode=1516 reward=0.7846178 (483.83 it/sec) -training >> step=9092200, episode=1516 reward=0.7815623 (541.83 it/sec) -training >> step=9092300, episode=1516 reward=0.7793012 (529.18 it/sec) -training >> step=9092400, episode=1516 reward=0.7791923 (500.68 it/sec) -training >> step=9092500, episode=1516 reward=0.7785446 (543.01 it/sec) -training >> step=9092600, episode=1516 reward=0.8137069 (503.20 it/sec) -training >> step=9092700, episode=1516 reward=0.803856 (504.83 it/sec) -training >> step=9092800, episode=1516 reward=0.8127658 (460.78 it/sec) -training >> step=9092900, episode=1516 reward=0.8022203 (494.39 it/sec) -training >> step=9093000, episode=1516 reward=0.7892132 (518.89 it/sec) -training >> step=9093100, episode=1516 reward=0.7829432 (465.38 it/sec) -training >> step=9093200, episode=1516 reward=0.7895204 (448.23 it/sec) -training >> step=9093300, episode=1516 reward=0.7917964 (504.66 it/sec) -training >> step=9093400, episode=1516 reward=0.7907888 (513.81 it/sec) -training >> step=9093500, episode=1516 reward=0.7841601 (521.68 it/sec) -training >> step=9093600, episode=1516 reward=0.7903078 (485.72 it/sec) -training >> step=9093700, episode=1516 reward=0.7939874 (570.93 it/sec) -training >> step=9093800, episode=1516 reward=0.7824572 (516.59 it/sec) -training >> step=9093900, episode=1516 reward=0.7957108 (524.59 it/sec) -training >> step=9094000, episode=1516 reward=0.7927253 (536.81 it/sec) -training >> step=9094100, episode=1516 reward=0.8035142 (510.42 it/sec) -training >> step=9094200, episode=1516 reward=0.8055848 (502.79 it/sec) -training >> step=9094300, episode=1516 reward=0.7789153 (488.95 it/sec) -training >> step=9094400, episode=1516 reward=0.7803901 (557.84 it/sec) -training >> step=9094500, episode=1516 reward=0.7995471 (507.03 it/sec) -training >> step=9094600, episode=1516 reward=0.7760442 (502.49 it/sec) -training >> step=9094700, episode=1516 reward=0.7763739 (452.16 it/sec) -training >> step=9094800, episode=1516 reward=0.7787306 (539.15 it/sec) -training >> step=9094900, episode=1516 reward=0.7878187 (538.90 it/sec) -training >> step=9095000, episode=1516 reward=0.8046594 (529.98 it/sec) -training >> step=9095100, episode=1516 reward=0.7792962 (564.45 it/sec) -training >> step=9095200, episode=1516 reward=0.7974945 (513.11 it/sec) -training >> step=9095300, episode=1517 reward=0.7972614 (111.77 it/sec) -training >> step=9095400, episode=1517 reward=0.7724405 (514.11 it/sec) -training >> step=9095500, episode=1517 reward=0.8132425 (432.53 it/sec) -training >> step=9095600, episode=1517 reward=0.7749779 (535.94 it/sec) -training >> step=9095700, episode=1517 reward=0.7945504 (534.74 it/sec) -training >> step=9095800, episode=1517 reward=0.7841164 (545.69 it/sec) -training >> step=9095900, episode=1517 reward=0.7784788 (505.60 it/sec) -training >> step=9096000, episode=1517 reward=0.7829022 (534.42 it/sec) -training >> step=9096100, episode=1517 reward=0.7945911 (514.54 it/sec) -training >> step=9096200, episode=1517 reward=0.7853112 (555.84 it/sec) -training >> step=9096300, episode=1517 reward=0.8048915 (502.18 it/sec) -training >> step=9096400, episode=1517 reward=0.7834528 (520.61 it/sec) -training >> step=9096500, episode=1517 reward=0.7888095 (512.20 it/sec) -training >> step=9096600, episode=1517 reward=0.7987672 (532.98 it/sec) -training >> step=9096700, episode=1517 reward=0.8039998 (559.79 it/sec) -training >> step=9096800, episode=1517 reward=0.805158 (527.15 it/sec) -training >> step=9096900, episode=1517 reward=0.7878411 (554.38 it/sec) -training >> step=9097000, episode=1517 reward=0.7774149 (504.74 it/sec) -training >> step=9097100, episode=1517 reward=0.7897929 (524.60 it/sec) -training >> step=9097200, episode=1517 reward=0.7778608 (509.82 it/sec) -training >> step=9097300, episode=1517 reward=0.7993516 (566.82 it/sec) -training >> step=9097400, episode=1517 reward=0.8026289 (510.52 it/sec) -training >> step=9097500, episode=1517 reward=0.7718752 (471.25 it/sec) -training >> step=9097600, episode=1517 reward=0.7927864 (560.60 it/sec) -training >> step=9097700, episode=1517 reward=0.8126563 (509.92 it/sec) -training >> step=9097800, episode=1517 reward=0.7779522 (535.74 it/sec) -training >> step=9097900, episode=1517 reward=0.774894 (525.14 it/sec) -training >> step=9098000, episode=1517 reward=0.7885326 (522.43 it/sec) -training >> step=9098100, episode=1517 reward=0.7863335 (486.42 it/sec) -training >> step=9098200, episode=1517 reward=0.805742 (528.73 it/sec) -training >> step=9098300, episode=1517 reward=0.7720261 (522.60 it/sec) -training >> step=9098400, episode=1517 reward=0.7905735 (504.44 it/sec) -training >> step=9098500, episode=1517 reward=0.7845286 (519.82 it/sec) -training >> step=9098600, episode=1517 reward=0.7729198 (519.96 it/sec) -training >> step=9098700, episode=1517 reward=0.7880514 (527.10 it/sec) -training >> step=9098800, episode=1517 reward=0.7856041 (524.31 it/sec) -training >> step=9098900, episode=1517 reward=0.7827269 (531.23 it/sec) -training >> step=9099000, episode=1517 reward=0.7715691 (519.53 it/sec) -training >> step=9099100, episode=1517 reward=0.8071986 (512.94 it/sec) -training >> step=9099200, episode=1517 reward=0.782935 (523.85 it/sec) -training >> step=9099300, episode=1517 reward=0.7748109 (502.74 it/sec) -training >> step=9099400, episode=1517 reward=0.769589 (542.98 it/sec) -training >> step=9099500, episode=1517 reward=0.7815005 (468.19 it/sec) -training >> step=9099600, episode=1517 reward=0.803558 (486.92 it/sec) -training >> step=9099700, episode=1517 reward=0.805004 (530.92 it/sec) -training >> step=9099800, episode=1517 reward=0.7851712 (503.67 it/sec) -training >> step=9099900, episode=1517 reward=0.7940142 (490.08 it/sec) -training >> step=9100000, episode=1517 reward=0.7959368 (514.19 it/sec) -training >> step=9100100, episode=1517 reward=0.8022445 (561.33 it/sec) -training >> step=9100200, episode=1517 reward=0.8032277 (506.35 it/sec) -training >> step=9100300, episode=1517 reward=0.7790714 (494.42 it/sec) -training >> step=9100400, episode=1517 reward=0.7638697 (537.05 it/sec) -training >> step=9100500, episode=1517 reward=0.7787336 (507.59 it/sec) -training >> step=9100600, episode=1517 reward=0.7966774 (509.84 it/sec) -training >> step=9100700, episode=1517 reward=0.7940079 (499.85 it/sec) -training >> step=9100800, episode=1517 reward=0.7830957 (582.66 it/sec) -training >> step=9100900, episode=1517 reward=0.793152 (532.18 it/sec) -training >> step=9101000, episode=1517 reward=0.8031168 (519.40 it/sec) -training >> step=9101100, episode=1517 reward=0.7782074 (529.84 it/sec) -training >> step=9101200, episode=1517 reward=0.8012645 (513.49 it/sec) -training >> step=9101300, episode=1518 reward=0.7833977 (113.18 it/sec) -training >> step=9101400, episode=1518 reward=0.7723556 (499.96 it/sec) -training >> step=9101500, episode=1518 reward=0.7944578 (552.16 it/sec) -training >> step=9101600, episode=1518 reward=0.8087965 (508.45 it/sec) -training >> step=9101700, episode=1518 reward=0.7991347 (501.53 it/sec) -training >> step=9101800, episode=1518 reward=0.7819479 (534.62 it/sec) -training >> step=9101900, episode=1518 reward=0.7880471 (544.38 it/sec) -training >> step=9102000, episode=1518 reward=0.8116335 (507.22 it/sec) -training >> step=9102100, episode=1518 reward=0.8116521 (503.64 it/sec) -training >> step=9102200, episode=1518 reward=0.7835435 (535.94 it/sec) -training >> step=9102300, episode=1518 reward=0.7807029 (491.58 it/sec) -training >> step=9102400, episode=1518 reward=0.7808878 (544.64 it/sec) -training >> step=9102500, episode=1518 reward=0.7877226 (497.16 it/sec) -training >> step=9102600, episode=1518 reward=0.8016203 (559.45 it/sec) -training >> step=9102700, episode=1518 reward=0.7832687 (498.85 it/sec) -training >> step=9102800, episode=1518 reward=0.8134906 (475.16 it/sec) -training >> step=9102900, episode=1518 reward=0.8140734 (520.86 it/sec) -training >> step=9103000, episode=1518 reward=0.7970527 (449.80 it/sec) -training >> step=9103100, episode=1518 reward=0.7744358 (418.80 it/sec) -training >> step=9103200, episode=1518 reward=0.7889103 (407.37 it/sec) -training >> step=9103300, episode=1518 reward=0.8014148 (430.11 it/sec) -training >> step=9103400, episode=1518 reward=0.8070656 (481.46 it/sec) -training >> step=9103500, episode=1518 reward=0.7921045 (526.58 it/sec) -training >> step=9103600, episode=1518 reward=0.7979555 (513.96 it/sec) -training >> step=9103700, episode=1518 reward=0.7790189 (543.87 it/sec) -training >> step=9103800, episode=1518 reward=0.7818712 (528.22 it/sec) -training >> step=9103900, episode=1518 reward=0.7883682 (523.39 it/sec) -training >> step=9104000, episode=1518 reward=0.794717 (494.06 it/sec) -training >> step=9104100, episode=1518 reward=0.758857 (529.36 it/sec) -training >> step=9104200, episode=1518 reward=0.7817776 (502.91 it/sec) -training >> step=9104300, episode=1518 reward=0.7918876 (479.12 it/sec) -training >> step=9104400, episode=1518 reward=0.7987519 (530.25 it/sec) -training >> step=9104500, episode=1518 reward=0.7898949 (463.57 it/sec) -training >> step=9104600, episode=1518 reward=0.7963482 (492.83 it/sec) -training >> step=9104700, episode=1518 reward=0.7971985 (464.70 it/sec) -training >> step=9104800, episode=1518 reward=0.7901225 (484.13 it/sec) -training >> step=9104900, episode=1518 reward=0.7929631 (445.15 it/sec) -training >> step=9105000, episode=1518 reward=0.8002471 (429.50 it/sec) -training >> step=9105100, episode=1518 reward=0.7928124 (531.89 it/sec) -training >> step=9105200, episode=1518 reward=0.8021227 (482.54 it/sec) -training >> step=9105300, episode=1518 reward=0.8049141 (515.98 it/sec) -training >> step=9105400, episode=1518 reward=0.7774906 (552.75 it/sec) -training >> step=9105500, episode=1518 reward=0.7979624 (485.72 it/sec) -training >> step=9105600, episode=1518 reward=0.8021303 (514.69 it/sec) -training >> step=9105700, episode=1518 reward=0.7842437 (522.07 it/sec) -training >> step=9105800, episode=1518 reward=0.7850083 (512.02 it/sec) -training >> step=9105900, episode=1518 reward=0.7825488 (509.37 it/sec) -training >> step=9106000, episode=1518 reward=0.794751 (475.86 it/sec) -training >> step=9106100, episode=1518 reward=0.7801501 (535.42 it/sec) -training >> step=9106200, episode=1518 reward=0.7784763 (523.02 it/sec) -training >> step=9106300, episode=1518 reward=0.7893771 (527.03 it/sec) -training >> step=9106400, episode=1518 reward=0.7863035 (530.90 it/sec) -training >> step=9106500, episode=1518 reward=0.7985159 (528.26 it/sec) -training >> step=9106600, episode=1518 reward=0.78496 (517.33 it/sec) -training >> step=9106700, episode=1518 reward=0.769904 (536.20 it/sec) -training >> step=9106800, episode=1518 reward=0.7931896 (515.09 it/sec) -training >> step=9106900, episode=1518 reward=0.7977431 (556.08 it/sec) -training >> step=9107000, episode=1518 reward=0.8071085 (522.64 it/sec) -training >> step=9107100, episode=1518 reward=0.8006263 (512.92 it/sec) -training >> step=9107200, episode=1518 reward=0.798537 (532.70 it/sec) -training >> step=9107300, episode=1519 reward=0.7754671 (111.25 it/sec) -training >> step=9107400, episode=1519 reward=0.7825767 (507.10 it/sec) -training >> step=9107500, episode=1519 reward=0.7805901 (498.31 it/sec) -training >> step=9107600, episode=1519 reward=0.7953017 (489.05 it/sec) -training >> step=9107700, episode=1519 reward=0.8034099 (518.68 it/sec) -training >> step=9107800, episode=1519 reward=0.800768 (519.53 it/sec) -training >> step=9107900, episode=1519 reward=0.7961943 (544.69 it/sec) -training >> step=9108000, episode=1519 reward=0.7775497 (511.05 it/sec) -training >> step=9108100, episode=1519 reward=0.7758281 (507.37 it/sec) -training >> step=9108200, episode=1519 reward=0.7987659 (526.78 it/sec) -training >> step=9108300, episode=1519 reward=0.7984489 (432.31 it/sec) -training >> step=9108400, episode=1519 reward=0.806786 (526.26 it/sec) -training >> step=9108500, episode=1519 reward=0.788579 (507.30 it/sec) -training >> step=9108600, episode=1519 reward=0.7962524 (550.91 it/sec) -training >> step=9108700, episode=1519 reward=0.7920561 (531.20 it/sec) -training >> step=9108800, episode=1519 reward=0.7868518 (492.57 it/sec) -training >> step=9108900, episode=1519 reward=0.8171453 (510.13 it/sec) -training >> step=9109000, episode=1519 reward=0.7960138 (469.51 it/sec) -training >> step=9109100, episode=1519 reward=0.7849598 (495.13 it/sec) -training >> step=9109200, episode=1519 reward=0.8034251 (490.44 it/sec) -training >> step=9109300, episode=1519 reward=0.7875994 (559.22 it/sec) -training >> step=9109400, episode=1519 reward=0.7747543 (485.35 it/sec) -training >> step=9109500, episode=1519 reward=0.8054733 (528.06 it/sec) -training >> step=9109600, episode=1519 reward=0.8282371 (507.82 it/sec) -training >> step=9109700, episode=1519 reward=0.7757999 (523.53 it/sec) -training >> step=9109800, episode=1519 reward=0.7895337 (549.00 it/sec) -training >> step=9109900, episode=1519 reward=0.7913886 (457.93 it/sec) -training >> step=9110000, episode=1519 reward=0.7891479 (545.38 it/sec) -training >> step=9110100, episode=1519 reward=0.7869056 (489.96 it/sec) -training >> step=9110200, episode=1519 reward=0.804468 (540.36 it/sec) -training >> step=9110300, episode=1519 reward=0.7907474 (542.25 it/sec) -training >> step=9110400, episode=1519 reward=0.7639956 (528.02 it/sec) -training >> step=9110500, episode=1519 reward=0.7818761 (514.96 it/sec) -training >> step=9110600, episode=1519 reward=0.7882451 (504.99 it/sec) -training >> step=9110700, episode=1519 reward=0.8011156 (521.04 it/sec) -training >> step=9110800, episode=1519 reward=0.7939675 (508.75 it/sec) -training >> step=9110900, episode=1519 reward=0.7945865 (516.11 it/sec) -training >> step=9111000, episode=1519 reward=0.7879536 (505.07 it/sec) -training >> step=9111100, episode=1519 reward=0.7875533 (544.93 it/sec) -training >> step=9111200, episode=1519 reward=0.7748383 (507.85 it/sec) -training >> step=9111300, episode=1519 reward=0.7885668 (519.02 it/sec) -training >> step=9111400, episode=1519 reward=0.7773696 (528.73 it/sec) -training >> step=9111500, episode=1519 reward=0.795853 (481.84 it/sec) -training >> step=9111600, episode=1519 reward=0.7717963 (543.40 it/sec) -training >> step=9111700, episode=1519 reward=0.8029785 (495.49 it/sec) -training >> step=9111800, episode=1519 reward=0.7918655 (552.95 it/sec) -training >> step=9111900, episode=1519 reward=0.7847961 (527.32 it/sec) -training >> step=9112000, episode=1519 reward=0.7902758 (476.65 it/sec) -training >> step=9112100, episode=1519 reward=0.7846828 (536.65 it/sec) -training >> step=9112200, episode=1519 reward=0.7990392 (496.83 it/sec) -training >> step=9112300, episode=1519 reward=0.7902896 (518.62 it/sec) -training >> step=9112400, episode=1519 reward=0.8132292 (534.06 it/sec) -training >> step=9112500, episode=1519 reward=0.7898334 (519.08 it/sec) -training >> step=9112600, episode=1519 reward=0.7787187 (484.45 it/sec) -training >> step=9112700, episode=1519 reward=0.7888972 (524.72 it/sec) -training >> step=9112800, episode=1519 reward=0.787541 (516.29 it/sec) -training >> step=9112900, episode=1519 reward=0.7934279 (554.96 it/sec) -training >> step=9113000, episode=1519 reward=0.782576 (490.35 it/sec) -training >> step=9113100, episode=1519 reward=0.7951113 (463.43 it/sec) -training >> step=9113200, episode=1519 reward=0.7991141 (560.82 it/sec) -training >> step=9113300, episode=1520 reward=0.7837834 (99.20 it/sec) -training >> step=9113400, episode=1520 reward=0.7875069 (512.73 it/sec) -training >> step=9113500, episode=1520 reward=0.7866871 (509.51 it/sec) -training >> step=9113600, episode=1520 reward=0.7920527 (541.95 it/sec) -training >> step=9113700, episode=1520 reward=0.7966546 (472.44 it/sec) -training >> step=9113800, episode=1520 reward=0.784771 (458.36 it/sec) -training >> step=9113900, episode=1520 reward=0.7871965 (508.44 it/sec) -training >> step=9114000, episode=1520 reward=0.8061683 (479.68 it/sec) -training >> step=9114100, episode=1520 reward=0.8036752 (485.32 it/sec) -training >> step=9114200, episode=1520 reward=0.805581 (512.96 it/sec) -training >> step=9114300, episode=1520 reward=0.7989782 (532.90 it/sec) -training >> step=9114400, episode=1520 reward=0.7950052 (481.03 it/sec) -training >> step=9114500, episode=1520 reward=0.812498 (524.43 it/sec) -training >> step=9114600, episode=1520 reward=0.7812303 (545.37 it/sec) -training >> step=9114700, episode=1520 reward=0.7773564 (513.91 it/sec) -training >> step=9114800, episode=1520 reward=0.8092366 (493.62 it/sec) -training >> step=9114900, episode=1520 reward=0.7892812 (490.83 it/sec) -training >> step=9115000, episode=1520 reward=0.811324 (479.28 it/sec) -training >> step=9115100, episode=1520 reward=0.788335 (543.61 it/sec) -training >> step=9115200, episode=1520 reward=0.7727486 (541.41 it/sec) -training >> step=9115300, episode=1520 reward=0.8001694 (499.89 it/sec) -training >> step=9115400, episode=1520 reward=0.8081136 (532.29 it/sec) -training >> step=9115500, episode=1520 reward=0.7812488 (550.63 it/sec) -training >> step=9115600, episode=1520 reward=0.777655 (537.56 it/sec) -training >> step=9115700, episode=1520 reward=0.7775829 (532.87 it/sec) -training >> step=9115800, episode=1520 reward=0.7954037 (478.75 it/sec) -training >> step=9115900, episode=1520 reward=0.7875593 (543.68 it/sec) -training >> step=9116000, episode=1520 reward=0.7961321 (505.34 it/sec) -training >> step=9116100, episode=1520 reward=0.8052042 (528.15 it/sec) -training >> step=9116200, episode=1520 reward=0.7837435 (555.19 it/sec) -training >> step=9116300, episode=1520 reward=0.7967878 (505.60 it/sec) -training >> step=9116400, episode=1520 reward=0.7733319 (508.79 it/sec) -training >> step=9116500, episode=1520 reward=0.779245 (523.16 it/sec) -training >> step=9116600, episode=1520 reward=0.7616406 (495.97 it/sec) -training >> step=9116700, episode=1520 reward=0.7933877 (530.45 it/sec) -training >> step=9116800, episode=1520 reward=0.7856963 (419.47 it/sec) -training >> step=9116900, episode=1520 reward=0.7854801 (461.21 it/sec) -training >> step=9117000, episode=1520 reward=0.7995344 (512.88 it/sec) -training >> step=9117100, episode=1520 reward=0.8087323 (484.74 it/sec) -training >> step=9117200, episode=1520 reward=0.7737581 (501.36 it/sec) -training >> step=9117300, episode=1520 reward=0.7894751 (483.45 it/sec) -training >> step=9117400, episode=1520 reward=0.8002636 (361.95 it/sec) -training >> step=9117500, episode=1520 reward=0.7914854 (363.74 it/sec) -training >> step=9117600, episode=1520 reward=0.7889659 (429.83 it/sec) -training >> step=9117700, episode=1520 reward=0.7867877 (497.13 it/sec) -training >> step=9117800, episode=1520 reward=0.7904176 (423.96 it/sec) -training >> step=9117900, episode=1520 reward=0.8019778 (507.38 it/sec) -training >> step=9118000, episode=1520 reward=0.7919425 (521.49 it/sec) -training >> step=9118100, episode=1520 reward=0.7801161 (553.69 it/sec) -training >> step=9118200, episode=1520 reward=0.7936388 (543.27 it/sec) -training >> step=9118300, episode=1520 reward=0.7730359 (496.64 it/sec) -training >> step=9118400, episode=1520 reward=0.7853807 (559.06 it/sec) -training >> step=9118500, episode=1520 reward=0.7866792 (497.09 it/sec) -training >> step=9118600, episode=1520 reward=0.8105398 (504.25 it/sec) -training >> step=9118700, episode=1520 reward=0.786312 (550.22 it/sec) -training >> step=9118800, episode=1520 reward=0.7869908 (487.76 it/sec) -training >> step=9118900, episode=1520 reward=0.7884194 (532.65 it/sec) -training >> step=9119000, episode=1520 reward=0.7892253 (509.82 it/sec) -training >> step=9119100, episode=1520 reward=0.7695525 (523.54 it/sec) -training >> step=9119200, episode=1520 reward=0.776614 (526.57 it/sec) -training >> step=9119300, episode=1521 reward=0.7937769 (117.89 it/sec) -training >> step=9119400, episode=1521 reward=0.7811145 (537.24 it/sec) -training >> step=9119500, episode=1521 reward=0.7788155 (549.87 it/sec) -training >> step=9119600, episode=1521 reward=0.7837538 (479.64 it/sec) -training >> step=9119700, episode=1521 reward=0.7989346 (485.61 it/sec) -training >> step=9119800, episode=1521 reward=0.7766712 (546.28 it/sec) -training >> step=9119900, episode=1521 reward=0.788318 (516.38 it/sec) -training >> step=9120000, episode=1521 reward=0.7814358 (524.88 it/sec) -training >> step=9120100, episode=1521 reward=0.8064246 (491.65 it/sec) -training >> step=9120200, episode=1521 reward=0.7890423 (524.18 it/sec) -training >> step=9120300, episode=1521 reward=0.7952151 (522.77 it/sec) -training >> step=9120400, episode=1521 reward=0.7774531 (502.37 it/sec) -training >> step=9120500, episode=1521 reward=0.8035226 (560.72 it/sec) -training >> step=9120600, episode=1521 reward=0.7737631 (497.86 it/sec) -training >> step=9120700, episode=1521 reward=0.7969902 (539.71 it/sec) -training >> step=9120800, episode=1521 reward=0.8048375 (521.98 it/sec) -training >> step=9120900, episode=1521 reward=0.7912616 (534.55 it/sec) -training >> step=9121000, episode=1521 reward=0.7988476 (534.36 it/sec) -training >> step=9121100, episode=1521 reward=0.8015507 (499.33 it/sec) -training >> step=9121200, episode=1521 reward=0.7984352 (532.07 it/sec) -training >> step=9121300, episode=1521 reward=0.7867341 (519.74 it/sec) -training >> step=9121400, episode=1521 reward=0.7945451 (502.20 it/sec) -training >> step=9121500, episode=1521 reward=0.7974933 (539.59 it/sec) -training >> step=9121600, episode=1521 reward=0.8003463 (577.89 it/sec) -training >> step=9121700, episode=1521 reward=0.7827942 (493.84 it/sec) -training >> step=9121800, episode=1521 reward=0.774823 (545.21 it/sec) -training >> step=9121900, episode=1521 reward=0.7968345 (519.17 it/sec) -training >> step=9122000, episode=1521 reward=0.7822228 (567.17 it/sec) -training >> step=9122100, episode=1521 reward=0.7884733 (543.46 it/sec) -training >> step=9122200, episode=1521 reward=0.7957339 (497.03 it/sec) -training >> step=9122300, episode=1521 reward=0.7911492 (552.05 it/sec) -training >> step=9122400, episode=1521 reward=0.7945732 (489.69 it/sec) -training >> step=9122500, episode=1521 reward=0.7875296 (488.58 it/sec) -training >> step=9122600, episode=1521 reward=0.7917581 (499.22 it/sec) -training >> step=9122700, episode=1521 reward=0.7988805 (579.29 it/sec) -training >> step=9122800, episode=1521 reward=0.7930098 (497.57 it/sec) -training >> step=9122900, episode=1521 reward=0.7784927 (531.41 it/sec) -training >> step=9123000, episode=1521 reward=0.7853295 (503.19 it/sec) -training >> step=9123100, episode=1521 reward=0.787489 (560.58 it/sec) -training >> step=9123200, episode=1521 reward=0.7866474 (506.26 it/sec) -training >> step=9123300, episode=1521 reward=0.7972821 (504.63 it/sec) -training >> step=9123400, episode=1521 reward=0.7822562 (553.86 it/sec) -training >> step=9123500, episode=1521 reward=0.785702 (500.88 it/sec) -training >> step=9123600, episode=1521 reward=0.7914683 (507.30 it/sec) -training >> step=9123700, episode=1521 reward=0.7937352 (477.05 it/sec) -training >> step=9123800, episode=1521 reward=0.7970476 (507.47 it/sec) -training >> step=9123900, episode=1521 reward=0.7858216 (528.03 it/sec) -training >> step=9124000, episode=1521 reward=0.7877563 (477.93 it/sec) -training >> step=9124100, episode=1521 reward=0.8053418 (527.35 it/sec) -training >> step=9124200, episode=1521 reward=0.7934933 (517.48 it/sec) -training >> step=9124300, episode=1521 reward=0.7971153 (533.97 it/sec) -training >> step=9124400, episode=1521 reward=0.7944381 (503.57 it/sec) -training >> step=9124500, episode=1521 reward=0.7849328 (520.72 it/sec) -training >> step=9124600, episode=1521 reward=0.8033142 (482.79 it/sec) -training >> step=9124700, episode=1521 reward=0.7697569 (457.96 it/sec) -training >> step=9124800, episode=1521 reward=0.778836 (532.84 it/sec) -training >> step=9124900, episode=1521 reward=0.8002831 (510.91 it/sec) -training >> step=9125000, episode=1521 reward=0.8006479 (519.52 it/sec) -training >> step=9125100, episode=1521 reward=0.7865627 (464.62 it/sec) -training >> step=9125200, episode=1521 reward=0.7871295 (553.01 it/sec) -training >> step=9125300, episode=1522 reward=0.8083378 (94.70 it/sec) -training >> step=9125400, episode=1522 reward=0.7986836 (499.67 it/sec) -training >> step=9125500, episode=1522 reward=0.7908601 (496.62 it/sec) -training >> step=9125600, episode=1522 reward=0.766022 (532.01 it/sec) -training >> step=9125700, episode=1522 reward=0.7861854 (491.28 it/sec) -training >> step=9125800, episode=1522 reward=0.7920483 (477.78 it/sec) -training >> step=9125900, episode=1522 reward=0.7979227 (519.16 it/sec) -training >> step=9126000, episode=1522 reward=0.8034452 (486.30 it/sec) -training >> step=9126100, episode=1522 reward=0.8033867 (492.86 it/sec) -training >> step=9126200, episode=1522 reward=0.8027911 (533.31 it/sec) -training >> step=9126300, episode=1522 reward=0.7839648 (533.85 it/sec) -training >> step=9126400, episode=1522 reward=0.8048009 (511.04 it/sec) -training >> step=9126500, episode=1522 reward=0.7937192 (538.85 it/sec) -training >> step=9126600, episode=1522 reward=0.7765854 (494.79 it/sec) -training >> step=9126700, episode=1522 reward=0.782162 (478.50 it/sec) -training >> step=9126800, episode=1522 reward=0.7987836 (509.98 it/sec) -training >> step=9126900, episode=1522 reward=0.7796397 (510.56 it/sec) -training >> step=9127000, episode=1522 reward=0.7856004 (458.19 it/sec) -training >> step=9127100, episode=1522 reward=0.7865649 (495.38 it/sec) -training >> step=9127200, episode=1522 reward=0.7778281 (469.51 it/sec) -training >> step=9127300, episode=1522 reward=0.7739107 (566.33 it/sec) -training >> step=9127400, episode=1522 reward=0.7854037 (518.75 it/sec) -training >> step=9127500, episode=1522 reward=0.7952136 (534.97 it/sec) -training >> step=9127600, episode=1522 reward=0.7905238 (456.61 it/sec) -training >> step=9127700, episode=1522 reward=0.7772369 (471.67 it/sec) -training >> step=9127800, episode=1522 reward=0.7989597 (504.73 it/sec) -training >> step=9127900, episode=1522 reward=0.7712134 (494.47 it/sec) -training >> step=9128000, episode=1522 reward=0.7837614 (515.24 it/sec) -training >> step=9128100, episode=1522 reward=0.7891965 (460.51 it/sec) -training >> step=9128200, episode=1522 reward=0.8062529 (451.69 it/sec) -training >> step=9128300, episode=1522 reward=0.7829242 (452.57 it/sec) -training >> step=9128400, episode=1522 reward=0.784044 (506.35 it/sec) -training >> step=9128500, episode=1522 reward=0.7774714 (492.65 it/sec) -training >> step=9128600, episode=1522 reward=0.8034952 (440.82 it/sec) -training >> step=9128700, episode=1522 reward=0.7991724 (440.27 it/sec) -training >> step=9128800, episode=1522 reward=0.7937936 (460.40 it/sec) -training >> step=9128900, episode=1522 reward=0.7886509 (500.47 it/sec) -training >> step=9129000, episode=1522 reward=0.802184 (458.78 it/sec) -training >> step=9129100, episode=1522 reward=0.7975724 (437.23 it/sec) -training >> step=9129200, episode=1522 reward=0.7930177 (452.68 it/sec) -training >> step=9129300, episode=1522 reward=0.7828376 (483.16 it/sec) -training >> step=9129400, episode=1522 reward=0.7857895 (527.08 it/sec) -training >> step=9129500, episode=1522 reward=0.7833236 (472.23 it/sec) -training >> step=9129600, episode=1522 reward=0.8055081 (506.76 it/sec) -training >> step=9129700, episode=1522 reward=0.7952427 (475.92 it/sec) -training >> step=9129800, episode=1522 reward=0.7852111 (511.72 it/sec) -training >> step=9129900, episode=1522 reward=0.7792508 (468.41 it/sec) -training >> step=9130000, episode=1522 reward=0.8316287 (487.44 it/sec) -training >> step=9130100, episode=1522 reward=0.7852739 (471.28 it/sec) -training >> step=9130200, episode=1522 reward=0.793632 (511.92 it/sec) -training >> step=9130300, episode=1522 reward=0.8049134 (480.83 it/sec) -training >> step=9130400, episode=1522 reward=0.7723647 (476.73 it/sec) -training >> step=9130500, episode=1522 reward=0.7811456 (469.04 it/sec) -training >> step=9130600, episode=1522 reward=0.7993722 (513.51 it/sec) -training >> step=9130700, episode=1522 reward=0.7979565 (506.93 it/sec) -training >> step=9130800, episode=1522 reward=0.7935159 (505.50 it/sec) -training >> step=9130900, episode=1522 reward=0.7984276 (498.50 it/sec) -training >> step=9131000, episode=1522 reward=0.7849855 (476.27 it/sec) -training >> step=9131100, episode=1522 reward=0.7711511 (445.02 it/sec) -training >> step=9131200, episode=1522 reward=0.7924917 (503.89 it/sec) -training >> step=9131300, episode=1523 reward=0.7688923 (89.60 it/sec) -training >> step=9131400, episode=1523 reward=0.7823033 (393.09 it/sec) -training >> step=9131500, episode=1523 reward=0.7711339 (425.21 it/sec) -training >> step=9131600, episode=1523 reward=0.7820964 (426.77 it/sec) -training >> step=9131700, episode=1523 reward=0.7652681 (478.15 it/sec) -training >> step=9131800, episode=1523 reward=0.7880795 (500.73 it/sec) -training >> step=9131900, episode=1523 reward=0.7793158 (528.26 it/sec) -training >> step=9132000, episode=1523 reward=0.800264 (451.56 it/sec) -training >> step=9132100, episode=1523 reward=0.8116238 (508.32 it/sec) -training >> step=9132200, episode=1523 reward=0.8045495 (509.30 it/sec) -training >> step=9132300, episode=1523 reward=0.7743213 (489.22 it/sec) -training >> step=9132400, episode=1523 reward=0.8040044 (524.54 it/sec) -training >> step=9132500, episode=1523 reward=0.8044829 (492.49 it/sec) -training >> step=9132600, episode=1523 reward=0.7888408 (502.10 it/sec) -training >> step=9132700, episode=1523 reward=0.7785197 (451.90 it/sec) -training >> step=9132800, episode=1523 reward=0.7932407 (461.76 it/sec) -training >> step=9132900, episode=1523 reward=0.7903593 (518.00 it/sec) -training >> step=9133000, episode=1523 reward=0.7940492 (483.17 it/sec) -training >> step=9133100, episode=1523 reward=0.7939667 (461.42 it/sec) -training >> step=9133200, episode=1523 reward=0.7811225 (435.51 it/sec) -training >> step=9133300, episode=1523 reward=0.7888649 (529.79 it/sec) -training >> step=9133400, episode=1523 reward=0.7956069 (462.85 it/sec) -training >> step=9133500, episode=1523 reward=0.7927796 (473.98 it/sec) -training >> step=9133600, episode=1523 reward=0.7822499 (438.11 it/sec) -training >> step=9133700, episode=1523 reward=0.789331 (538.53 it/sec) -training >> step=9133800, episode=1523 reward=0.7994563 (521.42 it/sec) -training >> step=9133900, episode=1523 reward=0.7903698 (460.41 it/sec) -training >> step=9134000, episode=1523 reward=0.7841048 (444.59 it/sec) -training >> step=9134100, episode=1523 reward=0.7889197 (491.29 it/sec) -training >> step=9134200, episode=1523 reward=0.7827771 (465.53 it/sec) -training >> step=9134300, episode=1523 reward=0.7746789 (522.97 it/sec) -training >> step=9134400, episode=1523 reward=0.7862213 (513.72 it/sec) -training >> step=9134500, episode=1523 reward=0.7910668 (460.21 it/sec) -training >> step=9134600, episode=1523 reward=0.7598111 (461.60 it/sec) -training >> step=9134700, episode=1523 reward=0.7951009 (471.18 it/sec) -training >> step=9134800, episode=1523 reward=0.7933344 (504.32 it/sec) -training >> step=9134900, episode=1523 reward=0.8000252 (512.32 it/sec) -training >> step=9135000, episode=1523 reward=0.7906262 (495.02 it/sec) -training >> step=9135100, episode=1523 reward=0.786694 (482.28 it/sec) -training >> step=9135200, episode=1523 reward=0.8011746 (432.38 it/sec) -training >> step=9135300, episode=1523 reward=0.7927824 (524.21 it/sec) -training >> step=9135400, episode=1523 reward=0.7984008 (451.87 it/sec) -training >> step=9135500, episode=1523 reward=0.7750832 (500.34 it/sec) -training >> step=9135600, episode=1523 reward=0.7961478 (481.45 it/sec) -training >> step=9135700, episode=1523 reward=0.8047932 (448.90 it/sec) -training >> step=9135800, episode=1523 reward=0.7926554 (483.02 it/sec) -training >> step=9135900, episode=1523 reward=0.7881778 (491.13 it/sec) -training >> step=9136000, episode=1523 reward=0.8003612 (483.50 it/sec) -training >> step=9136100, episode=1523 reward=0.8133397 (461.88 it/sec) -training >> step=9136200, episode=1523 reward=0.7806929 (476.01 it/sec) -training >> step=9136300, episode=1523 reward=0.7965894 (469.24 it/sec) -training >> step=9136400, episode=1523 reward=0.8072379 (450.10 it/sec) -training >> step=9136500, episode=1523 reward=0.7840886 (463.08 it/sec) -training >> step=9136600, episode=1523 reward=0.8005156 (467.52 it/sec) -training >> step=9136700, episode=1523 reward=0.7842066 (479.13 it/sec) -training >> step=9136800, episode=1523 reward=0.7842301 (474.16 it/sec) -training >> step=9136900, episode=1523 reward=0.7939757 (466.50 it/sec) -training >> step=9137000, episode=1523 reward=0.7982409 (435.77 it/sec) -training >> step=9137100, episode=1523 reward=0.7840896 (441.59 it/sec) -training >> step=9137200, episode=1523 reward=0.8067092 (494.44 it/sec) -training >> step=9137300, episode=1524 reward=0.7771595 (90.09 it/sec) -training >> step=9137400, episode=1524 reward=0.7763343 (456.06 it/sec) -training >> step=9137500, episode=1524 reward=0.787496 (468.12 it/sec) -training >> step=9137600, episode=1524 reward=0.7941304 (505.18 it/sec) -training >> step=9137700, episode=1524 reward=0.7962934 (484.09 it/sec) -training >> step=9137800, episode=1524 reward=0.7809044 (474.84 it/sec) -training >> step=9137900, episode=1524 reward=0.7921424 (538.91 it/sec) -training >> step=9138000, episode=1524 reward=0.7923214 (411.61 it/sec) -training >> step=9138100, episode=1524 reward=0.8076568 (497.21 it/sec) -training >> step=9138200, episode=1524 reward=0.7754159 (498.43 it/sec) -training >> step=9138300, episode=1524 reward=0.8115438 (518.21 it/sec) -training >> step=9138400, episode=1524 reward=0.78757 (464.81 it/sec) -training >> step=9138500, episode=1524 reward=0.8066718 (473.72 it/sec) -training >> step=9138600, episode=1524 reward=0.7890674 (472.81 it/sec) -training >> step=9138700, episode=1524 reward=0.7820271 (461.98 it/sec) -training >> step=9138800, episode=1524 reward=0.7945463 (491.51 it/sec) -training >> step=9138900, episode=1524 reward=0.7607551 (467.03 it/sec) -training >> step=9139000, episode=1524 reward=0.7969787 (480.53 it/sec) -training >> step=9139100, episode=1524 reward=0.7814931 (416.38 it/sec) -training >> step=9139200, episode=1524 reward=0.7923219 (403.04 it/sec) -training >> step=9139300, episode=1524 reward=0.8019896 (464.77 it/sec) -training >> step=9139400, episode=1524 reward=0.7871546 (486.16 it/sec) -training >> step=9139500, episode=1524 reward=0.7991395 (490.03 it/sec) -training >> step=9139600, episode=1524 reward=0.7939094 (499.62 it/sec) -training >> step=9139700, episode=1524 reward=0.7646778 (501.03 it/sec) -training >> step=9139800, episode=1524 reward=0.8017895 (485.08 it/sec) -training >> step=9139900, episode=1524 reward=0.7851711 (436.13 it/sec) -training >> step=9140000, episode=1524 reward=0.7883782 (499.47 it/sec) -training >> step=9140100, episode=1524 reward=0.8096971 (518.49 it/sec) -training >> step=9140200, episode=1524 reward=0.7880141 (504.11 it/sec) -training >> step=9140300, episode=1524 reward=0.7918019 (441.25 it/sec) -training >> step=9140400, episode=1524 reward=0.7954245 (516.47 it/sec) -training >> step=9140500, episode=1524 reward=0.7892962 (483.09 it/sec) -training >> step=9140600, episode=1524 reward=0.7878239 (499.97 it/sec) -training >> step=9140700, episode=1524 reward=0.7846251 (530.61 it/sec) -training >> step=9140800, episode=1524 reward=0.7860774 (538.60 it/sec) -training >> step=9140900, episode=1524 reward=0.7968404 (488.49 it/sec) -training >> step=9141000, episode=1524 reward=0.7912195 (495.14 it/sec) -training >> step=9141100, episode=1524 reward=0.7749584 (512.47 it/sec) -training >> step=9141200, episode=1524 reward=0.8063033 (542.35 it/sec) -training >> step=9141300, episode=1524 reward=0.7953948 (501.73 it/sec) -training >> step=9141400, episode=1524 reward=0.7733178 (512.17 it/sec) -training >> step=9141500, episode=1524 reward=0.8014697 (527.91 it/sec) -training >> step=9141600, episode=1524 reward=0.7699219 (497.29 it/sec) -training >> step=9141700, episode=1524 reward=0.7880587 (523.35 it/sec) -training >> step=9141800, episode=1524 reward=0.7873015 (516.28 it/sec) -training >> step=9141900, episode=1524 reward=0.7912991 (509.20 it/sec) -training >> step=9142000, episode=1524 reward=0.7867239 (460.70 it/sec) -training >> step=9142100, episode=1524 reward=0.8072582 (482.06 it/sec) -training >> step=9142200, episode=1524 reward=0.7890193 (504.79 it/sec) -training >> step=9142300, episode=1524 reward=0.7850024 (547.99 it/sec) -training >> step=9142400, episode=1524 reward=0.7960094 (528.11 it/sec) -training >> step=9142500, episode=1524 reward=0.7808868 (464.72 it/sec) -training >> step=9142600, episode=1524 reward=0.7776496 (431.71 it/sec) -training >> step=9142700, episode=1524 reward=0.7762664 (469.82 it/sec) -training >> step=9142800, episode=1524 reward=0.7888528 (488.71 it/sec) -training >> step=9142900, episode=1524 reward=0.7963125 (530.84 it/sec) -training >> step=9143000, episode=1524 reward=0.7913954 (546.26 it/sec) -training >> step=9143100, episode=1524 reward=0.7856881 (485.50 it/sec) -training >> step=9143200, episode=1524 reward=0.7786882 (503.17 it/sec) -training >> step=9143300, episode=1525 reward=0.7816685 (104.88 it/sec) -training >> step=9143400, episode=1525 reward=0.7796836 (539.87 it/sec) -training >> step=9143500, episode=1525 reward=0.8017288 (446.30 it/sec) -training >> step=9143600, episode=1525 reward=0.7739325 (490.95 it/sec) -training >> step=9143700, episode=1525 reward=0.7778226 (525.44 it/sec) -training >> step=9143800, episode=1525 reward=0.7844416 (523.91 it/sec) -training >> step=9143900, episode=1525 reward=0.801976 (484.45 it/sec) -training >> step=9144000, episode=1525 reward=0.8052028 (523.38 it/sec) -training >> step=9144100, episode=1525 reward=0.7982509 (514.00 it/sec) -training >> step=9144200, episode=1525 reward=0.8007948 (468.90 it/sec) -training >> step=9144300, episode=1525 reward=0.7879653 (507.24 it/sec) -training >> step=9144400, episode=1525 reward=0.7968033 (502.24 it/sec) -training >> step=9144500, episode=1525 reward=0.7859874 (477.65 it/sec) -training >> step=9144600, episode=1525 reward=0.803586 (547.70 it/sec) -training >> step=9144700, episode=1525 reward=0.7822863 (450.09 it/sec) -training >> step=9144800, episode=1525 reward=0.7799526 (527.73 it/sec) -training >> step=9144900, episode=1525 reward=0.7895811 (482.09 it/sec) -training >> step=9145000, episode=1525 reward=0.8019098 (449.13 it/sec) -training >> step=9145100, episode=1525 reward=0.7939602 (417.29 it/sec) -training >> step=9145200, episode=1525 reward=0.7752016 (414.60 it/sec) -training >> step=9145300, episode=1525 reward=0.7916588 (465.08 it/sec) -training >> step=9145400, episode=1525 reward=0.7888933 (541.19 it/sec) -training >> step=9145500, episode=1525 reward=0.7904876 (542.44 it/sec) -training >> step=9145600, episode=1525 reward=0.7640828 (524.79 it/sec) -training >> step=9145700, episode=1525 reward=0.7957252 (503.73 it/sec) -training >> step=9145800, episode=1525 reward=0.807171 (546.05 it/sec) -training >> step=9145900, episode=1525 reward=0.7813734 (546.32 it/sec) -training >> step=9146000, episode=1525 reward=0.7854014 (508.82 it/sec) -training >> step=9146100, episode=1525 reward=0.8048059 (516.84 it/sec) -training >> step=9146200, episode=1525 reward=0.7901835 (507.65 it/sec) -training >> step=9146300, episode=1525 reward=0.79 (454.62 it/sec) -training >> step=9146400, episode=1525 reward=0.7722744 (541.22 it/sec) -training >> step=9146500, episode=1525 reward=0.7882215 (507.99 it/sec) -training >> step=9146600, episode=1525 reward=0.7822173 (525.28 it/sec) -training >> step=9146700, episode=1525 reward=0.8078704 (505.50 it/sec) -training >> step=9146800, episode=1525 reward=0.7970126 (474.22 it/sec) -training >> step=9146900, episode=1525 reward=0.7974784 (537.78 it/sec) -training >> step=9147000, episode=1525 reward=0.7924471 (517.41 it/sec) -training >> step=9147100, episode=1525 reward=0.797313 (547.11 it/sec) -training >> step=9147200, episode=1525 reward=0.803156 (488.86 it/sec) -training >> step=9147300, episode=1525 reward=0.8004383 (518.26 it/sec) -training >> step=9147400, episode=1525 reward=0.7973086 (482.03 it/sec) -training >> step=9147500, episode=1525 reward=0.7888963 (527.25 it/sec) -training >> step=9147600, episode=1525 reward=0.779639 (552.02 it/sec) -training >> step=9147700, episode=1525 reward=0.7871219 (560.18 it/sec) -training >> step=9147800, episode=1525 reward=0.789834 (462.93 it/sec) -training >> step=9147900, episode=1525 reward=0.8219878 (501.65 it/sec) -training >> step=9148000, episode=1525 reward=0.7765354 (545.06 it/sec) -training >> step=9148100, episode=1525 reward=0.789814 (486.53 it/sec) -training >> step=9148200, episode=1525 reward=0.7965127 (522.39 it/sec) -training >> step=9148300, episode=1525 reward=0.7898626 (452.51 it/sec) -training >> step=9148400, episode=1525 reward=0.7993398 (526.05 it/sec) -training >> step=9148500, episode=1525 reward=0.7938724 (519.19 it/sec) -training >> step=9148600, episode=1525 reward=0.7726084 (547.00 it/sec) -training >> step=9148700, episode=1525 reward=0.7866144 (517.96 it/sec) -training >> step=9148800, episode=1525 reward=0.7937558 (497.39 it/sec) -training >> step=9148900, episode=1525 reward=0.7886714 (493.17 it/sec) -training >> step=9149000, episode=1525 reward=0.7985978 (509.48 it/sec) -training >> step=9149100, episode=1525 reward=0.7997732 (528.46 it/sec) -training >> step=9149200, episode=1525 reward=0.7928467 (502.50 it/sec) -training >> step=9149300, episode=1526 reward=0.7776661 (118.79 it/sec) -training >> step=9149400, episode=1526 reward=0.7832431 (522.39 it/sec) -training >> step=9149500, episode=1526 reward=0.7951071 (512.57 it/sec) -training >> step=9149600, episode=1526 reward=0.7967918 (468.92 it/sec) -training >> step=9149700, episode=1526 reward=0.8047545 (474.04 it/sec) -training >> step=9149800, episode=1526 reward=0.8045056 (539.56 it/sec) -training >> step=9149900, episode=1526 reward=0.8087726 (458.74 it/sec) -training >> step=9150000, episode=1526 reward=0.7836827 (466.02 it/sec) -training >> step=9150100, episode=1526 reward=0.7901082 (468.29 it/sec) -training >> step=9150200, episode=1526 reward=0.7885609 (527.97 it/sec) -training >> step=9150300, episode=1526 reward=0.7896333 (498.12 it/sec) -training >> step=9150400, episode=1526 reward=0.7833595 (501.47 it/sec) -training >> step=9150500, episode=1526 reward=0.7737101 (488.22 it/sec) -training >> step=9150600, episode=1526 reward=0.7860807 (485.77 it/sec) -training >> step=9150700, episode=1526 reward=0.7938544 (513.30 it/sec) -training >> step=9150800, episode=1526 reward=0.7829458 (472.35 it/sec) -training >> step=9150900, episode=1526 reward=0.803328 (505.10 it/sec) -training >> step=9151000, episode=1526 reward=0.793684 (501.04 it/sec) -training >> step=9151100, episode=1526 reward=0.8153002 (466.54 it/sec) -training >> step=9151200, episode=1526 reward=0.7865452 (482.19 it/sec) -training >> step=9151300, episode=1526 reward=0.8004241 (484.84 it/sec) -training >> step=9151400, episode=1526 reward=0.7756301 (497.77 it/sec) -training >> step=9151500, episode=1526 reward=0.78562 (494.21 it/sec) -training >> step=9151600, episode=1526 reward=0.8039017 (504.25 it/sec) -training >> step=9151700, episode=1526 reward=0.8124674 (454.42 it/sec) -training >> step=9151800, episode=1526 reward=0.8027332 (448.36 it/sec) -training >> step=9151900, episode=1526 reward=0.800812 (487.38 it/sec) -training >> step=9152000, episode=1526 reward=0.8016304 (471.62 it/sec) -training >> step=9152100, episode=1526 reward=0.8061143 (410.20 it/sec) -training >> step=9152200, episode=1526 reward=0.805729 (439.75 it/sec) -training >> step=9152300, episode=1526 reward=0.8029244 (468.35 it/sec) -training >> step=9152400, episode=1526 reward=0.7919049 (470.95 it/sec) -training >> step=9152500, episode=1526 reward=0.7812843 (472.57 it/sec) -training >> step=9152600, episode=1526 reward=0.7764796 (488.96 it/sec) -training >> step=9152700, episode=1526 reward=0.8015849 (503.18 it/sec) -training >> step=9152800, episode=1526 reward=0.7913342 (472.64 it/sec) -training >> step=9152900, episode=1526 reward=0.7878557 (499.82 it/sec) -training >> step=9153000, episode=1526 reward=0.7742399 (473.75 it/sec) -training >> step=9153100, episode=1526 reward=0.7869816 (506.78 it/sec) -training >> step=9153200, episode=1526 reward=0.7784968 (471.88 it/sec) -training >> step=9153300, episode=1526 reward=0.7762815 (475.19 it/sec) -training >> step=9153400, episode=1526 reward=0.7845243 (561.31 it/sec) -training >> step=9153500, episode=1526 reward=0.7587287 (496.89 it/sec) -training >> step=9153600, episode=1526 reward=0.7996402 (457.04 it/sec) -training >> step=9153700, episode=1526 reward=0.775568 (505.65 it/sec) -training >> step=9153800, episode=1526 reward=0.7877395 (531.83 it/sec) -training >> step=9153900, episode=1526 reward=0.8040308 (482.56 it/sec) -training >> step=9154000, episode=1526 reward=0.7931637 (500.18 it/sec) -training >> step=9154100, episode=1526 reward=0.8008301 (496.64 it/sec) -training >> step=9154200, episode=1526 reward=0.7641141 (517.45 it/sec) -training >> step=9154300, episode=1526 reward=0.7862893 (496.71 it/sec) -training >> step=9154400, episode=1526 reward=0.7566659 (505.53 it/sec) -training >> step=9154500, episode=1526 reward=0.7768443 (501.87 it/sec) -training >> step=9154600, episode=1526 reward=0.7917497 (492.66 it/sec) -training >> step=9154700, episode=1526 reward=0.7828909 (501.66 it/sec) -training >> step=9154800, episode=1526 reward=0.7843501 (472.47 it/sec) -training >> step=9154900, episode=1526 reward=0.78198 (501.18 it/sec) -training >> step=9155000, episode=1526 reward=0.7910399 (469.37 it/sec) -training >> step=9155100, episode=1526 reward=0.7889984 (497.87 it/sec) -training >> step=9155200, episode=1526 reward=0.7803749 (503.27 it/sec) -training >> step=9155300, episode=1527 reward=0.783182 (101.90 it/sec) -training >> step=9155400, episode=1527 reward=0.7874666 (499.51 it/sec) -training >> step=9155500, episode=1527 reward=0.7847486 (481.39 it/sec) -training >> step=9155600, episode=1527 reward=0.78957 (538.52 it/sec) -training >> step=9155700, episode=1527 reward=0.8050712 (540.59 it/sec) -training >> step=9155800, episode=1527 reward=0.8056293 (494.35 it/sec) -training >> step=9155900, episode=1527 reward=0.8055333 (534.87 it/sec) -training >> step=9156000, episode=1527 reward=0.7938144 (568.16 it/sec) -training >> step=9156100, episode=1527 reward=0.7941258 (511.00 it/sec) -training >> step=9156200, episode=1527 reward=0.784185 (526.42 it/sec) -training >> step=9156300, episode=1527 reward=0.7694631 (490.95 it/sec) -training >> step=9156400, episode=1527 reward=0.7835467 (499.28 it/sec) -training >> step=9156500, episode=1527 reward=0.7874185 (539.94 it/sec) -training >> step=9156600, episode=1527 reward=0.7771257 (548.45 it/sec) -training >> step=9156700, episode=1527 reward=0.8027812 (538.83 it/sec) -training >> step=9156800, episode=1527 reward=0.7981434 (455.96 it/sec) -training >> step=9156900, episode=1527 reward=0.7906977 (529.17 it/sec) -training >> step=9157000, episode=1527 reward=0.8074561 (472.01 it/sec) -training >> step=9157100, episode=1527 reward=0.7946875 (527.93 it/sec) -training >> step=9157200, episode=1527 reward=0.7996817 (538.12 it/sec) -training >> step=9157300, episode=1527 reward=0.7896001 (520.68 it/sec) -training >> step=9157400, episode=1527 reward=0.785137 (497.31 it/sec) -training >> step=9157500, episode=1527 reward=0.7972392 (497.63 it/sec) -training >> step=9157600, episode=1527 reward=0.795408 (524.57 it/sec) -training >> step=9157700, episode=1527 reward=0.7810202 (552.36 it/sec) -training >> step=9157800, episode=1527 reward=0.7991166 (548.83 it/sec) -training >> step=9157900, episode=1527 reward=0.7601184 (523.30 it/sec) -training >> step=9158000, episode=1527 reward=0.7895604 (527.44 it/sec) -training >> step=9158100, episode=1527 reward=0.7829935 (527.69 it/sec) -training >> step=9158200, episode=1527 reward=0.8043934 (552.52 it/sec) -training >> step=9158300, episode=1527 reward=0.7850941 (474.85 it/sec) -training >> step=9158400, episode=1527 reward=0.8098659 (518.45 it/sec) -training >> step=9158500, episode=1527 reward=0.8076246 (539.90 it/sec) -training >> step=9158600, episode=1527 reward=0.7998177 (511.21 it/sec) -training >> step=9158700, episode=1527 reward=0.7787527 (516.00 it/sec) -training >> step=9158800, episode=1527 reward=0.8010489 (529.96 it/sec) -training >> step=9158900, episode=1527 reward=0.7943655 (546.43 it/sec) -training >> step=9159000, episode=1527 reward=0.7942806 (510.50 it/sec) -training >> step=9159100, episode=1527 reward=0.7840735 (523.53 it/sec) -training >> step=9159200, episode=1527 reward=0.7926783 (461.69 it/sec) -training >> step=9159300, episode=1527 reward=0.7855708 (444.58 it/sec) -training >> step=9159400, episode=1527 reward=0.8012432 (425.55 it/sec) -training >> step=9159500, episode=1527 reward=0.7945465 (386.51 it/sec) -training >> step=9159600, episode=1527 reward=0.791373 (477.29 it/sec) -training >> step=9159700, episode=1527 reward=0.7879862 (509.78 it/sec) -training >> step=9159800, episode=1527 reward=0.7904663 (492.24 it/sec) -training >> step=9159900, episode=1527 reward=0.7767644 (541.96 it/sec) -training >> step=9160000, episode=1527 reward=0.7816688 (480.42 it/sec) -training >> step=9160100, episode=1527 reward=0.7786888 (510.33 it/sec) -training >> step=9160200, episode=1527 reward=0.8072437 (535.09 it/sec) -training >> step=9160300, episode=1527 reward=0.7833154 (494.90 it/sec) -training >> step=9160400, episode=1527 reward=0.7972983 (500.47 it/sec) -training >> step=9160500, episode=1527 reward=0.7812937 (515.99 it/sec) -training >> step=9160600, episode=1527 reward=0.7802637 (508.44 it/sec) -training >> step=9160700, episode=1527 reward=0.8227363 (554.67 it/sec) -training >> step=9160800, episode=1527 reward=0.801896 (513.78 it/sec) -training >> step=9160900, episode=1527 reward=0.7825226 (495.47 it/sec) -training >> step=9161000, episode=1527 reward=0.8044207 (533.80 it/sec) -training >> step=9161100, episode=1527 reward=0.785142 (551.17 it/sec) -training >> step=9161200, episode=1527 reward=0.7799733 (494.89 it/sec) -training >> step=9161300, episode=1528 reward=0.7974278 (114.05 it/sec) -training >> step=9161400, episode=1528 reward=0.7920297 (480.54 it/sec) -training >> step=9161500, episode=1528 reward=0.7854414 (524.29 it/sec) -training >> step=9161600, episode=1528 reward=0.79026 (474.50 it/sec) -training >> step=9161700, episode=1528 reward=0.7984262 (505.39 it/sec) -training >> step=9161800, episode=1528 reward=0.7668925 (525.90 it/sec) -training >> step=9161900, episode=1528 reward=0.7924462 (514.76 it/sec) -training >> step=9162000, episode=1528 reward=0.7955412 (531.26 it/sec) -training >> step=9162100, episode=1528 reward=0.7983505 (513.28 it/sec) -training >> step=9162200, episode=1528 reward=0.798282 (511.97 it/sec) -training >> step=9162300, episode=1528 reward=0.7952557 (513.50 it/sec) -training >> step=9162400, episode=1528 reward=0.8126623 (497.64 it/sec) -training >> step=9162500, episode=1528 reward=0.7854247 (466.27 it/sec) -training >> step=9162600, episode=1528 reward=0.7793955 (478.47 it/sec) -training >> step=9162700, episode=1528 reward=0.7954727 (533.91 it/sec) -training >> step=9162800, episode=1528 reward=0.7987759 (497.39 it/sec) -training >> step=9162900, episode=1528 reward=0.7907733 (505.30 it/sec) -training >> step=9163000, episode=1528 reward=0.7745019 (485.55 it/sec) -training >> step=9163100, episode=1528 reward=0.8017438 (484.11 it/sec) -training >> step=9163200, episode=1528 reward=0.8039222 (490.34 it/sec) -training >> step=9163300, episode=1528 reward=0.7815934 (494.03 it/sec) -training >> step=9163400, episode=1528 reward=0.8003209 (434.52 it/sec) -training >> step=9163500, episode=1528 reward=0.8044663 (480.42 it/sec) -training >> step=9163600, episode=1528 reward=0.7978058 (464.25 it/sec) -training >> step=9163700, episode=1528 reward=0.7866116 (462.10 it/sec) -training >> step=9163800, episode=1528 reward=0.7713695 (522.30 it/sec) -training >> step=9163900, episode=1528 reward=0.7972117 (519.81 it/sec) -training >> step=9164000, episode=1528 reward=0.777196 (553.43 it/sec) -training >> step=9164100, episode=1528 reward=0.7800069 (481.82 it/sec) -training >> step=9164200, episode=1528 reward=0.8109847 (503.26 it/sec) -training >> step=9164300, episode=1528 reward=0.7686431 (529.77 it/sec) -training >> step=9164400, episode=1528 reward=0.7983727 (487.76 it/sec) -training >> step=9164500, episode=1528 reward=0.7905034 (528.49 it/sec) -training >> step=9164600, episode=1528 reward=0.79202 (471.96 it/sec) -training >> step=9164700, episode=1528 reward=0.8012457 (468.73 it/sec) -training >> step=9164800, episode=1528 reward=0.7855378 (526.69 it/sec) -training >> step=9164900, episode=1528 reward=0.7900963 (504.79 it/sec) -training >> step=9165000, episode=1528 reward=0.7781549 (559.03 it/sec) -training >> step=9165100, episode=1528 reward=0.7900484 (523.55 it/sec) -training >> step=9165200, episode=1528 reward=0.7923966 (532.24 it/sec) -training >> step=9165300, episode=1528 reward=0.8072408 (533.68 it/sec) -training >> step=9165400, episode=1528 reward=0.7940606 (479.75 it/sec) -training >> step=9165500, episode=1528 reward=0.8089113 (470.99 it/sec) -training >> step=9165600, episode=1528 reward=0.7993683 (464.45 it/sec) -training >> step=9165700, episode=1528 reward=0.767369 (524.28 it/sec) -training >> step=9165800, episode=1528 reward=0.7879118 (499.02 it/sec) -training >> step=9165900, episode=1528 reward=0.7854158 (496.34 it/sec) -training >> step=9166000, episode=1528 reward=0.7962962 (553.46 it/sec) -training >> step=9166100, episode=1528 reward=0.7956138 (503.09 it/sec) -training >> step=9166200, episode=1528 reward=0.8034991 (491.23 it/sec) -training >> step=9166300, episode=1528 reward=0.7899321 (524.31 it/sec) -training >> step=9166400, episode=1528 reward=0.7888632 (534.28 it/sec) -training >> step=9166500, episode=1528 reward=0.808965 (461.13 it/sec) -training >> step=9166600, episode=1528 reward=0.7807434 (521.14 it/sec) -training >> step=9166700, episode=1528 reward=0.7984207 (481.07 it/sec) -training >> step=9166800, episode=1528 reward=0.8018523 (506.27 it/sec) -training >> step=9166900, episode=1528 reward=0.7701493 (445.05 it/sec) -training >> step=9167000, episode=1528 reward=0.7990105 (478.44 it/sec) -training >> step=9167100, episode=1528 reward=0.8129109 (458.37 it/sec) -training >> step=9167200, episode=1528 reward=0.7965441 (442.06 it/sec) -training >> step=9167300, episode=1529 reward=0.7937427 (102.74 it/sec) -training >> step=9167400, episode=1529 reward=0.7928765 (360.78 it/sec) -training >> step=9167500, episode=1529 reward=0.7776247 (469.99 it/sec) -training >> step=9167600, episode=1529 reward=0.7907067 (458.27 it/sec) -training >> step=9167700, episode=1529 reward=0.800508 (431.83 it/sec) -training >> step=9167800, episode=1529 reward=0.7997419 (496.49 it/sec) -training >> step=9167900, episode=1529 reward=0.7809669 (487.84 it/sec) -training >> step=9168000, episode=1529 reward=0.8026574 (457.82 it/sec) -training >> step=9168100, episode=1529 reward=0.7968664 (470.02 it/sec) -training >> step=9168200, episode=1529 reward=0.7810535 (471.72 it/sec) -training >> step=9168300, episode=1529 reward=0.7807171 (475.68 it/sec) -training >> step=9168400, episode=1529 reward=0.7869599 (487.66 it/sec) -training >> step=9168500, episode=1529 reward=0.7988305 (460.39 it/sec) -training >> step=9168600, episode=1529 reward=0.7853038 (505.60 it/sec) -training >> step=9168700, episode=1529 reward=0.7958211 (456.93 it/sec) -training >> step=9168800, episode=1529 reward=0.781003 (477.37 it/sec) -training >> step=9168900, episode=1529 reward=0.7873886 (459.07 it/sec) -training >> step=9169000, episode=1529 reward=0.8137272 (483.32 it/sec) -training >> step=9169100, episode=1529 reward=0.7819857 (429.49 it/sec) -training >> step=9169200, episode=1529 reward=0.7751504 (449.83 it/sec) -training >> step=9169300, episode=1529 reward=0.7614715 (508.95 it/sec) -training >> step=9169400, episode=1529 reward=0.7768902 (489.59 it/sec) -training >> step=9169500, episode=1529 reward=0.7788669 (443.64 it/sec) -training >> step=9169600, episode=1529 reward=0.7836635 (483.67 it/sec) -training >> step=9169700, episode=1529 reward=0.7960602 (492.78 it/sec) -training >> step=9169800, episode=1529 reward=0.7955392 (475.39 it/sec) -training >> step=9169900, episode=1529 reward=0.7842991 (447.94 it/sec) -training >> step=9170000, episode=1529 reward=0.7979608 (481.61 it/sec) -training >> step=9170100, episode=1529 reward=0.7992582 (438.07 it/sec) -training >> step=9170200, episode=1529 reward=0.7732632 (496.04 it/sec) -training >> step=9170300, episode=1529 reward=0.7764965 (507.39 it/sec) -training >> step=9170400, episode=1529 reward=0.7987384 (474.46 it/sec) -training >> step=9170500, episode=1529 reward=0.7898905 (489.36 it/sec) -training >> step=9170600, episode=1529 reward=0.7981058 (508.85 it/sec) -training >> step=9170700, episode=1529 reward=0.81282 (492.80 it/sec) -training >> step=9170800, episode=1529 reward=0.7881021 (507.17 it/sec) -training >> step=9170900, episode=1529 reward=0.779655 (468.52 it/sec) -training >> step=9171000, episode=1529 reward=0.8013626 (453.95 it/sec) -training >> step=9171100, episode=1529 reward=0.7819909 (518.36 it/sec) -training >> step=9171200, episode=1529 reward=0.7928966 (473.83 it/sec) -training >> step=9171300, episode=1529 reward=0.7939438 (492.69 it/sec) -training >> step=9171400, episode=1529 reward=0.8066707 (489.20 it/sec) -training >> step=9171500, episode=1529 reward=0.8041006 (448.54 it/sec) -training >> step=9171600, episode=1529 reward=0.7996698 (473.85 it/sec) -training >> step=9171700, episode=1529 reward=0.8068061 (501.22 it/sec) -training >> step=9171800, episode=1529 reward=0.7899663 (515.26 it/sec) -training >> step=9171900, episode=1529 reward=0.8011183 (464.48 it/sec) -training >> step=9172000, episode=1529 reward=0.7848694 (467.46 it/sec) -training >> step=9172100, episode=1529 reward=0.7883065 (493.20 it/sec) -training >> step=9172200, episode=1529 reward=0.7647927 (485.39 it/sec) -training >> step=9172300, episode=1529 reward=0.7912279 (515.65 it/sec) -training >> step=9172400, episode=1529 reward=0.8058906 (493.14 it/sec) -training >> step=9172500, episode=1529 reward=0.795777 (486.76 it/sec) -training >> step=9172600, episode=1529 reward=0.7923996 (503.62 it/sec) -training >> step=9172700, episode=1529 reward=0.7812068 (476.43 it/sec) -training >> step=9172800, episode=1529 reward=0.7720917 (517.79 it/sec) -training >> step=9172900, episode=1529 reward=0.7801828 (494.12 it/sec) -training >> step=9173000, episode=1529 reward=0.787721 (441.62 it/sec) -training >> step=9173100, episode=1529 reward=0.7763286 (461.20 it/sec) -training >> step=9173200, episode=1529 reward=0.7867739 (444.15 it/sec) -training >> step=9173300, episode=1530 reward=0.79842 (64.85 it/sec) -training >> step=9173400, episode=1530 reward=0.7736504 (476.03 it/sec) -training >> step=9173500, episode=1530 reward=0.7734843 (495.46 it/sec) -training >> step=9173600, episode=1530 reward=0.7699731 (459.66 it/sec) -training >> step=9173700, episode=1530 reward=0.7831256 (430.85 it/sec) -training >> step=9173800, episode=1530 reward=0.8045802 (499.82 it/sec) -training >> step=9173900, episode=1530 reward=0.8031993 (485.79 it/sec) -training >> step=9174000, episode=1530 reward=0.7937195 (486.61 it/sec) -training >> step=9174100, episode=1530 reward=0.7771688 (530.60 it/sec) -training >> step=9174200, episode=1530 reward=0.7949186 (449.98 it/sec) -training >> step=9174300, episode=1530 reward=0.7677544 (418.70 it/sec) -training >> step=9174400, episode=1530 reward=0.8125701 (445.95 it/sec) -training >> step=9174500, episode=1530 reward=0.8102619 (434.22 it/sec) -training >> step=9174600, episode=1530 reward=0.7979742 (461.52 it/sec) -training >> step=9174700, episode=1530 reward=0.7902053 (450.36 it/sec) -training >> step=9174800, episode=1530 reward=0.7997658 (479.33 it/sec) -training >> step=9174900, episode=1530 reward=0.7912182 (460.10 it/sec) -training >> step=9175000, episode=1530 reward=0.8084655 (450.87 it/sec) -training >> step=9175100, episode=1530 reward=0.8111945 (477.86 it/sec) -training >> step=9175200, episode=1530 reward=0.814206 (481.78 it/sec) -training >> step=9175300, episode=1530 reward=0.8132719 (478.37 it/sec) -training >> step=9175400, episode=1530 reward=0.8022541 (472.99 it/sec) -training >> step=9175500, episode=1530 reward=0.7785953 (467.02 it/sec) -training >> step=9175600, episode=1530 reward=0.7892835 (476.41 it/sec) -training >> step=9175700, episode=1530 reward=0.78741 (440.18 it/sec) -training >> step=9175800, episode=1530 reward=0.7931065 (459.22 it/sec) -training >> step=9175900, episode=1530 reward=0.7875038 (474.58 it/sec) -training >> step=9176000, episode=1530 reward=0.8076795 (470.51 it/sec) -training >> step=9176100, episode=1530 reward=0.7927254 (462.98 it/sec) -training >> step=9176200, episode=1530 reward=0.7714929 (487.61 it/sec) -training >> step=9176300, episode=1530 reward=0.805981 (469.78 it/sec) -training >> step=9176400, episode=1530 reward=0.8002008 (441.43 it/sec) -training >> step=9176500, episode=1530 reward=0.7887781 (471.79 it/sec) -training >> step=9176600, episode=1530 reward=0.7784675 (451.77 it/sec) -training >> step=9176700, episode=1530 reward=0.7992649 (480.38 it/sec) -training >> step=9176800, episode=1530 reward=0.8019624 (522.70 it/sec) -training >> step=9176900, episode=1530 reward=0.7934819 (470.13 it/sec) -training >> step=9177000, episode=1530 reward=0.8088972 (486.52 it/sec) -training >> step=9177100, episode=1530 reward=0.7888082 (472.98 it/sec) -training >> step=9177200, episode=1530 reward=0.7894464 (477.16 it/sec) -training >> step=9177300, episode=1530 reward=0.8161903 (505.38 it/sec) -training >> step=9177400, episode=1530 reward=0.7861778 (506.86 it/sec) -training >> step=9177500, episode=1530 reward=0.7704139 (525.65 it/sec) -training >> step=9177600, episode=1530 reward=0.7685905 (480.69 it/sec) -training >> step=9177700, episode=1530 reward=0.7809548 (477.78 it/sec) -training >> step=9177800, episode=1530 reward=0.782031 (488.46 it/sec) -training >> step=9177900, episode=1530 reward=0.8077809 (486.49 it/sec) -training >> step=9178000, episode=1530 reward=0.7878616 (529.34 it/sec) -training >> step=9178100, episode=1530 reward=0.7863877 (505.24 it/sec) -training >> step=9178200, episode=1530 reward=0.7725726 (488.97 it/sec) -training >> step=9178300, episode=1530 reward=0.8101971 (488.15 it/sec) -training >> step=9178400, episode=1530 reward=0.7655951 (490.20 it/sec) -training >> step=9178500, episode=1530 reward=0.7759023 (514.64 it/sec) -training >> step=9178600, episode=1530 reward=0.7881523 (500.39 it/sec) -training >> step=9178700, episode=1530 reward=0.7855837 (506.42 it/sec) -training >> step=9178800, episode=1530 reward=0.7886746 (530.07 it/sec) -training >> step=9178900, episode=1530 reward=0.800643 (496.10 it/sec) -training >> step=9179000, episode=1530 reward=0.7993259 (456.90 it/sec) -training >> step=9179100, episode=1530 reward=0.7855965 (490.68 it/sec) -training >> step=9179200, episode=1530 reward=0.7860625 (512.67 it/sec) -training >> step=9179300, episode=1531 reward=0.8043571 (91.07 it/sec) -training >> step=9179400, episode=1531 reward=0.7778214 (516.68 it/sec) -training >> step=9179500, episode=1531 reward=0.7877388 (506.08 it/sec) -training >> step=9179600, episode=1531 reward=0.7843894 (517.81 it/sec) -training >> step=9179700, episode=1531 reward=0.7764308 (488.56 it/sec) -training >> step=9179800, episode=1531 reward=0.8039559 (535.16 it/sec) -training >> step=9179900, episode=1531 reward=0.7647735 (437.33 it/sec) -training >> step=9180000, episode=1531 reward=0.7956681 (510.74 it/sec) -training >> step=9180100, episode=1531 reward=0.7951226 (524.27 it/sec) -training >> step=9180200, episode=1531 reward=0.7857204 (469.91 it/sec) -training >> step=9180300, episode=1531 reward=0.8112291 (479.40 it/sec) -training >> step=9180400, episode=1531 reward=0.7837141 (470.09 it/sec) -training >> step=9180500, episode=1531 reward=0.8008747 (477.83 it/sec) -training >> step=9180600, episode=1531 reward=0.7967648 (506.32 it/sec) -training >> step=9180700, episode=1531 reward=0.7913592 (473.28 it/sec) -training >> step=9180800, episode=1531 reward=0.8147731 (488.51 it/sec) -training >> step=9180900, episode=1531 reward=0.8017099 (519.71 it/sec) -training >> step=9181000, episode=1531 reward=0.7933082 (484.15 it/sec) -training >> step=9181100, episode=1531 reward=0.8073922 (490.86 it/sec) -training >> step=9181200, episode=1531 reward=0.8069869 (515.38 it/sec) -training >> step=9181300, episode=1531 reward=0.8155995 (498.88 it/sec) -training >> step=9181400, episode=1531 reward=0.7887394 (491.33 it/sec) -training >> step=9181500, episode=1531 reward=0.7950023 (496.40 it/sec) -training >> step=9181600, episode=1531 reward=0.787582 (549.14 it/sec) -training >> step=9181700, episode=1531 reward=0.7763742 (510.93 it/sec) -training >> step=9181800, episode=1531 reward=0.7967893 (516.77 it/sec) -training >> step=9181900, episode=1531 reward=0.791221 (529.00 it/sec) -training >> step=9182000, episode=1531 reward=0.7817489 (484.81 it/sec) -training >> step=9182100, episode=1531 reward=0.7769355 (435.08 it/sec) -training >> step=9182200, episode=1531 reward=0.7976218 (474.68 it/sec) -training >> step=9182300, episode=1531 reward=0.7939553 (465.43 it/sec) -training >> step=9182400, episode=1531 reward=0.7883291 (443.77 it/sec) -training >> step=9182500, episode=1531 reward=0.8044142 (412.15 it/sec) -training >> step=9182600, episode=1531 reward=0.7989604 (481.02 it/sec) -training >> step=9182700, episode=1531 reward=0.7986815 (485.22 it/sec) -training >> step=9182800, episode=1531 reward=0.8076227 (494.50 it/sec) -training >> step=9182900, episode=1531 reward=0.7936193 (494.81 it/sec) -training >> step=9183000, episode=1531 reward=0.7867541 (453.80 it/sec) -training >> step=9183100, episode=1531 reward=0.7982346 (468.01 it/sec) -training >> step=9183200, episode=1531 reward=0.7817811 (461.00 it/sec) -training >> step=9183300, episode=1531 reward=0.802112 (458.69 it/sec) -training >> step=9183400, episode=1531 reward=0.7975395 (459.17 it/sec) -training >> step=9183500, episode=1531 reward=0.7842976 (474.18 it/sec) -training >> step=9183600, episode=1531 reward=0.7754748 (431.38 it/sec) -training >> step=9183700, episode=1531 reward=0.7732661 (516.45 it/sec) -training >> step=9183800, episode=1531 reward=0.7898983 (462.29 it/sec) -training >> step=9183900, episode=1531 reward=0.7897859 (471.49 it/sec) -training >> step=9184000, episode=1531 reward=0.7893425 (490.50 it/sec) -training >> step=9184100, episode=1531 reward=0.783389 (510.56 it/sec) -training >> step=9184200, episode=1531 reward=0.790613 (423.66 it/sec) -training >> step=9184300, episode=1531 reward=0.808335 (458.85 it/sec) -training >> step=9184400, episode=1531 reward=0.7948315 (476.71 it/sec) -training >> step=9184500, episode=1531 reward=0.8001865 (499.88 it/sec) -training >> step=9184600, episode=1531 reward=0.7944993 (487.35 it/sec) -training >> step=9184700, episode=1531 reward=0.7932515 (438.37 it/sec) -training >> step=9184800, episode=1531 reward=0.7789336 (489.66 it/sec) -training >> step=9184900, episode=1531 reward=0.7990435 (457.16 it/sec) -training >> step=9185000, episode=1531 reward=0.7949361 (493.64 it/sec) -training >> step=9185100, episode=1531 reward=0.7660075 (457.57 it/sec) -training >> step=9185200, episode=1531 reward=0.7891408 (464.80 it/sec) -training >> step=9185300, episode=1532 reward=0.7846022 (46.14 it/sec) -training >> step=9185400, episode=1532 reward=0.7869334 (476.29 it/sec) -training >> step=9185500, episode=1532 reward=0.7875423 (511.53 it/sec) -training >> step=9185600, episode=1532 reward=0.7794559 (439.54 it/sec) -training >> step=9185700, episode=1532 reward=0.8042008 (410.49 it/sec) -training >> step=9185800, episode=1532 reward=0.7902839 (427.31 it/sec) -training >> step=9185900, episode=1532 reward=0.7812313 (424.70 it/sec) -training >> step=9186000, episode=1532 reward=0.7781711 (458.29 it/sec) -training >> step=9186100, episode=1532 reward=0.8067066 (488.62 it/sec) -training >> step=9186200, episode=1532 reward=0.8047823 (508.39 it/sec) -training >> step=9186300, episode=1532 reward=0.8059041 (471.16 it/sec) -training >> step=9186400, episode=1532 reward=0.7855859 (487.02 it/sec) -training >> step=9186500, episode=1532 reward=0.7813787 (509.60 it/sec) -training >> step=9186600, episode=1532 reward=0.798441 (468.31 it/sec) -training >> step=9186700, episode=1532 reward=0.7854471 (450.99 it/sec) -training >> step=9186800, episode=1532 reward=0.8004382 (468.88 it/sec) -training >> step=9186900, episode=1532 reward=0.7847707 (464.77 it/sec) -training >> step=9187000, episode=1532 reward=0.7666775 (487.64 it/sec) -training >> step=9187100, episode=1532 reward=0.7877607 (464.54 it/sec) -training >> step=9187200, episode=1532 reward=0.7951792 (465.28 it/sec) -training >> step=9187300, episode=1532 reward=0.7900308 (474.39 it/sec) -training >> step=9187400, episode=1532 reward=0.785018 (493.25 it/sec) -training >> step=9187500, episode=1532 reward=0.784315 (501.65 it/sec) -training >> step=9187600, episode=1532 reward=0.79633 (498.02 it/sec) -training >> step=9187700, episode=1532 reward=0.7960485 (463.88 it/sec) -training >> step=9187800, episode=1532 reward=0.8022427 (459.57 it/sec) -training >> step=9187900, episode=1532 reward=0.7681304 (502.67 it/sec) -training >> step=9188000, episode=1532 reward=0.8029285 (516.14 it/sec) -training >> step=9188100, episode=1532 reward=0.7812476 (493.83 it/sec) -training >> step=9188200, episode=1532 reward=0.7743769 (479.58 it/sec) -training >> step=9188300, episode=1532 reward=0.7975798 (515.13 it/sec) -training >> step=9188400, episode=1532 reward=0.7909338 (464.52 it/sec) -training >> step=9188500, episode=1532 reward=0.7848048 (466.72 it/sec) -training >> step=9188600, episode=1532 reward=0.7891654 (468.66 it/sec) -training >> step=9188700, episode=1532 reward=0.7851185 (481.10 it/sec) -training >> step=9188800, episode=1532 reward=0.7953548 (464.90 it/sec) -training >> step=9188900, episode=1532 reward=0.7872767 (476.00 it/sec) -training >> step=9189000, episode=1532 reward=0.7976058 (494.78 it/sec) -training >> step=9189100, episode=1532 reward=0.7741888 (460.65 it/sec) -training >> step=9189200, episode=1532 reward=0.8083106 (487.50 it/sec) -training >> step=9189300, episode=1532 reward=0.8071873 (488.56 it/sec) -training >> step=9189400, episode=1532 reward=0.7878593 (457.86 it/sec) -training >> step=9189500, episode=1532 reward=0.7707656 (488.14 it/sec) -training >> step=9189600, episode=1532 reward=0.7836777 (443.05 it/sec) -training >> step=9189700, episode=1532 reward=0.7987323 (499.25 it/sec) -training >> step=9189800, episode=1532 reward=0.7955851 (494.83 it/sec) -training >> step=9189900, episode=1532 reward=0.784139 (449.30 it/sec) -training >> step=9190000, episode=1532 reward=0.7768805 (429.20 it/sec) -training >> step=9190100, episode=1532 reward=0.7877349 (482.86 it/sec) -training >> step=9190200, episode=1532 reward=0.7859505 (452.81 it/sec) -training >> step=9190300, episode=1532 reward=0.7820786 (489.09 it/sec) -training >> step=9190400, episode=1532 reward=0.7933905 (500.41 it/sec) -training >> step=9190500, episode=1532 reward=0.7961961 (489.38 it/sec) -training >> step=9190600, episode=1532 reward=0.7752232 (499.01 it/sec) -training >> step=9190700, episode=1532 reward=0.803825 (495.21 it/sec) -training >> step=9190800, episode=1532 reward=0.7687782 (467.94 it/sec) -training >> step=9190900, episode=1532 reward=0.7766114 (508.49 it/sec) -training >> step=9191000, episode=1532 reward=0.7981485 (481.20 it/sec) -training >> step=9191100, episode=1532 reward=0.8003468 (477.50 it/sec) -training >> step=9191200, episode=1532 reward=0.780986 (480.68 it/sec) -training >> step=9191300, episode=1533 reward=0.7875476 (38.88 it/sec) -training >> step=9191400, episode=1533 reward=0.7906883 (484.27 it/sec) -training >> step=9191500, episode=1533 reward=0.7868106 (480.69 it/sec) -training >> step=9191600, episode=1533 reward=0.7754582 (479.88 it/sec) -training >> step=9191700, episode=1533 reward=0.782434 (478.26 it/sec) -training >> step=9191800, episode=1533 reward=0.7819095 (508.48 it/sec) -training >> step=9191900, episode=1533 reward=0.7992754 (461.58 it/sec) -training >> step=9192000, episode=1533 reward=0.7826213 (515.30 it/sec) -training >> step=9192100, episode=1533 reward=0.7840873 (489.79 it/sec) -training >> step=9192200, episode=1533 reward=0.7782308 (487.18 it/sec) -training >> step=9192300, episode=1533 reward=0.8045725 (504.87 it/sec) -training >> step=9192400, episode=1533 reward=0.7734254 (485.57 it/sec) -training >> step=9192500, episode=1533 reward=0.7876276 (488.82 it/sec) -training >> step=9192600, episode=1533 reward=0.8028 (520.09 it/sec) -training >> step=9192700, episode=1533 reward=0.7983891 (483.41 it/sec) -training >> step=9192800, episode=1533 reward=0.7806175 (519.38 it/sec) -training >> step=9192900, episode=1533 reward=0.7957326 (470.03 it/sec) -training >> step=9193000, episode=1533 reward=0.7794023 (468.35 it/sec) -training >> step=9193100, episode=1533 reward=0.7878691 (461.95 it/sec) -training >> step=9193200, episode=1533 reward=0.7846249 (490.02 it/sec) -training >> step=9193300, episode=1533 reward=0.7879511 (517.47 it/sec) -training >> step=9193400, episode=1533 reward=0.7892111 (492.79 it/sec) -training >> step=9193500, episode=1533 reward=0.7865695 (514.68 it/sec) -training >> step=9193600, episode=1533 reward=0.7982925 (467.43 it/sec) -training >> step=9193700, episode=1533 reward=0.7959139 (439.44 it/sec) -training >> step=9193800, episode=1533 reward=0.7805666 (498.63 it/sec) -training >> step=9193900, episode=1533 reward=0.8149261 (516.65 it/sec) -training >> step=9194000, episode=1533 reward=0.7916438 (510.58 it/sec) -training >> step=9194100, episode=1533 reward=0.7968578 (519.45 it/sec) -training >> step=9194200, episode=1533 reward=0.800885 (520.77 it/sec) -training >> step=9194300, episode=1533 reward=0.7897778 (495.82 it/sec) -training >> step=9194400, episode=1533 reward=0.7808075 (415.96 it/sec) -training >> step=9194500, episode=1533 reward=0.7989503 (492.76 it/sec) -training >> step=9194600, episode=1533 reward=0.795694 (518.11 it/sec) -training >> step=9194700, episode=1533 reward=0.8030692 (489.69 it/sec) -training >> step=9194800, episode=1533 reward=0.7961606 (511.46 it/sec) -training >> step=9194900, episode=1533 reward=0.7914316 (509.32 it/sec) -training >> step=9195000, episode=1533 reward=0.7877707 (484.08 it/sec) -training >> step=9195100, episode=1533 reward=0.7864735 (509.51 it/sec) -training >> step=9195200, episode=1533 reward=0.7935354 (493.84 it/sec) -training >> step=9195300, episode=1533 reward=0.7811647 (531.37 it/sec) -training >> step=9195400, episode=1533 reward=0.7937252 (504.93 it/sec) -training >> step=9195500, episode=1533 reward=0.7997827 (494.02 it/sec) -training >> step=9195600, episode=1533 reward=0.7927026 (526.98 it/sec) -training >> step=9195700, episode=1533 reward=0.7833979 (452.92 it/sec) -training >> step=9195800, episode=1533 reward=0.7891966 (476.96 it/sec) -training >> step=9195900, episode=1533 reward=0.7782503 (425.07 it/sec) -training >> step=9196000, episode=1533 reward=0.7644145 (475.91 it/sec) -training >> step=9196100, episode=1533 reward=0.7988656 (432.63 it/sec) -training >> step=9196200, episode=1533 reward=0.7930388 (464.71 it/sec) -training >> step=9196300, episode=1533 reward=0.7947959 (445.62 it/sec) -training >> step=9196400, episode=1533 reward=0.7940931 (479.76 it/sec) -training >> step=9196500, episode=1533 reward=0.7957035 (474.96 it/sec) -training >> step=9196600, episode=1533 reward=0.7767237 (485.34 it/sec) -training >> step=9196700, episode=1533 reward=0.8077784 (485.57 it/sec) -training >> step=9196800, episode=1533 reward=0.7991571 (500.60 it/sec) -training >> step=9196900, episode=1533 reward=0.799357 (504.64 it/sec) -training >> step=9197000, episode=1533 reward=0.7824413 (479.77 it/sec) -training >> step=9197100, episode=1533 reward=0.7800479 (514.58 it/sec) -training >> step=9197200, episode=1533 reward=0.789174 (464.55 it/sec) -training >> step=9197300, episode=1534 reward=0.7877414 (64.23 it/sec) -training >> step=9197400, episode=1534 reward=0.7831137 (512.57 it/sec) -training >> step=9197500, episode=1534 reward=0.7972034 (477.01 it/sec) -training >> step=9197600, episode=1534 reward=0.7774893 (495.39 it/sec) -training >> step=9197700, episode=1534 reward=0.7769162 (519.34 it/sec) -training >> step=9197800, episode=1534 reward=0.7936682 (449.25 it/sec) -training >> step=9197900, episode=1534 reward=0.7917371 (482.66 it/sec) -training >> step=9198000, episode=1534 reward=0.788213 (470.16 it/sec) -training >> step=9198100, episode=1534 reward=0.7797633 (530.70 it/sec) -training >> step=9198200, episode=1534 reward=0.794475 (448.08 it/sec) -training >> step=9198300, episode=1534 reward=0.7942043 (412.37 it/sec) -training >> step=9198400, episode=1534 reward=0.7811778 (428.50 it/sec) -training >> step=9198500, episode=1534 reward=0.7620518 (390.64 it/sec) -training >> step=9198600, episode=1534 reward=0.7982925 (388.39 it/sec) -training >> step=9198700, episode=1534 reward=0.8064114 (481.96 it/sec) -training >> step=9198800, episode=1534 reward=0.7880177 (470.64 it/sec) -training >> step=9198900, episode=1534 reward=0.8110083 (486.13 it/sec) -training >> step=9199000, episode=1534 reward=0.7730695 (519.01 it/sec) -training >> step=9199100, episode=1534 reward=0.788229 (473.09 it/sec) -training >> step=9199200, episode=1534 reward=0.8059378 (491.20 it/sec) -training >> step=9199300, episode=1534 reward=0.7996589 (503.54 it/sec) -training >> step=9199400, episode=1534 reward=0.7950851 (501.43 it/sec) -training >> step=9199500, episode=1534 reward=0.7856847 (496.56 it/sec) -training >> step=9199600, episode=1534 reward=0.7837397 (466.14 it/sec) -training >> step=9199700, episode=1534 reward=0.786379 (476.89 it/sec) -training >> step=9199800, episode=1534 reward=0.7913224 (498.58 it/sec) -training >> step=9199900, episode=1534 reward=0.7803506 (418.89 it/sec) -training >> step=9200000, episode=1534 reward=0.7884592 (444.16 it/sec) -training >> step=9200100, episode=1534 reward=0.7880229 (457.03 it/sec) -training >> step=9200200, episode=1534 reward=0.7932535 (447.89 it/sec) -training >> step=9200300, episode=1534 reward=0.777342 (452.24 it/sec) -training >> step=9200400, episode=1534 reward=0.7734744 (443.28 it/sec) -training >> step=9200500, episode=1534 reward=0.8102487 (510.00 it/sec) -training >> step=9200600, episode=1534 reward=0.7816671 (459.50 it/sec) -training >> step=9200700, episode=1534 reward=0.7740821 (472.48 it/sec) -training >> step=9200800, episode=1534 reward=0.7731845 (450.06 it/sec) -training >> step=9200900, episode=1534 reward=0.7856848 (441.64 it/sec) -training >> step=9201000, episode=1534 reward=0.7883801 (473.15 it/sec) -training >> step=9201100, episode=1534 reward=0.7896923 (473.48 it/sec) -training >> step=9201200, episode=1534 reward=0.8080719 (434.15 it/sec) -training >> step=9201300, episode=1534 reward=0.8176962 (461.91 it/sec) -training >> step=9201400, episode=1534 reward=0.7929829 (434.81 it/sec) -training >> step=9201500, episode=1534 reward=0.7981188 (508.37 it/sec) -training >> step=9201600, episode=1534 reward=0.8104982 (458.62 it/sec) -training >> step=9201700, episode=1534 reward=0.7901793 (474.93 it/sec) -training >> step=9201800, episode=1534 reward=0.7639609 (436.72 it/sec) -training >> step=9201900, episode=1534 reward=0.7890215 (507.34 it/sec) -training >> step=9202000, episode=1534 reward=0.7980481 (426.44 it/sec) -training >> step=9202100, episode=1534 reward=0.7954215 (437.98 it/sec) -training >> step=9202200, episode=1534 reward=0.7693672 (490.51 it/sec) -training >> step=9202300, episode=1534 reward=0.7903872 (493.55 it/sec) -training >> step=9202400, episode=1534 reward=0.7873349 (481.66 it/sec) -training >> step=9202500, episode=1534 reward=0.7875333 (427.35 it/sec) -training >> step=9202600, episode=1534 reward=0.7794687 (497.67 it/sec) -training >> step=9202700, episode=1534 reward=0.7982662 (470.98 it/sec) -training >> step=9202800, episode=1534 reward=0.789585 (458.36 it/sec) -training >> step=9202900, episode=1534 reward=0.7885575 (479.69 it/sec) -training >> step=9203000, episode=1534 reward=0.7846613 (494.36 it/sec) -training >> step=9203100, episode=1534 reward=0.7968727 (472.16 it/sec) -training >> step=9203200, episode=1534 reward=0.7922754 (455.24 it/sec) -training >> step=9203300, episode=1535 reward=0.7839701 (83.20 it/sec) -training >> step=9203400, episode=1535 reward=0.7835123 (306.70 it/sec) -training >> step=9203500, episode=1535 reward=0.7938097 (463.49 it/sec) -training >> step=9203600, episode=1535 reward=0.7751472 (483.73 it/sec) -training >> step=9203700, episode=1535 reward=0.7781754 (443.95 it/sec) -training >> step=9203800, episode=1535 reward=0.7958087 (471.05 it/sec) -training >> step=9203900, episode=1535 reward=0.7754493 (453.57 it/sec) -training >> step=9204000, episode=1535 reward=0.7996246 (449.05 it/sec) -training >> step=9204100, episode=1535 reward=0.7778493 (438.56 it/sec) -training >> step=9204200, episode=1535 reward=0.8043935 (452.75 it/sec) -training >> step=9204300, episode=1535 reward=0.7920186 (438.85 it/sec) -training >> step=9204400, episode=1535 reward=0.8077713 (461.28 it/sec) -training >> step=9204500, episode=1535 reward=0.793479 (460.21 it/sec) -training >> step=9204600, episode=1535 reward=0.7947579 (470.21 it/sec) -training >> step=9204700, episode=1535 reward=0.791957 (417.74 it/sec) -training >> step=9204800, episode=1535 reward=0.8012186 (458.96 it/sec) -training >> step=9204900, episode=1535 reward=0.8001466 (503.75 it/sec) -training >> step=9205000, episode=1535 reward=0.7819211 (492.55 it/sec) -training >> step=9205100, episode=1535 reward=0.7835833 (424.71 it/sec) -training >> step=9205200, episode=1535 reward=0.7789947 (491.47 it/sec) -training >> step=9205300, episode=1535 reward=0.7893785 (472.68 it/sec) -training >> step=9205400, episode=1535 reward=0.7910997 (466.37 it/sec) -training >> step=9205500, episode=1535 reward=0.7807272 (453.45 it/sec) -training >> step=9205600, episode=1535 reward=0.7962441 (484.83 it/sec) -training >> step=9205700, episode=1535 reward=0.7837419 (454.79 it/sec) -training >> step=9205800, episode=1535 reward=0.8083102 (471.70 it/sec) -training >> step=9205900, episode=1535 reward=0.7857867 (475.30 it/sec) -training >> step=9206000, episode=1535 reward=0.8007689 (497.78 it/sec) -training >> step=9206100, episode=1535 reward=0.7796081 (480.18 it/sec) -training >> step=9206200, episode=1535 reward=0.7896409 (522.03 it/sec) -training >> step=9206300, episode=1535 reward=0.7927107 (440.23 it/sec) -training >> step=9206400, episode=1535 reward=0.7915127 (492.08 it/sec) -training >> step=9206500, episode=1535 reward=0.7951409 (466.60 it/sec) -training >> step=9206600, episode=1535 reward=0.8142706 (460.73 it/sec) -training >> step=9206700, episode=1535 reward=0.7820639 (439.33 it/sec) -training >> step=9206800, episode=1535 reward=0.780969 (454.06 it/sec) -training >> step=9206900, episode=1535 reward=0.792278 (517.37 it/sec) -training >> step=9207000, episode=1535 reward=0.7941236 (523.79 it/sec) -training >> step=9207100, episode=1535 reward=0.7908321 (509.77 it/sec) -training >> step=9207200, episode=1535 reward=0.7937463 (522.82 it/sec) -training >> step=9207300, episode=1535 reward=0.7932221 (499.24 it/sec) -training >> step=9207400, episode=1535 reward=0.8012866 (509.77 it/sec) -training >> step=9207500, episode=1535 reward=0.7802079 (510.36 it/sec) -training >> step=9207600, episode=1535 reward=0.7846186 (508.27 it/sec) -training >> step=9207700, episode=1535 reward=0.7847028 (523.98 it/sec) -training >> step=9207800, episode=1535 reward=0.7828111 (499.10 it/sec) -training >> step=9207900, episode=1535 reward=0.8085878 (500.11 it/sec) -training >> step=9208000, episode=1535 reward=0.7859266 (490.00 it/sec) -training >> step=9208100, episode=1535 reward=0.7844517 (484.06 it/sec) -training >> step=9208200, episode=1535 reward=0.7918421 (528.42 it/sec) -training >> step=9208300, episode=1535 reward=0.8105239 (493.49 it/sec) -training >> step=9208400, episode=1535 reward=0.8049052 (451.51 it/sec) -training >> step=9208500, episode=1535 reward=0.7878355 (513.50 it/sec) -training >> step=9208600, episode=1535 reward=0.7963682 (471.31 it/sec) -training >> step=9208700, episode=1535 reward=0.7874556 (510.06 it/sec) -training >> step=9208800, episode=1535 reward=0.7997751 (496.59 it/sec) -training >> step=9208900, episode=1535 reward=0.8150443 (497.22 it/sec) -training >> step=9209000, episode=1535 reward=0.7994628 (471.28 it/sec) -training >> step=9209100, episode=1535 reward=0.7987397 (455.32 it/sec) -training >> step=9209200, episode=1535 reward=0.7713511 (515.94 it/sec) -training >> step=9209300, episode=1536 reward=0.7841348 (114.35 it/sec) -training >> step=9209400, episode=1536 reward=0.8017752 (377.89 it/sec) -training >> step=9209500, episode=1536 reward=0.7664611 (523.80 it/sec) -training >> step=9209600, episode=1536 reward=0.7757566 (518.39 it/sec) -training >> step=9209700, episode=1536 reward=0.7941401 (518.67 it/sec) -training >> step=9209800, episode=1536 reward=0.7587445 (507.97 it/sec) -training >> step=9209900, episode=1536 reward=0.8042178 (529.76 it/sec) -training >> step=9210000, episode=1536 reward=0.8011287 (503.40 it/sec) -training >> step=9210100, episode=1536 reward=0.8005007 (489.78 it/sec) -training >> step=9210200, episode=1536 reward=0.782084 (464.16 it/sec) -training >> step=9210300, episode=1536 reward=0.7878209 (505.69 it/sec) -training >> step=9210400, episode=1536 reward=0.7929903 (495.33 it/sec) -training >> step=9210500, episode=1536 reward=0.7957535 (493.22 it/sec) -training >> step=9210600, episode=1536 reward=0.7814629 (483.16 it/sec) -training >> step=9210700, episode=1536 reward=0.7780315 (526.60 it/sec) -training >> step=9210800, episode=1536 reward=0.8025164 (502.58 it/sec) -training >> step=9210900, episode=1536 reward=0.7794126 (471.14 it/sec) -training >> step=9211000, episode=1536 reward=0.798268 (512.49 it/sec) -training >> step=9211100, episode=1536 reward=0.7882707 (501.00 it/sec) -training >> step=9211200, episode=1536 reward=0.7969911 (509.18 it/sec) -training >> step=9211300, episode=1536 reward=0.7930588 (501.14 it/sec) -training >> step=9211400, episode=1536 reward=0.7819951 (519.01 it/sec) -training >> step=9211500, episode=1536 reward=0.7827168 (466.62 it/sec) -training >> step=9211600, episode=1536 reward=0.8025395 (444.41 it/sec) -training >> step=9211700, episode=1536 reward=0.825732 (407.82 it/sec) -training >> step=9211800, episode=1536 reward=0.7706204 (393.39 it/sec) -training >> step=9211900, episode=1536 reward=0.7693653 (437.12 it/sec) -training >> step=9212000, episode=1536 reward=0.7836229 (464.89 it/sec) -training >> step=9212100, episode=1536 reward=0.7840274 (524.30 it/sec) -training >> step=9212200, episode=1536 reward=0.7882986 (470.84 it/sec) -training >> step=9212300, episode=1536 reward=0.8038318 (517.60 it/sec) -training >> step=9212400, episode=1536 reward=0.787266 (478.63 it/sec) -training >> step=9212500, episode=1536 reward=0.7988338 (528.73 it/sec) -training >> step=9212600, episode=1536 reward=0.7865091 (513.10 it/sec) -training >> step=9212700, episode=1536 reward=0.7852447 (504.05 it/sec) -training >> step=9212800, episode=1536 reward=0.7763951 (538.87 it/sec) -training >> step=9212900, episode=1536 reward=0.7948685 (509.30 it/sec) -training >> step=9213000, episode=1536 reward=0.8018696 (493.49 it/sec) -training >> step=9213100, episode=1536 reward=0.8055525 (436.19 it/sec) -training >> step=9213200, episode=1536 reward=0.7879741 (497.68 it/sec) -training >> step=9213300, episode=1536 reward=0.7841557 (492.52 it/sec) -training >> step=9213400, episode=1536 reward=0.8022664 (499.86 it/sec) -training >> step=9213500, episode=1536 reward=0.7935134 (443.22 it/sec) -training >> step=9213600, episode=1536 reward=0.8084805 (490.43 it/sec) -training >> step=9213700, episode=1536 reward=0.7927273 (473.83 it/sec) -training >> step=9213800, episode=1536 reward=0.7788315 (501.42 it/sec) -training >> step=9213900, episode=1536 reward=0.7727134 (534.26 it/sec) -training >> step=9214000, episode=1536 reward=0.7877792 (493.24 it/sec) -training >> step=9214100, episode=1536 reward=0.786271 (487.94 it/sec) -training >> step=9214200, episode=1536 reward=0.7930988 (475.26 it/sec) -training >> step=9214300, episode=1536 reward=0.7770759 (554.17 it/sec) -training >> step=9214400, episode=1536 reward=0.7969501 (506.35 it/sec) -training >> step=9214500, episode=1536 reward=0.8013368 (481.01 it/sec) -training >> step=9214600, episode=1536 reward=0.7733815 (501.32 it/sec) -training >> step=9214700, episode=1536 reward=0.7913027 (479.06 it/sec) -training >> step=9214800, episode=1536 reward=0.7830957 (469.76 it/sec) -training >> step=9214900, episode=1536 reward=0.7960211 (535.75 it/sec) -training >> step=9215000, episode=1536 reward=0.8051023 (511.10 it/sec) -training >> step=9215100, episode=1536 reward=0.7834466 (498.53 it/sec) -training >> step=9215200, episode=1536 reward=0.7916546 (469.27 it/sec) -training >> step=9215300, episode=1537 reward=0.7945734 (117.17 it/sec) -training >> step=9215400, episode=1537 reward=0.807004 (452.06 it/sec) -training >> step=9215500, episode=1537 reward=0.8108573 (351.83 it/sec) -training >> step=9215600, episode=1537 reward=0.8002553 (407.87 it/sec) -training >> step=9215700, episode=1537 reward=0.7965561 (456.55 it/sec) -training >> step=9215800, episode=1537 reward=0.7994357 (477.65 it/sec) -training >> step=9215900, episode=1537 reward=0.7872317 (490.44 it/sec) -training >> step=9216000, episode=1537 reward=0.79567 (460.55 it/sec) -training >> step=9216100, episode=1537 reward=0.8064467 (527.55 it/sec) -training >> step=9216200, episode=1537 reward=0.7983339 (493.45 it/sec) -training >> step=9216300, episode=1537 reward=0.8000406 (469.70 it/sec) -training >> step=9216400, episode=1537 reward=0.7945405 (539.02 it/sec) -training >> step=9216500, episode=1537 reward=0.8049217 (510.78 it/sec) -training >> step=9216600, episode=1537 reward=0.7967091 (441.42 it/sec) -training >> step=9216700, episode=1537 reward=0.7972473 (501.73 it/sec) -training >> step=9216800, episode=1537 reward=0.7709287 (514.55 it/sec) -training >> step=9216900, episode=1537 reward=0.7968774 (463.43 it/sec) -training >> step=9217000, episode=1537 reward=0.793854 (512.05 it/sec) -training >> step=9217100, episode=1537 reward=0.8086166 (505.11 it/sec) -training >> step=9217200, episode=1537 reward=0.7874916 (525.94 it/sec) -training >> step=9217300, episode=1537 reward=0.779614 (491.68 it/sec) -training >> step=9217400, episode=1537 reward=0.7939474 (467.11 it/sec) -training >> step=9217500, episode=1537 reward=0.7952619 (488.63 it/sec) -training >> step=9217600, episode=1537 reward=0.8010573 (522.76 it/sec) -training >> step=9217700, episode=1537 reward=0.7893827 (481.82 it/sec) -training >> step=9217800, episode=1537 reward=0.7977921 (445.61 it/sec) -training >> step=9217900, episode=1537 reward=0.7700121 (500.66 it/sec) -training >> step=9218000, episode=1537 reward=0.7911211 (423.35 it/sec) -training >> step=9218100, episode=1537 reward=0.783197 (403.13 it/sec) -training >> step=9218200, episode=1537 reward=0.8168546 (477.77 it/sec) -training >> step=9218300, episode=1537 reward=0.7907806 (498.32 it/sec) -training >> step=9218400, episode=1537 reward=0.7910174 (510.64 it/sec) -training >> step=9218500, episode=1537 reward=0.7921512 (490.81 it/sec) -training >> step=9218600, episode=1537 reward=0.7928538 (514.19 it/sec) -training >> step=9218700, episode=1537 reward=0.8001286 (480.61 it/sec) -training >> step=9218800, episode=1537 reward=0.7946076 (482.74 it/sec) -training >> step=9218900, episode=1537 reward=0.8009813 (510.90 it/sec) -training >> step=9219000, episode=1537 reward=0.790038 (485.91 it/sec) -training >> step=9219100, episode=1537 reward=0.7941032 (482.58 it/sec) -training >> step=9219200, episode=1537 reward=0.7944611 (481.99 it/sec) -training >> step=9219300, episode=1537 reward=0.7872701 (517.17 it/sec) -training >> step=9219400, episode=1537 reward=0.7905198 (477.13 it/sec) -training >> step=9219500, episode=1537 reward=0.7890937 (510.97 it/sec) -training >> step=9219600, episode=1537 reward=0.8112239 (474.59 it/sec) -training >> step=9219700, episode=1537 reward=0.8060023 (511.57 it/sec) -training >> step=9219800, episode=1537 reward=0.7860624 (487.68 it/sec) -training >> step=9219900, episode=1537 reward=0.7996523 (496.68 it/sec) -training >> step=9220000, episode=1537 reward=0.7944225 (476.81 it/sec) -training >> step=9220100, episode=1537 reward=0.806206 (520.86 it/sec) -training >> step=9220200, episode=1537 reward=0.7974327 (476.09 it/sec) -training >> step=9220300, episode=1537 reward=0.76933 (500.76 it/sec) -training >> step=9220400, episode=1537 reward=0.7877846 (518.60 it/sec) -training >> step=9220500, episode=1537 reward=0.8039412 (480.61 it/sec) -training >> step=9220600, episode=1537 reward=0.7928833 (449.07 it/sec) -training >> step=9220700, episode=1537 reward=0.7769188 (498.27 it/sec) -training >> step=9220800, episode=1537 reward=0.7892932 (518.46 it/sec) -training >> step=9220900, episode=1537 reward=0.784864 (486.64 it/sec) -training >> step=9221000, episode=1537 reward=0.7976287 (487.31 it/sec) -training >> step=9221100, episode=1537 reward=0.7757689 (513.10 it/sec) -training >> step=9221200, episode=1537 reward=0.7752655 (495.83 it/sec) -training >> step=9221300, episode=1538 reward=0.7865741 (111.51 it/sec) -training >> step=9221400, episode=1538 reward=0.783591 (366.31 it/sec) -training >> step=9221500, episode=1538 reward=0.7788582 (488.23 it/sec) -training >> step=9221600, episode=1538 reward=0.7801296 (371.59 it/sec) -training >> step=9221700, episode=1538 reward=0.7863426 (533.46 it/sec) -training >> step=9221800, episode=1538 reward=0.8013896 (501.74 it/sec) -training >> step=9221900, episode=1538 reward=0.7832488 (453.32 it/sec) -training >> step=9222000, episode=1538 reward=0.7959479 (506.87 it/sec) -training >> step=9222100, episode=1538 reward=0.7865876 (495.78 it/sec) -training >> step=9222200, episode=1538 reward=0.7919518 (477.92 it/sec) -training >> step=9222300, episode=1538 reward=0.7930667 (496.77 it/sec) -training >> step=9222400, episode=1538 reward=0.7878727 (481.81 it/sec) -training >> step=9222500, episode=1538 reward=0.8058563 (522.18 it/sec) -training >> step=9222600, episode=1538 reward=0.7871127 (474.17 it/sec) -training >> step=9222700, episode=1538 reward=0.7952684 (482.53 it/sec) -training >> step=9222800, episode=1538 reward=0.8000352 (515.51 it/sec) -training >> step=9222900, episode=1538 reward=0.7750551 (483.51 it/sec) -training >> step=9223000, episode=1538 reward=0.7973382 (467.92 it/sec) -training >> step=9223100, episode=1538 reward=0.7886514 (498.55 it/sec) -training >> step=9223200, episode=1538 reward=0.7587509 (536.85 it/sec) -training >> step=9223300, episode=1538 reward=0.7836074 (505.62 it/sec) -training >> step=9223400, episode=1538 reward=0.7929404 (477.24 it/sec) -training >> step=9223500, episode=1538 reward=0.7863767 (516.51 it/sec) -training >> step=9223600, episode=1538 reward=0.7942768 (503.67 it/sec) -training >> step=9223700, episode=1538 reward=0.7961312 (529.45 it/sec) -training >> step=9223800, episode=1538 reward=0.8056667 (500.91 it/sec) -training >> step=9223900, episode=1538 reward=0.787043 (504.99 it/sec) -training >> step=9224000, episode=1538 reward=0.8028959 (486.35 it/sec) -training >> step=9224100, episode=1538 reward=0.7737158 (489.44 it/sec) -training >> step=9224200, episode=1538 reward=0.7747712 (563.89 it/sec) -training >> step=9224300, episode=1538 reward=0.7933187 (510.55 it/sec) -training >> step=9224400, episode=1538 reward=0.7909932 (489.23 it/sec) -training >> step=9224500, episode=1538 reward=0.804469 (491.64 it/sec) -training >> step=9224600, episode=1538 reward=0.7670181 (522.46 it/sec) -training >> step=9224700, episode=1538 reward=0.7785785 (494.71 it/sec) -training >> step=9224800, episode=1538 reward=0.7986165 (531.77 it/sec) -training >> step=9224900, episode=1538 reward=0.796028 (494.30 it/sec) -training >> step=9225000, episode=1538 reward=0.7965713 (518.77 it/sec) -training >> step=9225100, episode=1538 reward=0.7796288 (477.17 it/sec) -training >> step=9225200, episode=1538 reward=0.7815264 (523.62 it/sec) -training >> step=9225300, episode=1538 reward=0.7909229 (532.07 it/sec) -training >> step=9225400, episode=1538 reward=0.7972157 (509.48 it/sec) -training >> step=9225500, episode=1538 reward=0.7979639 (484.36 it/sec) -training >> step=9225600, episode=1538 reward=0.7852771 (420.79 it/sec) -training >> step=9225700, episode=1538 reward=0.8057789 (428.56 it/sec) -training >> step=9225800, episode=1538 reward=0.788189 (429.93 it/sec) -training >> step=9225900, episode=1538 reward=0.7758378 (367.48 it/sec) -training >> step=9226000, episode=1538 reward=0.7953104 (485.96 it/sec) -training >> step=9226100, episode=1538 reward=0.7789257 (470.75 it/sec) -training >> step=9226200, episode=1538 reward=0.7942104 (527.18 it/sec) -training >> step=9226300, episode=1538 reward=0.8172889 (510.12 it/sec) -training >> step=9226400, episode=1538 reward=0.7989652 (525.35 it/sec) -training >> step=9226500, episode=1538 reward=0.8075435 (505.68 it/sec) -training >> step=9226600, episode=1538 reward=0.7787565 (534.89 it/sec) -training >> step=9226700, episode=1538 reward=0.7940928 (470.20 it/sec) -training >> step=9226800, episode=1538 reward=0.7815366 (486.76 it/sec) -training >> step=9226900, episode=1538 reward=0.7940875 (503.55 it/sec) -training >> step=9227000, episode=1538 reward=0.7763836 (483.66 it/sec) -training >> step=9227100, episode=1538 reward=0.7949028 (544.51 it/sec) -training >> step=9227200, episode=1538 reward=0.8034892 (492.93 it/sec) -training >> step=9227300, episode=1539 reward=0.794426 (108.49 it/sec) -training >> step=9227400, episode=1539 reward=0.7842887 (491.68 it/sec) -training >> step=9227500, episode=1539 reward=0.7984783 (508.42 it/sec) -training >> step=9227600, episode=1539 reward=0.7740111 (361.10 it/sec) -training >> step=9227700, episode=1539 reward=0.7933603 (469.72 it/sec) -training >> step=9227800, episode=1539 reward=0.7770122 (505.74 it/sec) -training >> step=9227900, episode=1539 reward=0.7969947 (509.15 it/sec) -training >> step=9228000, episode=1539 reward=0.7744601 (481.08 it/sec) -training >> step=9228100, episode=1539 reward=0.8017918 (506.53 it/sec) -training >> step=9228200, episode=1539 reward=0.7975354 (547.40 it/sec) -training >> step=9228300, episode=1539 reward=0.777267 (517.82 it/sec) -training >> step=9228400, episode=1539 reward=0.8037333 (518.58 it/sec) -training >> step=9228500, episode=1539 reward=0.785774 (498.53 it/sec) -training >> step=9228600, episode=1539 reward=0.8027562 (444.20 it/sec) -training >> step=9228700, episode=1539 reward=0.7664788 (518.80 it/sec) -training >> step=9228800, episode=1539 reward=0.7987315 (469.82 it/sec) -training >> step=9228900, episode=1539 reward=0.7908844 (557.07 it/sec) -training >> step=9229000, episode=1539 reward=0.791303 (515.03 it/sec) -training >> step=9229100, episode=1539 reward=0.8124762 (487.97 it/sec) -training >> step=9229200, episode=1539 reward=0.7955686 (504.26 it/sec) -training >> step=9229300, episode=1539 reward=0.765075 (489.69 it/sec) -training >> step=9229400, episode=1539 reward=0.7790177 (460.20 it/sec) -training >> step=9229500, episode=1539 reward=0.7904646 (441.75 it/sec) -training >> step=9229600, episode=1539 reward=0.805276 (445.67 it/sec) -training >> step=9229700, episode=1539 reward=0.7793565 (460.75 it/sec) -training >> step=9229800, episode=1539 reward=0.7987835 (482.34 it/sec) -training >> step=9229900, episode=1539 reward=0.7938277 (505.77 it/sec) -training >> step=9230000, episode=1539 reward=0.7760972 (495.04 it/sec) -training >> step=9230100, episode=1539 reward=0.8136308 (479.48 it/sec) -training >> step=9230200, episode=1539 reward=0.7826017 (516.28 it/sec) -training >> step=9230300, episode=1539 reward=0.7965175 (502.21 it/sec) -training >> step=9230400, episode=1539 reward=0.7843922 (470.70 it/sec) -training >> step=9230500, episode=1539 reward=0.7880208 (470.09 it/sec) -training >> step=9230600, episode=1539 reward=0.7736701 (449.40 it/sec) -training >> step=9230700, episode=1539 reward=0.7839222 (483.87 it/sec) -training >> step=9230800, episode=1539 reward=0.7998623 (535.63 it/sec) -training >> step=9230900, episode=1539 reward=0.7787663 (521.05 it/sec) -training >> step=9231000, episode=1539 reward=0.7680948 (502.83 it/sec) -training >> step=9231100, episode=1539 reward=0.7943657 (476.61 it/sec) -training >> step=9231200, episode=1539 reward=0.7788529 (501.90 it/sec) -training >> step=9231300, episode=1539 reward=0.7872127 (520.51 it/sec) -training >> step=9231400, episode=1539 reward=0.7910854 (525.32 it/sec) -training >> step=9231500, episode=1539 reward=0.7885514 (507.34 it/sec) -training >> step=9231600, episode=1539 reward=0.7986498 (510.08 it/sec) -training >> step=9231700, episode=1539 reward=0.7879999 (494.99 it/sec) -training >> step=9231800, episode=1539 reward=0.8058801 (512.53 it/sec) -training >> step=9231900, episode=1539 reward=0.803596 (524.88 it/sec) -training >> step=9232000, episode=1539 reward=0.7787677 (520.20 it/sec) -training >> step=9232100, episode=1539 reward=0.7748958 (449.09 it/sec) -training >> step=9232200, episode=1539 reward=0.7975928 (486.60 it/sec) -training >> step=9232300, episode=1539 reward=0.8129776 (482.67 it/sec) -training >> step=9232400, episode=1539 reward=0.7718637 (501.50 it/sec) -training >> step=9232500, episode=1539 reward=0.7892933 (491.87 it/sec) -training >> step=9232600, episode=1539 reward=0.8001506 (431.47 it/sec) -training >> step=9232700, episode=1539 reward=0.7922338 (520.34 it/sec) -training >> step=9232800, episode=1539 reward=0.7945991 (498.88 it/sec) -training >> step=9232900, episode=1539 reward=0.7923713 (538.63 it/sec) -training >> step=9233000, episode=1539 reward=0.8168411 (481.34 it/sec) -training >> step=9233100, episode=1539 reward=0.7857733 (457.59 it/sec) -training >> step=9233200, episode=1539 reward=0.7722594 (473.19 it/sec) -training >> step=9233300, episode=1540 reward=0.7841896 (87.44 it/sec) -training >> step=9233400, episode=1540 reward=0.7949395 (501.03 it/sec) -training >> step=9233500, episode=1540 reward=0.783196 (499.29 it/sec) -training >> step=9233600, episode=1540 reward=0.7898256 (519.70 it/sec) -training >> step=9233700, episode=1540 reward=0.7849003 (406.38 it/sec) -training >> step=9233800, episode=1540 reward=0.8033038 (464.86 it/sec) -training >> step=9233900, episode=1540 reward=0.8105513 (546.35 it/sec) -training >> step=9234000, episode=1540 reward=0.7858532 (479.38 it/sec) -training >> step=9234100, episode=1540 reward=0.7912262 (480.00 it/sec) -training >> step=9234200, episode=1540 reward=0.8125253 (456.93 it/sec) -training >> step=9234300, episode=1540 reward=0.7811169 (487.65 it/sec) -training >> step=9234400, episode=1540 reward=0.7866741 (501.01 it/sec) -training >> step=9234500, episode=1540 reward=0.7777438 (490.38 it/sec) -training >> step=9234600, episode=1540 reward=0.7833962 (518.30 it/sec) -training >> step=9234700, episode=1540 reward=0.787087 (496.31 it/sec) -training >> step=9234800, episode=1540 reward=0.8007989 (507.18 it/sec) -training >> step=9234900, episode=1540 reward=0.7784125 (511.90 it/sec) -training >> step=9235000, episode=1540 reward=0.7986729 (547.51 it/sec) -training >> step=9235100, episode=1540 reward=0.788919 (456.04 it/sec) -training >> step=9235200, episode=1540 reward=0.798748 (458.29 it/sec) -training >> step=9235300, episode=1540 reward=0.8024314 (505.79 it/sec) -training >> step=9235400, episode=1540 reward=0.7781065 (545.44 it/sec) -training >> step=9235500, episode=1540 reward=0.7957228 (474.90 it/sec) -training >> step=9235600, episode=1540 reward=0.7856034 (476.91 it/sec) -training >> step=9235700, episode=1540 reward=0.79159 (499.50 it/sec) -training >> step=9235800, episode=1540 reward=0.7870321 (465.93 it/sec) -training >> step=9235900, episode=1540 reward=0.7877147 (477.49 it/sec) -training >> step=9236000, episode=1540 reward=0.7821734 (522.36 it/sec) -training >> step=9236100, episode=1540 reward=0.7937843 (494.95 it/sec) -training >> step=9236200, episode=1540 reward=0.8092601 (485.55 it/sec) -training >> step=9236300, episode=1540 reward=0.7925742 (461.29 it/sec) -training >> step=9236400, episode=1540 reward=0.7920687 (500.02 it/sec) -training >> step=9236500, episode=1540 reward=0.7723315 (513.93 it/sec) -training >> step=9236600, episode=1540 reward=0.7883615 (462.24 it/sec) -training >> step=9236700, episode=1540 reward=0.7954029 (478.89 it/sec) -training >> step=9236800, episode=1540 reward=0.8004416 (504.82 it/sec) -training >> step=9236900, episode=1540 reward=0.7785575 (484.96 it/sec) -training >> step=9237000, episode=1540 reward=0.8195149 (497.36 it/sec) -training >> step=9237100, episode=1540 reward=0.8006798 (496.20 it/sec) -training >> step=9237200, episode=1540 reward=0.7996109 (487.17 it/sec) -training >> step=9237300, episode=1540 reward=0.8149013 (488.13 it/sec) -training >> step=9237400, episode=1540 reward=0.7936362 (438.54 it/sec) -training >> step=9237500, episode=1540 reward=0.8001295 (486.48 it/sec) -training >> step=9237600, episode=1540 reward=0.8015956 (483.42 it/sec) -training >> step=9237700, episode=1540 reward=0.7862523 (497.44 it/sec) -training >> step=9237800, episode=1540 reward=0.7709548 (509.09 it/sec) -training >> step=9237900, episode=1540 reward=0.8010973 (467.51 it/sec) -training >> step=9238000, episode=1540 reward=0.8029314 (509.17 it/sec) -training >> step=9238100, episode=1540 reward=0.8033274 (502.17 it/sec) -training >> step=9238200, episode=1540 reward=0.7697492 (455.70 it/sec) -training >> step=9238300, episode=1540 reward=0.7999572 (496.77 it/sec) -training >> step=9238400, episode=1540 reward=0.7825929 (472.46 it/sec) -training >> step=9238500, episode=1540 reward=0.7942198 (498.11 it/sec) -training >> step=9238600, episode=1540 reward=0.8041874 (487.35 it/sec) -training >> step=9238700, episode=1540 reward=0.7964972 (383.39 it/sec) -training >> step=9238800, episode=1540 reward=0.7761183 (515.55 it/sec) -training >> step=9238900, episode=1540 reward=0.762808 (504.66 it/sec) -training >> step=9239000, episode=1540 reward=0.7991696 (509.63 it/sec) -training >> step=9239100, episode=1540 reward=0.7993614 (503.30 it/sec) -training >> step=9239200, episode=1540 reward=0.7798902 (508.49 it/sec) -training >> step=9239300, episode=1541 reward=0.7890351 (49.37 it/sec) -training >> step=9239400, episode=1541 reward=0.790497 (478.39 it/sec) -training >> step=9239500, episode=1541 reward=0.7862016 (498.82 it/sec) -training >> step=9239600, episode=1541 reward=0.7843902 (502.50 it/sec) -training >> step=9239700, episode=1541 reward=0.7788905 (494.26 it/sec) -training >> step=9239800, episode=1541 reward=0.7859387 (516.98 it/sec) -training >> step=9239900, episode=1541 reward=0.7892835 (348.02 it/sec) -training >> step=9240000, episode=1541 reward=0.789251 (508.86 it/sec) -training >> step=9240100, episode=1541 reward=0.8114139 (509.13 it/sec) -training >> step=9240200, episode=1541 reward=0.7967431 (489.32 it/sec) -training >> step=9240300, episode=1541 reward=0.7977484 (427.84 it/sec) -training >> step=9240400, episode=1541 reward=0.8028853 (473.56 it/sec) -training >> step=9240500, episode=1541 reward=0.7804004 (449.13 it/sec) -training >> step=9240600, episode=1541 reward=0.8005301 (403.09 it/sec) -training >> step=9240700, episode=1541 reward=0.7889841 (427.00 it/sec) -training >> step=9240800, episode=1541 reward=0.776751 (488.16 it/sec) -training >> step=9240900, episode=1541 reward=0.794991 (479.30 it/sec) -training >> step=9241000, episode=1541 reward=0.801955 (470.39 it/sec) -training >> step=9241100, episode=1541 reward=0.7820889 (508.19 it/sec) -training >> step=9241200, episode=1541 reward=0.8093983 (470.87 it/sec) -training >> step=9241300, episode=1541 reward=0.7813207 (458.52 it/sec) -training >> step=9241400, episode=1541 reward=0.7841267 (500.60 it/sec) -training >> step=9241500, episode=1541 reward=0.792946 (528.73 it/sec) -training >> step=9241600, episode=1541 reward=0.7842302 (490.79 it/sec) -training >> step=9241700, episode=1541 reward=0.8030981 (454.01 it/sec) -training >> step=9241800, episode=1541 reward=0.8117961 (485.66 it/sec) -training >> step=9241900, episode=1541 reward=0.7824963 (505.47 it/sec) -training >> step=9242000, episode=1541 reward=0.7841217 (502.07 it/sec) -training >> step=9242100, episode=1541 reward=0.7746581 (488.90 it/sec) -training >> step=9242200, episode=1541 reward=0.779128 (507.67 it/sec) -training >> step=9242300, episode=1541 reward=0.794493 (477.91 it/sec) -training >> step=9242400, episode=1541 reward=0.7677864 (469.21 it/sec) -training >> step=9242500, episode=1541 reward=0.7921724 (516.34 it/sec) -training >> step=9242600, episode=1541 reward=0.7987881 (458.75 it/sec) -training >> step=9242700, episode=1541 reward=0.7995327 (468.26 it/sec) -training >> step=9242800, episode=1541 reward=0.805877 (469.25 it/sec) -training >> step=9242900, episode=1541 reward=0.7760992 (500.26 it/sec) -training >> step=9243000, episode=1541 reward=0.804586 (486.29 it/sec) -training >> step=9243100, episode=1541 reward=0.7958061 (441.29 it/sec) -training >> step=9243200, episode=1541 reward=0.8008717 (502.36 it/sec) -training >> step=9243300, episode=1541 reward=0.8042862 (516.53 it/sec) -training >> step=9243400, episode=1541 reward=0.7893286 (479.02 it/sec) -training >> step=9243500, episode=1541 reward=0.7838541 (496.44 it/sec) -training >> step=9243600, episode=1541 reward=0.7830935 (485.57 it/sec) -training >> step=9243700, episode=1541 reward=0.8083143 (495.69 it/sec) -training >> step=9243800, episode=1541 reward=0.7978531 (509.16 it/sec) -training >> step=9243900, episode=1541 reward=0.8058306 (456.43 it/sec) -training >> step=9244000, episode=1541 reward=0.7718548 (526.80 it/sec) -training >> step=9244100, episode=1541 reward=0.797838 (443.41 it/sec) -training >> step=9244200, episode=1541 reward=0.8047796 (481.62 it/sec) -training >> step=9244300, episode=1541 reward=0.7828243 (490.85 it/sec) -training >> step=9244400, episode=1541 reward=0.7908692 (472.74 it/sec) -training >> step=9244500, episode=1541 reward=0.7900128 (474.58 it/sec) -training >> step=9244600, episode=1541 reward=0.7937758 (451.95 it/sec) -training >> step=9244700, episode=1541 reward=0.7628824 (500.01 it/sec) -training >> step=9244800, episode=1541 reward=0.7637525 (453.02 it/sec) -training >> step=9244900, episode=1541 reward=0.7981943 (499.71 it/sec) -training >> step=9245000, episode=1541 reward=0.8057807 (500.87 it/sec) -training >> step=9245100, episode=1541 reward=0.7804527 (510.95 it/sec) -training >> step=9245200, episode=1541 reward=0.7842419 (466.46 it/sec) -training >> step=9245300, episode=1542 reward=0.7920602 (121.88 it/sec) -training >> step=9245400, episode=1542 reward=0.7813329 (494.46 it/sec) -training >> step=9245500, episode=1542 reward=0.7792316 (496.38 it/sec) -training >> step=9245600, episode=1542 reward=0.7935553 (492.54 it/sec) -training >> step=9245700, episode=1542 reward=0.8013813 (510.83 it/sec) -training >> step=9245800, episode=1542 reward=0.798371 (482.44 it/sec) -training >> step=9245900, episode=1542 reward=0.7829263 (488.02 it/sec) -training >> step=9246000, episode=1542 reward=0.7950116 (348.13 it/sec) -training >> step=9246100, episode=1542 reward=0.8029954 (472.93 it/sec) -training >> step=9246200, episode=1542 reward=0.8087302 (524.36 it/sec) -training >> step=9246300, episode=1542 reward=0.7976869 (455.34 it/sec) -training >> step=9246400, episode=1542 reward=0.8078655 (482.20 it/sec) -training >> step=9246500, episode=1542 reward=0.7923698 (500.03 it/sec) -training >> step=9246600, episode=1542 reward=0.8113763 (479.55 it/sec) -training >> step=9246700, episode=1542 reward=0.7983867 (484.81 it/sec) -training >> step=9246800, episode=1542 reward=0.7801962 (466.70 it/sec) -training >> step=9246900, episode=1542 reward=0.7899851 (512.63 it/sec) -training >> step=9247000, episode=1542 reward=0.8011959 (469.80 it/sec) -training >> step=9247100, episode=1542 reward=0.7838851 (492.48 it/sec) -training >> step=9247200, episode=1542 reward=0.7791774 (495.18 it/sec) -training >> step=9247300, episode=1542 reward=0.7808414 (500.28 it/sec) -training >> step=9247400, episode=1542 reward=0.7586824 (497.16 it/sec) -training >> step=9247500, episode=1542 reward=0.7725799 (498.55 it/sec) -training >> step=9247600, episode=1542 reward=0.7881521 (507.26 it/sec) -training >> step=9247700, episode=1542 reward=0.789333 (475.59 it/sec) -training >> step=9247800, episode=1542 reward=0.788967 (463.77 it/sec) -training >> step=9247900, episode=1542 reward=0.7874759 (496.44 it/sec) -training >> step=9248000, episode=1542 reward=0.8043997 (517.48 it/sec) -training >> step=9248100, episode=1542 reward=0.7804967 (464.19 it/sec) -training >> step=9248200, episode=1542 reward=0.7906706 (471.60 it/sec) -training >> step=9248300, episode=1542 reward=0.7856495 (495.27 it/sec) -training >> step=9248400, episode=1542 reward=0.8055734 (493.61 it/sec) -training >> step=9248500, episode=1542 reward=0.8138959 (490.34 it/sec) -training >> step=9248600, episode=1542 reward=0.781667 (460.14 it/sec) -training >> step=9248700, episode=1542 reward=0.7852515 (513.81 it/sec) -training >> step=9248800, episode=1542 reward=0.7909804 (463.56 it/sec) -training >> step=9248900, episode=1542 reward=0.7948092 (464.76 it/sec) -training >> step=9249000, episode=1542 reward=0.7863837 (500.69 it/sec) -training >> step=9249100, episode=1542 reward=0.7959839 (510.96 it/sec) -training >> step=9249200, episode=1542 reward=0.7897765 (485.43 it/sec) -training >> step=9249300, episode=1542 reward=0.7902276 (475.55 it/sec) -training >> step=9249400, episode=1542 reward=0.8076212 (486.57 it/sec) -training >> step=9249500, episode=1542 reward=0.7742653 (479.95 it/sec) -training >> step=9249600, episode=1542 reward=0.7837253 (454.81 it/sec) -training >> step=9249700, episode=1542 reward=0.7748446 (497.06 it/sec) -training >> step=9249800, episode=1542 reward=0.7848868 (462.43 it/sec) -training >> step=9249900, episode=1542 reward=0.7992874 (436.58 it/sec) -training >> step=9250000, episode=1542 reward=0.7915319 (439.32 it/sec) -training >> step=9250100, episode=1542 reward=0.8071673 (511.13 it/sec) -training >> step=9250200, episode=1542 reward=0.7651104 (488.44 it/sec) -training >> step=9250300, episode=1542 reward=0.7851042 (494.35 it/sec) -training >> step=9250400, episode=1542 reward=0.7861099 (480.45 it/sec) -training >> step=9250500, episode=1542 reward=0.7850468 (531.65 it/sec) -training >> step=9250600, episode=1542 reward=0.8091238 (487.64 it/sec) -training >> step=9250700, episode=1542 reward=0.7890892 (499.44 it/sec) -training >> step=9250800, episode=1542 reward=0.785053 (493.86 it/sec) -training >> step=9250900, episode=1542 reward=0.7964358 (482.88 it/sec) -training >> step=9251000, episode=1542 reward=0.7896115 (516.96 it/sec) -training >> step=9251100, episode=1542 reward=0.7798856 (506.37 it/sec) -training >> step=9251200, episode=1542 reward=0.7871838 (523.85 it/sec) -training >> step=9251300, episode=1543 reward=0.7892727 (88.17 it/sec) -training >> step=9251400, episode=1543 reward=0.7946562 (459.77 it/sec) -training >> step=9251500, episode=1543 reward=0.773112 (475.11 it/sec) -training >> step=9251600, episode=1543 reward=0.7745308 (497.56 it/sec) -training >> step=9251700, episode=1543 reward=0.7878779 (412.25 it/sec) -training >> step=9251800, episode=1543 reward=0.7888679 (438.28 it/sec) -training >> step=9251900, episode=1543 reward=0.7880836 (480.43 it/sec) -training >> step=9252000, episode=1543 reward=0.8069596 (500.16 it/sec) -training >> step=9252100, episode=1543 reward=0.7811404 (373.34 it/sec) -training >> step=9252200, episode=1543 reward=0.8088196 (448.97 it/sec) -training >> step=9252300, episode=1543 reward=0.7858105 (429.16 it/sec) -training >> step=9252400, episode=1543 reward=0.8014978 (399.82 it/sec) -training >> step=9252500, episode=1543 reward=0.7940921 (451.77 it/sec) -training >> step=9252600, episode=1543 reward=0.7936248 (481.25 it/sec) -training >> step=9252700, episode=1543 reward=0.790935 (477.14 it/sec) -training >> step=9252800, episode=1543 reward=0.7941166 (477.83 it/sec) -training >> step=9252900, episode=1543 reward=0.774183 (498.94 it/sec) -training >> step=9253000, episode=1543 reward=0.7863725 (509.54 it/sec) -training >> step=9253100, episode=1543 reward=0.797872 (496.73 it/sec) -training >> step=9253200, episode=1543 reward=0.7905759 (475.07 it/sec) -training >> step=9253300, episode=1543 reward=0.7760324 (489.59 it/sec) -training >> step=9253400, episode=1543 reward=0.7554337 (454.69 it/sec) -training >> step=9253500, episode=1543 reward=0.8026743 (460.12 it/sec) -training >> step=9253600, episode=1543 reward=0.8028285 (511.81 it/sec) -training >> step=9253700, episode=1543 reward=0.8030538 (498.43 it/sec) -training >> step=9253800, episode=1543 reward=0.800424 (479.53 it/sec) -training >> step=9253900, episode=1543 reward=0.7902393 (474.78 it/sec) -training >> step=9254000, episode=1543 reward=0.7894794 (465.74 it/sec) -training >> step=9254100, episode=1543 reward=0.7953889 (498.46 it/sec) -training >> step=9254200, episode=1543 reward=0.7869583 (484.01 it/sec) -training >> step=9254300, episode=1543 reward=0.787549 (474.82 it/sec) -training >> step=9254400, episode=1543 reward=0.7941763 (446.13 it/sec) -training >> step=9254500, episode=1543 reward=0.8111446 (504.08 it/sec) -training >> step=9254600, episode=1543 reward=0.7877405 (458.31 it/sec) -training >> step=9254700, episode=1543 reward=0.7786216 (492.70 it/sec) -training >> step=9254800, episode=1543 reward=0.7902204 (462.10 it/sec) -training >> step=9254900, episode=1543 reward=0.8028414 (484.64 it/sec) -training >> step=9255000, episode=1543 reward=0.790069 (484.88 it/sec) -training >> step=9255100, episode=1543 reward=0.7933978 (522.81 it/sec) -training >> step=9255200, episode=1543 reward=0.7846286 (456.22 it/sec) -training >> step=9255300, episode=1543 reward=0.788304 (485.63 it/sec) -training >> step=9255400, episode=1543 reward=0.8017657 (500.24 it/sec) -training >> step=9255500, episode=1543 reward=0.7825103 (520.46 it/sec) -training >> step=9255600, episode=1543 reward=0.7868292 (474.69 it/sec) -training >> step=9255700, episode=1543 reward=0.799557 (469.51 it/sec) -training >> step=9255800, episode=1543 reward=0.7911192 (539.25 it/sec) -training >> step=9255900, episode=1543 reward=0.7923495 (459.85 it/sec) -training >> step=9256000, episode=1543 reward=0.7767136 (505.43 it/sec) -training >> step=9256100, episode=1543 reward=0.7825688 (469.81 it/sec) -training >> step=9256200, episode=1543 reward=0.7946734 (514.42 it/sec) -training >> step=9256300, episode=1543 reward=0.7762691 (500.73 it/sec) -training >> step=9256400, episode=1543 reward=0.7694845 (444.44 it/sec) -training >> step=9256500, episode=1543 reward=0.7709257 (499.55 it/sec) -training >> step=9256600, episode=1543 reward=0.7984681 (484.35 it/sec) -training >> step=9256700, episode=1543 reward=0.7915043 (502.20 it/sec) -training >> step=9256800, episode=1543 reward=0.7795567 (513.12 it/sec) -training >> step=9256900, episode=1543 reward=0.7817821 (487.35 it/sec) -training >> step=9257000, episode=1543 reward=0.7892455 (477.28 it/sec) -training >> step=9257100, episode=1543 reward=0.7970976 (500.34 it/sec) -training >> step=9257200, episode=1543 reward=0.8093926 (520.69 it/sec) -training >> step=9257300, episode=1544 reward=0.7858045 (115.90 it/sec) -training >> step=9257400, episode=1544 reward=0.766827 (492.48 it/sec) -training >> step=9257500, episode=1544 reward=0.7837076 (497.69 it/sec) -training >> step=9257600, episode=1544 reward=0.7932655 (445.15 it/sec) -training >> step=9257700, episode=1544 reward=0.7765397 (475.20 it/sec) -training >> step=9257800, episode=1544 reward=0.7973142 (513.84 it/sec) -training >> step=9257900, episode=1544 reward=0.7984636 (495.88 it/sec) -training >> step=9258000, episode=1544 reward=0.7995888 (527.68 it/sec) -training >> step=9258100, episode=1544 reward=0.7999181 (468.39 it/sec) -training >> step=9258200, episode=1544 reward=0.7929822 (507.56 it/sec) -training >> step=9258300, episode=1544 reward=0.7854186 (488.53 it/sec) -training >> step=9258400, episode=1544 reward=0.7898564 (491.87 it/sec) -training >> step=9258500, episode=1544 reward=0.7855976 (377.37 it/sec) -training >> step=9258600, episode=1544 reward=0.7915322 (476.60 it/sec) -training >> step=9258700, episode=1544 reward=0.8016564 (480.24 it/sec) -training >> step=9258800, episode=1544 reward=0.7681402 (478.95 it/sec) -training >> step=9258900, episode=1544 reward=0.7739223 (456.63 it/sec) -training >> step=9259000, episode=1544 reward=0.8128586 (519.93 it/sec) -training >> step=9259100, episode=1544 reward=0.7926688 (468.47 it/sec) -training >> step=9259200, episode=1544 reward=0.7887484 (505.52 it/sec) -training >> step=9259300, episode=1544 reward=0.7807156 (499.50 it/sec) -training >> step=9259400, episode=1544 reward=0.7970123 (472.57 it/sec) -training >> step=9259500, episode=1544 reward=0.7742227 (484.87 it/sec) -training >> step=9259600, episode=1544 reward=0.7953114 (476.97 it/sec) -training >> step=9259700, episode=1544 reward=0.776861 (488.09 it/sec) -training >> step=9259800, episode=1544 reward=0.8012906 (507.53 it/sec) -training >> step=9259900, episode=1544 reward=0.7951388 (491.23 it/sec) -training >> step=9260000, episode=1544 reward=0.7963156 (438.97 it/sec) -training >> step=9260100, episode=1544 reward=0.7731807 (482.07 it/sec) -training >> step=9260200, episode=1544 reward=0.7956051 (497.39 it/sec) -training >> step=9260300, episode=1544 reward=0.8058021 (461.53 it/sec) -training >> step=9260400, episode=1544 reward=0.7782115 (484.52 it/sec) -training >> step=9260500, episode=1544 reward=0.7793834 (505.60 it/sec) -training >> step=9260600, episode=1544 reward=0.7789078 (434.40 it/sec) -training >> step=9260700, episode=1544 reward=0.7890286 (458.92 it/sec) -training >> step=9260800, episode=1544 reward=0.7862473 (513.64 it/sec) -training >> step=9260900, episode=1544 reward=0.7756495 (496.55 it/sec) -training >> step=9261000, episode=1544 reward=0.7982665 (498.12 it/sec) -training >> step=9261100, episode=1544 reward=0.8025387 (425.28 it/sec) -training >> step=9261200, episode=1544 reward=0.7954447 (465.33 it/sec) -training >> step=9261300, episode=1544 reward=0.8032364 (469.80 it/sec) -training >> step=9261400, episode=1544 reward=0.8000693 (467.92 it/sec) -training >> step=9261500, episode=1544 reward=0.7784872 (479.17 it/sec) -training >> step=9261600, episode=1544 reward=0.7910787 (499.84 it/sec) -training >> step=9261700, episode=1544 reward=0.7635309 (504.86 it/sec) -training >> step=9261800, episode=1544 reward=0.7814272 (472.21 it/sec) -training >> step=9261900, episode=1544 reward=0.7998406 (528.31 it/sec) -training >> step=9262000, episode=1544 reward=0.8031753 (506.09 it/sec) -training >> step=9262100, episode=1544 reward=0.7993647 (462.49 it/sec) -training >> step=9262200, episode=1544 reward=0.7950758 (485.69 it/sec) -training >> step=9262300, episode=1544 reward=0.7892256 (440.17 it/sec) -training >> step=9262400, episode=1544 reward=0.8026333 (510.80 it/sec) -training >> step=9262500, episode=1544 reward=0.7937944 (484.56 it/sec) -training >> step=9262600, episode=1544 reward=0.8119659 (448.74 it/sec) -training >> step=9262700, episode=1544 reward=0.7921624 (488.95 it/sec) -training >> step=9262800, episode=1544 reward=0.8045042 (493.18 it/sec) -training >> step=9262900, episode=1544 reward=0.790382 (446.55 it/sec) -training >> step=9263000, episode=1544 reward=0.7662531 (518.81 it/sec) -training >> step=9263100, episode=1544 reward=0.7817311 (464.52 it/sec) -training >> step=9263200, episode=1544 reward=0.7740428 (490.17 it/sec) -training >> step=9263300, episode=1545 reward=0.7890133 (109.32 it/sec) -training >> step=9263400, episode=1545 reward=0.7637316 (487.05 it/sec) -training >> step=9263500, episode=1545 reward=0.7893628 (459.68 it/sec) -training >> step=9263600, episode=1545 reward=0.7800198 (494.90 it/sec) -training >> step=9263700, episode=1545 reward=0.7806776 (478.16 it/sec) -training >> step=9263800, episode=1545 reward=0.7669694 (482.90 it/sec) -training >> step=9263900, episode=1545 reward=0.7653075 (504.57 it/sec) -training >> step=9264000, episode=1545 reward=0.7925342 (514.76 it/sec) -training >> step=9264100, episode=1545 reward=0.7699143 (517.70 it/sec) -training >> step=9264200, episode=1545 reward=0.8015465 (525.93 it/sec) -training >> step=9264300, episode=1545 reward=0.7709814 (439.08 it/sec) -training >> step=9264400, episode=1545 reward=0.7632156 (501.63 it/sec) -training >> step=9264500, episode=1545 reward=0.7901633 (491.89 it/sec) -training >> step=9264600, episode=1545 reward=0.7708809 (463.92 it/sec) -training >> step=9264700, episode=1545 reward=0.7764573 (389.44 it/sec) -training >> step=9264800, episode=1545 reward=0.8014579 (487.84 it/sec) -training >> step=9264900, episode=1545 reward=0.7907193 (454.46 it/sec) -training >> step=9265000, episode=1545 reward=0.784401 (480.43 it/sec) -training >> step=9265100, episode=1545 reward=0.7904773 (510.75 it/sec) -training >> step=9265200, episode=1545 reward=0.7878061 (543.02 it/sec) -training >> step=9265300, episode=1545 reward=0.8108984 (419.40 it/sec) -training >> step=9265400, episode=1545 reward=0.8054339 (457.50 it/sec) -training >> step=9265500, episode=1545 reward=0.7964332 (513.54 it/sec) -training >> step=9265600, episode=1545 reward=0.8013143 (471.92 it/sec) -training >> step=9265700, episode=1545 reward=0.7923581 (481.55 it/sec) -training >> step=9265800, episode=1545 reward=0.7790299 (454.13 it/sec) -training >> step=9265900, episode=1545 reward=0.7771607 (448.42 it/sec) -training >> step=9266000, episode=1545 reward=0.7974671 (421.61 it/sec) -training >> step=9266100, episode=1545 reward=0.7781081 (402.81 it/sec) -training >> step=9266200, episode=1545 reward=0.7658714 (389.82 it/sec) -training >> step=9266300, episode=1545 reward=0.7895628 (406.84 it/sec) -training >> step=9266400, episode=1545 reward=0.7900996 (440.26 it/sec) -training >> step=9266500, episode=1545 reward=0.800792 (481.58 it/sec) -training >> step=9266600, episode=1545 reward=0.7934528 (534.39 it/sec) -training >> step=9266700, episode=1545 reward=0.7742252 (474.49 it/sec) -training >> step=9266800, episode=1545 reward=0.7903202 (453.96 it/sec) -training >> step=9266900, episode=1545 reward=0.7796023 (530.88 it/sec) -training >> step=9267000, episode=1545 reward=0.7954115 (452.18 it/sec) -training >> step=9267100, episode=1545 reward=0.7975342 (490.12 it/sec) -training >> step=9267200, episode=1545 reward=0.7849118 (455.69 it/sec) -training >> step=9267300, episode=1545 reward=0.782476 (486.02 it/sec) -training >> step=9267400, episode=1545 reward=0.7962673 (435.40 it/sec) -training >> step=9267500, episode=1545 reward=0.785916 (505.20 it/sec) -training >> step=9267600, episode=1545 reward=0.7864223 (462.13 it/sec) -training >> step=9267700, episode=1545 reward=0.7700534 (485.43 it/sec) -training >> step=9267800, episode=1545 reward=0.7745661 (505.07 it/sec) -training >> step=9267900, episode=1545 reward=0.7951391 (524.04 it/sec) -training >> step=9268000, episode=1545 reward=0.7860371 (502.02 it/sec) -training >> step=9268100, episode=1545 reward=0.7572783 (462.07 it/sec) -training >> step=9268200, episode=1545 reward=0.765033 (474.51 it/sec) -training >> step=9268300, episode=1545 reward=0.7948934 (496.03 it/sec) -training >> step=9268400, episode=1545 reward=0.7867013 (531.03 it/sec) -training >> step=9268500, episode=1545 reward=0.8079882 (505.25 it/sec) -training >> step=9268600, episode=1545 reward=0.793384 (444.74 it/sec) -training >> step=9268700, episode=1545 reward=0.7898958 (455.84 it/sec) -training >> step=9268800, episode=1545 reward=0.7921717 (499.00 it/sec) -training >> step=9268900, episode=1545 reward=0.7710104 (463.08 it/sec) -training >> step=9269000, episode=1545 reward=0.775851 (508.09 it/sec) -training >> step=9269100, episode=1545 reward=0.7859539 (506.69 it/sec) -training >> step=9269200, episode=1545 reward=0.7873793 (461.67 it/sec) -training >> step=9269300, episode=1546 reward=0.7844568 (141.90 it/sec) -training >> step=9269400, episode=1546 reward=0.8069137 (473.33 it/sec) -training >> step=9269500, episode=1546 reward=0.792219 (452.47 it/sec) -training >> step=9269600, episode=1546 reward=0.8045116 (455.18 it/sec) -training >> step=9269700, episode=1546 reward=0.803812 (508.94 it/sec) -training >> step=9269800, episode=1546 reward=0.8019721 (480.61 it/sec) -training >> step=9269900, episode=1546 reward=0.7749855 (517.38 it/sec) -training >> step=9270000, episode=1546 reward=0.7887006 (438.86 it/sec) -training >> step=9270100, episode=1546 reward=0.7812374 (501.24 it/sec) -training >> step=9270200, episode=1546 reward=0.8007663 (531.66 it/sec) -training >> step=9270300, episode=1546 reward=0.799697 (463.01 it/sec) -training >> step=9270400, episode=1546 reward=0.7865624 (440.90 it/sec) -training >> step=9270500, episode=1546 reward=0.7779725 (450.06 it/sec) -training >> step=9270600, episode=1546 reward=0.7985076 (474.57 it/sec) -training >> step=9270700, episode=1546 reward=0.7926388 (473.79 it/sec) -training >> step=9270800, episode=1546 reward=0.8002348 (500.33 it/sec) -training >> step=9270900, episode=1546 reward=0.7843025 (395.62 it/sec) -training >> step=9271000, episode=1546 reward=0.7861986 (368.23 it/sec) -training >> step=9271100, episode=1546 reward=0.8022935 (483.00 it/sec) -training >> step=9271200, episode=1546 reward=0.7754965 (408.98 it/sec) -training >> step=9271300, episode=1546 reward=0.7693882 (495.07 it/sec) -training >> step=9271400, episode=1546 reward=0.7951221 (426.70 it/sec) -training >> step=9271500, episode=1546 reward=0.7754557 (487.19 it/sec) -training >> step=9271600, episode=1546 reward=0.7841212 (486.85 it/sec) -training >> step=9271700, episode=1546 reward=0.8034795 (500.87 it/sec) -training >> step=9271800, episode=1546 reward=0.7729032 (478.89 it/sec) -training >> step=9271900, episode=1546 reward=0.8045217 (457.21 it/sec) -training >> step=9272000, episode=1546 reward=0.8064096 (543.14 it/sec) -training >> step=9272100, episode=1546 reward=0.7920424 (485.23 it/sec) -training >> step=9272200, episode=1546 reward=0.7744783 (483.20 it/sec) -training >> step=9272300, episode=1546 reward=0.7731812 (496.90 it/sec) -training >> step=9272400, episode=1546 reward=0.7857477 (485.37 it/sec) -training >> step=9272500, episode=1546 reward=0.793378 (451.01 it/sec) -training >> step=9272600, episode=1546 reward=0.8018699 (458.38 it/sec) -training >> step=9272700, episode=1546 reward=0.7860959 (514.79 it/sec) -training >> step=9272800, episode=1546 reward=0.7703192 (450.14 it/sec) -training >> step=9272900, episode=1546 reward=0.783291 (441.03 it/sec) -training >> step=9273000, episode=1546 reward=0.7772695 (490.93 it/sec) -training >> step=9273100, episode=1546 reward=0.789826 (427.51 it/sec) -training >> step=9273200, episode=1546 reward=0.7926378 (527.00 it/sec) -training >> step=9273300, episode=1546 reward=0.7892489 (513.11 it/sec) -training >> step=9273400, episode=1546 reward=0.7965502 (465.43 it/sec) -training >> step=9273500, episode=1546 reward=0.7673587 (517.09 it/sec) -training >> step=9273600, episode=1546 reward=0.7829893 (508.86 it/sec) -training >> step=9273700, episode=1546 reward=0.7968575 (475.04 it/sec) -training >> step=9273800, episode=1546 reward=0.7896655 (488.64 it/sec) -training >> step=9273900, episode=1546 reward=0.7791045 (449.25 it/sec) -training >> step=9274000, episode=1546 reward=0.7870303 (486.59 it/sec) -training >> step=9274100, episode=1546 reward=0.7852855 (535.52 it/sec) -training >> step=9274200, episode=1546 reward=0.7829673 (466.51 it/sec) -training >> step=9274300, episode=1546 reward=0.7771571 (432.92 it/sec) -training >> step=9274400, episode=1546 reward=0.7759449 (434.48 it/sec) -training >> step=9274500, episode=1546 reward=0.7749485 (531.65 it/sec) -training >> step=9274600, episode=1546 reward=0.7815255 (494.35 it/sec) -training >> step=9274700, episode=1546 reward=0.78918 (535.60 it/sec) -training >> step=9274800, episode=1546 reward=0.7986283 (532.58 it/sec) -training >> step=9274900, episode=1546 reward=0.8132479 (580.09 it/sec) -training >> step=9275000, episode=1546 reward=0.8019241 (501.55 it/sec) -training >> step=9275100, episode=1546 reward=0.7856013 (538.85 it/sec) -training >> step=9275200, episode=1546 reward=0.7871621 (531.90 it/sec) -training >> step=9275300, episode=1547 reward=0.7812122 (142.27 it/sec) -training >> step=9275400, episode=1547 reward=0.7972391 (553.42 it/sec) -training >> step=9275500, episode=1547 reward=0.7874517 (523.78 it/sec) -training >> step=9275600, episode=1547 reward=0.788255 (519.67 it/sec) -training >> step=9275700, episode=1547 reward=0.7851224 (545.35 it/sec) -training >> step=9275800, episode=1547 reward=0.7766077 (449.60 it/sec) -training >> step=9275900, episode=1547 reward=0.8150612 (539.11 it/sec) -training >> step=9276000, episode=1547 reward=0.8038517 (556.36 it/sec) -training >> step=9276100, episode=1547 reward=0.7877804 (487.90 it/sec) -training >> step=9276200, episode=1547 reward=0.7898896 (486.51 it/sec) -training >> step=9276300, episode=1547 reward=0.7935621 (448.02 it/sec) -training >> step=9276400, episode=1547 reward=0.7898988 (506.31 it/sec) -training >> step=9276500, episode=1547 reward=0.8043205 (495.39 it/sec) -training >> step=9276600, episode=1547 reward=0.7827799 (511.24 it/sec) -training >> step=9276700, episode=1547 reward=0.7891147 (523.20 it/sec) -training >> step=9276800, episode=1547 reward=0.7877157 (510.90 it/sec) -training >> step=9276900, episode=1547 reward=0.7923077 (510.44 it/sec) -training >> step=9277000, episode=1547 reward=0.7983714 (516.15 it/sec) -training >> step=9277100, episode=1547 reward=0.7857884 (351.03 it/sec) -training >> step=9277200, episode=1547 reward=0.790161 (513.73 it/sec) -training >> step=9277300, episode=1547 reward=0.7855143 (463.12 it/sec) -training >> step=9277400, episode=1547 reward=0.7915714 (533.66 it/sec) -training >> step=9277500, episode=1547 reward=0.7927551 (508.54 it/sec) -training >> step=9277600, episode=1547 reward=0.7853683 (502.01 it/sec) -training >> step=9277700, episode=1547 reward=0.7828487 (526.92 it/sec) -training >> step=9277800, episode=1547 reward=0.803271 (566.09 it/sec) -training >> step=9277900, episode=1547 reward=0.7718778 (479.46 it/sec) -training >> step=9278000, episode=1547 reward=0.8057966 (546.48 it/sec) -training >> step=9278100, episode=1547 reward=0.7940835 (535.11 it/sec) -training >> step=9278200, episode=1547 reward=0.8049824 (537.38 it/sec) -training >> step=9278300, episode=1547 reward=0.7860268 (479.72 it/sec) -training >> step=9278400, episode=1547 reward=0.7965119 (493.28 it/sec) -training >> step=9278500, episode=1547 reward=0.7673582 (506.90 it/sec) -training >> step=9278600, episode=1547 reward=0.7918276 (524.31 it/sec) -training >> step=9278700, episode=1547 reward=0.7913689 (435.18 it/sec) -training >> step=9278800, episode=1547 reward=0.7864051 (548.95 it/sec) -training >> step=9278900, episode=1547 reward=0.7929085 (509.43 it/sec) -training >> step=9279000, episode=1547 reward=0.7760689 (521.97 it/sec) -training >> step=9279100, episode=1547 reward=0.7926376 (518.90 it/sec) -training >> step=9279200, episode=1547 reward=0.7765492 (429.45 it/sec) -training >> step=9279300, episode=1547 reward=0.7888719 (495.10 it/sec) -training >> step=9279400, episode=1547 reward=0.8033441 (501.37 it/sec) -training >> step=9279500, episode=1547 reward=0.7915254 (524.90 it/sec) -training >> step=9279600, episode=1547 reward=0.7755452 (541.50 it/sec) -training >> step=9279700, episode=1547 reward=0.7667132 (450.30 it/sec) -training >> step=9279800, episode=1547 reward=0.759917 (508.18 it/sec) -training >> step=9279900, episode=1547 reward=0.7818555 (431.20 it/sec) -training >> step=9280000, episode=1547 reward=0.7940788 (471.83 it/sec) -training >> step=9280100, episode=1547 reward=0.8092907 (371.56 it/sec) -training >> step=9280200, episode=1547 reward=0.8014638 (389.57 it/sec) -training >> step=9280300, episode=1547 reward=0.7924753 (424.15 it/sec) -training >> step=9280400, episode=1547 reward=0.7901232 (496.18 it/sec) -training >> step=9280500, episode=1547 reward=0.7853227 (457.25 it/sec) -training >> step=9280600, episode=1547 reward=0.781514 (545.57 it/sec) -training >> step=9280700, episode=1547 reward=0.7695341 (519.05 it/sec) -training >> step=9280800, episode=1547 reward=0.7908891 (518.48 it/sec) -training >> step=9280900, episode=1547 reward=0.8069606 (424.58 it/sec) -training >> step=9281000, episode=1547 reward=0.7861489 (483.64 it/sec) -training >> step=9281100, episode=1547 reward=0.7888358 (507.30 it/sec) -training >> step=9281200, episode=1547 reward=0.7920499 (511.12 it/sec) -training >> step=9281300, episode=1548 reward=0.8107687 (127.76 it/sec) -training >> step=9281400, episode=1548 reward=0.7759553 (505.03 it/sec) -training >> step=9281500, episode=1548 reward=0.7736505 (521.11 it/sec) -training >> step=9281600, episode=1548 reward=0.7859263 (501.75 it/sec) -training >> step=9281700, episode=1548 reward=0.8009484 (521.25 it/sec) -training >> step=9281800, episode=1548 reward=0.7987425 (506.15 it/sec) -training >> step=9281900, episode=1548 reward=0.7849903 (513.06 it/sec) -training >> step=9282000, episode=1548 reward=0.804507 (499.88 it/sec) -training >> step=9282100, episode=1548 reward=0.7907507 (518.88 it/sec) -training >> step=9282200, episode=1548 reward=0.8036472 (466.32 it/sec) -training >> step=9282300, episode=1548 reward=0.7992024 (472.96 it/sec) -training >> step=9282400, episode=1548 reward=0.7930065 (505.16 it/sec) -training >> step=9282500, episode=1548 reward=0.7799534 (509.24 it/sec) -training >> step=9282600, episode=1548 reward=0.7982353 (514.42 it/sec) -training >> step=9282700, episode=1548 reward=0.7863774 (504.33 it/sec) -training >> step=9282800, episode=1548 reward=0.7891588 (534.81 it/sec) -training >> step=9282900, episode=1548 reward=0.8089147 (497.23 it/sec) -training >> step=9283000, episode=1548 reward=0.7944664 (498.02 it/sec) -training >> step=9283100, episode=1548 reward=0.7985155 (527.14 it/sec) -training >> step=9283200, episode=1548 reward=0.8059419 (354.72 it/sec) -training >> step=9283300, episode=1548 reward=0.7770779 (514.54 it/sec) -training >> step=9283400, episode=1548 reward=0.7906705 (490.25 it/sec) -training >> step=9283500, episode=1548 reward=0.7929651 (546.35 it/sec) -training >> step=9283600, episode=1548 reward=0.7845517 (494.60 it/sec) -training >> step=9283700, episode=1548 reward=0.784819 (483.57 it/sec) -training >> step=9283800, episode=1548 reward=0.7818568 (502.45 it/sec) -training >> step=9283900, episode=1548 reward=0.7933778 (505.62 it/sec) -training >> step=9284000, episode=1548 reward=0.8085982 (479.36 it/sec) -training >> step=9284100, episode=1548 reward=0.8080244 (487.62 it/sec) -training >> step=9284200, episode=1548 reward=0.8034447 (489.55 it/sec) -training >> step=9284300, episode=1548 reward=0.7857922 (523.29 it/sec) -training >> step=9284400, episode=1548 reward=0.7947781 (497.01 it/sec) -training >> step=9284500, episode=1548 reward=0.7697723 (459.44 it/sec) -training >> step=9284600, episode=1548 reward=0.7913461 (527.32 it/sec) -training >> step=9284700, episode=1548 reward=0.79951 (497.47 it/sec) -training >> step=9284800, episode=1548 reward=0.7936927 (479.48 it/sec) -training >> step=9284900, episode=1548 reward=0.7953343 (459.53 it/sec) -training >> step=9285000, episode=1548 reward=0.7894362 (477.91 it/sec) -training >> step=9285100, episode=1548 reward=0.7960075 (462.97 it/sec) -training >> step=9285200, episode=1548 reward=0.7908341 (440.25 it/sec) -training >> step=9285300, episode=1548 reward=0.8096624 (512.00 it/sec) -training >> step=9285400, episode=1548 reward=0.8100276 (512.01 it/sec) -training >> step=9285500, episode=1548 reward=0.7698851 (475.55 it/sec) -training >> step=9285600, episode=1548 reward=0.7671935 (437.28 it/sec) -training >> step=9285700, episode=1548 reward=0.7833175 (479.33 it/sec) -training >> step=9285800, episode=1548 reward=0.8097821 (465.28 it/sec) -training >> step=9285900, episode=1548 reward=0.7935866 (470.59 it/sec) -training >> step=9286000, episode=1548 reward=0.7923132 (498.52 it/sec) -training >> step=9286100, episode=1548 reward=0.7847272 (508.77 it/sec) -training >> step=9286200, episode=1548 reward=0.7624562 (473.60 it/sec) -training >> step=9286300, episode=1548 reward=0.7819517 (443.11 it/sec) -training >> step=9286400, episode=1548 reward=0.7906347 (449.37 it/sec) -training >> step=9286500, episode=1548 reward=0.7894739 (483.49 it/sec) -training >> step=9286600, episode=1548 reward=0.787828 (450.91 it/sec) -training >> step=9286700, episode=1548 reward=0.7979677 (502.23 it/sec) -training >> step=9286800, episode=1548 reward=0.7854185 (507.08 it/sec) -training >> step=9286900, episode=1548 reward=0.7797493 (464.65 it/sec) -training >> step=9287000, episode=1548 reward=0.7789401 (432.31 it/sec) -training >> step=9287100, episode=1548 reward=0.7974095 (490.84 it/sec) -training >> step=9287200, episode=1548 reward=0.7853751 (529.43 it/sec) -training >> step=9287300, episode=1549 reward=0.8121855 (110.82 it/sec) -training >> step=9287400, episode=1549 reward=0.7897987 (512.25 it/sec) -training >> step=9287500, episode=1549 reward=0.7689786 (507.22 it/sec) -training >> step=9287600, episode=1549 reward=0.7767786 (449.57 it/sec) -training >> step=9287700, episode=1549 reward=0.8014047 (454.70 it/sec) -training >> step=9287800, episode=1549 reward=0.7810739 (436.43 it/sec) -training >> step=9287900, episode=1549 reward=0.8014972 (488.98 it/sec) -training >> step=9288000, episode=1549 reward=0.7973601 (457.53 it/sec) -training >> step=9288100, episode=1549 reward=0.7973841 (454.71 it/sec) -training >> step=9288200, episode=1549 reward=0.776623 (487.71 it/sec) -training >> step=9288300, episode=1549 reward=0.7925121 (504.31 it/sec) -training >> step=9288400, episode=1549 reward=0.7953278 (475.75 it/sec) -training >> step=9288500, episode=1549 reward=0.7889268 (485.71 it/sec) -training >> step=9288600, episode=1549 reward=0.7848491 (484.31 it/sec) -training >> step=9288700, episode=1549 reward=0.7870785 (450.45 it/sec) -training >> step=9288800, episode=1549 reward=0.7947493 (441.46 it/sec) -training >> step=9288900, episode=1549 reward=0.7879403 (503.72 it/sec) -training >> step=9289000, episode=1549 reward=0.7953572 (511.58 it/sec) -training >> step=9289100, episode=1549 reward=0.7785875 (478.29 it/sec) -training >> step=9289200, episode=1549 reward=0.7980031 (414.45 it/sec) -training >> step=9289300, episode=1549 reward=0.8152427 (317.90 it/sec) -training >> step=9289400, episode=1549 reward=0.7867727 (469.25 it/sec) -training >> step=9289500, episode=1549 reward=0.7739283 (513.38 it/sec) -training >> step=9289600, episode=1549 reward=0.7948859 (503.97 it/sec) -training >> step=9289700, episode=1549 reward=0.78429 (510.99 it/sec) -training >> step=9289800, episode=1549 reward=0.8029711 (457.47 it/sec) -training >> step=9289900, episode=1549 reward=0.790593 (487.20 it/sec) -training >> step=9290000, episode=1549 reward=0.7943151 (486.60 it/sec) -training >> step=9290100, episode=1549 reward=0.7969339 (454.80 it/sec) -training >> step=9290200, episode=1549 reward=0.7864339 (488.07 it/sec) -training >> step=9290300, episode=1549 reward=0.7820692 (513.14 it/sec) -training >> step=9290400, episode=1549 reward=0.8155181 (464.61 it/sec) -training >> step=9290500, episode=1549 reward=0.7711524 (518.94 it/sec) -training >> step=9290600, episode=1549 reward=0.800925 (451.84 it/sec) -training >> step=9290700, episode=1549 reward=0.8049356 (468.37 it/sec) -training >> step=9290800, episode=1549 reward=0.7816065 (526.35 it/sec) -training >> step=9290900, episode=1549 reward=0.7988423 (469.16 it/sec) -training >> step=9291000, episode=1549 reward=0.7795466 (488.04 it/sec) -training >> step=9291100, episode=1549 reward=0.7865513 (465.53 it/sec) -training >> step=9291200, episode=1549 reward=0.7889285 (416.87 it/sec) -training >> step=9291300, episode=1549 reward=0.7986997 (483.35 it/sec) -training >> step=9291400, episode=1549 reward=0.7799796 (473.53 it/sec) -training >> step=9291500, episode=1549 reward=0.7737525 (488.79 it/sec) -training >> step=9291600, episode=1549 reward=0.7974113 (480.14 it/sec) -training >> step=9291700, episode=1549 reward=0.7981659 (470.73 it/sec) -training >> step=9291800, episode=1549 reward=0.7847013 (507.49 it/sec) -training >> step=9291900, episode=1549 reward=0.7948994 (500.22 it/sec) -training >> step=9292000, episode=1549 reward=0.7854974 (424.18 it/sec) -training >> step=9292100, episode=1549 reward=0.7922695 (490.58 it/sec) -training >> step=9292200, episode=1549 reward=0.7723488 (448.82 it/sec) -training >> step=9292300, episode=1549 reward=0.778033 (488.01 it/sec) -training >> step=9292400, episode=1549 reward=0.7876743 (485.69 it/sec) -training >> step=9292500, episode=1549 reward=0.7827229 (439.17 it/sec) -training >> step=9292600, episode=1549 reward=0.7853612 (472.66 it/sec) -training >> step=9292700, episode=1549 reward=0.7737309 (474.71 it/sec) -training >> step=9292800, episode=1549 reward=0.7869275 (516.56 it/sec) -training >> step=9292900, episode=1549 reward=0.7819888 (482.74 it/sec) -training >> step=9293000, episode=1549 reward=0.7927604 (451.50 it/sec) -training >> step=9293100, episode=1549 reward=0.7895598 (493.28 it/sec) -training >> step=9293200, episode=1549 reward=0.8091771 (516.54 it/sec) -training >> step=9293300, episode=1550 reward=0.7919063 (125.27 it/sec) -training >> step=9293400, episode=1550 reward=0.7664436 (449.64 it/sec) -training >> step=9293500, episode=1550 reward=0.7709213 (431.18 it/sec) -training >> step=9293600, episode=1550 reward=0.7764123 (370.72 it/sec) -training >> step=9293700, episode=1550 reward=0.8004456 (402.75 it/sec) -training >> step=9293800, episode=1550 reward=0.7828914 (446.33 it/sec) -training >> step=9293900, episode=1550 reward=0.8044761 (501.31 it/sec) -training >> step=9294000, episode=1550 reward=0.8036681 (503.63 it/sec) -training >> step=9294100, episode=1550 reward=0.7984408 (523.63 it/sec) -training >> step=9294200, episode=1550 reward=0.8053292 (482.76 it/sec) -training >> step=9294300, episode=1550 reward=0.7919485 (479.54 it/sec) -training >> step=9294400, episode=1550 reward=0.8195811 (480.65 it/sec) -training >> step=9294500, episode=1550 reward=0.7871658 (490.46 it/sec) -training >> step=9294600, episode=1550 reward=0.7990202 (525.69 it/sec) -training >> step=9294700, episode=1550 reward=0.7936316 (508.53 it/sec) -training >> step=9294800, episode=1550 reward=0.7809235 (502.91 it/sec) -training >> step=9294900, episode=1550 reward=0.805675 (513.79 it/sec) -training >> step=9295000, episode=1550 reward=0.7868134 (528.15 it/sec) -training >> step=9295100, episode=1550 reward=0.7899137 (514.80 it/sec) -training >> step=9295200, episode=1550 reward=0.7616796 (529.87 it/sec) -training >> step=9295300, episode=1550 reward=0.8045964 (491.53 it/sec) -training >> step=9295400, episode=1550 reward=0.8101388 (400.29 it/sec) -training >> step=9295500, episode=1550 reward=0.780469 (466.95 it/sec) -training >> step=9295600, episode=1550 reward=0.7841198 (532.63 it/sec) -training >> step=9295700, episode=1550 reward=0.7919071 (517.81 it/sec) -training >> step=9295800, episode=1550 reward=0.8024661 (511.27 it/sec) -training >> step=9295900, episode=1550 reward=0.7924473 (443.19 it/sec) -training >> step=9296000, episode=1550 reward=0.7940029 (513.78 it/sec) -training >> step=9296100, episode=1550 reward=0.7899812 (503.74 it/sec) -training >> step=9296200, episode=1550 reward=0.7900901 (537.96 it/sec) -training >> step=9296300, episode=1550 reward=0.7922665 (480.36 it/sec) -training >> step=9296400, episode=1550 reward=0.7969807 (521.35 it/sec) -training >> step=9296500, episode=1550 reward=0.8059802 (531.72 it/sec) -training >> step=9296600, episode=1550 reward=0.7841856 (521.18 it/sec) -training >> step=9296700, episode=1550 reward=0.7941176 (523.42 it/sec) -training >> step=9296800, episode=1550 reward=0.7886735 (513.78 it/sec) -training >> step=9296900, episode=1550 reward=0.793081 (534.24 it/sec) -training >> step=9297000, episode=1550 reward=0.7852405 (520.26 it/sec) -training >> step=9297100, episode=1550 reward=0.7927241 (464.51 it/sec) -training >> step=9297200, episode=1550 reward=0.7998481 (517.71 it/sec) -training >> step=9297300, episode=1550 reward=0.8027168 (515.15 it/sec) -training >> step=9297400, episode=1550 reward=0.7913468 (521.39 it/sec) -training >> step=9297500, episode=1550 reward=0.7844185 (527.03 it/sec) -training >> step=9297600, episode=1550 reward=0.791835 (532.87 it/sec) -training >> step=9297700, episode=1550 reward=0.7950541 (462.69 it/sec) -training >> step=9297800, episode=1550 reward=0.8092148 (447.17 it/sec) -training >> step=9297900, episode=1550 reward=0.8028839 (519.49 it/sec) -training >> step=9298000, episode=1550 reward=0.7801058 (511.44 it/sec) -training >> step=9298100, episode=1550 reward=0.7948672 (534.93 it/sec) -training >> step=9298200, episode=1550 reward=0.7896946 (512.06 it/sec) -training >> step=9298300, episode=1550 reward=0.7917591 (497.20 it/sec) -training >> step=9298400, episode=1550 reward=0.7804201 (504.48 it/sec) -training >> step=9298500, episode=1550 reward=0.7874627 (534.40 it/sec) -training >> step=9298600, episode=1550 reward=0.811547 (526.09 it/sec) -training >> step=9298700, episode=1550 reward=0.8054212 (545.55 it/sec) -training >> step=9298800, episode=1550 reward=0.8009115 (489.83 it/sec) -training >> step=9298900, episode=1550 reward=0.787064 (477.42 it/sec) -training >> step=9299000, episode=1550 reward=0.7673988 (517.81 it/sec) -training >> step=9299100, episode=1550 reward=0.7968795 (492.40 it/sec) -training >> step=9299200, episode=1550 reward=0.7902808 (484.37 it/sec) -training >> step=9299300, episode=1551 reward=0.7649691 (134.89 it/sec) -training >> step=9299400, episode=1551 reward=0.7949361 (516.92 it/sec) -training >> step=9299500, episode=1551 reward=0.7783563 (502.67 it/sec) -training >> step=9299600, episode=1551 reward=0.7743006 (487.83 it/sec) -training >> step=9299700, episode=1551 reward=0.7981514 (519.54 it/sec) -training >> step=9299800, episode=1551 reward=0.7762594 (512.09 it/sec) -training >> step=9299900, episode=1551 reward=0.7723235 (474.60 it/sec) -training >> step=9300000, episode=1551 reward=0.7807209 (484.81 it/sec) -training >> step=9300100, episode=1551 reward=0.8028999 (520.70 it/sec) -training >> step=9300200, episode=1551 reward=0.7856398 (539.82 it/sec) -training >> step=9300300, episode=1551 reward=0.7863328 (473.17 it/sec) -training >> step=9300400, episode=1551 reward=0.7990835 (529.32 it/sec) -training >> step=9300500, episode=1551 reward=0.812446 (513.77 it/sec) -training >> step=9300600, episode=1551 reward=0.7911547 (492.68 it/sec) -training >> step=9300700, episode=1551 reward=0.7992528 (478.79 it/sec) -training >> step=9300800, episode=1551 reward=0.814552 (505.59 it/sec) -training >> step=9300900, episode=1551 reward=0.7945021 (519.10 it/sec) -training >> step=9301000, episode=1551 reward=0.7815084 (520.06 it/sec) -training >> step=9301100, episode=1551 reward=0.8064594 (524.49 it/sec) -training >> step=9301200, episode=1551 reward=0.7905144 (539.54 it/sec) -training >> step=9301300, episode=1551 reward=0.7782591 (454.66 it/sec) -training >> step=9301400, episode=1551 reward=0.7926408 (516.38 it/sec) -training >> step=9301500, episode=1551 reward=0.7793723 (346.89 it/sec) -training >> step=9301600, episode=1551 reward=0.8201586 (522.17 it/sec) -training >> step=9301700, episode=1551 reward=0.7949472 (466.11 it/sec) -training >> step=9301800, episode=1551 reward=0.8004598 (511.17 it/sec) -training >> step=9301900, episode=1551 reward=0.7888051 (531.73 it/sec) -training >> step=9302000, episode=1551 reward=0.7761106 (498.34 it/sec) -training >> step=9302100, episode=1551 reward=0.7931592 (505.87 it/sec) -training >> step=9302200, episode=1551 reward=0.7947291 (503.86 it/sec) -training >> step=9302300, episode=1551 reward=0.7894824 (516.70 it/sec) -training >> step=9302400, episode=1551 reward=0.7816408 (535.88 it/sec) -training >> step=9302500, episode=1551 reward=0.7754959 (510.00 it/sec) -training >> step=9302600, episode=1551 reward=0.7845244 (505.41 it/sec) -training >> step=9302700, episode=1551 reward=0.7926401 (510.91 it/sec) -training >> step=9302800, episode=1551 reward=0.8159615 (510.76 it/sec) -training >> step=9302900, episode=1551 reward=0.7929064 (539.39 it/sec) -training >> step=9303000, episode=1551 reward=0.7851623 (517.49 it/sec) -training >> step=9303100, episode=1551 reward=0.79954 (486.73 it/sec) -training >> step=9303200, episode=1551 reward=0.7669901 (477.95 it/sec) -training >> step=9303300, episode=1551 reward=0.7993513 (503.74 it/sec) -training >> step=9303400, episode=1551 reward=0.7951635 (521.19 it/sec) -training >> step=9303500, episode=1551 reward=0.7850116 (462.65 it/sec) -training >> step=9303600, episode=1551 reward=0.8120083 (519.99 it/sec) -training >> step=9303700, episode=1551 reward=0.798524 (505.52 it/sec) -training >> step=9303800, episode=1551 reward=0.7857214 (516.90 it/sec) -training >> step=9303900, episode=1551 reward=0.7687737 (508.04 it/sec) -training >> step=9304000, episode=1551 reward=0.7856987 (510.73 it/sec) -training >> step=9304100, episode=1551 reward=0.7956461 (526.80 it/sec) -training >> step=9304200, episode=1551 reward=0.7797183 (503.06 it/sec) -training >> step=9304300, episode=1551 reward=0.7696292 (523.71 it/sec) -training >> step=9304400, episode=1551 reward=0.7983811 (496.97 it/sec) -training >> step=9304500, episode=1551 reward=0.7678716 (529.27 it/sec) -training >> step=9304600, episode=1551 reward=0.7752645 (520.33 it/sec) -training >> step=9304700, episode=1551 reward=0.7834831 (512.03 it/sec) -training >> step=9304800, episode=1551 reward=0.7912393 (541.45 it/sec) -training >> step=9304900, episode=1551 reward=0.7807461 (502.60 it/sec) -training >> step=9305000, episode=1551 reward=0.7840102 (510.25 it/sec) -training >> step=9305100, episode=1551 reward=0.7802453 (525.91 it/sec) -training >> step=9305200, episode=1551 reward=0.7784504 (519.75 it/sec) -training >> step=9305300, episode=1552 reward=0.7883672 (134.12 it/sec) -training >> step=9305400, episode=1552 reward=0.7919996 (482.99 it/sec) -training >> step=9305500, episode=1552 reward=0.7624462 (478.81 it/sec) -training >> step=9305600, episode=1552 reward=0.7889591 (498.18 it/sec) -training >> step=9305700, episode=1552 reward=0.7976933 (501.96 it/sec) -training >> step=9305800, episode=1552 reward=0.7907615 (523.21 it/sec) -training >> step=9305900, episode=1552 reward=0.7593996 (535.84 it/sec) -training >> step=9306000, episode=1552 reward=0.7954388 (521.55 it/sec) -training >> step=9306100, episode=1552 reward=0.7887127 (508.96 it/sec) -training >> step=9306200, episode=1552 reward=0.7979717 (526.24 it/sec) -training >> step=9306300, episode=1552 reward=0.7886897 (504.06 it/sec) -training >> step=9306400, episode=1552 reward=0.7919183 (512.89 it/sec) -training >> step=9306500, episode=1552 reward=0.7879894 (486.05 it/sec) -training >> step=9306600, episode=1552 reward=0.7914921 (522.32 it/sec) -training >> step=9306700, episode=1552 reward=0.8180749 (510.03 it/sec) -training >> step=9306800, episode=1552 reward=0.7878834 (499.42 it/sec) -training >> step=9306900, episode=1552 reward=0.7774344 (496.16 it/sec) -training >> step=9307000, episode=1552 reward=0.7849119 (526.79 it/sec) -training >> step=9307100, episode=1552 reward=0.7768744 (516.45 it/sec) -training >> step=9307200, episode=1552 reward=0.7886813 (520.99 it/sec) -training >> step=9307300, episode=1552 reward=0.7768618 (550.78 it/sec) -training >> step=9307400, episode=1552 reward=0.7967941 (534.19 it/sec) -training >> step=9307500, episode=1552 reward=0.7970673 (512.82 it/sec) -training >> step=9307600, episode=1552 reward=0.7931441 (503.40 it/sec) -training >> step=9307700, episode=1552 reward=0.8054698 (411.97 it/sec) -training >> step=9307800, episode=1552 reward=0.7841778 (481.22 it/sec) -training >> step=9307900, episode=1552 reward=0.7955056 (438.15 it/sec) -training >> step=9308000, episode=1552 reward=0.7921634 (449.94 it/sec) -training >> step=9308100, episode=1552 reward=0.7905564 (433.43 it/sec) -training >> step=9308200, episode=1552 reward=0.7936106 (408.47 it/sec) -training >> step=9308300, episode=1552 reward=0.8012453 (405.09 it/sec) -training >> step=9308400, episode=1552 reward=0.8036962 (485.57 it/sec) -training >> step=9308500, episode=1552 reward=0.7770796 (497.94 it/sec) -training >> step=9308600, episode=1552 reward=0.8076856 (508.61 it/sec) -training >> step=9308700, episode=1552 reward=0.7768573 (497.00 it/sec) -training >> step=9308800, episode=1552 reward=0.7881554 (547.28 it/sec) -training >> step=9308900, episode=1552 reward=0.7842727 (511.46 it/sec) -training >> step=9309000, episode=1552 reward=0.7869049 (523.46 it/sec) -training >> step=9309100, episode=1552 reward=0.8074725 (524.91 it/sec) -training >> step=9309200, episode=1552 reward=0.7845986 (516.04 it/sec) -training >> step=9309300, episode=1552 reward=0.7972117 (529.78 it/sec) -training >> step=9309400, episode=1552 reward=0.7912107 (515.23 it/sec) -training >> step=9309500, episode=1552 reward=0.789558 (510.02 it/sec) -training >> step=9309600, episode=1552 reward=0.7804379 (506.17 it/sec) -training >> step=9309700, episode=1552 reward=0.8044902 (493.22 it/sec) -training >> step=9309800, episode=1552 reward=0.8045036 (484.86 it/sec) -training >> step=9309900, episode=1552 reward=0.7974254 (484.96 it/sec) -training >> step=9310000, episode=1552 reward=0.8072609 (465.11 it/sec) -training >> step=9310100, episode=1552 reward=0.7901804 (475.53 it/sec) -training >> step=9310200, episode=1552 reward=0.7946561 (531.64 it/sec) -training >> step=9310300, episode=1552 reward=0.7657894 (518.55 it/sec) -training >> step=9310400, episode=1552 reward=0.7905036 (465.24 it/sec) -training >> step=9310500, episode=1552 reward=0.7830268 (478.09 it/sec) -training >> step=9310600, episode=1552 reward=0.7758954 (540.62 it/sec) -training >> step=9310700, episode=1552 reward=0.788537 (487.53 it/sec) -training >> step=9310800, episode=1552 reward=0.7838858 (519.85 it/sec) -training >> step=9310900, episode=1552 reward=0.7952899 (530.95 it/sec) -training >> step=9311000, episode=1552 reward=0.7850746 (516.54 it/sec) -training >> step=9311100, episode=1552 reward=0.7586147 (504.75 it/sec) -training >> step=9311200, episode=1552 reward=0.7591789 (527.00 it/sec) -training >> step=9311300, episode=1553 reward=0.8005705 (128.11 it/sec) -training >> step=9311400, episode=1553 reward=0.7715087 (507.29 it/sec) -training >> step=9311500, episode=1553 reward=0.7936162 (501.28 it/sec) -training >> step=9311600, episode=1553 reward=0.7611861 (522.33 it/sec) -training >> step=9311700, episode=1553 reward=0.8043796 (498.23 it/sec) -training >> step=9311800, episode=1553 reward=0.7725829 (503.07 it/sec) -training >> step=9311900, episode=1553 reward=0.7855743 (507.42 it/sec) -training >> step=9312000, episode=1553 reward=0.8006908 (503.64 it/sec) -training >> step=9312100, episode=1553 reward=0.7870365 (526.67 it/sec) -training >> step=9312200, episode=1553 reward=0.8084244 (512.94 it/sec) -training >> step=9312300, episode=1553 reward=0.7905474 (500.69 it/sec) -training >> step=9312400, episode=1553 reward=0.7943718 (514.84 it/sec) -training >> step=9312500, episode=1553 reward=0.7871819 (463.94 it/sec) -training >> step=9312600, episode=1553 reward=0.8098273 (524.23 it/sec) -training >> step=9312700, episode=1553 reward=0.8040138 (502.17 it/sec) -training >> step=9312800, episode=1553 reward=0.78908 (514.55 it/sec) -training >> step=9312900, episode=1553 reward=0.8106261 (527.96 it/sec) -training >> step=9313000, episode=1553 reward=0.7874004 (473.85 it/sec) -training >> step=9313100, episode=1553 reward=0.8080769 (519.70 it/sec) -training >> step=9313200, episode=1553 reward=0.788931 (499.39 it/sec) -training >> step=9313300, episode=1553 reward=0.7936431 (490.58 it/sec) -training >> step=9313400, episode=1553 reward=0.7844932 (491.38 it/sec) -training >> step=9313500, episode=1553 reward=0.7856055 (558.45 it/sec) -training >> step=9313600, episode=1553 reward=0.8172484 (505.17 it/sec) -training >> step=9313700, episode=1553 reward=0.7853914 (539.62 it/sec) -training >> step=9313800, episode=1553 reward=0.7826981 (356.24 it/sec) -training >> step=9313900, episode=1553 reward=0.786792 (524.26 it/sec) -training >> step=9314000, episode=1553 reward=0.7958826 (513.71 it/sec) -training >> step=9314100, episode=1553 reward=0.8019993 (511.12 it/sec) -training >> step=9314200, episode=1553 reward=0.7757131 (510.99 it/sec) -training >> step=9314300, episode=1553 reward=0.7833229 (509.77 it/sec) -training >> step=9314400, episode=1553 reward=0.8091266 (523.32 it/sec) -training >> step=9314500, episode=1553 reward=0.7761862 (518.67 it/sec) -training >> step=9314600, episode=1553 reward=0.7864475 (488.87 it/sec) -training >> step=9314700, episode=1553 reward=0.789642 (515.32 it/sec) -training >> step=9314800, episode=1553 reward=0.7991077 (504.50 it/sec) -training >> step=9314900, episode=1553 reward=0.8034178 (518.80 it/sec) -training >> step=9315000, episode=1553 reward=0.7900643 (503.92 it/sec) -training >> step=9315100, episode=1553 reward=0.7924638 (441.25 it/sec) -training >> step=9315200, episode=1553 reward=0.8104305 (437.05 it/sec) -training >> step=9315300, episode=1553 reward=0.7965706 (500.11 it/sec) -training >> step=9315400, episode=1553 reward=0.8075466 (518.33 it/sec) -training >> step=9315500, episode=1553 reward=0.8027166 (496.28 it/sec) -training >> step=9315600, episode=1553 reward=0.7895612 (485.53 it/sec) -training >> step=9315700, episode=1553 reward=0.7980781 (565.04 it/sec) -training >> step=9315800, episode=1553 reward=0.7923822 (518.49 it/sec) -training >> step=9315900, episode=1553 reward=0.7770548 (516.66 it/sec) -training >> step=9316000, episode=1553 reward=0.7940573 (531.15 it/sec) -training >> step=9316100, episode=1553 reward=0.7933007 (508.51 it/sec) -training >> step=9316200, episode=1553 reward=0.7950493 (510.11 it/sec) -training >> step=9316300, episode=1553 reward=0.7836366 (514.59 it/sec) -training >> step=9316400, episode=1553 reward=0.8052495 (518.25 it/sec) -training >> step=9316500, episode=1553 reward=0.788646 (520.41 it/sec) -training >> step=9316600, episode=1553 reward=0.7901028 (461.65 it/sec) -training >> step=9316700, episode=1553 reward=0.7899138 (503.00 it/sec) -training >> step=9316800, episode=1553 reward=0.7935088 (565.75 it/sec) -training >> step=9316900, episode=1553 reward=0.7855932 (532.29 it/sec) -training >> step=9317000, episode=1553 reward=0.7997904 (499.00 it/sec) -training >> step=9317100, episode=1553 reward=0.7981085 (542.33 it/sec) -training >> step=9317200, episode=1553 reward=0.7817877 (501.29 it/sec) -training >> step=9317300, episode=1554 reward=0.7873772 (140.20 it/sec) -training >> step=9317400, episode=1554 reward=0.7770786 (518.90 it/sec) -training >> step=9317500, episode=1554 reward=0.7888244 (519.92 it/sec) -training >> step=9317600, episode=1554 reward=0.7910724 (511.51 it/sec) -training >> step=9317700, episode=1554 reward=0.7937982 (512.80 it/sec) -training >> step=9317800, episode=1554 reward=0.7887121 (505.11 it/sec) -training >> step=9317900, episode=1554 reward=0.7953069 (533.92 it/sec) -training >> step=9318000, episode=1554 reward=0.8174563 (496.00 it/sec) -training >> step=9318100, episode=1554 reward=0.7889909 (536.37 it/sec) -training >> step=9318200, episode=1554 reward=0.7730662 (517.42 it/sec) -training >> step=9318300, episode=1554 reward=0.791305 (520.39 it/sec) -training >> step=9318400, episode=1554 reward=0.7912082 (519.54 it/sec) -training >> step=9318500, episode=1554 reward=0.7955686 (498.81 it/sec) -training >> step=9318600, episode=1554 reward=0.7923626 (543.03 it/sec) -training >> step=9318700, episode=1554 reward=0.7859875 (559.25 it/sec) -training >> step=9318800, episode=1554 reward=0.796007 (502.70 it/sec) -training >> step=9318900, episode=1554 reward=0.8116513 (522.82 it/sec) -training >> step=9319000, episode=1554 reward=0.7915624 (539.06 it/sec) -training >> step=9319100, episode=1554 reward=0.7849131 (512.63 it/sec) -training >> step=9319200, episode=1554 reward=0.7956731 (536.26 it/sec) -training >> step=9319300, episode=1554 reward=0.7981093 (491.45 it/sec) -training >> step=9319400, episode=1554 reward=0.8089935 (528.43 it/sec) -training >> step=9319500, episode=1554 reward=0.7814504 (511.29 it/sec) -training >> step=9319600, episode=1554 reward=0.7876121 (504.44 it/sec) -training >> step=9319700, episode=1554 reward=0.7773144 (478.15 it/sec) -training >> step=9319800, episode=1554 reward=0.783481 (549.47 it/sec) -training >> step=9319900, episode=1554 reward=0.7883749 (501.47 it/sec) -training >> step=9320000, episode=1554 reward=0.7907175 (353.40 it/sec) -training >> step=9320100, episode=1554 reward=0.7815686 (529.90 it/sec) -training >> step=9320200, episode=1554 reward=0.7662209 (523.08 it/sec) -training >> step=9320300, episode=1554 reward=0.7908552 (475.17 it/sec) -training >> step=9320400, episode=1554 reward=0.7905405 (515.41 it/sec) -training >> step=9320500, episode=1554 reward=0.8011374 (516.08 it/sec) -training >> step=9320600, episode=1554 reward=0.8069078 (498.65 it/sec) -training >> step=9320700, episode=1554 reward=0.7883643 (526.90 it/sec) -training >> step=9320800, episode=1554 reward=0.7867737 (521.12 it/sec) -training >> step=9320900, episode=1554 reward=0.7892444 (522.66 it/sec) -training >> step=9321000, episode=1554 reward=0.771798 (524.27 it/sec) -training >> step=9321100, episode=1554 reward=0.7963542 (524.23 it/sec) -training >> step=9321200, episode=1554 reward=0.781264 (526.70 it/sec) -training >> step=9321300, episode=1554 reward=0.7983759 (542.02 it/sec) -training >> step=9321400, episode=1554 reward=0.7920802 (493.79 it/sec) -training >> step=9321500, episode=1554 reward=0.7864044 (544.56 it/sec) -training >> step=9321600, episode=1554 reward=0.8037946 (547.94 it/sec) -training >> step=9321700, episode=1554 reward=0.7832021 (508.58 it/sec) -training >> step=9321800, episode=1554 reward=0.7901687 (523.95 it/sec) -training >> step=9321900, episode=1554 reward=0.776011 (534.65 it/sec) -training >> step=9322000, episode=1554 reward=0.7950912 (510.67 it/sec) -training >> step=9322100, episode=1554 reward=0.7837698 (497.91 it/sec) -training >> step=9322200, episode=1554 reward=0.8104935 (453.36 it/sec) -training >> step=9322300, episode=1554 reward=0.7987208 (470.85 it/sec) -training >> step=9322400, episode=1554 reward=0.7767109 (526.09 it/sec) -training >> step=9322500, episode=1554 reward=0.7834789 (437.45 it/sec) -training >> step=9322600, episode=1554 reward=0.7863815 (428.56 it/sec) -training >> step=9322700, episode=1554 reward=0.8071708 (441.56 it/sec) -training >> step=9322800, episode=1554 reward=0.8081591 (409.72 it/sec) -training >> step=9322900, episode=1554 reward=0.7879568 (468.03 it/sec) -training >> step=9323000, episode=1554 reward=0.7782747 (531.88 it/sec) -training >> step=9323100, episode=1554 reward=0.7615411 (532.19 it/sec) -training >> step=9323200, episode=1554 reward=0.8007158 (515.12 it/sec) -training >> step=9323300, episode=1555 reward=0.7962546 (143.00 it/sec) -training >> step=9323400, episode=1555 reward=0.7858514 (515.11 it/sec) -training >> step=9323500, episode=1555 reward=0.7742599 (488.93 it/sec) -training >> step=9323600, episode=1555 reward=0.7769405 (512.23 it/sec) -training >> step=9323700, episode=1555 reward=0.78864 (512.06 it/sec) -training >> step=9323800, episode=1555 reward=0.8019543 (527.84 it/sec) -training >> step=9323900, episode=1555 reward=0.7924469 (511.43 it/sec) -training >> step=9324000, episode=1555 reward=0.7900886 (502.93 it/sec) -training >> step=9324100, episode=1555 reward=0.8047103 (548.99 it/sec) -training >> step=9324200, episode=1555 reward=0.7841923 (508.15 it/sec) -training >> step=9324300, episode=1555 reward=0.7909165 (523.73 it/sec) -training >> step=9324400, episode=1555 reward=0.7683799 (522.61 it/sec) -training >> step=9324500, episode=1555 reward=0.8124514 (528.67 it/sec) -training >> step=9324600, episode=1555 reward=0.7941502 (480.67 it/sec) -training >> step=9324700, episode=1555 reward=0.7996259 (516.97 it/sec) -training >> step=9324800, episode=1555 reward=0.7997378 (478.63 it/sec) -training >> step=9324900, episode=1555 reward=0.8023102 (491.93 it/sec) -training >> step=9325000, episode=1555 reward=0.7840742 (473.27 it/sec) -training >> step=9325100, episode=1555 reward=0.7897043 (509.42 it/sec) -training >> step=9325200, episode=1555 reward=0.7910644 (529.96 it/sec) -training >> step=9325300, episode=1555 reward=0.8021137 (502.54 it/sec) -training >> step=9325400, episode=1555 reward=0.7923021 (526.90 it/sec) -training >> step=9325500, episode=1555 reward=0.7989638 (492.38 it/sec) -training >> step=9325600, episode=1555 reward=0.7717779 (524.00 it/sec) -training >> step=9325700, episode=1555 reward=0.7903841 (518.45 it/sec) -training >> step=9325800, episode=1555 reward=0.7933102 (526.25 it/sec) -training >> step=9325900, episode=1555 reward=0.7971222 (515.66 it/sec) -training >> step=9326000, episode=1555 reward=0.8038846 (540.17 it/sec) -training >> step=9326100, episode=1555 reward=0.7931423 (356.44 it/sec) -training >> step=9326200, episode=1555 reward=0.7971624 (502.91 it/sec) -training >> step=9326300, episode=1555 reward=0.7990307 (457.99 it/sec) -training >> step=9326400, episode=1555 reward=0.792366 (450.02 it/sec) -training >> step=9326500, episode=1555 reward=0.7875654 (456.89 it/sec) -training >> step=9326600, episode=1555 reward=0.8089221 (523.72 it/sec) -training >> step=9326700, episode=1555 reward=0.7923155 (532.02 it/sec) -training >> step=9326800, episode=1555 reward=0.7917927 (504.88 it/sec) -training >> step=9326900, episode=1555 reward=0.780748 (523.03 it/sec) -training >> step=9327000, episode=1555 reward=0.776757 (503.60 it/sec) -training >> step=9327100, episode=1555 reward=0.7965866 (512.42 it/sec) -training >> step=9327200, episode=1555 reward=0.8025686 (539.90 it/sec) -training >> step=9327300, episode=1555 reward=0.7792194 (511.59 it/sec) -training >> step=9327400, episode=1555 reward=0.7862595 (526.51 it/sec) -training >> step=9327500, episode=1555 reward=0.7916534 (534.18 it/sec) -training >> step=9327600, episode=1555 reward=0.7782046 (493.56 it/sec) -training >> step=9327700, episode=1555 reward=0.7696874 (527.85 it/sec) -training >> step=9327800, episode=1555 reward=0.7982231 (536.25 it/sec) -training >> step=9327900, episode=1555 reward=0.7931884 (491.99 it/sec) -training >> step=9328000, episode=1555 reward=0.7775828 (500.44 it/sec) -training >> step=9328100, episode=1555 reward=0.7936847 (504.10 it/sec) -training >> step=9328200, episode=1555 reward=0.7807552 (512.89 it/sec) -training >> step=9328300, episode=1555 reward=0.7802948 (546.51 it/sec) -training >> step=9328400, episode=1555 reward=0.7985988 (499.22 it/sec) -training >> step=9328500, episode=1555 reward=0.7808967 (517.73 it/sec) -training >> step=9328600, episode=1555 reward=0.7942068 (510.32 it/sec) -training >> step=9328700, episode=1555 reward=0.8126705 (515.59 it/sec) -training >> step=9328800, episode=1555 reward=0.7879878 (533.54 it/sec) -training >> step=9328900, episode=1555 reward=0.769282 (491.03 it/sec) -training >> step=9329000, episode=1555 reward=0.80259 (468.45 it/sec) -training >> step=9329100, episode=1555 reward=0.7863502 (506.42 it/sec) -training >> step=9329200, episode=1555 reward=0.7820049 (517.35 it/sec) -training >> step=9329300, episode=1556 reward=0.7787948 (152.35 it/sec) -training >> step=9329400, episode=1556 reward=0.778486 (472.19 it/sec) -training >> step=9329500, episode=1556 reward=0.7851384 (529.72 it/sec) -training >> step=9329600, episode=1556 reward=0.77611 (548.75 it/sec) -training >> step=9329700, episode=1556 reward=0.7919288 (472.65 it/sec) -training >> step=9329800, episode=1556 reward=0.7712291 (501.04 it/sec) -training >> step=9329900, episode=1556 reward=0.8011853 (500.94 it/sec) -training >> step=9330000, episode=1556 reward=0.7795047 (515.80 it/sec) -training >> step=9330100, episode=1556 reward=0.8054575 (535.84 it/sec) -training >> step=9330200, episode=1556 reward=0.7844195 (509.96 it/sec) -training >> step=9330300, episode=1556 reward=0.8062086 (524.21 it/sec) -training >> step=9330400, episode=1556 reward=0.8087674 (447.45 it/sec) -training >> step=9330500, episode=1556 reward=0.7868243 (497.03 it/sec) -training >> step=9330600, episode=1556 reward=0.7965847 (566.79 it/sec) -training >> step=9330700, episode=1556 reward=0.7760096 (524.31 it/sec) -training >> step=9330800, episode=1556 reward=0.784939 (481.86 it/sec) -training >> step=9330900, episode=1556 reward=0.7932909 (498.48 it/sec) -training >> step=9331000, episode=1556 reward=0.7974696 (517.85 it/sec) -training >> step=9331100, episode=1556 reward=0.7774364 (488.62 it/sec) -training >> step=9331200, episode=1556 reward=0.8165271 (548.81 it/sec) -training >> step=9331300, episode=1556 reward=0.8028156 (502.22 it/sec) -training >> step=9331400, episode=1556 reward=0.8071917 (515.26 it/sec) -training >> step=9331500, episode=1556 reward=0.7888254 (511.81 it/sec) -training >> step=9331600, episode=1556 reward=0.7938609 (480.16 it/sec) -training >> step=9331700, episode=1556 reward=0.78413 (545.04 it/sec) -training >> step=9331800, episode=1556 reward=0.774928 (550.62 it/sec) -training >> step=9331900, episode=1556 reward=0.7865704 (484.16 it/sec) -training >> step=9332000, episode=1556 reward=0.7910899 (511.52 it/sec) -training >> step=9332100, episode=1556 reward=0.7810516 (536.65 it/sec) -training >> step=9332200, episode=1556 reward=0.7591327 (457.64 it/sec) -training >> step=9332300, episode=1556 reward=0.7984349 (388.74 it/sec) -training >> step=9332400, episode=1556 reward=0.7886064 (494.77 it/sec) -training >> step=9332500, episode=1556 reward=0.7622221 (518.30 it/sec) -training >> step=9332600, episode=1556 reward=0.7978194 (526.52 it/sec) -training >> step=9332700, episode=1556 reward=0.7605715 (511.21 it/sec) -training >> step=9332800, episode=1556 reward=0.776567 (550.89 it/sec) -training >> step=9332900, episode=1556 reward=0.8040727 (511.64 it/sec) -training >> step=9333000, episode=1556 reward=0.7821972 (505.53 it/sec) -training >> step=9333100, episode=1556 reward=0.784803 (516.06 it/sec) -training >> step=9333200, episode=1556 reward=0.7693614 (550.05 it/sec) -training >> step=9333300, episode=1556 reward=0.7933147 (521.59 it/sec) -training >> step=9333400, episode=1556 reward=0.7834363 (498.15 it/sec) -training >> step=9333500, episode=1556 reward=0.8015856 (512.43 it/sec) -training >> step=9333600, episode=1556 reward=0.7932506 (512.41 it/sec) -training >> step=9333700, episode=1556 reward=0.7763208 (510.14 it/sec) -training >> step=9333800, episode=1556 reward=0.789394 (496.08 it/sec) -training >> step=9333900, episode=1556 reward=0.7946467 (493.84 it/sec) -training >> step=9334000, episode=1556 reward=0.7858419 (425.85 it/sec) -training >> step=9334100, episode=1556 reward=0.7742599 (495.09 it/sec) -training >> step=9334200, episode=1556 reward=0.7888541 (534.23 it/sec) -training >> step=9334300, episode=1556 reward=0.8071275 (543.22 it/sec) -training >> step=9334400, episode=1556 reward=0.8118031 (505.41 it/sec) -training >> step=9334500, episode=1556 reward=0.79678 (440.24 it/sec) -training >> step=9334600, episode=1556 reward=0.7994376 (521.06 it/sec) -training >> step=9334700, episode=1556 reward=0.7737442 (501.06 it/sec) -training >> step=9334800, episode=1556 reward=0.8006631 (461.81 it/sec) -training >> step=9334900, episode=1556 reward=0.7630283 (523.73 it/sec) -training >> step=9335000, episode=1556 reward=0.7833494 (491.70 it/sec) -training >> step=9335100, episode=1556 reward=0.7681854 (535.30 it/sec) -training >> step=9335200, episode=1556 reward=0.7801179 (501.22 it/sec) -training >> step=9335300, episode=1557 reward=0.800753 (133.86 it/sec) -training >> step=9335400, episode=1557 reward=0.7967168 (507.59 it/sec) -training >> step=9335500, episode=1557 reward=0.7820843 (518.40 it/sec) -training >> step=9335600, episode=1557 reward=0.7842408 (513.56 it/sec) -training >> step=9335700, episode=1557 reward=0.7751505 (510.39 it/sec) -training >> step=9335800, episode=1557 reward=0.8039063 (515.63 it/sec) -training >> step=9335900, episode=1557 reward=0.784501 (509.77 it/sec) -training >> step=9336000, episode=1557 reward=0.7999409 (515.06 it/sec) -training >> step=9336100, episode=1557 reward=0.7972105 (507.88 it/sec) -training >> step=9336200, episode=1557 reward=0.8011736 (500.37 it/sec) -training >> step=9336300, episode=1557 reward=0.778369 (486.63 it/sec) -training >> step=9336400, episode=1557 reward=0.7813339 (511.61 it/sec) -training >> step=9336500, episode=1557 reward=0.7926437 (516.12 it/sec) -training >> step=9336600, episode=1557 reward=0.7931076 (530.33 it/sec) -training >> step=9336700, episode=1557 reward=0.8022619 (539.07 it/sec) -training >> step=9336800, episode=1557 reward=0.8086483 (417.31 it/sec) -training >> step=9336900, episode=1557 reward=0.8100038 (459.80 it/sec) -training >> step=9337000, episode=1557 reward=0.8020945 (439.93 it/sec) -training >> step=9337100, episode=1557 reward=0.7998065 (436.79 it/sec) -training >> step=9337200, episode=1557 reward=0.8027453 (432.61 it/sec) -training >> step=9337300, episode=1557 reward=0.80094 (506.52 it/sec) -training >> step=9337400, episode=1557 reward=0.7838208 (480.35 it/sec) -training >> step=9337500, episode=1557 reward=0.7983468 (505.07 it/sec) -training >> step=9337600, episode=1557 reward=0.8030049 (501.69 it/sec) -training >> step=9337700, episode=1557 reward=0.7862412 (504.53 it/sec) -training >> step=9337800, episode=1557 reward=0.784922 (504.21 it/sec) -training >> step=9337900, episode=1557 reward=0.8004903 (506.20 it/sec) -training >> step=9338000, episode=1557 reward=0.7865219 (549.36 it/sec) -training >> step=9338100, episode=1557 reward=0.7811583 (515.22 it/sec) -training >> step=9338200, episode=1557 reward=0.7943352 (490.49 it/sec) -training >> step=9338300, episode=1557 reward=0.7822347 (373.30 it/sec) -training >> step=9338400, episode=1557 reward=0.7794833 (520.22 it/sec) -training >> step=9338500, episode=1557 reward=0.7791751 (506.91 it/sec) -training >> step=9338600, episode=1557 reward=0.7725379 (525.62 it/sec) -training >> step=9338700, episode=1557 reward=0.7838933 (513.81 it/sec) -training >> step=9338800, episode=1557 reward=0.7934693 (503.42 it/sec) -training >> step=9338900, episode=1557 reward=0.7959777 (484.63 it/sec) -training >> step=9339000, episode=1557 reward=0.7788906 (496.11 it/sec) -training >> step=9339100, episode=1557 reward=0.7713331 (571.11 it/sec) -training >> step=9339200, episode=1557 reward=0.7857714 (532.35 it/sec) -training >> step=9339300, episode=1557 reward=0.7941924 (479.78 it/sec) -training >> step=9339400, episode=1557 reward=0.7963332 (522.42 it/sec) -training >> step=9339500, episode=1557 reward=0.7868468 (462.24 it/sec) -training >> step=9339600, episode=1557 reward=0.7737584 (488.40 it/sec) -training >> step=9339700, episode=1557 reward=0.7775929 (527.54 it/sec) -training >> step=9339800, episode=1557 reward=0.7878905 (489.24 it/sec) -training >> step=9339900, episode=1557 reward=0.7950062 (534.15 it/sec) -training >> step=9340000, episode=1557 reward=0.7774609 (477.12 it/sec) -training >> step=9340100, episode=1557 reward=0.7938408 (504.83 it/sec) -training >> step=9340200, episode=1557 reward=0.8096414 (522.50 it/sec) -training >> step=9340300, episode=1557 reward=0.8046426 (436.56 it/sec) -training >> step=9340400, episode=1557 reward=0.7845072 (508.49 it/sec) -training >> step=9340500, episode=1557 reward=0.7809976 (518.79 it/sec) -training >> step=9340600, episode=1557 reward=0.7894114 (546.95 it/sec) -training >> step=9340700, episode=1557 reward=0.7657151 (520.23 it/sec) -training >> step=9340800, episode=1557 reward=0.7793308 (489.42 it/sec) -training >> step=9340900, episode=1557 reward=0.7824705 (499.73 it/sec) -training >> step=9341000, episode=1557 reward=0.7975391 (510.60 it/sec) -training >> step=9341100, episode=1557 reward=0.7812201 (467.23 it/sec) -training >> step=9341200, episode=1557 reward=0.7789068 (509.62 it/sec) -training >> step=9341300, episode=1558 reward=0.7919656 (139.36 it/sec) -training >> step=9341400, episode=1558 reward=0.766822 (544.73 it/sec) -training >> step=9341500, episode=1558 reward=0.7807124 (509.75 it/sec) -training >> step=9341600, episode=1558 reward=0.7968815 (494.51 it/sec) -training >> step=9341700, episode=1558 reward=0.8003265 (536.79 it/sec) -training >> step=9341800, episode=1558 reward=0.7796882 (513.78 it/sec) -training >> step=9341900, episode=1558 reward=0.7795211 (508.04 it/sec) -training >> step=9342000, episode=1558 reward=0.8149164 (501.85 it/sec) -training >> step=9342100, episode=1558 reward=0.7779409 (479.60 it/sec) -training >> step=9342200, episode=1558 reward=0.7816747 (519.12 it/sec) -training >> step=9342300, episode=1558 reward=0.8085634 (501.99 it/sec) -training >> step=9342400, episode=1558 reward=0.7842209 (551.09 it/sec) -training >> step=9342500, episode=1558 reward=0.8002803 (520.46 it/sec) -training >> step=9342600, episode=1558 reward=0.8003425 (526.84 it/sec) -training >> step=9342700, episode=1558 reward=0.7762142 (511.15 it/sec) -training >> step=9342800, episode=1558 reward=0.7843876 (568.03 it/sec) -training >> step=9342900, episode=1558 reward=0.7833105 (530.06 it/sec) -training >> step=9343000, episode=1558 reward=0.7897807 (500.43 it/sec) -training >> step=9343100, episode=1558 reward=0.7964691 (560.63 it/sec) -training >> step=9343200, episode=1558 reward=0.7865409 (482.94 it/sec) -training >> step=9343300, episode=1558 reward=0.8005336 (538.70 it/sec) -training >> step=9343400, episode=1558 reward=0.7925283 (493.62 it/sec) -training >> step=9343500, episode=1558 reward=0.7889066 (544.01 it/sec) -training >> step=9343600, episode=1558 reward=0.7885666 (494.94 it/sec) -training >> step=9343700, episode=1558 reward=0.7919345 (459.75 it/sec) -training >> step=9343800, episode=1558 reward=0.8000795 (519.92 it/sec) -training >> step=9343900, episode=1558 reward=0.7862625 (521.14 it/sec) -training >> step=9344000, episode=1558 reward=0.794095 (533.81 it/sec) -training >> step=9344100, episode=1558 reward=0.7850475 (510.01 it/sec) -training >> step=9344200, episode=1558 reward=0.7922245 (496.92 it/sec) -training >> step=9344300, episode=1558 reward=0.7864643 (472.27 it/sec) -training >> step=9344400, episode=1558 reward=0.7734121 (378.29 it/sec) -training >> step=9344500, episode=1558 reward=0.7941133 (533.25 it/sec) -training >> step=9344600, episode=1558 reward=0.7866254 (540.30 it/sec) -training >> step=9344700, episode=1558 reward=0.7667599 (520.84 it/sec) -training >> step=9344800, episode=1558 reward=0.770537 (482.52 it/sec) -training >> step=9344900, episode=1558 reward=0.7982884 (522.96 it/sec) -training >> step=9345000, episode=1558 reward=0.7951171 (534.22 it/sec) -training >> step=9345100, episode=1558 reward=0.7986618 (516.16 it/sec) -training >> step=9345200, episode=1558 reward=0.7796772 (537.17 it/sec) -training >> step=9345300, episode=1558 reward=0.8054734 (487.98 it/sec) -training >> step=9345400, episode=1558 reward=0.7950302 (564.84 it/sec) -training >> step=9345500, episode=1558 reward=0.7951682 (494.15 it/sec) -training >> step=9345600, episode=1558 reward=0.7788475 (527.75 it/sec) -training >> step=9345700, episode=1558 reward=0.7977045 (515.29 it/sec) -training >> step=9345800, episode=1558 reward=0.7819363 (468.93 it/sec) -training >> step=9345900, episode=1558 reward=0.7756441 (497.82 it/sec) -training >> step=9346000, episode=1558 reward=0.7980539 (451.36 it/sec) -training >> step=9346100, episode=1558 reward=0.7844738 (474.29 it/sec) -training >> step=9346200, episode=1558 reward=0.7995177 (462.77 it/sec) -training >> step=9346300, episode=1558 reward=0.777785 (475.42 it/sec) -training >> step=9346400, episode=1558 reward=0.77793 (574.52 it/sec) -training >> step=9346500, episode=1558 reward=0.7918361 (507.88 it/sec) -training >> step=9346600, episode=1558 reward=0.7962453 (480.67 it/sec) -training >> step=9346700, episode=1558 reward=0.8009233 (427.18 it/sec) -training >> step=9346800, episode=1558 reward=0.7852794 (444.63 it/sec) -training >> step=9346900, episode=1558 reward=0.7857729 (477.80 it/sec) -training >> step=9347000, episode=1558 reward=0.7873486 (448.15 it/sec) -training >> step=9347100, episode=1558 reward=0.7663962 (504.74 it/sec) -training >> step=9347200, episode=1558 reward=0.8061887 (452.10 it/sec) -training >> step=9347300, episode=1559 reward=0.7871072 (122.65 it/sec) -training >> step=9347400, episode=1559 reward=0.7748345 (447.87 it/sec) -training >> step=9347500, episode=1559 reward=0.7848563 (488.80 it/sec) -training >> step=9347600, episode=1559 reward=0.7801029 (469.17 it/sec) -training >> step=9347700, episode=1559 reward=0.7812555 (409.58 it/sec) -training >> step=9347800, episode=1559 reward=0.7859247 (472.36 it/sec) -training >> step=9347900, episode=1559 reward=0.7798978 (454.63 it/sec) -training >> step=9348000, episode=1559 reward=0.8080051 (394.50 it/sec) -training >> step=9348100, episode=1559 reward=0.8039318 (402.64 it/sec) -training >> step=9348200, episode=1559 reward=0.7747847 (407.19 it/sec) -training >> step=9348300, episode=1559 reward=0.7957274 (418.49 it/sec) -training >> step=9348400, episode=1559 reward=0.7808247 (438.05 it/sec) -training >> step=9348500, episode=1559 reward=0.8033291 (519.25 it/sec) -training >> step=9348600, episode=1559 reward=0.8107097 (491.99 it/sec) -training >> step=9348700, episode=1559 reward=0.774746 (472.34 it/sec) -training >> step=9348800, episode=1559 reward=0.7839304 (383.50 it/sec) -training >> step=9348900, episode=1559 reward=0.7880115 (461.08 it/sec) -training >> step=9349000, episode=1559 reward=0.7932751 (472.94 it/sec) -training >> step=9349100, episode=1559 reward=0.7839551 (500.63 it/sec) -training >> step=9349200, episode=1559 reward=0.7946057 (462.06 it/sec) -training >> step=9349300, episode=1559 reward=0.7734114 (487.70 it/sec) -training >> step=9349400, episode=1559 reward=0.7780541 (436.09 it/sec) -training >> step=9349500, episode=1559 reward=0.7925182 (505.58 it/sec) -training >> step=9349600, episode=1559 reward=0.7967605 (528.19 it/sec) -training >> step=9349700, episode=1559 reward=0.788509 (465.98 it/sec) -training >> step=9349800, episode=1559 reward=0.7914689 (498.34 it/sec) -training >> step=9349900, episode=1559 reward=0.7780666 (466.09 it/sec) -training >> step=9350000, episode=1559 reward=0.7786851 (496.54 it/sec) -training >> step=9350100, episode=1559 reward=0.7940214 (518.67 it/sec) -training >> step=9350200, episode=1559 reward=0.7902896 (472.53 it/sec) -training >> step=9350300, episode=1559 reward=0.7774192 (464.12 it/sec) -training >> step=9350400, episode=1559 reward=0.7906808 (493.38 it/sec) -training >> step=9350500, episode=1559 reward=0.7884154 (464.11 it/sec) -training >> step=9350600, episode=1559 reward=0.7896719 (373.64 it/sec) -training >> step=9350700, episode=1559 reward=0.7838337 (458.48 it/sec) -training >> step=9350800, episode=1559 reward=0.792213 (508.43 it/sec) -training >> step=9350900, episode=1559 reward=0.7955969 (445.78 it/sec) -training >> step=9351000, episode=1559 reward=0.791067 (382.15 it/sec) -training >> step=9351100, episode=1559 reward=0.7902654 (428.47 it/sec) -training >> step=9351200, episode=1559 reward=0.7858477 (395.91 it/sec) -training >> step=9351300, episode=1559 reward=0.7952359 (457.64 it/sec) -training >> step=9351400, episode=1559 reward=0.8004217 (455.94 it/sec) -training >> step=9351500, episode=1559 reward=0.7820565 (519.46 it/sec) -training >> step=9351600, episode=1559 reward=0.8201827 (476.47 it/sec) -training >> step=9351700, episode=1559 reward=0.7910604 (499.41 it/sec) -training >> step=9351800, episode=1559 reward=0.7764378 (478.05 it/sec) -training >> step=9351900, episode=1559 reward=0.7700862 (464.51 it/sec) -training >> step=9352000, episode=1559 reward=0.7886606 (479.11 it/sec) -training >> step=9352100, episode=1559 reward=0.7890514 (488.99 it/sec) -training >> step=9352200, episode=1559 reward=0.7958826 (475.02 it/sec) -training >> step=9352300, episode=1559 reward=0.7790835 (488.54 it/sec) -training >> step=9352400, episode=1559 reward=0.7929397 (458.67 it/sec) -training >> step=9352500, episode=1559 reward=0.7904905 (474.06 it/sec) -training >> step=9352600, episode=1559 reward=0.7897811 (515.13 it/sec) -training >> step=9352700, episode=1559 reward=0.7656381 (463.65 it/sec) -training >> step=9352800, episode=1559 reward=0.7882814 (479.51 it/sec) -training >> step=9352900, episode=1559 reward=0.7860456 (506.69 it/sec) -training >> step=9353000, episode=1559 reward=0.7917171 (530.22 it/sec) -training >> step=9353100, episode=1559 reward=0.770753 (494.48 it/sec) -training >> step=9353200, episode=1559 reward=0.7965987 (457.71 it/sec) -training >> step=9353300, episode=1560 reward=0.7711341 (67.82 it/sec) -training >> step=9353400, episode=1560 reward=0.7854922 (500.69 it/sec) -training >> step=9353500, episode=1560 reward=0.8089077 (478.96 it/sec) -training >> step=9353600, episode=1560 reward=0.7840926 (479.29 it/sec) -training >> step=9353700, episode=1560 reward=0.7959173 (483.38 it/sec) -training >> step=9353800, episode=1560 reward=0.7786224 (507.75 it/sec) -training >> step=9353900, episode=1560 reward=0.7869498 (440.76 it/sec) -training >> step=9354000, episode=1560 reward=0.8032264 (483.74 it/sec) -training >> step=9354100, episode=1560 reward=0.7891218 (468.85 it/sec) -training >> step=9354200, episode=1560 reward=0.7950646 (451.55 it/sec) -training >> step=9354300, episode=1560 reward=0.7916086 (463.58 it/sec) -training >> step=9354400, episode=1560 reward=0.8016744 (471.51 it/sec) -training >> step=9354500, episode=1560 reward=0.8012851 (488.94 it/sec) -training >> step=9354600, episode=1560 reward=0.7991959 (492.46 it/sec) -training >> step=9354700, episode=1560 reward=0.8168405 (473.32 it/sec) -training >> step=9354800, episode=1560 reward=0.7840528 (503.12 it/sec) -training >> step=9354900, episode=1560 reward=0.80323 (542.18 it/sec) -training >> step=9355000, episode=1560 reward=0.8108931 (500.43 it/sec) -training >> step=9355100, episode=1560 reward=0.7880093 (462.75 it/sec) -training >> step=9355200, episode=1560 reward=0.7784207 (521.83 it/sec) -training >> step=9355300, episode=1560 reward=0.7913194 (491.12 it/sec) -training >> step=9355400, episode=1560 reward=0.8089631 (491.53 it/sec) -training >> step=9355500, episode=1560 reward=0.7988031 (502.99 it/sec) -training >> step=9355600, episode=1560 reward=0.8002546 (475.19 it/sec) -training >> step=9355700, episode=1560 reward=0.7962298 (479.52 it/sec) -training >> step=9355800, episode=1560 reward=0.7916173 (463.83 it/sec) -training >> step=9355900, episode=1560 reward=0.7701251 (422.72 it/sec) -training >> step=9356000, episode=1560 reward=0.7851421 (502.54 it/sec) -training >> step=9356100, episode=1560 reward=0.7781603 (485.33 it/sec) -training >> step=9356200, episode=1560 reward=0.7802919 (476.11 it/sec) -training >> step=9356300, episode=1560 reward=0.7701277 (532.07 it/sec) -training >> step=9356400, episode=1560 reward=0.7790701 (474.68 it/sec) -training >> step=9356500, episode=1560 reward=0.7942907 (468.84 it/sec) -training >> step=9356600, episode=1560 reward=0.8020386 (348.85 it/sec) -training >> step=9356700, episode=1560 reward=0.7898704 (522.70 it/sec) -training >> step=9356800, episode=1560 reward=0.7980935 (441.14 it/sec) -training >> step=9356900, episode=1560 reward=0.801728 (441.20 it/sec) -training >> step=9357000, episode=1560 reward=0.7791358 (456.13 it/sec) -training >> step=9357100, episode=1560 reward=0.7790332 (486.31 it/sec) -training >> step=9357200, episode=1560 reward=0.7828261 (436.66 it/sec) -training >> step=9357300, episode=1560 reward=0.7897468 (471.92 it/sec) -training >> step=9357400, episode=1560 reward=0.8047843 (481.40 it/sec) -training >> step=9357500, episode=1560 reward=0.7733254 (453.00 it/sec) -training >> step=9357600, episode=1560 reward=0.8040361 (445.44 it/sec) -training >> step=9357700, episode=1560 reward=0.7951558 (424.03 it/sec) -training >> step=9357800, episode=1560 reward=0.7692397 (472.09 it/sec) -training >> step=9357900, episode=1560 reward=0.8076612 (465.89 it/sec) -training >> step=9358000, episode=1560 reward=0.7813159 (426.42 it/sec) -training >> step=9358100, episode=1560 reward=0.7798479 (444.81 it/sec) -training >> step=9358200, episode=1560 reward=0.800507 (416.93 it/sec) -training >> step=9358300, episode=1560 reward=0.7823198 (449.37 it/sec) -training >> step=9358400, episode=1560 reward=0.8076729 (405.59 it/sec) -training >> step=9358500, episode=1560 reward=0.7704816 (519.84 it/sec) -training >> step=9358600, episode=1560 reward=0.7862781 (455.14 it/sec) -training >> step=9358700, episode=1560 reward=0.8060595 (458.29 it/sec) -training >> step=9358800, episode=1560 reward=0.8034155 (455.54 it/sec) -training >> step=9358900, episode=1560 reward=0.7707579 (456.75 it/sec) -training >> step=9359000, episode=1560 reward=0.786239 (421.54 it/sec) -training >> step=9359100, episode=1560 reward=0.7972716 (488.59 it/sec) -training >> step=9359200, episode=1560 reward=0.7643084 (457.87 it/sec) -training >> step=9359300, episode=1561 reward=0.7868056 (70.66 it/sec) -training >> step=9359400, episode=1561 reward=0.7862417 (482.87 it/sec) -training >> step=9359500, episode=1561 reward=0.7916341 (475.29 it/sec) -training >> step=9359600, episode=1561 reward=0.785428 (465.99 it/sec) -training >> step=9359700, episode=1561 reward=0.7921016 (481.62 it/sec) -training >> step=9359800, episode=1561 reward=0.795361 (466.84 it/sec) -training >> step=9359900, episode=1561 reward=0.7932423 (494.19 it/sec) -training >> step=9360000, episode=1561 reward=0.7973521 (444.51 it/sec) -training >> step=9360100, episode=1561 reward=0.7858287 (419.46 it/sec) -training >> step=9360200, episode=1561 reward=0.7884299 (449.38 it/sec) -training >> step=9360300, episode=1561 reward=0.7905121 (464.10 it/sec) -training >> step=9360400, episode=1561 reward=0.8047222 (448.98 it/sec) -training >> step=9360500, episode=1561 reward=0.8001583 (465.27 it/sec) -training >> step=9360600, episode=1561 reward=0.7942265 (462.52 it/sec) -training >> step=9360700, episode=1561 reward=0.7907982 (436.40 it/sec) -training >> step=9360800, episode=1561 reward=0.7922432 (436.56 it/sec) -training >> step=9360900, episode=1561 reward=0.7965404 (499.54 it/sec) -training >> step=9361000, episode=1561 reward=0.8110774 (481.81 it/sec) -training >> step=9361100, episode=1561 reward=0.7866577 (446.20 it/sec) -training >> step=9361200, episode=1561 reward=0.7748109 (485.77 it/sec) -training >> step=9361300, episode=1561 reward=0.7994136 (471.50 it/sec) -training >> step=9361400, episode=1561 reward=0.8148996 (502.75 it/sec) -training >> step=9361500, episode=1561 reward=0.7736124 (460.54 it/sec) -training >> step=9361600, episode=1561 reward=0.8008785 (499.61 it/sec) -training >> step=9361700, episode=1561 reward=0.7939511 (482.20 it/sec) -training >> step=9361800, episode=1561 reward=0.7971395 (454.31 it/sec) -training >> step=9361900, episode=1561 reward=0.8020853 (469.57 it/sec) -training >> step=9362000, episode=1561 reward=0.7976823 (484.67 it/sec) -training >> step=9362100, episode=1561 reward=0.7926468 (476.54 it/sec) -training >> step=9362200, episode=1561 reward=0.7799576 (486.12 it/sec) -training >> step=9362300, episode=1561 reward=0.7828696 (475.19 it/sec) -training >> step=9362400, episode=1561 reward=0.7841483 (495.91 it/sec) -training >> step=9362500, episode=1561 reward=0.7895173 (440.40 it/sec) -training >> step=9362600, episode=1561 reward=0.790508 (485.34 it/sec) -training >> step=9362700, episode=1561 reward=0.7901093 (483.13 it/sec) -training >> step=9362800, episode=1561 reward=0.789248 (351.36 it/sec) -training >> step=9362900, episode=1561 reward=0.7975521 (500.70 it/sec) -training >> step=9363000, episode=1561 reward=0.7902232 (500.71 it/sec) -training >> step=9363100, episode=1561 reward=0.8112807 (480.68 it/sec) -training >> step=9363200, episode=1561 reward=0.7755339 (468.20 it/sec) -training >> step=9363300, episode=1561 reward=0.7799054 (504.22 it/sec) -training >> step=9363400, episode=1561 reward=0.7993495 (470.95 it/sec) -training >> step=9363500, episode=1561 reward=0.7770928 (464.00 it/sec) -training >> step=9363600, episode=1561 reward=0.78456 (398.04 it/sec) -training >> step=9363700, episode=1561 reward=0.7838176 (472.48 it/sec) -training >> step=9363800, episode=1561 reward=0.7859876 (400.96 it/sec) -training >> step=9363900, episode=1561 reward=0.7871375 (386.50 it/sec) -training >> step=9364000, episode=1561 reward=0.7890218 (396.13 it/sec) -training >> step=9364100, episode=1561 reward=0.8034988 (437.05 it/sec) -training >> step=9364200, episode=1561 reward=0.7998871 (460.81 it/sec) -training >> step=9364300, episode=1561 reward=0.784788 (472.82 it/sec) -training >> step=9364400, episode=1561 reward=0.7894398 (487.93 it/sec) -training >> step=9364500, episode=1561 reward=0.7855956 (489.33 it/sec) -training >> step=9364600, episode=1561 reward=0.8063506 (492.46 it/sec) -training >> step=9364700, episode=1561 reward=0.7988681 (468.70 it/sec) -training >> step=9364800, episode=1561 reward=0.7990317 (500.10 it/sec) -training >> step=9364900, episode=1561 reward=0.7618796 (471.74 it/sec) -training >> step=9365000, episode=1561 reward=0.7859122 (439.32 it/sec) -training >> step=9365100, episode=1561 reward=0.769039 (482.18 it/sec) -training >> step=9365200, episode=1561 reward=0.7792738 (486.23 it/sec) -training >> step=9365300, episode=1562 reward=0.7920775 (68.92 it/sec) -training >> step=9365400, episode=1562 reward=0.7869745 (444.95 it/sec) -training >> step=9365500, episode=1562 reward=0.7609922 (494.38 it/sec) -training >> step=9365600, episode=1562 reward=0.7888848 (471.62 it/sec) -training >> step=9365700, episode=1562 reward=0.7990811 (460.44 it/sec) -training >> step=9365800, episode=1562 reward=0.784645 (427.44 it/sec) -training >> step=9365900, episode=1562 reward=0.7821318 (392.31 it/sec) -training >> step=9366000, episode=1562 reward=0.7922714 (503.00 it/sec) -training >> step=9366100, episode=1562 reward=0.8011279 (508.63 it/sec) -training >> step=9366200, episode=1562 reward=0.7769845 (494.52 it/sec) -training >> step=9366300, episode=1562 reward=0.7974186 (510.41 it/sec) -training >> step=9366400, episode=1562 reward=0.8205703 (522.23 it/sec) -training >> step=9366500, episode=1562 reward=0.8174469 (498.78 it/sec) -training >> step=9366600, episode=1562 reward=0.7974361 (491.02 it/sec) -training >> step=9366700, episode=1562 reward=0.7881513 (495.19 it/sec) -training >> step=9366800, episode=1562 reward=0.7941676 (514.13 it/sec) -training >> step=9366900, episode=1562 reward=0.7823726 (471.08 it/sec) -training >> step=9367000, episode=1562 reward=0.8010631 (456.93 it/sec) -training >> step=9367100, episode=1562 reward=0.8079154 (474.81 it/sec) -training >> step=9367200, episode=1562 reward=0.7709872 (440.84 it/sec) -training >> step=9367300, episode=1562 reward=0.8030818 (461.55 it/sec) -training >> step=9367400, episode=1562 reward=0.7868553 (472.59 it/sec) -training >> step=9367500, episode=1562 reward=0.7885196 (405.39 it/sec) -training >> step=9367600, episode=1562 reward=0.7870697 (466.82 it/sec) -training >> step=9367700, episode=1562 reward=0.7897465 (501.52 it/sec) -training >> step=9367800, episode=1562 reward=0.7834138 (405.08 it/sec) -training >> step=9367900, episode=1562 reward=0.7851425 (507.57 it/sec) -training >> step=9368000, episode=1562 reward=0.7776648 (486.86 it/sec) -training >> step=9368100, episode=1562 reward=0.8102633 (491.19 it/sec) -training >> step=9368200, episode=1562 reward=0.7920647 (527.13 it/sec) -training >> step=9368300, episode=1562 reward=0.7954217 (497.16 it/sec) -training >> step=9368400, episode=1562 reward=0.7898145 (491.44 it/sec) -training >> step=9368500, episode=1562 reward=0.796041 (505.15 it/sec) -training >> step=9368600, episode=1562 reward=0.8010756 (513.49 it/sec) -training >> step=9368700, episode=1562 reward=0.7990798 (471.95 it/sec) -training >> step=9368800, episode=1562 reward=0.7768019 (465.75 it/sec) -training >> step=9368900, episode=1562 reward=0.7779644 (544.07 it/sec) -training >> step=9369000, episode=1562 reward=0.7837859 (487.09 it/sec) -training >> step=9369100, episode=1562 reward=0.8018513 (381.31 it/sec) -training >> step=9369200, episode=1562 reward=0.7746897 (465.28 it/sec) -training >> step=9369300, episode=1562 reward=0.7894864 (492.46 it/sec) -training >> step=9369400, episode=1562 reward=0.7975252 (478.10 it/sec) -training >> step=9369500, episode=1562 reward=0.8121307 (514.62 it/sec) -training >> step=9369600, episode=1562 reward=0.7927555 (492.85 it/sec) -training >> step=9369700, episode=1562 reward=0.7773614 (501.96 it/sec) -training >> step=9369800, episode=1562 reward=0.7986143 (474.30 it/sec) -training >> step=9369900, episode=1562 reward=0.7936514 (495.44 it/sec) -training >> step=9370000, episode=1562 reward=0.7889242 (490.16 it/sec) -training >> step=9370100, episode=1562 reward=0.794424 (506.32 it/sec) -training >> step=9370200, episode=1562 reward=0.7962697 (460.64 it/sec) -training >> step=9370300, episode=1562 reward=0.8174282 (447.46 it/sec) -training >> step=9370400, episode=1562 reward=0.7880343 (480.51 it/sec) -training >> step=9370500, episode=1562 reward=0.7937322 (479.28 it/sec) -training >> step=9370600, episode=1562 reward=0.7879958 (450.05 it/sec) -training >> step=9370700, episode=1562 reward=0.796021 (428.09 it/sec) -training >> step=9370800, episode=1562 reward=0.7678461 (503.03 it/sec) -training >> step=9370900, episode=1562 reward=0.8028383 (485.02 it/sec) -training >> step=9371000, episode=1562 reward=0.8021268 (468.95 it/sec) -training >> step=9371100, episode=1562 reward=0.7846761 (454.93 it/sec) -training >> step=9371200, episode=1562 reward=0.7860899 (454.31 it/sec) -training >> step=9371300, episode=1563 reward=0.7932149 (94.85 it/sec) -training >> step=9371400, episode=1563 reward=0.7679666 (442.83 it/sec) -training >> step=9371500, episode=1563 reward=0.7911627 (438.58 it/sec) -training >> step=9371600, episode=1563 reward=0.7742912 (418.79 it/sec) -training >> step=9371700, episode=1563 reward=0.7845179 (411.76 it/sec) -training >> step=9371800, episode=1563 reward=0.7916064 (482.04 it/sec) -training >> step=9371900, episode=1563 reward=0.798256 (481.55 it/sec) -training >> step=9372000, episode=1563 reward=0.786521 (505.77 it/sec) -training >> step=9372100, episode=1563 reward=0.7925622 (474.29 it/sec) -training >> step=9372200, episode=1563 reward=0.7945796 (413.73 it/sec) -training >> step=9372300, episode=1563 reward=0.7867545 (459.18 it/sec) -training >> step=9372400, episode=1563 reward=0.7915736 (505.07 it/sec) -training >> step=9372500, episode=1563 reward=0.7883103 (501.05 it/sec) -training >> step=9372600, episode=1563 reward=0.7769659 (501.42 it/sec) -training >> step=9372700, episode=1563 reward=0.786999 (485.64 it/sec) -training >> step=9372800, episode=1563 reward=0.799217 (492.38 it/sec) -training >> step=9372900, episode=1563 reward=0.8060017 (466.98 it/sec) -training >> step=9373000, episode=1563 reward=0.7829509 (484.56 it/sec) -training >> step=9373100, episode=1563 reward=0.8048296 (484.65 it/sec) -training >> step=9373200, episode=1563 reward=0.8100799 (520.75 it/sec) -training >> step=9373300, episode=1563 reward=0.7773934 (480.72 it/sec) -training >> step=9373400, episode=1563 reward=0.7989712 (482.51 it/sec) -training >> step=9373500, episode=1563 reward=0.7939957 (522.59 it/sec) -training >> step=9373600, episode=1563 reward=0.7989311 (520.31 it/sec) -training >> step=9373700, episode=1563 reward=0.8046613 (503.05 it/sec) -training >> step=9373800, episode=1563 reward=0.7899542 (480.53 it/sec) -training >> step=9373900, episode=1563 reward=0.7837422 (521.83 it/sec) -training >> step=9374000, episode=1563 reward=0.7885389 (526.34 it/sec) -training >> step=9374100, episode=1563 reward=0.784826 (515.96 it/sec) -training >> step=9374200, episode=1563 reward=0.7950955 (463.60 it/sec) -training >> step=9374300, episode=1563 reward=0.7833835 (490.84 it/sec) -training >> step=9374400, episode=1563 reward=0.808461 (505.28 it/sec) -training >> step=9374500, episode=1563 reward=0.7862123 (483.16 it/sec) -training >> step=9374600, episode=1563 reward=0.7960219 (451.25 it/sec) -training >> step=9374700, episode=1563 reward=0.7887106 (486.90 it/sec) -training >> step=9374800, episode=1563 reward=0.7819011 (458.81 it/sec) -training >> step=9374900, episode=1563 reward=0.7771497 (427.19 it/sec) -training >> step=9375000, episode=1563 reward=0.7832889 (417.09 it/sec) -training >> step=9375100, episode=1563 reward=0.8051597 (478.16 it/sec) -training >> step=9375200, episode=1563 reward=0.8176963 (480.88 it/sec) -training >> step=9375300, episode=1563 reward=0.7702438 (385.01 it/sec) -training >> step=9375400, episode=1563 reward=0.804882 (518.51 it/sec) -training >> step=9375500, episode=1563 reward=0.7921401 (474.01 it/sec) -training >> step=9375600, episode=1563 reward=0.7715807 (488.68 it/sec) -training >> step=9375700, episode=1563 reward=0.7765485 (478.12 it/sec) -training >> step=9375800, episode=1563 reward=0.7829682 (484.20 it/sec) -training >> step=9375900, episode=1563 reward=0.7991436 (483.20 it/sec) -training >> step=9376000, episode=1563 reward=0.8018008 (486.77 it/sec) -training >> step=9376100, episode=1563 reward=0.7859275 (469.21 it/sec) -training >> step=9376200, episode=1563 reward=0.794579 (484.39 it/sec) -training >> step=9376300, episode=1563 reward=0.7791373 (477.32 it/sec) -training >> step=9376400, episode=1563 reward=0.7924699 (478.68 it/sec) -training >> step=9376500, episode=1563 reward=0.8077873 (490.12 it/sec) -training >> step=9376600, episode=1563 reward=0.7734444 (443.44 it/sec) -training >> step=9376700, episode=1563 reward=0.7962656 (500.46 it/sec) -training >> step=9376800, episode=1563 reward=0.7862287 (499.01 it/sec) -training >> step=9376900, episode=1563 reward=0.7810954 (460.67 it/sec) -training >> step=9377000, episode=1563 reward=0.7861079 (470.07 it/sec) -training >> step=9377100, episode=1563 reward=0.7810131 (433.76 it/sec) -training >> step=9377200, episode=1563 reward=0.7846493 (425.92 it/sec) -training >> step=9377300, episode=1564 reward=0.8029622 (85.18 it/sec) -training >> step=9377400, episode=1564 reward=0.7819986 (494.99 it/sec) -training >> step=9377500, episode=1564 reward=0.7803881 (478.85 it/sec) -training >> step=9377600, episode=1564 reward=0.7808094 (440.01 it/sec) -training >> step=9377700, episode=1564 reward=0.7919436 (466.28 it/sec) -training >> step=9377800, episode=1564 reward=0.8002267 (497.98 it/sec) -training >> step=9377900, episode=1564 reward=0.7969596 (458.57 it/sec) -training >> step=9378000, episode=1564 reward=0.7958225 (459.97 it/sec) -training >> step=9378100, episode=1564 reward=0.7964462 (449.76 it/sec) -training >> step=9378200, episode=1564 reward=0.7825352 (474.55 it/sec) -training >> step=9378300, episode=1564 reward=0.8054155 (470.84 it/sec) -training >> step=9378400, episode=1564 reward=0.7875826 (512.27 it/sec) -training >> step=9378500, episode=1564 reward=0.8172853 (468.43 it/sec) -training >> step=9378600, episode=1564 reward=0.7954117 (481.85 it/sec) -training >> step=9378700, episode=1564 reward=0.7938402 (479.98 it/sec) -training >> step=9378800, episode=1564 reward=0.8097159 (477.49 it/sec) -training >> step=9378900, episode=1564 reward=0.800708 (463.92 it/sec) -training >> step=9379000, episode=1564 reward=0.804897 (441.54 it/sec) -training >> step=9379100, episode=1564 reward=0.8106612 (488.13 it/sec) -training >> step=9379200, episode=1564 reward=0.7682663 (464.91 it/sec) -training >> step=9379300, episode=1564 reward=0.7988977 (446.31 it/sec) -training >> step=9379400, episode=1564 reward=0.7859564 (453.92 it/sec) -training >> step=9379500, episode=1564 reward=0.8065814 (468.82 it/sec) -training >> step=9379600, episode=1564 reward=0.7911943 (483.78 it/sec) -training >> step=9379700, episode=1564 reward=0.7972464 (533.94 it/sec) -training >> step=9379800, episode=1564 reward=0.7840858 (511.56 it/sec) -training >> step=9379900, episode=1564 reward=0.7793642 (510.92 it/sec) -training >> step=9380000, episode=1564 reward=0.7889708 (467.44 it/sec) -training >> step=9380100, episode=1564 reward=0.7798813 (511.83 it/sec) -training >> step=9380200, episode=1564 reward=0.7990865 (504.02 it/sec) -training >> step=9380300, episode=1564 reward=0.8008907 (446.31 it/sec) -training >> step=9380400, episode=1564 reward=0.7958347 (473.28 it/sec) -training >> step=9380500, episode=1564 reward=0.7930799 (490.48 it/sec) -training >> step=9380600, episode=1564 reward=0.7839174 (504.70 it/sec) -training >> step=9380700, episode=1564 reward=0.7773811 (524.59 it/sec) -training >> step=9380800, episode=1564 reward=0.7915733 (468.81 it/sec) -training >> step=9380900, episode=1564 reward=0.7955913 (488.98 it/sec) -training >> step=9381000, episode=1564 reward=0.7929255 (505.50 it/sec) -training >> step=9381100, episode=1564 reward=0.7985938 (496.67 it/sec) -training >> step=9381200, episode=1564 reward=0.7857959 (529.18 it/sec) -training >> step=9381300, episode=1564 reward=0.7812953 (522.67 it/sec) -training >> step=9381400, episode=1564 reward=0.7960073 (517.49 it/sec) -training >> step=9381500, episode=1564 reward=0.771188 (370.02 it/sec) -training >> step=9381600, episode=1564 reward=0.8071591 (495.36 it/sec) -training >> step=9381700, episode=1564 reward=0.7957798 (496.19 it/sec) -training >> step=9381800, episode=1564 reward=0.7848437 (475.85 it/sec) -training >> step=9381900, episode=1564 reward=0.8250002 (503.88 it/sec) -training >> step=9382000, episode=1564 reward=0.7937326 (477.54 it/sec) -training >> step=9382100, episode=1564 reward=0.7901796 (484.21 it/sec) -training >> step=9382200, episode=1564 reward=0.8120995 (491.67 it/sec) -training >> step=9382300, episode=1564 reward=0.7936401 (493.16 it/sec) -training >> step=9382400, episode=1564 reward=0.8087192 (467.34 it/sec) -training >> step=9382500, episode=1564 reward=0.7939247 (481.10 it/sec) -training >> step=9382600, episode=1564 reward=0.7918252 (489.09 it/sec) -training >> step=9382700, episode=1564 reward=0.7941478 (506.75 it/sec) -training >> step=9382800, episode=1564 reward=0.7953047 (479.55 it/sec) -training >> step=9382900, episode=1564 reward=0.7876344 (483.55 it/sec) -training >> step=9383000, episode=1564 reward=0.7901236 (496.90 it/sec) -training >> step=9383100, episode=1564 reward=0.7904752 (501.48 it/sec) -training >> step=9383200, episode=1564 reward=0.7971506 (503.67 it/sec) -training >> step=9383300, episode=1565 reward=0.7765312 (65.61 it/sec) -training >> step=9383400, episode=1565 reward=0.7966914 (478.57 it/sec) -training >> step=9383500, episode=1565 reward=0.7473693 (464.39 it/sec) -training >> step=9383600, episode=1565 reward=0.7675723 (457.42 it/sec) -training >> step=9383700, episode=1565 reward=0.7842457 (455.24 it/sec) -training >> step=9383800, episode=1565 reward=0.789686 (461.73 it/sec) -training >> step=9383900, episode=1565 reward=0.8070123 (457.65 it/sec) -training >> step=9384000, episode=1565 reward=0.7790873 (471.80 it/sec) -training >> step=9384100, episode=1565 reward=0.798466 (507.69 it/sec) -training >> step=9384200, episode=1565 reward=0.7967572 (464.42 it/sec) -training >> step=9384300, episode=1565 reward=0.7958428 (497.34 it/sec) -training >> step=9384400, episode=1565 reward=0.8051956 (462.22 it/sec) -training >> step=9384500, episode=1565 reward=0.7918401 (473.12 it/sec) -training >> step=9384600, episode=1565 reward=0.7894703 (498.73 it/sec) -training >> step=9384700, episode=1565 reward=0.7902336 (497.70 it/sec) -training >> step=9384800, episode=1565 reward=0.8003396 (460.35 it/sec) -training >> step=9384900, episode=1565 reward=0.8032564 (524.65 it/sec) -training >> step=9385000, episode=1565 reward=0.7905319 (428.86 it/sec) -training >> step=9385100, episode=1565 reward=0.79985 (490.37 it/sec) -training >> step=9385200, episode=1565 reward=0.7732531 (488.89 it/sec) -training >> step=9385300, episode=1565 reward=0.8002304 (434.49 it/sec) -training >> step=9385400, episode=1565 reward=0.7882686 (489.61 it/sec) -training >> step=9385500, episode=1565 reward=0.7824023 (466.67 it/sec) -training >> step=9385600, episode=1565 reward=0.8046105 (497.76 it/sec) -training >> step=9385700, episode=1565 reward=0.7868944 (497.69 it/sec) -training >> step=9385800, episode=1565 reward=0.8045056 (478.95 it/sec) -training >> step=9385900, episode=1565 reward=0.7976156 (466.14 it/sec) -training >> step=9386000, episode=1565 reward=0.7857458 (498.77 it/sec) -training >> step=9386100, episode=1565 reward=0.7803263 (441.76 it/sec) -training >> step=9386200, episode=1565 reward=0.8006864 (462.94 it/sec) -training >> step=9386300, episode=1565 reward=0.7780075 (477.00 it/sec) -training >> step=9386400, episode=1565 reward=0.7885464 (508.69 it/sec) -training >> step=9386500, episode=1565 reward=0.7872437 (470.24 it/sec) -training >> step=9386600, episode=1565 reward=0.7741413 (449.10 it/sec) -training >> step=9386700, episode=1565 reward=0.7813807 (501.72 it/sec) -training >> step=9386800, episode=1565 reward=0.776977 (466.31 it/sec) -training >> step=9386900, episode=1565 reward=0.787141 (475.63 it/sec) -training >> step=9387000, episode=1565 reward=0.7910259 (458.58 it/sec) -training >> step=9387100, episode=1565 reward=0.7919093 (499.97 it/sec) -training >> step=9387200, episode=1565 reward=0.7747442 (500.69 it/sec) -training >> step=9387300, episode=1565 reward=0.7877041 (467.78 it/sec) -training >> step=9387400, episode=1565 reward=0.7952405 (483.85 it/sec) -training >> step=9387500, episode=1565 reward=0.7941009 (494.48 it/sec) -training >> step=9387600, episode=1565 reward=0.8115124 (493.86 it/sec) -training >> step=9387700, episode=1565 reward=0.7905216 (358.51 it/sec) -training >> step=9387800, episode=1565 reward=0.7678424 (471.95 it/sec) -training >> step=9387900, episode=1565 reward=0.7808189 (479.74 it/sec) -training >> step=9388000, episode=1565 reward=0.7929016 (495.56 it/sec) -training >> step=9388100, episode=1565 reward=0.7935539 (496.04 it/sec) -training >> step=9388200, episode=1565 reward=0.7869998 (516.87 it/sec) -training >> step=9388300, episode=1565 reward=0.7859344 (476.13 it/sec) -training >> step=9388400, episode=1565 reward=0.8029488 (465.50 it/sec) -training >> step=9388500, episode=1565 reward=0.7852062 (473.52 it/sec) -training >> step=9388600, episode=1565 reward=0.7708533 (506.13 it/sec) -training >> step=9388700, episode=1565 reward=0.7975234 (464.28 it/sec) -training >> step=9388800, episode=1565 reward=0.7747707 (470.58 it/sec) -training >> step=9388900, episode=1565 reward=0.7987201 (512.03 it/sec) -training >> step=9389000, episode=1565 reward=0.7531986 (486.93 it/sec) -training >> step=9389100, episode=1565 reward=0.8074186 (486.59 it/sec) -training >> step=9389200, episode=1565 reward=0.795749 (496.60 it/sec) -training >> step=9389300, episode=1566 reward=0.8026816 (130.89 it/sec) -training >> step=9389400, episode=1566 reward=0.778959 (465.90 it/sec) -training >> step=9389500, episode=1566 reward=0.791085 (483.90 it/sec) -training >> step=9389600, episode=1566 reward=0.7907789 (441.40 it/sec) -training >> step=9389700, episode=1566 reward=0.8008454 (457.72 it/sec) -training >> step=9389800, episode=1566 reward=0.7966959 (483.47 it/sec) -training >> step=9389900, episode=1566 reward=0.7843308 (485.74 it/sec) -training >> step=9390000, episode=1566 reward=0.7937846 (462.38 it/sec) -training >> step=9390100, episode=1566 reward=0.7835706 (436.59 it/sec) -training >> step=9390200, episode=1566 reward=0.7885865 (433.43 it/sec) -training >> step=9390300, episode=1566 reward=0.7971666 (409.89 it/sec) -training >> step=9390400, episode=1566 reward=0.794647 (471.83 it/sec) -training >> step=9390500, episode=1566 reward=0.7876676 (495.05 it/sec) -training >> step=9390600, episode=1566 reward=0.7903193 (489.69 it/sec) -training >> step=9390700, episode=1566 reward=0.7952344 (478.81 it/sec) -training >> step=9390800, episode=1566 reward=0.8099758 (494.02 it/sec) -training >> step=9390900, episode=1566 reward=0.7894768 (407.21 it/sec) -training >> step=9391000, episode=1566 reward=0.7928962 (505.09 it/sec) -training >> step=9391100, episode=1566 reward=0.7853547 (493.50 it/sec) -training >> step=9391200, episode=1566 reward=0.7785647 (483.35 it/sec) -training >> step=9391300, episode=1566 reward=0.788581 (492.77 it/sec) -training >> step=9391400, episode=1566 reward=0.804839 (423.20 it/sec) -training >> step=9391500, episode=1566 reward=0.801114 (507.59 it/sec) -training >> step=9391600, episode=1566 reward=0.7692416 (463.66 it/sec) -training >> step=9391700, episode=1566 reward=0.7866826 (488.30 it/sec) -training >> step=9391800, episode=1566 reward=0.7801055 (444.41 it/sec) -training >> step=9391900, episode=1566 reward=0.7776969 (440.45 it/sec) -training >> step=9392000, episode=1566 reward=0.7896218 (463.72 it/sec) -training >> step=9392100, episode=1566 reward=0.7747791 (524.58 it/sec) -training >> step=9392200, episode=1566 reward=0.790593 (465.76 it/sec) -training >> step=9392300, episode=1566 reward=0.789553 (417.62 it/sec) -training >> step=9392400, episode=1566 reward=0.7857016 (456.83 it/sec) -training >> step=9392500, episode=1566 reward=0.7853112 (503.93 it/sec) -training >> step=9392600, episode=1566 reward=0.7903792 (487.76 it/sec) -training >> step=9392700, episode=1566 reward=0.7827785 (457.60 it/sec) -training >> step=9392800, episode=1566 reward=0.7853487 (436.52 it/sec) -training >> step=9392900, episode=1566 reward=0.7940238 (457.75 it/sec) -training >> step=9393000, episode=1566 reward=0.7674359 (476.70 it/sec) -training >> step=9393100, episode=1566 reward=0.8001741 (500.80 it/sec) -training >> step=9393200, episode=1566 reward=0.7716265 (481.50 it/sec) -training >> step=9393300, episode=1566 reward=0.789928 (424.33 it/sec) -training >> step=9393400, episode=1566 reward=0.7906118 (506.69 it/sec) -training >> step=9393500, episode=1566 reward=0.7816234 (450.32 it/sec) -training >> step=9393600, episode=1566 reward=0.8007005 (520.25 it/sec) -training >> step=9393700, episode=1566 reward=0.7814524 (481.88 it/sec) -training >> step=9393800, episode=1566 reward=0.7902904 (482.05 it/sec) -training >> step=9393900, episode=1566 reward=0.7896649 (496.63 it/sec) -training >> step=9394000, episode=1566 reward=0.777115 (399.23 it/sec) -training >> step=9394100, episode=1566 reward=0.8066724 (482.86 it/sec) -training >> step=9394200, episode=1566 reward=0.7817699 (494.18 it/sec) -training >> step=9394300, episode=1566 reward=0.7805163 (471.81 it/sec) -training >> step=9394400, episode=1566 reward=0.7871159 (489.92 it/sec) -training >> step=9394500, episode=1566 reward=0.800459 (486.86 it/sec) -training >> step=9394600, episode=1566 reward=0.8092459 (473.02 it/sec) -training >> step=9394700, episode=1566 reward=0.7923806 (482.65 it/sec) -training >> step=9394800, episode=1566 reward=0.8073531 (469.80 it/sec) -training >> step=9394900, episode=1566 reward=0.8072785 (501.16 it/sec) -training >> step=9395000, episode=1566 reward=0.7890389 (499.24 it/sec) -training >> step=9395100, episode=1566 reward=0.7896235 (470.12 it/sec) -training >> step=9395200, episode=1566 reward=0.7996525 (478.98 it/sec) -training >> step=9395300, episode=1567 reward=0.7770098 (148.81 it/sec) -training >> step=9395400, episode=1567 reward=0.7749475 (494.90 it/sec) -training >> step=9395500, episode=1567 reward=0.7861581 (475.47 it/sec) -training >> step=9395600, episode=1567 reward=0.8023487 (454.87 it/sec) -training >> step=9395700, episode=1567 reward=0.7916032 (501.25 it/sec) -training >> step=9395800, episode=1567 reward=0.7743603 (481.42 it/sec) -training >> step=9395900, episode=1567 reward=0.7791793 (513.23 it/sec) -training >> step=9396000, episode=1567 reward=0.7899503 (499.30 it/sec) -training >> step=9396100, episode=1567 reward=0.8008472 (457.25 it/sec) -training >> step=9396200, episode=1567 reward=0.7959023 (476.06 it/sec) -training >> step=9396300, episode=1567 reward=0.7838539 (465.36 it/sec) -training >> step=9396400, episode=1567 reward=0.7676026 (518.83 it/sec) -training >> step=9396500, episode=1567 reward=0.8008414 (490.07 it/sec) -training >> step=9396600, episode=1567 reward=0.7890448 (480.88 it/sec) -training >> step=9396700, episode=1567 reward=0.7811434 (492.65 it/sec) -training >> step=9396800, episode=1567 reward=0.7926542 (507.19 it/sec) -training >> step=9396900, episode=1567 reward=0.8016808 (455.37 it/sec) -training >> step=9397000, episode=1567 reward=0.7653152 (465.99 it/sec) -training >> step=9397100, episode=1567 reward=0.7750404 (487.37 it/sec) -training >> step=9397200, episode=1567 reward=0.7922505 (507.94 it/sec) -training >> step=9397300, episode=1567 reward=0.7953811 (478.02 it/sec) -training >> step=9397400, episode=1567 reward=0.7894602 (460.57 it/sec) -training >> step=9397500, episode=1567 reward=0.7922112 (488.50 it/sec) -training >> step=9397600, episode=1567 reward=0.8187845 (451.63 it/sec) -training >> step=9397700, episode=1567 reward=0.7902769 (421.66 it/sec) -training >> step=9397800, episode=1567 reward=0.7841313 (467.58 it/sec) -training >> step=9397900, episode=1567 reward=0.7960164 (533.17 it/sec) -training >> step=9398000, episode=1567 reward=0.801413 (480.87 it/sec) -training >> step=9398100, episode=1567 reward=0.7803903 (456.07 it/sec) -training >> step=9398200, episode=1567 reward=0.7967957 (473.89 it/sec) -training >> step=9398300, episode=1567 reward=0.7732549 (475.13 it/sec) -training >> step=9398400, episode=1567 reward=0.7737142 (414.32 it/sec) -training >> step=9398500, episode=1567 reward=0.7866256 (441.21 it/sec) -training >> step=9398600, episode=1567 reward=0.7794497 (484.10 it/sec) -training >> step=9398700, episode=1567 reward=0.7979955 (421.07 it/sec) -training >> step=9398800, episode=1567 reward=0.8002515 (504.51 it/sec) -training >> step=9398900, episode=1567 reward=0.7979226 (505.16 it/sec) -training >> step=9399000, episode=1567 reward=0.7980629 (505.45 it/sec) -training >> step=9399100, episode=1567 reward=0.7727265 (502.66 it/sec) -training >> step=9399200, episode=1567 reward=0.79083 (494.50 it/sec) -training >> step=9399300, episode=1567 reward=0.7987608 (450.09 it/sec) -training >> step=9399400, episode=1567 reward=0.799855 (505.15 it/sec) -training >> step=9399500, episode=1567 reward=0.8015354 (500.15 it/sec) -training >> step=9399600, episode=1567 reward=0.7825415 (487.06 it/sec) -training >> step=9399700, episode=1567 reward=0.8014049 (537.46 it/sec) -training >> step=9399800, episode=1567 reward=0.8031574 (454.63 it/sec) -training >> step=9399900, episode=1567 reward=0.8026057 (413.12 it/sec) -training >> step=9400000, episode=1567 reward=0.7781309 (481.86 it/sec) -training >> step=9400100, episode=1567 reward=0.7977055 (501.59 it/sec) -training >> step=9400200, episode=1567 reward=0.7724819 (372.10 it/sec) -training >> step=9400300, episode=1567 reward=0.781937 (504.88 it/sec) -training >> step=9400400, episode=1567 reward=0.798269 (462.59 it/sec) -training >> step=9400500, episode=1567 reward=0.7874908 (445.69 it/sec) -training >> step=9400600, episode=1567 reward=0.8022107 (512.32 it/sec) -training >> step=9400700, episode=1567 reward=0.8037991 (458.19 it/sec) -training >> step=9400800, episode=1567 reward=0.7891833 (497.97 it/sec) -training >> step=9400900, episode=1567 reward=0.7962033 (464.55 it/sec) -training >> step=9401000, episode=1567 reward=0.7826185 (468.32 it/sec) -training >> step=9401100, episode=1567 reward=0.791388 (461.07 it/sec) -training >> step=9401200, episode=1567 reward=0.787934 (462.21 it/sec) -training >> step=9401300, episode=1568 reward=0.7893289 (147.69 it/sec) -training >> step=9401400, episode=1568 reward=0.7720504 (471.24 it/sec) -training >> step=9401500, episode=1568 reward=0.7800454 (515.81 it/sec) -training >> step=9401600, episode=1568 reward=0.7757196 (476.91 it/sec) -training >> step=9401700, episode=1568 reward=0.7850553 (486.97 it/sec) -training >> step=9401800, episode=1568 reward=0.7888584 (449.39 it/sec) -training >> step=9401900, episode=1568 reward=0.789207 (478.38 it/sec) -training >> step=9402000, episode=1568 reward=0.785904 (461.72 it/sec) -training >> step=9402100, episode=1568 reward=0.7746131 (487.06 it/sec) -training >> step=9402200, episode=1568 reward=0.798984 (469.31 it/sec) -training >> step=9402300, episode=1568 reward=0.815876 (473.30 it/sec) -training >> step=9402400, episode=1568 reward=0.7640262 (504.13 it/sec) -training >> step=9402500, episode=1568 reward=0.7881467 (494.58 it/sec) -training >> step=9402600, episode=1568 reward=0.7974789 (499.36 it/sec) -training >> step=9402700, episode=1568 reward=0.8017515 (459.90 it/sec) -training >> step=9402800, episode=1568 reward=0.7916642 (461.79 it/sec) -training >> step=9402900, episode=1568 reward=0.8139021 (525.78 it/sec) -training >> step=9403000, episode=1568 reward=0.8057053 (467.75 it/sec) -training >> step=9403100, episode=1568 reward=0.7894364 (448.60 it/sec) -training >> step=9403200, episode=1568 reward=0.7873242 (487.87 it/sec) -training >> step=9403300, episode=1568 reward=0.7933541 (527.67 it/sec) -training >> step=9403400, episode=1568 reward=0.7967339 (494.08 it/sec) -training >> step=9403500, episode=1568 reward=0.7934871 (476.66 it/sec) -training >> step=9403600, episode=1568 reward=0.7994066 (522.45 it/sec) -training >> step=9403700, episode=1568 reward=0.7838915 (465.59 it/sec) -training >> step=9403800, episode=1568 reward=0.7731236 (435.98 it/sec) -training >> step=9403900, episode=1568 reward=0.7857386 (423.92 it/sec) -training >> step=9404000, episode=1568 reward=0.7738166 (411.85 it/sec) -training >> step=9404100, episode=1568 reward=0.7870789 (414.21 it/sec) -training >> step=9404200, episode=1568 reward=0.8020567 (444.97 it/sec) -training >> step=9404300, episode=1568 reward=0.8062855 (472.18 it/sec) -training >> step=9404400, episode=1568 reward=0.7844114 (512.91 it/sec) -training >> step=9404500, episode=1568 reward=0.7907771 (492.31 it/sec) -training >> step=9404600, episode=1568 reward=0.8060728 (444.48 it/sec) -training >> step=9404700, episode=1568 reward=0.8021172 (523.40 it/sec) -training >> step=9404800, episode=1568 reward=0.8021976 (497.06 it/sec) -training >> step=9404900, episode=1568 reward=0.8066884 (488.11 it/sec) -training >> step=9405000, episode=1568 reward=0.7877839 (492.76 it/sec) -training >> step=9405100, episode=1568 reward=0.7935447 (473.43 it/sec) -training >> step=9405200, episode=1568 reward=0.7709918 (491.00 it/sec) -training >> step=9405300, episode=1568 reward=0.7981941 (478.84 it/sec) -training >> step=9405400, episode=1568 reward=0.7942936 (490.38 it/sec) -training >> step=9405500, episode=1568 reward=0.7920384 (487.09 it/sec) -training >> step=9405600, episode=1568 reward=0.78055 (487.03 it/sec) -training >> step=9405700, episode=1568 reward=0.7790088 (491.77 it/sec) -training >> step=9405800, episode=1568 reward=0.785865 (522.67 it/sec) -training >> step=9405900, episode=1568 reward=0.790104 (491.65 it/sec) -training >> step=9406000, episode=1568 reward=0.7916642 (491.23 it/sec) -training >> step=9406100, episode=1568 reward=0.7834196 (493.60 it/sec) -training >> step=9406200, episode=1568 reward=0.7850318 (476.21 it/sec) -training >> step=9406300, episode=1568 reward=0.8003252 (453.28 it/sec) -training >> step=9406400, episode=1568 reward=0.7913615 (350.69 it/sec) -training >> step=9406500, episode=1568 reward=0.7823302 (494.86 it/sec) -training >> step=9406600, episode=1568 reward=0.7846638 (466.81 it/sec) -training >> step=9406700, episode=1568 reward=0.781121 (479.32 it/sec) -training >> step=9406800, episode=1568 reward=0.8003519 (489.89 it/sec) -training >> step=9406900, episode=1568 reward=0.7894918 (496.81 it/sec) -training >> step=9407000, episode=1568 reward=0.7705892 (497.69 it/sec) -training >> step=9407100, episode=1568 reward=0.7909706 (449.52 it/sec) -training >> step=9407200, episode=1568 reward=0.7971652 (504.31 it/sec) -training >> step=9407300, episode=1569 reward=0.7809241 (146.48 it/sec) -training >> step=9407400, episode=1569 reward=0.77356 (444.88 it/sec) -training >> step=9407500, episode=1569 reward=0.783587 (484.48 it/sec) -training >> step=9407600, episode=1569 reward=0.7811393 (467.82 it/sec) -training >> step=9407700, episode=1569 reward=0.7827676 (478.63 it/sec) -training >> step=9407800, episode=1569 reward=0.8050101 (449.90 it/sec) -training >> step=9407900, episode=1569 reward=0.7895009 (480.29 it/sec) -training >> step=9408000, episode=1569 reward=0.8004636 (485.43 it/sec) -training >> step=9408100, episode=1569 reward=0.8083925 (487.48 it/sec) -training >> step=9408200, episode=1569 reward=0.7702844 (448.55 it/sec) -training >> step=9408300, episode=1569 reward=0.7872193 (433.32 it/sec) -training >> step=9408400, episode=1569 reward=0.7666402 (506.40 it/sec) -training >> step=9408500, episode=1569 reward=0.7731446 (449.41 it/sec) -training >> step=9408600, episode=1569 reward=0.8074144 (486.46 it/sec) -training >> step=9408700, episode=1569 reward=0.7711809 (514.39 it/sec) -training >> step=9408800, episode=1569 reward=0.7889306 (474.74 it/sec) -training >> step=9408900, episode=1569 reward=0.7832526 (452.23 it/sec) -training >> step=9409000, episode=1569 reward=0.8010968 (464.44 it/sec) -training >> step=9409100, episode=1569 reward=0.78831 (507.96 it/sec) -training >> step=9409200, episode=1569 reward=0.7767762 (493.68 it/sec) -training >> step=9409300, episode=1569 reward=0.7844058 (477.29 it/sec) -training >> step=9409400, episode=1569 reward=0.7940388 (490.92 it/sec) -training >> step=9409500, episode=1569 reward=0.790824 (436.78 it/sec) -training >> step=9409600, episode=1569 reward=0.8046611 (460.28 it/sec) -training >> step=9409700, episode=1569 reward=0.778025 (502.05 it/sec) -training >> step=9409800, episode=1569 reward=0.79313 (497.92 it/sec) -training >> step=9409900, episode=1569 reward=0.7876147 (497.08 it/sec) -training >> step=9410000, episode=1569 reward=0.7869343 (447.42 it/sec) -training >> step=9410100, episode=1569 reward=0.7962214 (509.27 it/sec) -training >> step=9410200, episode=1569 reward=0.7781131 (476.31 it/sec) -training >> step=9410300, episode=1569 reward=0.7848708 (472.92 it/sec) -training >> step=9410400, episode=1569 reward=0.791577 (496.89 it/sec) -training >> step=9410500, episode=1569 reward=0.7788684 (502.84 it/sec) -training >> step=9410600, episode=1569 reward=0.8019188 (488.24 it/sec) -training >> step=9410700, episode=1569 reward=0.7866676 (461.74 it/sec) -training >> step=9410800, episode=1569 reward=0.7852941 (475.45 it/sec) -training >> step=9410900, episode=1569 reward=0.79643 (486.74 it/sec) -training >> step=9411000, episode=1569 reward=0.7865618 (454.59 it/sec) -training >> step=9411100, episode=1569 reward=0.7915809 (491.02 it/sec) -training >> step=9411200, episode=1569 reward=0.7967415 (451.85 it/sec) -training >> step=9411300, episode=1569 reward=0.8011332 (445.02 it/sec) -training >> step=9411400, episode=1569 reward=0.7844347 (495.50 it/sec) -training >> step=9411500, episode=1569 reward=0.7928374 (448.01 it/sec) -training >> step=9411600, episode=1569 reward=0.7815753 (500.58 it/sec) -training >> step=9411700, episode=1569 reward=0.7856542 (460.66 it/sec) -training >> step=9411800, episode=1569 reward=0.7867258 (462.03 it/sec) -training >> step=9411900, episode=1569 reward=0.7795006 (523.89 it/sec) -training >> step=9412000, episode=1569 reward=0.8069075 (489.43 it/sec) -training >> step=9412100, episode=1569 reward=0.7767837 (472.95 it/sec) -training >> step=9412200, episode=1569 reward=0.7830493 (505.72 it/sec) -training >> step=9412300, episode=1569 reward=0.7852771 (525.23 it/sec) -training >> step=9412400, episode=1569 reward=0.7927738 (479.64 it/sec) -training >> step=9412500, episode=1569 reward=0.7924821 (483.80 it/sec) -training >> step=9412600, episode=1569 reward=0.7798254 (318.23 it/sec) -training >> step=9412700, episode=1569 reward=0.7921178 (454.48 it/sec) -training >> step=9412800, episode=1569 reward=0.7860207 (476.60 it/sec) -training >> step=9412900, episode=1569 reward=0.7851864 (477.08 it/sec) -training >> step=9413000, episode=1569 reward=0.7730289 (520.13 it/sec) -training >> step=9413100, episode=1569 reward=0.7600694 (477.61 it/sec) -training >> step=9413200, episode=1569 reward=0.7844472 (490.11 it/sec) -training >> step=9413300, episode=1570 reward=0.8004241 (154.32 it/sec) -training >> step=9413400, episode=1570 reward=0.7839742 (452.86 it/sec) -training >> step=9413500, episode=1570 reward=0.781864 (476.54 it/sec) -training >> step=9413600, episode=1570 reward=0.8008893 (485.50 it/sec) -training >> step=9413700, episode=1570 reward=0.7945048 (482.32 it/sec) -training >> step=9413800, episode=1570 reward=0.7840499 (451.27 it/sec) -training >> step=9413900, episode=1570 reward=0.7780256 (505.52 it/sec) -training >> step=9414000, episode=1570 reward=0.8139619 (490.28 it/sec) -training >> step=9414100, episode=1570 reward=0.7848754 (528.60 it/sec) -training >> step=9414200, episode=1570 reward=0.7968153 (484.93 it/sec) -training >> step=9414300, episode=1570 reward=0.8046343 (425.94 it/sec) -training >> step=9414400, episode=1570 reward=0.7951652 (450.11 it/sec) -training >> step=9414500, episode=1570 reward=0.798165 (504.93 it/sec) -training >> step=9414600, episode=1570 reward=0.7980635 (459.18 it/sec) -training >> step=9414700, episode=1570 reward=0.7647473 (477.47 it/sec) -training >> step=9414800, episode=1570 reward=0.8008067 (512.48 it/sec) -training >> step=9414900, episode=1570 reward=0.7866248 (470.59 it/sec) -training >> step=9415000, episode=1570 reward=0.802895 (450.70 it/sec) -training >> step=9415100, episode=1570 reward=0.7897511 (490.98 it/sec) -training >> step=9415200, episode=1570 reward=0.807928 (460.38 it/sec) -training >> step=9415300, episode=1570 reward=0.7839198 (485.30 it/sec) -training >> step=9415400, episode=1570 reward=0.8045231 (470.34 it/sec) -training >> step=9415500, episode=1570 reward=0.7978833 (506.39 it/sec) -training >> step=9415600, episode=1570 reward=0.7709821 (471.69 it/sec) -training >> step=9415700, episode=1570 reward=0.7909247 (427.72 it/sec) -training >> step=9415800, episode=1570 reward=0.7917495 (471.85 it/sec) -training >> step=9415900, episode=1570 reward=0.7896055 (520.62 it/sec) -training >> step=9416000, episode=1570 reward=0.8028572 (505.17 it/sec) -training >> step=9416100, episode=1570 reward=0.7835768 (486.75 it/sec) -training >> step=9416200, episode=1570 reward=0.7608478 (462.05 it/sec) -training >> step=9416300, episode=1570 reward=0.7940522 (494.21 it/sec) -training >> step=9416400, episode=1570 reward=0.793223 (483.19 it/sec) -training >> step=9416500, episode=1570 reward=0.7820694 (450.08 it/sec) -training >> step=9416600, episode=1570 reward=0.7932341 (453.66 it/sec) -training >> step=9416700, episode=1570 reward=0.7943895 (507.04 it/sec) -training >> step=9416800, episode=1570 reward=0.7893983 (473.56 it/sec) -training >> step=9416900, episode=1570 reward=0.7604871 (470.75 it/sec) -training >> step=9417000, episode=1570 reward=0.783411 (480.10 it/sec) -training >> step=9417100, episode=1570 reward=0.7854739 (454.21 it/sec) -training >> step=9417200, episode=1570 reward=0.7771994 (493.61 it/sec) -training >> step=9417300, episode=1570 reward=0.7838695 (461.52 it/sec) -training >> step=9417400, episode=1570 reward=0.8081228 (495.57 it/sec) -training >> step=9417500, episode=1570 reward=0.8058935 (411.83 it/sec) -training >> step=9417600, episode=1570 reward=0.7977396 (392.99 it/sec) -training >> step=9417700, episode=1570 reward=0.8031667 (447.38 it/sec) -training >> step=9417800, episode=1570 reward=0.7772591 (437.02 it/sec) -training >> step=9417900, episode=1570 reward=0.7814214 (437.00 it/sec) -training >> step=9418000, episode=1570 reward=0.7653509 (478.25 it/sec) -training >> step=9418100, episode=1570 reward=0.7880071 (450.30 it/sec) -training >> step=9418200, episode=1570 reward=0.7911268 (464.86 it/sec) -training >> step=9418300, episode=1570 reward=0.7906787 (464.30 it/sec) -training >> step=9418400, episode=1570 reward=0.7751167 (505.99 it/sec) -training >> step=9418500, episode=1570 reward=0.7947255 (449.11 it/sec) -training >> step=9418600, episode=1570 reward=0.8032507 (457.31 it/sec) -training >> step=9418700, episode=1570 reward=0.7951939 (363.50 it/sec) -training >> step=9418800, episode=1570 reward=0.7799622 (476.77 it/sec) -training >> step=9418900, episode=1570 reward=0.7848437 (498.58 it/sec) -training >> step=9419000, episode=1570 reward=0.7644905 (479.99 it/sec) -training >> step=9419100, episode=1570 reward=0.7718953 (513.38 it/sec) -training >> step=9419200, episode=1570 reward=0.7979605 (463.22 it/sec) -training >> step=9419300, episode=1571 reward=0.7776097 (135.91 it/sec) -training >> step=9419400, episode=1571 reward=0.7765572 (486.71 it/sec) -training >> step=9419500, episode=1571 reward=0.782784 (502.42 it/sec) -training >> step=9419600, episode=1571 reward=0.7643902 (484.25 it/sec) -training >> step=9419700, episode=1571 reward=0.7846978 (461.19 it/sec) -training >> step=9419800, episode=1571 reward=0.7918703 (491.94 it/sec) -training >> step=9419900, episode=1571 reward=0.8015662 (456.71 it/sec) -training >> step=9420000, episode=1571 reward=0.7960835 (495.81 it/sec) -training >> step=9420100, episode=1571 reward=0.80793 (484.78 it/sec) -training >> step=9420200, episode=1571 reward=0.8041826 (489.75 it/sec) -training >> step=9420300, episode=1571 reward=0.7935466 (431.99 it/sec) -training >> step=9420400, episode=1571 reward=0.8111177 (433.01 it/sec) -training >> step=9420500, episode=1571 reward=0.7941185 (484.97 it/sec) -training >> step=9420600, episode=1571 reward=0.7948759 (485.80 it/sec) -training >> step=9420700, episode=1571 reward=0.813774 (477.93 it/sec) -training >> step=9420800, episode=1571 reward=0.7985972 (476.39 it/sec) -training >> step=9420900, episode=1571 reward=0.7968642 (514.25 it/sec) -training >> step=9421000, episode=1571 reward=0.7918987 (482.56 it/sec) -training >> step=9421100, episode=1571 reward=0.7937078 (471.69 it/sec) -training >> step=9421200, episode=1571 reward=0.7813686 (457.80 it/sec) -training >> step=9421300, episode=1571 reward=0.7827203 (416.77 it/sec) -training >> step=9421400, episode=1571 reward=0.8095849 (496.51 it/sec) -training >> step=9421500, episode=1571 reward=0.8008862 (507.89 it/sec) -training >> step=9421600, episode=1571 reward=0.7939439 (478.66 it/sec) -training >> step=9421700, episode=1571 reward=0.787796 (495.01 it/sec) -training >> step=9421800, episode=1571 reward=0.80901 (440.97 it/sec) -training >> step=9421900, episode=1571 reward=0.7946274 (439.26 it/sec) -training >> step=9422000, episode=1571 reward=0.7999899 (502.48 it/sec) -training >> step=9422100, episode=1571 reward=0.7998526 (461.06 it/sec) -training >> step=9422200, episode=1571 reward=0.7930724 (468.81 it/sec) -training >> step=9422300, episode=1571 reward=0.7974445 (491.91 it/sec) -training >> step=9422400, episode=1571 reward=0.7981196 (494.22 it/sec) -training >> step=9422500, episode=1571 reward=0.7595331 (444.58 it/sec) -training >> step=9422600, episode=1571 reward=0.7960991 (492.35 it/sec) -training >> step=9422700, episode=1571 reward=0.7839711 (518.09 it/sec) -training >> step=9422800, episode=1571 reward=0.8038657 (510.18 it/sec) -training >> step=9422900, episode=1571 reward=0.7885965 (481.48 it/sec) -training >> step=9423000, episode=1571 reward=0.7914774 (511.89 it/sec) -training >> step=9423100, episode=1571 reward=0.7930303 (444.40 it/sec) -training >> step=9423200, episode=1571 reward=0.8016521 (481.46 it/sec) -training >> step=9423300, episode=1571 reward=0.7944141 (477.25 it/sec) -training >> step=9423400, episode=1571 reward=0.7726581 (459.57 it/sec) -training >> step=9423500, episode=1571 reward=0.7703387 (473.05 it/sec) -training >> step=9423600, episode=1571 reward=0.7851038 (429.30 it/sec) -training >> step=9423700, episode=1571 reward=0.7864192 (434.90 it/sec) -training >> step=9423800, episode=1571 reward=0.7711875 (471.55 it/sec) -training >> step=9423900, episode=1571 reward=0.7746035 (480.82 it/sec) -training >> step=9424000, episode=1571 reward=0.80137 (468.80 it/sec) -training >> step=9424100, episode=1571 reward=0.7962255 (446.70 it/sec) -training >> step=9424200, episode=1571 reward=0.7955212 (466.50 it/sec) -training >> step=9424300, episode=1571 reward=0.7849983 (449.60 it/sec) -training >> step=9424400, episode=1571 reward=0.7724862 (456.28 it/sec) -training >> step=9424500, episode=1571 reward=0.7790858 (483.98 it/sec) -training >> step=9424600, episode=1571 reward=0.7992122 (437.79 it/sec) -training >> step=9424700, episode=1571 reward=0.7873687 (459.85 it/sec) -training >> step=9424800, episode=1571 reward=0.7818142 (501.81 it/sec) -training >> step=9424900, episode=1571 reward=0.7979864 (352.13 it/sec) -training >> step=9425000, episode=1571 reward=0.7976952 (450.02 it/sec) -training >> step=9425100, episode=1571 reward=0.7770466 (473.86 it/sec) -training >> step=9425200, episode=1571 reward=0.7865869 (495.47 it/sec) -training >> step=9425300, episode=1572 reward=0.7994713 (129.81 it/sec) -training >> step=9425400, episode=1572 reward=0.7830739 (475.05 it/sec) -training >> step=9425500, episode=1572 reward=0.7810811 (495.34 it/sec) -training >> step=9425600, episode=1572 reward=0.7982463 (491.04 it/sec) -training >> step=9425700, episode=1572 reward=0.8078722 (447.84 it/sec) -training >> step=9425800, episode=1572 reward=0.8155112 (485.92 it/sec) -training >> step=9425900, episode=1572 reward=0.7911599 (503.88 it/sec) -training >> step=9426000, episode=1572 reward=0.7896504 (472.45 it/sec) -training >> step=9426100, episode=1572 reward=0.7784762 (489.53 it/sec) -training >> step=9426200, episode=1572 reward=0.7747989 (487.82 it/sec) -training >> step=9426300, episode=1572 reward=0.7780194 (508.06 it/sec) -training >> step=9426400, episode=1572 reward=0.7989869 (459.71 it/sec) -training >> step=9426500, episode=1572 reward=0.7787992 (460.12 it/sec) -training >> step=9426600, episode=1572 reward=0.7845718 (501.95 it/sec) -training >> step=9426700, episode=1572 reward=0.7824153 (496.07 it/sec) -training >> step=9426800, episode=1572 reward=0.7724997 (496.95 it/sec) -training >> step=9426900, episode=1572 reward=0.8006364 (495.01 it/sec) -training >> step=9427000, episode=1572 reward=0.7906158 (479.92 it/sec) -training >> step=9427100, episode=1572 reward=0.7937765 (458.98 it/sec) -training >> step=9427200, episode=1572 reward=0.7846714 (487.45 it/sec) -training >> step=9427300, episode=1572 reward=0.7783214 (475.10 it/sec) -training >> step=9427400, episode=1572 reward=0.779458 (508.34 it/sec) -training >> step=9427500, episode=1572 reward=0.7978798 (491.11 it/sec) -training >> step=9427600, episode=1572 reward=0.8007591 (464.05 it/sec) -training >> step=9427700, episode=1572 reward=0.7661554 (486.24 it/sec) -training >> step=9427800, episode=1572 reward=0.7887778 (474.01 it/sec) -training >> step=9427900, episode=1572 reward=0.7857225 (455.97 it/sec) -training >> step=9428000, episode=1572 reward=0.7977519 (470.39 it/sec) -training >> step=9428100, episode=1572 reward=0.7931336 (517.65 it/sec) -training >> step=9428200, episode=1572 reward=0.794401 (491.29 it/sec) -training >> step=9428300, episode=1572 reward=0.7898476 (448.19 it/sec) -training >> step=9428400, episode=1572 reward=0.7835248 (452.02 it/sec) -training >> step=9428500, episode=1572 reward=0.802968 (474.17 it/sec) -training >> step=9428600, episode=1572 reward=0.8024833 (447.36 it/sec) -training >> step=9428700, episode=1572 reward=0.7662111 (457.20 it/sec) -training >> step=9428800, episode=1572 reward=0.7867119 (519.19 it/sec) -training >> step=9428900, episode=1572 reward=0.8143016 (495.23 it/sec) -training >> step=9429000, episode=1572 reward=0.7887463 (490.76 it/sec) -training >> step=9429100, episode=1572 reward=0.796013 (480.31 it/sec) -training >> step=9429200, episode=1572 reward=0.7864766 (502.05 it/sec) -training >> step=9429300, episode=1572 reward=0.8056887 (502.98 it/sec) -training >> step=9429400, episode=1572 reward=0.8000771 (463.21 it/sec) -training >> step=9429500, episode=1572 reward=0.785899 (483.70 it/sec) -training >> step=9429600, episode=1572 reward=0.8017813 (440.23 it/sec) -training >> step=9429700, episode=1572 reward=0.7811084 (488.77 it/sec) -training >> step=9429800, episode=1572 reward=0.7828403 (498.46 it/sec) -training >> step=9429900, episode=1572 reward=0.7996042 (483.11 it/sec) -training >> step=9430000, episode=1572 reward=0.7952894 (487.63 it/sec) -training >> step=9430100, episode=1572 reward=0.8089502 (467.38 it/sec) -training >> step=9430200, episode=1572 reward=0.7839885 (519.18 it/sec) -training >> step=9430300, episode=1572 reward=0.7763413 (456.66 it/sec) -training >> step=9430400, episode=1572 reward=0.7870435 (391.12 it/sec) -training >> step=9430500, episode=1572 reward=0.7727965 (480.28 it/sec) -training >> step=9430600, episode=1572 reward=0.7882875 (453.01 it/sec) -training >> step=9430700, episode=1572 reward=0.7934274 (446.98 it/sec) -training >> step=9430800, episode=1572 reward=0.7982641 (472.66 it/sec) -training >> step=9430900, episode=1572 reward=0.7993797 (469.19 it/sec) -training >> step=9431000, episode=1572 reward=0.754937 (537.88 it/sec) -training >> step=9431100, episode=1572 reward=0.7752194 (344.18 it/sec) -training >> step=9431200, episode=1572 reward=0.8069351 (456.62 it/sec) -training >> step=9431300, episode=1573 reward=0.7876025 (110.02 it/sec) -training >> step=9431400, episode=1573 reward=0.7877102 (491.94 it/sec) -training >> step=9431500, episode=1573 reward=0.7832741 (480.32 it/sec) -training >> step=9431600, episode=1573 reward=0.7640905 (485.39 it/sec) -training >> step=9431700, episode=1573 reward=0.7947376 (503.02 it/sec) -training >> step=9431800, episode=1573 reward=0.7854991 (500.74 it/sec) -training >> step=9431900, episode=1573 reward=0.7832512 (484.77 it/sec) -training >> step=9432000, episode=1573 reward=0.7942529 (448.48 it/sec) -training >> step=9432100, episode=1573 reward=0.8044307 (470.06 it/sec) -training >> step=9432200, episode=1573 reward=0.8023466 (458.71 it/sec) -training >> step=9432300, episode=1573 reward=0.7794186 (493.76 it/sec) -training >> step=9432400, episode=1573 reward=0.7938028 (480.13 it/sec) -training >> step=9432500, episode=1573 reward=0.7936835 (471.16 it/sec) -training >> step=9432600, episode=1573 reward=0.8068456 (483.21 it/sec) -training >> step=9432700, episode=1573 reward=0.7878555 (443.25 it/sec) -training >> step=9432800, episode=1573 reward=0.7903697 (482.68 it/sec) -training >> step=9432900, episode=1573 reward=0.7888477 (485.79 it/sec) -training >> step=9433000, episode=1573 reward=0.7820039 (500.76 it/sec) -training >> step=9433100, episode=1573 reward=0.8013592 (453.23 it/sec) -training >> step=9433200, episode=1573 reward=0.7756008 (481.51 it/sec) -training >> step=9433300, episode=1573 reward=0.7930745 (481.39 it/sec) -training >> step=9433400, episode=1573 reward=0.8118634 (484.46 it/sec) -training >> step=9433500, episode=1573 reward=0.8110734 (508.64 it/sec) -training >> step=9433600, episode=1573 reward=0.7781763 (436.40 it/sec) -training >> step=9433700, episode=1573 reward=0.7938883 (452.52 it/sec) -training >> step=9433800, episode=1573 reward=0.7935376 (506.69 it/sec) -training >> step=9433900, episode=1573 reward=0.8012897 (483.98 it/sec) -training >> step=9434000, episode=1573 reward=0.8006628 (478.75 it/sec) -training >> step=9434100, episode=1573 reward=0.7884341 (435.44 it/sec) -training >> step=9434200, episode=1573 reward=0.778574 (425.04 it/sec) -training >> step=9434300, episode=1573 reward=0.8051602 (467.03 it/sec) -training >> step=9434400, episode=1573 reward=0.7939103 (526.17 it/sec) -training >> step=9434500, episode=1573 reward=0.7742953 (532.40 it/sec) -training >> step=9434600, episode=1573 reward=0.7786381 (469.37 it/sec) -training >> step=9434700, episode=1573 reward=0.7768189 (495.74 it/sec) -training >> step=9434800, episode=1573 reward=0.8028266 (508.46 it/sec) -training >> step=9434900, episode=1573 reward=0.7923185 (559.75 it/sec) -training >> step=9435000, episode=1573 reward=0.7987461 (481.73 it/sec) -training >> step=9435100, episode=1573 reward=0.7920784 (462.92 it/sec) -training >> step=9435200, episode=1573 reward=0.794333 (498.98 it/sec) -training >> step=9435300, episode=1573 reward=0.789215 (538.60 it/sec) -training >> step=9435400, episode=1573 reward=0.8017476 (495.19 it/sec) -training >> step=9435500, episode=1573 reward=0.7738793 (517.52 it/sec) -training >> step=9435600, episode=1573 reward=0.7791005 (443.22 it/sec) -training >> step=9435700, episode=1573 reward=0.7708338 (497.15 it/sec) -training >> step=9435800, episode=1573 reward=0.7702411 (492.33 it/sec) -training >> step=9435900, episode=1573 reward=0.7904277 (515.84 it/sec) -training >> step=9436000, episode=1573 reward=0.7763184 (531.44 it/sec) -training >> step=9436100, episode=1573 reward=0.8018324 (514.02 it/sec) -training >> step=9436200, episode=1573 reward=0.7941284 (507.77 it/sec) -training >> step=9436300, episode=1573 reward=0.7972518 (533.07 it/sec) -training >> step=9436400, episode=1573 reward=0.7845479 (546.93 it/sec) -training >> step=9436500, episode=1573 reward=0.7857614 (535.61 it/sec) -training >> step=9436600, episode=1573 reward=0.7691966 (525.07 it/sec) -training >> step=9436700, episode=1573 reward=0.7885025 (554.82 it/sec) -training >> step=9436800, episode=1573 reward=0.7962797 (500.52 it/sec) -training >> step=9436900, episode=1573 reward=0.7911152 (533.76 it/sec) -training >> step=9437000, episode=1573 reward=0.7956333 (524.99 it/sec) -training >> step=9437100, episode=1573 reward=0.7943805 (517.44 it/sec) -training >> step=9437200, episode=1573 reward=0.7756686 (523.03 it/sec) -training >> step=9437300, episode=1574 reward=0.801295 (119.98 it/sec) -training >> step=9437400, episode=1574 reward=0.7654849 (489.26 it/sec) -training >> step=9437500, episode=1574 reward=0.7811809 (495.98 it/sec) -training >> step=9437600, episode=1574 reward=0.7860992 (531.45 it/sec) -training >> step=9437700, episode=1574 reward=0.7752942 (502.13 it/sec) -training >> step=9437800, episode=1574 reward=0.7759445 (527.09 it/sec) -training >> step=9437900, episode=1574 reward=0.7938128 (534.76 it/sec) -training >> step=9438000, episode=1574 reward=0.7912137 (475.46 it/sec) -training >> step=9438100, episode=1574 reward=0.7953146 (535.06 it/sec) -training >> step=9438200, episode=1574 reward=0.788354 (526.67 it/sec) -training >> step=9438300, episode=1574 reward=0.8155335 (499.05 it/sec) -training >> step=9438400, episode=1574 reward=0.8104631 (504.21 it/sec) -training >> step=9438500, episode=1574 reward=0.8011322 (509.75 it/sec) -training >> step=9438600, episode=1574 reward=0.79202 (491.02 it/sec) -training >> step=9438700, episode=1574 reward=0.7840905 (483.89 it/sec) -training >> step=9438800, episode=1574 reward=0.7925268 (524.98 it/sec) -training >> step=9438900, episode=1574 reward=0.8016989 (522.42 it/sec) -training >> step=9439000, episode=1574 reward=0.7957919 (508.70 it/sec) -training >> step=9439100, episode=1574 reward=0.7969429 (500.25 it/sec) -training >> step=9439200, episode=1574 reward=0.7923932 (547.59 it/sec) -training >> step=9439300, episode=1574 reward=0.8018986 (528.37 it/sec) -training >> step=9439400, episode=1574 reward=0.7861676 (513.32 it/sec) -training >> step=9439500, episode=1574 reward=0.7842634 (511.63 it/sec) -training >> step=9439600, episode=1574 reward=0.795936 (521.63 it/sec) -training >> step=9439700, episode=1574 reward=0.768567 (444.72 it/sec) -training >> step=9439800, episode=1574 reward=0.7769215 (491.56 it/sec) -training >> step=9439900, episode=1574 reward=0.7956488 (504.15 it/sec) -training >> step=9440000, episode=1574 reward=0.7991146 (517.35 it/sec) -training >> step=9440100, episode=1574 reward=0.792537 (501.84 it/sec) -training >> step=9440200, episode=1574 reward=0.793939 (515.51 it/sec) -training >> step=9440300, episode=1574 reward=0.7922558 (535.73 it/sec) -training >> step=9440400, episode=1574 reward=0.8109658 (475.85 it/sec) -training >> step=9440500, episode=1574 reward=0.7750436 (514.26 it/sec) -training >> step=9440600, episode=1574 reward=0.7997572 (478.27 it/sec) -training >> step=9440700, episode=1574 reward=0.7776736 (502.04 it/sec) -training >> step=9440800, episode=1574 reward=0.7808472 (514.72 it/sec) -training >> step=9440900, episode=1574 reward=0.8100073 (518.69 it/sec) -training >> step=9441000, episode=1574 reward=0.7894193 (524.66 it/sec) -training >> step=9441100, episode=1574 reward=0.8011361 (495.94 it/sec) -training >> step=9441200, episode=1574 reward=0.76257 (517.43 it/sec) -training >> step=9441300, episode=1574 reward=0.7969238 (516.91 it/sec) -training >> step=9441400, episode=1574 reward=0.7777472 (536.34 it/sec) -training >> step=9441500, episode=1574 reward=0.7877854 (492.04 it/sec) -training >> step=9441600, episode=1574 reward=0.795786 (507.39 it/sec) -training >> step=9441700, episode=1574 reward=0.7692614 (525.57 it/sec) -training >> step=9441800, episode=1574 reward=0.8039746 (553.90 it/sec) -training >> step=9441900, episode=1574 reward=0.7906988 (500.45 it/sec) -training >> step=9442000, episode=1574 reward=0.7774231 (497.23 it/sec) -training >> step=9442100, episode=1574 reward=0.8011181 (480.90 it/sec) -training >> step=9442200, episode=1574 reward=0.7853177 (458.27 it/sec) -training >> step=9442300, episode=1574 reward=0.7952425 (528.78 it/sec) -training >> step=9442400, episode=1574 reward=0.7920864 (483.39 it/sec) -training >> step=9442500, episode=1574 reward=0.79572 (517.34 it/sec) -training >> step=9442600, episode=1574 reward=0.7982579 (498.61 it/sec) -training >> step=9442700, episode=1574 reward=0.7763624 (504.28 it/sec) -training >> step=9442800, episode=1574 reward=0.8008162 (519.37 it/sec) -training >> step=9442900, episode=1574 reward=0.7936634 (513.17 it/sec) -training >> step=9443000, episode=1574 reward=0.8149154 (457.04 it/sec) -training >> step=9443100, episode=1574 reward=0.7668511 (506.11 it/sec) -training >> step=9443200, episode=1574 reward=0.7594402 (493.67 it/sec) -training >> step=9443300, episode=1575 reward=0.7871602 (116.51 it/sec) -training >> step=9443400, episode=1575 reward=0.7973604 (463.96 it/sec) -training >> step=9443500, episode=1575 reward=0.7653511 (480.89 it/sec) -training >> step=9443600, episode=1575 reward=0.7935247 (501.22 it/sec) -training >> step=9443700, episode=1575 reward=0.79737 (534.57 it/sec) -training >> step=9443800, episode=1575 reward=0.773614 (514.55 it/sec) -training >> step=9443900, episode=1575 reward=0.7980724 (483.54 it/sec) -training >> step=9444000, episode=1575 reward=0.8067963 (546.18 it/sec) -training >> step=9444100, episode=1575 reward=0.7995721 (501.76 it/sec) -training >> step=9444200, episode=1575 reward=0.8130243 (516.70 it/sec) -training >> step=9444300, episode=1575 reward=0.807399 (504.33 it/sec) -training >> step=9444400, episode=1575 reward=0.7870271 (478.84 it/sec) -training >> step=9444500, episode=1575 reward=0.7847094 (526.00 it/sec) -training >> step=9444600, episode=1575 reward=0.8008347 (503.38 it/sec) -training >> step=9444700, episode=1575 reward=0.7842697 (513.67 it/sec) -training >> step=9444800, episode=1575 reward=0.7836373 (513.85 it/sec) -training >> step=9444900, episode=1575 reward=0.8124272 (492.81 it/sec) -training >> step=9445000, episode=1575 reward=0.8021644 (499.54 it/sec) -training >> step=9445100, episode=1575 reward=0.7742154 (460.71 it/sec) -training >> step=9445200, episode=1575 reward=0.7693518 (431.61 it/sec) -training >> step=9445300, episode=1575 reward=0.788685 (398.33 it/sec) -training >> step=9445400, episode=1575 reward=0.785591 (395.41 it/sec) -training >> step=9445500, episode=1575 reward=0.7924166 (399.31 it/sec) -training >> step=9445600, episode=1575 reward=0.7953206 (491.71 it/sec) -training >> step=9445700, episode=1575 reward=0.7816106 (499.37 it/sec) -training >> step=9445800, episode=1575 reward=0.7995306 (488.74 it/sec) -training >> step=9445900, episode=1575 reward=0.7929516 (518.35 it/sec) -training >> step=9446000, episode=1575 reward=0.7926322 (524.24 it/sec) -training >> step=9446100, episode=1575 reward=0.7880499 (509.57 it/sec) -training >> step=9446200, episode=1575 reward=0.8001189 (554.48 it/sec) -training >> step=9446300, episode=1575 reward=0.7802765 (486.71 it/sec) -training >> step=9446400, episode=1575 reward=0.7828819 (472.82 it/sec) -training >> step=9446500, episode=1575 reward=0.792531 (528.23 it/sec) -training >> step=9446600, episode=1575 reward=0.7878208 (495.49 it/sec) -training >> step=9446700, episode=1575 reward=0.8101971 (490.08 it/sec) -training >> step=9446800, episode=1575 reward=0.8006244 (433.66 it/sec) -training >> step=9446900, episode=1575 reward=0.7841349 (449.93 it/sec) -training >> step=9447000, episode=1575 reward=0.7974378 (425.87 it/sec) -training >> step=9447100, episode=1575 reward=0.7859774 (477.11 it/sec) -training >> step=9447200, episode=1575 reward=0.7789435 (453.56 it/sec) -training >> step=9447300, episode=1575 reward=0.8133379 (458.01 it/sec) -training >> step=9447400, episode=1575 reward=0.7928305 (396.56 it/sec) -training >> step=9447500, episode=1575 reward=0.7945988 (443.75 it/sec) -training >> step=9447600, episode=1575 reward=0.7675034 (505.89 it/sec) -training >> step=9447700, episode=1575 reward=0.7675387 (480.10 it/sec) -training >> step=9447800, episode=1575 reward=0.7767403 (449.84 it/sec) -training >> step=9447900, episode=1575 reward=0.7998092 (483.16 it/sec) -training >> step=9448000, episode=1575 reward=0.786341 (513.40 it/sec) -training >> step=9448100, episode=1575 reward=0.7650713 (515.47 it/sec) -training >> step=9448200, episode=1575 reward=0.7903824 (494.54 it/sec) -training >> step=9448300, episode=1575 reward=0.7829773 (447.07 it/sec) -training >> step=9448400, episode=1575 reward=0.7890474 (526.66 it/sec) -training >> step=9448500, episode=1575 reward=0.8002577 (525.44 it/sec) -training >> step=9448600, episode=1575 reward=0.8092056 (532.44 it/sec) -training >> step=9448700, episode=1575 reward=0.7729763 (534.28 it/sec) -training >> step=9448800, episode=1575 reward=0.7864103 (498.06 it/sec) -training >> step=9448900, episode=1575 reward=0.7781463 (478.80 it/sec) -training >> step=9449000, episode=1575 reward=0.7983798 (530.97 it/sec) -training >> step=9449100, episode=1575 reward=0.7857164 (524.10 it/sec) -training >> step=9449200, episode=1575 reward=0.7993545 (484.46 it/sec) -training >> step=9449300, episode=1576 reward=0.8107521 (98.56 it/sec) -training >> step=9449400, episode=1576 reward=0.7753869 (503.03 it/sec) -training >> step=9449500, episode=1576 reward=0.7690635 (467.49 it/sec) -training >> step=9449600, episode=1576 reward=0.7856637 (522.90 it/sec) -training >> step=9449700, episode=1576 reward=0.7821501 (507.53 it/sec) -training >> step=9449800, episode=1576 reward=0.7871856 (524.31 it/sec) -training >> step=9449900, episode=1576 reward=0.7704746 (493.70 it/sec) -training >> step=9450000, episode=1576 reward=0.794857 (461.51 it/sec) -training >> step=9450100, episode=1576 reward=0.8056195 (535.16 it/sec) -training >> step=9450200, episode=1576 reward=0.7810919 (513.63 it/sec) -training >> step=9450300, episode=1576 reward=0.7977831 (501.06 it/sec) -training >> step=9450400, episode=1576 reward=0.7747344 (501.73 it/sec) -training >> step=9450500, episode=1576 reward=0.7825002 (484.75 it/sec) -training >> step=9450600, episode=1576 reward=0.7983326 (483.12 it/sec) -training >> step=9450700, episode=1576 reward=0.7773463 (511.18 it/sec) -training >> step=9450800, episode=1576 reward=0.7885174 (505.65 it/sec) -training >> step=9450900, episode=1576 reward=0.7923031 (525.20 it/sec) -training >> step=9451000, episode=1576 reward=0.8107652 (508.82 it/sec) -training >> step=9451100, episode=1576 reward=0.7920045 (452.50 it/sec) -training >> step=9451200, episode=1576 reward=0.789278 (548.38 it/sec) -training >> step=9451300, episode=1576 reward=0.8087233 (464.80 it/sec) -training >> step=9451400, episode=1576 reward=0.7878687 (506.01 it/sec) -training >> step=9451500, episode=1576 reward=0.8004274 (520.74 it/sec) -training >> step=9451600, episode=1576 reward=0.7993821 (471.96 it/sec) -training >> step=9451700, episode=1576 reward=0.7985536 (515.41 it/sec) -training >> step=9451800, episode=1576 reward=0.8023319 (475.84 it/sec) -training >> step=9451900, episode=1576 reward=0.807182 (528.57 it/sec) -training >> step=9452000, episode=1576 reward=0.8050537 (531.91 it/sec) -training >> step=9452100, episode=1576 reward=0.796324 (474.70 it/sec) -training >> step=9452200, episode=1576 reward=0.7920665 (465.99 it/sec) -training >> step=9452300, episode=1576 reward=0.7993482 (478.96 it/sec) -training >> step=9452400, episode=1576 reward=0.8037919 (459.25 it/sec) -training >> step=9452500, episode=1576 reward=0.787799 (458.07 it/sec) -training >> step=9452600, episode=1576 reward=0.7947093 (458.32 it/sec) -training >> step=9452700, episode=1576 reward=0.7865028 (507.62 it/sec) -training >> step=9452800, episode=1576 reward=0.7917994 (456.80 it/sec) -training >> step=9452900, episode=1576 reward=0.7930436 (462.83 it/sec) -training >> step=9453000, episode=1576 reward=0.7936547 (510.29 it/sec) -training >> step=9453100, episode=1576 reward=0.7942854 (496.70 it/sec) -training >> step=9453200, episode=1576 reward=0.7924618 (489.40 it/sec) -training >> step=9453300, episode=1576 reward=0.7976102 (500.75 it/sec) -training >> step=9453400, episode=1576 reward=0.7890503 (496.69 it/sec) -training >> step=9453500, episode=1576 reward=0.8070512 (475.03 it/sec) -training >> step=9453600, episode=1576 reward=0.7972688 (473.54 it/sec) -training >> step=9453700, episode=1576 reward=0.7887897 (501.14 it/sec) -training >> step=9453800, episode=1576 reward=0.7884102 (471.10 it/sec) -training >> step=9453900, episode=1576 reward=0.7798099 (482.38 it/sec) -training >> step=9454000, episode=1576 reward=0.8021546 (454.53 it/sec) -training >> step=9454100, episode=1576 reward=0.7956476 (463.93 it/sec) -training >> step=9454200, episode=1576 reward=0.7895842 (478.83 it/sec) -training >> step=9454300, episode=1576 reward=0.7633159 (448.31 it/sec) -training >> step=9454400, episode=1576 reward=0.7827283 (474.37 it/sec) -training >> step=9454500, episode=1576 reward=0.7891322 (486.84 it/sec) -training >> step=9454600, episode=1576 reward=0.7816141 (441.49 it/sec) -training >> step=9454700, episode=1576 reward=0.7873005 (507.19 it/sec) -training >> step=9454800, episode=1576 reward=0.7918611 (487.03 it/sec) -training >> step=9454900, episode=1576 reward=0.7840125 (513.43 it/sec) -training >> step=9455000, episode=1576 reward=0.7926267 (486.41 it/sec) -training >> step=9455100, episode=1576 reward=0.7949284 (476.76 it/sec) -training >> step=9455200, episode=1576 reward=0.7827636 (498.41 it/sec) -training >> step=9455300, episode=1577 reward=0.7821626 (87.54 it/sec) -training >> step=9455400, episode=1577 reward=0.766827 (445.81 it/sec) -training >> step=9455500, episode=1577 reward=0.7661994 (465.62 it/sec) -training >> step=9455600, episode=1577 reward=0.7661863 (457.67 it/sec) -training >> step=9455700, episode=1577 reward=0.7732176 (425.57 it/sec) -training >> step=9455800, episode=1577 reward=0.8066381 (439.38 it/sec) -training >> step=9455900, episode=1577 reward=0.7789404 (421.86 it/sec) -training >> step=9456000, episode=1577 reward=0.7855005 (466.45 it/sec) -training >> step=9456100, episode=1577 reward=0.7999036 (493.28 it/sec) -training >> step=9456200, episode=1577 reward=0.7865565 (449.90 it/sec) -training >> step=9456300, episode=1577 reward=0.7960462 (480.00 it/sec) -training >> step=9456400, episode=1577 reward=0.7963705 (472.21 it/sec) -training >> step=9456500, episode=1577 reward=0.7809097 (465.99 it/sec) -training >> step=9456600, episode=1577 reward=0.803046 (481.75 it/sec) -training >> step=9456700, episode=1577 reward=0.7934889 (489.85 it/sec) -training >> step=9456800, episode=1577 reward=0.7845445 (490.14 it/sec) -training >> step=9456900, episode=1577 reward=0.7999814 (477.13 it/sec) -training >> step=9457000, episode=1577 reward=0.7844259 (485.40 it/sec) -training >> step=9457100, episode=1577 reward=0.8046771 (471.91 it/sec) -training >> step=9457200, episode=1577 reward=0.7897472 (483.00 it/sec) -training >> step=9457300, episode=1577 reward=0.7979264 (453.56 it/sec) -training >> step=9457400, episode=1577 reward=0.7983803 (511.62 it/sec) -training >> step=9457500, episode=1577 reward=0.7943928 (421.27 it/sec) -training >> step=9457600, episode=1577 reward=0.7903781 (474.58 it/sec) -training >> step=9457700, episode=1577 reward=0.8034299 (485.35 it/sec) -training >> step=9457800, episode=1577 reward=0.8034928 (476.39 it/sec) -training >> step=9457900, episode=1577 reward=0.797848 (448.50 it/sec) -training >> step=9458000, episode=1577 reward=0.7712025 (403.02 it/sec) -training >> step=9458100, episode=1577 reward=0.8041093 (489.87 it/sec) -training >> step=9458200, episode=1577 reward=0.7997044 (397.54 it/sec) -training >> step=9458300, episode=1577 reward=0.7923902 (472.46 it/sec) -training >> step=9458400, episode=1577 reward=0.8024831 (477.54 it/sec) -training >> step=9458500, episode=1577 reward=0.7818549 (472.55 it/sec) -training >> step=9458600, episode=1577 reward=0.7930565 (453.20 it/sec) -training >> step=9458700, episode=1577 reward=0.8025412 (369.81 it/sec) -training >> step=9458800, episode=1577 reward=0.7951515 (409.53 it/sec) -training >> step=9458900, episode=1577 reward=0.7958838 (435.45 it/sec) -training >> step=9459000, episode=1577 reward=0.8069558 (457.30 it/sec) -training >> step=9459100, episode=1577 reward=0.7973373 (447.69 it/sec) -training >> step=9459200, episode=1577 reward=0.8021328 (520.25 it/sec) -training >> step=9459300, episode=1577 reward=0.7890053 (490.03 it/sec) -training >> step=9459400, episode=1577 reward=0.7955515 (463.17 it/sec) -training >> step=9459500, episode=1577 reward=0.8009192 (472.97 it/sec) -training >> step=9459600, episode=1577 reward=0.7952328 (497.99 it/sec) -training >> step=9459700, episode=1577 reward=0.7978253 (500.07 it/sec) -training >> step=9459800, episode=1577 reward=0.7789455 (496.50 it/sec) -training >> step=9459900, episode=1577 reward=0.7911649 (481.04 it/sec) -training >> step=9460000, episode=1577 reward=0.7751994 (483.45 it/sec) -training >> step=9460100, episode=1577 reward=0.7878137 (450.13 it/sec) -training >> step=9460200, episode=1577 reward=0.7864136 (465.75 it/sec) -training >> step=9460300, episode=1577 reward=0.8035948 (475.98 it/sec) -training >> step=9460400, episode=1577 reward=0.7707239 (471.49 it/sec) -training >> step=9460500, episode=1577 reward=0.7755326 (477.24 it/sec) -training >> step=9460600, episode=1577 reward=0.7812988 (464.74 it/sec) -training >> step=9460700, episode=1577 reward=0.7761236 (497.34 it/sec) -training >> step=9460800, episode=1577 reward=0.8060188 (439.11 it/sec) -training >> step=9460900, episode=1577 reward=0.7762561 (447.08 it/sec) -training >> step=9461000, episode=1577 reward=0.7863399 (449.59 it/sec) -training >> step=9461100, episode=1577 reward=0.8044808 (479.16 it/sec) -training >> step=9461200, episode=1577 reward=0.8007874 (449.97 it/sec) -training >> step=9461300, episode=1578 reward=0.7924973 (88.01 it/sec) -training >> step=9461400, episode=1578 reward=0.7786455 (449.43 it/sec) -training >> step=9461500, episode=1578 reward=0.7801384 (465.53 it/sec) -training >> step=9461600, episode=1578 reward=0.8036518 (455.67 it/sec) -training >> step=9461700, episode=1578 reward=0.7921383 (454.25 it/sec) -training >> step=9461800, episode=1578 reward=0.7776601 (486.49 it/sec) -training >> step=9461900, episode=1578 reward=0.7821367 (465.19 it/sec) -training >> step=9462000, episode=1578 reward=0.7835929 (457.60 it/sec) -training >> step=9462100, episode=1578 reward=0.7933006 (475.74 it/sec) -training >> step=9462200, episode=1578 reward=0.7982588 (478.80 it/sec) -training >> step=9462300, episode=1578 reward=0.7920569 (477.69 it/sec) -training >> step=9462400, episode=1578 reward=0.7755858 (464.46 it/sec) -training >> step=9462500, episode=1578 reward=0.791574 (487.26 it/sec) -training >> step=9462600, episode=1578 reward=0.8055527 (513.56 it/sec) -training >> step=9462700, episode=1578 reward=0.8046242 (527.08 it/sec) -training >> step=9462800, episode=1578 reward=0.7796957 (525.82 it/sec) -training >> step=9462900, episode=1578 reward=0.7910611 (491.42 it/sec) -training >> step=9463000, episode=1578 reward=0.8006948 (441.45 it/sec) -training >> step=9463100, episode=1578 reward=0.7796382 (456.73 it/sec) -training >> step=9463200, episode=1578 reward=0.7911445 (492.03 it/sec) -training >> step=9463300, episode=1578 reward=0.7999767 (472.73 it/sec) -training >> step=9463400, episode=1578 reward=0.8100411 (478.26 it/sec) -training >> step=9463500, episode=1578 reward=0.776405 (516.13 it/sec) -training >> step=9463600, episode=1578 reward=0.7763041 (496.67 it/sec) -training >> step=9463700, episode=1578 reward=0.7831942 (489.26 it/sec) -training >> step=9463800, episode=1578 reward=0.7824171 (506.31 it/sec) -training >> step=9463900, episode=1578 reward=0.7983577 (494.53 it/sec) -training >> step=9464000, episode=1578 reward=0.7953397 (514.28 it/sec) -training >> step=9464100, episode=1578 reward=0.7864237 (494.64 it/sec) -training >> step=9464200, episode=1578 reward=0.7846373 (521.65 it/sec) -training >> step=9464300, episode=1578 reward=0.7975839 (474.06 it/sec) -training >> step=9464400, episode=1578 reward=0.7744688 (475.97 it/sec) -training >> step=9464500, episode=1578 reward=0.8202708 (485.89 it/sec) -training >> step=9464600, episode=1578 reward=0.8042699 (509.73 it/sec) -training >> step=9464700, episode=1578 reward=0.7953155 (503.56 it/sec) -training >> step=9464800, episode=1578 reward=0.780696 (457.38 it/sec) -training >> step=9464900, episode=1578 reward=0.7921633 (474.59 it/sec) -training >> step=9465000, episode=1578 reward=0.7802618 (511.81 it/sec) -training >> step=9465100, episode=1578 reward=0.7875451 (489.13 it/sec) -training >> step=9465200, episode=1578 reward=0.7761049 (504.16 it/sec) -training >> step=9465300, episode=1578 reward=0.7937723 (490.19 it/sec) -training >> step=9465400, episode=1578 reward=0.7658704 (430.48 it/sec) -training >> step=9465500, episode=1578 reward=0.7832143 (481.62 it/sec) -training >> step=9465600, episode=1578 reward=0.797842 (503.56 it/sec) -training >> step=9465700, episode=1578 reward=0.7728567 (546.33 it/sec) -training >> step=9465800, episode=1578 reward=0.7814048 (487.34 it/sec) -training >> step=9465900, episode=1578 reward=0.7871729 (490.48 it/sec) -training >> step=9466000, episode=1578 reward=0.7967951 (509.52 it/sec) -training >> step=9466100, episode=1578 reward=0.7920063 (498.97 it/sec) -training >> step=9466200, episode=1578 reward=0.7803431 (484.42 it/sec) -training >> step=9466300, episode=1578 reward=0.7884238 (460.45 it/sec) -training >> step=9466400, episode=1578 reward=0.8000718 (509.71 it/sec) -training >> step=9466500, episode=1578 reward=0.7853752 (506.33 it/sec) -training >> step=9466600, episode=1578 reward=0.7881842 (469.53 it/sec) -training >> step=9466700, episode=1578 reward=0.8012924 (523.46 it/sec) -training >> step=9466800, episode=1578 reward=0.7978383 (490.05 it/sec) -training >> step=9466900, episode=1578 reward=0.8211718 (500.06 it/sec) -training >> step=9467000, episode=1578 reward=0.7772141 (499.13 it/sec) -training >> step=9467100, episode=1578 reward=0.7927813 (513.69 it/sec) -training >> step=9467200, episode=1578 reward=0.7884087 (500.92 it/sec) -training >> step=9467300, episode=1579 reward=0.7976912 (129.42 it/sec) -training >> step=9467400, episode=1579 reward=0.7885009 (500.89 it/sec) -training >> step=9467500, episode=1579 reward=0.7960078 (512.09 it/sec) -training >> step=9467600, episode=1579 reward=0.7765742 (481.96 it/sec) -training >> step=9467700, episode=1579 reward=0.7939509 (498.08 it/sec) -training >> step=9467800, episode=1579 reward=0.7915278 (502.94 it/sec) -training >> step=9467900, episode=1579 reward=0.7966156 (503.30 it/sec) -training >> step=9468000, episode=1579 reward=0.7822366 (511.07 it/sec) -training >> step=9468100, episode=1579 reward=0.7844512 (476.79 it/sec) -training >> step=9468200, episode=1579 reward=0.7832899 (525.82 it/sec) -training >> step=9468300, episode=1579 reward=0.7797354 (485.36 it/sec) -training >> step=9468400, episode=1579 reward=0.7901996 (476.55 it/sec) -training >> step=9468500, episode=1579 reward=0.7914399 (519.76 it/sec) -training >> step=9468600, episode=1579 reward=0.7943581 (495.09 it/sec) -training >> step=9468700, episode=1579 reward=0.7861972 (488.27 it/sec) -training >> step=9468800, episode=1579 reward=0.8056882 (493.25 it/sec) -training >> step=9468900, episode=1579 reward=0.8007141 (519.49 it/sec) -training >> step=9469000, episode=1579 reward=0.7836403 (486.71 it/sec) -training >> step=9469100, episode=1579 reward=0.8058932 (477.95 it/sec) -training >> step=9469200, episode=1579 reward=0.8062684 (440.77 it/sec) -training >> step=9469300, episode=1579 reward=0.7986496 (491.95 it/sec) -training >> step=9469400, episode=1579 reward=0.8068077 (460.20 it/sec) -training >> step=9469500, episode=1579 reward=0.7870106 (469.48 it/sec) -training >> step=9469600, episode=1579 reward=0.7947727 (422.94 it/sec) -training >> step=9469700, episode=1579 reward=0.7900582 (493.08 it/sec) -training >> step=9469800, episode=1579 reward=0.7792118 (477.16 it/sec) -training >> step=9469900, episode=1579 reward=0.8018509 (503.29 it/sec) -training >> step=9470000, episode=1579 reward=0.7702386 (514.58 it/sec) -training >> step=9470100, episode=1579 reward=0.8212481 (450.01 it/sec) -training >> step=9470200, episode=1579 reward=0.7790354 (493.44 it/sec) -training >> step=9470300, episode=1579 reward=0.8278307 (473.57 it/sec) -training >> step=9470400, episode=1579 reward=0.7877449 (521.76 it/sec) -training >> step=9470500, episode=1579 reward=0.7820224 (506.30 it/sec) -training >> step=9470600, episode=1579 reward=0.8044212 (475.50 it/sec) -training >> step=9470700, episode=1579 reward=0.7984233 (498.89 it/sec) -training >> step=9470800, episode=1579 reward=0.7744645 (430.72 it/sec) -training >> step=9470900, episode=1579 reward=0.7800197 (426.78 it/sec) -training >> step=9471000, episode=1579 reward=0.7881977 (454.67 it/sec) -training >> step=9471100, episode=1579 reward=0.7908236 (496.42 it/sec) -training >> step=9471200, episode=1579 reward=0.7859201 (500.66 it/sec) -training >> step=9471300, episode=1579 reward=0.7937957 (474.76 it/sec) -training >> step=9471400, episode=1579 reward=0.7988806 (509.03 it/sec) -training >> step=9471500, episode=1579 reward=0.776349 (477.84 it/sec) -training >> step=9471600, episode=1579 reward=0.7874284 (498.49 it/sec) -training >> step=9471700, episode=1579 reward=0.8123218 (485.27 it/sec) -training >> step=9471800, episode=1579 reward=0.792598 (486.23 it/sec) -training >> step=9471900, episode=1579 reward=0.8046123 (492.76 it/sec) -training >> step=9472000, episode=1579 reward=0.7874454 (456.16 it/sec) -training >> step=9472100, episode=1579 reward=0.7738103 (484.59 it/sec) -training >> step=9472200, episode=1579 reward=0.7696974 (497.48 it/sec) -training >> step=9472300, episode=1579 reward=0.7727723 (445.83 it/sec) -training >> step=9472400, episode=1579 reward=0.7702985 (431.48 it/sec) -training >> step=9472500, episode=1579 reward=0.7890157 (425.18 it/sec) -training >> step=9472600, episode=1579 reward=0.7916099 (407.31 it/sec) -training >> step=9472700, episode=1579 reward=0.7966023 (397.86 it/sec) -training >> step=9472800, episode=1579 reward=0.7928281 (474.78 it/sec) -training >> step=9472900, episode=1579 reward=0.7988993 (479.16 it/sec) -training >> step=9473000, episode=1579 reward=0.7703004 (479.89 it/sec) -training >> step=9473100, episode=1579 reward=0.7691247 (472.84 it/sec) -training >> step=9473200, episode=1579 reward=0.8025342 (502.35 it/sec) -training >> step=9473300, episode=1580 reward=0.7820204 (119.83 it/sec) -training >> step=9473400, episode=1580 reward=0.7760876 (492.36 it/sec) -training >> step=9473500, episode=1580 reward=0.7783005 (496.29 it/sec) -training >> step=9473600, episode=1580 reward=0.7836015 (508.11 it/sec) -training >> step=9473700, episode=1580 reward=0.7902449 (478.73 it/sec) -training >> step=9473800, episode=1580 reward=0.7765515 (498.28 it/sec) -training >> step=9473900, episode=1580 reward=0.7898927 (492.42 it/sec) -training >> step=9474000, episode=1580 reward=0.8040769 (544.84 it/sec) -training >> step=9474100, episode=1580 reward=0.7818893 (498.16 it/sec) -training >> step=9474200, episode=1580 reward=0.776118 (502.65 it/sec) -training >> step=9474300, episode=1580 reward=0.790706 (502.75 it/sec) -training >> step=9474400, episode=1580 reward=0.8115777 (478.08 it/sec) -training >> step=9474500, episode=1580 reward=0.7890937 (535.52 it/sec) -training >> step=9474600, episode=1580 reward=0.8054493 (450.57 it/sec) -training >> step=9474700, episode=1580 reward=0.7882777 (482.06 it/sec) -training >> step=9474800, episode=1580 reward=0.7902442 (506.30 it/sec) -training >> step=9474900, episode=1580 reward=0.8143332 (488.34 it/sec) -training >> step=9475000, episode=1580 reward=0.7806858 (508.55 it/sec) -training >> step=9475100, episode=1580 reward=0.797536 (501.76 it/sec) -training >> step=9475200, episode=1580 reward=0.8173265 (486.53 it/sec) -training >> step=9475300, episode=1580 reward=0.7996551 (457.99 it/sec) -training >> step=9475400, episode=1580 reward=0.8126345 (475.27 it/sec) -training >> step=9475500, episode=1580 reward=0.7816089 (496.98 it/sec) -training >> step=9475600, episode=1580 reward=0.7825176 (471.68 it/sec) -training >> step=9475700, episode=1580 reward=0.7966511 (465.19 it/sec) -training >> step=9475800, episode=1580 reward=0.7808123 (532.09 it/sec) -training >> step=9475900, episode=1580 reward=0.7962329 (460.98 it/sec) -training >> step=9476000, episode=1580 reward=0.7834136 (494.24 it/sec) -training >> step=9476100, episode=1580 reward=0.7888162 (503.77 it/sec) -training >> step=9476200, episode=1580 reward=0.8062486 (482.95 it/sec) -training >> step=9476300, episode=1580 reward=0.7818206 (504.57 it/sec) -training >> step=9476400, episode=1580 reward=0.7763547 (475.04 it/sec) -training >> step=9476500, episode=1580 reward=0.7812136 (493.68 it/sec) -training >> step=9476600, episode=1580 reward=0.7786707 (471.63 it/sec) -training >> step=9476700, episode=1580 reward=0.7964114 (489.39 it/sec) -training >> step=9476800, episode=1580 reward=0.7710827 (502.18 it/sec) -training >> step=9476900, episode=1580 reward=0.7794676 (516.95 it/sec) -training >> step=9477000, episode=1580 reward=0.7798842 (512.86 it/sec) -training >> step=9477100, episode=1580 reward=0.780795 (469.84 it/sec) -training >> step=9477200, episode=1580 reward=0.7953826 (492.23 it/sec) -training >> step=9477300, episode=1580 reward=0.7895622 (483.12 it/sec) -training >> step=9477400, episode=1580 reward=0.7933458 (471.63 it/sec) -training >> step=9477500, episode=1580 reward=0.7830753 (510.17 it/sec) -training >> step=9477600, episode=1580 reward=0.7952582 (511.05 it/sec) -training >> step=9477700, episode=1580 reward=0.7786539 (493.82 it/sec) -training >> step=9477800, episode=1580 reward=0.7612919 (431.98 it/sec) -training >> step=9477900, episode=1580 reward=0.7779441 (501.29 it/sec) -training >> step=9478000, episode=1580 reward=0.795245 (487.85 it/sec) -training >> step=9478100, episode=1580 reward=0.7897649 (475.06 it/sec) -training >> step=9478200, episode=1580 reward=0.7880349 (474.35 it/sec) -training >> step=9478300, episode=1580 reward=0.7864023 (516.88 it/sec) -training >> step=9478400, episode=1580 reward=0.778534 (473.35 it/sec) -training >> step=9478500, episode=1580 reward=0.7990489 (475.55 it/sec) -training >> step=9478600, episode=1580 reward=0.7952736 (464.79 it/sec) -training >> step=9478700, episode=1580 reward=0.7981622 (502.89 it/sec) -training >> step=9478800, episode=1580 reward=0.7840043 (499.13 it/sec) -training >> step=9478900, episode=1580 reward=0.7798495 (505.43 it/sec) -training >> step=9479000, episode=1580 reward=0.7873707 (473.07 it/sec) -training >> step=9479100, episode=1580 reward=0.7985624 (490.69 it/sec) -training >> step=9479200, episode=1580 reward=0.7885469 (488.31 it/sec) -training >> step=9479300, episode=1581 reward=0.7848147 (106.86 it/sec) -training >> step=9479400, episode=1581 reward=0.7726943 (519.89 it/sec) -training >> step=9479500, episode=1581 reward=0.7843086 (483.99 it/sec) -training >> step=9479600, episode=1581 reward=0.79215 (473.11 it/sec) -training >> step=9479700, episode=1581 reward=0.8022965 (474.61 it/sec) -training >> step=9479800, episode=1581 reward=0.7753441 (505.42 it/sec) -training >> step=9479900, episode=1581 reward=0.8083295 (473.77 it/sec) -training >> step=9480000, episode=1581 reward=0.82327 (486.51 it/sec) -training >> step=9480100, episode=1581 reward=0.7825197 (491.72 it/sec) -training >> step=9480200, episode=1581 reward=0.7856886 (491.67 it/sec) -training >> step=9480300, episode=1581 reward=0.7679903 (486.05 it/sec) -training >> step=9480400, episode=1581 reward=0.7985636 (474.73 it/sec) -training >> step=9480500, episode=1581 reward=0.8090587 (507.92 it/sec) -training >> step=9480600, episode=1581 reward=0.8027628 (515.22 it/sec) -training >> step=9480700, episode=1581 reward=0.7818585 (517.03 it/sec) -training >> step=9480800, episode=1581 reward=0.7853369 (484.71 it/sec) -training >> step=9480900, episode=1581 reward=0.7837697 (457.81 it/sec) -training >> step=9481000, episode=1581 reward=0.7840346 (505.75 it/sec) -training >> step=9481100, episode=1581 reward=0.7859343 (462.20 it/sec) -training >> step=9481200, episode=1581 reward=0.7906308 (491.98 it/sec) -training >> step=9481300, episode=1581 reward=0.7868863 (439.05 it/sec) -training >> step=9481400, episode=1581 reward=0.7796083 (429.86 it/sec) -training >> step=9481500, episode=1581 reward=0.7958918 (502.72 it/sec) -training >> step=9481600, episode=1581 reward=0.8114012 (540.39 it/sec) -training >> step=9481700, episode=1581 reward=0.788479 (504.08 it/sec) -training >> step=9481800, episode=1581 reward=0.78684 (476.44 it/sec) -training >> step=9481900, episode=1581 reward=0.7855508 (519.10 it/sec) -training >> step=9482000, episode=1581 reward=0.8025135 (518.10 it/sec) -training >> step=9482100, episode=1581 reward=0.7827533 (474.73 it/sec) -training >> step=9482200, episode=1581 reward=0.7938983 (506.98 it/sec) -training >> step=9482300, episode=1581 reward=0.7987946 (512.36 it/sec) -training >> step=9482400, episode=1581 reward=0.7729368 (455.04 it/sec) -training >> step=9482500, episode=1581 reward=0.7981766 (475.64 it/sec) -training >> step=9482600, episode=1581 reward=0.8014683 (531.70 it/sec) -training >> step=9482700, episode=1581 reward=0.7979742 (495.29 it/sec) -training >> step=9482800, episode=1581 reward=0.7736493 (493.06 it/sec) -training >> step=9482900, episode=1581 reward=0.7983424 (487.48 it/sec) -training >> step=9483000, episode=1581 reward=0.7898583 (483.07 it/sec) -training >> step=9483100, episode=1581 reward=0.7962506 (455.98 it/sec) -training >> step=9483200, episode=1581 reward=0.7962767 (483.60 it/sec) -training >> step=9483300, episode=1581 reward=0.7847131 (488.86 it/sec) -training >> step=9483400, episode=1581 reward=0.8058971 (518.78 it/sec) -training >> step=9483500, episode=1581 reward=0.7632777 (467.35 it/sec) -training >> step=9483600, episode=1581 reward=0.7845565 (432.36 it/sec) -training >> step=9483700, episode=1581 reward=0.7985033 (504.35 it/sec) -training >> step=9483800, episode=1581 reward=0.7862969 (480.30 it/sec) -training >> step=9483900, episode=1581 reward=0.8046124 (489.59 it/sec) -training >> step=9484000, episode=1581 reward=0.7982689 (482.34 it/sec) -training >> step=9484100, episode=1581 reward=0.7773047 (530.64 it/sec) -training >> step=9484200, episode=1581 reward=0.8000581 (499.26 it/sec) -training >> step=9484300, episode=1581 reward=0.7836522 (485.90 it/sec) -training >> step=9484400, episode=1581 reward=0.8162029 (502.06 it/sec) -training >> step=9484500, episode=1581 reward=0.7771646 (516.67 it/sec) -training >> step=9484600, episode=1581 reward=0.8000033 (533.80 it/sec) -training >> step=9484700, episode=1581 reward=0.8054192 (457.42 it/sec) -training >> step=9484800, episode=1581 reward=0.7815619 (505.13 it/sec) -training >> step=9484900, episode=1581 reward=0.7718502 (460.26 it/sec) -training >> step=9485000, episode=1581 reward=0.7869931 (461.79 it/sec) -training >> step=9485100, episode=1581 reward=0.7881879 (519.47 it/sec) -training >> step=9485200, episode=1581 reward=0.7838494 (504.91 it/sec) -training >> step=9485300, episode=1582 reward=0.7937924 (118.39 it/sec) -training >> step=9485400, episode=1582 reward=0.8071433 (472.08 it/sec) -training >> step=9485500, episode=1582 reward=0.7844458 (465.42 it/sec) -training >> step=9485600, episode=1582 reward=0.7838027 (501.07 it/sec) -training >> step=9485700, episode=1582 reward=0.7847283 (485.05 it/sec) -training >> step=9485800, episode=1582 reward=0.7925947 (502.72 it/sec) -training >> step=9485900, episode=1582 reward=0.7823257 (503.26 it/sec) -training >> step=9486000, episode=1582 reward=0.7927315 (401.54 it/sec) -training >> step=9486100, episode=1582 reward=0.8139158 (413.09 it/sec) -training >> step=9486200, episode=1582 reward=0.7963901 (440.62 it/sec) -training >> step=9486300, episode=1582 reward=0.7919824 (466.22 it/sec) -training >> step=9486400, episode=1582 reward=0.7944772 (450.76 it/sec) -training >> step=9486500, episode=1582 reward=0.796662 (383.70 it/sec) -training >> step=9486600, episode=1582 reward=0.787672 (496.05 it/sec) -training >> step=9486700, episode=1582 reward=0.7690007 (492.31 it/sec) -training >> step=9486800, episode=1582 reward=0.8214004 (486.04 it/sec) -training >> step=9486900, episode=1582 reward=0.7888281 (448.72 it/sec) -training >> step=9487000, episode=1582 reward=0.785529 (497.55 it/sec) -training >> step=9487100, episode=1582 reward=0.8000113 (493.23 it/sec) -training >> step=9487200, episode=1582 reward=0.8019223 (506.10 it/sec) -training >> step=9487300, episode=1582 reward=0.7907537 (469.64 it/sec) -training >> step=9487400, episode=1582 reward=0.8042027 (470.80 it/sec) -training >> step=9487500, episode=1582 reward=0.7830378 (480.35 it/sec) -training >> step=9487600, episode=1582 reward=0.7823886 (511.18 it/sec) -training >> step=9487700, episode=1582 reward=0.7705994 (512.65 it/sec) -training >> step=9487800, episode=1582 reward=0.7764608 (441.24 it/sec) -training >> step=9487900, episode=1582 reward=0.789464 (475.05 it/sec) -training >> step=9488000, episode=1582 reward=0.7702251 (506.10 it/sec) -training >> step=9488100, episode=1582 reward=0.7877128 (536.75 it/sec) -training >> step=9488200, episode=1582 reward=0.8061268 (493.04 it/sec) -training >> step=9488300, episode=1582 reward=0.78553 (464.78 it/sec) -training >> step=9488400, episode=1582 reward=0.7865961 (484.26 it/sec) -training >> step=9488500, episode=1582 reward=0.795037 (514.81 it/sec) -training >> step=9488600, episode=1582 reward=0.7903009 (503.47 it/sec) -training >> step=9488700, episode=1582 reward=0.7882952 (495.87 it/sec) -training >> step=9488800, episode=1582 reward=0.7983527 (518.00 it/sec) -training >> step=9488900, episode=1582 reward=0.791627 (483.87 it/sec) -training >> step=9489000, episode=1582 reward=0.8011399 (472.20 it/sec) -training >> step=9489100, episode=1582 reward=0.7818499 (498.94 it/sec) -training >> step=9489200, episode=1582 reward=0.807945 (530.40 it/sec) -training >> step=9489300, episode=1582 reward=0.8246628 (488.34 it/sec) -training >> step=9489400, episode=1582 reward=0.8003437 (467.58 it/sec) -training >> step=9489500, episode=1582 reward=0.7848374 (516.99 it/sec) -training >> step=9489600, episode=1582 reward=0.77343 (472.04 it/sec) -training >> step=9489700, episode=1582 reward=0.7691088 (520.50 it/sec) -training >> step=9489800, episode=1582 reward=0.7917346 (473.03 it/sec) -training >> step=9489900, episode=1582 reward=0.7910244 (493.95 it/sec) -training >> step=9490000, episode=1582 reward=0.7960339 (516.01 it/sec) -training >> step=9490100, episode=1582 reward=0.7958825 (511.37 it/sec) -training >> step=9490200, episode=1582 reward=0.7988805 (514.16 it/sec) -training >> step=9490300, episode=1582 reward=0.7933017 (501.81 it/sec) -training >> step=9490400, episode=1582 reward=0.7877018 (494.68 it/sec) -training >> step=9490500, episode=1582 reward=0.784188 (463.76 it/sec) -training >> step=9490600, episode=1582 reward=0.8030682 (457.14 it/sec) -training >> step=9490700, episode=1582 reward=0.7975356 (508.01 it/sec) -training >> step=9490800, episode=1582 reward=0.7838448 (478.27 it/sec) -training >> step=9490900, episode=1582 reward=0.7704426 (469.78 it/sec) -training >> step=9491000, episode=1582 reward=0.800482 (541.89 it/sec) -training >> step=9491100, episode=1582 reward=0.8014964 (488.75 it/sec) -training >> step=9491200, episode=1582 reward=0.775939 (501.97 it/sec) -training >> step=9491300, episode=1583 reward=0.7913077 (103.63 it/sec) -training >> step=9491400, episode=1583 reward=0.7713763 (493.35 it/sec) -training >> step=9491500, episode=1583 reward=0.8031502 (461.03 it/sec) -training >> step=9491600, episode=1583 reward=0.7599595 (478.77 it/sec) -training >> step=9491700, episode=1583 reward=0.7838368 (526.70 it/sec) -training >> step=9491800, episode=1583 reward=0.7962167 (531.79 it/sec) -training >> step=9491900, episode=1583 reward=0.7818658 (505.80 it/sec) -training >> step=9492000, episode=1583 reward=0.7980202 (480.58 it/sec) -training >> step=9492100, episode=1583 reward=0.8084776 (500.72 it/sec) -training >> step=9492200, episode=1583 reward=0.8091708 (485.63 it/sec) -training >> step=9492300, episode=1583 reward=0.7835183 (458.98 it/sec) -training >> step=9492400, episode=1583 reward=0.787676 (528.01 it/sec) -training >> step=9492500, episode=1583 reward=0.8092353 (455.16 it/sec) -training >> step=9492600, episode=1583 reward=0.799697 (469.12 it/sec) -training >> step=9492700, episode=1583 reward=0.8061953 (453.51 it/sec) -training >> step=9492800, episode=1583 reward=0.8061202 (534.10 it/sec) -training >> step=9492900, episode=1583 reward=0.7978228 (481.53 it/sec) -training >> step=9493000, episode=1583 reward=0.7839897 (468.96 it/sec) -training >> step=9493100, episode=1583 reward=0.7801421 (491.47 it/sec) -training >> step=9493200, episode=1583 reward=0.7896226 (531.18 it/sec) -training >> step=9493300, episode=1583 reward=0.7923709 (500.31 it/sec) -training >> step=9493400, episode=1583 reward=0.7813893 (486.99 it/sec) -training >> step=9493500, episode=1583 reward=0.81054 (449.55 it/sec) -training >> step=9493600, episode=1583 reward=0.7905664 (422.48 it/sec) -training >> step=9493700, episode=1583 reward=0.7809784 (480.80 it/sec) -training >> step=9493800, episode=1583 reward=0.7921078 (506.24 it/sec) -training >> step=9493900, episode=1583 reward=0.7951331 (505.80 it/sec) -training >> step=9494000, episode=1583 reward=0.7944732 (422.59 it/sec) -training >> step=9494100, episode=1583 reward=0.7914768 (484.20 it/sec) -training >> step=9494200, episode=1583 reward=0.7820644 (491.98 it/sec) -training >> step=9494300, episode=1583 reward=0.7863337 (498.26 it/sec) -training >> step=9494400, episode=1583 reward=0.7916907 (503.24 it/sec) -training >> step=9494500, episode=1583 reward=0.7735854 (410.07 it/sec) -training >> step=9494600, episode=1583 reward=0.8056892 (506.40 it/sec) -training >> step=9494700, episode=1583 reward=0.7988161 (451.18 it/sec) -training >> step=9494800, episode=1583 reward=0.7800254 (494.64 it/sec) -training >> step=9494900, episode=1583 reward=0.7758656 (501.57 it/sec) -training >> step=9495000, episode=1583 reward=0.78608 (478.15 it/sec) -training >> step=9495100, episode=1583 reward=0.7955908 (497.30 it/sec) -training >> step=9495200, episode=1583 reward=0.8111774 (507.76 it/sec) -training >> step=9495300, episode=1583 reward=0.7788788 (493.73 it/sec) -training >> step=9495400, episode=1583 reward=0.797113 (506.20 it/sec) -training >> step=9495500, episode=1583 reward=0.8039955 (434.37 it/sec) -training >> step=9495600, episode=1583 reward=0.7944533 (449.49 it/sec) -training >> step=9495700, episode=1583 reward=0.8142674 (488.33 it/sec) -training >> step=9495800, episode=1583 reward=0.7932666 (495.09 it/sec) -training >> step=9495900, episode=1583 reward=0.7858569 (492.54 it/sec) -training >> step=9496000, episode=1583 reward=0.7984318 (453.45 it/sec) -training >> step=9496100, episode=1583 reward=0.7716728 (535.59 it/sec) -training >> step=9496200, episode=1583 reward=0.8022665 (513.39 it/sec) -training >> step=9496300, episode=1583 reward=0.7957573 (525.91 it/sec) -training >> step=9496400, episode=1583 reward=0.7727648 (462.36 it/sec) -training >> step=9496500, episode=1583 reward=0.7797287 (412.47 it/sec) -training >> step=9496600, episode=1583 reward=0.778361 (468.20 it/sec) -training >> step=9496700, episode=1583 reward=0.8056597 (521.90 it/sec) -training >> step=9496800, episode=1583 reward=0.7864432 (479.04 it/sec) -training >> step=9496900, episode=1583 reward=0.7725231 (520.81 it/sec) -training >> step=9497000, episode=1583 reward=0.7861038 (438.24 it/sec) -training >> step=9497100, episode=1583 reward=0.7680412 (497.04 it/sec) -training >> step=9497200, episode=1583 reward=0.792897 (475.49 it/sec) -training >> step=9497300, episode=1584 reward=0.781473 (135.01 it/sec) -training >> step=9497400, episode=1584 reward=0.7786258 (528.24 it/sec) -training >> step=9497500, episode=1584 reward=0.766476 (521.63 it/sec) -training >> step=9497600, episode=1584 reward=0.8112941 (457.73 it/sec) -training >> step=9497700, episode=1584 reward=0.7834724 (488.21 it/sec) -training >> step=9497800, episode=1584 reward=0.7933521 (502.46 it/sec) -training >> step=9497900, episode=1584 reward=0.7826643 (515.04 it/sec) -training >> step=9498000, episode=1584 reward=0.7917255 (547.52 it/sec) -training >> step=9498100, episode=1584 reward=0.7867177 (494.56 it/sec) -training >> step=9498200, episode=1584 reward=0.7977542 (485.22 it/sec) -training >> step=9498300, episode=1584 reward=0.7962797 (473.16 it/sec) -training >> step=9498400, episode=1584 reward=0.7841177 (495.40 it/sec) -training >> step=9498500, episode=1584 reward=0.7837995 (523.70 it/sec) -training >> step=9498600, episode=1584 reward=0.7760013 (489.22 it/sec) -training >> step=9498700, episode=1584 reward=0.7832769 (465.61 it/sec) -training >> step=9498800, episode=1584 reward=0.8035263 (479.03 it/sec) -training >> step=9498900, episode=1584 reward=0.7818081 (488.09 it/sec) -training >> step=9499000, episode=1584 reward=0.7844794 (531.47 it/sec) -training >> step=9499100, episode=1584 reward=0.7982102 (488.13 it/sec) -training >> step=9499200, episode=1584 reward=0.7878224 (465.10 it/sec) -training >> step=9499300, episode=1584 reward=0.7783587 (465.39 it/sec) -training >> step=9499400, episode=1584 reward=0.800117 (499.33 it/sec) -training >> step=9499500, episode=1584 reward=0.7816519 (522.34 it/sec) -training >> step=9499600, episode=1584 reward=0.8012932 (498.83 it/sec) -training >> step=9499700, episode=1584 reward=0.8239855 (518.95 it/sec) -training >> step=9499800, episode=1584 reward=0.7934718 (455.30 it/sec) -training >> step=9499900, episode=1584 reward=0.7792011 (473.43 it/sec) -training >> step=9500000, episode=1584 reward=0.8079998 (437.48 it/sec) -training >> step=9500100, episode=1584 reward=0.7877777 (441.38 it/sec) -training >> step=9500200, episode=1584 reward=0.7700282 (446.54 it/sec) -training >> step=9500300, episode=1584 reward=0.799382 (401.93 it/sec) -training >> step=9500400, episode=1584 reward=0.7701643 (536.92 it/sec) -training >> step=9500500, episode=1584 reward=0.7899663 (483.14 it/sec) -training >> step=9500600, episode=1584 reward=0.8057019 (515.40 it/sec) -training >> step=9500700, episode=1584 reward=0.7988048 (462.98 it/sec) -training >> step=9500800, episode=1584 reward=0.7941747 (484.43 it/sec) -training >> step=9500900, episode=1584 reward=0.7886175 (521.74 it/sec) -training >> step=9501000, episode=1584 reward=0.7959628 (493.09 it/sec) -training >> step=9501100, episode=1584 reward=0.8000081 (481.47 it/sec) -training >> step=9501200, episode=1584 reward=0.7875468 (488.01 it/sec) -training >> step=9501300, episode=1584 reward=0.7987548 (465.24 it/sec) -training >> step=9501400, episode=1584 reward=0.7790773 (530.55 it/sec) -training >> step=9501500, episode=1584 reward=0.790129 (522.82 it/sec) -training >> step=9501600, episode=1584 reward=0.7955821 (499.22 it/sec) -training >> step=9501700, episode=1584 reward=0.8162528 (514.81 it/sec) -training >> step=9501800, episode=1584 reward=0.7935054 (466.63 it/sec) -training >> step=9501900, episode=1584 reward=0.7775922 (522.55 it/sec) -training >> step=9502000, episode=1584 reward=0.7762383 (456.81 it/sec) -training >> step=9502100, episode=1584 reward=0.8026134 (478.90 it/sec) -training >> step=9502200, episode=1584 reward=0.7997793 (498.71 it/sec) -training >> step=9502300, episode=1584 reward=0.7797336 (464.24 it/sec) -training >> step=9502400, episode=1584 reward=0.7779643 (505.46 it/sec) -training >> step=9502500, episode=1584 reward=0.7806457 (510.65 it/sec) -training >> step=9502600, episode=1584 reward=0.7658528 (507.27 it/sec) -training >> step=9502700, episode=1584 reward=0.7787768 (481.86 it/sec) -training >> step=9502800, episode=1584 reward=0.7933698 (448.85 it/sec) -training >> step=9502900, episode=1584 reward=0.7892755 (520.03 it/sec) -training >> step=9503000, episode=1584 reward=0.7790254 (497.70 it/sec) -training >> step=9503100, episode=1584 reward=0.7827525 (489.48 it/sec) -training >> step=9503200, episode=1584 reward=0.7878988 (489.06 it/sec) -training >> step=9503300, episode=1585 reward=0.7761629 (98.13 it/sec) -training >> step=9503400, episode=1585 reward=0.7913008 (225.31 it/sec) -training >> step=9503500, episode=1585 reward=0.7898298 (479.52 it/sec) -training >> step=9503600, episode=1585 reward=0.7610489 (466.66 it/sec) -training >> step=9503700, episode=1585 reward=0.769882 (439.49 it/sec) -training >> step=9503800, episode=1585 reward=0.8062518 (462.66 it/sec) -training >> step=9503900, episode=1585 reward=0.7762554 (563.65 it/sec) -training >> step=9504000, episode=1585 reward=0.7938471 (493.11 it/sec) -training >> step=9504100, episode=1585 reward=0.7815889 (476.88 it/sec) -training >> step=9504200, episode=1585 reward=0.7834006 (486.98 it/sec) -training >> step=9504300, episode=1585 reward=0.7947376 (453.99 it/sec) -training >> step=9504400, episode=1585 reward=0.7963328 (497.74 it/sec) -training >> step=9504500, episode=1585 reward=0.7904897 (525.27 it/sec) -training >> step=9504600, episode=1585 reward=0.8038497 (515.78 it/sec) -training >> step=9504700, episode=1585 reward=0.7745174 (497.71 it/sec) -training >> step=9504800, episode=1585 reward=0.7913633 (443.84 it/sec) -training >> step=9504900, episode=1585 reward=0.7944016 (505.86 it/sec) -training >> step=9505000, episode=1585 reward=0.8047663 (513.69 it/sec) -training >> step=9505100, episode=1585 reward=0.7941625 (493.30 it/sec) -training >> step=9505200, episode=1585 reward=0.7943286 (494.04 it/sec) -training >> step=9505300, episode=1585 reward=0.7967984 (465.92 it/sec) -training >> step=9505400, episode=1585 reward=0.782046 (531.74 it/sec) -training >> step=9505500, episode=1585 reward=0.8106568 (502.54 it/sec) -training >> step=9505600, episode=1585 reward=0.8026177 (482.64 it/sec) -training >> step=9505700, episode=1585 reward=0.7879304 (444.93 it/sec) -training >> step=9505800, episode=1585 reward=0.8066096 (467.96 it/sec) -training >> step=9505900, episode=1585 reward=0.7802489 (472.16 it/sec) -training >> step=9506000, episode=1585 reward=0.7732501 (521.26 it/sec) -training >> step=9506100, episode=1585 reward=0.7890296 (501.84 it/sec) -training >> step=9506200, episode=1585 reward=0.7760576 (497.27 it/sec) -training >> step=9506300, episode=1585 reward=0.7863678 (505.11 it/sec) -training >> step=9506400, episode=1585 reward=0.7787759 (474.56 it/sec) -training >> step=9506500, episode=1585 reward=0.7749916 (489.94 it/sec) -training >> step=9506600, episode=1585 reward=0.8028038 (526.82 it/sec) -training >> step=9506700, episode=1585 reward=0.7760558 (481.60 it/sec) -training >> step=9506800, episode=1585 reward=0.8208125 (496.74 it/sec) -training >> step=9506900, episode=1585 reward=0.7830408 (453.32 it/sec) -training >> step=9507000, episode=1585 reward=0.7919565 (464.55 it/sec) -training >> step=9507100, episode=1585 reward=0.7840247 (528.90 it/sec) -training >> step=9507200, episode=1585 reward=0.782142 (526.90 it/sec) -training >> step=9507300, episode=1585 reward=0.7987819 (491.54 it/sec) -training >> step=9507400, episode=1585 reward=0.777007 (477.83 it/sec) -training >> step=9507500, episode=1585 reward=0.783693 (537.30 it/sec) -training >> step=9507600, episode=1585 reward=0.774237 (485.89 it/sec) -training >> step=9507700, episode=1585 reward=0.8009822 (452.28 it/sec) -training >> step=9507800, episode=1585 reward=0.7861109 (506.52 it/sec) -training >> step=9507900, episode=1585 reward=0.798009 (478.12 it/sec) -training >> step=9508000, episode=1585 reward=0.7878885 (510.87 it/sec) -training >> step=9508100, episode=1585 reward=0.805576 (496.20 it/sec) -training >> step=9508200, episode=1585 reward=0.7784754 (482.82 it/sec) -training >> step=9508300, episode=1585 reward=0.7816228 (485.14 it/sec) -training >> step=9508400, episode=1585 reward=0.7872921 (450.31 it/sec) -training >> step=9508500, episode=1585 reward=0.8088053 (506.34 it/sec) -training >> step=9508600, episode=1585 reward=0.783177 (514.79 it/sec) -training >> step=9508700, episode=1585 reward=0.7797664 (477.90 it/sec) -training >> step=9508800, episode=1585 reward=0.8184345 (456.98 it/sec) -training >> step=9508900, episode=1585 reward=0.7954683 (501.70 it/sec) -training >> step=9509000, episode=1585 reward=0.7887305 (515.39 it/sec) -training >> step=9509100, episode=1585 reward=0.7872806 (510.01 it/sec) -training >> step=9509200, episode=1585 reward=0.8031446 (490.21 it/sec) -training >> step=9509300, episode=1586 reward=0.7809563 (146.86 it/sec) -training >> step=9509400, episode=1586 reward=0.7769494 (498.34 it/sec) -training >> step=9509500, episode=1586 reward=0.780026 (498.56 it/sec) -training >> step=9509600, episode=1586 reward=0.8079605 (480.48 it/sec) -training >> step=9509700, episode=1586 reward=0.7867331 (485.48 it/sec) -training >> step=9509800, episode=1586 reward=0.7823645 (496.10 it/sec) -training >> step=9509900, episode=1586 reward=0.7857698 (498.37 it/sec) -training >> step=9510000, episode=1586 reward=0.7921607 (523.36 it/sec) -training >> step=9510100, episode=1586 reward=0.7882507 (479.59 it/sec) -training >> step=9510200, episode=1586 reward=0.7886239 (465.23 it/sec) -training >> step=9510300, episode=1586 reward=0.8028774 (480.56 it/sec) -training >> step=9510400, episode=1586 reward=0.7824573 (522.76 it/sec) -training >> step=9510500, episode=1586 reward=0.8024776 (513.56 it/sec) -training >> step=9510600, episode=1586 reward=0.7969303 (495.61 it/sec) -training >> step=9510700, episode=1586 reward=0.7941744 (526.23 it/sec) -training >> step=9510800, episode=1586 reward=0.8067585 (480.91 it/sec) -training >> step=9510900, episode=1586 reward=0.8044841 (505.56 it/sec) -training >> step=9511000, episode=1586 reward=0.7847582 (488.42 it/sec) -training >> step=9511100, episode=1586 reward=0.7776641 (500.25 it/sec) -training >> step=9511200, episode=1586 reward=0.8000428 (462.68 it/sec) -training >> step=9511300, episode=1586 reward=0.8040434 (437.09 it/sec) -training >> step=9511400, episode=1586 reward=0.7863805 (458.85 it/sec) -training >> step=9511500, episode=1586 reward=0.7818144 (519.79 it/sec) -training >> step=9511600, episode=1586 reward=0.7918202 (509.23 it/sec) -training >> step=9511700, episode=1586 reward=0.7655495 (478.30 it/sec) -training >> step=9511800, episode=1586 reward=0.7837693 (471.89 it/sec) -training >> step=9511900, episode=1586 reward=0.7899008 (439.99 it/sec) -training >> step=9512000, episode=1586 reward=0.7908307 (488.13 it/sec) -training >> step=9512100, episode=1586 reward=0.7963711 (517.15 it/sec) -training >> step=9512200, episode=1586 reward=0.7783262 (489.90 it/sec) -training >> step=9512300, episode=1586 reward=0.7896585 (466.51 it/sec) -training >> step=9512400, episode=1586 reward=0.7850386 (475.92 it/sec) -training >> step=9512500, episode=1586 reward=0.7989786 (501.19 it/sec) -training >> step=9512600, episode=1586 reward=0.771485 (499.09 it/sec) -training >> step=9512700, episode=1586 reward=0.7945378 (490.78 it/sec) -training >> step=9512800, episode=1586 reward=0.7865223 (479.19 it/sec) -training >> step=9512900, episode=1586 reward=0.7854132 (529.41 it/sec) -training >> step=9513000, episode=1586 reward=0.796611 (480.12 it/sec) -training >> step=9513100, episode=1586 reward=0.7990734 (485.00 it/sec) -training >> step=9513200, episode=1586 reward=0.7849288 (481.06 it/sec) -training >> step=9513300, episode=1586 reward=0.8002612 (496.48 it/sec) -training >> step=9513400, episode=1586 reward=0.7898891 (484.90 it/sec) -training >> step=9513500, episode=1586 reward=0.7937577 (482.06 it/sec) -training >> step=9513600, episode=1586 reward=0.7844237 (510.01 it/sec) -training >> step=9513700, episode=1586 reward=0.8065283 (434.01 it/sec) -training >> step=9513800, episode=1586 reward=0.8043218 (422.99 it/sec) -training >> step=9513900, episode=1586 reward=0.7976674 (448.97 it/sec) -training >> step=9514000, episode=1586 reward=0.8060068 (439.91 it/sec) -training >> step=9514100, episode=1586 reward=0.7911085 (405.66 it/sec) -training >> step=9514200, episode=1586 reward=0.8080543 (393.86 it/sec) -training >> step=9514300, episode=1586 reward=0.8035882 (494.58 it/sec) -training >> step=9514400, episode=1586 reward=0.7903219 (516.44 it/sec) -training >> step=9514500, episode=1586 reward=0.7867118 (498.37 it/sec) -training >> step=9514600, episode=1586 reward=0.7898265 (515.48 it/sec) -training >> step=9514700, episode=1586 reward=0.7939543 (484.72 it/sec) -training >> step=9514800, episode=1586 reward=0.7994084 (472.27 it/sec) -training >> step=9514900, episode=1586 reward=0.7905543 (508.73 it/sec) -training >> step=9515000, episode=1586 reward=0.7631964 (532.69 it/sec) -training >> step=9515100, episode=1586 reward=0.7827835 (509.32 it/sec) -training >> step=9515200, episode=1586 reward=0.7957944 (462.34 it/sec) -training >> step=9515300, episode=1587 reward=0.7802208 (115.98 it/sec) -training >> step=9515400, episode=1587 reward=0.7630042 (486.02 it/sec) -training >> step=9515500, episode=1587 reward=0.7723085 (470.23 it/sec) -training >> step=9515600, episode=1587 reward=0.7693735 (495.13 it/sec) -training >> step=9515700, episode=1587 reward=0.7948426 (534.73 it/sec) -training >> step=9515800, episode=1587 reward=0.8085003 (531.51 it/sec) -training >> step=9515900, episode=1587 reward=0.7879837 (490.58 it/sec) -training >> step=9516000, episode=1587 reward=0.7833374 (476.75 it/sec) -training >> step=9516100, episode=1587 reward=0.7931667 (486.92 it/sec) -training >> step=9516200, episode=1587 reward=0.8076645 (533.42 it/sec) -training >> step=9516300, episode=1587 reward=0.7861666 (486.00 it/sec) -training >> step=9516400, episode=1587 reward=0.7869204 (475.25 it/sec) -training >> step=9516500, episode=1587 reward=0.7760627 (546.01 it/sec) -training >> step=9516600, episode=1587 reward=0.7877077 (464.12 it/sec) -training >> step=9516700, episode=1587 reward=0.7952147 (501.17 it/sec) -training >> step=9516800, episode=1587 reward=0.7989182 (516.09 it/sec) -training >> step=9516900, episode=1587 reward=0.8052098 (482.94 it/sec) -training >> step=9517000, episode=1587 reward=0.810928 (483.88 it/sec) -training >> step=9517100, episode=1587 reward=0.7927058 (464.08 it/sec) -training >> step=9517200, episode=1587 reward=0.7932633 (514.19 it/sec) -training >> step=9517300, episode=1587 reward=0.787707 (500.23 it/sec) -training >> step=9517400, episode=1587 reward=0.7920081 (468.71 it/sec) -training >> step=9517500, episode=1587 reward=0.7861012 (488.92 it/sec) -training >> step=9517600, episode=1587 reward=0.7747597 (492.02 it/sec) -training >> step=9517700, episode=1587 reward=0.7931418 (483.54 it/sec) -training >> step=9517800, episode=1587 reward=0.7826426 (493.66 it/sec) -training >> step=9517900, episode=1587 reward=0.7803198 (489.41 it/sec) -training >> step=9518000, episode=1587 reward=0.7989836 (507.77 it/sec) -training >> step=9518100, episode=1587 reward=0.7815939 (498.67 it/sec) -training >> step=9518200, episode=1587 reward=0.8165569 (491.42 it/sec) -training >> step=9518300, episode=1587 reward=0.791694 (555.62 it/sec) -training >> step=9518400, episode=1587 reward=0.7996294 (468.35 it/sec) -training >> step=9518500, episode=1587 reward=0.8032361 (483.70 it/sec) -training >> step=9518600, episode=1587 reward=0.8042793 (489.34 it/sec) -training >> step=9518700, episode=1587 reward=0.7934198 (516.27 it/sec) -training >> step=9518800, episode=1587 reward=0.7971238 (490.67 it/sec) -training >> step=9518900, episode=1587 reward=0.7872842 (500.39 it/sec) -training >> step=9519000, episode=1587 reward=0.8000533 (511.12 it/sec) -training >> step=9519100, episode=1587 reward=0.7892414 (481.90 it/sec) -training >> step=9519200, episode=1587 reward=0.7905248 (475.91 it/sec) -training >> step=9519300, episode=1587 reward=0.7903976 (475.87 it/sec) -training >> step=9519400, episode=1587 reward=0.7938505 (522.35 it/sec) -training >> step=9519500, episode=1587 reward=0.7722373 (478.14 it/sec) -training >> step=9519600, episode=1587 reward=0.7889922 (476.18 it/sec) -training >> step=9519700, episode=1587 reward=0.7983853 (522.56 it/sec) -training >> step=9519800, episode=1587 reward=0.7708542 (505.14 it/sec) -training >> step=9519900, episode=1587 reward=0.7699501 (493.22 it/sec) -training >> step=9520000, episode=1587 reward=0.788363 (501.91 it/sec) -training >> step=9520100, episode=1587 reward=0.7920673 (525.39 it/sec) -training >> step=9520200, episode=1587 reward=0.776908 (493.65 it/sec) -training >> step=9520300, episode=1587 reward=0.8036368 (499.05 it/sec) -training >> step=9520400, episode=1587 reward=0.792173 (494.53 it/sec) -training >> step=9520500, episode=1587 reward=0.7906317 (494.57 it/sec) -training >> step=9520600, episode=1587 reward=0.7731283 (528.17 it/sec) -training >> step=9520700, episode=1587 reward=0.7941955 (476.27 it/sec) -training >> step=9520800, episode=1587 reward=0.7843493 (512.40 it/sec) -training >> step=9520900, episode=1587 reward=0.7734293 (509.50 it/sec) -training >> step=9521000, episode=1587 reward=0.8011689 (428.72 it/sec) -training >> step=9521100, episode=1587 reward=0.812057 (483.28 it/sec) -training >> step=9521200, episode=1587 reward=0.7942051 (557.12 it/sec) -training >> step=9521300, episode=1588 reward=0.7969017 (121.26 it/sec) -training >> step=9521400, episode=1588 reward=0.7753293 (527.47 it/sec) -training >> step=9521500, episode=1588 reward=0.7987649 (500.33 it/sec) -training >> step=9521600, episode=1588 reward=0.7772213 (538.77 it/sec) -training >> step=9521700, episode=1588 reward=0.7928335 (481.17 it/sec) -training >> step=9521800, episode=1588 reward=0.7966839 (464.63 it/sec) -training >> step=9521900, episode=1588 reward=0.7938384 (511.38 it/sec) -training >> step=9522000, episode=1588 reward=0.7982607 (506.37 it/sec) -training >> step=9522100, episode=1588 reward=0.8005874 (514.23 it/sec) -training >> step=9522200, episode=1588 reward=0.7888757 (484.55 it/sec) -training >> step=9522300, episode=1588 reward=0.7971835 (494.11 it/sec) -training >> step=9522400, episode=1588 reward=0.8033363 (458.68 it/sec) -training >> step=9522500, episode=1588 reward=0.7707835 (482.33 it/sec) -training >> step=9522600, episode=1588 reward=0.7958542 (496.45 it/sec) -training >> step=9522700, episode=1588 reward=0.7878157 (502.67 it/sec) -training >> step=9522800, episode=1588 reward=0.7885701 (502.40 it/sec) -training >> step=9522900, episode=1588 reward=0.7822274 (489.72 it/sec) -training >> step=9523000, episode=1588 reward=0.7847798 (516.35 it/sec) -training >> step=9523100, episode=1588 reward=0.7921684 (507.07 it/sec) -training >> step=9523200, episode=1588 reward=0.7975076 (484.61 it/sec) -training >> step=9523300, episode=1588 reward=0.8019674 (483.55 it/sec) -training >> step=9523400, episode=1588 reward=0.7978681 (519.71 it/sec) -training >> step=9523500, episode=1588 reward=0.7889857 (491.29 it/sec) -training >> step=9523600, episode=1588 reward=0.7975948 (516.15 it/sec) -training >> step=9523700, episode=1588 reward=0.8101716 (480.85 it/sec) -training >> step=9523800, episode=1588 reward=0.791088 (445.78 it/sec) -training >> step=9523900, episode=1588 reward=0.7910957 (486.68 it/sec) -training >> step=9524000, episode=1588 reward=0.7815275 (491.38 it/sec) -training >> step=9524100, episode=1588 reward=0.7826425 (517.11 it/sec) -training >> step=9524200, episode=1588 reward=0.787038 (476.30 it/sec) -training >> step=9524300, episode=1588 reward=0.8024884 (476.79 it/sec) -training >> step=9524400, episode=1588 reward=0.7842054 (508.66 it/sec) -training >> step=9524500, episode=1588 reward=0.7958243 (510.65 it/sec) -training >> step=9524600, episode=1588 reward=0.777365 (505.25 it/sec) -training >> step=9524700, episode=1588 reward=0.7831403 (503.13 it/sec) -training >> step=9524800, episode=1588 reward=0.7984969 (498.21 it/sec) -training >> step=9524900, episode=1588 reward=0.8020382 (515.52 it/sec) -training >> step=9525000, episode=1588 reward=0.7923332 (530.95 it/sec) -training >> step=9525100, episode=1588 reward=0.7831746 (531.95 it/sec) -training >> step=9525200, episode=1588 reward=0.7979437 (499.29 it/sec) -training >> step=9525300, episode=1588 reward=0.7863396 (485.52 it/sec) -training >> step=9525400, episode=1588 reward=0.7917619 (491.78 it/sec) -training >> step=9525500, episode=1588 reward=0.776315 (506.45 it/sec) -training >> step=9525600, episode=1588 reward=0.7922006 (503.45 it/sec) -training >> step=9525700, episode=1588 reward=0.7917978 (474.91 it/sec) -training >> step=9525800, episode=1588 reward=0.7696224 (461.66 it/sec) -training >> step=9525900, episode=1588 reward=0.7690197 (510.14 it/sec) -training >> step=9526000, episode=1588 reward=0.8025326 (502.71 it/sec) -training >> step=9526100, episode=1588 reward=0.7992871 (544.06 it/sec) -training >> step=9526200, episode=1588 reward=0.7935507 (507.21 it/sec) -training >> step=9526300, episode=1588 reward=0.7836204 (532.51 it/sec) -training >> step=9526400, episode=1588 reward=0.774354 (444.24 it/sec) -training >> step=9526500, episode=1588 reward=0.7705085 (485.99 it/sec) -training >> step=9526600, episode=1588 reward=0.7846123 (564.95 it/sec) -training >> step=9526700, episode=1588 reward=0.7936038 (529.11 it/sec) -training >> step=9526800, episode=1588 reward=0.8114049 (477.84 it/sec) -training >> step=9526900, episode=1588 reward=0.7830043 (469.40 it/sec) -training >> step=9527000, episode=1588 reward=0.7981495 (511.52 it/sec) -training >> step=9527100, episode=1588 reward=0.78392 (472.09 it/sec) -training >> step=9527200, episode=1588 reward=0.8014405 (441.61 it/sec) -training >> step=9527300, episode=1589 reward=0.7842797 (121.22 it/sec) -training >> step=9527400, episode=1589 reward=0.7823831 (524.85 it/sec) -training >> step=9527500, episode=1589 reward=0.7886094 (525.97 it/sec) -training >> step=9527600, episode=1589 reward=0.7836935 (494.44 it/sec) -training >> step=9527700, episode=1589 reward=0.7859894 (454.02 it/sec) -training >> step=9527800, episode=1589 reward=0.7849195 (422.08 it/sec) -training >> step=9527900, episode=1589 reward=0.7945994 (397.88 it/sec) -training >> step=9528000, episode=1589 reward=0.7990716 (405.72 it/sec) -training >> step=9528100, episode=1589 reward=0.7922641 (470.69 it/sec) -training >> step=9528200, episode=1589 reward=0.7906083 (500.75 it/sec) -training >> step=9528300, episode=1589 reward=0.8060312 (491.27 it/sec) -training >> step=9528400, episode=1589 reward=0.8062781 (479.16 it/sec) -training >> step=9528500, episode=1589 reward=0.7916057 (488.98 it/sec) -training >> step=9528600, episode=1589 reward=0.7817968 (484.88 it/sec) -training >> step=9528700, episode=1589 reward=0.7965542 (527.09 it/sec) -training >> step=9528800, episode=1589 reward=0.7813758 (523.94 it/sec) -training >> step=9528900, episode=1589 reward=0.7975109 (478.05 it/sec) -training >> step=9529000, episode=1589 reward=0.7710524 (471.28 it/sec) -training >> step=9529100, episode=1589 reward=0.7839069 (500.36 it/sec) -training >> step=9529200, episode=1589 reward=0.810304 (540.76 it/sec) -training >> step=9529300, episode=1589 reward=0.7905403 (508.19 it/sec) -training >> step=9529400, episode=1589 reward=0.8075009 (496.84 it/sec) -training >> step=9529500, episode=1589 reward=0.7848231 (519.06 it/sec) -training >> step=9529600, episode=1589 reward=0.8090028 (460.71 it/sec) -training >> step=9529700, episode=1589 reward=0.815812 (508.02 it/sec) -training >> step=9529800, episode=1589 reward=0.7766116 (520.16 it/sec) -training >> step=9529900, episode=1589 reward=0.7840621 (525.40 it/sec) -training >> step=9530000, episode=1589 reward=0.7898129 (487.18 it/sec) -training >> step=9530100, episode=1589 reward=0.784898 (468.56 it/sec) -training >> step=9530200, episode=1589 reward=0.7848318 (502.67 it/sec) -training >> step=9530300, episode=1589 reward=0.7929586 (521.92 it/sec) -training >> step=9530400, episode=1589 reward=0.7942955 (489.47 it/sec) -training >> step=9530500, episode=1589 reward=0.7838856 (418.79 it/sec) -training >> step=9530600, episode=1589 reward=0.7897479 (423.34 it/sec) -training >> step=9530700, episode=1589 reward=0.7806535 (441.89 it/sec) -training >> step=9530800, episode=1589 reward=0.7943447 (450.05 it/sec) -training >> step=9530900, episode=1589 reward=0.7930238 (469.15 it/sec) -training >> step=9531000, episode=1589 reward=0.7888333 (503.69 it/sec) -training >> step=9531100, episode=1589 reward=0.7889454 (467.42 it/sec) -training >> step=9531200, episode=1589 reward=0.7844528 (487.55 it/sec) -training >> step=9531300, episode=1589 reward=0.7804013 (464.53 it/sec) -training >> step=9531400, episode=1589 reward=0.7837138 (420.26 it/sec) -training >> step=9531500, episode=1589 reward=0.7924935 (479.39 it/sec) -training >> step=9531600, episode=1589 reward=0.7954429 (506.19 it/sec) -training >> step=9531700, episode=1589 reward=0.7975584 (545.72 it/sec) -training >> step=9531800, episode=1589 reward=0.77868 (461.94 it/sec) -training >> step=9531900, episode=1589 reward=0.7905532 (467.05 it/sec) -training >> step=9532000, episode=1589 reward=0.7970635 (474.99 it/sec) -training >> step=9532100, episode=1589 reward=0.786257 (493.99 it/sec) -training >> step=9532200, episode=1589 reward=0.7898439 (488.41 it/sec) -training >> step=9532300, episode=1589 reward=0.794392 (478.39 it/sec) -training >> step=9532400, episode=1589 reward=0.7661456 (486.97 it/sec) -training >> step=9532500, episode=1589 reward=0.7917543 (453.19 it/sec) -training >> step=9532600, episode=1589 reward=0.7925946 (494.34 it/sec) -training >> step=9532700, episode=1589 reward=0.8056394 (498.44 it/sec) -training >> step=9532800, episode=1589 reward=0.7909938 (472.41 it/sec) -training >> step=9532900, episode=1589 reward=0.8132259 (465.77 it/sec) -training >> step=9533000, episode=1589 reward=0.7891341 (467.42 it/sec) -training >> step=9533100, episode=1589 reward=0.8024516 (490.20 it/sec) -training >> step=9533200, episode=1589 reward=0.7859263 (517.02 it/sec) -training >> step=9533300, episode=1590 reward=0.7852214 (61.08 it/sec) -training >> step=9533400, episode=1590 reward=0.7888485 (485.38 it/sec) -training >> step=9533500, episode=1590 reward=0.7842133 (487.71 it/sec) -training >> step=9533600, episode=1590 reward=0.8056327 (476.79 it/sec) -training >> step=9533700, episode=1590 reward=0.7967864 (471.19 it/sec) -training >> step=9533800, episode=1590 reward=0.7890746 (476.20 it/sec) -training >> step=9533900, episode=1590 reward=0.7828554 (476.78 it/sec) -training >> step=9534000, episode=1590 reward=0.809349 (471.63 it/sec) -training >> step=9534100, episode=1590 reward=0.79844 (505.63 it/sec) -training >> step=9534200, episode=1590 reward=0.7845105 (443.58 it/sec) -training >> step=9534300, episode=1590 reward=0.8011441 (434.37 it/sec) -training >> step=9534400, episode=1590 reward=0.7923749 (428.34 it/sec) -training >> step=9534500, episode=1590 reward=0.7876225 (467.05 it/sec) -training >> step=9534600, episode=1590 reward=0.7841192 (463.50 it/sec) -training >> step=9534700, episode=1590 reward=0.7929881 (466.34 it/sec) -training >> step=9534800, episode=1590 reward=0.7948483 (481.50 it/sec) -training >> step=9534900, episode=1590 reward=0.7870416 (477.53 it/sec) -training >> step=9535000, episode=1590 reward=0.8020496 (440.48 it/sec) -training >> step=9535100, episode=1590 reward=0.7974914 (465.28 it/sec) -training >> step=9535200, episode=1590 reward=0.7937591 (415.81 it/sec) -training >> step=9535300, episode=1590 reward=0.8057333 (471.04 it/sec) -training >> step=9535400, episode=1590 reward=0.7912799 (485.40 it/sec) -training >> step=9535500, episode=1590 reward=0.7963632 (484.42 it/sec) -training >> step=9535600, episode=1590 reward=0.7824046 (442.44 it/sec) -training >> step=9535700, episode=1590 reward=0.7938774 (463.48 it/sec) -training >> step=9535800, episode=1590 reward=0.7660021 (471.01 it/sec) -training >> step=9535900, episode=1590 reward=0.7778118 (440.93 it/sec) -training >> step=9536000, episode=1590 reward=0.7835075 (473.66 it/sec) -training >> step=9536100, episode=1590 reward=0.7796817 (486.21 it/sec) -training >> step=9536200, episode=1590 reward=0.7879083 (438.07 it/sec) -training >> step=9536300, episode=1590 reward=0.7889698 (484.20 it/sec) -training >> step=9536400, episode=1590 reward=0.7840096 (505.95 it/sec) -training >> step=9536500, episode=1590 reward=0.8324443 (492.33 it/sec) -training >> step=9536600, episode=1590 reward=0.7892782 (449.96 it/sec) -training >> step=9536700, episode=1590 reward=0.7943105 (440.49 it/sec) -training >> step=9536800, episode=1590 reward=0.7847957 (462.04 it/sec) -training >> step=9536900, episode=1590 reward=0.7843649 (446.99 it/sec) -training >> step=9537000, episode=1590 reward=0.8068497 (453.01 it/sec) -training >> step=9537100, episode=1590 reward=0.7732875 (456.31 it/sec) -training >> step=9537200, episode=1590 reward=0.7829461 (521.48 it/sec) -training >> step=9537300, episode=1590 reward=0.7804204 (446.42 it/sec) -training >> step=9537400, episode=1590 reward=0.7710643 (458.43 it/sec) -training >> step=9537500, episode=1590 reward=0.8065377 (452.54 it/sec) -training >> step=9537600, episode=1590 reward=0.7930024 (491.69 it/sec) -training >> step=9537700, episode=1590 reward=0.8041261 (464.69 it/sec) -training >> step=9537800, episode=1590 reward=0.7897127 (494.29 it/sec) -training >> step=9537900, episode=1590 reward=0.784907 (446.23 it/sec) -training >> step=9538000, episode=1590 reward=0.8075998 (393.65 it/sec) -training >> step=9538100, episode=1590 reward=0.7904271 (441.73 it/sec) -training >> step=9538200, episode=1590 reward=0.7828295 (456.66 it/sec) -training >> step=9538300, episode=1590 reward=0.7912682 (531.96 it/sec) -training >> step=9538400, episode=1590 reward=0.7877224 (474.83 it/sec) -training >> step=9538500, episode=1590 reward=0.7915551 (414.87 it/sec) -training >> step=9538600, episode=1590 reward=0.8028595 (484.14 it/sec) -training >> step=9538700, episode=1590 reward=0.7987843 (478.57 it/sec) -training >> step=9538800, episode=1590 reward=0.7977595 (447.84 it/sec) -training >> step=9538900, episode=1590 reward=0.7889975 (452.99 it/sec) -training >> step=9539000, episode=1590 reward=0.7778008 (467.68 it/sec) -training >> step=9539100, episode=1590 reward=0.7904958 (441.75 it/sec) -training >> step=9539200, episode=1590 reward=0.7797217 (489.97 it/sec) -training >> step=9539300, episode=1591 reward=0.7886308 (45.12 it/sec) -training >> step=9539400, episode=1591 reward=0.7892961 (451.05 it/sec) -training >> step=9539500, episode=1591 reward=0.8013652 (442.83 it/sec) -training >> step=9539600, episode=1591 reward=0.7728533 (461.83 it/sec) -training >> step=9539700, episode=1591 reward=0.7872084 (505.19 it/sec) -training >> step=9539800, episode=1591 reward=0.7890961 (479.35 it/sec) -training >> step=9539900, episode=1591 reward=0.8073339 (425.24 it/sec) -training >> step=9540000, episode=1591 reward=0.7994022 (516.14 it/sec) -training >> step=9540100, episode=1591 reward=0.8037261 (468.74 it/sec) -training >> step=9540200, episode=1591 reward=0.7924395 (443.58 it/sec) -training >> step=9540300, episode=1591 reward=0.8026558 (403.50 it/sec) -training >> step=9540400, episode=1591 reward=0.8051513 (402.78 it/sec) -training >> step=9540500, episode=1591 reward=0.8003787 (411.35 it/sec) -training >> step=9540600, episode=1591 reward=0.7989829 (435.11 it/sec) -training >> step=9540700, episode=1591 reward=0.8084378 (472.25 it/sec) -training >> step=9540800, episode=1591 reward=0.7909498 (471.12 it/sec) -training >> step=9540900, episode=1591 reward=0.7795066 (498.72 it/sec) -training >> step=9541000, episode=1591 reward=0.7939748 (465.79 it/sec) -training >> step=9541100, episode=1591 reward=0.7768515 (474.31 it/sec) -training >> step=9541200, episode=1591 reward=0.8017357 (471.22 it/sec) -training >> step=9541300, episode=1591 reward=0.7840205 (477.04 it/sec) -training >> step=9541400, episode=1591 reward=0.8025526 (454.52 it/sec) -training >> step=9541500, episode=1591 reward=0.7743116 (480.28 it/sec) -training >> step=9541600, episode=1591 reward=0.7871494 (450.71 it/sec) -training >> step=9541700, episode=1591 reward=0.7853226 (476.04 it/sec) -training >> step=9541800, episode=1591 reward=0.7889383 (486.03 it/sec) -training >> step=9541900, episode=1591 reward=0.8102799 (435.14 it/sec) -training >> step=9542000, episode=1591 reward=0.7768446 (517.88 it/sec) -training >> step=9542100, episode=1591 reward=0.8004006 (498.10 it/sec) -training >> step=9542200, episode=1591 reward=0.7886724 (489.51 it/sec) -training >> step=9542300, episode=1591 reward=0.7857034 (448.27 it/sec) -training >> step=9542400, episode=1591 reward=0.7924982 (519.41 it/sec) -training >> step=9542500, episode=1591 reward=0.7980448 (463.08 it/sec) -training >> step=9542600, episode=1591 reward=0.7627733 (449.60 it/sec) -training >> step=9542700, episode=1591 reward=0.7732776 (436.05 it/sec) -training >> step=9542800, episode=1591 reward=0.7696596 (498.91 it/sec) -training >> step=9542900, episode=1591 reward=0.7972729 (465.85 it/sec) -training >> step=9543000, episode=1591 reward=0.7909055 (451.36 it/sec) -training >> step=9543100, episode=1591 reward=0.7939155 (508.47 it/sec) -training >> step=9543200, episode=1591 reward=0.8144286 (464.72 it/sec) -training >> step=9543300, episode=1591 reward=0.805613 (500.31 it/sec) -training >> step=9543400, episode=1591 reward=0.7956405 (472.69 it/sec) -training >> step=9543500, episode=1591 reward=0.8015583 (483.84 it/sec) -training >> step=9543600, episode=1591 reward=0.7616372 (424.11 it/sec) -training >> step=9543700, episode=1591 reward=0.7739815 (457.48 it/sec) -training >> step=9543800, episode=1591 reward=0.7728899 (461.10 it/sec) -training >> step=9543900, episode=1591 reward=0.7781932 (478.81 it/sec) -training >> step=9544000, episode=1591 reward=0.7927231 (533.03 it/sec) -training >> step=9544100, episode=1591 reward=0.8062639 (445.16 it/sec) -training >> step=9544200, episode=1591 reward=0.7850011 (465.48 it/sec) -training >> step=9544300, episode=1591 reward=0.7775867 (502.00 it/sec) -training >> step=9544400, episode=1591 reward=0.7892187 (480.05 it/sec) -training >> step=9544500, episode=1591 reward=0.7899762 (504.39 it/sec) -training >> step=9544600, episode=1591 reward=0.7804938 (520.29 it/sec) -training >> step=9544700, episode=1591 reward=0.8058274 (468.58 it/sec) -training >> step=9544800, episode=1591 reward=0.7643439 (476.86 it/sec) -training >> step=9544900, episode=1591 reward=0.7893904 (484.55 it/sec) -training >> step=9545000, episode=1591 reward=0.7762156 (498.01 it/sec) -training >> step=9545100, episode=1591 reward=0.7827412 (398.65 it/sec) -training >> step=9545200, episode=1591 reward=0.793593 (442.50 it/sec) -training >> step=9545300, episode=1592 reward=0.7883166 (46.89 it/sec) -training >> step=9545400, episode=1592 reward=0.7761571 (444.78 it/sec) -training >> step=9545500, episode=1592 reward=0.7798803 (503.30 it/sec) -training >> step=9545600, episode=1592 reward=0.7832099 (480.30 it/sec) -training >> step=9545700, episode=1592 reward=0.7899317 (452.50 it/sec) -training >> step=9545800, episode=1592 reward=0.8089253 (479.84 it/sec) -training >> step=9545900, episode=1592 reward=0.7850584 (492.30 it/sec) -training >> step=9546000, episode=1592 reward=0.7838174 (472.61 it/sec) -training >> step=9546100, episode=1592 reward=0.7989028 (451.18 it/sec) -training >> step=9546200, episode=1592 reward=0.7961762 (486.86 it/sec) -training >> step=9546300, episode=1592 reward=0.7997759 (431.57 it/sec) -training >> step=9546400, episode=1592 reward=0.7974187 (469.97 it/sec) -training >> step=9546500, episode=1592 reward=0.7827246 (502.57 it/sec) -training >> step=9546600, episode=1592 reward=0.7993297 (490.16 it/sec) -training >> step=9546700, episode=1592 reward=0.7863658 (426.71 it/sec) -training >> step=9546800, episode=1592 reward=0.8035395 (486.98 it/sec) -training >> step=9546900, episode=1592 reward=0.7859944 (464.62 it/sec) -training >> step=9547000, episode=1592 reward=0.8107309 (478.37 it/sec) -training >> step=9547100, episode=1592 reward=0.7914094 (467.03 it/sec) -training >> step=9547200, episode=1592 reward=0.7679245 (462.22 it/sec) -training >> step=9547300, episode=1592 reward=0.798032 (442.78 it/sec) -training >> step=9547400, episode=1592 reward=0.8004532 (426.38 it/sec) -training >> step=9547500, episode=1592 reward=0.7997162 (426.68 it/sec) -training >> step=9547600, episode=1592 reward=0.794393 (446.64 it/sec) -training >> step=9547700, episode=1592 reward=0.7809556 (427.67 it/sec) -training >> step=9547800, episode=1592 reward=0.7928267 (423.03 it/sec) -training >> step=9547900, episode=1592 reward=0.8035554 (466.70 it/sec) -training >> step=9548000, episode=1592 reward=0.7889157 (512.92 it/sec) -training >> step=9548100, episode=1592 reward=0.784572 (446.65 it/sec) -training >> step=9548200, episode=1592 reward=0.8121393 (480.57 it/sec) -training >> step=9548300, episode=1592 reward=0.7921665 (458.08 it/sec) -training >> step=9548400, episode=1592 reward=0.7835624 (438.20 it/sec) -training >> step=9548500, episode=1592 reward=0.7950513 (432.82 it/sec) -training >> step=9548600, episode=1592 reward=0.7853512 (509.40 it/sec) -training >> step=9548700, episode=1592 reward=0.8010286 (448.57 it/sec) -training >> step=9548800, episode=1592 reward=0.7891882 (490.69 it/sec) -training >> step=9548900, episode=1592 reward=0.7830507 (458.15 it/sec) -training >> step=9549000, episode=1592 reward=0.7794433 (479.04 it/sec) -training >> step=9549100, episode=1592 reward=0.774819 (477.39 it/sec) -training >> step=9549200, episode=1592 reward=0.7829241 (434.49 it/sec) -training >> step=9549300, episode=1592 reward=0.7754545 (504.16 it/sec) -training >> step=9549400, episode=1592 reward=0.812379 (500.42 it/sec) -training >> step=9549500, episode=1592 reward=0.7970669 (465.51 it/sec) -training >> step=9549600, episode=1592 reward=0.785412 (502.24 it/sec) -training >> step=9549700, episode=1592 reward=0.7892683 (517.14 it/sec) -training >> step=9549800, episode=1592 reward=0.7763814 (507.11 it/sec) -training >> step=9549900, episode=1592 reward=0.7934075 (486.20 it/sec) -training >> step=9550000, episode=1592 reward=0.7895036 (455.56 it/sec) -training >> step=9550100, episode=1592 reward=0.7797338 (511.66 it/sec) -training >> step=9550200, episode=1592 reward=0.7612603 (457.34 it/sec) -training >> step=9550300, episode=1592 reward=0.7803174 (442.40 it/sec) -training >> step=9550400, episode=1592 reward=0.7764551 (496.09 it/sec) -training >> step=9550500, episode=1592 reward=0.7989926 (509.57 it/sec) -training >> step=9550600, episode=1592 reward=0.7994556 (478.91 it/sec) -training >> step=9550700, episode=1592 reward=0.7958462 (478.05 it/sec) -training >> step=9550800, episode=1592 reward=0.7992609 (529.51 it/sec) -training >> step=9550900, episode=1592 reward=0.7784294 (477.43 it/sec) -training >> step=9551000, episode=1592 reward=0.7874241 (440.50 it/sec) -training >> step=9551100, episode=1592 reward=0.7739315 (464.51 it/sec) -training >> step=9551200, episode=1592 reward=0.7641922 (470.64 it/sec) -training >> step=9551300, episode=1593 reward=0.7833425 (47.58 it/sec) -training >> step=9551400, episode=1593 reward=0.7833862 (448.54 it/sec) -training >> step=9551500, episode=1593 reward=0.790688 (486.45 it/sec) -training >> step=9551600, episode=1593 reward=0.7772317 (459.05 it/sec) -training >> step=9551700, episode=1593 reward=0.7943013 (476.51 it/sec) -training >> step=9551800, episode=1593 reward=0.7845542 (505.49 it/sec) -training >> step=9551900, episode=1593 reward=0.7911164 (493.50 it/sec) -training >> step=9552000, episode=1593 reward=0.7882569 (518.25 it/sec) -training >> step=9552100, episode=1593 reward=0.8108542 (475.40 it/sec) -training >> step=9552200, episode=1593 reward=0.779364 (507.82 it/sec) -training >> step=9552300, episode=1593 reward=0.8130621 (484.37 it/sec) -training >> step=9552400, episode=1593 reward=0.8112235 (496.45 it/sec) -training >> step=9552500, episode=1593 reward=0.8135123 (389.77 it/sec) -training >> step=9552600, episode=1593 reward=0.8072325 (366.77 it/sec) -training >> step=9552700, episode=1593 reward=0.7894279 (430.47 it/sec) -training >> step=9552800, episode=1593 reward=0.7995657 (393.48 it/sec) -training >> step=9552900, episode=1593 reward=0.7830693 (443.68 it/sec) -training >> step=9553000, episode=1593 reward=0.7871637 (461.63 it/sec) -training >> step=9553100, episode=1593 reward=0.7898594 (521.38 it/sec) -training >> step=9553200, episode=1593 reward=0.7884879 (489.24 it/sec) -training >> step=9553300, episode=1593 reward=0.7923895 (429.50 it/sec) -training >> step=9553400, episode=1593 reward=0.7910755 (509.09 it/sec) -training >> step=9553500, episode=1593 reward=0.7780247 (477.87 it/sec) -training >> step=9553600, episode=1593 reward=0.7915586 (460.96 it/sec) -training >> step=9553700, episode=1593 reward=0.7934626 (476.82 it/sec) -training >> step=9553800, episode=1593 reward=0.7789725 (483.41 it/sec) -training >> step=9553900, episode=1593 reward=0.7874705 (452.11 it/sec) -training >> step=9554000, episode=1593 reward=0.7835928 (510.85 it/sec) -training >> step=9554100, episode=1593 reward=0.7913118 (468.07 it/sec) -training >> step=9554200, episode=1593 reward=0.7972543 (546.01 it/sec) -training >> step=9554300, episode=1593 reward=0.7979432 (488.24 it/sec) -training >> step=9554400, episode=1593 reward=0.7971922 (470.05 it/sec) -training >> step=9554500, episode=1593 reward=0.7805647 (522.04 it/sec) -training >> step=9554600, episode=1593 reward=0.8068277 (479.26 it/sec) -training >> step=9554700, episode=1593 reward=0.7958316 (476.17 it/sec) -training >> step=9554800, episode=1593 reward=0.7847089 (440.75 it/sec) -training >> step=9554900, episode=1593 reward=0.7824707 (482.45 it/sec) -training >> step=9555000, episode=1593 reward=0.7942873 (462.49 it/sec) -training >> step=9555100, episode=1593 reward=0.777979 (459.36 it/sec) -training >> step=9555200, episode=1593 reward=0.7981148 (508.73 it/sec) -training >> step=9555300, episode=1593 reward=0.7904633 (529.10 it/sec) -training >> step=9555400, episode=1593 reward=0.8046452 (485.33 it/sec) -training >> step=9555500, episode=1593 reward=0.7970935 (476.93 it/sec) -training >> step=9555600, episode=1593 reward=0.775364 (512.03 it/sec) -training >> step=9555700, episode=1593 reward=0.7933661 (468.35 it/sec) -training >> step=9555800, episode=1593 reward=0.7955061 (502.12 it/sec) -training >> step=9555900, episode=1593 reward=0.7861749 (451.67 it/sec) -training >> step=9556000, episode=1593 reward=0.7889425 (502.22 it/sec) -training >> step=9556100, episode=1593 reward=0.7948318 (477.10 it/sec) -training >> step=9556200, episode=1593 reward=0.7899672 (450.06 it/sec) -training >> step=9556300, episode=1593 reward=0.7652058 (470.59 it/sec) -training >> step=9556400, episode=1593 reward=0.7812114 (502.81 it/sec) -training >> step=9556500, episode=1593 reward=0.7749434 (480.66 it/sec) -training >> step=9556600, episode=1593 reward=0.7663228 (459.79 it/sec) -training >> step=9556700, episode=1593 reward=0.7760669 (526.08 it/sec) -training >> step=9556800, episode=1593 reward=0.7889057 (481.54 it/sec) -training >> step=9556900, episode=1593 reward=0.7774108 (475.37 it/sec) -training >> step=9557000, episode=1593 reward=0.7920132 (447.82 it/sec) -training >> step=9557100, episode=1593 reward=0.7823814 (516.55 it/sec) -training >> step=9557200, episode=1593 reward=0.7853977 (450.93 it/sec) -training >> step=9557300, episode=1594 reward=0.7828555 (69.00 it/sec) -training >> step=9557400, episode=1594 reward=0.7835908 (520.72 it/sec) -training >> step=9557500, episode=1594 reward=0.7923077 (499.56 it/sec) -training >> step=9557600, episode=1594 reward=0.7818407 (501.68 it/sec) -training >> step=9557700, episode=1594 reward=0.7872151 (483.69 it/sec) -training >> step=9557800, episode=1594 reward=0.7703149 (492.69 it/sec) -training >> step=9557900, episode=1594 reward=0.8082902 (476.48 it/sec) -training >> step=9558000, episode=1594 reward=0.7888648 (482.93 it/sec) -training >> step=9558100, episode=1594 reward=0.776648 (446.85 it/sec) -training >> step=9558200, episode=1594 reward=0.8012942 (452.39 it/sec) -training >> step=9558300, episode=1594 reward=0.7813401 (461.18 it/sec) -training >> step=9558400, episode=1594 reward=0.8108351 (474.08 it/sec) -training >> step=9558500, episode=1594 reward=0.7940992 (494.31 it/sec) -training >> step=9558600, episode=1594 reward=0.8085368 (490.41 it/sec) -training >> step=9558700, episode=1594 reward=0.8032193 (513.32 it/sec) -training >> step=9558800, episode=1594 reward=0.7833165 (495.02 it/sec) -training >> step=9558900, episode=1594 reward=0.7972178 (470.30 it/sec) -training >> step=9559000, episode=1594 reward=0.7782518 (500.17 it/sec) -training >> step=9559100, episode=1594 reward=0.7973933 (493.92 it/sec) -training >> step=9559200, episode=1594 reward=0.7852263 (455.34 it/sec) -training >> step=9559300, episode=1594 reward=0.7811249 (489.32 it/sec) -training >> step=9559400, episode=1594 reward=0.8162365 (512.08 it/sec) -training >> step=9559500, episode=1594 reward=0.7939129 (488.47 it/sec) -training >> step=9559600, episode=1594 reward=0.8012729 (476.89 it/sec) -training >> step=9559700, episode=1594 reward=0.8149852 (468.29 it/sec) -training >> step=9559800, episode=1594 reward=0.7844035 (502.73 it/sec) -training >> step=9559900, episode=1594 reward=0.7920707 (465.41 it/sec) -training >> step=9560000, episode=1594 reward=0.8127301 (502.44 it/sec) -training >> step=9560100, episode=1594 reward=0.7788551 (498.98 it/sec) -training >> step=9560200, episode=1594 reward=0.8059131 (491.79 it/sec) -training >> step=9560300, episode=1594 reward=0.8006597 (508.25 it/sec) -training >> step=9560400, episode=1594 reward=0.7826543 (462.53 it/sec) -training >> step=9560500, episode=1594 reward=0.7881554 (459.41 it/sec) -training >> step=9560600, episode=1594 reward=0.7827733 (481.41 it/sec) -training >> step=9560700, episode=1594 reward=0.7636387 (491.80 it/sec) -training >> step=9560800, episode=1594 reward=0.7798217 (523.04 it/sec) -training >> step=9560900, episode=1594 reward=0.798331 (466.14 it/sec) -training >> step=9561000, episode=1594 reward=0.811904 (524.51 it/sec) -training >> step=9561100, episode=1594 reward=0.7934235 (481.23 it/sec) -training >> step=9561200, episode=1594 reward=0.7622274 (505.92 it/sec) -training >> step=9561300, episode=1594 reward=0.7888932 (493.54 it/sec) -training >> step=9561400, episode=1594 reward=0.772821 (484.22 it/sec) -training >> step=9561500, episode=1594 reward=0.7974662 (484.33 it/sec) -training >> step=9561600, episode=1594 reward=0.7985503 (499.20 it/sec) -training >> step=9561700, episode=1594 reward=0.7826139 (466.96 it/sec) -training >> step=9561800, episode=1594 reward=0.7994917 (493.14 it/sec) -training >> step=9561900, episode=1594 reward=0.7881101 (505.37 it/sec) -training >> step=9562000, episode=1594 reward=0.7704352 (473.47 it/sec) -training >> step=9562100, episode=1594 reward=0.7978282 (505.81 it/sec) -training >> step=9562200, episode=1594 reward=0.7898939 (493.97 it/sec) -training >> step=9562300, episode=1594 reward=0.7776722 (490.43 it/sec) -training >> step=9562400, episode=1594 reward=0.7807463 (485.57 it/sec) -training >> step=9562500, episode=1594 reward=0.8047479 (465.93 it/sec) -training >> step=9562600, episode=1594 reward=0.7958952 (493.88 it/sec) -training >> step=9562700, episode=1594 reward=0.7768542 (503.96 it/sec) -training >> step=9562800, episode=1594 reward=0.7785178 (486.72 it/sec) -training >> step=9562900, episode=1594 reward=0.7948679 (454.22 it/sec) -training >> step=9563000, episode=1594 reward=0.7813731 (461.95 it/sec) -training >> step=9563100, episode=1594 reward=0.7897819 (463.56 it/sec) -training >> step=9563200, episode=1594 reward=0.779458 (501.06 it/sec) -training >> step=9563300, episode=1595 reward=0.7934266 (99.83 it/sec) -training >> step=9563400, episode=1595 reward=0.7936853 (446.17 it/sec) -training >> step=9563500, episode=1595 reward=0.7929025 (467.73 it/sec) -training >> step=9563600, episode=1595 reward=0.7965603 (528.38 it/sec) -training >> step=9563700, episode=1595 reward=0.7817024 (459.19 it/sec) -training >> step=9563800, episode=1595 reward=0.7609844 (473.16 it/sec) -training >> step=9563900, episode=1595 reward=0.8213652 (455.26 it/sec) -training >> step=9564000, episode=1595 reward=0.7853513 (515.31 it/sec) -training >> step=9564100, episode=1595 reward=0.7960312 (475.78 it/sec) -training >> step=9564200, episode=1595 reward=0.7973415 (451.44 it/sec) -training >> step=9564300, episode=1595 reward=0.7996319 (483.63 it/sec) -training >> step=9564400, episode=1595 reward=0.7931363 (488.37 it/sec) -training >> step=9564500, episode=1595 reward=0.7804721 (485.65 it/sec) -training >> step=9564600, episode=1595 reward=0.8044678 (473.35 it/sec) -training >> step=9564700, episode=1595 reward=0.7801166 (502.11 it/sec) -training >> step=9564800, episode=1595 reward=0.7837374 (482.71 it/sec) -training >> step=9564900, episode=1595 reward=0.785805 (497.33 it/sec) -training >> step=9565000, episode=1595 reward=0.8098408 (470.41 it/sec) -training >> step=9565100, episode=1595 reward=0.7754606 (467.27 it/sec) -training >> step=9565200, episode=1595 reward=0.796543 (494.52 it/sec) -training >> step=9565300, episode=1595 reward=0.7957166 (507.32 it/sec) -training >> step=9565400, episode=1595 reward=0.7785943 (492.18 it/sec) -training >> step=9565500, episode=1595 reward=0.7807705 (486.68 it/sec) -training >> step=9565600, episode=1595 reward=0.7712253 (517.21 it/sec) -training >> step=9565700, episode=1595 reward=0.8094602 (483.81 it/sec) -training >> step=9565800, episode=1595 reward=0.7926098 (494.33 it/sec) -training >> step=9565900, episode=1595 reward=0.7870892 (440.39 it/sec) -training >> step=9566000, episode=1595 reward=0.7812821 (428.54 it/sec) -training >> step=9566100, episode=1595 reward=0.7785642 (403.65 it/sec) -training >> step=9566200, episode=1595 reward=0.7962438 (441.78 it/sec) -training >> step=9566300, episode=1595 reward=0.7839615 (399.33 it/sec) -training >> step=9566400, episode=1595 reward=0.7971967 (448.83 it/sec) -training >> step=9566500, episode=1595 reward=0.7918317 (492.95 it/sec) -training >> step=9566600, episode=1595 reward=0.7956131 (477.25 it/sec) -training >> step=9566700, episode=1595 reward=0.7871096 (515.00 it/sec) -training >> step=9566800, episode=1595 reward=0.8007541 (502.49 it/sec) -training >> step=9566900, episode=1595 reward=0.7860096 (465.08 it/sec) -training >> step=9567000, episode=1595 reward=0.8019637 (491.13 it/sec) -training >> step=9567100, episode=1595 reward=0.7869368 (506.70 it/sec) -training >> step=9567200, episode=1595 reward=0.7736153 (461.28 it/sec) -training >> step=9567300, episode=1595 reward=0.7960944 (476.88 it/sec) -training >> step=9567400, episode=1595 reward=0.792583 (485.91 it/sec) -training >> step=9567500, episode=1595 reward=0.7889712 (492.47 it/sec) -training >> step=9567600, episode=1595 reward=0.7722164 (498.04 it/sec) -training >> step=9567700, episode=1595 reward=0.8062227 (500.33 it/sec) -training >> step=9567800, episode=1595 reward=0.7592349 (522.64 it/sec) -training >> step=9567900, episode=1595 reward=0.7809368 (480.24 it/sec) -training >> step=9568000, episode=1595 reward=0.7897044 (471.44 it/sec) -training >> step=9568100, episode=1595 reward=0.7959666 (472.46 it/sec) -training >> step=9568200, episode=1595 reward=0.7931002 (512.17 it/sec) -training >> step=9568300, episode=1595 reward=0.7736305 (473.58 it/sec) -training >> step=9568400, episode=1595 reward=0.7810587 (404.60 it/sec) -training >> step=9568500, episode=1595 reward=0.7905076 (498.68 it/sec) -training >> step=9568600, episode=1595 reward=0.7809089 (545.43 it/sec) -training >> step=9568700, episode=1595 reward=0.7935821 (532.72 it/sec) -training >> step=9568800, episode=1595 reward=0.7949349 (533.96 it/sec) -training >> step=9568900, episode=1595 reward=0.7898374 (496.54 it/sec) -training >> step=9569000, episode=1595 reward=0.7766636 (484.70 it/sec) -training >> step=9569100, episode=1595 reward=0.7893221 (528.52 it/sec) -training >> step=9569200, episode=1595 reward=0.778831 (509.08 it/sec) -training >> step=9569300, episode=1596 reward=0.7793556 (74.31 it/sec) -training >> step=9569400, episode=1596 reward=0.7786511 (474.20 it/sec) -training >> step=9569500, episode=1596 reward=0.792922 (454.41 it/sec) -training >> step=9569600, episode=1596 reward=0.791594 (469.72 it/sec) -training >> step=9569700, episode=1596 reward=0.7819413 (483.75 it/sec) -training >> step=9569800, episode=1596 reward=0.7865966 (471.67 it/sec) -training >> step=9569900, episode=1596 reward=0.7974436 (464.51 it/sec) -training >> step=9570000, episode=1596 reward=0.8038642 (495.41 it/sec) -training >> step=9570100, episode=1596 reward=0.7795445 (453.73 it/sec) -training >> step=9570200, episode=1596 reward=0.7861499 (486.92 it/sec) -training >> step=9570300, episode=1596 reward=0.7898125 (494.49 it/sec) -training >> step=9570400, episode=1596 reward=0.7633852 (484.13 it/sec) -training >> step=9570500, episode=1596 reward=0.8078453 (513.35 it/sec) -training >> step=9570600, episode=1596 reward=0.7910709 (486.23 it/sec) -training >> step=9570700, episode=1596 reward=0.7967236 (493.08 it/sec) -training >> step=9570800, episode=1596 reward=0.8076935 (467.78 it/sec) -training >> step=9570900, episode=1596 reward=0.8000813 (474.21 it/sec) -training >> step=9571000, episode=1596 reward=0.7884191 (485.58 it/sec) -training >> step=9571100, episode=1596 reward=0.8138685 (504.05 it/sec) -training >> step=9571200, episode=1596 reward=0.7917498 (486.73 it/sec) -training >> step=9571300, episode=1596 reward=0.7982672 (519.71 it/sec) -training >> step=9571400, episode=1596 reward=0.7869376 (501.99 it/sec) -training >> step=9571500, episode=1596 reward=0.7990334 (503.69 it/sec) -training >> step=9571600, episode=1596 reward=0.7712329 (550.71 it/sec) -training >> step=9571700, episode=1596 reward=0.7996331 (504.60 it/sec) -training >> step=9571800, episode=1596 reward=0.7740976 (506.72 it/sec) -training >> step=9571900, episode=1596 reward=0.8006738 (499.95 it/sec) -training >> step=9572000, episode=1596 reward=0.7855208 (510.33 it/sec) -training >> step=9572100, episode=1596 reward=0.7996004 (546.67 it/sec) -training >> step=9572200, episode=1596 reward=0.7892333 (463.53 it/sec) -training >> step=9572300, episode=1596 reward=0.7741542 (534.36 it/sec) -training >> step=9572400, episode=1596 reward=0.7963296 (483.53 it/sec) -training >> step=9572500, episode=1596 reward=0.7954972 (462.53 it/sec) -training >> step=9572600, episode=1596 reward=0.797931 (541.49 it/sec) -training >> step=9572700, episode=1596 reward=0.799593 (536.11 it/sec) -training >> step=9572800, episode=1596 reward=0.7754171 (496.00 it/sec) -training >> step=9572900, episode=1596 reward=0.7941998 (483.72 it/sec) -training >> step=9573000, episode=1596 reward=0.7605959 (476.16 it/sec) -training >> step=9573100, episode=1596 reward=0.7908739 (494.26 it/sec) -training >> step=9573200, episode=1596 reward=0.7763195 (533.43 it/sec) -training >> step=9573300, episode=1596 reward=0.7881218 (530.05 it/sec) -training >> step=9573400, episode=1596 reward=0.7974343 (556.44 it/sec) -training >> step=9573500, episode=1596 reward=0.7784865 (496.09 it/sec) -training >> step=9573600, episode=1596 reward=0.8023816 (528.37 it/sec) -training >> step=9573700, episode=1596 reward=0.7977235 (534.81 it/sec) -training >> step=9573800, episode=1596 reward=0.7960086 (533.25 it/sec) -training >> step=9573900, episode=1596 reward=0.8063766 (483.32 it/sec) -training >> step=9574000, episode=1596 reward=0.7914425 (477.74 it/sec) -training >> step=9574100, episode=1596 reward=0.7988726 (493.53 it/sec) -training >> step=9574200, episode=1596 reward=0.7903337 (471.33 it/sec) -training >> step=9574300, episode=1596 reward=0.795556 (468.97 it/sec) -training >> step=9574400, episode=1596 reward=0.8051209 (519.35 it/sec) -training >> step=9574500, episode=1596 reward=0.7557105 (545.51 it/sec) -training >> step=9574600, episode=1596 reward=0.7865053 (469.61 it/sec) -training >> step=9574700, episode=1596 reward=0.7825928 (512.23 it/sec) -training >> step=9574800, episode=1596 reward=0.7805675 (549.99 it/sec) -training >> step=9574900, episode=1596 reward=0.7667083 (561.35 it/sec) -training >> step=9575000, episode=1596 reward=0.7821944 (523.98 it/sec) -training >> step=9575100, episode=1596 reward=0.7798334 (437.16 it/sec) -training >> step=9575200, episode=1596 reward=0.8075677 (517.68 it/sec) -training >> step=9575300, episode=1597 reward=0.7846497 (93.95 it/sec) -training >> step=9575400, episode=1597 reward=0.7725018 (508.20 it/sec) -training >> step=9575500, episode=1597 reward=0.776431 (494.81 it/sec) -training >> step=9575600, episode=1597 reward=0.7939367 (553.97 it/sec) -training >> step=9575700, episode=1597 reward=0.7594359 (461.32 it/sec) -training >> step=9575800, episode=1597 reward=0.7830527 (525.27 it/sec) -training >> step=9575900, episode=1597 reward=0.7957386 (572.83 it/sec) -training >> step=9576000, episode=1597 reward=0.8140282 (549.89 it/sec) -training >> step=9576100, episode=1597 reward=0.7911531 (533.77 it/sec) -training >> step=9576200, episode=1597 reward=0.7917673 (530.80 it/sec) -training >> step=9576300, episode=1597 reward=0.7965991 (482.05 it/sec) -training >> step=9576400, episode=1597 reward=0.7805011 (490.90 it/sec) -training >> step=9576500, episode=1597 reward=0.8008657 (549.85 it/sec) -training >> step=9576600, episode=1597 reward=0.8040266 (535.08 it/sec) -training >> step=9576700, episode=1597 reward=0.8058875 (533.57 it/sec) -training >> step=9576800, episode=1597 reward=0.7968512 (536.21 it/sec) -training >> step=9576900, episode=1597 reward=0.7992889 (504.49 it/sec) -training >> step=9577000, episode=1597 reward=0.7863502 (509.88 it/sec) -training >> step=9577100, episode=1597 reward=0.7890263 (503.76 it/sec) -training >> step=9577200, episode=1597 reward=0.7881461 (558.65 it/sec) -training >> step=9577300, episode=1597 reward=0.7955389 (454.76 it/sec) -training >> step=9577400, episode=1597 reward=0.7948056 (481.89 it/sec) -training >> step=9577500, episode=1597 reward=0.7988368 (526.07 it/sec) -training >> step=9577600, episode=1597 reward=0.7817253 (477.77 it/sec) -training >> step=9577700, episode=1597 reward=0.787976 (503.85 it/sec) -training >> step=9577800, episode=1597 reward=0.8039877 (503.50 it/sec) -training >> step=9577900, episode=1597 reward=0.7835444 (526.66 it/sec) -training >> step=9578000, episode=1597 reward=0.7834051 (462.79 it/sec) -training >> step=9578100, episode=1597 reward=0.77228 (493.19 it/sec) -training >> step=9578200, episode=1597 reward=0.7859542 (539.86 it/sec) -training >> step=9578300, episode=1597 reward=0.813651 (529.73 it/sec) -training >> step=9578400, episode=1597 reward=0.7832417 (492.22 it/sec) -training >> step=9578500, episode=1597 reward=0.7816318 (503.57 it/sec) -training >> step=9578600, episode=1597 reward=0.7748233 (558.41 it/sec) -training >> step=9578700, episode=1597 reward=0.7679119 (530.47 it/sec) -training >> step=9578800, episode=1597 reward=0.7997153 (528.49 it/sec) -training >> step=9578900, episode=1597 reward=0.8003168 (481.05 it/sec) -training >> step=9579000, episode=1597 reward=0.820987 (511.25 it/sec) -training >> step=9579100, episode=1597 reward=0.7853941 (510.35 it/sec) -training >> step=9579200, episode=1597 reward=0.7896238 (527.00 it/sec) -training >> step=9579300, episode=1597 reward=0.7819273 (527.51 it/sec) -training >> step=9579400, episode=1597 reward=0.8002759 (534.19 it/sec) -training >> step=9579500, episode=1597 reward=0.7785615 (499.70 it/sec) -training >> step=9579600, episode=1597 reward=0.8013369 (514.70 it/sec) -training >> step=9579700, episode=1597 reward=0.7734412 (550.58 it/sec) -training >> step=9579800, episode=1597 reward=0.7768689 (515.13 it/sec) -training >> step=9579900, episode=1597 reward=0.7673998 (456.49 it/sec) -training >> step=9580000, episode=1597 reward=0.7925657 (411.24 it/sec) -training >> step=9580100, episode=1597 reward=0.7775968 (453.86 it/sec) -training >> step=9580200, episode=1597 reward=0.7634768 (364.59 it/sec) -training >> step=9580300, episode=1597 reward=0.7839424 (450.72 it/sec) -training >> step=9580400, episode=1597 reward=0.8053563 (466.56 it/sec) -training >> step=9580500, episode=1597 reward=0.7693781 (541.80 it/sec) -training >> step=9580600, episode=1597 reward=0.7872131 (530.05 it/sec) -training >> step=9580700, episode=1597 reward=0.7981052 (507.36 it/sec) -training >> step=9580800, episode=1597 reward=0.7905427 (543.44 it/sec) -training >> step=9580900, episode=1597 reward=0.8089163 (480.27 it/sec) -training >> step=9581000, episode=1597 reward=0.7891617 (512.95 it/sec) -training >> step=9581100, episode=1597 reward=0.7957862 (544.44 it/sec) -training >> step=9581200, episode=1597 reward=0.7868791 (532.49 it/sec) -training >> step=9581300, episode=1598 reward=0.7955953 (92.40 it/sec) -training >> step=9581400, episode=1598 reward=0.7937548 (432.69 it/sec) -training >> step=9581500, episode=1598 reward=0.7854417 (465.71 it/sec) -training >> step=9581600, episode=1598 reward=0.7873886 (462.68 it/sec) -training >> step=9581700, episode=1598 reward=0.7927863 (440.05 it/sec) -training >> step=9581800, episode=1598 reward=0.7854872 (517.71 it/sec) -training >> step=9581900, episode=1598 reward=0.8088195 (522.70 it/sec) -training >> step=9582000, episode=1598 reward=0.7927334 (441.97 it/sec) -training >> step=9582100, episode=1598 reward=0.8006463 (535.08 it/sec) -training >> step=9582200, episode=1598 reward=0.7883267 (546.98 it/sec) -training >> step=9582300, episode=1598 reward=0.7994474 (513.70 it/sec) -training >> step=9582400, episode=1598 reward=0.7945299 (491.11 it/sec) -training >> step=9582500, episode=1598 reward=0.7846131 (520.16 it/sec) -training >> step=9582600, episode=1598 reward=0.8070396 (448.19 it/sec) -training >> step=9582700, episode=1598 reward=0.7871496 (505.14 it/sec) -training >> step=9582800, episode=1598 reward=0.7954697 (515.92 it/sec) -training >> step=9582900, episode=1598 reward=0.8109292 (497.18 it/sec) -training >> step=9583000, episode=1598 reward=0.796853 (505.75 it/sec) -training >> step=9583100, episode=1598 reward=0.7931175 (468.98 it/sec) -training >> step=9583200, episode=1598 reward=0.7834906 (527.11 it/sec) -training >> step=9583300, episode=1598 reward=0.7872562 (537.78 it/sec) -training >> step=9583400, episode=1598 reward=0.8010026 (490.08 it/sec) -training >> step=9583500, episode=1598 reward=0.8130335 (466.80 it/sec) -training >> step=9583600, episode=1598 reward=0.8085281 (475.32 it/sec) -training >> step=9583700, episode=1598 reward=0.7921298 (487.10 it/sec) -training >> step=9583800, episode=1598 reward=0.8178811 (504.92 it/sec) -training >> step=9583900, episode=1598 reward=0.7805573 (530.79 it/sec) -training >> step=9584000, episode=1598 reward=0.7848457 (542.47 it/sec) -training >> step=9584100, episode=1598 reward=0.7856681 (480.42 it/sec) -training >> step=9584200, episode=1598 reward=0.7962709 (503.07 it/sec) -training >> step=9584300, episode=1598 reward=0.8105711 (486.20 it/sec) -training >> step=9584400, episode=1598 reward=0.795875 (535.32 it/sec) -training >> step=9584500, episode=1598 reward=0.8001494 (478.33 it/sec) -training >> step=9584600, episode=1598 reward=0.8071412 (495.86 it/sec) -training >> step=9584700, episode=1598 reward=0.7821268 (486.78 it/sec) -training >> step=9584800, episode=1598 reward=0.7829939 (514.64 it/sec) -training >> step=9584900, episode=1598 reward=0.778499 (492.23 it/sec) -training >> step=9585000, episode=1598 reward=0.7990056 (485.29 it/sec) -training >> step=9585100, episode=1598 reward=0.7759773 (502.16 it/sec) -training >> step=9585200, episode=1598 reward=0.7891589 (466.72 it/sec) -training >> step=9585300, episode=1598 reward=0.7725701 (517.15 it/sec) -training >> step=9585400, episode=1598 reward=0.8000496 (517.69 it/sec) -training >> step=9585500, episode=1598 reward=0.7867804 (524.67 it/sec) -training >> step=9585600, episode=1598 reward=0.7999406 (485.87 it/sec) -training >> step=9585700, episode=1598 reward=0.7927988 (483.12 it/sec) -training >> step=9585800, episode=1598 reward=0.7753155 (522.63 it/sec) -training >> step=9585900, episode=1598 reward=0.7507263 (488.26 it/sec) -training >> step=9586000, episode=1598 reward=0.7835013 (494.18 it/sec) -training >> step=9586100, episode=1598 reward=0.8026479 (538.36 it/sec) -training >> step=9586200, episode=1598 reward=0.7833577 (478.91 it/sec) -training >> step=9586300, episode=1598 reward=0.8018773 (504.45 it/sec) -training >> step=9586400, episode=1598 reward=0.7838672 (493.37 it/sec) -training >> step=9586500, episode=1598 reward=0.7694578 (540.08 it/sec) -training >> step=9586600, episode=1598 reward=0.7894586 (515.16 it/sec) -training >> step=9586700, episode=1598 reward=0.7845223 (518.04 it/sec) -training >> step=9586800, episode=1598 reward=0.7799034 (468.15 it/sec) -training >> step=9586900, episode=1598 reward=0.798076 (480.57 it/sec) -training >> step=9587000, episode=1598 reward=0.7873455 (429.66 it/sec) -training >> step=9587100, episode=1598 reward=0.7798649 (495.56 it/sec) -training >> step=9587200, episode=1598 reward=0.7987634 (524.25 it/sec) -training >> step=9587300, episode=1599 reward=0.7538389 (129.24 it/sec) -training >> step=9587400, episode=1599 reward=0.7748247 (484.98 it/sec) -training >> step=9587500, episode=1599 reward=0.784329 (462.36 it/sec) -training >> step=9587600, episode=1599 reward=0.7724823 (515.91 it/sec) -training >> step=9587700, episode=1599 reward=0.7989411 (509.68 it/sec) -training >> step=9587800, episode=1599 reward=0.7876608 (510.63 it/sec) -training >> step=9587900, episode=1599 reward=0.8106641 (492.09 it/sec) -training >> step=9588000, episode=1599 reward=0.7842388 (451.88 it/sec) -training >> step=9588100, episode=1599 reward=0.7794309 (495.90 it/sec) -training >> step=9588200, episode=1599 reward=0.7856197 (453.03 it/sec) -training >> step=9588300, episode=1599 reward=0.7983507 (522.43 it/sec) -training >> step=9588400, episode=1599 reward=0.7862473 (510.66 it/sec) -training >> step=9588500, episode=1599 reward=0.7688466 (490.36 it/sec) -training >> step=9588600, episode=1599 reward=0.7832623 (515.48 it/sec) -training >> step=9588700, episode=1599 reward=0.7918903 (501.23 it/sec) -training >> step=9588800, episode=1599 reward=0.7944505 (480.62 it/sec) -training >> step=9588900, episode=1599 reward=0.7794545 (492.86 it/sec) -training >> step=9589000, episode=1599 reward=0.8014704 (531.74 it/sec) -training >> step=9589100, episode=1599 reward=0.7978019 (493.00 it/sec) -training >> step=9589200, episode=1599 reward=0.8088676 (484.46 it/sec) -training >> step=9589300, episode=1599 reward=0.7996722 (527.22 it/sec) -training >> step=9589400, episode=1599 reward=0.7974549 (524.43 it/sec) -training >> step=9589500, episode=1599 reward=0.7833337 (524.99 it/sec) -training >> step=9589600, episode=1599 reward=0.7860453 (472.88 it/sec) -training >> step=9589700, episode=1599 reward=0.8060536 (454.50 it/sec) -training >> step=9589800, episode=1599 reward=0.7914099 (493.66 it/sec) -training >> step=9589900, episode=1599 reward=0.8042363 (498.38 it/sec) -training >> step=9590000, episode=1599 reward=0.7828519 (522.52 it/sec) -training >> step=9590100, episode=1599 reward=0.7747895 (500.25 it/sec) -training >> step=9590200, episode=1599 reward=0.7862347 (495.73 it/sec) -training >> step=9590300, episode=1599 reward=0.7870514 (497.96 it/sec) -training >> step=9590400, episode=1599 reward=0.7912135 (521.02 it/sec) -training >> step=9590500, episode=1599 reward=0.7944024 (513.68 it/sec) -training >> step=9590600, episode=1599 reward=0.7922773 (456.50 it/sec) -training >> step=9590700, episode=1599 reward=0.7938283 (515.23 it/sec) -training >> step=9590800, episode=1599 reward=0.7888248 (481.11 it/sec) -training >> step=9590900, episode=1599 reward=0.7954572 (509.13 it/sec) -training >> step=9591000, episode=1599 reward=0.7893128 (465.00 it/sec) -training >> step=9591100, episode=1599 reward=0.768099 (471.77 it/sec) -training >> step=9591200, episode=1599 reward=0.7754871 (532.93 it/sec) -training >> step=9591300, episode=1599 reward=0.7801348 (494.88 it/sec) -training >> step=9591400, episode=1599 reward=0.7996921 (515.15 it/sec) -training >> step=9591500, episode=1599 reward=0.7679731 (495.13 it/sec) -training >> step=9591600, episode=1599 reward=0.7731954 (504.59 it/sec) -training >> step=9591700, episode=1599 reward=0.8075497 (478.50 it/sec) -training >> step=9591800, episode=1599 reward=0.7947527 (500.08 it/sec) -training >> step=9591900, episode=1599 reward=0.7881948 (542.30 it/sec) -training >> step=9592000, episode=1599 reward=0.7916979 (510.73 it/sec) -training >> step=9592100, episode=1599 reward=0.7933062 (503.59 it/sec) -training >> step=9592200, episode=1599 reward=0.7800268 (459.94 it/sec) -training >> step=9592300, episode=1599 reward=0.763756 (493.16 it/sec) -training >> step=9592400, episode=1599 reward=0.7872975 (512.40 it/sec) -training >> step=9592500, episode=1599 reward=0.7838238 (484.20 it/sec) -training >> step=9592600, episode=1599 reward=0.7593197 (515.19 it/sec) -training >> step=9592700, episode=1599 reward=0.7848914 (474.32 it/sec) -training >> step=9592800, episode=1599 reward=0.8023968 (439.67 it/sec) -training >> step=9592900, episode=1599 reward=0.7683998 (474.63 it/sec) -training >> step=9593000, episode=1599 reward=0.778904 (434.32 it/sec) -training >> step=9593100, episode=1599 reward=0.7800066 (401.09 it/sec) -training >> step=9593200, episode=1599 reward=0.8001699 (418.07 it/sec) -training >> step=9593300, episode=1600 reward=0.7860214 (88.44 it/sec) -training >> step=9593400, episode=1600 reward=0.7930527 (489.55 it/sec) -training >> step=9593500, episode=1600 reward=0.7974055 (401.89 it/sec) -training >> step=9593600, episode=1600 reward=0.7772428 (430.35 it/sec) -training >> step=9593700, episode=1600 reward=0.7831615 (405.81 it/sec) -training >> step=9593800, episode=1600 reward=0.7983941 (385.95 it/sec) -training >> step=9593900, episode=1600 reward=0.8027096 (493.58 it/sec) -training >> step=9594000, episode=1600 reward=0.7823825 (511.98 it/sec) -training >> step=9594100, episode=1600 reward=0.8062984 (485.92 it/sec) -training >> step=9594200, episode=1600 reward=0.77142 (450.60 it/sec) -training >> step=9594300, episode=1600 reward=0.7895826 (523.69 it/sec) -training >> step=9594400, episode=1600 reward=0.8070041 (554.71 it/sec) -training >> step=9594500, episode=1600 reward=0.7824982 (497.75 it/sec) -training >> step=9594600, episode=1600 reward=0.7980198 (479.41 it/sec) -training >> step=9594700, episode=1600 reward=0.7781324 (552.63 it/sec) -training >> step=9594800, episode=1600 reward=0.7954713 (529.43 it/sec) -training >> step=9594900, episode=1600 reward=0.8084944 (534.79 it/sec) -training >> step=9595000, episode=1600 reward=0.8076602 (484.78 it/sec) -training >> step=9595100, episode=1600 reward=0.7920778 (532.81 it/sec) -training >> step=9595200, episode=1600 reward=0.7901356 (526.54 it/sec) -training >> step=9595300, episode=1600 reward=0.7899933 (509.21 it/sec) -training >> step=9595400, episode=1600 reward=0.7806877 (543.05 it/sec) -training >> step=9595500, episode=1600 reward=0.791086 (450.70 it/sec) -training >> step=9595600, episode=1600 reward=0.7948832 (527.02 it/sec) -training >> step=9595700, episode=1600 reward=0.7912992 (521.79 it/sec) -training >> step=9595800, episode=1600 reward=0.7951635 (505.27 it/sec) -training >> step=9595900, episode=1600 reward=0.773972 (506.84 it/sec) -training >> step=9596000, episode=1600 reward=0.7875928 (509.68 it/sec) -training >> step=9596100, episode=1600 reward=0.7930045 (455.82 it/sec) -training >> step=9596200, episode=1600 reward=0.8001531 (511.91 it/sec) -training >> step=9596300, episode=1600 reward=0.7934616 (512.07 it/sec) -training >> step=9596400, episode=1600 reward=0.7928599 (494.79 it/sec) -training >> step=9596500, episode=1600 reward=0.8016019 (503.21 it/sec) -training >> step=9596600, episode=1600 reward=0.7876689 (523.99 it/sec) -training >> step=9596700, episode=1600 reward=0.8112425 (520.20 it/sec) -training >> step=9596800, episode=1600 reward=0.78 (507.15 it/sec) -training >> step=9596900, episode=1600 reward=0.793664 (560.00 it/sec) -training >> step=9597000, episode=1600 reward=0.7889554 (510.17 it/sec) -training >> step=9597100, episode=1600 reward=0.789528 (539.09 it/sec) -training >> step=9597200, episode=1600 reward=0.7961344 (519.72 it/sec) -training >> step=9597300, episode=1600 reward=0.7738566 (513.41 it/sec) -training >> step=9597400, episode=1600 reward=0.7837166 (518.76 it/sec) -training >> step=9597500, episode=1600 reward=0.7945035 (538.32 it/sec) -training >> step=9597600, episode=1600 reward=0.7943831 (480.79 it/sec) -training >> step=9597700, episode=1600 reward=0.7841762 (446.48 it/sec) -training >> step=9597800, episode=1600 reward=0.7857107 (464.67 it/sec) -training >> step=9597900, episode=1600 reward=0.7860566 (461.81 it/sec) -training >> step=9598000, episode=1600 reward=0.7843421 (523.08 it/sec) -training >> step=9598100, episode=1600 reward=0.7858291 (543.13 it/sec) -training >> step=9598200, episode=1600 reward=0.7786814 (487.29 it/sec) -training >> step=9598300, episode=1600 reward=0.7909459 (522.69 it/sec) -training >> step=9598400, episode=1600 reward=0.7819014 (514.46 it/sec) -training >> step=9598500, episode=1600 reward=0.7850971 (498.44 it/sec) -training >> step=9598600, episode=1600 reward=0.79434 (502.64 it/sec) -training >> step=9598700, episode=1600 reward=0.7891319 (502.22 it/sec) -training >> step=9598800, episode=1600 reward=0.7771056 (484.65 it/sec) -training >> step=9598900, episode=1600 reward=0.7900618 (484.09 it/sec) -training >> step=9599000, episode=1600 reward=0.795649 (470.23 it/sec) -training >> step=9599100, episode=1600 reward=0.784977 (532.14 it/sec) -training >> step=9599200, episode=1600 reward=0.7983615 (513.82 it/sec) -training >> step=9599300, episode=1601 reward=0.7911975 (116.85 it/sec) -training >> step=9599400, episode=1601 reward=0.7967576 (500.20 it/sec) -training >> step=9599500, episode=1601 reward=0.7873417 (463.29 it/sec) -training >> step=9599600, episode=1601 reward=0.7843804 (515.56 it/sec) -training >> step=9599700, episode=1601 reward=0.7875608 (495.82 it/sec) -training >> step=9599800, episode=1601 reward=0.7895579 (492.40 it/sec) -training >> step=9599900, episode=1601 reward=0.8114813 (490.39 it/sec) -training >> step=9600000, episode=1601 reward=0.7925032 (527.68 it/sec) -training >> step=9600100, episode=1601 reward=0.796606 (474.55 it/sec) -training >> step=9600200, episode=1601 reward=0.8131113 (555.55 it/sec) -training >> step=9600300, episode=1601 reward=0.7880836 (513.21 it/sec) -training >> step=9600400, episode=1601 reward=0.7834504 (531.85 it/sec) -training >> step=9600500, episode=1601 reward=0.7805946 (507.26 it/sec) -training >> step=9600600, episode=1601 reward=0.7732996 (500.54 it/sec) -training >> step=9600700, episode=1601 reward=0.7849887 (528.13 it/sec) -training >> step=9600800, episode=1601 reward=0.7899924 (470.73 it/sec) -training >> step=9600900, episode=1601 reward=0.7995977 (541.18 it/sec) -training >> step=9601000, episode=1601 reward=0.812328 (503.77 it/sec) -training >> step=9601100, episode=1601 reward=0.7755821 (462.82 it/sec) -training >> step=9601200, episode=1601 reward=0.7807263 (513.32 it/sec) -training >> step=9601300, episode=1601 reward=0.7805167 (532.44 it/sec) -training >> step=9601400, episode=1601 reward=0.7780303 (484.44 it/sec) -training >> step=9601500, episode=1601 reward=0.785938 (518.60 it/sec) -training >> step=9601600, episode=1601 reward=0.8039335 (504.09 it/sec) -training >> step=9601700, episode=1601 reward=0.8024082 (525.11 it/sec) -training >> step=9601800, episode=1601 reward=0.808167 (546.08 it/sec) -training >> step=9601900, episode=1601 reward=0.7876493 (475.30 it/sec) -training >> step=9602000, episode=1601 reward=0.7964898 (523.74 it/sec) -training >> step=9602100, episode=1601 reward=0.7855043 (482.98 it/sec) -training >> step=9602200, episode=1601 reward=0.7938003 (509.50 it/sec) -training >> step=9602300, episode=1601 reward=0.7971757 (538.06 it/sec) -training >> step=9602400, episode=1601 reward=0.7730893 (463.86 it/sec) -training >> step=9602500, episode=1601 reward=0.7968678 (492.17 it/sec) -training >> step=9602600, episode=1601 reward=0.7761667 (451.65 it/sec) -training >> step=9602700, episode=1601 reward=0.79689 (517.93 it/sec) -training >> step=9602800, episode=1601 reward=0.795131 (527.29 it/sec) -training >> step=9602900, episode=1601 reward=0.7694879 (521.36 it/sec) -training >> step=9603000, episode=1601 reward=0.8184649 (509.31 it/sec) -training >> step=9603100, episode=1601 reward=0.7727421 (512.84 it/sec) -training >> step=9603200, episode=1601 reward=0.7901357 (501.08 it/sec) -training >> step=9603300, episode=1601 reward=0.7837876 (485.04 it/sec) -training >> step=9603400, episode=1601 reward=0.7813419 (533.47 it/sec) -training >> step=9603500, episode=1601 reward=0.78419 (475.84 it/sec) -training >> step=9603600, episode=1601 reward=0.7872663 (476.42 it/sec) -training >> step=9603700, episode=1601 reward=0.7745367 (515.34 it/sec) -training >> step=9603800, episode=1601 reward=0.7825794 (529.34 it/sec) -training >> step=9603900, episode=1601 reward=0.7784341 (514.61 it/sec) -training >> step=9604000, episode=1601 reward=0.7753246 (474.04 it/sec) -training >> step=9604100, episode=1601 reward=0.7964158 (561.69 it/sec) -training >> step=9604200, episode=1601 reward=0.7754951 (476.22 it/sec) -training >> step=9604300, episode=1601 reward=0.7926461 (509.20 it/sec) -training >> step=9604400, episode=1601 reward=0.7744758 (510.79 it/sec) -training >> step=9604500, episode=1601 reward=0.7958559 (532.14 it/sec) -training >> step=9604600, episode=1601 reward=0.7881002 (500.97 it/sec) -training >> step=9604700, episode=1601 reward=0.7834958 (477.65 it/sec) -training >> step=9604800, episode=1601 reward=0.792031 (528.41 it/sec) -training >> step=9604900, episode=1601 reward=0.768948 (433.25 it/sec) -training >> step=9605000, episode=1601 reward=0.7975459 (485.83 it/sec) -training >> step=9605100, episode=1601 reward=0.7784601 (482.24 it/sec) -training >> step=9605200, episode=1601 reward=0.7994836 (472.34 it/sec) -training >> step=9605300, episode=1602 reward=0.8055314 (96.20 it/sec) -training >> step=9605400, episode=1602 reward=0.7728606 (506.07 it/sec) -training >> step=9605500, episode=1602 reward=0.772352 (462.11 it/sec) -training >> step=9605600, episode=1602 reward=0.8075177 (506.43 it/sec) -training >> step=9605700, episode=1602 reward=0.7880433 (509.36 it/sec) -training >> step=9605800, episode=1602 reward=0.7972828 (463.73 it/sec) -training >> step=9605900, episode=1602 reward=0.7854604 (539.47 it/sec) -training >> step=9606000, episode=1602 reward=0.7751648 (481.40 it/sec) -training >> step=9606100, episode=1602 reward=0.7761521 (512.25 it/sec) -training >> step=9606200, episode=1602 reward=0.8026997 (507.93 it/sec) -training >> step=9606300, episode=1602 reward=0.7980582 (563.03 it/sec) -training >> step=9606400, episode=1602 reward=0.7932532 (488.17 it/sec) -training >> step=9606500, episode=1602 reward=0.7833055 (531.58 it/sec) -training >> step=9606600, episode=1602 reward=0.8025479 (521.46 it/sec) -training >> step=9606700, episode=1602 reward=0.7969539 (454.97 it/sec) -training >> step=9606800, episode=1602 reward=0.8167097 (452.98 it/sec) -training >> step=9606900, episode=1602 reward=0.7862309 (478.49 it/sec) -training >> step=9607000, episode=1602 reward=0.775859 (496.54 it/sec) -training >> step=9607100, episode=1602 reward=0.7981259 (496.01 it/sec) -training >> step=9607200, episode=1602 reward=0.7831718 (484.61 it/sec) -training >> step=9607300, episode=1602 reward=0.77154 (502.51 it/sec) -training >> step=9607400, episode=1602 reward=0.8027505 (465.48 it/sec) -training >> step=9607500, episode=1602 reward=0.7817814 (494.17 it/sec) -training >> step=9607600, episode=1602 reward=0.8015764 (521.04 it/sec) -training >> step=9607700, episode=1602 reward=0.791872 (494.48 it/sec) -training >> step=9607800, episode=1602 reward=0.772077 (378.76 it/sec) -training >> step=9607900, episode=1602 reward=0.7831952 (402.08 it/sec) -training >> step=9608000, episode=1602 reward=0.7869148 (429.14 it/sec) -training >> step=9608100, episode=1602 reward=0.7848853 (453.20 it/sec) -training >> step=9608200, episode=1602 reward=0.8020709 (483.90 it/sec) -training >> step=9608300, episode=1602 reward=0.7885785 (479.61 it/sec) -training >> step=9608400, episode=1602 reward=0.7954083 (481.84 it/sec) -training >> step=9608500, episode=1602 reward=0.7746735 (454.83 it/sec) -training >> step=9608600, episode=1602 reward=0.7973247 (484.37 it/sec) -training >> step=9608700, episode=1602 reward=0.7892911 (457.01 it/sec) -training >> step=9608800, episode=1602 reward=0.7841592 (490.19 it/sec) -training >> step=9608900, episode=1602 reward=0.8061801 (455.59 it/sec) -training >> step=9609000, episode=1602 reward=0.7848638 (474.05 it/sec) -training >> step=9609100, episode=1602 reward=0.8107141 (412.53 it/sec) -training >> step=9609200, episode=1602 reward=0.7998927 (428.64 it/sec) -training >> step=9609300, episode=1602 reward=0.8076261 (488.58 it/sec) -training >> step=9609400, episode=1602 reward=0.7877482 (532.45 it/sec) -training >> step=9609500, episode=1602 reward=0.8106489 (516.98 it/sec) -training >> step=9609600, episode=1602 reward=0.7754773 (487.23 it/sec) -training >> step=9609700, episode=1602 reward=0.775451 (488.95 it/sec) -training >> step=9609800, episode=1602 reward=0.7862059 (500.97 it/sec) -training >> step=9609900, episode=1602 reward=0.788348 (520.57 it/sec) -training >> step=9610000, episode=1602 reward=0.7650276 (512.83 it/sec) -training >> step=9610100, episode=1602 reward=0.7815278 (440.16 it/sec) -training >> step=9610200, episode=1602 reward=0.7870537 (533.91 it/sec) -training >> step=9610300, episode=1602 reward=0.7843874 (459.12 it/sec) -training >> step=9610400, episode=1602 reward=0.8070042 (519.31 it/sec) -training >> step=9610500, episode=1602 reward=0.7929722 (482.83 it/sec) -training >> step=9610600, episode=1602 reward=0.7952169 (492.36 it/sec) -training >> step=9610700, episode=1602 reward=0.7707855 (435.92 it/sec) -training >> step=9610800, episode=1602 reward=0.7881023 (471.81 it/sec) -training >> step=9610900, episode=1602 reward=0.7750774 (516.05 it/sec) -training >> step=9611000, episode=1602 reward=0.7781843 (502.70 it/sec) -training >> step=9611100, episode=1602 reward=0.7830305 (500.03 it/sec) -training >> step=9611200, episode=1602 reward=0.7845721 (465.35 it/sec) -training >> step=9611300, episode=1603 reward=0.7915408 (89.69 it/sec) -training >> step=9611400, episode=1603 reward=0.7764167 (454.11 it/sec) -training >> step=9611500, episode=1603 reward=0.7824593 (476.74 it/sec) -training >> step=9611600, episode=1603 reward=0.7813494 (511.52 it/sec) -training >> step=9611700, episode=1603 reward=0.7946625 (482.23 it/sec) -training >> step=9611800, episode=1603 reward=0.795479 (464.65 it/sec) -training >> step=9611900, episode=1603 reward=0.7925908 (460.71 it/sec) -training >> step=9612000, episode=1603 reward=0.7842183 (503.56 it/sec) -training >> step=9612100, episode=1603 reward=0.7948889 (478.78 it/sec) -training >> step=9612200, episode=1603 reward=0.7837903 (470.77 it/sec) -training >> step=9612300, episode=1603 reward=0.7940078 (455.27 it/sec) -training >> step=9612400, episode=1603 reward=0.7979603 (486.02 it/sec) -training >> step=9612500, episode=1603 reward=0.8002766 (470.33 it/sec) -training >> step=9612600, episode=1603 reward=0.7878857 (459.89 it/sec) -training >> step=9612700, episode=1603 reward=0.7865754 (479.15 it/sec) -training >> step=9612800, episode=1603 reward=0.8030717 (496.52 it/sec) -training >> step=9612900, episode=1603 reward=0.787523 (496.82 it/sec) -training >> step=9613000, episode=1603 reward=0.8170849 (486.24 it/sec) -training >> step=9613100, episode=1603 reward=0.7934946 (527.78 it/sec) -training >> step=9613200, episode=1603 reward=0.7987755 (475.22 it/sec) -training >> step=9613300, episode=1603 reward=0.7834114 (472.69 it/sec) -training >> step=9613400, episode=1603 reward=0.7872704 (508.63 it/sec) -training >> step=9613500, episode=1603 reward=0.7916822 (498.84 it/sec) -training >> step=9613600, episode=1603 reward=0.7965486 (480.41 it/sec) -training >> step=9613700, episode=1603 reward=0.7784803 (467.94 it/sec) -training >> step=9613800, episode=1603 reward=0.7834795 (473.71 it/sec) -training >> step=9613900, episode=1603 reward=0.7716513 (471.09 it/sec) -training >> step=9614000, episode=1603 reward=0.7932061 (486.11 it/sec) -training >> step=9614100, episode=1603 reward=0.7904283 (489.85 it/sec) -training >> step=9614200, episode=1603 reward=0.7701048 (435.21 it/sec) -training >> step=9614300, episode=1603 reward=0.7877954 (437.25 it/sec) -training >> step=9614400, episode=1603 reward=0.7986051 (458.84 it/sec) -training >> step=9614500, episode=1603 reward=0.7754596 (500.33 it/sec) -training >> step=9614600, episode=1603 reward=0.7949678 (445.46 it/sec) -training >> step=9614700, episode=1603 reward=0.7517053 (457.85 it/sec) -training >> step=9614800, episode=1603 reward=0.8160494 (515.02 it/sec) -training >> step=9614900, episode=1603 reward=0.7886163 (501.15 it/sec) -training >> step=9615000, episode=1603 reward=0.7787023 (494.83 it/sec) -training >> step=9615100, episode=1603 reward=0.7953232 (469.27 it/sec) -training >> step=9615200, episode=1603 reward=0.8135884 (473.45 it/sec) -training >> step=9615300, episode=1603 reward=0.7873679 (508.57 it/sec) -training >> step=9615400, episode=1603 reward=0.8008058 (487.63 it/sec) -training >> step=9615500, episode=1603 reward=0.7843885 (508.57 it/sec) -training >> step=9615600, episode=1603 reward=0.8033515 (460.87 it/sec) -training >> step=9615700, episode=1603 reward=0.7915826 (461.16 it/sec) -training >> step=9615800, episode=1603 reward=0.7939895 (497.32 it/sec) -training >> step=9615900, episode=1603 reward=0.783314 (491.53 it/sec) -training >> step=9616000, episode=1603 reward=0.7792885 (432.40 it/sec) -training >> step=9616100, episode=1603 reward=0.7953451 (424.02 it/sec) -training >> step=9616200, episode=1603 reward=0.7722458 (418.87 it/sec) -training >> step=9616300, episode=1603 reward=0.7851262 (433.46 it/sec) -training >> step=9616400, episode=1603 reward=0.7842332 (461.70 it/sec) -training >> step=9616500, episode=1603 reward=0.7952809 (480.04 it/sec) -training >> step=9616600, episode=1603 reward=0.7753922 (476.95 it/sec) -training >> step=9616700, episode=1603 reward=0.7652989 (469.99 it/sec) -training >> step=9616800, episode=1603 reward=0.7634274 (489.75 it/sec) -training >> step=9616900, episode=1603 reward=0.7757407 (488.45 it/sec) -training >> step=9617000, episode=1603 reward=0.771038 (512.63 it/sec) -training >> step=9617100, episode=1603 reward=0.7930301 (480.41 it/sec) -training >> step=9617200, episode=1603 reward=0.7943622 (506.69 it/sec) -training >> step=9617300, episode=1604 reward=0.7973973 (94.94 it/sec) -training >> step=9617400, episode=1604 reward=0.7953041 (445.36 it/sec) -training >> step=9617500, episode=1604 reward=0.7839066 (481.05 it/sec) -training >> step=9617600, episode=1604 reward=0.7784455 (453.70 it/sec) -training >> step=9617700, episode=1604 reward=0.7740251 (481.02 it/sec) -training >> step=9617800, episode=1604 reward=0.8206612 (482.90 it/sec) -training >> step=9617900, episode=1604 reward=0.7887487 (508.42 it/sec) -training >> step=9618000, episode=1604 reward=0.8072101 (499.93 it/sec) -training >> step=9618100, episode=1604 reward=0.8003116 (480.99 it/sec) -training >> step=9618200, episode=1604 reward=0.7809917 (464.24 it/sec) -training >> step=9618300, episode=1604 reward=0.7902237 (496.21 it/sec) -training >> step=9618400, episode=1604 reward=0.7941094 (523.82 it/sec) -training >> step=9618500, episode=1604 reward=0.7786537 (482.07 it/sec) -training >> step=9618600, episode=1604 reward=0.7751506 (469.93 it/sec) -training >> step=9618700, episode=1604 reward=0.7778689 (499.42 it/sec) -training >> step=9618800, episode=1604 reward=0.8011318 (486.54 it/sec) -training >> step=9618900, episode=1604 reward=0.7967113 (521.42 it/sec) -training >> step=9619000, episode=1604 reward=0.7678433 (496.41 it/sec) -training >> step=9619100, episode=1604 reward=0.7922221 (500.04 it/sec) -training >> step=9619200, episode=1604 reward=0.796739 (499.29 it/sec) -training >> step=9619300, episode=1604 reward=0.79648 (455.72 it/sec) -training >> step=9619400, episode=1604 reward=0.7737082 (509.66 it/sec) -training >> step=9619500, episode=1604 reward=0.7897429 (514.89 it/sec) -training >> step=9619600, episode=1604 reward=0.781021 (479.99 it/sec) -training >> step=9619700, episode=1604 reward=0.7848279 (466.10 it/sec) -training >> step=9619800, episode=1604 reward=0.8108951 (508.80 it/sec) -training >> step=9619900, episode=1604 reward=0.8031692 (495.18 it/sec) -training >> step=9620000, episode=1604 reward=0.7931911 (490.13 it/sec) -training >> step=9620100, episode=1604 reward=0.7884811 (503.74 it/sec) -training >> step=9620200, episode=1604 reward=0.7793382 (486.25 it/sec) -training >> step=9620300, episode=1604 reward=0.7989663 (500.41 it/sec) -training >> step=9620400, episode=1604 reward=0.8051407 (441.27 it/sec) -training >> step=9620500, episode=1604 reward=0.796602 (488.54 it/sec) -training >> step=9620600, episode=1604 reward=0.7728617 (454.77 it/sec) -training >> step=9620700, episode=1604 reward=0.7737046 (475.60 it/sec) -training >> step=9620800, episode=1604 reward=0.7848221 (490.85 it/sec) -training >> step=9620900, episode=1604 reward=0.7879703 (538.01 it/sec) -training >> step=9621000, episode=1604 reward=0.7815746 (513.69 it/sec) -training >> step=9621100, episode=1604 reward=0.784198 (516.16 it/sec) -training >> step=9621200, episode=1604 reward=0.8024822 (434.11 it/sec) -training >> step=9621300, episode=1604 reward=0.7880525 (509.89 it/sec) -training >> step=9621400, episode=1604 reward=0.7983677 (490.44 it/sec) -training >> step=9621500, episode=1604 reward=0.7749476 (488.91 it/sec) -training >> step=9621600, episode=1604 reward=0.7694011 (488.50 it/sec) -training >> step=9621700, episode=1604 reward=0.792275 (420.76 it/sec) -training >> step=9621800, episode=1604 reward=0.7938644 (418.45 it/sec) -training >> step=9621900, episode=1604 reward=0.7782648 (486.48 it/sec) -training >> step=9622000, episode=1604 reward=0.7866918 (525.00 it/sec) -training >> step=9622100, episode=1604 reward=0.8024552 (530.36 it/sec) -training >> step=9622200, episode=1604 reward=0.7866213 (484.56 it/sec) -training >> step=9622300, episode=1604 reward=0.7958742 (490.64 it/sec) -training >> step=9622400, episode=1604 reward=0.7894162 (506.93 it/sec) -training >> step=9622500, episode=1604 reward=0.7839229 (486.83 it/sec) -training >> step=9622600, episode=1604 reward=0.8046849 (494.90 it/sec) -training >> step=9622700, episode=1604 reward=0.7985165 (504.15 it/sec) -training >> step=9622800, episode=1604 reward=0.8077881 (488.06 it/sec) -training >> step=9622900, episode=1604 reward=0.8056501 (491.44 it/sec) -training >> step=9623000, episode=1604 reward=0.7792729 (506.69 it/sec) -training >> step=9623100, episode=1604 reward=0.7843575 (474.33 it/sec) -training >> step=9623200, episode=1604 reward=0.7989061 (479.53 it/sec) -training >> step=9623300, episode=1605 reward=0.7855411 (95.42 it/sec) -training >> step=9623400, episode=1605 reward=0.7951633 (485.70 it/sec) -training >> step=9623500, episode=1605 reward=0.7975703 (496.29 it/sec) -training >> step=9623600, episode=1605 reward=0.7990789 (529.69 it/sec) -training >> step=9623700, episode=1605 reward=0.7776357 (503.91 it/sec) -training >> step=9623800, episode=1605 reward=0.777279 (457.78 it/sec) -training >> step=9623900, episode=1605 reward=0.7914879 (457.71 it/sec) -training >> step=9624000, episode=1605 reward=0.7810642 (486.78 it/sec) -training >> step=9624100, episode=1605 reward=0.790612 (508.75 it/sec) -training >> step=9624200, episode=1605 reward=0.7941749 (447.89 it/sec) -training >> step=9624300, episode=1605 reward=0.7930665 (460.19 it/sec) -training >> step=9624400, episode=1605 reward=0.7888231 (504.83 it/sec) -training >> step=9624500, episode=1605 reward=0.7846954 (474.81 it/sec) -training >> step=9624600, episode=1605 reward=0.7873709 (496.27 it/sec) -training >> step=9624700, episode=1605 reward=0.7839246 (437.24 it/sec) -training >> step=9624800, episode=1605 reward=0.7871733 (484.55 it/sec) -training >> step=9624900, episode=1605 reward=0.7975269 (462.28 it/sec) -training >> step=9625000, episode=1605 reward=0.7982712 (463.70 it/sec) -training >> step=9625100, episode=1605 reward=0.7898165 (466.15 it/sec) -training >> step=9625200, episode=1605 reward=0.7984328 (509.74 it/sec) -training >> step=9625300, episode=1605 reward=0.7835809 (482.72 it/sec) -training >> step=9625400, episode=1605 reward=0.806465 (487.23 it/sec) -training >> step=9625500, episode=1605 reward=0.8013462 (446.68 it/sec) -training >> step=9625600, episode=1605 reward=0.7854938 (409.93 it/sec) -training >> step=9625700, episode=1605 reward=0.7905142 (438.11 it/sec) -training >> step=9625800, episode=1605 reward=0.7791298 (500.74 it/sec) -training >> step=9625900, episode=1605 reward=0.7927076 (456.51 it/sec) -training >> step=9626000, episode=1605 reward=0.7887733 (472.33 it/sec) -training >> step=9626100, episode=1605 reward=0.7809662 (401.27 it/sec) -training >> step=9626200, episode=1605 reward=0.7834897 (441.97 it/sec) -training >> step=9626300, episode=1605 reward=0.8107481 (477.84 it/sec) -training >> step=9626400, episode=1605 reward=0.795853 (470.64 it/sec) -training >> step=9626500, episode=1605 reward=0.7898814 (463.36 it/sec) -training >> step=9626600, episode=1605 reward=0.8051791 (497.60 it/sec) -training >> step=9626700, episode=1605 reward=0.7708191 (464.26 it/sec) -training >> step=9626800, episode=1605 reward=0.7808615 (457.08 it/sec) -training >> step=9626900, episode=1605 reward=0.7838495 (448.88 it/sec) -training >> step=9627000, episode=1605 reward=0.8042108 (480.95 it/sec) -training >> step=9627100, episode=1605 reward=0.7730709 (436.85 it/sec) -training >> step=9627200, episode=1605 reward=0.7744361 (418.28 it/sec) -training >> step=9627300, episode=1605 reward=0.8016038 (466.41 it/sec) -training >> step=9627400, episode=1605 reward=0.8067989 (472.51 it/sec) -training >> step=9627500, episode=1605 reward=0.7982779 (459.73 it/sec) -training >> step=9627600, episode=1605 reward=0.7995937 (386.40 it/sec) -training >> step=9627700, episode=1605 reward=0.7934997 (457.55 it/sec) -training >> step=9627800, episode=1605 reward=0.7827253 (439.58 it/sec) -training >> step=9627900, episode=1605 reward=0.7909234 (462.19 it/sec) -training >> step=9628000, episode=1605 reward=0.8007472 (467.64 it/sec) -training >> step=9628100, episode=1605 reward=0.8044339 (459.25 it/sec) -training >> step=9628200, episode=1605 reward=0.8045602 (469.26 it/sec) -training >> step=9628300, episode=1605 reward=0.7785711 (482.97 it/sec) -training >> step=9628400, episode=1605 reward=0.8042663 (464.39 it/sec) -training >> step=9628500, episode=1605 reward=0.7829859 (476.68 it/sec) -training >> step=9628600, episode=1605 reward=0.7921085 (466.01 it/sec) -training >> step=9628700, episode=1605 reward=0.7743959 (463.85 it/sec) -training >> step=9628800, episode=1605 reward=0.7708513 (454.77 it/sec) -training >> step=9628900, episode=1605 reward=0.7948297 (495.98 it/sec) -training >> step=9629000, episode=1605 reward=0.7818864 (480.76 it/sec) -training >> step=9629100, episode=1605 reward=0.806702 (498.52 it/sec) -training >> step=9629200, episode=1605 reward=0.7843369 (475.67 it/sec) -training >> step=9629300, episode=1606 reward=0.7819175 (100.05 it/sec) -training >> step=9629400, episode=1606 reward=0.7819656 (423.84 it/sec) -training >> step=9629500, episode=1606 reward=0.7927953 (461.09 it/sec) -training >> step=9629600, episode=1606 reward=0.8001397 (485.26 it/sec) -training >> step=9629700, episode=1606 reward=0.8033229 (410.70 it/sec) -training >> step=9629800, episode=1606 reward=0.8053256 (470.25 it/sec) -training >> step=9629900, episode=1606 reward=0.7855458 (506.58 it/sec) -training >> step=9630000, episode=1606 reward=0.7807066 (479.49 it/sec) -training >> step=9630100, episode=1606 reward=0.7921697 (486.82 it/sec) -training >> step=9630200, episode=1606 reward=0.814842 (485.21 it/sec) -training >> step=9630300, episode=1606 reward=0.7987115 (453.04 it/sec) -training >> step=9630400, episode=1606 reward=0.7843674 (489.15 it/sec) -training >> step=9630500, episode=1606 reward=0.7925814 (431.36 it/sec) -training >> step=9630600, episode=1606 reward=0.7898345 (444.25 it/sec) -training >> step=9630700, episode=1606 reward=0.8074247 (480.81 it/sec) -training >> step=9630800, episode=1606 reward=0.7853264 (462.15 it/sec) -training >> step=9630900, episode=1606 reward=0.8096361 (457.58 it/sec) -training >> step=9631000, episode=1606 reward=0.8079274 (431.91 it/sec) -training >> step=9631100, episode=1606 reward=0.8099408 (462.03 it/sec) -training >> step=9631200, episode=1606 reward=0.8025364 (482.89 it/sec) -training >> step=9631300, episode=1606 reward=0.7844261 (487.94 it/sec) -training >> step=9631400, episode=1606 reward=0.7900113 (494.25 it/sec) -training >> step=9631500, episode=1606 reward=0.7860332 (406.12 it/sec) -training >> step=9631600, episode=1606 reward=0.7945817 (465.57 it/sec) -training >> step=9631700, episode=1606 reward=0.7956945 (469.28 it/sec) -training >> step=9631800, episode=1606 reward=0.7819174 (465.33 it/sec) -training >> step=9631900, episode=1606 reward=0.7725973 (467.87 it/sec) -training >> step=9632000, episode=1606 reward=0.7936841 (439.43 it/sec) -training >> step=9632100, episode=1606 reward=0.7890266 (494.04 it/sec) -training >> step=9632200, episode=1606 reward=0.7959015 (412.36 it/sec) -training >> step=9632300, episode=1606 reward=0.7893468 (436.47 it/sec) -training >> step=9632400, episode=1606 reward=0.7933769 (427.95 it/sec) -training >> step=9632500, episode=1606 reward=0.7919253 (444.74 it/sec) -training >> step=9632600, episode=1606 reward=0.7949888 (458.55 it/sec) -training >> step=9632700, episode=1606 reward=0.7894135 (446.92 it/sec) -training >> step=9632800, episode=1606 reward=0.7953852 (475.25 it/sec) -training >> step=9632900, episode=1606 reward=0.8050009 (468.39 it/sec) -training >> step=9633000, episode=1606 reward=0.7975783 (417.92 it/sec) -training >> step=9633100, episode=1606 reward=0.7901608 (423.54 it/sec) -training >> step=9633200, episode=1606 reward=0.8071072 (482.42 it/sec) -training >> step=9633300, episode=1606 reward=0.7546562 (485.30 it/sec) -training >> step=9633400, episode=1606 reward=0.7854685 (444.27 it/sec) -training >> step=9633500, episode=1606 reward=0.7891024 (462.87 it/sec) -training >> step=9633600, episode=1606 reward=0.8029522 (387.45 it/sec) -training >> step=9633700, episode=1606 reward=0.7892433 (450.01 it/sec) -training >> step=9633800, episode=1606 reward=0.7799824 (443.30 it/sec) -training >> step=9633900, episode=1606 reward=0.7981805 (435.69 it/sec) -training >> step=9634000, episode=1606 reward=0.7861034 (449.90 it/sec) -training >> step=9634100, episode=1606 reward=0.7744381 (463.39 it/sec) -training >> step=9634200, episode=1606 reward=0.7821504 (468.08 it/sec) -training >> step=9634300, episode=1606 reward=0.7704791 (470.89 it/sec) -training >> step=9634400, episode=1606 reward=0.7828736 (479.72 it/sec) -training >> step=9634500, episode=1606 reward=0.7908394 (480.20 it/sec) -training >> step=9634600, episode=1606 reward=0.8008149 (434.61 it/sec) -training >> step=9634700, episode=1606 reward=0.7803206 (461.27 it/sec) -training >> step=9634800, episode=1606 reward=0.7876393 (470.49 it/sec) -training >> step=9634900, episode=1606 reward=0.7837102 (496.92 it/sec) -training >> step=9635000, episode=1606 reward=0.798622 (424.82 it/sec) -training >> step=9635100, episode=1606 reward=0.7846249 (410.75 it/sec) -training >> step=9635200, episode=1606 reward=0.8053902 (505.11 it/sec) -training >> step=9635300, episode=1607 reward=0.792922 (95.62 it/sec) -training >> step=9635400, episode=1607 reward=0.7742212 (462.77 it/sec) -training >> step=9635500, episode=1607 reward=0.7709572 (348.43 it/sec) -training >> step=9635600, episode=1607 reward=0.784475 (455.50 it/sec) -training >> step=9635700, episode=1607 reward=0.7917169 (480.96 it/sec) -training >> step=9635800, episode=1607 reward=0.7944554 (450.93 it/sec) -training >> step=9635900, episode=1607 reward=0.7867149 (473.05 it/sec) -training >> step=9636000, episode=1607 reward=0.7917494 (455.26 it/sec) -training >> step=9636100, episode=1607 reward=0.8103387 (482.22 it/sec) -training >> step=9636200, episode=1607 reward=0.7898723 (459.49 it/sec) -training >> step=9636300, episode=1607 reward=0.7781448 (517.24 it/sec) -training >> step=9636400, episode=1607 reward=0.7840273 (483.51 it/sec) -training >> step=9636500, episode=1607 reward=0.8079045 (474.19 it/sec) -training >> step=9636600, episode=1607 reward=0.7937985 (515.57 it/sec) -training >> step=9636700, episode=1607 reward=0.8058305 (484.68 it/sec) -training >> step=9636800, episode=1607 reward=0.7688563 (511.77 it/sec) -training >> step=9636900, episode=1607 reward=0.8035891 (463.75 it/sec) -training >> step=9637000, episode=1607 reward=0.7866434 (539.60 it/sec) -training >> step=9637100, episode=1607 reward=0.797831 (486.46 it/sec) -training >> step=9637200, episode=1607 reward=0.7869034 (502.71 it/sec) -training >> step=9637300, episode=1607 reward=0.7789683 (461.72 it/sec) -training >> step=9637400, episode=1607 reward=0.7788426 (558.78 it/sec) -training >> step=9637500, episode=1607 reward=0.8078387 (465.90 it/sec) -training >> step=9637600, episode=1607 reward=0.7937532 (452.72 it/sec) -training >> step=9637700, episode=1607 reward=0.792267 (505.85 it/sec) -training >> step=9637800, episode=1607 reward=0.7942945 (496.27 it/sec) -training >> step=9637900, episode=1607 reward=0.8056141 (482.83 it/sec) -training >> step=9638000, episode=1607 reward=0.7866439 (448.10 it/sec) -training >> step=9638100, episode=1607 reward=0.7939002 (467.25 it/sec) -training >> step=9638200, episode=1607 reward=0.8015307 (451.87 it/sec) -training >> step=9638300, episode=1607 reward=0.7895219 (457.18 it/sec) -training >> step=9638400, episode=1607 reward=0.7823992 (462.90 it/sec) -training >> step=9638500, episode=1607 reward=0.8007017 (435.12 it/sec) -training >> step=9638600, episode=1607 reward=0.7850076 (467.63 it/sec) -training >> step=9638700, episode=1607 reward=0.7799792 (455.25 it/sec) -training >> step=9638800, episode=1607 reward=0.797786 (481.86 it/sec) -training >> step=9638900, episode=1607 reward=0.7727095 (479.47 it/sec) -training >> step=9639000, episode=1607 reward=0.7722908 (491.17 it/sec) -training >> step=9639100, episode=1607 reward=0.7839026 (497.56 it/sec) -training >> step=9639200, episode=1607 reward=0.7865859 (486.13 it/sec) -training >> step=9639300, episode=1607 reward=0.7635683 (525.35 it/sec) -training >> step=9639400, episode=1607 reward=0.7815207 (503.23 it/sec) -training >> step=9639500, episode=1607 reward=0.7801614 (489.47 it/sec) -training >> step=9639600, episode=1607 reward=0.7925423 (530.03 it/sec) -training >> step=9639700, episode=1607 reward=0.798399 (470.12 it/sec) -training >> step=9639800, episode=1607 reward=0.7709119 (505.53 it/sec) -training >> step=9639900, episode=1607 reward=0.7770211 (494.21 it/sec) -training >> step=9640000, episode=1607 reward=0.7946793 (508.01 it/sec) -training >> step=9640100, episode=1607 reward=0.8027983 (474.15 it/sec) -training >> step=9640200, episode=1607 reward=0.7972891 (465.15 it/sec) -training >> step=9640300, episode=1607 reward=0.789884 (543.96 it/sec) -training >> step=9640400, episode=1607 reward=0.7988746 (525.66 it/sec) -training >> step=9640500, episode=1607 reward=0.7950687 (506.98 it/sec) -training >> step=9640600, episode=1607 reward=0.7814851 (475.96 it/sec) -training >> step=9640700, episode=1607 reward=0.7953776 (518.47 it/sec) -training >> step=9640800, episode=1607 reward=0.7830729 (507.10 it/sec) -training >> step=9640900, episode=1607 reward=0.7737399 (498.73 it/sec) -training >> step=9641000, episode=1607 reward=0.7830203 (536.52 it/sec) -training >> step=9641100, episode=1607 reward=0.7813419 (487.06 it/sec) -training >> step=9641200, episode=1607 reward=0.7772619 (488.82 it/sec) -training >> step=9641300, episode=1608 reward=0.7841213 (97.89 it/sec) -training >> step=9641400, episode=1608 reward=0.805833 (444.48 it/sec) -training >> step=9641500, episode=1608 reward=0.781576 (497.76 it/sec) -training >> step=9641600, episode=1608 reward=0.7823489 (496.84 it/sec) -training >> step=9641700, episode=1608 reward=0.7941689 (491.64 it/sec) -training >> step=9641800, episode=1608 reward=0.8045103 (365.47 it/sec) -training >> step=9641900, episode=1608 reward=0.8130853 (476.44 it/sec) -training >> step=9642000, episode=1608 reward=0.8026301 (533.94 it/sec) -training >> step=9642100, episode=1608 reward=0.8089167 (512.43 it/sec) -training >> step=9642200, episode=1608 reward=0.8052812 (469.39 it/sec) -training >> step=9642300, episode=1608 reward=0.7836012 (515.07 it/sec) -training >> step=9642400, episode=1608 reward=0.7879555 (517.17 it/sec) -training >> step=9642500, episode=1608 reward=0.794113 (492.15 it/sec) -training >> step=9642600, episode=1608 reward=0.7941356 (472.87 it/sec) -training >> step=9642700, episode=1608 reward=0.795532 (470.78 it/sec) -training >> step=9642800, episode=1608 reward=0.7751864 (499.21 it/sec) -training >> step=9642900, episode=1608 reward=0.792944 (482.62 it/sec) -training >> step=9643000, episode=1608 reward=0.7842904 (483.00 it/sec) -training >> step=9643100, episode=1608 reward=0.7899573 (506.94 it/sec) -training >> step=9643200, episode=1608 reward=0.7983127 (493.88 it/sec) -training >> step=9643300, episode=1608 reward=0.7946991 (471.56 it/sec) -training >> step=9643400, episode=1608 reward=0.8042111 (504.98 it/sec) -training >> step=9643500, episode=1608 reward=0.7923943 (544.36 it/sec) -training >> step=9643600, episode=1608 reward=0.7945216 (465.19 it/sec) -training >> step=9643700, episode=1608 reward=0.7911807 (500.22 it/sec) -training >> step=9643800, episode=1608 reward=0.759282 (487.78 it/sec) -training >> step=9643900, episode=1608 reward=0.7885443 (465.73 it/sec) -training >> step=9644000, episode=1608 reward=0.7885809 (478.47 it/sec) -training >> step=9644100, episode=1608 reward=0.7745925 (505.45 it/sec) -training >> step=9644200, episode=1608 reward=0.7808021 (486.81 it/sec) -training >> step=9644300, episode=1608 reward=0.8011115 (485.94 it/sec) -training >> step=9644400, episode=1608 reward=0.7659242 (493.56 it/sec) -training >> step=9644500, episode=1608 reward=0.7888452 (463.14 it/sec) -training >> step=9644600, episode=1608 reward=0.7881421 (474.48 it/sec) -training >> step=9644700, episode=1608 reward=0.8005509 (488.50 it/sec) -training >> step=9644800, episode=1608 reward=0.7986383 (524.99 it/sec) -training >> step=9644900, episode=1608 reward=0.7953024 (541.22 it/sec) -training >> step=9645000, episode=1608 reward=0.7909734 (486.06 it/sec) -training >> step=9645100, episode=1608 reward=0.8021147 (468.39 it/sec) -training >> step=9645200, episode=1608 reward=0.7943701 (473.45 it/sec) -training >> step=9645300, episode=1608 reward=0.800675 (490.41 it/sec) -training >> step=9645400, episode=1608 reward=0.7913463 (487.88 it/sec) -training >> step=9645500, episode=1608 reward=0.8004168 (486.88 it/sec) -training >> step=9645600, episode=1608 reward=0.7955717 (527.37 it/sec) -training >> step=9645700, episode=1608 reward=0.8096443 (488.02 it/sec) -training >> step=9645800, episode=1608 reward=0.7903974 (453.90 it/sec) -training >> step=9645900, episode=1608 reward=0.7980682 (487.66 it/sec) -training >> step=9646000, episode=1608 reward=0.7912435 (510.04 it/sec) -training >> step=9646100, episode=1608 reward=0.7750966 (488.18 it/sec) -training >> step=9646200, episode=1608 reward=0.7860377 (482.97 it/sec) -training >> step=9646300, episode=1608 reward=0.8111429 (498.91 it/sec) -training >> step=9646400, episode=1608 reward=0.7702916 (430.30 it/sec) -training >> step=9646500, episode=1608 reward=0.7901973 (537.26 it/sec) -training >> step=9646600, episode=1608 reward=0.7621346 (522.51 it/sec) -training >> step=9646700, episode=1608 reward=0.7720246 (497.74 it/sec) -training >> step=9646800, episode=1608 reward=0.7877212 (506.39 it/sec) -training >> step=9646900, episode=1608 reward=0.7831405 (475.81 it/sec) -training >> step=9647000, episode=1608 reward=0.7833185 (510.72 it/sec) -training >> step=9647100, episode=1608 reward=0.7941108 (563.74 it/sec) -training >> step=9647200, episode=1608 reward=0.7969 (479.85 it/sec) -training >> step=9647300, episode=1609 reward=0.7926354 (128.64 it/sec) -training >> step=9647400, episode=1609 reward=0.8105842 (436.61 it/sec) -training >> step=9647500, episode=1609 reward=0.793993 (459.38 it/sec) -training >> step=9647600, episode=1609 reward=0.7620853 (484.08 it/sec) -training >> step=9647700, episode=1609 reward=0.7888281 (514.82 it/sec) -training >> step=9647800, episode=1609 reward=0.7961633 (519.09 it/sec) -training >> step=9647900, episode=1609 reward=0.7740225 (349.05 it/sec) -training >> step=9648000, episode=1609 reward=0.8014791 (498.06 it/sec) -training >> step=9648100, episode=1609 reward=0.8087134 (493.55 it/sec) -training >> step=9648200, episode=1609 reward=0.8023807 (535.42 it/sec) -training >> step=9648300, episode=1609 reward=0.7987033 (507.64 it/sec) -training >> step=9648400, episode=1609 reward=0.7887059 (477.19 it/sec) -training >> step=9648500, episode=1609 reward=0.7980351 (429.09 it/sec) -training >> step=9648600, episode=1609 reward=0.8050157 (498.33 it/sec) -training >> step=9648700, episode=1609 reward=0.8074585 (471.33 it/sec) -training >> step=9648800, episode=1609 reward=0.7959764 (499.78 it/sec) -training >> step=9648900, episode=1609 reward=0.7899571 (515.51 it/sec) -training >> step=9649000, episode=1609 reward=0.8063555 (500.32 it/sec) -training >> step=9649100, episode=1609 reward=0.7828724 (503.98 it/sec) -training >> step=9649200, episode=1609 reward=0.7931284 (444.24 it/sec) -training >> step=9649300, episode=1609 reward=0.7846993 (487.07 it/sec) -training >> step=9649400, episode=1609 reward=0.7877571 (464.88 it/sec) -training >> step=9649500, episode=1609 reward=0.7892709 (505.81 it/sec) -training >> step=9649600, episode=1609 reward=0.7864375 (452.78 it/sec) -training >> step=9649700, episode=1609 reward=0.8022301 (478.79 it/sec) -training >> step=9649800, episode=1609 reward=0.8130945 (500.40 it/sec) -training >> step=9649900, episode=1609 reward=0.7904966 (451.25 it/sec) -training >> step=9650000, episode=1609 reward=0.7854378 (459.54 it/sec) -training >> step=9650100, episode=1609 reward=0.7875167 (464.20 it/sec) -training >> step=9650200, episode=1609 reward=0.7901022 (487.88 it/sec) -training >> step=9650300, episode=1609 reward=0.7952871 (496.81 it/sec) -training >> step=9650400, episode=1609 reward=0.7852029 (474.89 it/sec) -training >> step=9650500, episode=1609 reward=0.8011717 (509.00 it/sec) -training >> step=9650600, episode=1609 reward=0.7989199 (501.88 it/sec) -training >> step=9650700, episode=1609 reward=0.7981004 (528.16 it/sec) -training >> step=9650800, episode=1609 reward=0.8037566 (533.97 it/sec) -training >> step=9650900, episode=1609 reward=0.7918459 (494.03 it/sec) -training >> step=9651000, episode=1609 reward=0.7977473 (535.48 it/sec) -training >> step=9651100, episode=1609 reward=0.8011995 (497.30 it/sec) -training >> step=9651200, episode=1609 reward=0.7906533 (521.58 it/sec) -training >> step=9651300, episode=1609 reward=0.8158706 (512.90 it/sec) -training >> step=9651400, episode=1609 reward=0.7827196 (517.75 it/sec) -training >> step=9651500, episode=1609 reward=0.7955484 (505.69 it/sec) -training >> step=9651600, episode=1609 reward=0.7944952 (479.48 it/sec) -training >> step=9651700, episode=1609 reward=0.7803486 (538.85 it/sec) -training >> step=9651800, episode=1609 reward=0.7826278 (492.25 it/sec) -training >> step=9651900, episode=1609 reward=0.7984894 (491.96 it/sec) -training >> step=9652000, episode=1609 reward=0.7958107 (495.32 it/sec) -training >> step=9652100, episode=1609 reward=0.7728029 (526.03 it/sec) -training >> step=9652200, episode=1609 reward=0.7989399 (464.70 it/sec) -training >> step=9652300, episode=1609 reward=0.7827309 (476.36 it/sec) -training >> step=9652400, episode=1609 reward=0.7907023 (473.53 it/sec) -training >> step=9652500, episode=1609 reward=0.7851545 (513.47 it/sec) -training >> step=9652600, episode=1609 reward=0.7722967 (476.98 it/sec) -training >> step=9652700, episode=1609 reward=0.7763805 (512.78 it/sec) -training >> step=9652800, episode=1609 reward=0.7741623 (504.80 it/sec) -training >> step=9652900, episode=1609 reward=0.7927181 (415.15 it/sec) -training >> step=9653000, episode=1609 reward=0.7895605 (514.81 it/sec) -training >> step=9653100, episode=1609 reward=0.785287 (521.09 it/sec) -training >> step=9653200, episode=1609 reward=0.78838 (536.60 it/sec) -training >> step=9653300, episode=1610 reward=0.795829 (107.01 it/sec) -training >> step=9653400, episode=1610 reward=0.7944842 (368.18 it/sec) -training >> step=9653500, episode=1610 reward=0.7967666 (452.91 it/sec) -training >> step=9653600, episode=1610 reward=0.7705787 (472.14 it/sec) -training >> step=9653700, episode=1610 reward=0.7930071 (517.95 it/sec) -training >> step=9653800, episode=1610 reward=0.8005502 (398.95 it/sec) -training >> step=9653900, episode=1610 reward=0.7794193 (425.61 it/sec) -training >> step=9654000, episode=1610 reward=0.7805881 (350.66 it/sec) -training >> step=9654100, episode=1610 reward=0.7838031 (449.93 it/sec) -training >> step=9654200, episode=1610 reward=0.7918779 (339.18 it/sec) -training >> step=9654300, episode=1610 reward=0.7883394 (478.31 it/sec) -training >> step=9654400, episode=1610 reward=0.7969343 (415.95 it/sec) -training >> step=9654500, episode=1610 reward=0.779601 (411.29 it/sec) -training >> step=9654600, episode=1610 reward=0.8048723 (429.17 it/sec) -training >> step=9654700, episode=1610 reward=0.7771937 (475.64 it/sec) -training >> step=9654800, episode=1610 reward=0.7970312 (335.75 it/sec) -training >> step=9654900, episode=1610 reward=0.7910182 (399.85 it/sec) -training >> step=9655000, episode=1610 reward=0.8033222 (357.56 it/sec) -training >> step=9655100, episode=1610 reward=0.799163 (386.98 it/sec) -training >> step=9655200, episode=1610 reward=0.7681839 (425.23 it/sec) -training >> step=9655300, episode=1610 reward=0.7745394 (425.41 it/sec) -training >> step=9655400, episode=1610 reward=0.790917 (436.63 it/sec) -training >> step=9655500, episode=1610 reward=0.787204 (424.14 it/sec) -training >> step=9655600, episode=1610 reward=0.8127859 (388.01 it/sec) -training >> step=9655700, episode=1610 reward=0.7987663 (437.20 it/sec) -training >> step=9655800, episode=1610 reward=0.7631367 (467.27 it/sec) -training >> step=9655900, episode=1610 reward=0.7835243 (452.62 it/sec) -training >> step=9656000, episode=1610 reward=0.7946501 (428.18 it/sec) -training >> step=9656100, episode=1610 reward=0.8045996 (386.93 it/sec) -training >> step=9656200, episode=1610 reward=0.7840425 (448.41 it/sec) -training >> step=9656300, episode=1610 reward=0.7971851 (372.85 it/sec) -training >> step=9656400, episode=1610 reward=0.7887492 (399.67 it/sec) -training >> step=9656500, episode=1610 reward=0.788847 (437.26 it/sec) -training >> step=9656600, episode=1610 reward=0.7971803 (346.08 it/sec) -training >> step=9656700, episode=1610 reward=0.8110306 (450.22 it/sec) -training >> step=9656800, episode=1610 reward=0.7804833 (375.70 it/sec) -training >> step=9656900, episode=1610 reward=0.7999238 (371.42 it/sec) -training >> step=9657000, episode=1610 reward=0.7858182 (338.03 it/sec) -training >> step=9657100, episode=1610 reward=0.8076953 (357.41 it/sec) -training >> step=9657200, episode=1610 reward=0.7982141 (441.55 it/sec) -training >> step=9657300, episode=1610 reward=0.8004172 (422.12 it/sec) -training >> step=9657400, episode=1610 reward=0.7858222 (409.26 it/sec) -training >> step=9657500, episode=1610 reward=0.7836115 (481.50 it/sec) -training >> step=9657600, episode=1610 reward=0.8056901 (419.78 it/sec) -training >> step=9657700, episode=1610 reward=0.791486 (465.24 it/sec) -training >> step=9657800, episode=1610 reward=0.7898939 (430.19 it/sec) -training >> step=9657900, episode=1610 reward=0.7773371 (430.99 it/sec) -training >> step=9658000, episode=1610 reward=0.7737433 (456.12 it/sec) -training >> step=9658100, episode=1610 reward=0.7799215 (444.09 it/sec) -training >> step=9658200, episode=1610 reward=0.7983389 (417.10 it/sec) -training >> step=9658300, episode=1610 reward=0.7621062 (430.63 it/sec) -training >> step=9658400, episode=1610 reward=0.7718897 (368.53 it/sec) -training >> step=9658500, episode=1610 reward=0.7753533 (423.04 it/sec) -training >> step=9658600, episode=1610 reward=0.7949697 (426.88 it/sec) -training >> step=9658700, episode=1610 reward=0.7844036 (461.94 it/sec) -training >> step=9658800, episode=1610 reward=0.7730005 (431.78 it/sec) -training >> step=9658900, episode=1610 reward=0.7835919 (433.11 it/sec) -training >> step=9659000, episode=1610 reward=0.7853326 (392.85 it/sec) -training >> step=9659100, episode=1610 reward=0.8119364 (351.59 it/sec) -training >> step=9659200, episode=1610 reward=0.7919096 (337.44 it/sec) -training >> step=9659300, episode=1611 reward=0.7871605 (53.33 it/sec) -training >> step=9659400, episode=1611 reward=0.7997339 (347.57 it/sec) -training >> step=9659500, episode=1611 reward=0.7965488 (297.45 it/sec) -training >> step=9659600, episode=1611 reward=0.7719002 (358.00 it/sec) -training >> step=9659700, episode=1611 reward=0.7893203 (354.00 it/sec) -training >> step=9659800, episode=1611 reward=0.7859254 (418.23 it/sec) -training >> step=9659900, episode=1611 reward=0.7739959 (472.21 it/sec) -training >> step=9660000, episode=1611 reward=0.803588 (439.78 it/sec) -training >> step=9660100, episode=1611 reward=0.8157019 (428.01 it/sec) -training >> step=9660200, episode=1611 reward=0.7969323 (448.16 it/sec) -training >> step=9660300, episode=1611 reward=0.7823488 (443.88 it/sec) -training >> step=9660400, episode=1611 reward=0.7765016 (312.75 it/sec) -training >> step=9660500, episode=1611 reward=0.7989832 (420.53 it/sec) -training >> step=9660600, episode=1611 reward=0.7689613 (462.82 it/sec) -training >> step=9660700, episode=1611 reward=0.7800934 (394.69 it/sec) -training >> step=9660800, episode=1611 reward=0.8080937 (374.10 it/sec) -training >> step=9660900, episode=1611 reward=0.7738018 (434.05 it/sec) -training >> step=9661000, episode=1611 reward=0.7915428 (431.51 it/sec) -training >> step=9661100, episode=1611 reward=0.7706524 (442.25 it/sec) -training >> step=9661200, episode=1611 reward=0.7896515 (391.94 it/sec) -training >> step=9661300, episode=1611 reward=0.7933244 (430.86 it/sec) -training >> step=9661400, episode=1611 reward=0.7888733 (460.00 it/sec) -training >> step=9661500, episode=1611 reward=0.7922589 (467.38 it/sec) -training >> step=9661600, episode=1611 reward=0.8022241 (412.53 it/sec) -training >> step=9661700, episode=1611 reward=0.787658 (484.02 it/sec) -training >> step=9661800, episode=1611 reward=0.8028995 (458.61 it/sec) -training >> step=9661900, episode=1611 reward=0.8046345 (444.77 it/sec) -training >> step=9662000, episode=1611 reward=0.7967662 (483.29 it/sec) -training >> step=9662100, episode=1611 reward=0.7971123 (422.96 it/sec) -training >> step=9662200, episode=1611 reward=0.814774 (468.67 it/sec) -training >> step=9662300, episode=1611 reward=0.803067 (477.11 it/sec) -training >> step=9662400, episode=1611 reward=0.7943606 (436.51 it/sec) -training >> step=9662500, episode=1611 reward=0.7861586 (424.79 it/sec) -training >> step=9662600, episode=1611 reward=0.792484 (457.16 it/sec) -training >> step=9662700, episode=1611 reward=0.7859321 (490.80 it/sec) -training >> step=9662800, episode=1611 reward=0.7762192 (469.75 it/sec) -training >> step=9662900, episode=1611 reward=0.7715321 (505.81 it/sec) -training >> step=9663000, episode=1611 reward=0.777611 (454.10 it/sec) -training >> step=9663100, episode=1611 reward=0.7830397 (488.47 it/sec) -training >> step=9663200, episode=1611 reward=0.782069 (470.74 it/sec) -training >> step=9663300, episode=1611 reward=0.7653766 (489.57 it/sec) -training >> step=9663400, episode=1611 reward=0.7962441 (487.57 it/sec) -training >> step=9663500, episode=1611 reward=0.7762051 (454.89 it/sec) -training >> step=9663600, episode=1611 reward=0.7779057 (429.38 it/sec) -training >> step=9663700, episode=1611 reward=0.7918283 (453.61 it/sec) -training >> step=9663800, episode=1611 reward=0.7730228 (451.46 it/sec) -training >> step=9663900, episode=1611 reward=0.7844571 (455.54 it/sec) -training >> step=9664000, episode=1611 reward=0.7837722 (434.59 it/sec) -training >> step=9664100, episode=1611 reward=0.7922189 (436.27 it/sec) -training >> step=9664200, episode=1611 reward=0.7956713 (361.55 it/sec) -training >> step=9664300, episode=1611 reward=0.7892541 (427.48 it/sec) -training >> step=9664400, episode=1611 reward=0.772311 (407.94 it/sec) -training >> step=9664500, episode=1611 reward=0.7870121 (433.40 it/sec) -training >> step=9664600, episode=1611 reward=0.7902244 (421.45 it/sec) -training >> step=9664700, episode=1611 reward=0.7830753 (423.86 it/sec) -training >> step=9664800, episode=1611 reward=0.7986712 (431.90 it/sec) -training >> step=9664900, episode=1611 reward=0.7918325 (409.15 it/sec) -training >> step=9665000, episode=1611 reward=0.7929286 (465.00 it/sec) -training >> step=9665100, episode=1611 reward=0.792374 (459.62 it/sec) -training >> step=9665200, episode=1611 reward=0.7822515 (466.40 it/sec) -training >> step=9665300, episode=1612 reward=0.7925216 (69.68 it/sec) -training >> step=9665400, episode=1612 reward=0.7727922 (392.83 it/sec) -training >> step=9665500, episode=1612 reward=0.7776392 (444.13 it/sec) -training >> step=9665600, episode=1612 reward=0.7708139 (346.64 it/sec) -training >> step=9665700, episode=1612 reward=0.7813533 (309.32 it/sec) -training >> step=9665800, episode=1612 reward=0.7945489 (357.57 it/sec) -training >> step=9665900, episode=1612 reward=0.7808542 (346.43 it/sec) -training >> step=9666000, episode=1612 reward=0.7850605 (373.63 it/sec) -training >> step=9666100, episode=1612 reward=0.7853023 (413.23 it/sec) -training >> step=9666200, episode=1612 reward=0.804294 (448.47 it/sec) -training >> step=9666300, episode=1612 reward=0.7911364 (467.10 it/sec) -training >> step=9666400, episode=1612 reward=0.7925154 (464.98 it/sec) -training >> step=9666500, episode=1612 reward=0.7903677 (330.92 it/sec) -training >> step=9666600, episode=1612 reward=0.7911094 (469.75 it/sec) -training >> step=9666700, episode=1612 reward=0.7968207 (439.16 it/sec) -training >> step=9666800, episode=1612 reward=0.8179018 (498.20 it/sec) -training >> step=9666900, episode=1612 reward=0.7724573 (442.69 it/sec) -training >> step=9667000, episode=1612 reward=0.7839777 (439.69 it/sec) -training >> step=9667100, episode=1612 reward=0.8006029 (422.16 it/sec) -training >> step=9667200, episode=1612 reward=0.7864939 (504.83 it/sec) -training >> step=9667300, episode=1612 reward=0.803977 (436.89 it/sec) -training >> step=9667400, episode=1612 reward=0.789221 (418.94 it/sec) -training >> step=9667500, episode=1612 reward=0.7949969 (465.60 it/sec) -training >> step=9667600, episode=1612 reward=0.7943153 (484.82 it/sec) -training >> step=9667700, episode=1612 reward=0.790025 (481.28 it/sec) -training >> step=9667800, episode=1612 reward=0.7925442 (473.97 it/sec) -training >> step=9667900, episode=1612 reward=0.7723359 (446.00 it/sec) -training >> step=9668000, episode=1612 reward=0.7754302 (419.44 it/sec) -training >> step=9668100, episode=1612 reward=0.7980188 (476.66 it/sec) -training >> step=9668200, episode=1612 reward=0.7972534 (457.56 it/sec) -training >> step=9668300, episode=1612 reward=0.8063176 (503.06 it/sec) -training >> step=9668400, episode=1612 reward=0.7723872 (434.75 it/sec) -training >> step=9668500, episode=1612 reward=0.7957783 (449.21 it/sec) -training >> step=9668600, episode=1612 reward=0.7828377 (503.93 it/sec) -training >> step=9668700, episode=1612 reward=0.7787358 (481.36 it/sec) -training >> step=9668800, episode=1612 reward=0.7982222 (496.45 it/sec) -training >> step=9668900, episode=1612 reward=0.7901602 (442.35 it/sec) -training >> step=9669000, episode=1612 reward=0.8109257 (481.13 it/sec) -training >> step=9669100, episode=1612 reward=0.7779529 (432.03 it/sec) -training >> step=9669200, episode=1612 reward=0.7866165 (488.04 it/sec) -training >> step=9669300, episode=1612 reward=0.8051516 (466.78 it/sec) -training >> step=9669400, episode=1612 reward=0.7825948 (465.38 it/sec) -training >> step=9669500, episode=1612 reward=0.7937453 (473.11 it/sec) -training >> step=9669600, episode=1612 reward=0.7955841 (451.69 it/sec) -training >> step=9669700, episode=1612 reward=0.784021 (452.50 it/sec) -training >> step=9669800, episode=1612 reward=0.7830432 (414.44 it/sec) -training >> step=9669900, episode=1612 reward=0.811591 (426.66 it/sec) -training >> step=9670000, episode=1612 reward=0.7840608 (437.96 it/sec) -training >> step=9670100, episode=1612 reward=0.793404 (395.86 it/sec) -training >> step=9670200, episode=1612 reward=0.7923296 (406.20 it/sec) -training >> step=9670300, episode=1612 reward=0.796303 (401.71 it/sec) -training >> step=9670400, episode=1612 reward=0.7752821 (426.57 it/sec) -training >> step=9670500, episode=1612 reward=0.8029243 (452.27 it/sec) -training >> step=9670600, episode=1612 reward=0.7807938 (463.16 it/sec) -training >> step=9670700, episode=1612 reward=0.7963047 (397.09 it/sec) -training >> step=9670800, episode=1612 reward=0.7809113 (491.51 it/sec) -training >> step=9670900, episode=1612 reward=0.7699628 (459.67 it/sec) -training >> step=9671000, episode=1612 reward=0.7778425 (428.92 it/sec) -training >> step=9671100, episode=1612 reward=0.7956846 (447.72 it/sec) -training >> step=9671200, episode=1612 reward=0.7818854 (413.85 it/sec) -training >> step=9671300, episode=1613 reward=0.7368773 (98.56 it/sec) -training >> step=9671400, episode=1613 reward=0.7778784 (444.88 it/sec) -training >> step=9671500, episode=1613 reward=0.7751422 (440.05 it/sec) -training >> step=9671600, episode=1613 reward=0.7757119 (473.51 it/sec) -training >> step=9671700, episode=1613 reward=0.7897279 (427.93 it/sec) -training >> step=9671800, episode=1613 reward=0.7988315 (392.45 it/sec) -training >> step=9671900, episode=1613 reward=0.8051417 (451.95 it/sec) -training >> step=9672000, episode=1613 reward=0.7704775 (403.94 it/sec) -training >> step=9672100, episode=1613 reward=0.7856988 (420.90 it/sec) -training >> step=9672200, episode=1613 reward=0.8021033 (419.85 it/sec) -training >> step=9672300, episode=1613 reward=0.7966795 (460.08 it/sec) -training >> step=9672400, episode=1613 reward=0.8027242 (452.35 it/sec) -training >> step=9672500, episode=1613 reward=0.7875818 (442.42 it/sec) -training >> step=9672600, episode=1613 reward=0.7955481 (298.09 it/sec) -training >> step=9672700, episode=1613 reward=0.8193936 (467.52 it/sec) -training >> step=9672800, episode=1613 reward=0.796841 (491.65 it/sec) -training >> step=9672900, episode=1613 reward=0.8015662 (484.04 it/sec) -training >> step=9673000, episode=1613 reward=0.7977373 (506.22 it/sec) -training >> step=9673100, episode=1613 reward=0.8040043 (457.33 it/sec) -training >> step=9673200, episode=1613 reward=0.7965653 (402.58 it/sec) -training >> step=9673300, episode=1613 reward=0.8117001 (474.70 it/sec) -training >> step=9673400, episode=1613 reward=0.8095381 (502.66 it/sec) -training >> step=9673500, episode=1613 reward=0.7917803 (463.30 it/sec) -training >> step=9673600, episode=1613 reward=0.7847849 (461.04 it/sec) -training >> step=9673700, episode=1613 reward=0.7675379 (491.93 it/sec) -training >> step=9673800, episode=1613 reward=0.7889371 (488.72 it/sec) -training >> step=9673900, episode=1613 reward=0.7832333 (484.85 it/sec) -training >> step=9674000, episode=1613 reward=0.7973937 (450.36 it/sec) -training >> step=9674100, episode=1613 reward=0.7939945 (441.78 it/sec) -training >> step=9674200, episode=1613 reward=0.8065498 (454.96 it/sec) -training >> step=9674300, episode=1613 reward=0.788296 (443.50 it/sec) -training >> step=9674400, episode=1613 reward=0.7939062 (401.49 it/sec) -training >> step=9674500, episode=1613 reward=0.7886561 (420.56 it/sec) -training >> step=9674600, episode=1613 reward=0.8016726 (447.36 it/sec) -training >> step=9674700, episode=1613 reward=0.7979417 (453.80 it/sec) -training >> step=9674800, episode=1613 reward=0.7862203 (471.12 it/sec) -training >> step=9674900, episode=1613 reward=0.7651945 (452.52 it/sec) -training >> step=9675000, episode=1613 reward=0.7916113 (427.40 it/sec) -training >> step=9675100, episode=1613 reward=0.7850418 (425.82 it/sec) -training >> step=9675200, episode=1613 reward=0.7961153 (461.81 it/sec) -training >> step=9675300, episode=1613 reward=0.798955 (419.11 it/sec) -training >> step=9675400, episode=1613 reward=0.7754338 (430.94 it/sec) -training >> step=9675500, episode=1613 reward=0.7937689 (470.59 it/sec) -training >> step=9675600, episode=1613 reward=0.8099248 (465.75 it/sec) -training >> step=9675700, episode=1613 reward=0.7976518 (459.06 it/sec) -training >> step=9675800, episode=1613 reward=0.802616 (448.44 it/sec) -training >> step=9675900, episode=1613 reward=0.7854961 (458.71 it/sec) -training >> step=9676000, episode=1613 reward=0.7745627 (476.93 it/sec) -training >> step=9676100, episode=1613 reward=0.7872495 (481.35 it/sec) -training >> step=9676200, episode=1613 reward=0.7875659 (449.72 it/sec) -training >> step=9676300, episode=1613 reward=0.8160983 (468.73 it/sec) -training >> step=9676400, episode=1613 reward=0.7708021 (444.95 it/sec) -training >> step=9676500, episode=1613 reward=0.7880024 (463.66 it/sec) -training >> step=9676600, episode=1613 reward=0.8113963 (474.18 it/sec) -training >> step=9676700, episode=1613 reward=0.7915092 (459.16 it/sec) -training >> step=9676800, episode=1613 reward=0.7965518 (461.22 it/sec) -training >> step=9676900, episode=1613 reward=0.7956033 (456.96 it/sec) -training >> step=9677000, episode=1613 reward=0.8056769 (523.96 it/sec) -training >> step=9677100, episode=1613 reward=0.7950174 (433.49 it/sec) -training >> step=9677200, episode=1613 reward=0.7916976 (358.82 it/sec) -training >> step=9677300, episode=1614 reward=0.7976602 (63.04 it/sec) -training >> step=9677400, episode=1614 reward=0.7776415 (432.85 it/sec) -training >> step=9677500, episode=1614 reward=0.7973923 (376.03 it/sec) -training >> step=9677600, episode=1614 reward=0.7951919 (408.43 it/sec) -training >> step=9677700, episode=1614 reward=0.7965991 (439.54 it/sec) -training >> step=9677800, episode=1614 reward=0.8053414 (470.51 it/sec) -training >> step=9677900, episode=1614 reward=0.7767309 (465.08 it/sec) -training >> step=9678000, episode=1614 reward=0.7894816 (434.71 it/sec) -training >> step=9678100, episode=1614 reward=0.790455 (428.29 it/sec) -training >> step=9678200, episode=1614 reward=0.790672 (441.66 it/sec) -training >> step=9678300, episode=1614 reward=0.8168634 (455.82 it/sec) -training >> step=9678400, episode=1614 reward=0.8036085 (463.97 it/sec) -training >> step=9678500, episode=1614 reward=0.8122258 (431.11 it/sec) -training >> step=9678600, episode=1614 reward=0.7879336 (436.95 it/sec) -training >> step=9678700, episode=1614 reward=0.8074393 (297.64 it/sec) -training >> step=9678800, episode=1614 reward=0.7911826 (425.36 it/sec) -training >> step=9678900, episode=1614 reward=0.7963414 (464.47 it/sec) -training >> step=9679000, episode=1614 reward=0.7908379 (467.35 it/sec) -training >> step=9679100, episode=1614 reward=0.7901886 (431.67 it/sec) -training >> step=9679200, episode=1614 reward=0.7937818 (415.66 it/sec) -training >> step=9679300, episode=1614 reward=0.7888904 (452.56 it/sec) -training >> step=9679400, episode=1614 reward=0.8093042 (440.56 it/sec) -training >> step=9679500, episode=1614 reward=0.8083525 (450.24 it/sec) -training >> step=9679600, episode=1614 reward=0.78295 (460.53 it/sec) -training >> step=9679700, episode=1614 reward=0.7796863 (469.23 it/sec) -training >> step=9679800, episode=1614 reward=0.787359 (438.06 it/sec) -training >> step=9679900, episode=1614 reward=0.7937979 (375.75 it/sec) -training >> step=9680000, episode=1614 reward=0.7963353 (445.40 it/sec) -training >> step=9680100, episode=1614 reward=0.7750264 (423.01 it/sec) -training >> step=9680200, episode=1614 reward=0.7870905 (399.51 it/sec) -training >> step=9680300, episode=1614 reward=0.7892829 (380.57 it/sec) -training >> step=9680400, episode=1614 reward=0.807924 (420.51 it/sec) -training >> step=9680500, episode=1614 reward=0.7855297 (415.95 it/sec) -training >> step=9680600, episode=1614 reward=0.781725 (443.66 it/sec) -training >> step=9680700, episode=1614 reward=0.7816972 (409.85 it/sec) -training >> step=9680800, episode=1614 reward=0.7944959 (453.22 it/sec) -training >> step=9680900, episode=1614 reward=0.7930918 (463.12 it/sec) -training >> step=9681000, episode=1614 reward=0.790688 (441.39 it/sec) -training >> step=9681100, episode=1614 reward=0.7969226 (482.26 it/sec) -training >> step=9681200, episode=1614 reward=0.7855883 (442.96 it/sec) -training >> step=9681300, episode=1614 reward=0.7837743 (462.57 it/sec) -training >> step=9681400, episode=1614 reward=0.7950861 (487.86 it/sec) -training >> step=9681500, episode=1614 reward=0.7951946 (433.31 it/sec) -training >> step=9681600, episode=1614 reward=0.789177 (398.02 it/sec) -training >> step=9681700, episode=1614 reward=0.802457 (437.97 it/sec) -training >> step=9681800, episode=1614 reward=0.7818892 (487.12 it/sec) -training >> step=9681900, episode=1614 reward=0.7924603 (438.69 it/sec) -training >> step=9682000, episode=1614 reward=0.8007481 (443.96 it/sec) -training >> step=9682100, episode=1614 reward=0.7738257 (340.27 it/sec) -training >> step=9682200, episode=1614 reward=0.7949885 (405.03 it/sec) -training >> step=9682300, episode=1614 reward=0.7941466 (417.81 it/sec) -training >> step=9682400, episode=1614 reward=0.804064 (467.78 it/sec) -training >> step=9682500, episode=1614 reward=0.7891378 (425.42 it/sec) -training >> step=9682600, episode=1614 reward=0.7769099 (439.37 it/sec) -training >> step=9682700, episode=1614 reward=0.8004373 (447.95 it/sec) -training >> step=9682800, episode=1614 reward=0.7828925 (446.07 it/sec) -training >> step=9682900, episode=1614 reward=0.7925933 (457.09 it/sec) -training >> step=9683000, episode=1614 reward=0.7935763 (407.00 it/sec) -training >> step=9683100, episode=1614 reward=0.769124 (427.43 it/sec) -training >> step=9683200, episode=1614 reward=0.7835991 (453.45 it/sec) -training >> step=9683300, episode=1615 reward=0.799328 (72.16 it/sec) -training >> step=9683400, episode=1615 reward=0.7814404 (463.68 it/sec) -training >> step=9683500, episode=1615 reward=0.7739339 (495.74 it/sec) -training >> step=9683600, episode=1615 reward=0.7708569 (450.81 it/sec) -training >> step=9683700, episode=1615 reward=0.8078831 (436.87 it/sec) -training >> step=9683800, episode=1615 reward=0.7836162 (473.97 it/sec) -training >> step=9683900, episode=1615 reward=0.7720547 (435.05 it/sec) -training >> step=9684000, episode=1615 reward=0.7618973 (457.70 it/sec) -training >> step=9684100, episode=1615 reward=0.8032209 (459.87 it/sec) -training >> step=9684200, episode=1615 reward=0.8039511 (448.85 it/sec) -training >> step=9684300, episode=1615 reward=0.7828299 (389.74 it/sec) -training >> step=9684400, episode=1615 reward=0.8018889 (402.45 it/sec) -training >> step=9684500, episode=1615 reward=0.8056452 (491.38 it/sec) -training >> step=9684600, episode=1615 reward=0.7872251 (446.10 it/sec) -training >> step=9684700, episode=1615 reward=0.7996355 (443.05 it/sec) -training >> step=9684800, episode=1615 reward=0.7860036 (422.42 it/sec) -training >> step=9684900, episode=1615 reward=0.7615492 (478.05 it/sec) -training >> step=9685000, episode=1615 reward=0.811058 (318.68 it/sec) -training >> step=9685100, episode=1615 reward=0.7671036 (467.38 it/sec) -training >> step=9685200, episode=1615 reward=0.7919456 (442.90 it/sec) -training >> step=9685300, episode=1615 reward=0.7965106 (459.10 it/sec) -training >> step=9685400, episode=1615 reward=0.7851486 (467.75 it/sec) -training >> step=9685500, episode=1615 reward=0.8139511 (434.78 it/sec) -training >> step=9685600, episode=1615 reward=0.7981608 (439.10 it/sec) -training >> step=9685700, episode=1615 reward=0.7897537 (413.50 it/sec) -training >> step=9685800, episode=1615 reward=0.8144771 (452.87 it/sec) -training >> step=9685900, episode=1615 reward=0.7832843 (484.40 it/sec) -training >> step=9686000, episode=1615 reward=0.7847851 (419.15 it/sec) -training >> step=9686100, episode=1615 reward=0.7849756 (403.62 it/sec) -training >> step=9686200, episode=1615 reward=0.7953241 (436.72 it/sec) -training >> step=9686300, episode=1615 reward=0.7872083 (495.57 it/sec) -training >> step=9686400, episode=1615 reward=0.7905144 (476.38 it/sec) -training >> step=9686500, episode=1615 reward=0.7867014 (362.99 it/sec) -training >> step=9686600, episode=1615 reward=0.7917979 (352.04 it/sec) -training >> step=9686700, episode=1615 reward=0.8028695 (432.37 it/sec) -training >> step=9686800, episode=1615 reward=0.7963609 (416.41 it/sec) -training >> step=9686900, episode=1615 reward=0.803432 (431.98 it/sec) -training >> step=9687000, episode=1615 reward=0.789652 (459.86 it/sec) -training >> step=9687100, episode=1615 reward=0.7859903 (468.67 it/sec) -training >> step=9687200, episode=1615 reward=0.7742335 (447.03 it/sec) -training >> step=9687300, episode=1615 reward=0.777222 (451.68 it/sec) -training >> step=9687400, episode=1615 reward=0.7961786 (448.63 it/sec) -training >> step=9687500, episode=1615 reward=0.7830563 (394.73 it/sec) -training >> step=9687600, episode=1615 reward=0.7766241 (343.09 it/sec) -training >> step=9687700, episode=1615 reward=0.7879704 (399.76 it/sec) -training >> step=9687800, episode=1615 reward=0.8050433 (456.87 it/sec) -training >> step=9687900, episode=1615 reward=0.7902725 (394.48 it/sec) -training >> step=9688000, episode=1615 reward=0.7851263 (419.46 it/sec) -training >> step=9688100, episode=1615 reward=0.7721491 (464.81 it/sec) -training >> step=9688200, episode=1615 reward=0.7880067 (438.60 it/sec) -training >> step=9688300, episode=1615 reward=0.7867818 (438.53 it/sec) -training >> step=9688400, episode=1615 reward=0.7975181 (445.22 it/sec) -training >> step=9688500, episode=1615 reward=0.7737945 (466.66 it/sec) -training >> step=9688600, episode=1615 reward=0.7924514 (448.14 it/sec) -training >> step=9688700, episode=1615 reward=0.7801962 (451.65 it/sec) -training >> step=9688800, episode=1615 reward=0.799728 (435.11 it/sec) -training >> step=9688900, episode=1615 reward=0.800208 (479.71 it/sec) -training >> step=9689000, episode=1615 reward=0.8038462 (473.11 it/sec) -training >> step=9689100, episode=1615 reward=0.7926342 (436.28 it/sec) -training >> step=9689200, episode=1615 reward=0.7741466 (450.25 it/sec) -training >> step=9689300, episode=1616 reward=0.7904174 (95.50 it/sec) -training >> step=9689400, episode=1616 reward=0.7922475 (457.40 it/sec) -training >> step=9689500, episode=1616 reward=0.7932369 (473.19 it/sec) -training >> step=9689600, episode=1616 reward=0.7904981 (492.97 it/sec) -training >> step=9689700, episode=1616 reward=0.7910061 (466.62 it/sec) -training >> step=9689800, episode=1616 reward=0.7870933 (477.50 it/sec) -training >> step=9689900, episode=1616 reward=0.7948133 (462.52 it/sec) -training >> step=9690000, episode=1616 reward=0.7845405 (438.93 it/sec) -training >> step=9690100, episode=1616 reward=0.7719302 (436.40 it/sec) -training >> step=9690200, episode=1616 reward=0.7838634 (448.14 it/sec) -training >> step=9690300, episode=1616 reward=0.8017042 (442.95 it/sec) -training >> step=9690400, episode=1616 reward=0.776092 (414.06 it/sec) -training >> step=9690500, episode=1616 reward=0.8052309 (422.12 it/sec) -training >> step=9690600, episode=1616 reward=0.7906676 (436.86 it/sec) -training >> step=9690700, episode=1616 reward=0.7928842 (422.51 it/sec) -training >> step=9690800, episode=1616 reward=0.7960321 (391.60 it/sec) -training >> step=9690900, episode=1616 reward=0.8030827 (408.14 it/sec) -training >> step=9691000, episode=1616 reward=0.7976786 (414.56 it/sec) -training >> step=9691100, episode=1616 reward=0.7883619 (317.16 it/sec) -training >> step=9691200, episode=1616 reward=0.7935138 (403.33 it/sec) -training >> step=9691300, episode=1616 reward=0.7844165 (406.29 it/sec) -training >> step=9691400, episode=1616 reward=0.8053831 (445.47 it/sec) -training >> step=9691500, episode=1616 reward=0.782302 (415.72 it/sec) -training >> step=9691600, episode=1616 reward=0.8022137 (364.20 it/sec) -training >> step=9691700, episode=1616 reward=0.7756979 (366.83 it/sec) -training >> step=9691800, episode=1616 reward=0.7882334 (399.66 it/sec) -training >> step=9691900, episode=1616 reward=0.79627 (421.12 it/sec) -training >> step=9692000, episode=1616 reward=0.7917792 (399.30 it/sec) -training >> step=9692100, episode=1616 reward=0.797805 (416.80 it/sec) -training >> step=9692200, episode=1616 reward=0.7991953 (449.86 it/sec) -training >> step=9692300, episode=1616 reward=0.7631928 (433.52 it/sec) -training >> step=9692400, episode=1616 reward=0.7783124 (450.11 it/sec) -training >> step=9692500, episode=1616 reward=0.8007925 (408.51 it/sec) -training >> step=9692600, episode=1616 reward=0.7952149 (423.21 it/sec) -training >> step=9692700, episode=1616 reward=0.786219 (442.87 it/sec) -training >> step=9692800, episode=1616 reward=0.7850797 (432.97 it/sec) -training >> step=9692900, episode=1616 reward=0.7800854 (430.33 it/sec) -training >> step=9693000, episode=1616 reward=0.795104 (423.00 it/sec) -training >> step=9693100, episode=1616 reward=0.7930316 (474.16 it/sec) -training >> step=9693200, episode=1616 reward=0.7940316 (468.88 it/sec) -training >> step=9693300, episode=1616 reward=0.7755315 (464.72 it/sec) -training >> step=9693400, episode=1616 reward=0.786217 (422.99 it/sec) -training >> step=9693500, episode=1616 reward=0.7783157 (430.68 it/sec) -training >> step=9693600, episode=1616 reward=0.7962357 (420.66 it/sec) -training >> step=9693700, episode=1616 reward=0.7930222 (440.06 it/sec) -training >> step=9693800, episode=1616 reward=0.813009 (417.08 it/sec) -training >> step=9693900, episode=1616 reward=0.7783567 (470.47 it/sec) -training >> step=9694000, episode=1616 reward=0.7786912 (456.29 it/sec) -training >> step=9694100, episode=1616 reward=0.7930488 (422.78 it/sec) -training >> step=9694200, episode=1616 reward=0.7853687 (434.87 it/sec) -training >> step=9694300, episode=1616 reward=0.7951292 (397.65 it/sec) -training >> step=9694400, episode=1616 reward=0.8200407 (432.89 it/sec) -training >> step=9694500, episode=1616 reward=0.776718 (430.43 it/sec) -training >> step=9694600, episode=1616 reward=0.7975982 (434.28 it/sec) -training >> step=9694700, episode=1616 reward=0.7942965 (446.06 it/sec) -training >> step=9694800, episode=1616 reward=0.8071975 (452.38 it/sec) -training >> step=9694900, episode=1616 reward=0.7979559 (465.77 it/sec) -training >> step=9695000, episode=1616 reward=0.8120777 (443.17 it/sec) -training >> step=9695100, episode=1616 reward=0.7675837 (455.01 it/sec) -training >> step=9695200, episode=1616 reward=0.7705725 (442.96 it/sec) -training >> step=9695300, episode=1617 reward=0.7888424 (67.22 it/sec) -training >> step=9695400, episode=1617 reward=0.7589838 (366.01 it/sec) -training >> step=9695500, episode=1617 reward=0.7749082 (374.01 it/sec) -training >> step=9695600, episode=1617 reward=0.7762542 (416.89 it/sec) -training >> step=9695700, episode=1617 reward=0.7727257 (446.73 it/sec) -training >> step=9695800, episode=1617 reward=0.7681155 (433.46 it/sec) -training >> step=9695900, episode=1617 reward=0.799384 (451.23 it/sec) -training >> step=9696000, episode=1617 reward=0.8100561 (469.42 it/sec) -training >> step=9696100, episode=1617 reward=0.7847918 (470.67 it/sec) -training >> step=9696200, episode=1617 reward=0.7780374 (505.77 it/sec) -training >> step=9696300, episode=1617 reward=0.7862012 (406.18 it/sec) -training >> step=9696400, episode=1617 reward=0.786623 (457.93 it/sec) -training >> step=9696500, episode=1617 reward=0.7710761 (460.97 it/sec) -training >> step=9696600, episode=1617 reward=0.798713 (480.65 it/sec) -training >> step=9696700, episode=1617 reward=0.800868 (457.46 it/sec) -training >> step=9696800, episode=1617 reward=0.7934568 (445.49 it/sec) -training >> step=9696900, episode=1617 reward=0.7994286 (454.64 it/sec) -training >> step=9697000, episode=1617 reward=0.7955775 (479.40 it/sec) -training >> step=9697100, episode=1617 reward=0.8044933 (470.60 it/sec) -training >> step=9697200, episode=1617 reward=0.7995672 (471.00 it/sec) -training >> step=9697300, episode=1617 reward=0.7973801 (363.83 it/sec) -training >> step=9697400, episode=1617 reward=0.7928536 (467.05 it/sec) -training >> step=9697500, episode=1617 reward=0.7802965 (475.51 it/sec) -training >> step=9697600, episode=1617 reward=0.797757 (507.70 it/sec) -training >> step=9697700, episode=1617 reward=0.7855251 (442.84 it/sec) -training >> step=9697800, episode=1617 reward=0.7874353 (474.82 it/sec) -training >> step=9697900, episode=1617 reward=0.8100654 (480.60 it/sec) -training >> step=9698000, episode=1617 reward=0.7698759 (411.14 it/sec) -training >> step=9698100, episode=1617 reward=0.8007234 (463.54 it/sec) -training >> step=9698200, episode=1617 reward=0.8165969 (473.39 it/sec) -training >> step=9698300, episode=1617 reward=0.7913574 (466.29 it/sec) -training >> step=9698400, episode=1617 reward=0.7757149 (472.83 it/sec) -training >> step=9698500, episode=1617 reward=0.7832112 (477.62 it/sec) -training >> step=9698600, episode=1617 reward=0.7793577 (463.94 it/sec) -training >> step=9698700, episode=1617 reward=0.7602584 (484.22 it/sec) -training >> step=9698800, episode=1617 reward=0.79625 (453.36 it/sec) -training >> step=9698900, episode=1617 reward=0.7831002 (456.12 it/sec) -training >> step=9699000, episode=1617 reward=0.7916107 (436.19 it/sec) -training >> step=9699100, episode=1617 reward=0.7860507 (447.21 it/sec) -training >> step=9699200, episode=1617 reward=0.7868789 (477.25 it/sec) -training >> step=9699300, episode=1617 reward=0.7968879 (470.37 it/sec) -training >> step=9699400, episode=1617 reward=0.790136 (440.36 it/sec) -training >> step=9699500, episode=1617 reward=0.792047 (443.82 it/sec) -training >> step=9699600, episode=1617 reward=0.8003668 (479.51 it/sec) -training >> step=9699700, episode=1617 reward=0.7872933 (495.89 it/sec) -training >> step=9699800, episode=1617 reward=0.7954289 (463.31 it/sec) -training >> step=9699900, episode=1617 reward=0.7992789 (474.20 it/sec) -training >> step=9700000, episode=1617 reward=0.7897437 (433.17 it/sec) -training >> step=9700100, episode=1617 reward=0.7956733 (449.31 it/sec) -training >> step=9700200, episode=1617 reward=0.780794 (430.52 it/sec) -training >> step=9700300, episode=1617 reward=0.7987232 (466.63 it/sec) -training >> step=9700400, episode=1617 reward=0.7900475 (458.72 it/sec) -training >> step=9700500, episode=1617 reward=0.7994087 (429.76 it/sec) -training >> step=9700600, episode=1617 reward=0.7983758 (431.46 it/sec) -training >> step=9700700, episode=1617 reward=0.7923772 (437.75 it/sec) -training >> step=9700800, episode=1617 reward=0.7888867 (476.20 it/sec) -training >> step=9700900, episode=1617 reward=0.7908226 (459.67 it/sec) -training >> step=9701000, episode=1617 reward=0.7933462 (350.46 it/sec) -training >> step=9701100, episode=1617 reward=0.7815328 (410.09 it/sec) -training >> step=9701200, episode=1617 reward=0.7854231 (462.93 it/sec) -training >> step=9701300, episode=1618 reward=0.7624501 (98.34 it/sec) -training >> step=9701400, episode=1618 reward=0.7847397 (478.31 it/sec) -training >> step=9701500, episode=1618 reward=0.780193 (481.58 it/sec) -training >> step=9701600, episode=1618 reward=0.7877291 (456.69 it/sec) -training >> step=9701700, episode=1618 reward=0.7838367 (478.45 it/sec) -training >> step=9701800, episode=1618 reward=0.7791027 (453.94 it/sec) -training >> step=9701900, episode=1618 reward=0.781575 (510.48 it/sec) -training >> step=9702000, episode=1618 reward=0.7976981 (448.51 it/sec) -training >> step=9702100, episode=1618 reward=0.8036613 (509.79 it/sec) -training >> step=9702200, episode=1618 reward=0.7968892 (468.36 it/sec) -training >> step=9702300, episode=1618 reward=0.7937163 (436.58 it/sec) -training >> step=9702400, episode=1618 reward=0.7867908 (402.12 it/sec) -training >> step=9702500, episode=1618 reward=0.7883424 (490.37 it/sec) -training >> step=9702600, episode=1618 reward=0.7947176 (446.96 it/sec) -training >> step=9702700, episode=1618 reward=0.8009469 (371.67 it/sec) -training >> step=9702800, episode=1618 reward=0.7846153 (401.69 it/sec) -training >> step=9702900, episode=1618 reward=0.7934884 (474.01 it/sec) -training >> step=9703000, episode=1618 reward=0.7877023 (457.78 it/sec) -training >> step=9703100, episode=1618 reward=0.7880556 (428.54 it/sec) -training >> step=9703200, episode=1618 reward=0.7980438 (471.48 it/sec) -training >> step=9703300, episode=1618 reward=0.7935713 (464.85 it/sec) -training >> step=9703400, episode=1618 reward=0.7903367 (384.48 it/sec) -training >> step=9703500, episode=1618 reward=0.8098177 (301.06 it/sec) -training >> step=9703600, episode=1618 reward=0.8136756 (468.20 it/sec) -training >> step=9703700, episode=1618 reward=0.7917201 (495.21 it/sec) -training >> step=9703800, episode=1618 reward=0.7989689 (419.45 it/sec) -training >> step=9703900, episode=1618 reward=0.7958294 (484.62 it/sec) -training >> step=9704000, episode=1618 reward=0.7906581 (517.80 it/sec) -training >> step=9704100, episode=1618 reward=0.7925059 (426.19 it/sec) -training >> step=9704200, episode=1618 reward=0.7927999 (442.09 it/sec) -training >> step=9704300, episode=1618 reward=0.7922998 (458.53 it/sec) -training >> step=9704400, episode=1618 reward=0.8031354 (424.52 it/sec) -training >> step=9704500, episode=1618 reward=0.7886522 (420.70 it/sec) -training >> step=9704600, episode=1618 reward=0.793923 (479.54 it/sec) -training >> step=9704700, episode=1618 reward=0.7820598 (486.68 it/sec) -training >> step=9704800, episode=1618 reward=0.7885843 (420.09 it/sec) -training >> step=9704900, episode=1618 reward=0.7879638 (365.29 it/sec) -training >> step=9705000, episode=1618 reward=0.7791319 (390.79 it/sec) -training >> step=9705100, episode=1618 reward=0.7923463 (431.85 it/sec) -training >> step=9705200, episode=1618 reward=0.7959775 (461.60 it/sec) -training >> step=9705300, episode=1618 reward=0.8041344 (445.54 it/sec) -training >> step=9705400, episode=1618 reward=0.7785961 (484.96 it/sec) -training >> step=9705500, episode=1618 reward=0.7834099 (494.77 it/sec) -training >> step=9705600, episode=1618 reward=0.7879416 (480.39 it/sec) -training >> step=9705700, episode=1618 reward=0.771084 (436.23 it/sec) -training >> step=9705800, episode=1618 reward=0.7974135 (472.18 it/sec) -training >> step=9705900, episode=1618 reward=0.7829492 (471.61 it/sec) -training >> step=9706000, episode=1618 reward=0.7745017 (452.62 it/sec) -training >> step=9706100, episode=1618 reward=0.7845066 (477.59 it/sec) -training >> step=9706200, episode=1618 reward=0.8154543 (522.92 it/sec) -training >> step=9706300, episode=1618 reward=0.7918225 (463.53 it/sec) -training >> step=9706400, episode=1618 reward=0.7894876 (497.94 it/sec) -training >> step=9706500, episode=1618 reward=0.7852173 (507.60 it/sec) -training >> step=9706600, episode=1618 reward=0.7967297 (477.73 it/sec) -training >> step=9706700, episode=1618 reward=0.8021447 (487.94 it/sec) -training >> step=9706800, episode=1618 reward=0.77887 (502.30 it/sec) -training >> step=9706900, episode=1618 reward=0.7893714 (502.43 it/sec) -training >> step=9707000, episode=1618 reward=0.8002006 (457.21 it/sec) -training >> step=9707100, episode=1618 reward=0.7945725 (497.66 it/sec) -training >> step=9707200, episode=1618 reward=0.7736297 (484.36 it/sec) -training >> step=9707300, episode=1619 reward=0.7974058 (109.22 it/sec) -training >> step=9707400, episode=1619 reward=0.7967278 (327.85 it/sec) -training >> step=9707500, episode=1619 reward=0.7825 (488.04 it/sec) -training >> step=9707600, episode=1619 reward=0.773364 (493.76 it/sec) -training >> step=9707700, episode=1619 reward=0.7813395 (451.65 it/sec) -training >> step=9707800, episode=1619 reward=0.7698188 (513.41 it/sec) -training >> step=9707900, episode=1619 reward=0.776335 (458.37 it/sec) -training >> step=9708000, episode=1619 reward=0.7837917 (497.12 it/sec) -training >> step=9708100, episode=1619 reward=0.8063436 (494.90 it/sec) -training >> step=9708200, episode=1619 reward=0.795269 (480.79 it/sec) -training >> step=9708300, episode=1619 reward=0.7863274 (445.91 it/sec) -training >> step=9708400, episode=1619 reward=0.7931972 (473.04 it/sec) -training >> step=9708500, episode=1619 reward=0.7956062 (453.46 it/sec) -training >> step=9708600, episode=1619 reward=0.7851619 (518.63 it/sec) -training >> step=9708700, episode=1619 reward=0.7810225 (483.26 it/sec) -training >> step=9708800, episode=1619 reward=0.7909691 (449.76 it/sec) -training >> step=9708900, episode=1619 reward=0.7963496 (444.83 it/sec) -training >> step=9709000, episode=1619 reward=0.7758756 (475.65 it/sec) -training >> step=9709100, episode=1619 reward=0.7781237 (441.80 it/sec) -training >> step=9709200, episode=1619 reward=0.7767781 (450.40 it/sec) -training >> step=9709300, episode=1619 reward=0.784948 (465.03 it/sec) -training >> step=9709400, episode=1619 reward=0.7920657 (427.06 it/sec) -training >> step=9709500, episode=1619 reward=0.7787371 (487.72 it/sec) -training >> step=9709600, episode=1619 reward=0.7884027 (475.50 it/sec) -training >> step=9709700, episode=1619 reward=0.7964841 (503.93 it/sec) -training >> step=9709800, episode=1619 reward=0.7804327 (368.64 it/sec) -training >> step=9709900, episode=1619 reward=0.7774801 (467.58 it/sec) -training >> step=9710000, episode=1619 reward=0.7889009 (496.76 it/sec) -training >> step=9710100, episode=1619 reward=0.8082415 (446.42 it/sec) -training >> step=9710200, episode=1619 reward=0.7948858 (480.69 it/sec) -training >> step=9710300, episode=1619 reward=0.7840808 (423.57 it/sec) -training >> step=9710400, episode=1619 reward=0.7979929 (470.94 it/sec) -training >> step=9710500, episode=1619 reward=0.7986903 (455.42 it/sec) -training >> step=9710600, episode=1619 reward=0.8089519 (479.44 it/sec) -training >> step=9710700, episode=1619 reward=0.779098 (496.14 it/sec) -training >> step=9710800, episode=1619 reward=0.7902958 (407.63 it/sec) -training >> step=9710900, episode=1619 reward=0.8125345 (514.42 it/sec) -training >> step=9711000, episode=1619 reward=0.7982205 (482.12 it/sec) -training >> step=9711100, episode=1619 reward=0.7885005 (415.43 it/sec) -training >> step=9711200, episode=1619 reward=0.782581 (490.38 it/sec) -training >> step=9711300, episode=1619 reward=0.7715598 (438.24 it/sec) -training >> step=9711400, episode=1619 reward=0.7935232 (530.68 it/sec) -training >> step=9711500, episode=1619 reward=0.7984216 (499.99 it/sec) -training >> step=9711600, episode=1619 reward=0.792625 (482.77 it/sec) -training >> step=9711700, episode=1619 reward=0.7718865 (459.75 it/sec) -training >> step=9711800, episode=1619 reward=0.7983196 (418.39 it/sec) -training >> step=9711900, episode=1619 reward=0.7871041 (455.05 it/sec) -training >> step=9712000, episode=1619 reward=0.7867144 (455.01 it/sec) -training >> step=9712100, episode=1619 reward=0.7751245 (435.83 it/sec) -training >> step=9712200, episode=1619 reward=0.7801712 (462.71 it/sec) -training >> step=9712300, episode=1619 reward=0.78367 (500.60 it/sec) -training >> step=9712400, episode=1619 reward=0.7840357 (511.54 it/sec) -training >> step=9712500, episode=1619 reward=0.7793353 (468.99 it/sec) -training >> step=9712600, episode=1619 reward=0.79691 (429.37 it/sec) -training >> step=9712700, episode=1619 reward=0.793578 (431.26 it/sec) -training >> step=9712800, episode=1619 reward=0.7581225 (515.73 it/sec) -training >> step=9712900, episode=1619 reward=0.7826862 (476.57 it/sec) -training >> step=9713000, episode=1619 reward=0.8039176 (441.71 it/sec) -training >> step=9713100, episode=1619 reward=0.7858447 (430.38 it/sec) -training >> step=9713200, episode=1619 reward=0.7947026 (431.33 it/sec) -training >> step=9713300, episode=1620 reward=0.7870047 (68.08 it/sec) -training >> step=9713400, episode=1620 reward=0.7809359 (436.91 it/sec) -training >> step=9713500, episode=1620 reward=0.7895004 (411.28 it/sec) -training >> step=9713600, episode=1620 reward=0.7920642 (499.05 it/sec) -training >> step=9713700, episode=1620 reward=0.7894751 (472.15 it/sec) -training >> step=9713800, episode=1620 reward=0.7927739 (461.43 it/sec) -training >> step=9713900, episode=1620 reward=0.7966366 (419.38 it/sec) -training >> step=9714000, episode=1620 reward=0.7903948 (400.62 it/sec) -training >> step=9714100, episode=1620 reward=0.7808747 (388.35 it/sec) -training >> step=9714200, episode=1620 reward=0.7802824 (442.26 it/sec) -training >> step=9714300, episode=1620 reward=0.7925528 (459.87 it/sec) -training >> step=9714400, episode=1620 reward=0.7883554 (481.29 it/sec) -training >> step=9714500, episode=1620 reward=0.7878374 (501.02 it/sec) -training >> step=9714600, episode=1620 reward=0.7886243 (460.46 it/sec) -training >> step=9714700, episode=1620 reward=0.7809719 (495.14 it/sec) -training >> step=9714800, episode=1620 reward=0.8035852 (523.57 it/sec) -training >> step=9714900, episode=1620 reward=0.7890932 (467.17 it/sec) -training >> step=9715000, episode=1620 reward=0.7893489 (489.59 it/sec) -training >> step=9715100, episode=1620 reward=0.8070864 (502.90 it/sec) -training >> step=9715200, episode=1620 reward=0.7978907 (471.25 it/sec) -training >> step=9715300, episode=1620 reward=0.791212 (500.03 it/sec) -training >> step=9715400, episode=1620 reward=0.776878 (432.33 it/sec) -training >> step=9715500, episode=1620 reward=0.8070546 (510.73 it/sec) -training >> step=9715600, episode=1620 reward=0.7852445 (494.58 it/sec) -training >> step=9715700, episode=1620 reward=0.7797112 (484.26 it/sec) -training >> step=9715800, episode=1620 reward=0.789778 (491.61 it/sec) -training >> step=9715900, episode=1620 reward=0.7740844 (472.21 it/sec) -training >> step=9716000, episode=1620 reward=0.8008307 (462.64 it/sec) -training >> step=9716100, episode=1620 reward=0.803706 (329.14 it/sec) -training >> step=9716200, episode=1620 reward=0.8008012 (517.26 it/sec) -training >> step=9716300, episode=1620 reward=0.7596288 (449.55 it/sec) -training >> step=9716400, episode=1620 reward=0.7845606 (412.43 it/sec) -training >> step=9716500, episode=1620 reward=0.7879928 (428.42 it/sec) -training >> step=9716600, episode=1620 reward=0.7942953 (435.05 it/sec) -training >> step=9716700, episode=1620 reward=0.7771224 (443.25 it/sec) -training >> step=9716800, episode=1620 reward=0.7904372 (436.83 it/sec) -training >> step=9716900, episode=1620 reward=0.7976456 (442.29 it/sec) -training >> step=9717000, episode=1620 reward=0.8106051 (475.89 it/sec) -training >> step=9717100, episode=1620 reward=0.8225662 (440.54 it/sec) -training >> step=9717200, episode=1620 reward=0.7816194 (423.16 it/sec) -training >> step=9717300, episode=1620 reward=0.8004457 (438.84 it/sec) -training >> step=9717400, episode=1620 reward=0.7964457 (459.00 it/sec) -training >> step=9717500, episode=1620 reward=0.7909665 (345.31 it/sec) -training >> step=9717600, episode=1620 reward=0.7785259 (380.50 it/sec) -training >> step=9717700, episode=1620 reward=0.7893597 (422.57 it/sec) -training >> step=9717800, episode=1620 reward=0.7879317 (429.58 it/sec) -training >> step=9717900, episode=1620 reward=0.7863784 (458.52 it/sec) -training >> step=9718000, episode=1620 reward=0.784653 (463.55 it/sec) -training >> step=9718100, episode=1620 reward=0.8173668 (377.10 it/sec) -training >> step=9718200, episode=1620 reward=0.7900691 (444.05 it/sec) -training >> step=9718300, episode=1620 reward=0.7933105 (470.85 it/sec) -training >> step=9718400, episode=1620 reward=0.7867785 (478.70 it/sec) -training >> step=9718500, episode=1620 reward=0.7792873 (493.91 it/sec) -training >> step=9718600, episode=1620 reward=0.809745 (433.04 it/sec) -training >> step=9718700, episode=1620 reward=0.7789698 (452.34 it/sec) -training >> step=9718800, episode=1620 reward=0.7909709 (468.37 it/sec) -training >> step=9718900, episode=1620 reward=0.7808613 (451.20 it/sec) -training >> step=9719000, episode=1620 reward=0.7853549 (437.87 it/sec) -training >> step=9719100, episode=1620 reward=0.7719539 (452.86 it/sec) -training >> step=9719200, episode=1620 reward=0.7937832 (417.60 it/sec) -training >> step=9719300, episode=1621 reward=0.7807261 (53.03 it/sec) -training >> step=9719400, episode=1621 reward=0.7858187 (434.89 it/sec) -training >> step=9719500, episode=1621 reward=0.7861341 (458.51 it/sec) -training >> step=9719600, episode=1621 reward=0.7913051 (461.17 it/sec) -training >> step=9719700, episode=1621 reward=0.7792346 (431.74 it/sec) -training >> step=9719800, episode=1621 reward=0.8003218 (429.71 it/sec) -training >> step=9719900, episode=1621 reward=0.8037382 (467.41 it/sec) -training >> step=9720000, episode=1621 reward=0.776951 (466.59 it/sec) -training >> step=9720100, episode=1621 reward=0.8016808 (430.01 it/sec) -training >> step=9720200, episode=1621 reward=0.8103403 (385.63 it/sec) -training >> step=9720300, episode=1621 reward=0.7807216 (400.03 it/sec) -training >> step=9720400, episode=1621 reward=0.7835501 (396.65 it/sec) -training >> step=9720500, episode=1621 reward=0.7712312 (442.14 it/sec) -training >> step=9720600, episode=1621 reward=0.8051215 (440.00 it/sec) -training >> step=9720700, episode=1621 reward=0.796155 (461.54 it/sec) -training >> step=9720800, episode=1621 reward=0.8073223 (451.45 it/sec) -training >> step=9720900, episode=1621 reward=0.8001328 (423.69 it/sec) -training >> step=9721000, episode=1621 reward=0.7885849 (363.21 it/sec) -training >> step=9721100, episode=1621 reward=0.7908161 (421.87 it/sec) -training >> step=9721200, episode=1621 reward=0.7878448 (371.68 it/sec) -training >> step=9721300, episode=1621 reward=0.8134602 (431.92 it/sec) -training >> step=9721400, episode=1621 reward=0.7911397 (373.20 it/sec) -training >> step=9721500, episode=1621 reward=0.8010062 (421.12 it/sec) -training >> step=9721600, episode=1621 reward=0.8037122 (431.59 it/sec) -training >> step=9721700, episode=1621 reward=0.806293 (398.72 it/sec) -training >> step=9721800, episode=1621 reward=0.789416 (403.26 it/sec) -training >> step=9721900, episode=1621 reward=0.7850605 (439.68 it/sec) -training >> step=9722000, episode=1621 reward=0.8142376 (464.85 it/sec) -training >> step=9722100, episode=1621 reward=0.7810935 (438.98 it/sec) -training >> step=9722200, episode=1621 reward=0.7993625 (331.56 it/sec) -training >> step=9722300, episode=1621 reward=0.7757019 (483.54 it/sec) -training >> step=9722400, episode=1621 reward=0.8165076 (453.15 it/sec) -training >> step=9722500, episode=1621 reward=0.7907051 (413.15 it/sec) -training >> step=9722600, episode=1621 reward=0.7906778 (430.83 it/sec) -training >> step=9722700, episode=1621 reward=0.7903178 (455.52 it/sec) -training >> step=9722800, episode=1621 reward=0.8187289 (407.11 it/sec) -training >> step=9722900, episode=1621 reward=0.81012 (451.12 it/sec) -training >> step=9723000, episode=1621 reward=0.7916274 (430.45 it/sec) -training >> step=9723100, episode=1621 reward=0.8003022 (489.27 it/sec) -training >> step=9723200, episode=1621 reward=0.8024414 (396.59 it/sec) -training >> step=9723300, episode=1621 reward=0.7966328 (426.80 it/sec) -training >> step=9723400, episode=1621 reward=0.7828585 (460.64 it/sec) -training >> step=9723500, episode=1621 reward=0.7850609 (422.24 it/sec) -training >> step=9723600, episode=1621 reward=0.7869778 (437.53 it/sec) -training >> step=9723700, episode=1621 reward=0.773904 (432.86 it/sec) -training >> step=9723800, episode=1621 reward=0.7904274 (465.06 it/sec) -training >> step=9723900, episode=1621 reward=0.7737184 (427.81 it/sec) -training >> step=9724000, episode=1621 reward=0.7902745 (413.12 it/sec) -training >> step=9724100, episode=1621 reward=0.7866986 (365.77 it/sec) -training >> step=9724200, episode=1621 reward=0.7651803 (460.97 it/sec) -training >> step=9724300, episode=1621 reward=0.7860206 (459.38 it/sec) -training >> step=9724400, episode=1621 reward=0.8048043 (433.07 it/sec) -training >> step=9724500, episode=1621 reward=0.7976849 (436.87 it/sec) -training >> step=9724600, episode=1621 reward=0.7849277 (459.97 it/sec) -training >> step=9724700, episode=1621 reward=0.7945423 (450.69 it/sec) -training >> step=9724800, episode=1621 reward=0.8070939 (462.84 it/sec) -training >> step=9724900, episode=1621 reward=0.7900991 (477.66 it/sec) -training >> step=9725000, episode=1621 reward=0.7770629 (406.86 it/sec) -training >> step=9725100, episode=1621 reward=0.7882865 (454.16 it/sec) -training >> step=9725200, episode=1621 reward=0.7881462 (481.37 it/sec) -training >> step=9725300, episode=1622 reward=0.7928339 (109.18 it/sec) -training >> step=9725400, episode=1622 reward=0.7937515 (454.07 it/sec) -training >> step=9725500, episode=1622 reward=0.7850163 (460.75 it/sec) -training >> step=9725600, episode=1622 reward=0.7915831 (473.48 it/sec) -training >> step=9725700, episode=1622 reward=0.8003564 (392.23 it/sec) -training >> step=9725800, episode=1622 reward=0.7748814 (408.48 it/sec) -training >> step=9725900, episode=1622 reward=0.7969993 (393.61 it/sec) -training >> step=9726000, episode=1622 reward=0.7802486 (437.84 it/sec) -training >> step=9726100, episode=1622 reward=0.7740725 (434.39 it/sec) -training >> step=9726200, episode=1622 reward=0.7951304 (435.76 it/sec) -training >> step=9726300, episode=1622 reward=0.7924335 (437.78 it/sec) -training >> step=9726400, episode=1622 reward=0.7916118 (452.41 it/sec) -training >> step=9726500, episode=1622 reward=0.7910007 (455.19 it/sec) -training >> step=9726600, episode=1622 reward=0.7868727 (402.35 it/sec) -training >> step=9726700, episode=1622 reward=0.8202627 (421.07 it/sec) -training >> step=9726800, episode=1622 reward=0.8019478 (460.72 it/sec) -training >> step=9726900, episode=1622 reward=0.7987831 (448.04 it/sec) -training >> step=9727000, episode=1622 reward=0.8061507 (448.23 it/sec) -training >> step=9727100, episode=1622 reward=0.7976311 (449.37 it/sec) -training >> step=9727200, episode=1622 reward=0.786409 (463.26 it/sec) -training >> step=9727300, episode=1622 reward=0.7906611 (434.09 it/sec) -training >> step=9727400, episode=1622 reward=0.7992221 (438.91 it/sec) -training >> step=9727500, episode=1622 reward=0.7934099 (432.98 it/sec) -training >> step=9727600, episode=1622 reward=0.8017074 (477.92 it/sec) -training >> step=9727700, episode=1622 reward=0.7911708 (465.37 it/sec) -training >> step=9727800, episode=1622 reward=0.8029429 (459.28 it/sec) -training >> step=9727900, episode=1622 reward=0.7943698 (464.29 it/sec) -training >> step=9728000, episode=1622 reward=0.8024607 (421.16 it/sec) -training >> step=9728100, episode=1622 reward=0.7805671 (378.66 it/sec) -training >> step=9728200, episode=1622 reward=0.803144 (320.44 it/sec) -training >> step=9728300, episode=1622 reward=0.7764216 (392.12 it/sec) -training >> step=9728400, episode=1622 reward=0.7742717 (305.35 it/sec) -training >> step=9728500, episode=1622 reward=0.7907612 (412.62 it/sec) -training >> step=9728600, episode=1622 reward=0.7952407 (445.11 it/sec) -training >> step=9728700, episode=1622 reward=0.7903902 (436.31 it/sec) -training >> step=9728800, episode=1622 reward=0.7827622 (437.83 it/sec) -training >> step=9728900, episode=1622 reward=0.7860302 (443.76 it/sec) -training >> step=9729000, episode=1622 reward=0.7936658 (467.99 it/sec) -training >> step=9729100, episode=1622 reward=0.7852361 (472.36 it/sec) -training >> step=9729200, episode=1622 reward=0.7979584 (415.75 it/sec) -training >> step=9729300, episode=1622 reward=0.7905186 (447.43 it/sec) -training >> step=9729400, episode=1622 reward=0.8156867 (492.92 it/sec) -training >> step=9729500, episode=1622 reward=0.7929675 (461.90 it/sec) -training >> step=9729600, episode=1622 reward=0.7792533 (457.70 it/sec) -training >> step=9729700, episode=1622 reward=0.7790544 (437.71 it/sec) -training >> step=9729800, episode=1622 reward=0.7766465 (443.15 it/sec) -training >> step=9729900, episode=1622 reward=0.7969534 (461.60 it/sec) -training >> step=9730000, episode=1622 reward=0.7780271 (447.12 it/sec) -training >> step=9730100, episode=1622 reward=0.7769306 (486.92 it/sec) -training >> step=9730200, episode=1622 reward=0.7937991 (483.83 it/sec) -training >> step=9730300, episode=1622 reward=0.7939243 (458.31 it/sec) -training >> step=9730400, episode=1622 reward=0.7671078 (445.45 it/sec) -training >> step=9730500, episode=1622 reward=0.7805729 (490.22 it/sec) -training >> step=9730600, episode=1622 reward=0.7891293 (446.36 it/sec) -training >> step=9730700, episode=1622 reward=0.762399 (441.73 it/sec) -training >> step=9730800, episode=1622 reward=0.7897755 (452.17 it/sec) -training >> step=9730900, episode=1622 reward=0.7744287 (450.43 it/sec) -training >> step=9731000, episode=1622 reward=0.801239 (481.60 it/sec) -training >> step=9731100, episode=1622 reward=0.777211 (451.44 it/sec) -training >> step=9731200, episode=1622 reward=0.777487 (455.49 it/sec) -training >> step=9731300, episode=1623 reward=0.7924904 (45.93 it/sec) -training >> step=9731400, episode=1623 reward=0.7805313 (439.14 it/sec) -training >> step=9731500, episode=1623 reward=0.7846456 (462.50 it/sec) -training >> step=9731600, episode=1623 reward=0.7796674 (460.51 it/sec) -training >> step=9731700, episode=1623 reward=0.7998777 (472.57 it/sec) -training >> step=9731800, episode=1623 reward=0.8092306 (464.45 it/sec) -training >> step=9731900, episode=1623 reward=0.7764543 (452.75 it/sec) -training >> step=9732000, episode=1623 reward=0.7989221 (499.98 it/sec) -training >> step=9732100, episode=1623 reward=0.7980271 (448.57 it/sec) -training >> step=9732200, episode=1623 reward=0.8022375 (464.90 it/sec) -training >> step=9732300, episode=1623 reward=0.8107154 (434.54 it/sec) -training >> step=9732400, episode=1623 reward=0.8079289 (488.78 it/sec) -training >> step=9732500, episode=1623 reward=0.8007272 (474.65 it/sec) -training >> step=9732600, episode=1623 reward=0.7869989 (465.58 it/sec) -training >> step=9732700, episode=1623 reward=0.8040192 (464.89 it/sec) -training >> step=9732800, episode=1623 reward=0.7957515 (491.39 it/sec) -training >> step=9732900, episode=1623 reward=0.7872746 (478.36 it/sec) -training >> step=9733000, episode=1623 reward=0.7909193 (481.32 it/sec) -training >> step=9733100, episode=1623 reward=0.7876087 (443.03 it/sec) -training >> step=9733200, episode=1623 reward=0.7926261 (432.17 it/sec) -training >> step=9733300, episode=1623 reward=0.7820122 (427.10 it/sec) -training >> step=9733400, episode=1623 reward=0.796316 (418.70 it/sec) -training >> step=9733500, episode=1623 reward=0.7887201 (474.44 it/sec) -training >> step=9733600, episode=1623 reward=0.7989219 (450.90 it/sec) -training >> step=9733700, episode=1623 reward=0.7842829 (443.98 it/sec) -training >> step=9733800, episode=1623 reward=0.7856781 (471.53 it/sec) -training >> step=9733900, episode=1623 reward=0.8029962 (484.10 it/sec) -training >> step=9734000, episode=1623 reward=0.7849586 (449.20 it/sec) -training >> step=9734100, episode=1623 reward=0.7762094 (426.11 it/sec) -training >> step=9734200, episode=1623 reward=0.7918743 (505.09 it/sec) -training >> step=9734300, episode=1623 reward=0.8027436 (501.55 it/sec) -training >> step=9734400, episode=1623 reward=0.7868161 (467.98 it/sec) -training >> step=9734500, episode=1623 reward=0.79132 (483.32 it/sec) -training >> step=9734600, episode=1623 reward=0.7922693 (359.29 it/sec) -training >> step=9734700, episode=1623 reward=0.7986976 (429.79 it/sec) -training >> step=9734800, episode=1623 reward=0.8022125 (472.75 it/sec) -training >> step=9734900, episode=1623 reward=0.7970822 (521.42 it/sec) -training >> step=9735000, episode=1623 reward=0.7839787 (441.62 it/sec) -training >> step=9735100, episode=1623 reward=0.8072644 (457.02 it/sec) -training >> step=9735200, episode=1623 reward=0.7804326 (484.88 it/sec) -training >> step=9735300, episode=1623 reward=0.7934991 (457.61 it/sec) -training >> step=9735400, episode=1623 reward=0.7804765 (419.90 it/sec) -training >> step=9735500, episode=1623 reward=0.784665 (449.34 it/sec) -training >> step=9735600, episode=1623 reward=0.7972385 (461.58 it/sec) -training >> step=9735700, episode=1623 reward=0.7816762 (464.72 it/sec) -training >> step=9735800, episode=1623 reward=0.782783 (502.76 it/sec) -training >> step=9735900, episode=1623 reward=0.7789412 (479.79 it/sec) -training >> step=9736000, episode=1623 reward=0.7785137 (481.97 it/sec) -training >> step=9736100, episode=1623 reward=0.7924783 (472.98 it/sec) -training >> step=9736200, episode=1623 reward=0.7964666 (456.26 it/sec) -training >> step=9736300, episode=1623 reward=0.7791198 (494.42 it/sec) -training >> step=9736400, episode=1623 reward=0.7953334 (479.97 it/sec) -training >> step=9736500, episode=1623 reward=0.7892396 (498.94 it/sec) -training >> step=9736600, episode=1623 reward=0.77452 (471.27 it/sec) -training >> step=9736700, episode=1623 reward=0.783309 (463.25 it/sec) -training >> step=9736800, episode=1623 reward=0.8015532 (460.63 it/sec) -training >> step=9736900, episode=1623 reward=0.7746786 (446.36 it/sec) -training >> step=9737000, episode=1623 reward=0.7836183 (472.59 it/sec) -training >> step=9737100, episode=1623 reward=0.7955244 (456.78 it/sec) -training >> step=9737200, episode=1623 reward=0.7689478 (474.62 it/sec) -training >> step=9737300, episode=1624 reward=0.7872825 (34.69 it/sec) -training >> step=9737400, episode=1624 reward=0.7810815 (459.73 it/sec) -training >> step=9737500, episode=1624 reward=0.7768521 (449.77 it/sec) -training >> step=9737600, episode=1624 reward=0.785205 (453.77 it/sec) -training >> step=9737700, episode=1624 reward=0.7758836 (474.13 it/sec) -training >> step=9737800, episode=1624 reward=0.7898026 (473.26 it/sec) -training >> step=9737900, episode=1624 reward=0.7802991 (474.27 it/sec) -training >> step=9738000, episode=1624 reward=0.7706509 (498.19 it/sec) -training >> step=9738100, episode=1624 reward=0.8077174 (451.80 it/sec) -training >> step=9738200, episode=1624 reward=0.781404 (474.19 it/sec) -training >> step=9738300, episode=1624 reward=0.7858303 (463.88 it/sec) -training >> step=9738400, episode=1624 reward=0.790081 (502.75 it/sec) -training >> step=9738500, episode=1624 reward=0.7834044 (467.18 it/sec) -training >> step=9738600, episode=1624 reward=0.790846 (478.14 it/sec) -training >> step=9738700, episode=1624 reward=0.7847067 (468.00 it/sec) -training >> step=9738800, episode=1624 reward=0.807524 (487.02 it/sec) -training >> step=9738900, episode=1624 reward=0.7883091 (487.81 it/sec) -training >> step=9739000, episode=1624 reward=0.7928932 (416.05 it/sec) -training >> step=9739100, episode=1624 reward=0.7935321 (486.02 it/sec) -training >> step=9739200, episode=1624 reward=0.7975382 (483.37 it/sec) -training >> step=9739300, episode=1624 reward=0.7848876 (427.22 it/sec) -training >> step=9739400, episode=1624 reward=0.7928394 (487.07 it/sec) -training >> step=9739500, episode=1624 reward=0.787639 (475.29 it/sec) -training >> step=9739600, episode=1624 reward=0.7857648 (445.30 it/sec) -training >> step=9739700, episode=1624 reward=0.8128323 (465.71 it/sec) -training >> step=9739800, episode=1624 reward=0.7892179 (490.95 it/sec) -training >> step=9739900, episode=1624 reward=0.7913838 (501.70 it/sec) -training >> step=9740000, episode=1624 reward=0.7884398 (454.72 it/sec) -training >> step=9740100, episode=1624 reward=0.7896262 (491.87 it/sec) -training >> step=9740200, episode=1624 reward=0.7864246 (448.85 it/sec) -training >> step=9740300, episode=1624 reward=0.7943102 (475.47 it/sec) -training >> step=9740400, episode=1624 reward=0.8005834 (467.66 it/sec) -training >> step=9740500, episode=1624 reward=0.8114892 (464.45 it/sec) -training >> step=9740600, episode=1624 reward=0.7953462 (500.28 it/sec) -training >> step=9740700, episode=1624 reward=0.7900406 (463.31 it/sec) -training >> step=9740800, episode=1624 reward=0.8022029 (399.90 it/sec) -training >> step=9740900, episode=1624 reward=0.7733564 (359.74 it/sec) -training >> step=9741000, episode=1624 reward=0.7983343 (463.28 it/sec) -training >> step=9741100, episode=1624 reward=0.7882103 (444.81 it/sec) -training >> step=9741200, episode=1624 reward=0.793623 (392.49 it/sec) -training >> step=9741300, episode=1624 reward=0.7928995 (426.68 it/sec) -training >> step=9741400, episode=1624 reward=0.790269 (433.37 it/sec) -training >> step=9741500, episode=1624 reward=0.7736018 (462.18 it/sec) -training >> step=9741600, episode=1624 reward=0.7882234 (489.62 it/sec) -training >> step=9741700, episode=1624 reward=0.7784306 (480.09 it/sec) -training >> step=9741800, episode=1624 reward=0.7837481 (458.97 it/sec) -training >> step=9741900, episode=1624 reward=0.795178 (447.86 it/sec) -training >> step=9742000, episode=1624 reward=0.7924833 (472.23 it/sec) -training >> step=9742100, episode=1624 reward=0.7754971 (505.69 it/sec) -training >> step=9742200, episode=1624 reward=0.7703404 (468.73 it/sec) -training >> step=9742300, episode=1624 reward=0.7717755 (473.09 it/sec) -training >> step=9742400, episode=1624 reward=0.7888809 (439.58 it/sec) -training >> step=9742500, episode=1624 reward=0.7972956 (470.79 it/sec) -training >> step=9742600, episode=1624 reward=0.779829 (495.42 it/sec) -training >> step=9742700, episode=1624 reward=0.7990793 (487.41 it/sec) -training >> step=9742800, episode=1624 reward=0.7687991 (484.18 it/sec) -training >> step=9742900, episode=1624 reward=0.7896059 (463.76 it/sec) -training >> step=9743000, episode=1624 reward=0.8048505 (431.37 it/sec) -training >> step=9743100, episode=1624 reward=0.780646 (498.19 it/sec) -training >> step=9743200, episode=1624 reward=0.7947009 (468.56 it/sec) -training >> step=9743300, episode=1625 reward=0.7824457 (41.99 it/sec) -training >> step=9743400, episode=1625 reward=0.7914402 (483.91 it/sec) -training >> step=9743500, episode=1625 reward=0.7859917 (498.33 it/sec) -training >> step=9743600, episode=1625 reward=0.7824219 (448.31 it/sec) -training >> step=9743700, episode=1625 reward=0.7851765 (487.11 it/sec) -training >> step=9743800, episode=1625 reward=0.8284127 (481.98 it/sec) -training >> step=9743900, episode=1625 reward=0.7978694 (488.45 it/sec) -training >> step=9744000, episode=1625 reward=0.8000512 (473.39 it/sec) -training >> step=9744100, episode=1625 reward=0.8021349 (464.21 it/sec) -training >> step=9744200, episode=1625 reward=0.7821087 (427.61 it/sec) -training >> step=9744300, episode=1625 reward=0.7847755 (482.70 it/sec) -training >> step=9744400, episode=1625 reward=0.795809 (536.21 it/sec) -training >> step=9744500, episode=1625 reward=0.7809644 (483.99 it/sec) -training >> step=9744600, episode=1625 reward=0.7811196 (435.93 it/sec) -training >> step=9744700, episode=1625 reward=0.7980289 (485.16 it/sec) -training >> step=9744800, episode=1625 reward=0.7987731 (482.16 it/sec) -training >> step=9744900, episode=1625 reward=0.783333 (509.29 it/sec) -training >> step=9745000, episode=1625 reward=0.8058395 (489.71 it/sec) -training >> step=9745100, episode=1625 reward=0.7856975 (506.85 it/sec) -training >> step=9745200, episode=1625 reward=0.8065962 (450.86 it/sec) -training >> step=9745300, episode=1625 reward=0.801353 (455.94 it/sec) -training >> step=9745400, episode=1625 reward=0.7920059 (493.36 it/sec) -training >> step=9745500, episode=1625 reward=0.8044896 (490.19 it/sec) -training >> step=9745600, episode=1625 reward=0.7904178 (482.17 it/sec) -training >> step=9745700, episode=1625 reward=0.7793543 (476.63 it/sec) -training >> step=9745800, episode=1625 reward=0.7824057 (483.80 it/sec) -training >> step=9745900, episode=1625 reward=0.8223924 (460.53 it/sec) -training >> step=9746000, episode=1625 reward=0.7802961 (462.47 it/sec) -training >> step=9746100, episode=1625 reward=0.7812082 (495.33 it/sec) -training >> step=9746200, episode=1625 reward=0.7942853 (486.04 it/sec) -training >> step=9746300, episode=1625 reward=0.7579497 (489.40 it/sec) -training >> step=9746400, episode=1625 reward=0.7775791 (465.53 it/sec) -training >> step=9746500, episode=1625 reward=0.803162 (513.03 it/sec) -training >> step=9746600, episode=1625 reward=0.797731 (501.76 it/sec) -training >> step=9746700, episode=1625 reward=0.7935151 (459.86 it/sec) -training >> step=9746800, episode=1625 reward=0.7922928 (457.94 it/sec) -training >> step=9746900, episode=1625 reward=0.7925217 (470.30 it/sec) -training >> step=9747000, episode=1625 reward=0.8052797 (372.51 it/sec) -training >> step=9747100, episode=1625 reward=0.7845549 (451.78 it/sec) -training >> step=9747200, episode=1625 reward=0.7945048 (449.30 it/sec) -training >> step=9747300, episode=1625 reward=0.8187516 (468.56 it/sec) -training >> step=9747400, episode=1625 reward=0.7933098 (479.26 it/sec) -training >> step=9747500, episode=1625 reward=0.7815094 (459.38 it/sec) -training >> step=9747600, episode=1625 reward=0.7771896 (483.04 it/sec) -training >> step=9747700, episode=1625 reward=0.7664927 (451.74 it/sec) -training >> step=9747800, episode=1625 reward=0.7929438 (512.49 it/sec) -training >> step=9747900, episode=1625 reward=0.7959476 (485.99 it/sec) -training >> step=9748000, episode=1625 reward=0.7986948 (513.83 it/sec) -training >> step=9748100, episode=1625 reward=0.7881819 (494.31 it/sec) -training >> step=9748200, episode=1625 reward=0.795018 (463.10 it/sec) -training >> step=9748300, episode=1625 reward=0.8023077 (469.55 it/sec) -training >> step=9748400, episode=1625 reward=0.7738633 (428.21 it/sec) -training >> step=9748500, episode=1625 reward=0.7867505 (481.22 it/sec) -training >> step=9748600, episode=1625 reward=0.8187422 (464.68 it/sec) -training >> step=9748700, episode=1625 reward=0.7916665 (471.45 it/sec) -training >> step=9748800, episode=1625 reward=0.7873554 (492.99 it/sec) -training >> step=9748900, episode=1625 reward=0.7858022 (512.01 it/sec) -training >> step=9749000, episode=1625 reward=0.7954917 (492.64 it/sec) -training >> step=9749100, episode=1625 reward=0.7821129 (455.94 it/sec) -training >> step=9749200, episode=1625 reward=0.7839854 (491.96 it/sec) -training >> step=9749300, episode=1626 reward=0.7872443 (38.10 it/sec) -training >> step=9749400, episode=1626 reward=0.7996356 (463.22 it/sec) -training >> step=9749500, episode=1626 reward=0.7865481 (445.60 it/sec) -training >> step=9749600, episode=1626 reward=0.7814564 (453.04 it/sec) -training >> step=9749700, episode=1626 reward=0.7927644 (493.26 it/sec) -training >> step=9749800, episode=1626 reward=0.7787131 (469.19 it/sec) -training >> step=9749900, episode=1626 reward=0.8039548 (486.01 it/sec) -training >> step=9750000, episode=1626 reward=0.8046946 (488.60 it/sec) -training >> step=9750100, episode=1626 reward=0.7761957 (498.70 it/sec) -training >> step=9750200, episode=1626 reward=0.7888371 (433.91 it/sec) -training >> step=9750300, episode=1626 reward=0.791617 (474.47 it/sec) -training >> step=9750400, episode=1626 reward=0.7897302 (449.64 it/sec) -training >> step=9750500, episode=1626 reward=0.787771 (434.48 it/sec) -training >> step=9750600, episode=1626 reward=0.7929162 (464.25 it/sec) -training >> step=9750700, episode=1626 reward=0.7837256 (469.38 it/sec) -training >> step=9750800, episode=1626 reward=0.7993065 (501.98 it/sec) -training >> step=9750900, episode=1626 reward=0.7783948 (426.66 it/sec) -training >> step=9751000, episode=1626 reward=0.7947382 (501.42 it/sec) -training >> step=9751100, episode=1626 reward=0.7920447 (493.59 it/sec) -training >> step=9751200, episode=1626 reward=0.7839945 (498.23 it/sec) -training >> step=9751300, episode=1626 reward=0.7929901 (476.22 it/sec) -training >> step=9751400, episode=1626 reward=0.7924083 (462.02 it/sec) -training >> step=9751500, episode=1626 reward=0.8004336 (505.29 it/sec) -training >> step=9751600, episode=1626 reward=0.8019293 (459.51 it/sec) -training >> step=9751700, episode=1626 reward=0.7971699 (463.96 it/sec) -training >> step=9751800, episode=1626 reward=0.7978919 (466.09 it/sec) -training >> step=9751900, episode=1626 reward=0.7875865 (456.55 it/sec) -training >> step=9752000, episode=1626 reward=0.7957751 (488.26 it/sec) -training >> step=9752100, episode=1626 reward=0.7896613 (422.03 it/sec) -training >> step=9752200, episode=1626 reward=0.7832855 (492.77 it/sec) -training >> step=9752300, episode=1626 reward=0.7999355 (493.33 it/sec) -training >> step=9752400, episode=1626 reward=0.768535 (461.84 it/sec) -training >> step=9752500, episode=1626 reward=0.7799439 (458.72 it/sec) -training >> step=9752600, episode=1626 reward=0.7865527 (491.69 it/sec) -training >> step=9752700, episode=1626 reward=0.798305 (489.91 it/sec) -training >> step=9752800, episode=1626 reward=0.7961446 (480.45 it/sec) -training >> step=9752900, episode=1626 reward=0.7875574 (453.32 it/sec) -training >> step=9753000, episode=1626 reward=0.789528 (491.44 it/sec) -training >> step=9753100, episode=1626 reward=0.7806939 (371.41 it/sec) -training >> step=9753200, episode=1626 reward=0.7847842 (445.77 it/sec) -training >> step=9753300, episode=1626 reward=0.7830138 (455.55 it/sec) -training >> step=9753400, episode=1626 reward=0.8017259 (508.52 it/sec) -training >> step=9753500, episode=1626 reward=0.7789977 (470.88 it/sec) -training >> step=9753600, episode=1626 reward=0.7965993 (492.76 it/sec) -training >> step=9753700, episode=1626 reward=0.7947951 (485.77 it/sec) -training >> step=9753800, episode=1626 reward=0.7839423 (500.33 it/sec) -training >> step=9753900, episode=1626 reward=0.7840175 (468.08 it/sec) -training >> step=9754000, episode=1626 reward=0.7838526 (466.65 it/sec) -training >> step=9754100, episode=1626 reward=0.7717007 (518.72 it/sec) -training >> step=9754200, episode=1626 reward=0.7950264 (502.66 it/sec) -training >> step=9754300, episode=1626 reward=0.774748 (485.00 it/sec) -training >> step=9754400, episode=1626 reward=0.8083991 (456.04 it/sec) -training >> step=9754500, episode=1626 reward=0.8084866 (503.87 it/sec) -training >> step=9754600, episode=1626 reward=0.7849672 (467.19 it/sec) -training >> step=9754700, episode=1626 reward=0.7792153 (496.51 it/sec) -training >> step=9754800, episode=1626 reward=0.7820376 (455.02 it/sec) -training >> step=9754900, episode=1626 reward=0.7816302 (480.19 it/sec) -training >> step=9755000, episode=1626 reward=0.7853648 (468.00 it/sec) -training >> step=9755100, episode=1626 reward=0.773481 (471.45 it/sec) -training >> step=9755200, episode=1626 reward=0.7601398 (513.77 it/sec) -training >> step=9755300, episode=1627 reward=0.7920032 (38.11 it/sec) -training >> step=9755400, episode=1627 reward=0.7696752 (436.57 it/sec) -training >> step=9755500, episode=1627 reward=0.7748953 (424.40 it/sec) -training >> step=9755600, episode=1627 reward=0.7925773 (418.88 it/sec) -training >> step=9755700, episode=1627 reward=0.7872854 (441.73 it/sec) -training >> step=9755800, episode=1627 reward=0.7828037 (460.15 it/sec) -training >> step=9755900, episode=1627 reward=0.775393 (448.28 it/sec) -training >> step=9756000, episode=1627 reward=0.8076526 (464.25 it/sec) -training >> step=9756100, episode=1627 reward=0.7891762 (474.16 it/sec) -training >> step=9756200, episode=1627 reward=0.8064739 (438.99 it/sec) -training >> step=9756300, episode=1627 reward=0.7812172 (443.04 it/sec) -training >> step=9756400, episode=1627 reward=0.7893268 (414.02 it/sec) -training >> step=9756500, episode=1627 reward=0.8191991 (442.10 it/sec) -training >> step=9756600, episode=1627 reward=0.782886 (456.47 it/sec) -training >> step=9756700, episode=1627 reward=0.8037896 (476.00 it/sec) -training >> step=9756800, episode=1627 reward=0.777644 (459.54 it/sec) -training >> step=9756900, episode=1627 reward=0.8004574 (437.52 it/sec) -training >> step=9757000, episode=1627 reward=0.7978101 (504.08 it/sec) -training >> step=9757100, episode=1627 reward=0.7980119 (473.47 it/sec) -training >> step=9757200, episode=1627 reward=0.8138172 (500.65 it/sec) -training >> step=9757300, episode=1627 reward=0.7877866 (475.19 it/sec) -training >> step=9757400, episode=1627 reward=0.7897481 (502.51 it/sec) -training >> step=9757500, episode=1627 reward=0.7852164 (452.23 it/sec) -training >> step=9757600, episode=1627 reward=0.814652 (481.20 it/sec) -training >> step=9757700, episode=1627 reward=0.8040378 (489.54 it/sec) -training >> step=9757800, episode=1627 reward=0.7672546 (504.17 it/sec) -training >> step=9757900, episode=1627 reward=0.7965634 (488.33 it/sec) -training >> step=9758000, episode=1627 reward=0.7858082 (459.08 it/sec) -training >> step=9758100, episode=1627 reward=0.797677 (520.90 it/sec) -training >> step=9758200, episode=1627 reward=0.7802652 (505.80 it/sec) -training >> step=9758300, episode=1627 reward=0.793837 (506.22 it/sec) -training >> step=9758400, episode=1627 reward=0.7891143 (490.85 it/sec) -training >> step=9758500, episode=1627 reward=0.7856007 (498.28 it/sec) -training >> step=9758600, episode=1627 reward=0.7871366 (473.24 it/sec) -training >> step=9758700, episode=1627 reward=0.7863238 (510.64 it/sec) -training >> step=9758800, episode=1627 reward=0.7880994 (470.21 it/sec) -training >> step=9758900, episode=1627 reward=0.7940173 (463.04 it/sec) -training >> step=9759000, episode=1627 reward=0.774662 (424.27 it/sec) -training >> step=9759100, episode=1627 reward=0.8088955 (450.05 it/sec) -training >> step=9759200, episode=1627 reward=0.793418 (331.35 it/sec) -training >> step=9759300, episode=1627 reward=0.7801877 (453.69 it/sec) -training >> step=9759400, episode=1627 reward=0.8018572 (473.35 it/sec) -training >> step=9759500, episode=1627 reward=0.8091571 (472.82 it/sec) -training >> step=9759600, episode=1627 reward=0.7931806 (482.62 it/sec) -training >> step=9759700, episode=1627 reward=0.7728896 (472.85 it/sec) -training >> step=9759800, episode=1627 reward=0.7863395 (482.68 it/sec) -training >> step=9759900, episode=1627 reward=0.7740151 (446.87 it/sec) -training >> step=9760000, episode=1627 reward=0.7878188 (376.36 it/sec) -training >> step=9760100, episode=1627 reward=0.8046273 (446.08 it/sec) -training >> step=9760200, episode=1627 reward=0.7874926 (453.07 it/sec) -training >> step=9760300, episode=1627 reward=0.7772267 (418.87 it/sec) -training >> step=9760400, episode=1627 reward=0.7807743 (307.17 it/sec) -training >> step=9760500, episode=1627 reward=0.7606181 (369.12 it/sec) -training >> step=9760600, episode=1627 reward=0.792272 (484.47 it/sec) -training >> step=9760700, episode=1627 reward=0.7965556 (392.79 it/sec) -training >> step=9760800, episode=1627 reward=0.7969244 (411.13 it/sec) -training >> step=9760900, episode=1627 reward=0.7724671 (412.76 it/sec) -training >> step=9761000, episode=1627 reward=0.798281 (428.72 it/sec) -training >> step=9761100, episode=1627 reward=0.7902461 (405.73 it/sec) -training >> step=9761200, episode=1627 reward=0.8007865 (405.36 it/sec) -training >> step=9761300, episode=1628 reward=0.7796361 (47.43 it/sec) -training >> step=9761400, episode=1628 reward=0.803636 (447.74 it/sec) -training >> step=9761500, episode=1628 reward=0.7784118 (443.90 it/sec) -training >> step=9761600, episode=1628 reward=0.8014317 (435.19 it/sec) -training >> step=9761700, episode=1628 reward=0.7856752 (463.95 it/sec) -training >> step=9761800, episode=1628 reward=0.7867877 (449.67 it/sec) -training >> step=9761900, episode=1628 reward=0.7802233 (390.72 it/sec) -training >> step=9762000, episode=1628 reward=0.7812791 (483.25 it/sec) -training >> step=9762100, episode=1628 reward=0.769232 (498.87 it/sec) -training >> step=9762200, episode=1628 reward=0.8115315 (464.56 it/sec) -training >> step=9762300, episode=1628 reward=0.8066286 (520.50 it/sec) -training >> step=9762400, episode=1628 reward=0.7948113 (493.84 it/sec) -training >> step=9762500, episode=1628 reward=0.7902866 (482.32 it/sec) -training >> step=9762600, episode=1628 reward=0.7922381 (482.46 it/sec) -training >> step=9762700, episode=1628 reward=0.7825256 (480.78 it/sec) -training >> step=9762800, episode=1628 reward=0.7807585 (484.10 it/sec) -training >> step=9762900, episode=1628 reward=0.8080126 (447.05 it/sec) -training >> step=9763000, episode=1628 reward=0.7985818 (476.85 it/sec) -training >> step=9763100, episode=1628 reward=0.8041216 (454.92 it/sec) -training >> step=9763200, episode=1628 reward=0.8046319 (438.85 it/sec) -training >> step=9763300, episode=1628 reward=0.7847671 (487.01 it/sec) -training >> step=9763400, episode=1628 reward=0.7989042 (471.12 it/sec) -training >> step=9763500, episode=1628 reward=0.8065572 (475.45 it/sec) -training >> step=9763600, episode=1628 reward=0.7931463 (449.13 it/sec) -training >> step=9763700, episode=1628 reward=0.7979115 (490.16 it/sec) -training >> step=9763800, episode=1628 reward=0.7959458 (447.82 it/sec) -training >> step=9763900, episode=1628 reward=0.7927864 (488.49 it/sec) -training >> step=9764000, episode=1628 reward=0.7976935 (502.33 it/sec) -training >> step=9764100, episode=1628 reward=0.7966231 (448.00 it/sec) -training >> step=9764200, episode=1628 reward=0.796823 (477.45 it/sec) -training >> step=9764300, episode=1628 reward=0.8026816 (454.84 it/sec) -training >> step=9764400, episode=1628 reward=0.7942637 (450.92 it/sec) -training >> step=9764500, episode=1628 reward=0.7942278 (442.42 it/sec) -training >> step=9764600, episode=1628 reward=0.7938914 (488.54 it/sec) -training >> step=9764700, episode=1628 reward=0.7946233 (422.94 it/sec) -training >> step=9764800, episode=1628 reward=0.8179631 (455.10 it/sec) -training >> step=9764900, episode=1628 reward=0.7916608 (443.58 it/sec) -training >> step=9765000, episode=1628 reward=0.8050433 (461.41 it/sec) -training >> step=9765100, episode=1628 reward=0.7738485 (451.84 it/sec) -training >> step=9765200, episode=1628 reward=0.7924944 (468.51 it/sec) -training >> step=9765300, episode=1628 reward=0.7868019 (443.44 it/sec) -training >> step=9765400, episode=1628 reward=0.7938336 (453.77 it/sec) -training >> step=9765500, episode=1628 reward=0.7981742 (318.21 it/sec) -training >> step=9765600, episode=1628 reward=0.7778291 (474.06 it/sec) -training >> step=9765700, episode=1628 reward=0.7899398 (437.17 it/sec) -training >> step=9765800, episode=1628 reward=0.7941551 (465.73 it/sec) -training >> step=9765900, episode=1628 reward=0.7780325 (464.12 it/sec) -training >> step=9766000, episode=1628 reward=0.8035386 (456.19 it/sec) -training >> step=9766100, episode=1628 reward=0.8043876 (396.80 it/sec) -training >> step=9766200, episode=1628 reward=0.7653064 (467.38 it/sec) -training >> step=9766300, episode=1628 reward=0.7964317 (333.32 it/sec) -training >> step=9766400, episode=1628 reward=0.7829932 (424.63 it/sec) -training >> step=9766500, episode=1628 reward=0.7818747 (461.78 it/sec) -training >> step=9766600, episode=1628 reward=0.7829706 (455.88 it/sec) -training >> step=9766700, episode=1628 reward=0.7968014 (443.41 it/sec) -training >> step=9766800, episode=1628 reward=0.7639147 (440.40 it/sec) -training >> step=9766900, episode=1628 reward=0.7875826 (438.00 it/sec) -training >> step=9767000, episode=1628 reward=0.8057358 (449.10 it/sec) -training >> step=9767100, episode=1628 reward=0.8003314 (436.31 it/sec) -training >> step=9767200, episode=1628 reward=0.7952724 (404.18 it/sec) -training >> step=9767300, episode=1629 reward=0.785592 (74.40 it/sec) -training >> step=9767400, episode=1629 reward=0.7758716 (426.48 it/sec) -training >> step=9767500, episode=1629 reward=0.7753879 (468.37 it/sec) -training >> step=9767600, episode=1629 reward=0.796396 (458.89 it/sec) -training >> step=9767700, episode=1629 reward=0.7897698 (468.91 it/sec) -training >> step=9767800, episode=1629 reward=0.7955084 (453.87 it/sec) -training >> step=9767900, episode=1629 reward=0.8011553 (443.64 it/sec) -training >> step=9768000, episode=1629 reward=0.7844317 (488.62 it/sec) -training >> step=9768100, episode=1629 reward=0.7901334 (441.78 it/sec) -training >> step=9768200, episode=1629 reward=0.7973131 (455.42 it/sec) -training >> step=9768300, episode=1629 reward=0.7832499 (463.64 it/sec) -training >> step=9768400, episode=1629 reward=0.7790398 (481.93 it/sec) -training >> step=9768500, episode=1629 reward=0.8076128 (463.32 it/sec) -training >> step=9768600, episode=1629 reward=0.783094 (467.79 it/sec) -training >> step=9768700, episode=1629 reward=0.7730924 (488.91 it/sec) -training >> step=9768800, episode=1629 reward=0.7931757 (475.85 it/sec) -training >> step=9768900, episode=1629 reward=0.7818746 (485.82 it/sec) -training >> step=9769000, episode=1629 reward=0.7909129 (454.40 it/sec) -training >> step=9769100, episode=1629 reward=0.8039855 (429.97 it/sec) -training >> step=9769200, episode=1629 reward=0.7894682 (446.47 it/sec) -training >> step=9769300, episode=1629 reward=0.8016196 (475.71 it/sec) -training >> step=9769400, episode=1629 reward=0.7910124 (479.42 it/sec) -training >> step=9769500, episode=1629 reward=0.8053699 (483.52 it/sec) -training >> step=9769600, episode=1629 reward=0.7932857 (439.23 it/sec) -training >> step=9769700, episode=1629 reward=0.7926542 (431.36 it/sec) -training >> step=9769800, episode=1629 reward=0.8087789 (491.22 it/sec) -training >> step=9769900, episode=1629 reward=0.7801434 (497.73 it/sec) -training >> step=9770000, episode=1629 reward=0.7998019 (488.90 it/sec) -training >> step=9770100, episode=1629 reward=0.7905799 (471.27 it/sec) -training >> step=9770200, episode=1629 reward=0.7961112 (466.60 it/sec) -training >> step=9770300, episode=1629 reward=0.798032 (462.45 it/sec) -training >> step=9770400, episode=1629 reward=0.7840702 (459.87 it/sec) -training >> step=9770500, episode=1629 reward=0.7778915 (433.15 it/sec) -training >> step=9770600, episode=1629 reward=0.7831667 (408.83 it/sec) -training >> step=9770700, episode=1629 reward=0.7789301 (459.09 it/sec) -training >> step=9770800, episode=1629 reward=0.7996776 (460.65 it/sec) -training >> step=9770900, episode=1629 reward=0.7939951 (472.93 it/sec) -training >> step=9771000, episode=1629 reward=0.7800745 (457.43 it/sec) -training >> step=9771100, episode=1629 reward=0.8106301 (445.03 it/sec) -training >> step=9771200, episode=1629 reward=0.8103216 (474.93 it/sec) -training >> step=9771300, episode=1629 reward=0.7831898 (440.00 it/sec) -training >> step=9771400, episode=1629 reward=0.7988387 (470.16 it/sec) -training >> step=9771500, episode=1629 reward=0.7778401 (417.65 it/sec) -training >> step=9771600, episode=1629 reward=0.7966775 (470.77 it/sec) -training >> step=9771700, episode=1629 reward=0.7936575 (383.87 it/sec) -training >> step=9771800, episode=1629 reward=0.7897477 (474.51 it/sec) -training >> step=9771900, episode=1629 reward=0.791217 (498.17 it/sec) -training >> step=9772000, episode=1629 reward=0.8000647 (449.40 it/sec) -training >> step=9772100, episode=1629 reward=0.7875131 (474.07 it/sec) -training >> step=9772200, episode=1629 reward=0.79794 (464.87 it/sec) -training >> step=9772300, episode=1629 reward=0.7812194 (498.88 it/sec) -training >> step=9772400, episode=1629 reward=0.7866306 (494.58 it/sec) -training >> step=9772500, episode=1629 reward=0.7930749 (430.08 it/sec) -training >> step=9772600, episode=1629 reward=0.7811514 (501.40 it/sec) -training >> step=9772700, episode=1629 reward=0.7815959 (414.68 it/sec) -training >> step=9772800, episode=1629 reward=0.8161472 (451.17 it/sec) -training >> step=9772900, episode=1629 reward=0.7799056 (449.15 it/sec) -training >> step=9773000, episode=1629 reward=0.7900515 (472.15 it/sec) -training >> step=9773100, episode=1629 reward=0.7910013 (473.70 it/sec) -training >> step=9773200, episode=1629 reward=0.7880575 (491.70 it/sec) -training >> step=9773300, episode=1630 reward=0.795401 (145.40 it/sec) -training >> step=9773400, episode=1630 reward=0.7779984 (473.23 it/sec) -training >> step=9773500, episode=1630 reward=0.7813677 (464.32 it/sec) -training >> step=9773600, episode=1630 reward=0.7934358 (498.51 it/sec) -training >> step=9773700, episode=1630 reward=0.7966319 (474.89 it/sec) -training >> step=9773800, episode=1630 reward=0.7896525 (455.98 it/sec) -training >> step=9773900, episode=1630 reward=0.797371 (513.63 it/sec) -training >> step=9774000, episode=1630 reward=0.8147273 (490.46 it/sec) -training >> step=9774100, episode=1630 reward=0.8114505 (483.51 it/sec) -training >> step=9774200, episode=1630 reward=0.79222 (433.12 it/sec) -training >> step=9774300, episode=1630 reward=0.7968098 (493.23 it/sec) -training >> step=9774400, episode=1630 reward=0.8001382 (499.17 it/sec) -training >> step=9774500, episode=1630 reward=0.7941153 (443.99 it/sec) -training >> step=9774600, episode=1630 reward=0.8175519 (466.47 it/sec) -training >> step=9774700, episode=1630 reward=0.7989655 (433.59 it/sec) -training >> step=9774800, episode=1630 reward=0.7740287 (519.12 it/sec) -training >> step=9774900, episode=1630 reward=0.8088191 (473.91 it/sec) -training >> step=9775000, episode=1630 reward=0.7918113 (461.91 it/sec) -training >> step=9775100, episode=1630 reward=0.7989843 (517.08 it/sec) -training >> step=9775200, episode=1630 reward=0.7717571 (431.90 it/sec) -training >> step=9775300, episode=1630 reward=0.7749529 (400.49 it/sec) -training >> step=9775400, episode=1630 reward=0.7822398 (503.51 it/sec) -training >> step=9775500, episode=1630 reward=0.7871929 (482.26 it/sec) -training >> step=9775600, episode=1630 reward=0.7684345 (473.83 it/sec) -training >> step=9775700, episode=1630 reward=0.8060535 (420.65 it/sec) -training >> step=9775800, episode=1630 reward=0.7958425 (488.17 it/sec) -training >> step=9775900, episode=1630 reward=0.7798746 (480.29 it/sec) -training >> step=9776000, episode=1630 reward=0.7941913 (466.52 it/sec) -training >> step=9776100, episode=1630 reward=0.8105529 (477.11 it/sec) -training >> step=9776200, episode=1630 reward=0.7897058 (453.09 it/sec) -training >> step=9776300, episode=1630 reward=0.7907503 (481.34 it/sec) -training >> step=9776400, episode=1630 reward=0.7957155 (498.11 it/sec) -training >> step=9776500, episode=1630 reward=0.7862767 (478.65 it/sec) -training >> step=9776600, episode=1630 reward=0.7731215 (515.44 it/sec) -training >> step=9776700, episode=1630 reward=0.7812241 (451.99 it/sec) -training >> step=9776800, episode=1630 reward=0.7858406 (473.48 it/sec) -training >> step=9776900, episode=1630 reward=0.7937033 (469.02 it/sec) -training >> step=9777000, episode=1630 reward=0.781826 (470.13 it/sec) -training >> step=9777100, episode=1630 reward=0.7762152 (461.05 it/sec) -training >> step=9777200, episode=1630 reward=0.7911726 (481.67 it/sec) -training >> step=9777300, episode=1630 reward=0.7906838 (525.27 it/sec) -training >> step=9777400, episode=1630 reward=0.7697189 (443.79 it/sec) -training >> step=9777500, episode=1630 reward=0.8040546 (462.59 it/sec) -training >> step=9777600, episode=1630 reward=0.7893289 (431.75 it/sec) -training >> step=9777700, episode=1630 reward=0.7889162 (515.32 it/sec) -training >> step=9777800, episode=1630 reward=0.7767264 (463.29 it/sec) -training >> step=9777900, episode=1630 reward=0.8010325 (371.04 it/sec) -training >> step=9778000, episode=1630 reward=0.8110553 (494.11 it/sec) -training >> step=9778100, episode=1630 reward=0.7784865 (433.32 it/sec) -training >> step=9778200, episode=1630 reward=0.7728556 (462.76 it/sec) -training >> step=9778300, episode=1630 reward=0.7971176 (422.34 it/sec) -training >> step=9778400, episode=1630 reward=0.7958938 (498.29 it/sec) -training >> step=9778500, episode=1630 reward=0.7784608 (484.26 it/sec) -training >> step=9778600, episode=1630 reward=0.7888894 (429.09 it/sec) -training >> step=9778700, episode=1630 reward=0.7847211 (496.67 it/sec) -training >> step=9778800, episode=1630 reward=0.7889313 (505.77 it/sec) -training >> step=9778900, episode=1630 reward=0.7958938 (486.47 it/sec) -training >> step=9779000, episode=1630 reward=0.7695062 (446.05 it/sec) -training >> step=9779100, episode=1630 reward=0.7944183 (451.93 it/sec) -training >> step=9779200, episode=1630 reward=0.7941326 (437.66 it/sec) -training >> step=9779300, episode=1631 reward=0.7878262 (198.49 it/sec) -training >> step=9779400, episode=1631 reward=0.7799858 (477.81 it/sec) -training >> step=9779500, episode=1631 reward=0.7765999 (506.15 it/sec) -training >> step=9779600, episode=1631 reward=0.782171 (498.96 it/sec) -training >> step=9779700, episode=1631 reward=0.7955546 (404.86 it/sec) -training >> step=9779800, episode=1631 reward=0.8118775 (432.11 it/sec) -training >> step=9779900, episode=1631 reward=0.8044623 (474.70 it/sec) -training >> step=9780000, episode=1631 reward=0.803288 (469.04 it/sec) -training >> step=9780100, episode=1631 reward=0.7885944 (465.41 it/sec) -training >> step=9780200, episode=1631 reward=0.7962343 (506.42 it/sec) -training >> step=9780300, episode=1631 reward=0.7778382 (460.66 it/sec) -training >> step=9780400, episode=1631 reward=0.8123547 (377.74 it/sec) -training >> step=9780500, episode=1631 reward=0.7868071 (447.58 it/sec) -training >> step=9780600, episode=1631 reward=0.799551 (516.65 it/sec) -training >> step=9780700, episode=1631 reward=0.7913106 (487.29 it/sec) -training >> step=9780800, episode=1631 reward=0.7696086 (494.06 it/sec) -training >> step=9780900, episode=1631 reward=0.7864191 (437.99 it/sec) -training >> step=9781000, episode=1631 reward=0.767981 (456.53 it/sec) -training >> step=9781100, episode=1631 reward=0.8048842 (435.85 it/sec) -training >> step=9781200, episode=1631 reward=0.8123932 (442.46 it/sec) -training >> step=9781300, episode=1631 reward=0.8135728 (475.98 it/sec) -training >> step=9781400, episode=1631 reward=0.7883522 (457.04 it/sec) -training >> step=9781500, episode=1631 reward=0.8028627 (450.49 it/sec) -training >> step=9781600, episode=1631 reward=0.7934338 (454.62 it/sec) -training >> step=9781700, episode=1631 reward=0.793846 (422.10 it/sec) -training >> step=9781800, episode=1631 reward=0.7942224 (375.87 it/sec) -training >> step=9781900, episode=1631 reward=0.7845429 (427.34 it/sec) -training >> step=9782000, episode=1631 reward=0.798067 (421.66 it/sec) -training >> step=9782100, episode=1631 reward=0.7950818 (443.62 it/sec) -training >> step=9782200, episode=1631 reward=0.7933829 (467.75 it/sec) -training >> step=9782300, episode=1631 reward=0.7885818 (447.96 it/sec) -training >> step=9782400, episode=1631 reward=0.7817824 (429.28 it/sec) -training >> step=9782500, episode=1631 reward=0.7979734 (460.82 it/sec) -training >> step=9782600, episode=1631 reward=0.7797698 (459.04 it/sec) -training >> step=9782700, episode=1631 reward=0.803194 (432.98 it/sec) -training >> step=9782800, episode=1631 reward=0.7856746 (499.49 it/sec) -training >> step=9782900, episode=1631 reward=0.8220147 (512.13 it/sec) -training >> step=9783000, episode=1631 reward=0.7917629 (499.57 it/sec) -training >> step=9783100, episode=1631 reward=0.7988808 (491.36 it/sec) -training >> step=9783200, episode=1631 reward=0.79499 (468.16 it/sec) -training >> step=9783300, episode=1631 reward=0.8087239 (455.26 it/sec) -training >> step=9783400, episode=1631 reward=0.7837241 (499.63 it/sec) -training >> step=9783500, episode=1631 reward=0.7872944 (492.73 it/sec) -training >> step=9783600, episode=1631 reward=0.7783316 (414.90 it/sec) -training >> step=9783700, episode=1631 reward=0.8077785 (411.64 it/sec) -training >> step=9783800, episode=1631 reward=0.7852995 (440.87 it/sec) -training >> step=9783900, episode=1631 reward=0.797188 (412.23 it/sec) -training >> step=9784000, episode=1631 reward=0.7868731 (378.51 it/sec) -training >> step=9784100, episode=1631 reward=0.7686039 (330.36 it/sec) -training >> step=9784200, episode=1631 reward=0.7905018 (500.36 it/sec) -training >> step=9784300, episode=1631 reward=0.7958154 (534.54 it/sec) -training >> step=9784400, episode=1631 reward=0.7902824 (529.26 it/sec) -training >> step=9784500, episode=1631 reward=0.7771555 (557.74 it/sec) -training >> step=9784600, episode=1631 reward=0.8045527 (471.48 it/sec) -training >> step=9784700, episode=1631 reward=0.808103 (500.18 it/sec) -training >> step=9784800, episode=1631 reward=0.7873265 (519.84 it/sec) -training >> step=9784900, episode=1631 reward=0.7764766 (531.03 it/sec) -training >> step=9785000, episode=1631 reward=0.7950471 (531.81 it/sec) -training >> step=9785100, episode=1631 reward=0.7890635 (517.35 it/sec) -training >> step=9785200, episode=1631 reward=0.770124 (537.61 it/sec) -training >> step=9785300, episode=1632 reward=0.7894526 (123.32 it/sec) -training >> step=9785400, episode=1632 reward=0.7840127 (506.99 it/sec) -training >> step=9785500, episode=1632 reward=0.7955111 (535.38 it/sec) -training >> step=9785600, episode=1632 reward=0.791509 (521.67 it/sec) -training >> step=9785700, episode=1632 reward=0.7913867 (514.77 it/sec) -training >> step=9785800, episode=1632 reward=0.7935075 (501.33 it/sec) -training >> step=9785900, episode=1632 reward=0.7711754 (509.01 it/sec) -training >> step=9786000, episode=1632 reward=0.7955701 (558.63 it/sec) -training >> step=9786100, episode=1632 reward=0.8080294 (510.80 it/sec) -training >> step=9786200, episode=1632 reward=0.8158821 (490.48 it/sec) -training >> step=9786300, episode=1632 reward=0.7976823 (553.86 it/sec) -training >> step=9786400, episode=1632 reward=0.7937735 (488.31 it/sec) -training >> step=9786500, episode=1632 reward=0.7883891 (522.85 it/sec) -training >> step=9786600, episode=1632 reward=0.7891671 (553.07 it/sec) -training >> step=9786700, episode=1632 reward=0.8045002 (528.98 it/sec) -training >> step=9786800, episode=1632 reward=0.8182417 (505.31 it/sec) -training >> step=9786900, episode=1632 reward=0.7911611 (486.96 it/sec) -training >> step=9787000, episode=1632 reward=0.7843221 (548.87 it/sec) -training >> step=9787100, episode=1632 reward=0.7953469 (535.07 it/sec) -training >> step=9787200, episode=1632 reward=0.7985651 (531.29 it/sec) -training >> step=9787300, episode=1632 reward=0.7935881 (506.91 it/sec) -training >> step=9787400, episode=1632 reward=0.8077316 (496.11 it/sec) -training >> step=9787500, episode=1632 reward=0.7863702 (476.44 it/sec) -training >> step=9787600, episode=1632 reward=0.8107121 (535.63 it/sec) -training >> step=9787700, episode=1632 reward=0.7905297 (523.25 it/sec) -training >> step=9787800, episode=1632 reward=0.7813177 (481.06 it/sec) -training >> step=9787900, episode=1632 reward=0.8001919 (487.56 it/sec) -training >> step=9788000, episode=1632 reward=0.7925067 (441.59 it/sec) -training >> step=9788100, episode=1632 reward=0.7875138 (521.69 it/sec) -training >> step=9788200, episode=1632 reward=0.813158 (432.78 it/sec) -training >> step=9788300, episode=1632 reward=0.7911399 (475.12 it/sec) -training >> step=9788400, episode=1632 reward=0.776957 (547.27 it/sec) -training >> step=9788500, episode=1632 reward=0.7991208 (511.11 it/sec) -training >> step=9788600, episode=1632 reward=0.7702531 (495.30 it/sec) -training >> step=9788700, episode=1632 reward=0.7731026 (528.56 it/sec) -training >> step=9788800, episode=1632 reward=0.791849 (476.79 it/sec) -training >> step=9788900, episode=1632 reward=0.7668405 (555.36 it/sec) -training >> step=9789000, episode=1632 reward=0.7950122 (482.77 it/sec) -training >> step=9789100, episode=1632 reward=0.7781708 (533.31 it/sec) -training >> step=9789200, episode=1632 reward=0.785741 (532.31 it/sec) -training >> step=9789300, episode=1632 reward=0.7842474 (537.03 it/sec) -training >> step=9789400, episode=1632 reward=0.8062078 (512.19 it/sec) -training >> step=9789500, episode=1632 reward=0.7916598 (529.17 it/sec) -training >> step=9789600, episode=1632 reward=0.7962928 (518.37 it/sec) -training >> step=9789700, episode=1632 reward=0.7944956 (544.67 it/sec) -training >> step=9789800, episode=1632 reward=0.7884353 (513.61 it/sec) -training >> step=9789900, episode=1632 reward=0.7775868 (517.61 it/sec) -training >> step=9790000, episode=1632 reward=0.7878929 (534.04 it/sec) -training >> step=9790100, episode=1632 reward=0.7936749 (488.95 it/sec) -training >> step=9790200, episode=1632 reward=0.8010502 (406.23 it/sec) -training >> step=9790300, episode=1632 reward=0.793795 (556.65 it/sec) -training >> step=9790400, episode=1632 reward=0.8082244 (476.75 it/sec) -training >> step=9790500, episode=1632 reward=0.789005 (525.50 it/sec) -training >> step=9790600, episode=1632 reward=0.8094379 (486.15 it/sec) -training >> step=9790700, episode=1632 reward=0.7795759 (554.09 it/sec) -training >> step=9790800, episode=1632 reward=0.7649629 (500.84 it/sec) -training >> step=9790900, episode=1632 reward=0.7852674 (481.67 it/sec) -training >> step=9791000, episode=1632 reward=0.8057786 (492.06 it/sec) -training >> step=9791100, episode=1632 reward=0.763337 (493.63 it/sec) -training >> step=9791200, episode=1632 reward=0.7964073 (544.99 it/sec) -training >> step=9791300, episode=1633 reward=0.7953181 (118.33 it/sec) -training >> step=9791400, episode=1633 reward=0.7857533 (509.32 it/sec) -training >> step=9791500, episode=1633 reward=0.801014 (510.75 it/sec) -training >> step=9791600, episode=1633 reward=0.7994383 (518.63 it/sec) -training >> step=9791700, episode=1633 reward=0.7916107 (434.40 it/sec) -training >> step=9791800, episode=1633 reward=0.8079391 (402.12 it/sec) -training >> step=9791900, episode=1633 reward=0.7908052 (429.28 it/sec) -training >> step=9792000, episode=1633 reward=0.783424 (452.02 it/sec) -training >> step=9792100, episode=1633 reward=0.7889293 (433.83 it/sec) -training >> step=9792200, episode=1633 reward=0.8157645 (409.44 it/sec) -training >> step=9792300, episode=1633 reward=0.8063674 (443.74 it/sec) -training >> step=9792400, episode=1633 reward=0.7806846 (471.93 it/sec) -training >> step=9792500, episode=1633 reward=0.8043528 (512.76 it/sec) -training >> step=9792600, episode=1633 reward=0.7815939 (414.05 it/sec) -training >> step=9792700, episode=1633 reward=0.8076693 (407.13 it/sec) -training >> step=9792800, episode=1633 reward=0.8037899 (447.75 it/sec) -training >> step=9792900, episode=1633 reward=0.7892575 (459.49 it/sec) -training >> step=9793000, episode=1633 reward=0.7912738 (516.74 it/sec) -training >> step=9793100, episode=1633 reward=0.8204451 (425.85 it/sec) -training >> step=9793200, episode=1633 reward=0.7993123 (455.92 it/sec) -training >> step=9793300, episode=1633 reward=0.7941806 (478.24 it/sec) -training >> step=9793400, episode=1633 reward=0.8061102 (439.62 it/sec) -training >> step=9793500, episode=1633 reward=0.7791174 (482.02 it/sec) -training >> step=9793600, episode=1633 reward=0.7942096 (432.40 it/sec) -training >> step=9793700, episode=1633 reward=0.8011951 (423.21 it/sec) -training >> step=9793800, episode=1633 reward=0.7830163 (374.93 it/sec) -training >> step=9793900, episode=1633 reward=0.7903695 (409.00 it/sec) -training >> step=9794000, episode=1633 reward=0.7809119 (370.72 it/sec) -training >> step=9794100, episode=1633 reward=0.7729812 (346.31 it/sec) -training >> step=9794200, episode=1633 reward=0.796274 (401.96 it/sec) -training >> step=9794300, episode=1633 reward=0.7837444 (376.32 it/sec) -training >> step=9794400, episode=1633 reward=0.7918516 (428.88 it/sec) -training >> step=9794500, episode=1633 reward=0.7870108 (440.55 it/sec) -training >> step=9794600, episode=1633 reward=0.7839473 (489.42 it/sec) -training >> step=9794700, episode=1633 reward=0.7925442 (491.36 it/sec) -training >> step=9794800, episode=1633 reward=0.7911305 (441.77 it/sec) -training >> step=9794900, episode=1633 reward=0.766322 (432.51 it/sec) -training >> step=9795000, episode=1633 reward=0.7962388 (460.48 it/sec) -training >> step=9795100, episode=1633 reward=0.788345 (453.66 it/sec) -training >> step=9795200, episode=1633 reward=0.7882543 (445.27 it/sec) -training >> step=9795300, episode=1633 reward=0.7997302 (480.01 it/sec) -training >> step=9795400, episode=1633 reward=0.7846938 (407.39 it/sec) -training >> step=9795500, episode=1633 reward=0.8156585 (447.38 it/sec) -training >> step=9795600, episode=1633 reward=0.7990825 (431.07 it/sec) -training >> step=9795700, episode=1633 reward=0.8046671 (464.51 it/sec) -training >> step=9795800, episode=1633 reward=0.7749387 (445.94 it/sec) -training >> step=9795900, episode=1633 reward=0.7821844 (432.65 it/sec) -training >> step=9796000, episode=1633 reward=0.787643 (472.77 it/sec) -training >> step=9796100, episode=1633 reward=0.8009269 (475.73 it/sec) -training >> step=9796200, episode=1633 reward=0.7907164 (390.99 it/sec) -training >> step=9796300, episode=1633 reward=0.7758667 (465.25 it/sec) -training >> step=9796400, episode=1633 reward=0.7907542 (357.75 it/sec) -training >> step=9796500, episode=1633 reward=0.773104 (463.27 it/sec) -training >> step=9796600, episode=1633 reward=0.7980322 (432.18 it/sec) -training >> step=9796700, episode=1633 reward=0.8046634 (501.01 it/sec) -training >> step=9796800, episode=1633 reward=0.7864131 (481.07 it/sec) -training >> step=9796900, episode=1633 reward=0.8120327 (466.09 it/sec) -training >> step=9797000, episode=1633 reward=0.798015 (429.86 it/sec) -training >> step=9797100, episode=1633 reward=0.7883431 (441.45 it/sec) -training >> step=9797200, episode=1633 reward=0.7971492 (475.98 it/sec) -training >> step=9797300, episode=1634 reward=0.7816982 (96.48 it/sec) -training >> step=9797400, episode=1634 reward=0.7877188 (406.01 it/sec) -training >> step=9797500, episode=1634 reward=0.7920225 (417.38 it/sec) -training >> step=9797600, episode=1634 reward=0.7887366 (442.85 it/sec) -training >> step=9797700, episode=1634 reward=0.7736632 (462.73 it/sec) -training >> step=9797800, episode=1634 reward=0.7908224 (451.94 it/sec) -training >> step=9797900, episode=1634 reward=0.7883085 (455.68 it/sec) -training >> step=9798000, episode=1634 reward=0.8014081 (441.11 it/sec) -training >> step=9798100, episode=1634 reward=0.7875504 (439.86 it/sec) -training >> step=9798200, episode=1634 reward=0.8049766 (463.86 it/sec) -training >> step=9798300, episode=1634 reward=0.8171797 (484.21 it/sec) -training >> step=9798400, episode=1634 reward=0.7871867 (428.68 it/sec) -training >> step=9798500, episode=1634 reward=0.7980299 (461.83 it/sec) -training >> step=9798600, episode=1634 reward=0.8142169 (509.23 it/sec) -training >> step=9798700, episode=1634 reward=0.7864769 (445.05 it/sec) -training >> step=9798800, episode=1634 reward=0.7903148 (422.12 it/sec) -training >> step=9798900, episode=1634 reward=0.785111 (515.04 it/sec) -training >> step=9799000, episode=1634 reward=0.8278609 (483.22 it/sec) -training >> step=9799100, episode=1634 reward=0.8022965 (463.98 it/sec) -training >> step=9799200, episode=1634 reward=0.818656 (444.20 it/sec) -training >> step=9799300, episode=1634 reward=0.7736776 (477.27 it/sec) -training >> step=9799400, episode=1634 reward=0.8015776 (441.29 it/sec) -training >> step=9799500, episode=1634 reward=0.7894956 (412.01 it/sec) -training >> step=9799600, episode=1634 reward=0.7781479 (452.02 it/sec) -training >> step=9799700, episode=1634 reward=0.7835361 (494.24 it/sec) -training >> step=9799800, episode=1634 reward=0.7798609 (481.93 it/sec) -training >> step=9799900, episode=1634 reward=0.7978476 (463.04 it/sec) -training >> step=9800000, episode=1634 reward=0.7838662 (467.94 it/sec) -training >> step=9800100, episode=1634 reward=0.7910572 (463.17 it/sec) -training >> step=9800200, episode=1634 reward=0.7871528 (449.59 it/sec) -training >> step=9800300, episode=1634 reward=0.817951 (488.24 it/sec) -training >> step=9800400, episode=1634 reward=0.8022422 (477.49 it/sec) -training >> step=9800500, episode=1634 reward=0.8133462 (490.44 it/sec) -training >> step=9800600, episode=1634 reward=0.7979903 (445.35 it/sec) -training >> step=9800700, episode=1634 reward=0.7799496 (484.39 it/sec) -training >> step=9800800, episode=1634 reward=0.7971994 (470.64 it/sec) -training >> step=9800900, episode=1634 reward=0.7939504 (502.34 it/sec) -training >> step=9801000, episode=1634 reward=0.7955531 (470.79 it/sec) -training >> step=9801100, episode=1634 reward=0.7875758 (463.22 it/sec) -training >> step=9801200, episode=1634 reward=0.7798912 (420.32 it/sec) -training >> step=9801300, episode=1634 reward=0.7873514 (459.54 it/sec) -training >> step=9801400, episode=1634 reward=0.7959631 (494.06 it/sec) -training >> step=9801500, episode=1634 reward=0.7941034 (463.66 it/sec) -training >> step=9801600, episode=1634 reward=0.8000428 (476.61 it/sec) -training >> step=9801700, episode=1634 reward=0.7835132 (490.24 it/sec) -training >> step=9801800, episode=1634 reward=0.8065721 (537.02 it/sec) -training >> step=9801900, episode=1634 reward=0.7864805 (459.70 it/sec) -training >> step=9802000, episode=1634 reward=0.7852978 (503.52 it/sec) -training >> step=9802100, episode=1634 reward=0.7867414 (467.75 it/sec) -training >> step=9802200, episode=1634 reward=0.7890432 (516.65 it/sec) -training >> step=9802300, episode=1634 reward=0.7844073 (488.94 it/sec) -training >> step=9802400, episode=1634 reward=0.7926462 (496.50 it/sec) -training >> step=9802500, episode=1634 reward=0.7908627 (353.99 it/sec) -training >> step=9802600, episode=1634 reward=0.7929952 (434.71 it/sec) -training >> step=9802700, episode=1634 reward=0.7888603 (501.83 it/sec) -training >> step=9802800, episode=1634 reward=0.7900488 (438.20 it/sec) -training >> step=9802900, episode=1634 reward=0.7955117 (496.19 it/sec) -training >> step=9803000, episode=1634 reward=0.7910476 (424.18 it/sec) -training >> step=9803100, episode=1634 reward=0.7897903 (462.63 it/sec) -training >> step=9803200, episode=1634 reward=0.7843421 (548.00 it/sec) -training >> step=9803300, episode=1635 reward=0.7825729 (90.38 it/sec) -training >> step=9803400, episode=1635 reward=0.7812673 (469.03 it/sec) -training >> step=9803500, episode=1635 reward=0.7900634 (451.69 it/sec) -training >> step=9803600, episode=1635 reward=0.7773883 (460.87 it/sec) -training >> step=9803700, episode=1635 reward=0.7980295 (469.06 it/sec) -training >> step=9803800, episode=1635 reward=0.7884184 (462.47 it/sec) -training >> step=9803900, episode=1635 reward=0.7961996 (432.45 it/sec) -training >> step=9804000, episode=1635 reward=0.7931185 (433.04 it/sec) -training >> step=9804100, episode=1635 reward=0.7835971 (465.46 it/sec) -training >> step=9804200, episode=1635 reward=0.7948539 (415.17 it/sec) -training >> step=9804300, episode=1635 reward=0.7849827 (463.36 it/sec) -training >> step=9804400, episode=1635 reward=0.790039 (412.84 it/sec) -training >> step=9804500, episode=1635 reward=0.7910553 (393.62 it/sec) -training >> step=9804600, episode=1635 reward=0.7746779 (437.36 it/sec) -training >> step=9804700, episode=1635 reward=0.797296 (486.34 it/sec) -training >> step=9804800, episode=1635 reward=0.7805185 (420.60 it/sec) -training >> step=9804900, episode=1635 reward=0.7997441 (497.53 it/sec) -training >> step=9805000, episode=1635 reward=0.7774003 (457.42 it/sec) -training >> step=9805100, episode=1635 reward=0.808722 (475.45 it/sec) -training >> step=9805200, episode=1635 reward=0.7893322 (502.09 it/sec) -training >> step=9805300, episode=1635 reward=0.8040096 (451.42 it/sec) -training >> step=9805400, episode=1635 reward=0.8151374 (408.46 it/sec) -training >> step=9805500, episode=1635 reward=0.7808624 (406.33 it/sec) -training >> step=9805600, episode=1635 reward=0.7821456 (469.68 it/sec) -training >> step=9805700, episode=1635 reward=0.7986111 (453.80 it/sec) -training >> step=9805800, episode=1635 reward=0.7876697 (441.94 it/sec) -training >> step=9805900, episode=1635 reward=0.8006355 (444.73 it/sec) -training >> step=9806000, episode=1635 reward=0.7959957 (472.79 it/sec) -training >> step=9806100, episode=1635 reward=0.780611 (493.36 it/sec) -training >> step=9806200, episode=1635 reward=0.7804589 (430.16 it/sec) -training >> step=9806300, episode=1635 reward=0.7826291 (409.11 it/sec) -training >> step=9806400, episode=1635 reward=0.7593515 (424.93 it/sec) -training >> step=9806500, episode=1635 reward=0.7893678 (457.42 it/sec) -training >> step=9806600, episode=1635 reward=0.7863076 (485.92 it/sec) -training >> step=9806700, episode=1635 reward=0.7762876 (422.72 it/sec) -training >> step=9806800, episode=1635 reward=0.7845533 (414.65 it/sec) -training >> step=9806900, episode=1635 reward=0.8082808 (390.80 it/sec) -training >> step=9807000, episode=1635 reward=0.783285 (381.89 it/sec) -training >> step=9807100, episode=1635 reward=0.7810705 (386.67 it/sec) -training >> step=9807200, episode=1635 reward=0.7821816 (374.00 it/sec) -training >> step=9807300, episode=1635 reward=0.7662792 (437.74 it/sec) -training >> step=9807400, episode=1635 reward=0.8023498 (447.58 it/sec) -training >> step=9807500, episode=1635 reward=0.7849081 (448.98 it/sec) -training >> step=9807600, episode=1635 reward=0.8139921 (482.24 it/sec) -training >> step=9807700, episode=1635 reward=0.7864457 (444.88 it/sec) -training >> step=9807800, episode=1635 reward=0.7782419 (472.87 it/sec) -training >> step=9807900, episode=1635 reward=0.7844288 (470.57 it/sec) -training >> step=9808000, episode=1635 reward=0.8057448 (478.03 it/sec) -training >> step=9808100, episode=1635 reward=0.778374 (473.96 it/sec) -training >> step=9808200, episode=1635 reward=0.789131 (442.42 it/sec) -training >> step=9808300, episode=1635 reward=0.7962964 (476.33 it/sec) -training >> step=9808400, episode=1635 reward=0.7762354 (433.35 it/sec) -training >> step=9808500, episode=1635 reward=0.7838762 (422.86 it/sec) -training >> step=9808600, episode=1635 reward=0.7727539 (362.35 it/sec) -training >> step=9808700, episode=1635 reward=0.7923561 (451.09 it/sec) -training >> step=9808800, episode=1635 reward=0.7816648 (417.20 it/sec) -training >> step=9808900, episode=1635 reward=0.7928952 (451.41 it/sec) -training >> step=9809000, episode=1635 reward=0.7858988 (401.04 it/sec) -training >> step=9809100, episode=1635 reward=0.8000429 (446.86 it/sec) -training >> step=9809200, episode=1635 reward=0.7955199 (485.73 it/sec) -training >> step=9809300, episode=1636 reward=0.7930444 (100.69 it/sec) -training >> step=9809400, episode=1636 reward=0.7814694 (492.24 it/sec) -training >> step=9809500, episode=1636 reward=0.7800482 (434.78 it/sec) -training >> step=9809600, episode=1636 reward=0.7999305 (412.80 it/sec) -training >> step=9809700, episode=1636 reward=0.7939206 (469.01 it/sec) -training >> step=9809800, episode=1636 reward=0.7847359 (411.80 it/sec) -training >> step=9809900, episode=1636 reward=0.7962452 (424.52 it/sec) -training >> step=9810000, episode=1636 reward=0.8014152 (443.17 it/sec) -training >> step=9810100, episode=1636 reward=0.795952 (487.91 it/sec) -training >> step=9810200, episode=1636 reward=0.7950307 (489.37 it/sec) -training >> step=9810300, episode=1636 reward=0.7958164 (464.19 it/sec) -training >> step=9810400, episode=1636 reward=0.8033416 (462.74 it/sec) -training >> step=9810500, episode=1636 reward=0.7812923 (509.58 it/sec) -training >> step=9810600, episode=1636 reward=0.8042345 (436.74 it/sec) -training >> step=9810700, episode=1636 reward=0.7956319 (493.78 it/sec) -training >> step=9810800, episode=1636 reward=0.7859199 (459.10 it/sec) -training >> step=9810900, episode=1636 reward=0.792516 (458.20 it/sec) -training >> step=9811000, episode=1636 reward=0.7993264 (507.10 it/sec) -training >> step=9811100, episode=1636 reward=0.7943389 (501.98 it/sec) -training >> step=9811200, episode=1636 reward=0.7843075 (506.15 it/sec) -training >> step=9811300, episode=1636 reward=0.7996253 (473.92 it/sec) -training >> step=9811400, episode=1636 reward=0.8018962 (481.04 it/sec) -training >> step=9811500, episode=1636 reward=0.7891437 (452.07 it/sec) -training >> step=9811600, episode=1636 reward=0.788367 (477.68 it/sec) -training >> step=9811700, episode=1636 reward=0.8003781 (485.97 it/sec) -training >> step=9811800, episode=1636 reward=0.7889881 (518.06 it/sec) -training >> step=9811900, episode=1636 reward=0.7986982 (470.49 it/sec) -training >> step=9812000, episode=1636 reward=0.7893053 (418.79 it/sec) -training >> step=9812100, episode=1636 reward=0.8205341 (434.03 it/sec) -training >> step=9812200, episode=1636 reward=0.7809094 (464.62 it/sec) -training >> step=9812300, episode=1636 reward=0.8003644 (463.58 it/sec) -training >> step=9812400, episode=1636 reward=0.810622 (445.57 it/sec) -training >> step=9812500, episode=1636 reward=0.7718409 (414.48 it/sec) -training >> step=9812600, episode=1636 reward=0.8155463 (455.21 it/sec) -training >> step=9812700, episode=1636 reward=0.7939715 (445.31 it/sec) -training >> step=9812800, episode=1636 reward=0.7992979 (434.91 it/sec) -training >> step=9812900, episode=1636 reward=0.7755992 (445.99 it/sec) -training >> step=9813000, episode=1636 reward=0.7782157 (482.39 it/sec) -training >> step=9813100, episode=1636 reward=0.7942096 (495.79 it/sec) -training >> step=9813200, episode=1636 reward=0.7833881 (505.70 it/sec) -training >> step=9813300, episode=1636 reward=0.7827475 (459.50 it/sec) -training >> step=9813400, episode=1636 reward=0.8007826 (455.17 it/sec) -training >> step=9813500, episode=1636 reward=0.7765834 (451.96 it/sec) -training >> step=9813600, episode=1636 reward=0.8178131 (512.52 it/sec) -training >> step=9813700, episode=1636 reward=0.7727388 (459.77 it/sec) -training >> step=9813800, episode=1636 reward=0.7701599 (473.84 it/sec) -training >> step=9813900, episode=1636 reward=0.7791564 (521.40 it/sec) -training >> step=9814000, episode=1636 reward=0.7812468 (474.74 it/sec) -training >> step=9814100, episode=1636 reward=0.7652513 (481.64 it/sec) -training >> step=9814200, episode=1636 reward=0.7873309 (422.34 it/sec) -training >> step=9814300, episode=1636 reward=0.7975263 (478.82 it/sec) -training >> step=9814400, episode=1636 reward=0.8103498 (466.63 it/sec) -training >> step=9814500, episode=1636 reward=0.7793249 (430.99 it/sec) -training >> step=9814600, episode=1636 reward=0.7948313 (498.35 it/sec) -training >> step=9814700, episode=1636 reward=0.7730351 (310.23 it/sec) -training >> step=9814800, episode=1636 reward=0.7943923 (450.73 it/sec) -training >> step=9814900, episode=1636 reward=0.7782838 (463.17 it/sec) -training >> step=9815000, episode=1636 reward=0.7913989 (462.94 it/sec) -training >> step=9815100, episode=1636 reward=0.7797309 (410.79 it/sec) -training >> step=9815200, episode=1636 reward=0.7675424 (436.92 it/sec) -training >> step=9815300, episode=1637 reward=0.788997 (102.70 it/sec) -training >> step=9815400, episode=1637 reward=0.7827144 (335.17 it/sec) -training >> step=9815500, episode=1637 reward=0.7842957 (516.56 it/sec) -training >> step=9815600, episode=1637 reward=0.7656752 (495.52 it/sec) -training >> step=9815700, episode=1637 reward=0.7983325 (539.81 it/sec) -training >> step=9815800, episode=1637 reward=0.7991778 (483.23 it/sec) -training >> step=9815900, episode=1637 reward=0.8163683 (493.73 it/sec) -training >> step=9816000, episode=1637 reward=0.7804712 (535.52 it/sec) -training >> step=9816100, episode=1637 reward=0.7829345 (508.99 it/sec) -training >> step=9816200, episode=1637 reward=0.7862271 (545.35 it/sec) -training >> step=9816300, episode=1637 reward=0.7958929 (550.66 it/sec) -training >> step=9816400, episode=1637 reward=0.7832138 (551.01 it/sec) -training >> step=9816500, episode=1637 reward=0.7971475 (500.79 it/sec) -training >> step=9816600, episode=1637 reward=0.7998164 (494.42 it/sec) -training >> step=9816700, episode=1637 reward=0.7803855 (493.31 it/sec) -training >> step=9816800, episode=1637 reward=0.7793815 (558.64 it/sec) -training >> step=9816900, episode=1637 reward=0.7770416 (511.18 it/sec) -training >> step=9817000, episode=1637 reward=0.7964726 (496.63 it/sec) -training >> step=9817100, episode=1637 reward=0.780288 (527.43 it/sec) -training >> step=9817200, episode=1637 reward=0.8027874 (532.92 it/sec) -training >> step=9817300, episode=1637 reward=0.7794436 (519.01 it/sec) -training >> step=9817400, episode=1637 reward=0.7907092 (495.69 it/sec) -training >> step=9817500, episode=1637 reward=0.7796912 (487.15 it/sec) -training >> step=9817600, episode=1637 reward=0.7880934 (542.26 it/sec) -training >> step=9817700, episode=1637 reward=0.784483 (516.35 it/sec) -training >> step=9817800, episode=1637 reward=0.7809897 (476.54 it/sec) -training >> step=9817900, episode=1637 reward=0.8005351 (441.78 it/sec) -training >> step=9818000, episode=1637 reward=0.7984719 (501.96 it/sec) -training >> step=9818100, episode=1637 reward=0.7836392 (528.72 it/sec) -training >> step=9818200, episode=1637 reward=0.7944745 (433.63 it/sec) -training >> step=9818300, episode=1637 reward=0.8003229 (350.89 it/sec) -training >> step=9818400, episode=1637 reward=0.7951777 (396.93 it/sec) -training >> step=9818500, episode=1637 reward=0.7907867 (425.82 it/sec) -training >> step=9818600, episode=1637 reward=0.7887307 (482.47 it/sec) -training >> step=9818700, episode=1637 reward=0.8034898 (473.61 it/sec) -training >> step=9818800, episode=1637 reward=0.7909781 (460.58 it/sec) -training >> step=9818900, episode=1637 reward=0.7828033 (529.31 it/sec) -training >> step=9819000, episode=1637 reward=0.7673451 (476.15 it/sec) -training >> step=9819100, episode=1637 reward=0.7906948 (511.78 it/sec) -training >> step=9819200, episode=1637 reward=0.7961976 (441.95 it/sec) -training >> step=9819300, episode=1637 reward=0.8024094 (453.33 it/sec) -training >> step=9819400, episode=1637 reward=0.7841623 (405.49 it/sec) -training >> step=9819500, episode=1637 reward=0.777706 (446.33 it/sec) -training >> step=9819600, episode=1637 reward=0.7851403 (448.84 it/sec) -training >> step=9819700, episode=1637 reward=0.7862557 (418.88 it/sec) -training >> step=9819800, episode=1637 reward=0.7854514 (428.26 it/sec) -training >> step=9819900, episode=1637 reward=0.7951992 (489.98 it/sec) -training >> step=9820000, episode=1637 reward=0.7987415 (465.60 it/sec) -training >> step=9820100, episode=1637 reward=0.7718174 (446.89 it/sec) -training >> step=9820200, episode=1637 reward=0.7913616 (429.37 it/sec) -training >> step=9820300, episode=1637 reward=0.7963229 (427.23 it/sec) -training >> step=9820400, episode=1637 reward=0.7998958 (437.14 it/sec) -training >> step=9820500, episode=1637 reward=0.7967843 (398.31 it/sec) -training >> step=9820600, episode=1637 reward=0.7755213 (474.32 it/sec) -training >> step=9820700, episode=1637 reward=0.7997223 (461.81 it/sec) -training >> step=9820800, episode=1637 reward=0.7914811 (484.80 it/sec) -training >> step=9820900, episode=1637 reward=0.7924745 (359.71 it/sec) -training >> step=9821000, episode=1637 reward=0.7880245 (488.30 it/sec) -training >> step=9821100, episode=1637 reward=0.7959213 (476.62 it/sec) -training >> step=9821200, episode=1637 reward=0.7909045 (460.91 it/sec) -training >> step=9821300, episode=1638 reward=0.7800888 (117.78 it/sec) -training >> step=9821400, episode=1638 reward=0.7835038 (442.00 it/sec) -training >> step=9821500, episode=1638 reward=0.7902438 (534.56 it/sec) -training >> step=9821600, episode=1638 reward=0.7812572 (522.47 it/sec) -training >> step=9821700, episode=1638 reward=0.7806472 (537.65 it/sec) -training >> step=9821800, episode=1638 reward=0.7949252 (492.05 it/sec) -training >> step=9821900, episode=1638 reward=0.7981094 (488.59 it/sec) -training >> step=9822000, episode=1638 reward=0.7949458 (510.81 it/sec) -training >> step=9822100, episode=1638 reward=0.765491 (509.95 it/sec) -training >> step=9822200, episode=1638 reward=0.7902622 (468.49 it/sec) -training >> step=9822300, episode=1638 reward=0.79251 (508.90 it/sec) -training >> step=9822400, episode=1638 reward=0.8016108 (520.89 it/sec) -training >> step=9822500, episode=1638 reward=0.8016686 (507.58 it/sec) -training >> step=9822600, episode=1638 reward=0.7914054 (482.05 it/sec) -training >> step=9822700, episode=1638 reward=0.7925565 (497.71 it/sec) -training >> step=9822800, episode=1638 reward=0.7895862 (507.72 it/sec) -training >> step=9822900, episode=1638 reward=0.795797 (517.02 it/sec) -training >> step=9823000, episode=1638 reward=0.7886299 (434.40 it/sec) -training >> step=9823100, episode=1638 reward=0.8199645 (460.85 it/sec) -training >> step=9823200, episode=1638 reward=0.7829532 (493.70 it/sec) -training >> step=9823300, episode=1638 reward=0.7865287 (468.42 it/sec) -training >> step=9823400, episode=1638 reward=0.7830912 (487.89 it/sec) -training >> step=9823500, episode=1638 reward=0.7941031 (508.26 it/sec) -training >> step=9823600, episode=1638 reward=0.7920366 (475.38 it/sec) -training >> step=9823700, episode=1638 reward=0.8014297 (504.49 it/sec) -training >> step=9823800, episode=1638 reward=0.7684686 (535.59 it/sec) -training >> step=9823900, episode=1638 reward=0.8020152 (512.19 it/sec) -training >> step=9824000, episode=1638 reward=0.8100985 (542.29 it/sec) -training >> step=9824100, episode=1638 reward=0.7784711 (533.06 it/sec) -training >> step=9824200, episode=1638 reward=0.7696502 (504.49 it/sec) -training >> step=9824300, episode=1638 reward=0.7909243 (506.29 it/sec) -training >> step=9824400, episode=1638 reward=0.7816302 (468.49 it/sec) -training >> step=9824500, episode=1638 reward=0.8101639 (475.23 it/sec) -training >> step=9824600, episode=1638 reward=0.7941377 (506.48 it/sec) -training >> step=9824700, episode=1638 reward=0.7898916 (549.10 it/sec) -training >> step=9824800, episode=1638 reward=0.789153 (475.01 it/sec) -training >> step=9824900, episode=1638 reward=0.7807711 (463.31 it/sec) -training >> step=9825000, episode=1638 reward=0.7718677 (475.30 it/sec) -training >> step=9825100, episode=1638 reward=0.7783879 (473.06 it/sec) -training >> step=9825200, episode=1638 reward=0.797721 (475.51 it/sec) -training >> step=9825300, episode=1638 reward=0.8038363 (459.14 it/sec) -training >> step=9825400, episode=1638 reward=0.7698548 (491.51 it/sec) -training >> step=9825500, episode=1638 reward=0.7869941 (409.93 it/sec) -training >> step=9825600, episode=1638 reward=0.7814703 (465.38 it/sec) -training >> step=9825700, episode=1638 reward=0.7812136 (466.32 it/sec) -training >> step=9825800, episode=1638 reward=0.7745519 (492.15 it/sec) -training >> step=9825900, episode=1638 reward=0.7887669 (483.02 it/sec) -training >> step=9826000, episode=1638 reward=0.7921971 (432.33 it/sec) -training >> step=9826100, episode=1638 reward=0.7801523 (468.57 it/sec) -training >> step=9826200, episode=1638 reward=0.7902492 (446.97 it/sec) -training >> step=9826300, episode=1638 reward=0.7902557 (463.74 it/sec) -training >> step=9826400, episode=1638 reward=0.7935062 (448.82 it/sec) -training >> step=9826500, episode=1638 reward=0.8007514 (480.37 it/sec) -training >> step=9826600, episode=1638 reward=0.7709898 (399.59 it/sec) -training >> step=9826700, episode=1638 reward=0.7837287 (394.29 it/sec) -training >> step=9826800, episode=1638 reward=0.7785429 (459.42 it/sec) -training >> step=9826900, episode=1638 reward=0.7927469 (430.74 it/sec) -training >> step=9827000, episode=1638 reward=0.8019515 (467.01 it/sec) -training >> step=9827100, episode=1638 reward=0.7980847 (366.15 it/sec) -training >> step=9827200, episode=1638 reward=0.7855786 (544.98 it/sec) -training >> step=9827300, episode=1639 reward=0.8060527 (86.48 it/sec) -training >> step=9827400, episode=1639 reward=0.7792117 (452.83 it/sec) -training >> step=9827500, episode=1639 reward=0.7770316 (464.79 it/sec) -training >> step=9827600, episode=1639 reward=0.7843913 (501.87 it/sec) -training >> step=9827700, episode=1639 reward=0.7961703 (519.69 it/sec) -training >> step=9827800, episode=1639 reward=0.7828516 (480.67 it/sec) -training >> step=9827900, episode=1639 reward=0.797605 (479.29 it/sec) -training >> step=9828000, episode=1639 reward=0.7974346 (490.10 it/sec) -training >> step=9828100, episode=1639 reward=0.7992421 (464.77 it/sec) -training >> step=9828200, episode=1639 reward=0.7993094 (468.49 it/sec) -training >> step=9828300, episode=1639 reward=0.7808338 (486.98 it/sec) -training >> step=9828400, episode=1639 reward=0.8016889 (504.96 it/sec) -training >> step=9828500, episode=1639 reward=0.7881054 (476.60 it/sec) -training >> step=9828600, episode=1639 reward=0.7970444 (422.81 it/sec) -training >> step=9828700, episode=1639 reward=0.7863615 (451.65 it/sec) -training >> step=9828800, episode=1639 reward=0.7956308 (450.74 it/sec) -training >> step=9828900, episode=1639 reward=0.8042218 (412.14 it/sec) -training >> step=9829000, episode=1639 reward=0.7902452 (454.34 it/sec) -training >> step=9829100, episode=1639 reward=0.7961193 (405.48 it/sec) -training >> step=9829200, episode=1639 reward=0.8014991 (431.07 it/sec) -training >> step=9829300, episode=1639 reward=0.7978082 (491.95 it/sec) -training >> step=9829400, episode=1639 reward=0.7911982 (484.98 it/sec) -training >> step=9829500, episode=1639 reward=0.7979811 (458.38 it/sec) -training >> step=9829600, episode=1639 reward=0.7854675 (510.99 it/sec) -training >> step=9829700, episode=1639 reward=0.7839044 (498.05 it/sec) -training >> step=9829800, episode=1639 reward=0.7844521 (512.42 it/sec) -training >> step=9829900, episode=1639 reward=0.7958361 (520.61 it/sec) -training >> step=9830000, episode=1639 reward=0.7949046 (492.77 it/sec) -training >> step=9830100, episode=1639 reward=0.7924512 (515.57 it/sec) -training >> step=9830200, episode=1639 reward=0.8023559 (482.97 it/sec) -training >> step=9830300, episode=1639 reward=0.7847369 (436.97 it/sec) -training >> step=9830400, episode=1639 reward=0.7872549 (480.25 it/sec) -training >> step=9830500, episode=1639 reward=0.7924123 (479.70 it/sec) -training >> step=9830600, episode=1639 reward=0.7987358 (506.49 it/sec) -training >> step=9830700, episode=1639 reward=0.7911149 (515.66 it/sec) -training >> step=9830800, episode=1639 reward=0.8092721 (511.91 it/sec) -training >> step=9830900, episode=1639 reward=0.7827212 (428.51 it/sec) -training >> step=9831000, episode=1639 reward=0.7894402 (424.79 it/sec) -training >> step=9831100, episode=1639 reward=0.8156718 (415.45 it/sec) -training >> step=9831200, episode=1639 reward=0.7893429 (428.45 it/sec) -training >> step=9831300, episode=1639 reward=0.7856867 (445.41 it/sec) -training >> step=9831400, episode=1639 reward=0.7971756 (500.15 it/sec) -training >> step=9831500, episode=1639 reward=0.7840489 (432.50 it/sec) -training >> step=9831600, episode=1639 reward=0.803258 (516.57 it/sec) -training >> step=9831700, episode=1639 reward=0.8051445 (501.12 it/sec) -training >> step=9831800, episode=1639 reward=0.7718437 (485.57 it/sec) -training >> step=9831900, episode=1639 reward=0.7897639 (540.00 it/sec) -training >> step=9832000, episode=1639 reward=0.7971877 (468.66 it/sec) -training >> step=9832100, episode=1639 reward=0.7801626 (405.98 it/sec) -training >> step=9832200, episode=1639 reward=0.7826442 (468.89 it/sec) -training >> step=9832300, episode=1639 reward=0.7929116 (465.82 it/sec) -training >> step=9832400, episode=1639 reward=0.7928653 (488.18 it/sec) -training >> step=9832500, episode=1639 reward=0.7933137 (504.40 it/sec) -training >> step=9832600, episode=1639 reward=0.7807183 (539.93 it/sec) -training >> step=9832700, episode=1639 reward=0.8096039 (474.71 it/sec) -training >> step=9832800, episode=1639 reward=0.8026371 (447.71 it/sec) -training >> step=9832900, episode=1639 reward=0.7904643 (410.59 it/sec) -training >> step=9833000, episode=1639 reward=0.796323 (410.61 it/sec) -training >> step=9833100, episode=1639 reward=0.790473 (419.88 it/sec) -training >> step=9833200, episode=1639 reward=0.7867213 (390.20 it/sec) -training >> step=9833300, episode=1640 reward=0.7950655 (88.98 it/sec) -training >> step=9833400, episode=1640 reward=0.8126175 (485.25 it/sec) -training >> step=9833500, episode=1640 reward=0.7978804 (532.82 it/sec) -training >> step=9833600, episode=1640 reward=0.7760258 (503.65 it/sec) -training >> step=9833700, episode=1640 reward=0.7938504 (540.93 it/sec) -training >> step=9833800, episode=1640 reward=0.7798789 (485.23 it/sec) -training >> step=9833900, episode=1640 reward=0.7803599 (532.84 it/sec) -training >> step=9834000, episode=1640 reward=0.8035218 (475.91 it/sec) -training >> step=9834100, episode=1640 reward=0.7770936 (500.30 it/sec) -training >> step=9834200, episode=1640 reward=0.7988958 (507.25 it/sec) -training >> step=9834300, episode=1640 reward=0.798036 (469.38 it/sec) -training >> step=9834400, episode=1640 reward=0.7630209 (469.01 it/sec) -training >> step=9834500, episode=1640 reward=0.7880278 (455.91 it/sec) -training >> step=9834600, episode=1640 reward=0.8008124 (480.13 it/sec) -training >> step=9834700, episode=1640 reward=0.7871815 (492.35 it/sec) -training >> step=9834800, episode=1640 reward=0.7797285 (435.53 it/sec) -training >> step=9834900, episode=1640 reward=0.8136682 (435.09 it/sec) -training >> step=9835000, episode=1640 reward=0.7842538 (438.88 it/sec) -training >> step=9835100, episode=1640 reward=0.7872537 (447.80 it/sec) -training >> step=9835200, episode=1640 reward=0.7940528 (513.68 it/sec) -training >> step=9835300, episode=1640 reward=0.7880861 (451.29 it/sec) -training >> step=9835400, episode=1640 reward=0.7929408 (485.29 it/sec) -training >> step=9835500, episode=1640 reward=0.7680424 (497.23 it/sec) -training >> step=9835600, episode=1640 reward=0.7911174 (500.08 it/sec) -training >> step=9835700, episode=1640 reward=0.8054796 (486.50 it/sec) -training >> step=9835800, episode=1640 reward=0.7799715 (475.41 it/sec) -training >> step=9835900, episode=1640 reward=0.7921647 (500.82 it/sec) -training >> step=9836000, episode=1640 reward=0.7960032 (485.08 it/sec) -training >> step=9836100, episode=1640 reward=0.7726377 (450.08 it/sec) -training >> step=9836200, episode=1640 reward=0.801571 (461.17 it/sec) -training >> step=9836300, episode=1640 reward=0.7962743 (387.15 it/sec) -training >> step=9836400, episode=1640 reward=0.7920595 (469.32 it/sec) -training >> step=9836500, episode=1640 reward=0.7972549 (482.18 it/sec) -training >> step=9836600, episode=1640 reward=0.8106595 (533.57 it/sec) -training >> step=9836700, episode=1640 reward=0.8114274 (449.93 it/sec) -training >> step=9836800, episode=1640 reward=0.7907839 (507.09 it/sec) -training >> step=9836900, episode=1640 reward=0.7987858 (478.70 it/sec) -training >> step=9837000, episode=1640 reward=0.7961689 (501.25 it/sec) -training >> step=9837100, episode=1640 reward=0.8031581 (432.05 it/sec) -training >> step=9837200, episode=1640 reward=0.8200806 (397.11 it/sec) -training >> step=9837300, episode=1640 reward=0.7847679 (437.60 it/sec) -training >> step=9837400, episode=1640 reward=0.7999548 (443.39 it/sec) -training >> step=9837500, episode=1640 reward=0.7713968 (444.64 it/sec) -training >> step=9837600, episode=1640 reward=0.7980741 (444.14 it/sec) -training >> step=9837700, episode=1640 reward=0.7854051 (422.48 it/sec) -training >> step=9837800, episode=1640 reward=0.796158 (422.84 it/sec) -training >> step=9837900, episode=1640 reward=0.8052422 (533.82 it/sec) -training >> step=9838000, episode=1640 reward=0.8081828 (553.60 it/sec) -training >> step=9838100, episode=1640 reward=0.7913512 (442.31 it/sec) -training >> step=9838200, episode=1640 reward=0.7886373 (513.59 it/sec) -training >> step=9838300, episode=1640 reward=0.7789297 (471.98 it/sec) -training >> step=9838400, episode=1640 reward=0.7929056 (414.39 it/sec) -training >> step=9838500, episode=1640 reward=0.7825361 (476.15 it/sec) -training >> step=9838600, episode=1640 reward=0.7867351 (456.67 it/sec) -training >> step=9838700, episode=1640 reward=0.7841119 (454.06 it/sec) -training >> step=9838800, episode=1640 reward=0.7645116 (465.76 it/sec) -training >> step=9838900, episode=1640 reward=0.7823645 (456.00 it/sec) -training >> step=9839000, episode=1640 reward=0.8003943 (455.95 it/sec) -training >> step=9839100, episode=1640 reward=0.7661008 (474.84 it/sec) -training >> step=9839200, episode=1640 reward=0.8012425 (482.71 it/sec) -training >> step=9839300, episode=1641 reward=0.7868392 (115.49 it/sec) -training >> step=9839400, episode=1641 reward=0.7791089 (533.60 it/sec) -training >> step=9839500, episode=1641 reward=0.7809086 (457.19 it/sec) -training >> step=9839600, episode=1641 reward=0.7944154 (529.32 it/sec) -training >> step=9839700, episode=1641 reward=0.7928333 (516.80 it/sec) -training >> step=9839800, episode=1641 reward=0.798756 (497.71 it/sec) -training >> step=9839900, episode=1641 reward=0.801112 (533.76 it/sec) -training >> step=9840000, episode=1641 reward=0.7800229 (487.92 it/sec) -training >> step=9840100, episode=1641 reward=0.7726793 (523.72 it/sec) -training >> step=9840200, episode=1641 reward=0.8030088 (551.61 it/sec) -training >> step=9840300, episode=1641 reward=0.8018279 (487.16 it/sec) -training >> step=9840400, episode=1641 reward=0.789683 (483.42 it/sec) -training >> step=9840500, episode=1641 reward=0.7982419 (530.70 it/sec) -training >> step=9840600, episode=1641 reward=0.7858093 (484.21 it/sec) -training >> step=9840700, episode=1641 reward=0.8153328 (533.79 it/sec) -training >> step=9840800, episode=1641 reward=0.7991939 (459.76 it/sec) -training >> step=9840900, episode=1641 reward=0.8012822 (487.74 it/sec) -training >> step=9841000, episode=1641 reward=0.7718813 (487.45 it/sec) -training >> step=9841100, episode=1641 reward=0.7776649 (523.49 it/sec) -training >> step=9841200, episode=1641 reward=0.7808316 (546.87 it/sec) -training >> step=9841300, episode=1641 reward=0.7942311 (466.51 it/sec) -training >> step=9841400, episode=1641 reward=0.7697468 (535.61 it/sec) -training >> step=9841500, episode=1641 reward=0.7792605 (518.78 it/sec) -training >> step=9841600, episode=1641 reward=0.7991486 (490.86 it/sec) -training >> step=9841700, episode=1641 reward=0.7789032 (563.45 it/sec) -training >> step=9841800, episode=1641 reward=0.7909038 (490.96 it/sec) -training >> step=9841900, episode=1641 reward=0.7998071 (521.40 it/sec) -training >> step=9842000, episode=1641 reward=0.7848027 (520.79 it/sec) -training >> step=9842100, episode=1641 reward=0.7849609 (516.79 it/sec) -training >> step=9842200, episode=1641 reward=0.80467 (510.33 it/sec) -training >> step=9842300, episode=1641 reward=0.7916344 (508.47 it/sec) -training >> step=9842400, episode=1641 reward=0.7864981 (483.64 it/sec) -training >> step=9842500, episode=1641 reward=0.7891298 (516.49 it/sec) -training >> step=9842600, episode=1641 reward=0.7793837 (531.29 it/sec) -training >> step=9842700, episode=1641 reward=0.8064775 (546.46 it/sec) -training >> step=9842800, episode=1641 reward=0.781112 (437.34 it/sec) -training >> step=9842900, episode=1641 reward=0.7981292 (502.88 it/sec) -training >> step=9843000, episode=1641 reward=0.8063115 (524.16 it/sec) -training >> step=9843100, episode=1641 reward=0.7891755 (463.74 it/sec) -training >> step=9843200, episode=1641 reward=0.8132486 (517.21 it/sec) -training >> step=9843300, episode=1641 reward=0.7770733 (569.23 it/sec) -training >> step=9843400, episode=1641 reward=0.7901943 (504.17 it/sec) -training >> step=9843500, episode=1641 reward=0.8065597 (529.08 it/sec) -training >> step=9843600, episode=1641 reward=0.7901224 (513.26 it/sec) -training >> step=9843700, episode=1641 reward=0.7773771 (575.62 it/sec) -training >> step=9843800, episode=1641 reward=0.7633705 (537.85 it/sec) -training >> step=9843900, episode=1641 reward=0.8089951 (529.14 it/sec) -training >> step=9844000, episode=1641 reward=0.798418 (517.33 it/sec) -training >> step=9844100, episode=1641 reward=0.7967048 (478.21 it/sec) -training >> step=9844200, episode=1641 reward=0.8029966 (539.70 it/sec) -training >> step=9844300, episode=1641 reward=0.7799342 (539.25 it/sec) -training >> step=9844400, episode=1641 reward=0.8133054 (559.81 it/sec) -training >> step=9844500, episode=1641 reward=0.8005273 (486.28 it/sec) -training >> step=9844600, episode=1641 reward=0.7895638 (496.10 it/sec) -training >> step=9844700, episode=1641 reward=0.7935849 (513.23 it/sec) -training >> step=9844800, episode=1641 reward=0.7985049 (544.95 it/sec) -training >> step=9844900, episode=1641 reward=0.7909372 (520.71 it/sec) -training >> step=9845000, episode=1641 reward=0.7857321 (507.39 it/sec) -training >> step=9845100, episode=1641 reward=0.7933998 (515.87 it/sec) -training >> step=9845200, episode=1641 reward=0.7978261 (513.13 it/sec) -training >> step=9845300, episode=1642 reward=0.7849283 (109.52 it/sec) -training >> step=9845400, episode=1642 reward=0.7905902 (504.96 it/sec) -training >> step=9845500, episode=1642 reward=0.7948805 (474.62 it/sec) -training >> step=9845600, episode=1642 reward=0.7664786 (464.05 it/sec) -training >> step=9845700, episode=1642 reward=0.7951498 (502.92 it/sec) -training >> step=9845800, episode=1642 reward=0.7807302 (520.32 it/sec) -training >> step=9845900, episode=1642 reward=0.775673 (452.83 it/sec) -training >> step=9846000, episode=1642 reward=0.7996194 (527.26 it/sec) -training >> step=9846100, episode=1642 reward=0.7810974 (506.54 it/sec) -training >> step=9846200, episode=1642 reward=0.7755971 (551.91 it/sec) -training >> step=9846300, episode=1642 reward=0.7838342 (455.47 it/sec) -training >> step=9846400, episode=1642 reward=0.8021139 (441.13 it/sec) -training >> step=9846500, episode=1642 reward=0.7902735 (437.58 it/sec) -training >> step=9846600, episode=1642 reward=0.8047103 (400.01 it/sec) -training >> step=9846700, episode=1642 reward=0.7657117 (376.98 it/sec) -training >> step=9846800, episode=1642 reward=0.7895378 (395.00 it/sec) -training >> step=9846900, episode=1642 reward=0.798032 (480.53 it/sec) -training >> step=9847000, episode=1642 reward=0.8019902 (492.01 it/sec) -training >> step=9847100, episode=1642 reward=0.7968339 (476.52 it/sec) -training >> step=9847200, episode=1642 reward=0.7776788 (502.60 it/sec) -training >> step=9847300, episode=1642 reward=0.8029761 (498.99 it/sec) -training >> step=9847400, episode=1642 reward=0.789527 (457.62 it/sec) -training >> step=9847500, episode=1642 reward=0.7982274 (480.43 it/sec) -training >> step=9847600, episode=1642 reward=0.8063483 (432.56 it/sec) -training >> step=9847700, episode=1642 reward=0.8038712 (413.54 it/sec) -training >> step=9847800, episode=1642 reward=0.7716068 (471.80 it/sec) -training >> step=9847900, episode=1642 reward=0.7782413 (499.28 it/sec) -training >> step=9848000, episode=1642 reward=0.789737 (537.10 it/sec) -training >> step=9848100, episode=1642 reward=0.8004553 (506.94 it/sec) -training >> step=9848200, episode=1642 reward=0.7872605 (476.12 it/sec) -training >> step=9848300, episode=1642 reward=0.7840154 (422.41 it/sec) -training >> step=9848400, episode=1642 reward=0.7888923 (440.56 it/sec) -training >> step=9848500, episode=1642 reward=0.7812764 (378.46 it/sec) -training >> step=9848600, episode=1642 reward=0.7822674 (362.38 it/sec) -training >> step=9848700, episode=1642 reward=0.7829444 (390.74 it/sec) -training >> step=9848800, episode=1642 reward=0.7868005 (485.55 it/sec) -training >> step=9848900, episode=1642 reward=0.787323 (455.12 it/sec) -training >> step=9849000, episode=1642 reward=0.7916493 (443.63 it/sec) -training >> step=9849100, episode=1642 reward=0.7927254 (440.85 it/sec) -training >> step=9849200, episode=1642 reward=0.7975411 (424.82 it/sec) -training >> step=9849300, episode=1642 reward=0.7983465 (456.51 it/sec) -training >> step=9849400, episode=1642 reward=0.8011307 (442.61 it/sec) -training >> step=9849500, episode=1642 reward=0.8007346 (423.74 it/sec) -training >> step=9849600, episode=1642 reward=0.802071 (475.02 it/sec) -training >> step=9849700, episode=1642 reward=0.7780797 (529.41 it/sec) -training >> step=9849800, episode=1642 reward=0.7769422 (558.89 it/sec) -training >> step=9849900, episode=1642 reward=0.7771577 (511.75 it/sec) -training >> step=9850000, episode=1642 reward=0.8001189 (435.85 it/sec) -training >> step=9850100, episode=1642 reward=0.8041387 (506.89 it/sec) -training >> step=9850200, episode=1642 reward=0.8096946 (497.50 it/sec) -training >> step=9850300, episode=1642 reward=0.7921639 (494.03 it/sec) -training >> step=9850400, episode=1642 reward=0.7725092 (480.54 it/sec) -training >> step=9850500, episode=1642 reward=0.8070857 (468.51 it/sec) -training >> step=9850600, episode=1642 reward=0.7888479 (481.83 it/sec) -training >> step=9850700, episode=1642 reward=0.7915686 (463.41 it/sec) -training >> step=9850800, episode=1642 reward=0.7883702 (449.63 it/sec) -training >> step=9850900, episode=1642 reward=0.7896353 (558.01 it/sec) -training >> step=9851000, episode=1642 reward=0.7881256 (460.10 it/sec) -training >> step=9851100, episode=1642 reward=0.7885607 (495.99 it/sec) -training >> step=9851200, episode=1642 reward=0.7834361 (505.10 it/sec) -training >> step=9851300, episode=1643 reward=0.7882981 (68.53 it/sec) -training >> step=9851400, episode=1643 reward=0.7926199 (393.15 it/sec) -training >> step=9851500, episode=1643 reward=0.7777495 (543.79 it/sec) -training >> step=9851600, episode=1643 reward=0.7969432 (464.76 it/sec) -training >> step=9851700, episode=1643 reward=0.780711 (475.41 it/sec) -training >> step=9851800, episode=1643 reward=0.7807749 (475.36 it/sec) -training >> step=9851900, episode=1643 reward=0.7968555 (431.00 it/sec) -training >> step=9852000, episode=1643 reward=0.8000804 (490.28 it/sec) -training >> step=9852100, episode=1643 reward=0.777968 (505.15 it/sec) -training >> step=9852200, episode=1643 reward=0.788084 (487.20 it/sec) -training >> step=9852300, episode=1643 reward=0.7842673 (528.85 it/sec) -training >> step=9852400, episode=1643 reward=0.7931445 (488.19 it/sec) -training >> step=9852500, episode=1643 reward=0.7818098 (520.68 it/sec) -training >> step=9852600, episode=1643 reward=0.7818334 (545.30 it/sec) -training >> step=9852700, episode=1643 reward=0.7911262 (547.29 it/sec) -training >> step=9852800, episode=1643 reward=0.8055676 (546.22 it/sec) -training >> step=9852900, episode=1643 reward=0.7922964 (472.20 it/sec) -training >> step=9853000, episode=1643 reward=0.7994861 (533.39 it/sec) -training >> step=9853100, episode=1643 reward=0.8039301 (489.51 it/sec) -training >> step=9853200, episode=1643 reward=0.7922264 (504.68 it/sec) -training >> step=9853300, episode=1643 reward=0.8021576 (494.79 it/sec) -training >> step=9853400, episode=1643 reward=0.7910302 (476.10 it/sec) -training >> step=9853500, episode=1643 reward=0.7888881 (512.28 it/sec) -training >> step=9853600, episode=1643 reward=0.8017657 (527.82 it/sec) -training >> step=9853700, episode=1643 reward=0.7726812 (502.91 it/sec) -training >> step=9853800, episode=1643 reward=0.7988718 (535.28 it/sec) -training >> step=9853900, episode=1643 reward=0.7965426 (536.37 it/sec) -training >> step=9854000, episode=1643 reward=0.7783643 (479.42 it/sec) -training >> step=9854100, episode=1643 reward=0.797115 (544.91 it/sec) -training >> step=9854200, episode=1643 reward=0.7953021 (558.62 it/sec) -training >> step=9854300, episode=1643 reward=0.8056135 (491.72 it/sec) -training >> step=9854400, episode=1643 reward=0.8014998 (513.74 it/sec) -training >> step=9854500, episode=1643 reward=0.7922502 (450.70 it/sec) -training >> step=9854600, episode=1643 reward=0.7770491 (492.52 it/sec) -training >> step=9854700, episode=1643 reward=0.8034135 (534.22 it/sec) -training >> step=9854800, episode=1643 reward=0.7899877 (500.48 it/sec) -training >> step=9854900, episode=1643 reward=0.788933 (539.63 it/sec) -training >> step=9855000, episode=1643 reward=0.7980733 (501.01 it/sec) -training >> step=9855100, episode=1643 reward=0.8113618 (487.73 it/sec) -training >> step=9855200, episode=1643 reward=0.7931727 (533.48 it/sec) -training >> step=9855300, episode=1643 reward=0.7922161 (550.11 it/sec) -training >> step=9855400, episode=1643 reward=0.7997988 (475.44 it/sec) -training >> step=9855500, episode=1643 reward=0.7576242 (486.12 it/sec) -training >> step=9855600, episode=1643 reward=0.7983009 (431.79 it/sec) -training >> step=9855700, episode=1643 reward=0.7887614 (564.58 it/sec) -training >> step=9855800, episode=1643 reward=0.7789304 (479.65 it/sec) -training >> step=9855900, episode=1643 reward=0.8016372 (530.83 it/sec) -training >> step=9856000, episode=1643 reward=0.7858123 (519.17 it/sec) -training >> step=9856100, episode=1643 reward=0.8050649 (472.45 it/sec) -training >> step=9856200, episode=1643 reward=0.799714 (528.19 it/sec) -training >> step=9856300, episode=1643 reward=0.764713 (557.97 it/sec) -training >> step=9856400, episode=1643 reward=0.8002937 (550.20 it/sec) -training >> step=9856500, episode=1643 reward=0.785181 (510.92 it/sec) -training >> step=9856600, episode=1643 reward=0.7877699 (489.66 it/sec) -training >> step=9856700, episode=1643 reward=0.7757893 (513.46 it/sec) -training >> step=9856800, episode=1643 reward=0.776326 (545.76 it/sec) -training >> step=9856900, episode=1643 reward=0.801353 (528.06 it/sec) -training >> step=9857000, episode=1643 reward=0.7699701 (516.86 it/sec) -training >> step=9857100, episode=1643 reward=0.7666324 (568.31 it/sec) -training >> step=9857200, episode=1643 reward=0.8077542 (455.29 it/sec) -training >> step=9857300, episode=1644 reward=0.8074865 (135.70 it/sec) -training >> step=9857400, episode=1644 reward=0.7743049 (517.06 it/sec) -training >> step=9857500, episode=1644 reward=0.7757685 (532.88 it/sec) -training >> step=9857600, episode=1644 reward=0.7928748 (532.92 it/sec) -training >> step=9857700, episode=1644 reward=0.8150072 (534.29 it/sec) -training >> step=9857800, episode=1644 reward=0.8040836 (513.48 it/sec) -training >> step=9857900, episode=1644 reward=0.7890039 (531.10 it/sec) -training >> step=9858000, episode=1644 reward=0.7880821 (493.31 it/sec) -training >> step=9858100, episode=1644 reward=0.7906089 (545.88 it/sec) -training >> step=9858200, episode=1644 reward=0.7807779 (565.83 it/sec) -training >> step=9858300, episode=1644 reward=0.8108749 (511.82 it/sec) -training >> step=9858400, episode=1644 reward=0.7999062 (491.58 it/sec) -training >> step=9858500, episode=1644 reward=0.7904895 (484.38 it/sec) -training >> step=9858600, episode=1644 reward=0.8059617 (505.98 it/sec) -training >> step=9858700, episode=1644 reward=0.783522 (544.59 it/sec) -training >> step=9858800, episode=1644 reward=0.8036999 (544.09 it/sec) -training >> step=9858900, episode=1644 reward=0.8120493 (557.95 it/sec) -training >> step=9859000, episode=1644 reward=0.7806022 (522.42 it/sec) -training >> step=9859100, episode=1644 reward=0.7720678 (471.28 it/sec) -training >> step=9859200, episode=1644 reward=0.7880923 (536.96 it/sec) -training >> step=9859300, episode=1644 reward=0.7927771 (531.41 it/sec) -training >> step=9859400, episode=1644 reward=0.7764928 (511.31 it/sec) -training >> step=9859500, episode=1644 reward=0.7854874 (474.58 it/sec) -training >> step=9859600, episode=1644 reward=0.7880632 (402.36 it/sec) -training >> step=9859700, episode=1644 reward=0.7934031 (460.62 it/sec) -training >> step=9859800, episode=1644 reward=0.795037 (498.98 it/sec) -training >> step=9859900, episode=1644 reward=0.7623541 (454.64 it/sec) -training >> step=9860000, episode=1644 reward=0.7908383 (523.65 it/sec) -training >> step=9860100, episode=1644 reward=0.7830611 (388.17 it/sec) -training >> step=9860200, episode=1644 reward=0.7869206 (378.51 it/sec) -training >> step=9860300, episode=1644 reward=0.7981534 (370.58 it/sec) -training >> step=9860400, episode=1644 reward=0.7901294 (376.87 it/sec) -training >> step=9860500, episode=1644 reward=0.787122 (419.19 it/sec) -training >> step=9860600, episode=1644 reward=0.788131 (522.00 it/sec) -training >> step=9860700, episode=1644 reward=0.801659 (517.37 it/sec) -training >> step=9860800, episode=1644 reward=0.7747805 (497.93 it/sec) -training >> step=9860900, episode=1644 reward=0.7713519 (527.56 it/sec) -training >> step=9861000, episode=1644 reward=0.8107325 (493.93 it/sec) -training >> step=9861100, episode=1644 reward=0.7999398 (565.31 it/sec) -training >> step=9861200, episode=1644 reward=0.7781946 (448.59 it/sec) -training >> step=9861300, episode=1644 reward=0.7825954 (448.08 it/sec) -training >> step=9861400, episode=1644 reward=0.7975906 (449.11 it/sec) -training >> step=9861500, episode=1644 reward=0.7940419 (449.39 it/sec) -training >> step=9861600, episode=1644 reward=0.7917244 (482.53 it/sec) -training >> step=9861700, episode=1644 reward=0.7626963 (470.35 it/sec) -training >> step=9861800, episode=1644 reward=0.7859004 (513.21 it/sec) -training >> step=9861900, episode=1644 reward=0.7880053 (506.55 it/sec) -training >> step=9862000, episode=1644 reward=0.7781625 (455.18 it/sec) -training >> step=9862100, episode=1644 reward=0.7697658 (508.61 it/sec) -training >> step=9862200, episode=1644 reward=0.795625 (473.67 it/sec) -training >> step=9862300, episode=1644 reward=0.7928267 (495.22 it/sec) -training >> step=9862400, episode=1644 reward=0.7945879 (468.69 it/sec) -training >> step=9862500, episode=1644 reward=0.802779 (480.28 it/sec) -training >> step=9862600, episode=1644 reward=0.7820862 (478.91 it/sec) -training >> step=9862700, episode=1644 reward=0.8000023 (500.63 it/sec) -training >> step=9862800, episode=1644 reward=0.7916513 (530.50 it/sec) -training >> step=9862900, episode=1644 reward=0.7912555 (567.21 it/sec) -training >> step=9863000, episode=1644 reward=0.8077489 (530.37 it/sec) -training >> step=9863100, episode=1644 reward=0.8102355 (472.51 it/sec) -training >> step=9863200, episode=1644 reward=0.7903419 (541.26 it/sec) -training >> step=9863300, episode=1645 reward=0.797811 (118.59 it/sec) -training >> step=9863400, episode=1645 reward=0.7940049 (505.45 it/sec) -training >> step=9863500, episode=1645 reward=0.7876881 (523.17 it/sec) -training >> step=9863600, episode=1645 reward=0.7794269 (552.17 it/sec) -training >> step=9863700, episode=1645 reward=0.7802946 (567.52 it/sec) -training >> step=9863800, episode=1645 reward=0.8098556 (475.84 it/sec) -training >> step=9863900, episode=1645 reward=0.7711359 (531.14 it/sec) -training >> step=9864000, episode=1645 reward=0.7925081 (469.89 it/sec) -training >> step=9864100, episode=1645 reward=0.8117576 (555.48 it/sec) -training >> step=9864200, episode=1645 reward=0.7971402 (541.04 it/sec) -training >> step=9864300, episode=1645 reward=0.7768173 (472.67 it/sec) -training >> step=9864400, episode=1645 reward=0.7941785 (546.46 it/sec) -training >> step=9864500, episode=1645 reward=0.7997929 (532.47 it/sec) -training >> step=9864600, episode=1645 reward=0.806304 (527.70 it/sec) -training >> step=9864700, episode=1645 reward=0.8046263 (549.02 it/sec) -training >> step=9864800, episode=1645 reward=0.793959 (544.74 it/sec) -training >> step=9864900, episode=1645 reward=0.8163181 (486.60 it/sec) -training >> step=9865000, episode=1645 reward=0.7939034 (529.25 it/sec) -training >> step=9865100, episode=1645 reward=0.8070388 (529.59 it/sec) -training >> step=9865200, episode=1645 reward=0.7876121 (538.36 it/sec) -training >> step=9865300, episode=1645 reward=0.7725494 (540.50 it/sec) -training >> step=9865400, episode=1645 reward=0.7994447 (509.35 it/sec) -training >> step=9865500, episode=1645 reward=0.7946511 (534.86 it/sec) -training >> step=9865600, episode=1645 reward=0.7677256 (506.27 it/sec) -training >> step=9865700, episode=1645 reward=0.7772015 (569.65 it/sec) -training >> step=9865800, episode=1645 reward=0.7877177 (523.27 it/sec) -training >> step=9865900, episode=1645 reward=0.7997814 (515.14 it/sec) -training >> step=9866000, episode=1645 reward=0.8178741 (513.87 it/sec) -training >> step=9866100, episode=1645 reward=0.7861169 (532.92 it/sec) -training >> step=9866200, episode=1645 reward=0.8067867 (484.59 it/sec) -training >> step=9866300, episode=1645 reward=0.782539 (521.11 it/sec) -training >> step=9866400, episode=1645 reward=0.7946495 (510.86 it/sec) -training >> step=9866500, episode=1645 reward=0.7876503 (516.03 it/sec) -training >> step=9866600, episode=1645 reward=0.8058251 (517.32 it/sec) -training >> step=9866700, episode=1645 reward=0.7808787 (521.36 it/sec) -training >> step=9866800, episode=1645 reward=0.7896512 (575.13 it/sec) -training >> step=9866900, episode=1645 reward=0.8068904 (492.47 it/sec) -training >> step=9867000, episode=1645 reward=0.7907759 (473.85 it/sec) -training >> step=9867100, episode=1645 reward=0.7821224 (495.61 it/sec) -training >> step=9867200, episode=1645 reward=0.7974994 (492.33 it/sec) -training >> step=9867300, episode=1645 reward=0.782863 (499.56 it/sec) -training >> step=9867400, episode=1645 reward=0.7807683 (481.74 it/sec) -training >> step=9867500, episode=1645 reward=0.7967603 (488.08 it/sec) -training >> step=9867600, episode=1645 reward=0.7941196 (492.88 it/sec) -training >> step=9867700, episode=1645 reward=0.7790461 (472.53 it/sec) -training >> step=9867800, episode=1645 reward=0.770525 (500.13 it/sec) -training >> step=9867900, episode=1645 reward=0.7785539 (528.74 it/sec) -training >> step=9868000, episode=1645 reward=0.7921861 (488.79 it/sec) -training >> step=9868100, episode=1645 reward=0.7713417 (513.98 it/sec) -training >> step=9868200, episode=1645 reward=0.8053895 (466.60 it/sec) -training >> step=9868300, episode=1645 reward=0.7911971 (509.22 it/sec) -training >> step=9868400, episode=1645 reward=0.8072297 (506.20 it/sec) -training >> step=9868500, episode=1645 reward=0.7890849 (472.05 it/sec) -training >> step=9868600, episode=1645 reward=0.787017 (479.89 it/sec) -training >> step=9868700, episode=1645 reward=0.8069681 (479.50 it/sec) -training >> step=9868800, episode=1645 reward=0.7827119 (462.59 it/sec) -training >> step=9868900, episode=1645 reward=0.7901108 (453.36 it/sec) -training >> step=9869000, episode=1645 reward=0.77675 (431.50 it/sec) -training >> step=9869100, episode=1645 reward=0.7926739 (433.07 it/sec) -training >> step=9869200, episode=1645 reward=0.7808867 (456.57 it/sec) -training >> step=9869300, episode=1646 reward=0.7924994 (88.51 it/sec) -training >> step=9869400, episode=1646 reward=0.7988055 (488.41 it/sec) -training >> step=9869500, episode=1646 reward=0.7771617 (475.03 it/sec) -training >> step=9869600, episode=1646 reward=0.7741566 (409.42 it/sec) -training >> step=9869700, episode=1646 reward=0.78563 (375.65 it/sec) -training >> step=9869800, episode=1646 reward=0.7846391 (507.70 it/sec) -training >> step=9869900, episode=1646 reward=0.7929263 (481.85 it/sec) -training >> step=9870000, episode=1646 reward=0.7975156 (499.33 it/sec) -training >> step=9870100, episode=1646 reward=0.7985711 (512.03 it/sec) -training >> step=9870200, episode=1646 reward=0.7840899 (490.53 it/sec) -training >> step=9870300, episode=1646 reward=0.8059649 (492.19 it/sec) -training >> step=9870400, episode=1646 reward=0.7738119 (501.36 it/sec) -training >> step=9870500, episode=1646 reward=0.7989811 (485.01 it/sec) -training >> step=9870600, episode=1646 reward=0.7830006 (545.39 it/sec) -training >> step=9870700, episode=1646 reward=0.7799222 (517.91 it/sec) -training >> step=9870800, episode=1646 reward=0.8025607 (502.99 it/sec) -training >> step=9870900, episode=1646 reward=0.79606 (492.54 it/sec) -training >> step=9871000, episode=1646 reward=0.7861683 (441.42 it/sec) -training >> step=9871100, episode=1646 reward=0.7851395 (523.59 it/sec) -training >> step=9871200, episode=1646 reward=0.7817071 (521.53 it/sec) -training >> step=9871300, episode=1646 reward=0.7969221 (431.26 it/sec) -training >> step=9871400, episode=1646 reward=0.8042294 (457.92 it/sec) -training >> step=9871500, episode=1646 reward=0.8006587 (432.82 it/sec) -training >> step=9871600, episode=1646 reward=0.7832378 (450.55 it/sec) -training >> step=9871700, episode=1646 reward=0.7834086 (436.04 it/sec) -training >> step=9871800, episode=1646 reward=0.785863 (429.69 it/sec) -training >> step=9871900, episode=1646 reward=0.787219 (419.49 it/sec) -training >> step=9872000, episode=1646 reward=0.7750096 (388.78 it/sec) -training >> step=9872100, episode=1646 reward=0.7901837 (491.05 it/sec) -training >> step=9872200, episode=1646 reward=0.8031684 (441.84 it/sec) -training >> step=9872300, episode=1646 reward=0.7967039 (477.54 it/sec) -training >> step=9872400, episode=1646 reward=0.7710014 (431.02 it/sec) -training >> step=9872500, episode=1646 reward=0.7952825 (484.79 it/sec) -training >> step=9872600, episode=1646 reward=0.7896047 (472.81 it/sec) -training >> step=9872700, episode=1646 reward=0.8155608 (476.00 it/sec) -training >> step=9872800, episode=1646 reward=0.7697056 (476.54 it/sec) -training >> step=9872900, episode=1646 reward=0.7853528 (415.50 it/sec) -training >> step=9873000, episode=1646 reward=0.7919799 (472.60 it/sec) -training >> step=9873100, episode=1646 reward=0.7887093 (451.27 it/sec) -training >> step=9873200, episode=1646 reward=0.8156827 (438.57 it/sec) -training >> step=9873300, episode=1646 reward=0.8039234 (418.43 it/sec) -training >> step=9873400, episode=1646 reward=0.7836362 (441.34 it/sec) -training >> step=9873500, episode=1646 reward=0.7921504 (423.70 it/sec) -training >> step=9873600, episode=1646 reward=0.799575 (453.62 it/sec) -training >> step=9873700, episode=1646 reward=0.7881191 (393.91 it/sec) -training >> step=9873800, episode=1646 reward=0.7674263 (420.22 it/sec) -training >> step=9873900, episode=1646 reward=0.786872 (386.84 it/sec) -training >> step=9874000, episode=1646 reward=0.7862509 (399.84 it/sec) -training >> step=9874100, episode=1646 reward=0.7644577 (421.22 it/sec) -training >> step=9874200, episode=1646 reward=0.7897235 (458.38 it/sec) -training >> step=9874300, episode=1646 reward=0.793397 (450.12 it/sec) -training >> step=9874400, episode=1646 reward=0.7552009 (444.74 it/sec) -training >> step=9874500, episode=1646 reward=0.7911288 (475.38 it/sec) -training >> step=9874600, episode=1646 reward=0.7850285 (432.03 it/sec) -training >> step=9874700, episode=1646 reward=0.7764376 (425.24 it/sec) -training >> step=9874800, episode=1646 reward=0.8101485 (385.41 it/sec) -training >> step=9874900, episode=1646 reward=0.7891396 (448.76 it/sec) -training >> step=9875000, episode=1646 reward=0.7747585 (404.86 it/sec) -training >> step=9875100, episode=1646 reward=0.8044881 (427.37 it/sec) -training >> step=9875200, episode=1646 reward=0.7945923 (449.98 it/sec) -training >> step=9875300, episode=1647 reward=0.7982184 (92.27 it/sec) -training >> step=9875400, episode=1647 reward=0.7698278 (439.31 it/sec) -training >> step=9875500, episode=1647 reward=0.7866266 (492.16 it/sec) -training >> step=9875600, episode=1647 reward=0.7738974 (494.86 it/sec) -training >> step=9875700, episode=1647 reward=0.7808605 (502.74 it/sec) -training >> step=9875800, episode=1647 reward=0.7907622 (491.86 it/sec) -training >> step=9875900, episode=1647 reward=0.8025373 (467.17 it/sec) -training >> step=9876000, episode=1647 reward=0.7909384 (543.26 it/sec) -training >> step=9876100, episode=1647 reward=0.7777141 (501.71 it/sec) -training >> step=9876200, episode=1647 reward=0.7774929 (498.10 it/sec) -training >> step=9876300, episode=1647 reward=0.8022502 (493.76 it/sec) -training >> step=9876400, episode=1647 reward=0.7830524 (501.68 it/sec) -training >> step=9876500, episode=1647 reward=0.8026271 (520.95 it/sec) -training >> step=9876600, episode=1647 reward=0.7944742 (475.99 it/sec) -training >> step=9876700, episode=1647 reward=0.8084427 (525.02 it/sec) -training >> step=9876800, episode=1647 reward=0.7990959 (507.89 it/sec) -training >> step=9876900, episode=1647 reward=0.7949404 (504.46 it/sec) -training >> step=9877000, episode=1647 reward=0.7702954 (512.37 it/sec) -training >> step=9877100, episode=1647 reward=0.7897366 (515.90 it/sec) -training >> step=9877200, episode=1647 reward=0.807813 (465.29 it/sec) -training >> step=9877300, episode=1647 reward=0.7918782 (518.21 it/sec) -training >> step=9877400, episode=1647 reward=0.7859269 (481.68 it/sec) -training >> step=9877500, episode=1647 reward=0.8082217 (486.50 it/sec) -training >> step=9877600, episode=1647 reward=0.7993432 (462.96 it/sec) -training >> step=9877700, episode=1647 reward=0.7888117 (459.98 it/sec) -training >> step=9877800, episode=1647 reward=0.8176265 (539.03 it/sec) -training >> step=9877900, episode=1647 reward=0.7935549 (494.23 it/sec) -training >> step=9878000, episode=1647 reward=0.8042753 (517.63 it/sec) -training >> step=9878100, episode=1647 reward=0.7833633 (468.98 it/sec) -training >> step=9878200, episode=1647 reward=0.7966839 (463.85 it/sec) -training >> step=9878300, episode=1647 reward=0.7882873 (469.74 it/sec) -training >> step=9878400, episode=1647 reward=0.770665 (501.75 it/sec) -training >> step=9878500, episode=1647 reward=0.793756 (501.38 it/sec) -training >> step=9878600, episode=1647 reward=0.7998077 (471.72 it/sec) -training >> step=9878700, episode=1647 reward=0.7832486 (446.29 it/sec) -training >> step=9878800, episode=1647 reward=0.7834087 (481.19 it/sec) -training >> step=9878900, episode=1647 reward=0.7917739 (521.70 it/sec) -training >> step=9879000, episode=1647 reward=0.7795587 (492.58 it/sec) -training >> step=9879100, episode=1647 reward=0.8023084 (459.26 it/sec) -training >> step=9879200, episode=1647 reward=0.8104675 (489.48 it/sec) -training >> step=9879300, episode=1647 reward=0.7923349 (519.20 it/sec) -training >> step=9879400, episode=1647 reward=0.7869183 (474.19 it/sec) -training >> step=9879500, episode=1647 reward=0.7910543 (496.40 it/sec) -training >> step=9879600, episode=1647 reward=0.8130356 (510.91 it/sec) -training >> step=9879700, episode=1647 reward=0.8078613 (497.99 it/sec) -training >> step=9879800, episode=1647 reward=0.7832109 (520.98 it/sec) -training >> step=9879900, episode=1647 reward=0.8009076 (476.79 it/sec) -training >> step=9880000, episode=1647 reward=0.7724413 (515.36 it/sec) -training >> step=9880100, episode=1647 reward=0.7942767 (488.41 it/sec) -training >> step=9880200, episode=1647 reward=0.7980216 (519.46 it/sec) -training >> step=9880300, episode=1647 reward=0.8032793 (527.92 it/sec) -training >> step=9880400, episode=1647 reward=0.7965009 (523.92 it/sec) -training >> step=9880500, episode=1647 reward=0.7715426 (509.64 it/sec) -training >> step=9880600, episode=1647 reward=0.7966599 (508.06 it/sec) -training >> step=9880700, episode=1647 reward=0.784344 (510.45 it/sec) -training >> step=9880800, episode=1647 reward=0.7935477 (499.68 it/sec) -training >> step=9880900, episode=1647 reward=0.8061464 (511.98 it/sec) -training >> step=9881000, episode=1647 reward=0.7746153 (533.81 it/sec) -training >> step=9881100, episode=1647 reward=0.7967879 (552.67 it/sec) -training >> step=9881200, episode=1647 reward=0.7867544 (476.59 it/sec) -training >> step=9881300, episode=1648 reward=0.8103979 (116.91 it/sec) -training >> step=9881400, episode=1648 reward=0.7753854 (502.08 it/sec) -training >> step=9881500, episode=1648 reward=0.7948511 (365.19 it/sec) -training >> step=9881600, episode=1648 reward=0.7788166 (497.09 it/sec) -training >> step=9881700, episode=1648 reward=0.7801345 (528.11 it/sec) -training >> step=9881800, episode=1648 reward=0.8213634 (535.79 it/sec) -training >> step=9881900, episode=1648 reward=0.813944 (508.27 it/sec) -training >> step=9882000, episode=1648 reward=0.7984903 (512.17 it/sec) -training >> step=9882100, episode=1648 reward=0.7898507 (473.02 it/sec) -training >> step=9882200, episode=1648 reward=0.8140988 (475.85 it/sec) -training >> step=9882300, episode=1648 reward=0.7831914 (444.55 it/sec) -training >> step=9882400, episode=1648 reward=0.783516 (529.36 it/sec) -training >> step=9882500, episode=1648 reward=0.7992467 (470.07 it/sec) -training >> step=9882600, episode=1648 reward=0.784851 (410.88 it/sec) -training >> step=9882700, episode=1648 reward=0.7919031 (485.23 it/sec) -training >> step=9882800, episode=1648 reward=0.798776 (457.53 it/sec) -training >> step=9882900, episode=1648 reward=0.8081534 (456.27 it/sec) -training >> step=9883000, episode=1648 reward=0.7942098 (479.29 it/sec) -training >> step=9883100, episode=1648 reward=0.7825359 (418.32 it/sec) -training >> step=9883200, episode=1648 reward=0.784092 (482.24 it/sec) -training >> step=9883300, episode=1648 reward=0.8065934 (507.00 it/sec) -training >> step=9883400, episode=1648 reward=0.7978781 (486.77 it/sec) -training >> step=9883500, episode=1648 reward=0.7833972 (514.64 it/sec) -training >> step=9883600, episode=1648 reward=0.8015981 (471.09 it/sec) -training >> step=9883700, episode=1648 reward=0.7840567 (488.62 it/sec) -training >> step=9883800, episode=1648 reward=0.7827775 (521.61 it/sec) -training >> step=9883900, episode=1648 reward=0.7933691 (503.15 it/sec) -training >> step=9884000, episode=1648 reward=0.7717147 (476.42 it/sec) -training >> step=9884100, episode=1648 reward=0.7897764 (433.14 it/sec) -training >> step=9884200, episode=1648 reward=0.7822868 (491.26 it/sec) -training >> step=9884300, episode=1648 reward=0.7863053 (450.53 it/sec) -training >> step=9884400, episode=1648 reward=0.7691504 (452.99 it/sec) -training >> step=9884500, episode=1648 reward=0.8166516 (503.31 it/sec) -training >> step=9884600, episode=1648 reward=0.7824467 (451.59 it/sec) -training >> step=9884700, episode=1648 reward=0.8059731 (479.71 it/sec) -training >> step=9884800, episode=1648 reward=0.8092381 (431.94 it/sec) -training >> step=9884900, episode=1648 reward=0.7880058 (488.24 it/sec) -training >> step=9885000, episode=1648 reward=0.7844511 (455.41 it/sec) -training >> step=9885100, episode=1648 reward=0.8136287 (444.09 it/sec) -training >> step=9885200, episode=1648 reward=0.7779478 (478.66 it/sec) -training >> step=9885300, episode=1648 reward=0.7981495 (502.16 it/sec) -training >> step=9885400, episode=1648 reward=0.7908091 (477.01 it/sec) -training >> step=9885500, episode=1648 reward=0.7883881 (503.33 it/sec) -training >> step=9885600, episode=1648 reward=0.782842 (508.68 it/sec) -training >> step=9885700, episode=1648 reward=0.806442 (470.06 it/sec) -training >> step=9885800, episode=1648 reward=0.7790959 (487.48 it/sec) -training >> step=9885900, episode=1648 reward=0.8127389 (497.56 it/sec) -training >> step=9886000, episode=1648 reward=0.7818224 (544.84 it/sec) -training >> step=9886100, episode=1648 reward=0.7819068 (468.76 it/sec) -training >> step=9886200, episode=1648 reward=0.7913013 (465.14 it/sec) -training >> step=9886300, episode=1648 reward=0.7908327 (507.87 it/sec) -training >> step=9886400, episode=1648 reward=0.7819631 (503.15 it/sec) -training >> step=9886500, episode=1648 reward=0.7834013 (505.94 it/sec) -training >> step=9886600, episode=1648 reward=0.7794809 (520.41 it/sec) -training >> step=9886700, episode=1648 reward=0.7656052 (489.35 it/sec) -training >> step=9886800, episode=1648 reward=0.7890951 (486.17 it/sec) -training >> step=9886900, episode=1648 reward=0.781523 (470.96 it/sec) -training >> step=9887000, episode=1648 reward=0.802376 (429.34 it/sec) -training >> step=9887100, episode=1648 reward=0.7646029 (454.00 it/sec) -training >> step=9887200, episode=1648 reward=0.7761062 (450.42 it/sec) -training >> step=9887300, episode=1649 reward=0.7939641 (93.55 it/sec) -training >> step=9887400, episode=1649 reward=0.7862126 (463.77 it/sec) -training >> step=9887500, episode=1649 reward=0.7826036 (416.37 it/sec) -training >> step=9887600, episode=1649 reward=0.7799296 (383.46 it/sec) -training >> step=9887700, episode=1649 reward=0.7876292 (442.64 it/sec) -training >> step=9887800, episode=1649 reward=0.7753866 (449.50 it/sec) -training >> step=9887900, episode=1649 reward=0.774379 (373.33 it/sec) -training >> step=9888000, episode=1649 reward=0.7867653 (479.32 it/sec) -training >> step=9888100, episode=1649 reward=0.8101444 (490.91 it/sec) -training >> step=9888200, episode=1649 reward=0.7970053 (485.36 it/sec) -training >> step=9888300, episode=1649 reward=0.7936831 (494.92 it/sec) -training >> step=9888400, episode=1649 reward=0.7878801 (478.70 it/sec) -training >> step=9888500, episode=1649 reward=0.7981516 (496.02 it/sec) -training >> step=9888600, episode=1649 reward=0.7924443 (458.16 it/sec) -training >> step=9888700, episode=1649 reward=0.8024217 (456.48 it/sec) -training >> step=9888800, episode=1649 reward=0.7909519 (488.57 it/sec) -training >> step=9888900, episode=1649 reward=0.8071808 (498.97 it/sec) -training >> step=9889000, episode=1649 reward=0.7730228 (526.81 it/sec) -training >> step=9889100, episode=1649 reward=0.803447 (496.25 it/sec) -training >> step=9889200, episode=1649 reward=0.7843529 (438.50 it/sec) -training >> step=9889300, episode=1649 reward=0.7938101 (481.36 it/sec) -training >> step=9889400, episode=1649 reward=0.8024138 (424.47 it/sec) -training >> step=9889500, episode=1649 reward=0.7904998 (425.06 it/sec) -training >> step=9889600, episode=1649 reward=0.8034545 (403.43 it/sec) -training >> step=9889700, episode=1649 reward=0.7794616 (458.07 it/sec) -training >> step=9889800, episode=1649 reward=0.784107 (443.41 it/sec) -training >> step=9889900, episode=1649 reward=0.7896497 (483.55 it/sec) -training >> step=9890000, episode=1649 reward=0.8166627 (457.10 it/sec) -training >> step=9890100, episode=1649 reward=0.8005266 (455.40 it/sec) -training >> step=9890200, episode=1649 reward=0.8156834 (462.15 it/sec) -training >> step=9890300, episode=1649 reward=0.79923 (448.38 it/sec) -training >> step=9890400, episode=1649 reward=0.7884011 (426.53 it/sec) -training >> step=9890500, episode=1649 reward=0.794741 (473.43 it/sec) -training >> step=9890600, episode=1649 reward=0.7770441 (477.06 it/sec) -training >> step=9890700, episode=1649 reward=0.8029983 (423.55 it/sec) -training >> step=9890800, episode=1649 reward=0.8160861 (472.59 it/sec) -training >> step=9890900, episode=1649 reward=0.8019171 (511.87 it/sec) -training >> step=9891000, episode=1649 reward=0.7839119 (505.34 it/sec) -training >> step=9891100, episode=1649 reward=0.7919503 (488.00 it/sec) -training >> step=9891200, episode=1649 reward=0.7722202 (443.57 it/sec) -training >> step=9891300, episode=1649 reward=0.792663 (499.66 it/sec) -training >> step=9891400, episode=1649 reward=0.7959622 (465.42 it/sec) -training >> step=9891500, episode=1649 reward=0.7691931 (477.37 it/sec) -training >> step=9891600, episode=1649 reward=0.8008943 (473.47 it/sec) -training >> step=9891700, episode=1649 reward=0.7843063 (515.92 it/sec) -training >> step=9891800, episode=1649 reward=0.7774594 (492.73 it/sec) -training >> step=9891900, episode=1649 reward=0.784041 (462.54 it/sec) -training >> step=9892000, episode=1649 reward=0.7842004 (498.45 it/sec) -training >> step=9892100, episode=1649 reward=0.7967699 (492.59 it/sec) -training >> step=9892200, episode=1649 reward=0.7780082 (489.14 it/sec) -training >> step=9892300, episode=1649 reward=0.7877482 (492.34 it/sec) -training >> step=9892400, episode=1649 reward=0.7941296 (504.13 it/sec) -training >> step=9892500, episode=1649 reward=0.8024531 (478.24 it/sec) -training >> step=9892600, episode=1649 reward=0.803097 (468.99 it/sec) -training >> step=9892700, episode=1649 reward=0.7798789 (483.77 it/sec) -training >> step=9892800, episode=1649 reward=0.7823118 (484.83 it/sec) -training >> step=9892900, episode=1649 reward=0.7892454 (451.81 it/sec) -training >> step=9893000, episode=1649 reward=0.7899384 (500.06 it/sec) -training >> step=9893100, episode=1649 reward=0.7704712 (514.79 it/sec) -training >> step=9893200, episode=1649 reward=0.7946075 (474.68 it/sec) -training >> step=9893300, episode=1650 reward=0.759558 (113.55 it/sec) -training >> step=9893400, episode=1650 reward=0.7854884 (430.82 it/sec) -training >> step=9893500, episode=1650 reward=0.7783234 (437.59 it/sec) -training >> step=9893600, episode=1650 reward=0.7841787 (407.20 it/sec) -training >> step=9893700, episode=1650 reward=0.7952355 (486.09 it/sec) -training >> step=9893800, episode=1650 reward=0.7916828 (453.39 it/sec) -training >> step=9893900, episode=1650 reward=0.7746424 (420.27 it/sec) -training >> step=9894000, episode=1650 reward=0.8042735 (414.50 it/sec) -training >> step=9894100, episode=1650 reward=0.8059195 (442.29 it/sec) -training >> step=9894200, episode=1650 reward=0.7835298 (465.11 it/sec) -training >> step=9894300, episode=1650 reward=0.7730254 (484.82 it/sec) -training >> step=9894400, episode=1650 reward=0.7990144 (508.34 it/sec) -training >> step=9894500, episode=1650 reward=0.7986384 (454.63 it/sec) -training >> step=9894600, episode=1650 reward=0.7871891 (470.14 it/sec) -training >> step=9894700, episode=1650 reward=0.7998413 (511.86 it/sec) -training >> step=9894800, episode=1650 reward=0.7979847 (468.94 it/sec) -training >> step=9894900, episode=1650 reward=0.8082578 (507.53 it/sec) -training >> step=9895000, episode=1650 reward=0.7750483 (491.43 it/sec) -training >> step=9895100, episode=1650 reward=0.8063811 (474.08 it/sec) -training >> step=9895200, episode=1650 reward=0.7774293 (501.02 it/sec) -training >> step=9895300, episode=1650 reward=0.7956318 (496.90 it/sec) -training >> step=9895400, episode=1650 reward=0.7996742 (504.06 it/sec) -training >> step=9895500, episode=1650 reward=0.7828131 (485.37 it/sec) -training >> step=9895600, episode=1650 reward=0.796369 (478.54 it/sec) -training >> step=9895700, episode=1650 reward=0.795446 (430.14 it/sec) -training >> step=9895800, episode=1650 reward=0.7983195 (495.93 it/sec) -training >> step=9895900, episode=1650 reward=0.8022171 (511.39 it/sec) -training >> step=9896000, episode=1650 reward=0.7805905 (487.69 it/sec) -training >> step=9896100, episode=1650 reward=0.7945108 (489.54 it/sec) -training >> step=9896200, episode=1650 reward=0.7817343 (520.76 it/sec) -training >> step=9896300, episode=1650 reward=0.7960963 (494.42 it/sec) -training >> step=9896400, episode=1650 reward=0.7847443 (470.56 it/sec) -training >> step=9896500, episode=1650 reward=0.8062481 (473.00 it/sec) -training >> step=9896600, episode=1650 reward=0.7942258 (526.66 it/sec) -training >> step=9896700, episode=1650 reward=0.7992082 (487.91 it/sec) -training >> step=9896800, episode=1650 reward=0.7978784 (507.92 it/sec) -training >> step=9896900, episode=1650 reward=0.7938277 (500.14 it/sec) -training >> step=9897000, episode=1650 reward=0.7923247 (515.97 it/sec) -training >> step=9897100, episode=1650 reward=0.7889226 (501.20 it/sec) -training >> step=9897200, episode=1650 reward=0.7939494 (507.68 it/sec) -training >> step=9897300, episode=1650 reward=0.8082435 (515.33 it/sec) -training >> step=9897400, episode=1650 reward=0.7926245 (496.20 it/sec) -training >> step=9897500, episode=1650 reward=0.7819164 (510.32 it/sec) -training >> step=9897600, episode=1650 reward=0.8021058 (513.65 it/sec) -training >> step=9897700, episode=1650 reward=0.7913122 (510.62 it/sec) -training >> step=9897800, episode=1650 reward=0.793437 (503.06 it/sec) -training >> step=9897900, episode=1650 reward=0.8019678 (474.16 it/sec) -training >> step=9898000, episode=1650 reward=0.7889692 (547.31 it/sec) -training >> step=9898100, episode=1650 reward=0.7990047 (499.04 it/sec) -training >> step=9898200, episode=1650 reward=0.8090411 (455.28 it/sec) -training >> step=9898300, episode=1650 reward=0.7771875 (476.66 it/sec) -training >> step=9898400, episode=1650 reward=0.7792106 (503.19 it/sec) -training >> step=9898500, episode=1650 reward=0.7821212 (478.00 it/sec) -training >> step=9898600, episode=1650 reward=0.7859419 (509.78 it/sec) -training >> step=9898700, episode=1650 reward=0.7867458 (509.17 it/sec) -training >> step=9898800, episode=1650 reward=0.7727054 (520.04 it/sec) -training >> step=9898900, episode=1650 reward=0.7925186 (507.45 it/sec) -training >> step=9899000, episode=1650 reward=0.7903347 (491.19 it/sec) -training >> step=9899100, episode=1650 reward=0.7895569 (499.58 it/sec) -training >> step=9899200, episode=1650 reward=0.793353 (480.74 it/sec) -training >> step=9899300, episode=1651 reward=0.7950479 (94.81 it/sec) -training >> step=9899400, episode=1651 reward=0.7694563 (513.09 it/sec) -training >> step=9899500, episode=1651 reward=0.7816944 (492.95 it/sec) -training >> step=9899600, episode=1651 reward=0.778214 (507.50 it/sec) -training >> step=9899700, episode=1651 reward=0.8058685 (490.74 it/sec) -training >> step=9899800, episode=1651 reward=0.7951749 (521.26 it/sec) -training >> step=9899900, episode=1651 reward=0.787785 (466.84 it/sec) -training >> step=9900000, episode=1651 reward=0.7858497 (478.35 it/sec) -training >> step=9900100, episode=1651 reward=0.8010714 (537.51 it/sec) -training >> step=9900200, episode=1651 reward=0.7962877 (471.45 it/sec) -training >> step=9900300, episode=1651 reward=0.7949165 (412.48 it/sec) -training >> step=9900400, episode=1651 reward=0.796848 (529.19 it/sec) -training >> step=9900500, episode=1651 reward=0.7834337 (516.45 it/sec) -training >> step=9900600, episode=1651 reward=0.8003086 (513.80 it/sec) -training >> step=9900700, episode=1651 reward=0.8072537 (492.16 it/sec) -training >> step=9900800, episode=1651 reward=0.796479 (504.91 it/sec) -training >> step=9900900, episode=1651 reward=0.8107086 (494.98 it/sec) -training >> step=9901000, episode=1651 reward=0.7907085 (495.69 it/sec) -training >> step=9901100, episode=1651 reward=0.7914303 (497.62 it/sec) -training >> step=9901200, episode=1651 reward=0.8105099 (491.29 it/sec) -training >> step=9901300, episode=1651 reward=0.7892745 (495.96 it/sec) -training >> step=9901400, episode=1651 reward=0.7932964 (511.21 it/sec) -training >> step=9901500, episode=1651 reward=0.8016689 (474.92 it/sec) -training >> step=9901600, episode=1651 reward=0.7832332 (509.72 it/sec) -training >> step=9901700, episode=1651 reward=0.7908467 (471.22 it/sec) -training >> step=9901800, episode=1651 reward=0.8015045 (470.36 it/sec) -training >> step=9901900, episode=1651 reward=0.7719084 (502.27 it/sec) -training >> step=9902000, episode=1651 reward=0.7955673 (492.73 it/sec) -training >> step=9902100, episode=1651 reward=0.7919806 (476.29 it/sec) -training >> step=9902200, episode=1651 reward=0.8018928 (509.91 it/sec) -training >> step=9902300, episode=1651 reward=0.7842348 (473.12 it/sec) -training >> step=9902400, episode=1651 reward=0.804876 (478.13 it/sec) -training >> step=9902500, episode=1651 reward=0.8009433 (472.11 it/sec) -training >> step=9902600, episode=1651 reward=0.7911762 (529.85 it/sec) -training >> step=9902700, episode=1651 reward=0.7928895 (491.38 it/sec) -training >> step=9902800, episode=1651 reward=0.7905955 (483.41 it/sec) -training >> step=9902900, episode=1651 reward=0.8054129 (491.54 it/sec) -training >> step=9903000, episode=1651 reward=0.8017559 (550.70 it/sec) -training >> step=9903100, episode=1651 reward=0.7862519 (484.92 it/sec) -training >> step=9903200, episode=1651 reward=0.8015809 (498.81 it/sec) -training >> step=9903300, episode=1651 reward=0.7947876 (506.38 it/sec) -training >> step=9903400, episode=1651 reward=0.8045372 (491.07 it/sec) -training >> step=9903500, episode=1651 reward=0.7845125 (505.11 it/sec) -training >> step=9903600, episode=1651 reward=0.7986633 (487.58 it/sec) -training >> step=9903700, episode=1651 reward=0.8080616 (512.81 it/sec) -training >> step=9903800, episode=1651 reward=0.7608266 (467.81 it/sec) -training >> step=9903900, episode=1651 reward=0.7817009 (498.51 it/sec) -training >> step=9904000, episode=1651 reward=0.8116264 (511.69 it/sec) -training >> step=9904100, episode=1651 reward=0.7880678 (510.77 it/sec) -training >> step=9904200, episode=1651 reward=0.7716982 (522.20 it/sec) -training >> step=9904300, episode=1651 reward=0.8037822 (476.19 it/sec) -training >> step=9904400, episode=1651 reward=0.7764927 (527.62 it/sec) -training >> step=9904500, episode=1651 reward=0.8023603 (480.24 it/sec) -training >> step=9904600, episode=1651 reward=0.7910469 (456.87 it/sec) -training >> step=9904700, episode=1651 reward=0.786038 (488.39 it/sec) -training >> step=9904800, episode=1651 reward=0.7984797 (530.71 it/sec) -training >> step=9904900, episode=1651 reward=0.7887356 (482.61 it/sec) -training >> step=9905000, episode=1651 reward=0.7901735 (500.40 it/sec) -training >> step=9905100, episode=1651 reward=0.7975367 (496.74 it/sec) -training >> step=9905200, episode=1651 reward=0.7897187 (490.89 it/sec) -training >> step=9905300, episode=1652 reward=0.7848489 (92.17 it/sec) -training >> step=9905400, episode=1652 reward=0.7972539 (449.92 it/sec) -training >> step=9905500, episode=1652 reward=0.7754324 (454.85 it/sec) -training >> step=9905600, episode=1652 reward=0.771822 (441.33 it/sec) -training >> step=9905700, episode=1652 reward=0.7644371 (466.51 it/sec) -training >> step=9905800, episode=1652 reward=0.7794664 (483.06 it/sec) -training >> step=9905900, episode=1652 reward=0.7889194 (511.26 it/sec) -training >> step=9906000, episode=1652 reward=0.7965927 (468.67 it/sec) -training >> step=9906100, episode=1652 reward=0.7891451 (516.27 it/sec) -training >> step=9906200, episode=1652 reward=0.806349 (455.25 it/sec) -training >> step=9906300, episode=1652 reward=0.7979501 (506.51 it/sec) -training >> step=9906400, episode=1652 reward=0.7873318 (476.11 it/sec) -training >> step=9906500, episode=1652 reward=0.8064694 (497.78 it/sec) -training >> step=9906600, episode=1652 reward=0.77588 (529.49 it/sec) -training >> step=9906700, episode=1652 reward=0.7804053 (473.01 it/sec) -training >> step=9906800, episode=1652 reward=0.8093514 (517.74 it/sec) -training >> step=9906900, episode=1652 reward=0.7818526 (496.50 it/sec) -training >> step=9907000, episode=1652 reward=0.7956612 (512.63 it/sec) -training >> step=9907100, episode=1652 reward=0.804145 (491.15 it/sec) -training >> step=9907200, episode=1652 reward=0.8158464 (519.47 it/sec) -training >> step=9907300, episode=1652 reward=0.7937775 (492.12 it/sec) -training >> step=9907400, episode=1652 reward=0.7934556 (486.21 it/sec) -training >> step=9907500, episode=1652 reward=0.7955341 (530.52 it/sec) -training >> step=9907600, episode=1652 reward=0.793303 (509.53 it/sec) -training >> step=9907700, episode=1652 reward=0.8012002 (460.19 it/sec) -training >> step=9907800, episode=1652 reward=0.7822003 (512.33 it/sec) -training >> step=9907900, episode=1652 reward=0.804742 (530.97 it/sec) -training >> step=9908000, episode=1652 reward=0.7745618 (503.05 it/sec) -training >> step=9908100, episode=1652 reward=0.7938446 (517.19 it/sec) -training >> step=9908200, episode=1652 reward=0.779978 (477.87 it/sec) -training >> step=9908300, episode=1652 reward=0.7556041 (495.71 it/sec) -training >> step=9908400, episode=1652 reward=0.8011563 (465.85 it/sec) -training >> step=9908500, episode=1652 reward=0.7715011 (474.75 it/sec) -training >> step=9908600, episode=1652 reward=0.7927938 (497.37 it/sec) -training >> step=9908700, episode=1652 reward=0.8000653 (493.01 it/sec) -training >> step=9908800, episode=1652 reward=0.7903413 (491.03 it/sec) -training >> step=9908900, episode=1652 reward=0.7931256 (473.92 it/sec) -training >> step=9909000, episode=1652 reward=0.7929364 (505.61 it/sec) -training >> step=9909100, episode=1652 reward=0.7833807 (506.09 it/sec) -training >> step=9909200, episode=1652 reward=0.7701272 (485.98 it/sec) -training >> step=9909300, episode=1652 reward=0.7773241 (505.24 it/sec) -training >> step=9909400, episode=1652 reward=0.7938963 (531.62 it/sec) -training >> step=9909500, episode=1652 reward=0.8007663 (498.77 it/sec) -training >> step=9909600, episode=1652 reward=0.7819098 (502.27 it/sec) -training >> step=9909700, episode=1652 reward=0.7881092 (501.00 it/sec) -training >> step=9909800, episode=1652 reward=0.7740147 (416.22 it/sec) -training >> step=9909900, episode=1652 reward=0.8063243 (516.32 it/sec) -training >> step=9910000, episode=1652 reward=0.7801811 (489.10 it/sec) -training >> step=9910100, episode=1652 reward=0.7870818 (491.73 it/sec) -training >> step=9910200, episode=1652 reward=0.7909589 (501.77 it/sec) -training >> step=9910300, episode=1652 reward=0.7990742 (445.51 it/sec) -training >> step=9910400, episode=1652 reward=0.79857 (543.63 it/sec) -training >> step=9910500, episode=1652 reward=0.8006077 (503.31 it/sec) -training >> step=9910600, episode=1652 reward=0.7848452 (515.41 it/sec) -training >> step=9910700, episode=1652 reward=0.7633585 (487.29 it/sec) -training >> step=9910800, episode=1652 reward=0.7927595 (529.55 it/sec) -training >> step=9910900, episode=1652 reward=0.7933745 (492.47 it/sec) -training >> step=9911000, episode=1652 reward=0.7883918 (507.88 it/sec) -training >> step=9911100, episode=1652 reward=0.7891434 (484.42 it/sec) -training >> step=9911200, episode=1652 reward=0.7875319 (553.59 it/sec) -training >> step=9911300, episode=1653 reward=0.7838061 (106.41 it/sec) -training >> step=9911400, episode=1653 reward=0.7841119 (508.25 it/sec) -training >> step=9911500, episode=1653 reward=0.7862708 (511.24 it/sec) -training >> step=9911600, episode=1653 reward=0.7794262 (477.76 it/sec) -training >> step=9911700, episode=1653 reward=0.8008124 (512.59 it/sec) -training >> step=9911800, episode=1653 reward=0.7863333 (481.40 it/sec) -training >> step=9911900, episode=1653 reward=0.7974359 (497.14 it/sec) -training >> step=9912000, episode=1653 reward=0.8163394 (503.90 it/sec) -training >> step=9912100, episode=1653 reward=0.7726018 (467.68 it/sec) -training >> step=9912200, episode=1653 reward=0.7917689 (542.32 it/sec) -training >> step=9912300, episode=1653 reward=0.7870311 (485.03 it/sec) -training >> step=9912400, episode=1653 reward=0.7874643 (506.56 it/sec) -training >> step=9912500, episode=1653 reward=0.8044862 (459.19 it/sec) -training >> step=9912600, episode=1653 reward=0.8098691 (504.35 it/sec) -training >> step=9912700, episode=1653 reward=0.7984475 (464.10 it/sec) -training >> step=9912800, episode=1653 reward=0.7865573 (487.02 it/sec) -training >> step=9912900, episode=1653 reward=0.7988603 (496.04 it/sec) -training >> step=9913000, episode=1653 reward=0.7984641 (527.16 it/sec) -training >> step=9913100, episode=1653 reward=0.8009 (448.79 it/sec) -training >> step=9913200, episode=1653 reward=0.8094082 (473.11 it/sec) -training >> step=9913300, episode=1653 reward=0.8024362 (518.79 it/sec) -training >> step=9913400, episode=1653 reward=0.7874823 (507.03 it/sec) -training >> step=9913500, episode=1653 reward=0.8038312 (493.81 it/sec) -training >> step=9913600, episode=1653 reward=0.787855 (500.82 it/sec) -training >> step=9913700, episode=1653 reward=0.795939 (525.33 it/sec) -training >> step=9913800, episode=1653 reward=0.7985418 (485.59 it/sec) -training >> step=9913900, episode=1653 reward=0.7834054 (487.43 it/sec) -training >> step=9914000, episode=1653 reward=0.8053103 (484.83 it/sec) -training >> step=9914100, episode=1653 reward=0.7807555 (533.52 it/sec) -training >> step=9914200, episode=1653 reward=0.8171306 (466.93 it/sec) -training >> step=9914300, episode=1653 reward=0.7829583 (469.26 it/sec) -training >> step=9914400, episode=1653 reward=0.7909538 (488.17 it/sec) -training >> step=9914500, episode=1653 reward=0.7776943 (464.83 it/sec) -training >> step=9914600, episode=1653 reward=0.7927732 (502.82 it/sec) -training >> step=9914700, episode=1653 reward=0.7784864 (506.61 it/sec) -training >> step=9914800, episode=1653 reward=0.7970597 (516.33 it/sec) -training >> step=9914900, episode=1653 reward=0.7877085 (519.35 it/sec) -training >> step=9915000, episode=1653 reward=0.7957309 (454.54 it/sec) -training >> step=9915100, episode=1653 reward=0.8041549 (528.94 it/sec) -training >> step=9915200, episode=1653 reward=0.8194999 (503.82 it/sec) -training >> step=9915300, episode=1653 reward=0.7983125 (470.11 it/sec) -training >> step=9915400, episode=1653 reward=0.7790534 (498.43 it/sec) -training >> step=9915500, episode=1653 reward=0.7972613 (517.39 it/sec) -training >> step=9915600, episode=1653 reward=0.7683287 (525.56 it/sec) -training >> step=9915700, episode=1653 reward=0.7883674 (473.80 it/sec) -training >> step=9915800, episode=1653 reward=0.7728596 (497.18 it/sec) -training >> step=9915900, episode=1653 reward=0.7888889 (514.18 it/sec) -training >> step=9916000, episode=1653 reward=0.8000018 (510.67 it/sec) -training >> step=9916100, episode=1653 reward=0.8176982 (524.43 it/sec) -training >> step=9916200, episode=1653 reward=0.7940897 (528.72 it/sec) -training >> step=9916300, episode=1653 reward=0.7847793 (516.05 it/sec) -training >> step=9916400, episode=1653 reward=0.7772535 (501.81 it/sec) -training >> step=9916500, episode=1653 reward=0.7830694 (512.93 it/sec) -training >> step=9916600, episode=1653 reward=0.7884203 (518.46 it/sec) -training >> step=9916700, episode=1653 reward=0.7965105 (514.18 it/sec) -training >> step=9916800, episode=1653 reward=0.785075 (513.44 it/sec) -training >> step=9916900, episode=1653 reward=0.7885795 (480.77 it/sec) -training >> step=9917000, episode=1653 reward=0.7996396 (470.20 it/sec) -training >> step=9917100, episode=1653 reward=0.8013923 (429.54 it/sec) -training >> step=9917200, episode=1653 reward=0.783199 (455.17 it/sec) -training >> step=9917300, episode=1654 reward=0.7985651 (92.60 it/sec) -training >> step=9917400, episode=1654 reward=0.7728816 (510.18 it/sec) -training >> step=9917500, episode=1654 reward=0.7795389 (485.64 it/sec) -training >> step=9917600, episode=1654 reward=0.7878215 (517.61 it/sec) -training >> step=9917700, episode=1654 reward=0.7747334 (505.16 it/sec) -training >> step=9917800, episode=1654 reward=0.8177066 (511.63 it/sec) -training >> step=9917900, episode=1654 reward=0.7777176 (492.88 it/sec) -training >> step=9918000, episode=1654 reward=0.7946654 (517.28 it/sec) -training >> step=9918100, episode=1654 reward=0.7979789 (495.91 it/sec) -training >> step=9918200, episode=1654 reward=0.8051973 (520.83 it/sec) -training >> step=9918300, episode=1654 reward=0.7927623 (528.36 it/sec) -training >> step=9918400, episode=1654 reward=0.7861567 (471.94 it/sec) -training >> step=9918500, episode=1654 reward=0.7999526 (498.79 it/sec) -training >> step=9918600, episode=1654 reward=0.8110222 (489.97 it/sec) -training >> step=9918700, episode=1654 reward=0.7791293 (468.96 it/sec) -training >> step=9918800, episode=1654 reward=0.7767358 (499.50 it/sec) -training >> step=9918900, episode=1654 reward=0.8041899 (499.47 it/sec) -training >> step=9919000, episode=1654 reward=0.7797133 (411.44 it/sec) -training >> step=9919100, episode=1654 reward=0.7901023 (504.12 it/sec) -training >> step=9919200, episode=1654 reward=0.775918 (489.02 it/sec) -training >> step=9919300, episode=1654 reward=0.7820523 (505.09 it/sec) -training >> step=9919400, episode=1654 reward=0.7851486 (533.58 it/sec) -training >> step=9919500, episode=1654 reward=0.8075767 (490.90 it/sec) -training >> step=9919600, episode=1654 reward=0.7868664 (494.26 it/sec) -training >> step=9919700, episode=1654 reward=0.7800782 (482.25 it/sec) -training >> step=9919800, episode=1654 reward=0.7854928 (536.64 it/sec) -training >> step=9919900, episode=1654 reward=0.7802083 (478.35 it/sec) -training >> step=9920000, episode=1654 reward=0.8000908 (510.88 it/sec) -training >> step=9920100, episode=1654 reward=0.7640144 (510.49 it/sec) -training >> step=9920200, episode=1654 reward=0.7856243 (492.43 it/sec) -training >> step=9920300, episode=1654 reward=0.8018036 (490.75 it/sec) -training >> step=9920400, episode=1654 reward=0.7920888 (481.85 it/sec) -training >> step=9920500, episode=1654 reward=0.78406 (536.33 it/sec) -training >> step=9920600, episode=1654 reward=0.8270291 (530.94 it/sec) -training >> step=9920700, episode=1654 reward=0.7859364 (494.51 it/sec) -training >> step=9920800, episode=1654 reward=0.7952381 (500.67 it/sec) -training >> step=9920900, episode=1654 reward=0.791115 (542.25 it/sec) -training >> step=9921000, episode=1654 reward=0.7996568 (521.64 it/sec) -training >> step=9921100, episode=1654 reward=0.7923708 (482.43 it/sec) -training >> step=9921200, episode=1654 reward=0.8005219 (492.53 it/sec) -training >> step=9921300, episode=1654 reward=0.7882481 (521.52 it/sec) -training >> step=9921400, episode=1654 reward=0.7888491 (500.87 it/sec) -training >> step=9921500, episode=1654 reward=0.7889431 (498.42 it/sec) -training >> step=9921600, episode=1654 reward=0.7842978 (484.39 it/sec) -training >> step=9921700, episode=1654 reward=0.7899599 (490.77 it/sec) -training >> step=9921800, episode=1654 reward=0.7973241 (519.95 it/sec) -training >> step=9921900, episode=1654 reward=0.7851523 (479.41 it/sec) -training >> step=9922000, episode=1654 reward=0.7875893 (508.50 it/sec) -training >> step=9922100, episode=1654 reward=0.7828794 (482.26 it/sec) -training >> step=9922200, episode=1654 reward=0.7689713 (500.88 it/sec) -training >> step=9922300, episode=1654 reward=0.7866654 (511.51 it/sec) -training >> step=9922400, episode=1654 reward=0.7830914 (494.63 it/sec) -training >> step=9922500, episode=1654 reward=0.7918915 (508.04 it/sec) -training >> step=9922600, episode=1654 reward=0.8004426 (482.83 it/sec) -training >> step=9922700, episode=1654 reward=0.7945619 (502.29 it/sec) -training >> step=9922800, episode=1654 reward=0.7891874 (493.19 it/sec) -training >> step=9922900, episode=1654 reward=0.806466 (497.87 it/sec) -training >> step=9923000, episode=1654 reward=0.780956 (493.86 it/sec) -training >> step=9923100, episode=1654 reward=0.8104268 (497.47 it/sec) -training >> step=9923200, episode=1654 reward=0.7774947 (484.77 it/sec) -training >> step=9923300, episode=1655 reward=0.7813957 (117.67 it/sec) -training >> step=9923400, episode=1655 reward=0.7730248 (550.99 it/sec) -training >> step=9923500, episode=1655 reward=0.7947581 (480.08 it/sec) -training >> step=9923600, episode=1655 reward=0.7843615 (459.93 it/sec) -training >> step=9923700, episode=1655 reward=0.7612784 (486.18 it/sec) -training >> step=9923800, episode=1655 reward=0.7911912 (520.99 it/sec) -training >> step=9923900, episode=1655 reward=0.782772 (507.35 it/sec) -training >> step=9924000, episode=1655 reward=0.801077 (467.24 it/sec) -training >> step=9924100, episode=1655 reward=0.7858841 (499.25 it/sec) -training >> step=9924200, episode=1655 reward=0.7621711 (509.04 it/sec) -training >> step=9924300, episode=1655 reward=0.7587885 (513.75 it/sec) -training >> step=9924400, episode=1655 reward=0.7658226 (524.10 it/sec) -training >> step=9924500, episode=1655 reward=0.8029892 (504.86 it/sec) -training >> step=9924600, episode=1655 reward=0.7634118 (517.38 it/sec) -training >> step=9924700, episode=1655 reward=0.7926108 (501.80 it/sec) -training >> step=9924800, episode=1655 reward=0.7898856 (504.01 it/sec) -training >> step=9924900, episode=1655 reward=0.7862238 (486.81 it/sec) -training >> step=9925000, episode=1655 reward=0.7951779 (463.29 it/sec) -training >> step=9925100, episode=1655 reward=0.8002648 (495.07 it/sec) -training >> step=9925200, episode=1655 reward=0.7822329 (498.57 it/sec) -training >> step=9925300, episode=1655 reward=0.7909914 (515.15 it/sec) -training >> step=9925400, episode=1655 reward=0.7788965 (504.06 it/sec) -training >> step=9925500, episode=1655 reward=0.7916224 (487.34 it/sec) -training >> step=9925600, episode=1655 reward=0.8030565 (510.79 it/sec) -training >> step=9925700, episode=1655 reward=0.7855587 (489.56 it/sec) -training >> step=9925800, episode=1655 reward=0.7734883 (501.95 it/sec) -training >> step=9925900, episode=1655 reward=0.7897028 (537.61 it/sec) -training >> step=9926000, episode=1655 reward=0.7885609 (483.60 it/sec) -training >> step=9926100, episode=1655 reward=0.793191 (518.65 it/sec) -training >> step=9926200, episode=1655 reward=0.7769333 (543.51 it/sec) -training >> step=9926300, episode=1655 reward=0.7949536 (481.82 it/sec) -training >> step=9926400, episode=1655 reward=0.7777559 (468.51 it/sec) -training >> step=9926500, episode=1655 reward=0.7986186 (488.41 it/sec) -training >> step=9926600, episode=1655 reward=0.8109661 (526.17 it/sec) -training >> step=9926700, episode=1655 reward=0.7922369 (481.90 it/sec) -training >> step=9926800, episode=1655 reward=0.7909107 (508.27 it/sec) -training >> step=9926900, episode=1655 reward=0.797752 (501.97 it/sec) -training >> step=9927000, episode=1655 reward=0.8181083 (533.35 it/sec) -training >> step=9927100, episode=1655 reward=0.8062561 (497.97 it/sec) -training >> step=9927200, episode=1655 reward=0.7982233 (488.35 it/sec) -training >> step=9927300, episode=1655 reward=0.8056381 (512.73 it/sec) -training >> step=9927400, episode=1655 reward=0.7735307 (491.79 it/sec) -training >> step=9927500, episode=1655 reward=0.8067878 (512.57 it/sec) -training >> step=9927600, episode=1655 reward=0.7884686 (487.69 it/sec) -training >> step=9927700, episode=1655 reward=0.7695718 (517.03 it/sec) -training >> step=9927800, episode=1655 reward=0.8055732 (522.62 it/sec) -training >> step=9927900, episode=1655 reward=0.7748636 (476.04 it/sec) -training >> step=9928000, episode=1655 reward=0.7944918 (507.00 it/sec) -training >> step=9928100, episode=1655 reward=0.7766742 (488.35 it/sec) -training >> step=9928200, episode=1655 reward=0.7917814 (486.52 it/sec) -training >> step=9928300, episode=1655 reward=0.8053284 (521.28 it/sec) -training >> step=9928400, episode=1655 reward=0.8028101 (536.40 it/sec) -training >> step=9928500, episode=1655 reward=0.8163606 (512.85 it/sec) -training >> step=9928600, episode=1655 reward=0.791487 (435.02 it/sec) -training >> step=9928700, episode=1655 reward=0.7729168 (466.05 it/sec) -training >> step=9928800, episode=1655 reward=0.7859181 (463.35 it/sec) -training >> step=9928900, episode=1655 reward=0.7827274 (491.12 it/sec) -training >> step=9929000, episode=1655 reward=0.7794563 (468.48 it/sec) -training >> step=9929100, episode=1655 reward=0.7724836 (469.07 it/sec) -training >> step=9929200, episode=1655 reward=0.7901409 (427.08 it/sec) -training >> step=9929300, episode=1656 reward=0.7907649 (89.24 it/sec) -training >> step=9929400, episode=1656 reward=0.7750816 (516.99 it/sec) -training >> step=9929500, episode=1656 reward=0.7846719 (471.07 it/sec) -training >> step=9929600, episode=1656 reward=0.8032827 (473.63 it/sec) -training >> step=9929700, episode=1656 reward=0.8084029 (508.83 it/sec) -training >> step=9929800, episode=1656 reward=0.8064552 (500.75 it/sec) -training >> step=9929900, episode=1656 reward=0.7994023 (484.19 it/sec) -training >> step=9930000, episode=1656 reward=0.7945853 (498.22 it/sec) -training >> step=9930100, episode=1656 reward=0.7856218 (479.45 it/sec) -training >> step=9930200, episode=1656 reward=0.7787741 (462.07 it/sec) -training >> step=9930300, episode=1656 reward=0.7950029 (461.63 it/sec) -training >> step=9930400, episode=1656 reward=0.7898347 (514.60 it/sec) -training >> step=9930500, episode=1656 reward=0.7978824 (504.43 it/sec) -training >> step=9930600, episode=1656 reward=0.7759277 (500.87 it/sec) -training >> step=9930700, episode=1656 reward=0.7960653 (518.72 it/sec) -training >> step=9930800, episode=1656 reward=0.7818067 (501.10 it/sec) -training >> step=9930900, episode=1656 reward=0.7825881 (522.64 it/sec) -training >> step=9931000, episode=1656 reward=0.7851552 (485.56 it/sec) -training >> step=9931100, episode=1656 reward=0.7785199 (521.18 it/sec) -training >> step=9931200, episode=1656 reward=0.814041 (488.45 it/sec) -training >> step=9931300, episode=1656 reward=0.7963088 (506.22 it/sec) -training >> step=9931400, episode=1656 reward=0.792005 (496.08 it/sec) -training >> step=9931500, episode=1656 reward=0.7896916 (525.81 it/sec) -training >> step=9931600, episode=1656 reward=0.7823519 (521.49 it/sec) -training >> step=9931700, episode=1656 reward=0.7837112 (460.97 it/sec) -training >> step=9931800, episode=1656 reward=0.8076403 (531.39 it/sec) -training >> step=9931900, episode=1656 reward=0.7972497 (495.68 it/sec) -training >> step=9932000, episode=1656 reward=0.7980205 (500.81 it/sec) -training >> step=9932100, episode=1656 reward=0.7810634 (531.97 it/sec) -training >> step=9932200, episode=1656 reward=0.7983437 (506.89 it/sec) -training >> step=9932300, episode=1656 reward=0.7956113 (491.22 it/sec) -training >> step=9932400, episode=1656 reward=0.8036859 (495.53 it/sec) -training >> step=9932500, episode=1656 reward=0.8036435 (524.73 it/sec) -training >> step=9932600, episode=1656 reward=0.7866058 (543.15 it/sec) -training >> step=9932700, episode=1656 reward=0.8017337 (490.65 it/sec) -training >> step=9932800, episode=1656 reward=0.8064544 (499.76 it/sec) -training >> step=9932900, episode=1656 reward=0.7920189 (499.29 it/sec) -training >> step=9933000, episode=1656 reward=0.7944663 (486.06 it/sec) -training >> step=9933100, episode=1656 reward=0.8042471 (420.39 it/sec) -training >> step=9933200, episode=1656 reward=0.7954891 (447.16 it/sec) -training >> step=9933300, episode=1656 reward=0.7947234 (513.96 it/sec) -training >> step=9933400, episode=1656 reward=0.8087506 (480.22 it/sec) -training >> step=9933500, episode=1656 reward=0.7883329 (545.90 it/sec) -training >> step=9933600, episode=1656 reward=0.8099883 (502.41 it/sec) -training >> step=9933700, episode=1656 reward=0.7966399 (500.89 it/sec) -training >> step=9933800, episode=1656 reward=0.8053113 (493.14 it/sec) -training >> step=9933900, episode=1656 reward=0.7726007 (566.81 it/sec) -training >> step=9934000, episode=1656 reward=0.7951141 (498.02 it/sec) -training >> step=9934100, episode=1656 reward=0.7920454 (487.61 it/sec) -training >> step=9934200, episode=1656 reward=0.7733722 (499.87 it/sec) -training >> step=9934300, episode=1656 reward=0.8076393 (495.02 it/sec) -training >> step=9934400, episode=1656 reward=0.7936182 (521.52 it/sec) -training >> step=9934500, episode=1656 reward=0.7758145 (516.41 it/sec) -training >> step=9934600, episode=1656 reward=0.7646571 (516.89 it/sec) -training >> step=9934700, episode=1656 reward=0.7758896 (495.52 it/sec) -training >> step=9934800, episode=1656 reward=0.7903939 (507.59 it/sec) -training >> step=9934900, episode=1656 reward=0.7951938 (514.54 it/sec) -training >> step=9935000, episode=1656 reward=0.7930667 (500.02 it/sec) -training >> step=9935100, episode=1656 reward=0.8011125 (496.21 it/sec) -training >> step=9935200, episode=1656 reward=0.7983847 (493.14 it/sec) -training >> step=9935300, episode=1657 reward=0.7927132 (109.52 it/sec) -training >> step=9935400, episode=1657 reward=0.7877133 (523.76 it/sec) -training >> step=9935500, episode=1657 reward=0.7781636 (355.65 it/sec) -training >> step=9935600, episode=1657 reward=0.7910653 (492.47 it/sec) -training >> step=9935700, episode=1657 reward=0.801842 (523.35 it/sec) -training >> step=9935800, episode=1657 reward=0.797213 (490.68 it/sec) -training >> step=9935900, episode=1657 reward=0.780222 (472.26 it/sec) -training >> step=9936000, episode=1657 reward=0.7754993 (515.53 it/sec) -training >> step=9936100, episode=1657 reward=0.7853104 (538.04 it/sec) -training >> step=9936200, episode=1657 reward=0.7816619 (483.83 it/sec) -training >> step=9936300, episode=1657 reward=0.7906424 (510.40 it/sec) -training >> step=9936400, episode=1657 reward=0.781709 (488.17 it/sec) -training >> step=9936500, episode=1657 reward=0.8106008 (466.91 it/sec) -training >> step=9936600, episode=1657 reward=0.8000941 (508.41 it/sec) -training >> step=9936700, episode=1657 reward=0.8076398 (505.07 it/sec) -training >> step=9936800, episode=1657 reward=0.7878756 (502.32 it/sec) -training >> step=9936900, episode=1657 reward=0.7759841 (476.61 it/sec) -training >> step=9937000, episode=1657 reward=0.7986781 (473.53 it/sec) -training >> step=9937100, episode=1657 reward=0.7919414 (467.89 it/sec) -training >> step=9937200, episode=1657 reward=0.7925863 (518.44 it/sec) -training >> step=9937300, episode=1657 reward=0.7900182 (485.83 it/sec) -training >> step=9937400, episode=1657 reward=0.7912719 (457.64 it/sec) -training >> step=9937500, episode=1657 reward=0.806186 (515.28 it/sec) -training >> step=9937600, episode=1657 reward=0.7956855 (438.03 it/sec) -training >> step=9937700, episode=1657 reward=0.8026853 (401.40 it/sec) -training >> step=9937800, episode=1657 reward=0.794962 (449.55 it/sec) -training >> step=9937900, episode=1657 reward=0.788725 (431.34 it/sec) -training >> step=9938000, episode=1657 reward=0.8165386 (473.13 it/sec) -training >> step=9938100, episode=1657 reward=0.8002073 (476.98 it/sec) -training >> step=9938200, episode=1657 reward=0.794543 (485.94 it/sec) -training >> step=9938300, episode=1657 reward=0.779517 (425.61 it/sec) -training >> step=9938400, episode=1657 reward=0.8018147 (440.89 it/sec) -training >> step=9938500, episode=1657 reward=0.7854167 (374.12 it/sec) -training >> step=9938600, episode=1657 reward=0.7912656 (502.83 it/sec) -training >> step=9938700, episode=1657 reward=0.7768034 (503.42 it/sec) -training >> step=9938800, episode=1657 reward=0.7699181 (476.39 it/sec) -training >> step=9938900, episode=1657 reward=0.7835123 (517.88 it/sec) -training >> step=9939000, episode=1657 reward=0.7856304 (555.87 it/sec) -training >> step=9939100, episode=1657 reward=0.7881145 (530.20 it/sec) -training >> step=9939200, episode=1657 reward=0.7892444 (540.66 it/sec) -training >> step=9939300, episode=1657 reward=0.8054454 (496.56 it/sec) -training >> step=9939400, episode=1657 reward=0.7942642 (459.26 it/sec) -training >> step=9939500, episode=1657 reward=0.7976113 (534.60 it/sec) -training >> step=9939600, episode=1657 reward=0.799685 (509.19 it/sec) -training >> step=9939700, episode=1657 reward=0.7965782 (548.05 it/sec) -training >> step=9939800, episode=1657 reward=0.7874145 (544.11 it/sec) -training >> step=9939900, episode=1657 reward=0.785574 (462.89 it/sec) -training >> step=9940000, episode=1657 reward=0.7930122 (481.03 it/sec) -training >> step=9940100, episode=1657 reward=0.7917231 (501.28 it/sec) -training >> step=9940200, episode=1657 reward=0.7899579 (484.64 it/sec) -training >> step=9940300, episode=1657 reward=0.7934334 (494.85 it/sec) -training >> step=9940400, episode=1657 reward=0.7805415 (447.48 it/sec) -training >> step=9940500, episode=1657 reward=0.7891297 (472.46 it/sec) -training >> step=9940600, episode=1657 reward=0.7770255 (509.98 it/sec) -training >> step=9940700, episode=1657 reward=0.8071996 (514.41 it/sec) -training >> step=9940800, episode=1657 reward=0.7787898 (538.68 it/sec) -training >> step=9940900, episode=1657 reward=0.7748314 (506.49 it/sec) -training >> step=9941000, episode=1657 reward=0.7650473 (509.93 it/sec) -training >> step=9941100, episode=1657 reward=0.7823754 (569.05 it/sec) -training >> step=9941200, episode=1657 reward=0.7801688 (488.12 it/sec) -training >> step=9941300, episode=1658 reward=0.7808782 (98.59 it/sec) -training >> step=9941400, episode=1658 reward=0.7867878 (528.51 it/sec) -training >> step=9941500, episode=1658 reward=0.7891318 (519.83 it/sec) -training >> step=9941600, episode=1658 reward=0.7834293 (490.26 it/sec) -training >> step=9941700, episode=1658 reward=0.784658 (513.32 it/sec) -training >> step=9941800, episode=1658 reward=0.7842441 (501.06 it/sec) -training >> step=9941900, episode=1658 reward=0.7844674 (509.70 it/sec) -training >> step=9942000, episode=1658 reward=0.7889265 (474.23 it/sec) -training >> step=9942100, episode=1658 reward=0.8061727 (527.59 it/sec) -training >> step=9942200, episode=1658 reward=0.7961553 (477.01 it/sec) -training >> step=9942300, episode=1658 reward=0.7927512 (451.74 it/sec) -training >> step=9942400, episode=1658 reward=0.7820165 (504.46 it/sec) -training >> step=9942500, episode=1658 reward=0.8117319 (482.44 it/sec) -training >> step=9942600, episode=1658 reward=0.7972993 (501.98 it/sec) -training >> step=9942700, episode=1658 reward=0.803328 (551.34 it/sec) -training >> step=9942800, episode=1658 reward=0.782454 (506.20 it/sec) -training >> step=9942900, episode=1658 reward=0.7936941 (520.55 it/sec) -training >> step=9943000, episode=1658 reward=0.807157 (514.80 it/sec) -training >> step=9943100, episode=1658 reward=0.7917728 (498.75 it/sec) -training >> step=9943200, episode=1658 reward=0.8055329 (512.22 it/sec) -training >> step=9943300, episode=1658 reward=0.7836894 (520.28 it/sec) -training >> step=9943400, episode=1658 reward=0.8033932 (561.40 it/sec) -training >> step=9943500, episode=1658 reward=0.7852309 (479.02 it/sec) -training >> step=9943600, episode=1658 reward=0.799864 (488.86 it/sec) -training >> step=9943700, episode=1658 reward=0.7952096 (532.50 it/sec) -training >> step=9943800, episode=1658 reward=0.807724 (551.74 it/sec) -training >> step=9943900, episode=1658 reward=0.7886001 (513.78 it/sec) -training >> step=9944000, episode=1658 reward=0.7902169 (503.89 it/sec) -training >> step=9944100, episode=1658 reward=0.8027886 (514.40 it/sec) -training >> step=9944200, episode=1658 reward=0.7971848 (505.26 it/sec) -training >> step=9944300, episode=1658 reward=0.7884504 (547.75 it/sec) -training >> step=9944400, episode=1658 reward=0.79349 (529.32 it/sec) -training >> step=9944500, episode=1658 reward=0.7629455 (542.00 it/sec) -training >> step=9944600, episode=1658 reward=0.7900674 (518.45 it/sec) -training >> step=9944700, episode=1658 reward=0.8052701 (483.34 it/sec) -training >> step=9944800, episode=1658 reward=0.7912472 (545.34 it/sec) -training >> step=9944900, episode=1658 reward=0.7980216 (536.29 it/sec) -training >> step=9945000, episode=1658 reward=0.8037551 (539.29 it/sec) -training >> step=9945100, episode=1658 reward=0.7898123 (520.34 it/sec) -training >> step=9945200, episode=1658 reward=0.7649928 (496.73 it/sec) -training >> step=9945300, episode=1658 reward=0.7823024 (509.61 it/sec) -training >> step=9945400, episode=1658 reward=0.8026546 (552.40 it/sec) -training >> step=9945500, episode=1658 reward=0.7835186 (520.63 it/sec) -training >> step=9945600, episode=1658 reward=0.8023363 (529.94 it/sec) -training >> step=9945700, episode=1658 reward=0.8125867 (532.88 it/sec) -training >> step=9945800, episode=1658 reward=0.812327 (505.82 it/sec) -training >> step=9945900, episode=1658 reward=0.8006328 (554.26 it/sec) -training >> step=9946000, episode=1658 reward=0.7764355 (529.85 it/sec) -training >> step=9946100, episode=1658 reward=0.8165212 (540.94 it/sec) -training >> step=9946200, episode=1658 reward=0.7803265 (525.75 it/sec) -training >> step=9946300, episode=1658 reward=0.7925358 (520.61 it/sec) -training >> step=9946400, episode=1658 reward=0.7923865 (517.15 it/sec) -training >> step=9946500, episode=1658 reward=0.7763996 (501.31 it/sec) -training >> step=9946600, episode=1658 reward=0.8026558 (548.15 it/sec) -training >> step=9946700, episode=1658 reward=0.7848662 (523.83 it/sec) -training >> step=9946800, episode=1658 reward=0.7765359 (526.06 it/sec) -training >> step=9946900, episode=1658 reward=0.7981705 (510.39 it/sec) -training >> step=9947000, episode=1658 reward=0.8019896 (569.31 it/sec) -training >> step=9947100, episode=1658 reward=0.7827089 (498.53 it/sec) -training >> step=9947200, episode=1658 reward=0.7979072 (527.93 it/sec) -training >> step=9947300, episode=1659 reward=0.7986108 (94.87 it/sec) -training >> step=9947400, episode=1659 reward=0.7846131 (530.15 it/sec) -training >> step=9947500, episode=1659 reward=0.7827933 (468.58 it/sec) -training >> step=9947600, episode=1659 reward=0.7915537 (533.62 it/sec) -training >> step=9947700, episode=1659 reward=0.7896676 (532.51 it/sec) -training >> step=9947800, episode=1659 reward=0.7997923 (496.16 it/sec) -training >> step=9947900, episode=1659 reward=0.8013991 (504.59 it/sec) -training >> step=9948000, episode=1659 reward=0.8076664 (493.46 it/sec) -training >> step=9948100, episode=1659 reward=0.7903931 (514.46 it/sec) -training >> step=9948200, episode=1659 reward=0.7842026 (544.37 it/sec) -training >> step=9948300, episode=1659 reward=0.7780614 (518.76 it/sec) -training >> step=9948400, episode=1659 reward=0.7924278 (494.96 it/sec) -training >> step=9948500, episode=1659 reward=0.7798042 (480.73 it/sec) -training >> step=9948600, episode=1659 reward=0.7967229 (542.05 it/sec) -training >> step=9948700, episode=1659 reward=0.7811477 (524.95 it/sec) -training >> step=9948800, episode=1659 reward=0.7907318 (510.29 it/sec) -training >> step=9948900, episode=1659 reward=0.7910026 (529.58 it/sec) -training >> step=9949000, episode=1659 reward=0.7894214 (513.70 it/sec) -training >> step=9949100, episode=1659 reward=0.7889338 (513.52 it/sec) -training >> step=9949200, episode=1659 reward=0.7839913 (524.74 it/sec) -training >> step=9949300, episode=1659 reward=0.8359548 (528.94 it/sec) -training >> step=9949400, episode=1659 reward=0.7750183 (509.02 it/sec) -training >> step=9949500, episode=1659 reward=0.7927912 (530.38 it/sec) -training >> step=9949600, episode=1659 reward=0.781507 (486.66 it/sec) -training >> step=9949700, episode=1659 reward=0.7887782 (524.51 it/sec) -training >> step=9949800, episode=1659 reward=0.7942619 (544.37 it/sec) -training >> step=9949900, episode=1659 reward=0.7838712 (513.53 it/sec) -training >> step=9950000, episode=1659 reward=0.7888656 (557.08 it/sec) -training >> step=9950100, episode=1659 reward=0.8058102 (486.97 it/sec) -training >> step=9950200, episode=1659 reward=0.7773765 (533.99 it/sec) -training >> step=9950300, episode=1659 reward=0.797402 (521.51 it/sec) -training >> step=9950400, episode=1659 reward=0.8021087 (557.94 it/sec) -training >> step=9950500, episode=1659 reward=0.7946156 (521.90 it/sec) -training >> step=9950600, episode=1659 reward=0.7794071 (529.00 it/sec) -training >> step=9950700, episode=1659 reward=0.7925084 (490.42 it/sec) -training >> step=9950800, episode=1659 reward=0.7761172 (557.44 it/sec) -training >> step=9950900, episode=1659 reward=0.7789462 (527.53 it/sec) -training >> step=9951000, episode=1659 reward=0.7891906 (516.05 it/sec) -training >> step=9951100, episode=1659 reward=0.7968194 (559.78 it/sec) -training >> step=9951200, episode=1659 reward=0.7942095 (491.70 it/sec) -training >> step=9951300, episode=1659 reward=0.7756146 (519.77 it/sec) -training >> step=9951400, episode=1659 reward=0.7975808 (554.96 it/sec) -training >> step=9951500, episode=1659 reward=0.7828733 (539.49 it/sec) -training >> step=9951600, episode=1659 reward=0.7859651 (508.44 it/sec) -training >> step=9951700, episode=1659 reward=0.7839303 (527.21 it/sec) -training >> step=9951800, episode=1659 reward=0.7898672 (556.60 it/sec) -training >> step=9951900, episode=1659 reward=0.7903602 (542.63 it/sec) -training >> step=9952000, episode=1659 reward=0.7624994 (497.14 it/sec) -training >> step=9952100, episode=1659 reward=0.782846 (483.51 it/sec) -training >> step=9952200, episode=1659 reward=0.7968369 (518.99 it/sec) -training >> step=9952300, episode=1659 reward=0.7983642 (425.27 it/sec) -training >> step=9952400, episode=1659 reward=0.80122 (438.52 it/sec) -training >> step=9952500, episode=1659 reward=0.8027639 (480.24 it/sec) -training >> step=9952600, episode=1659 reward=0.776869 (489.06 it/sec) -training >> step=9952700, episode=1659 reward=0.792132 (538.22 it/sec) -training >> step=9952800, episode=1659 reward=0.7884941 (479.50 it/sec) -training >> step=9952900, episode=1659 reward=0.7815611 (581.17 it/sec) -training >> step=9953000, episode=1659 reward=0.7878584 (524.78 it/sec) -training >> step=9953100, episode=1659 reward=0.7934124 (529.60 it/sec) -training >> step=9953200, episode=1659 reward=0.7873992 (528.18 it/sec) -training >> step=9953300, episode=1660 reward=0.8121082 (116.67 it/sec) -training >> step=9953400, episode=1660 reward=0.775977 (538.59 it/sec) -training >> step=9953500, episode=1660 reward=0.7988973 (498.81 it/sec) -training >> step=9953600, episode=1660 reward=0.7865491 (577.47 it/sec) -training >> step=9953700, episode=1660 reward=0.7793519 (518.62 it/sec) -training >> step=9953800, episode=1660 reward=0.8032141 (499.51 it/sec) -training >> step=9953900, episode=1660 reward=0.8069509 (557.64 it/sec) -training >> step=9954000, episode=1660 reward=0.8018272 (490.50 it/sec) -training >> step=9954100, episode=1660 reward=0.7848899 (518.00 it/sec) -training >> step=9954200, episode=1660 reward=0.7874248 (525.78 it/sec) -training >> step=9954300, episode=1660 reward=0.8102121 (528.42 it/sec) -training >> step=9954400, episode=1660 reward=0.7804204 (499.42 it/sec) -training >> step=9954500, episode=1660 reward=0.7853722 (514.64 it/sec) -training >> step=9954600, episode=1660 reward=0.8017234 (505.81 it/sec) -training >> step=9954700, episode=1660 reward=0.787816 (523.74 it/sec) -training >> step=9954800, episode=1660 reward=0.8013773 (457.94 it/sec) -training >> step=9954900, episode=1660 reward=0.7830319 (557.84 it/sec) -training >> step=9955000, episode=1660 reward=0.7872491 (489.41 it/sec) -training >> step=9955100, episode=1660 reward=0.7974516 (502.59 it/sec) -training >> step=9955200, episode=1660 reward=0.7925962 (513.01 it/sec) -training >> step=9955300, episode=1660 reward=0.797491 (539.79 it/sec) -training >> step=9955400, episode=1660 reward=0.7918893 (520.74 it/sec) -training >> step=9955500, episode=1660 reward=0.7880652 (531.86 it/sec) -training >> step=9955600, episode=1660 reward=0.7921523 (465.04 it/sec) -training >> step=9955700, episode=1660 reward=0.8156736 (524.85 it/sec) -training >> step=9955800, episode=1660 reward=0.7926061 (543.09 it/sec) -training >> step=9955900, episode=1660 reward=0.7886615 (520.30 it/sec) -training >> step=9956000, episode=1660 reward=0.8098075 (543.11 it/sec) -training >> step=9956100, episode=1660 reward=0.7842575 (504.58 it/sec) -training >> step=9956200, episode=1660 reward=0.7932155 (516.58 it/sec) -training >> step=9956300, episode=1660 reward=0.7826995 (526.61 it/sec) -training >> step=9956400, episode=1660 reward=0.7781703 (510.85 it/sec) -training >> step=9956500, episode=1660 reward=0.7904748 (527.47 it/sec) -training >> step=9956600, episode=1660 reward=0.7829279 (528.27 it/sec) -training >> step=9956700, episode=1660 reward=0.7853385 (477.20 it/sec) -training >> step=9956800, episode=1660 reward=0.7897508 (555.10 it/sec) -training >> step=9956900, episode=1660 reward=0.7915592 (517.60 it/sec) -training >> step=9957000, episode=1660 reward=0.7872768 (522.60 it/sec) -training >> step=9957100, episode=1660 reward=0.7876133 (506.63 it/sec) -training >> step=9957200, episode=1660 reward=0.7951267 (466.92 it/sec) -training >> step=9957300, episode=1660 reward=0.8020293 (492.75 it/sec) -training >> step=9957400, episode=1660 reward=0.7831764 (479.05 it/sec) -training >> step=9957500, episode=1660 reward=0.7782841 (517.21 it/sec) -training >> step=9957600, episode=1660 reward=0.8045827 (545.43 it/sec) -training >> step=9957700, episode=1660 reward=0.7915914 (474.32 it/sec) -training >> step=9957800, episode=1660 reward=0.7942299 (552.59 it/sec) -training >> step=9957900, episode=1660 reward=0.7904527 (498.48 it/sec) -training >> step=9958000, episode=1660 reward=0.7964907 (533.75 it/sec) -training >> step=9958100, episode=1660 reward=0.7986562 (534.11 it/sec) -training >> step=9958200, episode=1660 reward=0.7849597 (545.75 it/sec) -training >> step=9958300, episode=1660 reward=0.7804317 (513.23 it/sec) -training >> step=9958400, episode=1660 reward=0.7842954 (529.95 it/sec) -training >> step=9958500, episode=1660 reward=0.796223 (508.80 it/sec) -training >> step=9958600, episode=1660 reward=0.7875613 (523.78 it/sec) -training >> step=9958700, episode=1660 reward=0.7990851 (545.06 it/sec) -training >> step=9958800, episode=1660 reward=0.7956191 (515.82 it/sec) -training >> step=9958900, episode=1660 reward=0.7837365 (572.17 it/sec) -training >> step=9959000, episode=1660 reward=0.7895098 (491.39 it/sec) -training >> step=9959100, episode=1660 reward=0.7854214 (513.64 it/sec) -training >> step=9959200, episode=1660 reward=0.7937676 (526.98 it/sec) -training >> step=9959300, episode=1661 reward=0.7832626 (110.01 it/sec) -training >> step=9959400, episode=1661 reward=0.7860925 (534.55 it/sec) -training >> step=9959500, episode=1661 reward=0.7966621 (510.00 it/sec) -training >> step=9959600, episode=1661 reward=0.7902715 (497.44 it/sec) -training >> step=9959700, episode=1661 reward=0.8023807 (521.11 it/sec) -training >> step=9959800, episode=1661 reward=0.7916062 (536.73 it/sec) -training >> step=9959900, episode=1661 reward=0.7921693 (528.19 it/sec) -training >> step=9960000, episode=1661 reward=0.7933589 (546.78 it/sec) -training >> step=9960100, episode=1661 reward=0.7973834 (478.50 it/sec) -training >> step=9960200, episode=1661 reward=0.7704145 (528.41 it/sec) -training >> step=9960300, episode=1661 reward=0.7647452 (506.43 it/sec) -training >> step=9960400, episode=1661 reward=0.7876877 (515.58 it/sec) -training >> step=9960500, episode=1661 reward=0.7832984 (544.54 it/sec) -training >> step=9960600, episode=1661 reward=0.8013902 (485.52 it/sec) -training >> step=9960700, episode=1661 reward=0.7841879 (556.69 it/sec) -training >> step=9960800, episode=1661 reward=0.8079033 (484.19 it/sec) -training >> step=9960900, episode=1661 reward=0.8026871 (522.84 it/sec) -training >> step=9961000, episode=1661 reward=0.7946648 (539.91 it/sec) -training >> step=9961100, episode=1661 reward=0.7858592 (522.73 it/sec) -training >> step=9961200, episode=1661 reward=0.7788636 (530.76 it/sec) -training >> step=9961300, episode=1661 reward=0.8006181 (509.43 it/sec) -training >> step=9961400, episode=1661 reward=0.7713495 (542.55 it/sec) -training >> step=9961500, episode=1661 reward=0.7892488 (543.67 it/sec) -training >> step=9961600, episode=1661 reward=0.8032805 (531.87 it/sec) -training >> step=9961700, episode=1661 reward=0.8027086 (502.82 it/sec) -training >> step=9961800, episode=1661 reward=0.7898798 (568.25 it/sec) -training >> step=9961900, episode=1661 reward=0.784427 (495.08 it/sec) -training >> step=9962000, episode=1661 reward=0.783273 (447.99 it/sec) -training >> step=9962100, episode=1661 reward=0.7980425 (501.38 it/sec) -training >> step=9962200, episode=1661 reward=0.8191389 (507.15 it/sec) -training >> step=9962300, episode=1661 reward=0.8034036 (529.79 it/sec) -training >> step=9962400, episode=1661 reward=0.7895207 (478.59 it/sec) -training >> step=9962500, episode=1661 reward=0.7962746 (550.56 it/sec) -training >> step=9962600, episode=1661 reward=0.8134005 (500.89 it/sec) -training >> step=9962700, episode=1661 reward=0.7842709 (510.33 it/sec) -training >> step=9962800, episode=1661 reward=0.8058796 (520.23 it/sec) -training >> step=9962900, episode=1661 reward=0.8000339 (547.59 it/sec) -training >> step=9963000, episode=1661 reward=0.7962145 (517.81 it/sec) -training >> step=9963100, episode=1661 reward=0.781624 (504.42 it/sec) -training >> step=9963200, episode=1661 reward=0.7959948 (521.62 it/sec) -training >> step=9963300, episode=1661 reward=0.8105912 (529.34 it/sec) -training >> step=9963400, episode=1661 reward=0.7923239 (512.70 it/sec) -training >> step=9963500, episode=1661 reward=0.7967832 (527.82 it/sec) -training >> step=9963600, episode=1661 reward=0.7699731 (530.34 it/sec) -training >> step=9963700, episode=1661 reward=0.7935219 (525.43 it/sec) -training >> step=9963800, episode=1661 reward=0.7910428 (497.97 it/sec) -training >> step=9963900, episode=1661 reward=0.7964272 (543.03 it/sec) -training >> step=9964000, episode=1661 reward=0.809288 (513.99 it/sec) -training >> step=9964100, episode=1661 reward=0.7862574 (516.28 it/sec) -training >> step=9964200, episode=1661 reward=0.8007464 (472.24 it/sec) -training >> step=9964300, episode=1661 reward=0.7787252 (477.83 it/sec) -training >> step=9964400, episode=1661 reward=0.7827373 (481.80 it/sec) -training >> step=9964500, episode=1661 reward=0.8000104 (482.33 it/sec) -training >> step=9964600, episode=1661 reward=0.7965227 (457.48 it/sec) -training >> step=9964700, episode=1661 reward=0.794928 (547.85 it/sec) -training >> step=9964800, episode=1661 reward=0.7794896 (476.04 it/sec) -training >> step=9964900, episode=1661 reward=0.7870213 (524.94 it/sec) -training >> step=9965000, episode=1661 reward=0.8076754 (526.42 it/sec) -training >> step=9965100, episode=1661 reward=0.8049704 (510.59 it/sec) -training >> step=9965200, episode=1661 reward=0.7824544 (512.65 it/sec) -training >> step=9965300, episode=1662 reward=0.791351 (100.97 it/sec) -training >> step=9965400, episode=1662 reward=0.7557603 (552.89 it/sec) -training >> step=9965500, episode=1662 reward=0.787214 (473.16 it/sec) -training >> step=9965600, episode=1662 reward=0.7867185 (525.18 it/sec) -training >> step=9965700, episode=1662 reward=0.7880025 (525.55 it/sec) -training >> step=9965800, episode=1662 reward=0.7722499 (522.21 it/sec) -training >> step=9965900, episode=1662 reward=0.8148957 (510.77 it/sec) -training >> step=9966000, episode=1662 reward=0.7861298 (503.88 it/sec) -training >> step=9966100, episode=1662 reward=0.7965888 (517.21 it/sec) -training >> step=9966200, episode=1662 reward=0.789754 (470.18 it/sec) -training >> step=9966300, episode=1662 reward=0.7985536 (529.37 it/sec) -training >> step=9966400, episode=1662 reward=0.7967566 (533.98 it/sec) -training >> step=9966500, episode=1662 reward=0.7936493 (511.68 it/sec) -training >> step=9966600, episode=1662 reward=0.8038931 (547.13 it/sec) -training >> step=9966700, episode=1662 reward=0.8143294 (486.20 it/sec) -training >> step=9966800, episode=1662 reward=0.8175092 (481.05 it/sec) -training >> step=9966900, episode=1662 reward=0.7920292 (495.99 it/sec) -training >> step=9967000, episode=1662 reward=0.7739167 (506.67 it/sec) -training >> step=9967100, episode=1662 reward=0.7873811 (526.44 it/sec) -training >> step=9967200, episode=1662 reward=0.7940938 (538.47 it/sec) -training >> step=9967300, episode=1662 reward=0.7785272 (509.39 it/sec) -training >> step=9967400, episode=1662 reward=0.7644021 (546.25 it/sec) -training >> step=9967500, episode=1662 reward=0.7890105 (545.13 it/sec) -training >> step=9967600, episode=1662 reward=0.7802929 (463.32 it/sec) -training >> step=9967700, episode=1662 reward=0.789554 (544.54 it/sec) -training >> step=9967800, episode=1662 reward=0.7905059 (523.74 it/sec) -training >> step=9967900, episode=1662 reward=0.7830825 (524.94 it/sec) -training >> step=9968000, episode=1662 reward=0.7884774 (532.65 it/sec) -training >> step=9968100, episode=1662 reward=0.7898182 (497.35 it/sec) -training >> step=9968200, episode=1662 reward=0.79725 (454.37 it/sec) -training >> step=9968300, episode=1662 reward=0.7808363 (522.03 it/sec) -training >> step=9968400, episode=1662 reward=0.7726665 (499.95 it/sec) -training >> step=9968500, episode=1662 reward=0.7795234 (538.22 it/sec) -training >> step=9968600, episode=1662 reward=0.7820278 (511.43 it/sec) -training >> step=9968700, episode=1662 reward=0.7962627 (530.25 it/sec) -training >> step=9968800, episode=1662 reward=0.7955061 (469.84 it/sec) -training >> step=9968900, episode=1662 reward=0.7958155 (395.77 it/sec) -training >> step=9969000, episode=1662 reward=0.7803038 (488.23 it/sec) -training >> step=9969100, episode=1662 reward=0.7709149 (478.12 it/sec) -training >> step=9969200, episode=1662 reward=0.7822778 (524.08 it/sec) -training >> step=9969300, episode=1662 reward=0.7972602 (387.16 it/sec) -training >> step=9969400, episode=1662 reward=0.8137683 (445.11 it/sec) -training >> step=9969500, episode=1662 reward=0.7754665 (392.08 it/sec) -training >> step=9969600, episode=1662 reward=0.81378 (404.43 it/sec) -training >> step=9969700, episode=1662 reward=0.782405 (505.43 it/sec) -training >> step=9969800, episode=1662 reward=0.7998904 (463.26 it/sec) -training >> step=9969900, episode=1662 reward=0.7856203 (489.62 it/sec) -training >> step=9970000, episode=1662 reward=0.7911164 (518.40 it/sec) -training >> step=9970100, episode=1662 reward=0.777864 (482.97 it/sec) -training >> step=9970200, episode=1662 reward=0.7827008 (485.70 it/sec) -training >> step=9970300, episode=1662 reward=0.7948158 (494.47 it/sec) -training >> step=9970400, episode=1662 reward=0.7717636 (492.58 it/sec) -training >> step=9970500, episode=1662 reward=0.7827625 (440.94 it/sec) -training >> step=9970600, episode=1662 reward=0.7817743 (505.19 it/sec) -training >> step=9970700, episode=1662 reward=0.7906868 (474.62 it/sec) -training >> step=9970800, episode=1662 reward=0.7906882 (480.19 it/sec) -training >> step=9970900, episode=1662 reward=0.7795566 (427.38 it/sec) -training >> step=9971000, episode=1662 reward=0.7956108 (510.90 it/sec) -training >> step=9971100, episode=1662 reward=0.7844462 (467.96 it/sec) -training >> step=9971200, episode=1662 reward=0.7891697 (483.63 it/sec) -training >> step=9971300, episode=1663 reward=0.7876758 (107.34 it/sec) -training >> step=9971400, episode=1663 reward=0.7733532 (447.17 it/sec) -training >> step=9971500, episode=1663 reward=0.7984517 (492.78 it/sec) -training >> step=9971600, episode=1663 reward=0.8058644 (507.23 it/sec) -training >> step=9971700, episode=1663 reward=0.7768473 (477.05 it/sec) -training >> step=9971800, episode=1663 reward=0.7864627 (464.38 it/sec) -training >> step=9971900, episode=1663 reward=0.7825986 (495.25 it/sec) -training >> step=9972000, episode=1663 reward=0.7784805 (496.31 it/sec) -training >> step=9972100, episode=1663 reward=0.7873798 (512.66 it/sec) -training >> step=9972200, episode=1663 reward=0.8022991 (536.98 it/sec) -training >> step=9972300, episode=1663 reward=0.799883 (512.65 it/sec) -training >> step=9972400, episode=1663 reward=0.77277 (496.85 it/sec) -training >> step=9972500, episode=1663 reward=0.7963104 (527.52 it/sec) -training >> step=9972600, episode=1663 reward=0.7902871 (528.97 it/sec) -training >> step=9972700, episode=1663 reward=0.7864854 (471.02 it/sec) -training >> step=9972800, episode=1663 reward=0.7990106 (512.74 it/sec) -training >> step=9972900, episode=1663 reward=0.8019356 (551.22 it/sec) -training >> step=9973000, episode=1663 reward=0.7675726 (511.25 it/sec) -training >> step=9973100, episode=1663 reward=0.7739205 (510.80 it/sec) -training >> step=9973200, episode=1663 reward=0.7927021 (491.39 it/sec) -training >> step=9973300, episode=1663 reward=0.7734244 (513.59 it/sec) -training >> step=9973400, episode=1663 reward=0.7877702 (493.47 it/sec) -training >> step=9973500, episode=1663 reward=0.7872197 (471.96 it/sec) -training >> step=9973600, episode=1663 reward=0.7919302 (529.50 it/sec) -training >> step=9973700, episode=1663 reward=0.7858651 (512.13 it/sec) -training >> step=9973800, episode=1663 reward=0.79429 (486.55 it/sec) -training >> step=9973900, episode=1663 reward=0.7869954 (484.62 it/sec) -training >> step=9974000, episode=1663 reward=0.807678 (522.84 it/sec) -training >> step=9974100, episode=1663 reward=0.7982647 (474.10 it/sec) -training >> step=9974200, episode=1663 reward=0.7923025 (476.55 it/sec) -training >> step=9974300, episode=1663 reward=0.7832897 (477.33 it/sec) -training >> step=9974400, episode=1663 reward=0.8023041 (547.08 it/sec) -training >> step=9974500, episode=1663 reward=0.7798632 (500.98 it/sec) -training >> step=9974600, episode=1663 reward=0.7899176 (496.85 it/sec) -training >> step=9974700, episode=1663 reward=0.7897025 (477.31 it/sec) -training >> step=9974800, episode=1663 reward=0.7783556 (468.53 it/sec) -training >> step=9974900, episode=1663 reward=0.798878 (488.02 it/sec) -training >> step=9975000, episode=1663 reward=0.7773033 (480.19 it/sec) -training >> step=9975100, episode=1663 reward=0.8148441 (500.83 it/sec) -training >> step=9975200, episode=1663 reward=0.7730066 (523.75 it/sec) -training >> step=9975300, episode=1663 reward=0.7892657 (491.30 it/sec) -training >> step=9975400, episode=1663 reward=0.7766636 (433.75 it/sec) -training >> step=9975500, episode=1663 reward=0.815316 (459.09 it/sec) -training >> step=9975600, episode=1663 reward=0.7940556 (498.30 it/sec) -training >> step=9975700, episode=1663 reward=0.7553813 (475.38 it/sec) -training >> step=9975800, episode=1663 reward=0.7973772 (506.08 it/sec) -training >> step=9975900, episode=1663 reward=0.7889562 (493.73 it/sec) -training >> step=9976000, episode=1663 reward=0.7875659 (470.54 it/sec) -training >> step=9976100, episode=1663 reward=0.7905911 (500.79 it/sec) -training >> step=9976200, episode=1663 reward=0.7852242 (480.11 it/sec) -training >> step=9976300, episode=1663 reward=0.8013152 (461.86 it/sec) -training >> step=9976400, episode=1663 reward=0.7936931 (479.74 it/sec) -training >> step=9976500, episode=1663 reward=0.8006384 (446.86 it/sec) -training >> step=9976600, episode=1663 reward=0.8017755 (474.54 it/sec) -training >> step=9976700, episode=1663 reward=0.7800195 (496.46 it/sec) -training >> step=9976800, episode=1663 reward=0.794908 (491.95 it/sec) -training >> step=9976900, episode=1663 reward=0.7725347 (504.58 it/sec) -training >> step=9977000, episode=1663 reward=0.7908889 (508.39 it/sec) -training >> step=9977100, episode=1663 reward=0.8041027 (514.04 it/sec) -training >> step=9977200, episode=1663 reward=0.8107964 (511.26 it/sec) -training >> step=9977300, episode=1664 reward=0.7740576 (50.34 it/sec) -training >> step=9977400, episode=1664 reward=0.7802105 (473.41 it/sec) -training >> step=9977500, episode=1664 reward=0.7709858 (518.73 it/sec) -training >> step=9977600, episode=1664 reward=0.7867787 (495.92 it/sec) -training >> step=9977700, episode=1664 reward=0.781343 (461.20 it/sec) -training >> step=9977800, episode=1664 reward=0.7787254 (498.52 it/sec) -training >> step=9977900, episode=1664 reward=0.7946033 (468.44 it/sec) -training >> step=9978000, episode=1664 reward=0.7844899 (494.82 it/sec) -training >> step=9978100, episode=1664 reward=0.7788754 (545.22 it/sec) -training >> step=9978200, episode=1664 reward=0.7933704 (510.40 it/sec) -training >> step=9978300, episode=1664 reward=0.8032456 (507.15 it/sec) -training >> step=9978400, episode=1664 reward=0.7904896 (490.65 it/sec) -training >> step=9978500, episode=1664 reward=0.8092267 (484.92 it/sec) -training >> step=9978600, episode=1664 reward=0.7756085 (498.24 it/sec) -training >> step=9978700, episode=1664 reward=0.791806 (527.47 it/sec) -training >> step=9978800, episode=1664 reward=0.8102327 (523.56 it/sec) -training >> step=9978900, episode=1664 reward=0.802568 (492.60 it/sec) -training >> step=9979000, episode=1664 reward=0.798619 (528.59 it/sec) -training >> step=9979100, episode=1664 reward=0.7906814 (498.38 it/sec) -training >> step=9979200, episode=1664 reward=0.778137 (462.23 it/sec) -training >> step=9979300, episode=1664 reward=0.8006721 (496.77 it/sec) -training >> step=9979400, episode=1664 reward=0.8087407 (527.02 it/sec) -training >> step=9979500, episode=1664 reward=0.7812034 (491.73 it/sec) -training >> step=9979600, episode=1664 reward=0.800707 (472.65 it/sec) -training >> step=9979700, episode=1664 reward=0.8020409 (487.17 it/sec) -training >> step=9979800, episode=1664 reward=0.7929882 (516.72 it/sec) -training >> step=9979900, episode=1664 reward=0.7935933 (517.56 it/sec) -training >> step=9980000, episode=1664 reward=0.7771401 (507.72 it/sec) -training >> step=9980100, episode=1664 reward=0.7760794 (512.32 it/sec) -training >> step=9980200, episode=1664 reward=0.7990239 (486.56 it/sec) -training >> step=9980300, episode=1664 reward=0.8090844 (490.55 it/sec) -training >> step=9980400, episode=1664 reward=0.7862069 (493.82 it/sec) -training >> step=9980500, episode=1664 reward=0.8017057 (528.17 it/sec) -training >> step=9980600, episode=1664 reward=0.7896852 (495.52 it/sec) -training >> step=9980700, episode=1664 reward=0.7903973 (433.97 it/sec) -training >> step=9980800, episode=1664 reward=0.7866504 (514.63 it/sec) -training >> step=9980900, episode=1664 reward=0.8155981 (488.35 it/sec) -training >> step=9981000, episode=1664 reward=0.786361 (497.45 it/sec) -training >> step=9981100, episode=1664 reward=0.8028712 (487.25 it/sec) -training >> step=9981200, episode=1664 reward=0.7972698 (498.48 it/sec) -training >> step=9981300, episode=1664 reward=0.7890662 (484.22 it/sec) -training >> step=9981400, episode=1664 reward=0.7956918 (528.57 it/sec) -training >> step=9981500, episode=1664 reward=0.8097772 (524.91 it/sec) -training >> step=9981600, episode=1664 reward=0.7863439 (461.97 it/sec) -training >> step=9981700, episode=1664 reward=0.8104635 (484.27 it/sec) -training >> step=9981800, episode=1664 reward=0.8103181 (530.92 it/sec) -training >> step=9981900, episode=1664 reward=0.7701797 (501.58 it/sec) -training >> step=9982000, episode=1664 reward=0.7749531 (441.23 it/sec) -training >> step=9982100, episode=1664 reward=0.7996066 (513.05 it/sec) -training >> step=9982200, episode=1664 reward=0.7849897 (513.55 it/sec) -training >> step=9982300, episode=1664 reward=0.8064116 (511.24 it/sec) -training >> step=9982400, episode=1664 reward=0.786743 (526.61 it/sec) -training >> step=9982500, episode=1664 reward=0.7822111 (452.92 it/sec) -training >> step=9982600, episode=1664 reward=0.7875824 (484.97 it/sec) -training >> step=9982700, episode=1664 reward=0.7800049 (484.54 it/sec) -training >> step=9982800, episode=1664 reward=0.7790124 (503.66 it/sec) -training >> step=9982900, episode=1664 reward=0.7878655 (520.03 it/sec) -training >> step=9983000, episode=1664 reward=0.7773461 (507.88 it/sec) -training >> step=9983100, episode=1664 reward=0.786782 (492.81 it/sec) -training >> step=9983200, episode=1664 reward=0.7785599 (481.75 it/sec) -training >> step=9983300, episode=1665 reward=0.7842443 (45.07 it/sec) -training >> step=9983400, episode=1665 reward=0.7754276 (491.73 it/sec) -training >> step=9983500, episode=1665 reward=0.7877862 (480.64 it/sec) -training >> step=9983600, episode=1665 reward=0.7904175 (518.66 it/sec) -training >> step=9983700, episode=1665 reward=0.7813022 (487.97 it/sec) -training >> step=9983800, episode=1665 reward=0.804179 (518.46 it/sec) -training >> step=9983900, episode=1665 reward=0.7928293 (531.74 it/sec) -training >> step=9984000, episode=1665 reward=0.7881123 (538.86 it/sec) -training >> step=9984100, episode=1665 reward=0.7900121 (506.78 it/sec) -training >> step=9984200, episode=1665 reward=0.8011664 (519.88 it/sec) -training >> step=9984300, episode=1665 reward=0.7769409 (481.79 it/sec) -training >> step=9984400, episode=1665 reward=0.7824389 (534.73 it/sec) -training >> step=9984500, episode=1665 reward=0.7896628 (508.71 it/sec) -training >> step=9984600, episode=1665 reward=0.805537 (535.39 it/sec) -training >> step=9984700, episode=1665 reward=0.795146 (491.37 it/sec) -training >> step=9984800, episode=1665 reward=0.7733086 (492.68 it/sec) -training >> step=9984900, episode=1665 reward=0.8010885 (511.49 it/sec) -training >> step=9985000, episode=1665 reward=0.7933754 (494.52 it/sec) -training >> step=9985100, episode=1665 reward=0.8064034 (517.51 it/sec) -training >> step=9985200, episode=1665 reward=0.7939159 (491.48 it/sec) -training >> step=9985300, episode=1665 reward=0.806538 (543.29 it/sec) -training >> step=9985400, episode=1665 reward=0.794176 (526.38 it/sec) -training >> step=9985500, episode=1665 reward=0.8071992 (521.89 it/sec) -training >> step=9985600, episode=1665 reward=0.7890758 (508.38 it/sec) -training >> step=9985700, episode=1665 reward=0.7879468 (532.86 it/sec) -training >> step=9985800, episode=1665 reward=0.7876799 (450.55 it/sec) -training >> step=9985900, episode=1665 reward=0.7835003 (517.40 it/sec) -training >> step=9986000, episode=1665 reward=0.7699881 (513.77 it/sec) -training >> step=9986100, episode=1665 reward=0.7972921 (513.30 it/sec) -training >> step=9986200, episode=1665 reward=0.7713253 (494.92 it/sec) -training >> step=9986300, episode=1665 reward=0.7782604 (522.62 it/sec) -training >> step=9986400, episode=1665 reward=0.7910368 (503.99 it/sec) -training >> step=9986500, episode=1665 reward=0.7960699 (493.33 it/sec) -training >> step=9986600, episode=1665 reward=0.790943 (487.17 it/sec) -training >> step=9986700, episode=1665 reward=0.7858415 (477.58 it/sec) -training >> step=9986800, episode=1665 reward=0.7698212 (475.88 it/sec) -training >> step=9986900, episode=1665 reward=0.7855383 (456.95 it/sec) -training >> step=9987000, episode=1665 reward=0.7785068 (488.52 it/sec) -training >> step=9987100, episode=1665 reward=0.7916692 (483.15 it/sec) -training >> step=9987200, episode=1665 reward=0.7773505 (511.22 it/sec) -training >> step=9987300, episode=1665 reward=0.7933426 (495.44 it/sec) -training >> step=9987400, episode=1665 reward=0.7841244 (537.88 it/sec) -training >> step=9987500, episode=1665 reward=0.7977948 (537.42 it/sec) -training >> step=9987600, episode=1665 reward=0.7840251 (493.85 it/sec) -training >> step=9987700, episode=1665 reward=0.7893398 (503.87 it/sec) -training >> step=9987800, episode=1665 reward=0.7809273 (480.27 it/sec) -training >> step=9987900, episode=1665 reward=0.7921653 (503.69 it/sec) -training >> step=9988000, episode=1665 reward=0.7760271 (542.37 it/sec) -training >> step=9988100, episode=1665 reward=0.7763311 (507.68 it/sec) -training >> step=9988200, episode=1665 reward=0.7746663 (503.00 it/sec) -training >> step=9988300, episode=1665 reward=0.7875311 (462.50 it/sec) -training >> step=9988400, episode=1665 reward=0.7776588 (512.69 it/sec) -training >> step=9988500, episode=1665 reward=0.785189 (505.33 it/sec) -training >> step=9988600, episode=1665 reward=0.8104002 (560.08 it/sec) -training >> step=9988700, episode=1665 reward=0.787004 (511.66 it/sec) -training >> step=9988800, episode=1665 reward=0.7841758 (496.30 it/sec) -training >> step=9988900, episode=1665 reward=0.7853962 (519.65 it/sec) -training >> step=9989000, episode=1665 reward=0.7833958 (525.49 it/sec) -training >> step=9989100, episode=1665 reward=0.8028491 (514.03 it/sec) -training >> step=9989200, episode=1665 reward=0.7873758 (498.25 it/sec) -training >> step=9989300, episode=1666 reward=0.7765142 (40.23 it/sec) -training >> step=9989400, episode=1666 reward=0.7753241 (494.19 it/sec) -training >> step=9989500, episode=1666 reward=0.7976864 (531.60 it/sec) -training >> step=9989600, episode=1666 reward=0.7877498 (537.90 it/sec) -training >> step=9989700, episode=1666 reward=0.8113213 (473.45 it/sec) -training >> step=9989800, episode=1666 reward=0.7897799 (499.68 it/sec) -training >> step=9989900, episode=1666 reward=0.8053495 (507.28 it/sec) -training >> step=9990000, episode=1666 reward=0.8161584 (509.16 it/sec) -training >> step=9990100, episode=1666 reward=0.7894894 (529.38 it/sec) -training >> step=9990200, episode=1666 reward=0.7954832 (527.42 it/sec) -training >> step=9990300, episode=1666 reward=0.7886722 (551.61 it/sec) -training >> step=9990400, episode=1666 reward=0.7850844 (478.40 it/sec) -training >> step=9990500, episode=1666 reward=0.791103 (556.79 it/sec) -training >> step=9990600, episode=1666 reward=0.7834627 (533.75 it/sec) -training >> step=9990700, episode=1666 reward=0.7888276 (514.53 it/sec) -training >> step=9990800, episode=1666 reward=0.7628841 (497.12 it/sec) -training >> step=9990900, episode=1666 reward=0.8025926 (516.38 it/sec) -training >> step=9991000, episode=1666 reward=0.7853804 (509.95 it/sec) -training >> step=9991100, episode=1666 reward=0.7850974 (532.24 it/sec) -training >> step=9991200, episode=1666 reward=0.8182601 (521.28 it/sec) -training >> step=9991300, episode=1666 reward=0.7853651 (507.81 it/sec) -training >> step=9991400, episode=1666 reward=0.7764512 (484.47 it/sec) -training >> step=9991500, episode=1666 reward=0.7916462 (468.82 it/sec) -training >> step=9991600, episode=1666 reward=0.7879375 (557.41 it/sec) -training >> step=9991700, episode=1666 reward=0.7830632 (520.38 it/sec) -training >> step=9991800, episode=1666 reward=0.7888523 (492.89 it/sec) -training >> step=9991900, episode=1666 reward=0.7861857 (473.06 it/sec) -training >> step=9992000, episode=1666 reward=0.788245 (536.89 it/sec) -training >> step=9992100, episode=1666 reward=0.8075768 (498.37 it/sec) -training >> step=9992200, episode=1666 reward=0.8023839 (458.86 it/sec) -training >> step=9992300, episode=1666 reward=0.8012677 (502.67 it/sec) -training >> step=9992400, episode=1666 reward=0.8050742 (496.63 it/sec) -training >> step=9992500, episode=1666 reward=0.7799576 (489.73 it/sec) -training >> step=9992600, episode=1666 reward=0.8051005 (518.91 it/sec) -training >> step=9992700, episode=1666 reward=0.7936444 (540.35 it/sec) -training >> step=9992800, episode=1666 reward=0.7808437 (510.89 it/sec) -training >> step=9992900, episode=1666 reward=0.8104724 (505.00 it/sec) -training >> step=9993000, episode=1666 reward=0.7892141 (490.12 it/sec) -training >> step=9993100, episode=1666 reward=0.7966124 (542.43 it/sec) -training >> step=9993200, episode=1666 reward=0.7839766 (511.93 it/sec) -training >> step=9993300, episode=1666 reward=0.7867599 (465.72 it/sec) -training >> step=9993400, episode=1666 reward=0.799778 (519.67 it/sec) -training >> step=9993500, episode=1666 reward=0.7905973 (493.22 it/sec) -training >> step=9993600, episode=1666 reward=0.7971527 (523.79 it/sec) -training >> step=9993700, episode=1666 reward=0.7761363 (499.93 it/sec) -training >> step=9993800, episode=1666 reward=0.7874888 (467.33 it/sec) -training >> step=9993900, episode=1666 reward=0.779422 (416.68 it/sec) -training >> step=9994000, episode=1666 reward=0.8068805 (511.00 it/sec) -training >> step=9994100, episode=1666 reward=0.7754022 (538.11 it/sec) -training >> step=9994200, episode=1666 reward=0.7910489 (533.77 it/sec) -training >> step=9994300, episode=1666 reward=0.7942755 (503.68 it/sec) -training >> step=9994400, episode=1666 reward=0.7754663 (480.08 it/sec) -training >> step=9994500, episode=1666 reward=0.7984278 (492.00 it/sec) -training >> step=9994600, episode=1666 reward=0.7984235 (511.54 it/sec) -training >> step=9994700, episode=1666 reward=0.7807997 (527.26 it/sec) -training >> step=9994800, episode=1666 reward=0.7960232 (523.86 it/sec) -training >> step=9994900, episode=1666 reward=0.7790934 (491.30 it/sec) -training >> step=9995000, episode=1666 reward=0.7887748 (528.84 it/sec) -training >> step=9995100, episode=1666 reward=0.7992447 (497.29 it/sec) -training >> step=9995200, episode=1666 reward=0.8126836 (547.06 it/sec) -training >> step=9995300, episode=1667 reward=0.7698131 (41.56 it/sec) -training >> step=9995400, episode=1667 reward=0.7998792 (479.21 it/sec) -training >> step=9995500, episode=1667 reward=0.7949165 (472.16 it/sec) -training >> step=9995600, episode=1667 reward=0.777773 (490.05 it/sec) -training >> step=9995700, episode=1667 reward=0.8016627 (482.86 it/sec) -training >> step=9995800, episode=1667 reward=0.770258 (543.78 it/sec) -training >> step=9995900, episode=1667 reward=0.786986 (514.99 it/sec) -training >> step=9996000, episode=1667 reward=0.7753075 (486.46 it/sec) -training >> step=9996100, episode=1667 reward=0.7861254 (542.19 it/sec) -training >> step=9996200, episode=1667 reward=0.7955578 (511.98 it/sec) -training >> step=9996300, episode=1667 reward=0.7963408 (510.58 it/sec) -training >> step=9996400, episode=1667 reward=0.7950541 (531.66 it/sec) -training >> step=9996500, episode=1667 reward=0.7665794 (535.39 it/sec) -training >> step=9996600, episode=1667 reward=0.7788149 (522.67 it/sec) -training >> step=9996700, episode=1667 reward=0.7907087 (516.39 it/sec) -training >> step=9996800, episode=1667 reward=0.8094125 (537.82 it/sec) -training >> step=9996900, episode=1667 reward=0.7993823 (495.81 it/sec) -training >> step=9997000, episode=1667 reward=0.7955354 (426.70 it/sec) -training >> step=9997100, episode=1667 reward=0.8044433 (514.68 it/sec) -training >> step=9997200, episode=1667 reward=0.8026247 (511.63 it/sec) -training >> step=9997300, episode=1667 reward=0.8022442 (478.76 it/sec) -training >> step=9997400, episode=1667 reward=0.7887658 (511.70 it/sec) -training >> step=9997500, episode=1667 reward=0.7900865 (515.74 it/sec) -training >> step=9997600, episode=1667 reward=0.7742614 (500.50 it/sec) -training >> step=9997700, episode=1667 reward=0.790139 (510.11 it/sec) -training >> step=9997800, episode=1667 reward=0.7793284 (502.62 it/sec) -training >> step=9997900, episode=1667 reward=0.7898955 (529.19 it/sec) -training >> step=9998000, episode=1667 reward=0.7751215 (472.91 it/sec) -training >> step=9998100, episode=1667 reward=0.7933522 (545.13 it/sec) -training >> step=9998200, episode=1667 reward=0.8096728 (550.15 it/sec) -training >> step=9998300, episode=1667 reward=0.7964402 (526.79 it/sec) -training >> step=9998400, episode=1667 reward=0.7938179 (537.53 it/sec) -training >> step=9998500, episode=1667 reward=0.7941555 (491.30 it/sec) -training >> step=9998600, episode=1667 reward=0.7991976 (477.83 it/sec) -training >> step=9998700, episode=1667 reward=0.8019835 (507.13 it/sec) -training >> step=9998800, episode=1667 reward=0.809925 (495.22 it/sec) -training >> step=9998900, episode=1667 reward=0.7818965 (497.35 it/sec) -training >> step=9999000, episode=1667 reward=0.8044886 (510.02 it/sec) -training >> step=9999100, episode=1667 reward=0.7931072 (474.54 it/sec) -training >> step=9999200, episode=1667 reward=0.8008677 (528.42 it/sec) -training >> step=9999300, episode=1667 reward=0.7946149 (525.27 it/sec) -training >> step=9999400, episode=1667 reward=0.7833021 (497.77 it/sec) -training >> step=9999500, episode=1667 reward=0.7762456 (510.52 it/sec) -training >> step=9999600, episode=1667 reward=0.7916558 (487.27 it/sec) -training >> step=9999700, episode=1667 reward=0.7844504 (517.82 it/sec) -training >> step=9999800, episode=1667 reward=0.8065975 (544.77 it/sec) -training >> step=9999900, episode=1667 reward=0.7848377 (517.01 it/sec) -training >> step=10000000, episode=1667 reward=0.792927 (515.35 it/sec) -training >> step=10000100, episode=1667 reward=0.7954696 (477.10 it/sec) -training >> step=10000200, episode=1667 reward=0.8078986 (540.68 it/sec) -training >> step=10000300, episode=1667 reward=0.8060049 (512.61 it/sec) -training >> step=10000400, episode=1667 reward=0.7930337 (551.21 it/sec) -training >> step=10000500, episode=1667 reward=0.7830002 (500.21 it/sec) -training >> step=10000600, episode=1667 reward=0.792935 (507.33 it/sec) -training >> step=10000700, episode=1667 reward=0.7762302 (474.55 it/sec) -training >> step=10000800, episode=1667 reward=0.7844638 (540.15 it/sec) -training >> step=10000900, episode=1667 reward=0.793855 (480.23 it/sec) -training >> step=10001000, episode=1667 reward=0.7962189 (529.95 it/sec) -training >> step=10001100, episode=1667 reward=0.7958851 (546.63 it/sec) -training >> step=10001200, episode=1667 reward=0.7892457 (476.97 it/sec) diff --git a/results/tau_agent_B3_30M/run_logs/timers.json b/results/tau_agent_B3_30M/run_logs/timers.json deleted file mode 100644 index 04d600fe06e2dc2ae60226693db12a99f95ee4b7..0000000000000000000000000000000000000000 --- a/results/tau_agent_B3_30M/run_logs/timers.json +++ /dev/null @@ -1,395 +0,0 @@ -{ - "name": "root", - "gauges": { - "TauAgent.Policy.Entropy.mean": { - "value": 1.9342690706253052, - "min": 1.7753900289535522, - "max": 1.9342691898345947, - "count": 1000 - }, - "TauAgent.Policy.Entropy.sum": { - "value": 11605.6142578125, - "min": 10624.953125, - "max": 23211.23046875, - "count": 1000 - }, - "TauAgent.AveragedReward.mean": { - "value": 0.7915799021720886, - "min": 0.7548644289374351, - "max": 0.7938086420297623, - "count": 1000 - }, - "TauAgent.AveragedReward.sum": { - "value": 47.49479413032532, - "min": 45.31859803199768, - "max": 95.19813793897629, - "count": 1000 - }, - "TauAgent.Step.mean": { - "value": 9999848.0, - "min": 9820.0, - "max": 9999848.0, - "count": 1000 - }, - "TauAgent.Step.sum": { - "value": 9999848.0, - "min": 9820.0, - "max": 9999848.0, - "count": 1000 - }, - "TauAgent.Policy.ExtrinsicValueEstimate.mean": { - "value": 49.412166595458984, - "min": 19.28923225402832, - "max": 49.412166595458984, - "count": 1000 - }, - "TauAgent.Policy.ExtrinsicValueEstimate.sum": { - "value": 2767.081298828125, - "min": 1105.77197265625, - "max": 3010.7783203125, - "count": 1000 - }, - "TauAgent.Policy.CuriosityValueEstimate.mean": { - "value": 14.126309394836426, - "min": 13.883576393127441, - "max": 39.59188461303711, - "count": 1000 - }, - "TauAgent.Policy.CuriosityValueEstimate.sum": { - "value": 791.0733032226562, - "min": 789.1708374023438, - "max": 2847.930419921875, - "count": 1000 - }, - "TauAgent.Environment.EpisodeLength.mean": { - "value": 299.0, - "min": 298.0, - "max": 299.0, - "count": 1000 - }, - "TauAgent.Environment.EpisodeLength.sum": { - "value": 10764.0, - "min": 5980.0, - "max": 14950.0, - "count": 1000 - }, - "TauAgent.Losses.PolicyLoss.mean": { - "value": 0.3700002171669621, - "min": 0.18322236576583237, - "max": 0.9998006623075343, - "count": 1000 - }, - "TauAgent.Losses.PolicyLoss.sum": { - "value": 0.7400004343339242, - "min": 0.36644473153166474, - "max": 3.999202649230137, - "count": 1000 - }, - "TauAgent.Losses.ValueLoss.mean": { - "value": 13.491872815489769, - "min": 6.517763823270798, - "max": 72.8747960436344, - "count": 1000 - }, - "TauAgent.Losses.ValueLoss.sum": { - "value": 26.983745630979538, - "min": 13.035527646541595, - "max": 291.4991841745376, - "count": 1000 - }, - "TauAgent.Policy.LearningRate.mean": { - "value": 1.5699948000000614e-08, - "min": 1.5699948000000614e-08, - "max": 2.9983350055500002e-05, - "count": 1000 - }, - "TauAgent.Policy.LearningRate.sum": { - "value": 3.139989600000123e-08, - "min": 3.139989600000123e-08, - "max": 0.00011982576058080001, - "count": 1000 - }, - "TauAgent.Policy.Epsilon.mean": { - "value": 0.100052, - "min": 0.100052, - "max": 0.19994450000000002, - "count": 1000 - }, - "TauAgent.Policy.Epsilon.sum": { - "value": 0.200104, - "min": 0.200104, - "max": 0.7994192, - "count": 1000 - }, - "TauAgent.Policy.Beta.mean": { - "value": 1.2594800000000098e-05, - "min": 1.2594800000000098e-05, - "max": 0.0049972305500000005, - "count": 1000 - }, - "TauAgent.Policy.Beta.sum": { - "value": 2.5189600000000197e-05, - "min": 2.5189600000000197e-05, - "max": 0.01997101808, - "count": 1000 - }, - "TauAgent.Losses.CuriosityForwardLoss.mean": { - "value": 0.7028209407627583, - "min": 0.5522816904634238, - "max": 3.984716608077288, - "count": 1000 - }, - "TauAgent.Losses.CuriosityForwardLoss.sum": { - "value": 1.4056418815255165, - "min": 1.2111109605431556, - "max": 15.938866432309151, - "count": 1000 - }, - "TauAgent.Losses.CuriosityInverseLoss.mean": { - "value": 1269.5834356689452, - "min": 959.1193981933594, - "max": 1342.635277404785, - "count": 1000 - }, - "TauAgent.Losses.CuriosityInverseLoss.sum": { - "value": 2539.1668713378904, - "min": 1939.1132708740233, - "max": 5370.54110961914, - "count": 1000 - }, - "TauAgent.Environment.CumulativeReward.mean": { - "value": 237.45140953063964, - "min": 226.0676055908203, - "max": 237.97897777557372, - "count": 1000 - }, - "TauAgent.Environment.CumulativeReward.sum": { - "value": 4749.028190612793, - "min": 4521.352111816406, - "max": 9510.635234832764, - "count": 1000 - }, - "TauAgent.Policy.ExtrinsicReward.mean": { - "value": 237.45140953063964, - "min": 226.0676055908203, - "max": 237.97897777557372, - "count": 1000 - }, - "TauAgent.Policy.ExtrinsicReward.sum": { - "value": 4749.028190612793, - "min": 4521.352111816406, - "max": 9510.635234832764, - "count": 1000 - }, - "TauAgent.Policy.CuriosityReward.mean": { - "value": 20.415450191497804, - "min": 13.266634559631347, - "max": 101.54527498483658, - "count": 1000 - }, - "TauAgent.Policy.CuriosityReward.sum": { - "value": 408.30900382995605, - "min": 265.33269119262695, - "max": 4061.810999393463, - "count": 1000 - }, - "TauAgent.IsTraining.mean": { - "value": 1.0, - "min": 1.0, - "max": 1.0, - "count": 1000 - }, - "TauAgent.IsTraining.sum": { - "value": 1.0, - "min": 1.0, - "max": 1.0, - "count": 1000 - } - }, - "metadata": { - "timer_format_version": "0.1.0", - "start_time_seconds": "1726863185", - "python_version": "3.10.11 (tags/v3.10.11:7d4cc5a, Apr 5 2023, 00:38:17) [MSC v.1929 64 bit (AMD64)]", - "command_line_arguments": "C:\\Users\\3nigma\\source\\repos\\Tau\\MLAgentsProject\\venv\\ml-agents\\Scripts\\mlagents-learn .\\config\\tau_agent_ppo_c.yaml --initialize-from=tau_agent_B2 --run-id=tau_agent_B3 --env .\\Build --torch-device cuda --timeout-wait 300 --force", - "mlagents_version": "1.0.0", - "mlagents_envs_version": "1.0.0", - "communication_protocol_version": "1.5.0", - "pytorch_version": "2.2.2+cu121", - "numpy_version": "1.23.5", - "end_time_seconds": "1726885012" - }, - "total": 21827.451941700012, - "count": 1, - "self": 0.7669775000249501, - "children": { - "run_training.setup": { - "total": 0.10503129998687655, - "count": 1, - "self": 0.10503129998687655 - }, - "TrainerController.start_learning": { - "total": 21826.5799329, - "count": 1, - "self": 15.825440005748533, - "children": { - "TrainerController._reset_env": { - "total": 78.59107170000789, - "count": 1, - "self": 78.59107170000789 - }, - "TrainerController.advance": { - "total": 21731.95893599422, - "count": 501728, - "self": 7.068378486845177, - "children": { - "env_step": { - "total": 21724.890557507373, - "count": 501728, - "self": 19051.82315200963, - "children": { - "SubprocessEnvManager._take_step": { - "total": 2664.7809590923716, - "count": 501728, - "self": 76.79833638935816, - "children": { - "TorchPolicy.evaluate": { - "total": 2587.9826227030135, - "count": 500062, - "self": 2587.9826227030135 - } - } - }, - "workers": { - "total": 8.286446405370953, - "count": 501728, - "self": 0.0, - "children": { - "worker_root": { - "total": 21729.33268601686, - "count": 501728, - "is_parallel": true, - "self": 5063.764864402503, - "children": { - "steps_from_proto": { - "total": 0.0014186000043991953, - "count": 1, - "is_parallel": true, - "self": 0.00014470002497546375, - "children": { - "_process_rank_one_or_two_observation": { - "total": 0.0012738999794237316, - "count": 2, - "is_parallel": true, - "self": 0.0012738999794237316 - } - } - }, - "UnityEnvironment.step": { - "total": 16665.566403014353, - "count": 501728, - "is_parallel": true, - "self": 206.65595291682985, - "children": { - "UnityEnvironment._generate_step_input": { - "total": 1236.5819672984362, - "count": 501728, - "is_parallel": true, - "self": 1236.5819672984362 - }, - "communicator.exchange": { - "total": 14698.02332379861, - "count": 501728, - "is_parallel": true, - "self": 14698.02332379861 - }, - "steps_from_proto": { - "total": 524.3051590004761, - "count": 501728, - "is_parallel": true, - "self": 69.20558690087637, - "children": { - "_process_rank_one_or_two_observation": { - "total": 455.09957209959975, - "count": 1003456, - "is_parallel": true, - "self": 455.09957209959975 - } - } - } - } - } - } - } - } - } - } - } - } - }, - "trainer_threads": { - "total": 5.1300012273713946e-05, - "count": 1, - "self": 5.1300012273713946e-05, - "children": { - "thread_root": { - "total": 0.0, - "count": 0, - "is_parallel": true, - "self": 0.0, - "children": { - "trainer_advance": { - "total": 21747.708942399768, - "count": 3855, - "is_parallel": true, - "self": 5.501622798619792, - "children": { - "process_trajectory": { - "total": 1628.2920038007724, - "count": 3855, - "is_parallel": true, - "self": 1622.5799688007392, - "children": { - "RLTrainer._checkpoint": { - "total": 5.712035000033211, - "count": 40, - "is_parallel": true, - "self": 5.712035000033211 - } - } - }, - "_update_policy": { - "total": 20113.915315800376, - "count": 3332, - "is_parallel": true, - "self": 8936.68095340248, - "children": { - "TorchPPOOptimizer.update": { - "total": 11177.234362397896, - "count": 333200, - "is_parallel": true, - "self": 11177.234362397896 - } - } - } - } - } - } - } - } - }, - "TrainerController._save_models": { - "total": 0.20443390001310036, - "count": 1, - "self": 0.007288700027856976, - "children": { - "RLTrainer._checkpoint": { - "total": 0.19714519998524338, - "count": 1, - "self": 0.19714519998524338 - } - } - } - } - } - } -} \ No newline at end of file diff --git a/results/tau_agent_B3_30M/run_logs/training_status.json b/results/tau_agent_B3_30M/run_logs/training_status.json deleted file mode 100644 index 076b12f6a542a76dfcc1b9ecbf8b30bdc3f5f123..0000000000000000000000000000000000000000 --- a/results/tau_agent_B3_30M/run_logs/training_status.json +++ /dev/null @@ -1,380 +0,0 @@ -{ - "TauAgent": { - "checkpoints": [ - { - "steps": 499800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-499800.onnx", - "reward": null, - "creation_time": 1726864263.028644, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-499800.pt" - ] - }, - { - "steps": 749752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-749752.onnx", - "reward": null, - "creation_time": 1726864793.1011937, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-749752.pt" - ] - }, - { - "steps": 999848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-999848.onnx", - "reward": null, - "creation_time": 1726865324.2881722, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-999848.pt" - ] - }, - { - "steps": 1249800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-1249800.onnx", - "reward": null, - "creation_time": 1726865868.5107632, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-1249800.pt" - ] - }, - { - "steps": 1499752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-1499752.onnx", - "reward": null, - "creation_time": 1726866391.6557612, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-1499752.pt" - ] - }, - { - "steps": 1749848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-1749848.onnx", - "reward": null, - "creation_time": 1726866911.729134, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-1749848.pt" - ] - }, - { - "steps": 1999800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-1999800.onnx", - "reward": null, - "creation_time": 1726867438.6356852, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-1999800.pt" - ] - }, - { - "steps": 2249752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-2249752.onnx", - "reward": null, - "creation_time": 1726867996.184426, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-2249752.pt" - ] - }, - { - "steps": 2499848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-2499848.onnx", - "reward": null, - "creation_time": 1726868472.0328994, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-2499848.pt" - ] - }, - { - "steps": 2749800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-2749800.onnx", - "reward": null, - "creation_time": 1726868992.7363517, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-2749800.pt" - ] - }, - { - "steps": 2999752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-2999752.onnx", - "reward": null, - "creation_time": 1726869513.747206, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-2999752.pt" - ] - }, - { - "steps": 3249848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-3249848.onnx", - "reward": null, - "creation_time": 1726870026.6554596, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-3249848.pt" - ] - }, - { - "steps": 3499800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-3499800.onnx", - "reward": null, - "creation_time": 1726870571.6395957, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-3499800.pt" - ] - }, - { - "steps": 3749752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-3749752.onnx", - "reward": null, - "creation_time": 1726871110.6083488, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-3749752.pt" - ] - }, - { - "steps": 3999848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-3999848.onnx", - "reward": null, - "creation_time": 1726871659.7189555, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-3999848.pt" - ] - }, - { - "steps": 4249800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-4249800.onnx", - "reward": null, - "creation_time": 1726872224.657589, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-4249800.pt" - ] - }, - { - "steps": 4499752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-4499752.onnx", - "reward": null, - "creation_time": 1726872790.2818408, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-4499752.pt" - ] - }, - { - "steps": 4749848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-4749848.onnx", - "reward": null, - "creation_time": 1726873331.475235, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-4749848.pt" - ] - }, - { - "steps": 4999800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-4999800.onnx", - "reward": null, - "creation_time": 1726873902.9824262, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-4999800.pt" - ] - }, - { - "steps": 5249752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-5249752.onnx", - "reward": null, - "creation_time": 1726874475.2349055, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-5249752.pt" - ] - }, - { - "steps": 5499848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-5499848.onnx", - "reward": null, - "creation_time": 1726875029.9459288, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-5499848.pt" - ] - }, - { - "steps": 5749800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-5749800.onnx", - "reward": null, - "creation_time": 1726875601.9802082, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-5749800.pt" - ] - }, - { - "steps": 5999752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-5999752.onnx", - "reward": null, - "creation_time": 1726876165.8827033, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-5999752.pt" - ] - }, - { - "steps": 6249848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-6249848.onnx", - "reward": null, - "creation_time": 1726876696.1256096, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-6249848.pt" - ] - }, - { - "steps": 6499800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-6499800.onnx", - "reward": null, - "creation_time": 1726877250.7572887, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-6499800.pt" - ] - }, - { - "steps": 6749752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-6749752.onnx", - "reward": null, - "creation_time": 1726877825.8447108, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-6749752.pt" - ] - }, - { - "steps": 6999848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-6999848.onnx", - "reward": null, - "creation_time": 1726878373.5239935, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-6999848.pt" - ] - }, - { - "steps": 7249800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-7249800.onnx", - "reward": null, - "creation_time": 1726878925.8595445, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-7249800.pt" - ] - }, - { - "steps": 7499752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-7499752.onnx", - "reward": null, - "creation_time": 1726879491.8937926, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-7499752.pt" - ] - }, - { - "steps": 7749848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-7749848.onnx", - "reward": null, - "creation_time": 1726880034.7684882, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-7749848.pt" - ] - }, - { - "steps": 7999800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-7999800.onnx", - "reward": null, - "creation_time": 1726880569.5108337, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-7999800.pt" - ] - }, - { - "steps": 8249752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-8249752.onnx", - "reward": null, - "creation_time": 1726881101.5210292, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-8249752.pt" - ] - }, - { - "steps": 8499848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-8499848.onnx", - "reward": null, - "creation_time": 1726881642.2917244, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-8499848.pt" - ] - }, - { - "steps": 8749800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-8749800.onnx", - "reward": null, - "creation_time": 1726882213.4639268, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-8749800.pt" - ] - }, - { - "steps": 8999752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-8999752.onnx", - "reward": null, - "creation_time": 1726882780.9215643, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-8999752.pt" - ] - }, - { - "steps": 9249848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-9249848.onnx", - "reward": null, - "creation_time": 1726883325.0723217, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-9249848.pt" - ] - }, - { - "steps": 9499800, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-9499800.onnx", - "reward": null, - "creation_time": 1726883874.967686, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-9499800.pt" - ] - }, - { - "steps": 9749752, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-9749752.onnx", - "reward": null, - "creation_time": 1726884462.9092565, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-9749752.pt" - ] - }, - { - "steps": 9999848, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-9999848.onnx", - "reward": null, - "creation_time": 1726885011.617069, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-9999848.pt" - ] - }, - { - "steps": 10000360, - "file_path": "results\\tau_agent_B3\\TauAgent\\TauAgent-10000360.onnx", - "reward": null, - "creation_time": 1726885011.888399, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-10000360.pt" - ] - } - ], - "final_checkpoint": { - "steps": 10000360, - "file_path": "results\\tau_agent_B3\\TauAgent.onnx", - "reward": null, - "creation_time": 1726885011.888399, - "auxillary_file_paths": [ - "results\\tau_agent_B3\\TauAgent\\TauAgent-10000360.pt" - ] - } - }, - "metadata": { - "stats_format_version": "0.3.0", - "mlagents_version": "1.0.0", - "torch_version": "2.2.2+cu121" - } -} \ No newline at end of file diff --git a/results/tau_agent_C1_25M/Tau-C1-25M.onnx b/results/tau_agent_C1_25M/Tau-C1-25M.onnx new file mode 100644 index 0000000000000000000000000000000000000000..66044f369900efd6a34f39cf8d2485b69c8755f9 --- /dev/null +++ b/results/tau_agent_C1_25M/Tau-C1-25M.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc65fcaef06a81951ad9380c50171259e034696f3734c4f095be6c0c862cd84 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..75d022d00213fc678399253a898a97f9c4450814 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c600e8097e01bde7ee551df20d48e725180aeaae4207f3a3a0dde57404e0fb +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..207fb6710889ed9adab9406146a7bccad2919a0f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1329ec413890b7dbcc16da15955680faa34220a40c2a9d45623ba64fe0e045f6 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..bb71d0929cd0714f2afe6a09d63b379ea2b4ee15 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8e40596fd4ea56b15ca05eb6b89bc774200e3d4b55cb12d3721545db0c42fb +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..6acf30530fc4f897f055613412ca915828282ce9 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a485785fa2654a92951f32fef47bcb0aed8df14c36d36174fd842731a536d939 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..01820f016d9ef7283f14e11f5976dab42d98a355 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9c3818500843387c3001685938ff10f674725cc2a50087aa95d34711e1dd766 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..6770c2c2466a365295e662d3f92b2a627543ca21 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8eec91571c527bcbab883caac5b52f741e1bad30cade4810a9338e455f55584 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9026de5bdcad86a03ba2eb518580b7af069b1cee --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8e8463da832ba9f6b78b9227f026d42db9eb1f915bea80a188b07070f9a7d1 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcd656caf46989bcf1548a2806da9640b419284b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-10999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c86415150d98461d3e40a0ca414cc5a80cc6cefc3e12dc466aba224e1485248 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b969f02e31d729ce673c74711caf17e550bc096a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e29a3be908c03877dc7f770579c70b1b229b8926eae618a71d41e91c2ac5e2 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..78da9defc045735fe4bb0ac7cb3d726b4fb14ced --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eafc88f8c8f7e3d1a87c41913f82da601d9396426f53e5a1733c7fc56cd8386 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f7d6890aa67a4eccedc13e7eada3cbacbe199fad --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:571abf163cfeb10e5f57270806851b4f1c84cdfd86bcece12347c12a5acc39ec +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d7aad76d9159dc4f99f9a6855f1638ead378a12 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdeaf0d4842e632d42cd62b7ca5467ff7e51f00d237c8fae2b381f5ccf64073f +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3aeb6819f89fc778109f3e31ef7edf60dd9b16ad --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8294cde6b1b060d4e26742db5c1b332761fd4f5f5131114bf01cf1b52aebdb80 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8af5fce4544902f2db0a85b8bbe7dbd7818da6d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bc80406997ce9fed66d17af56a5dff799e912d2fe8589428b00d0680103c86c +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..197759424d73b4b236c2a6950f9ae7837daf92aa --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ea3a307c021db9fa99eacf17880006a68c8df22ca3c41c859a6432892370e5 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..eae339299d695e407f35f8d5425a0953e954173a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-11999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a568f35eb272a14eec43e5d9cecf8be8c265fe6102b6f6b477d3cf4dc1872d +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..aa875c584f8824b4ac37a7be0b8fa8c215a8f8d4 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4039308c4bf16fb251a6db75555c8626c7d5cae7a9d47d04fbe13e5f3d867f +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..a88b53405e179c8128ae79413c6558c722eec379 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c3cd35738fde6de641d5e562b7491623fe3481f14f5de0dfd8fe0d2f9a1c5e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4a90ae5f9ebdeaa939ac6886314553836f12ee9b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d7659595be6e4c333172684d15d3f607c49de6d029df90f27c5339bb3d9cdd +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.pt new file mode 100644 index 0000000000000000000000000000000000000000..4faae4faf76d46b910f0fb64170d72bcb44d3235 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1249820.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3318b52e0abc38f1d8bf975afecf40d55f73f2d89d438418340fe4607e18dfa1 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..85063a6aea3faaa37e810100b799b78c4891289b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b528f17fdb148f370c272a98a1fd4b816a7e9e3d3510e33f663fe1b26cacc66 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ed7fc2a5d71a0f8e201066e486b24c46ac71454 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:859bbf47eafd0173c61c0a18212cb76c8adffbbd84649c37914b91d22d231547 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..93a81c1d107a2ed0fe87a1d6183b89fa50912f84 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9de068c0a64a7dbef4f23d931a16ad4189d9090397535758fddfdffe36decf18 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f2abbe79ee65a7d90cc78c415f1a42c73df80bb --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b7af6feef9eabb12994425e61f23676c7e70f45b65db23cdf077023e87fc64 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8f1633159e35b9e9d6a289eee4a683ec3fe7671c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30b638046f1b8bab0c8681fdd36a8f7c905b3152bdc721eed472dc47fb87a67 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..4681150e03dd51085caeee8b5a090b2e1c8e847f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-12999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5864e43c4545f3e6577dda84ec0d1119ae7a9bf1ad0f02d4719ee68cf6ac63ab +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4755d22dcef524202b8fca55785e7b8f1fef4bb5 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41520f813bbd735d29b54a125a69b06a80c0dddecef826bdcdd6022cb18aad8c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea0e8622faa9f01f72329aa3df3c62471ac2e7d4 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3824801aa3a853feb15b0c704ed36b6b0d294030ea252fdb81696a000f4914ec +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..dbd23389a63705f51fd06c27e755bd56a1918cd2 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0625a7b33cd76231e8da1821659d8c3eb14b64fd808b4f205e5030e37c67ff4b +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..c30d1cc4aea83ebfa53c9de9ac81494cfa917118 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6136a14b57b66a6801ab8d87615f13feadfb852dd4b34e04f6728912476ed8 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..868bbad07c1e87e00c1df4ab5ae6d2867be72869 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f832cbfb3ecf7134d4caeefa1ac4ee810d0fde511ca2e93db3dab8e980cbf718 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..291eab71e826137cf0d076c1d819fdc883d7575e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fed4ec00f2d0a4697b3b69d0eaf1af431529096cb0ffad3ed89622c2a0f9dc3 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b4fe7f1bc6aecc2dd6220ebf7ae6aa8299dbb0dd --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4899c70fcd84d93a7777f71b8186e6656d07f5a508e96d443a25f42d3c8fe1 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..f20873d2410c57ac5c45ece9f13b26c733abdbba --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-13999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a72f58080759aaac0546d7ec1d71e03551b44c7651e69cf8e3e96c158db7cd46 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..85ae6d274de65943cedf7cbab32b40cfb5e29215 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e42753322ef0c830e2c5ba8288ff101a479243ce896ff78d6f5579438252ddd +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f125dbe5f3afbef04387adb18bee58abf05a3fd --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0beba085860b776bb011764a03237ae0b117e581a0d58b5bf08abd2bf5405abd +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b45cdd9a683174303c83bdfa65d221da0a67018d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d851807210bf55c3ec82eed9143df27fd1383af6d03fecf235c6ffbd59b3996 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..552a9e3ad9066b6ae2ae08c562a0a8d2c9372b09 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c89479b2d5c91f822a11eeb2bc1673e7b674cfc0aac5fdb611d3f2e952912f52 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2e053712117e1ebbc9153420cced2087cc257995 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95cebb28169c1402c82aa97243cf7b61b32ea813e1180521b0e930e583681e05 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..c99ef90e72b6c902f4c29b082697f62161f4cf42 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fb5512f1138b206fd7583c6636e99c59c5adc3203124a977378366b6a8c057 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9e00f5e6efe9990169c9ea598328d2137b7e0ed0 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae0988f008f2d7473cf926ec56b62f50b9634922de1899da19a8193bdc02355 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.pt new file mode 100644 index 0000000000000000000000000000000000000000..d14051add05dfde028b64aa9bf27946e975bf866 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1499864.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a27b64555230b5dabe1caac48a33848433ca2b381af471470b3aa56df4aedb4 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..62bfe99472cf6d41deb920f94b441d025d901308 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e471b54ec887e3ba581db7c794e8e91860ef887d32dc619ac262352f8b6c4b0 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..10596e6f3952f8a55d7df56d4d002ff5e282fc40 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-14999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0f9f98715a0d04053e2cef416b992fd12be916d16001c69b0f665a1f91293b +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1f9b300268593cfd063d1421d397c6e869340b1c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b201aaad364b91d9ba115637c30836bf360045719112fd09c6d357240fceaa +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..d90dfb2415995dafcaa6a107d0644bb28ccc490d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d15379d300139b4d847e0bcb2a63e2c264ec530983be17f565e3fe478c7b73 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d704d0849be151223368cdb1a6f70fb2322d730c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecd4b8b7c406018f0423b71a6c0bb6180f46479351296bcdfec6d8dd1fcd679 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..33340e382443bdac6c23a93a10d6b753912fa532 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf18184f4295d4d6d187560e6ec9796e8bd61972fab272057cb27849770df3f +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..985d6344fabb7985fffb4d1415470b9c09c84468 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f744f04b2fa906e3cc1999bf1b219dcdfd6021fa4f33db05ed99bdc11645b53 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..5667befd8310c30e5b2d964f3b2b07f068ade946 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee4e94cdc751ce2d4541cab622ddf08d8f9e07c4207f3209036fb4daf8f431d +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c83a5d8796e1d0afca5c47cdc21615fac6441002 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75cc1381feb0b90c321b79102f3a5f212f33ed2887448f1d63125ddf8b0fc5f +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..f441f02b6e3dd07254a322c16a24ff59e6cff976 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-15999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a578200dca2bf7fdd1cbd96e7b3d0c3f63ae75b6a0172532afbe1e55ba67ea +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..19c3d694bf76bfa223fbac649e325e7b808d20c8 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf0add9649f36a5486f4c326f6d5d7b6c064ed7db30bf045ff0d8487dd96909 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d973feb8054d3b7e876390968a98885b76ffeef --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3072d53e75dd47e2dd78f171b6a6cd04822841ffbebeae88c696033947bc3e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1073490f59486c893aa3109a44b13334cf6895ca --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea24832d9f22286d498951a7b4c868ea02cf4956e19c932a38f35ee2d741fae2 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..b97a3d7b382b321621844f0833c6802b1b535e4d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ddcfd457c2ac9aac3a78e4fcdfb53de8db3f3f91270bf14cb7ee6e4e830d6d +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..af77b142048d6bc8cf6e2147c6cb79c9c3d56459 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:212a7a6629c81bdbe619603a8434f2ec6105a08be72781b09d63af1e1d2f847e +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..326e1522184711a3c82897c70bb291fd989427e8 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa06f2a30597785040f8b1c7b15e5294a69d9f9e40266b5cb0e7ae6405f3f241 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7e634c1844046890563e82557242ac61b05e495b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfcc9f71e037360ba5e388d231505e0ae52a3568787b7f98ea775add79229c60 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..df92ee9b048f583c41f9ce8749ee6d94793518f6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-16999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49283496791eceebde038e85560e05c543134c02b462e3cf10a2dfa6477d5b42 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6b657f0791b253122d21780e0e7fff4a8bcb3e9b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd71e80f4e454303bf29cc751ee4792de91ad2d875b6025fd6a398d2c7c08b43 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee7c176783b7f4fa0f08d1c3af95a9d8e55b10d9 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:871ca1f0a3cf1b44f07bfea7ba7d39cf26af42840585155a32307c66399b057f +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..68ee63f0c7180612a371a4f33f344bc1ec1beac8 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bd799dd51a8e4b6070babcc3049a416174cc702954c23cd90fb31b93f0efbe2 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..9797eda5107165489df3f68233182da1a86405d6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc336159b8f3f5feb96a89071139bb25155fc51a7eed758572c37d61ec197d9 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9fa7c9b8d3178c5b8bebd97c35f99d87d9d3ab42 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0496668c5c8224d0e226e58f6e96e9e34ab6161a77cadc5a7dc1e93af7f46946 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..254d8cc3cd718201a123ab057e11d5abc4ceeded --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:243cda5457f275f0ddd3fa19edaf95e068d2c7a20dc575ecb1ea14ed61252bcc +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8d6e36c947f47ba98543835e042fb7c81bf3349e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97d6a80bc2c9a0722269483c8b3b8d65b4360e62e78561aae4dd385792d0d2e +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..1be940d149e4d9de08cafc5d207a14965b209f75 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350ffd6de1661b754f07bbc7c9b05e71d63ccd88347a154de0e8dd8d2db16123 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2e9351f4f8504be9aa4add69fb56705947046575 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a3dd479be0141e16b7222df78a9fcbc6bc8d16ccfb1f23b299e8056cbab632 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..04d5095fc244f6b22b38458310f68fd736186e53 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-17999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:729c6ce86d7e54a8a6d7532c7026a3afeaacc15962ed7da8e9cc425d91815333 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e6069a42ac57cddcde85b61db8479a7214291658 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c66075def9eefd814a6cc11dbfa4267ec1b32fbbfa5eb3514098d4effc667e63 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..249a71a47266893fd9c434fc962381ad81005f93 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b6f8ff5784635b5acecb6745257aa8308efd9d4259efe987b33cd735988caa +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..89e4804a20f6d51bf88c32fd92692c9b91f250d4 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1e55c069db9a11ae8c94af5de2ed0b35fb0990668f0c03eb2904cce6d31745 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc38db0464c43bcf3f3bf1e8ce44fe15eabf6b17 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274076a883d78a97d8609bf0a029a7c1e6bfdf20ccb735f18ae0986c7897a271 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7cb4fe214df323ba4658e1511dd4747aa2934393 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcba1196296589e522ef1ce38bb2e9791b0d0fb226fe9d2b3bd9952227cc428a +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab81a842511c8fc6cd57b1a4761e29a4e7d8ecc0 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b0607376338ed77f89ba58562e49a2f968c79349ee872c24b32db173934431 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..dc12f3acec667bec71d8b34c35f85a370ed39998 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8ee4b7e7e2d5a647019492d63fac9e820a9e9ae960fe0022fdc7c0151216c2 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..7959044b4e8b6c25619a43c4ddf22c631cea75d4 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-18999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9470c09901459a059285ef97adb4286a9520dac9080166bb0a85979ee9f83f44 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0063110fb06fda9b9876f6ec126a5ddecb32bc1f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803b50d653e58db79341b203c9e06596710dc5e40a399affe0229cf200765ba8 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..58a197cc5785444615ebd97bcabae024918c4bb2 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dc6230dc2b5431ef84fb63586c9f171f0f073a3821a3aeeb95f47a024bfe504 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9f8702416a007bd2e17c0c744451eb0cc813d7ac --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dddf1d4a2a16c91eeb6e06cb35f0b0a9639bf8daa9f6e514bd3e694bfcf859f +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bc88e769eb21692da9f6daae33f3c8021d08971 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db090b9a84285ae491ad80a8c601ae069df253b9de92c166e88c8ea81cf89e01 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8c89dfbe64d437367b02892333f0e9a91261c99d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c62d5fcd5fa46c77e65b1e1301a49118a4af96a87fd0c2818baea726cd77f9f9 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..e070e7214fa0f8e8a810b420e28b4b0aad66dcf5 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1987c5bf944f140fbd3a08b952776e9de5bbe2555f9bfd358f0103445474a663 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c09a179e3c99f05c63991dbf53071b6ad1908b49 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd3b60caf0d758176af31a6be55c6ca56c32e5d262414f674fffa26f0cef6a9 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..88364c94eae0319d23a57c53946992749cce1f06 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-1999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:856364f4defcb3a0b85f7235bc46447818370628052820db647f44fe85593c4c +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f240a7d8dc6db84350e40d9fdcd1710b5b79cdf6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e9f73350d8d3819418306d291b4aba60ba4af344e29fb404601db085478422b +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..efbe6b75e6544535aa939e0ff4ec9406a8857787 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-19999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8ee5c412d1a1fe25c14d2cde9bc4a8a2c7cd46e1640a54e9461ce18f53c3b6e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b557b6fa03975cb33bc962c453f9a4b7e32eb466 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd4c82c7c86013fae832fba3468736acdec5d3c86e0dc4e349b920b6a106b16 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..68c51ec651280e6f1fd798eead5577d13f212760 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c50fd11b8e22ba3e0cba319226536b2701a9d8f82168a7bffed4ff9b70d2f9c +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..84fdf3a1ba35a83d832c3d1bba20340b47c77782 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee57ebf52ed82b75a38b72b51c1f3a48f7e6e0e8a80a0593d8537397f5c6b7e5 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdffb75b8bcb0d0775397de28aa1b9fd00116791 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c11299a5f122f181e4de683176e54882c3bf8d92cb965fd2a4adeb5de4a75e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1cd23847cb696aca477205ee558041ac8278d82d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d066dfc74362671d0bd948d046e01deebec8b05a753709a9d57f63cecdd6734 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..e646acf42a9d542f2156bb300510fe3bba092bde --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e858d60025c14cb0f3d830ed4a8c1c77c71209004951abe8f9acba61fdd39990 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5e90c847bb2eef8eae3216a26369f30b5b5702f5 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f8b25ad97cc83d963e3c7a8ec5039a23d76a7b447b58b6e2d4631bdfe5d85e5 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..5556f776ec76f03c96c145ed81536c44815c5d75 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-20999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7478f03cbb7fb4178d5f11e8f43596065973d8dceb0da56c871c725f4d5cbe23 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..410ec49a5721cf392bf37a4f06c7829bc5f886a6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0052e9d898b495bcbd849dd4d13c110d351eb25030e7db9dc05dc41d3817624e +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..c04968f99814fcdde6bbb6440c4ef813c248814a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0104c866c74857cbcb6ebacb52a9c6276700967aae855556b899e61d80e086cf +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a1c06ac59af693fd783abf3564dd8f66e0c980ff --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0207689e55fa2ea2b339448ab982b67631ccf672f8d1bba87bbf851b5cb4007 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..78d507f82877b7fe1f1a22dcf28528fe7c85fbc6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53de6830d3d17a268350489d084bceab9b6619a22ef9584a7c63ca4092b4cd4d +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..007577d71986bdf3a5673a06d79c39362ecd518d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daed40e4488f38d9f8641f61d103e5eba6eb7b7a6bf8f85244a67a54aa1bd118 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0db13b118d2bfd35f21e6a3000271f26d4b936 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:084205118940c6346643b2cec06a8c3969eba1d69c073acb48b05862b13f8044 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f09ad28172cb67dd83638f53e7f630f4937ee20d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a2c5c15907b05cab577991fb61b19e9a79d735a5adb34899cbf153030669171 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..6675f15602e58ecd8e2e04dcd27a5306d6870228 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-21999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f3b9d9340a6a2fe6cc5c42701b72b7ea30d62b7b2fdeeefcee6f5ee83daacd0 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8fedd3c99a2641083964270f60c7e61fb70ec0aa --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0c632585dcd0cbd21c1aaa04a4090a9d199dc5809cb79aba7e26e58231a655 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..e787045b374eea769bd5066ba2c1f125e9a35ac7 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4181b77585ce2b2b482773bbade18643b72fd094feb90c459e2aee1b6dac3a42 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..524786815a17c4d52babaf0efcdd62139c48d5a2 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64fa90c3b66842f8daaf4c118e513050c71053390681cfdabe56718dcb8fa2a0 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..16108dc9157186786308754eb22f5059db33a0e1 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d25bd8f60a6a55ffb4224e7975e76f3b0d02b3d0b8a75b24ff3625c2e4af72c +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b557bd2163144fc0deb9605c4d91297a2c3912b6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b2e14bac39509c31fb7d61e26fceedc919acf2682423b0ca7f10bb8a0b8ad1 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..6da5b886f1fb6fe1ab58dadfce0ee5ea9ee0c386 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20b8d7dff9590876c6c6c836e66a28badafe432120698d0b83c909a787a2c87 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..25dbb08f1f202ea49e147d77401242f76f10428b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb221a1ccee0aca6d16e0aabe252f0c5a5499087400f0d22627f983bc0b43c0 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca6a51fb9823e0f6940447c241865aeb5e6ae4c9 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705b1f83505ec3bad8d16d2f583520ae011625c2cea80e965fbf2f0cefe0ca9d +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5290d7f174be8cad116f1077e73fd9732af8dd72 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f7d7c7ac8c904fdb7764118ff76194061be34315184518560a264b0ce7bf56 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc45e7e177773cf10f31be8e83bd725cf5496668 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-22999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3679d2b146e86d382e8692d6a4d46001c1a848ddf6a45527c01c12b326046f +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..374dc154616b08cc10f55fa47225f498d35da601 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9970adab94c035ef7fffb456afb74615732371b58e7e605f8b16e63022505fa9 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..34b24c70cb729fb3855d5c8e0b016bfba8b1d007 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221b77fc20b1555736024b9257b8e163ce8b7b75e28049179dbff81ab551e696 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d2a51ee32293c329566208621d7e41ed8f539805 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c875c8e66d2389102d22fc5385c9216ff0aa52da2b7679be4ab4519628fcbe04 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2379e9c27edcb7cc2a5958633b1bbe5d3ea6a5d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff72096039b2c62d285b22c0a188c3ae934eaf1c0f2842e5f4143c874e8ad10 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..48c053a8ada02a6c28ebcc9803bc84b51ca1e7bd --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16121975704c1c8aca196b7a4379cc534b2c9f60f9b4c40a0347719e1be9bf4e +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..926de93a428e55e763f73b7ead6f8f4b998b7fe6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38014981fef9ecb8e61a21594acf79050f92a2a22dd9d1b81217a23145e9c6a4 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9da1f5341b22c9b4483301454b68d5f46c5ea5ef --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2ba24ef407428ad07bcc80248074fdcdfced669845f4823039a4dadc2eb33c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f32d35ff46942c8408a5668c589a1f3b5063e4b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-23999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d95bd1ed2b4aeb6ee23cca455d60fdb8184d1fa1dae35dad56d5d72603dac4e0 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..665f56c4b48be28b62e4d51aa2359bfe2f23d68a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb605ae95736ff9f3f07c6ddf5da74ed0a288def79ca654d9b7aef1f74c436c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..61eaeea7f258652dcb6f8c9298a0390030f15b56 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde3a7ad2911031ee99dc65754eae17de96b9056cb49342fde5bde6c92f7ea29 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c9521c29a5ff418c13792798d20045ce8808f956 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d93be83ab179d52e2c815a94e9acd24fe07e0e644191cc8d4e5bb022830d40 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..b49dbf6bb82d58fa47dbfbf9fceb626e04cf9224 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54689cec8a1ead0bf07aacecb7b6fd22132360bf0bad5ac751574ef0338e72e4 +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2280515474cebf7d794f90f03e2db3f2846d572e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a1e6ddde91f1d703bb6fd1b7c48a97acfde70e1ea63fc6fce0edb01f4b6c18 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8772bf9ac43a752d771486633a6a1cfef0bba2e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c225c274adcb1259e8e6856c30c554487e2bbe589921cbdd5d36f2236fdf331e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d4f25d9cbdaa88654dcad529e815dbf697c3274 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c453903f9850036bd1467853c4b40563d80e5b1b55cdded8012e172e423ca7 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c5f6416047e188159568ae523e83b1536f499a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2872dda0f7e755a5a116859c93907fcd729a8b5d53864a58c85cf81d64207fb1 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1355ccd230b0c8acd6ec92fe3dc3484417339f6d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d3e4d629ee6330929f292fbff8606f86bfd740af528c0265b95cb9a7c228ae +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..30b30f57370120cad827b92af62d4edacf88000d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-24999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77a6e4c0292979623612726c00123bd9c651d11bdf15c59550bab4497735e2c +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.onnx new file mode 100644 index 0000000000000000000000000000000000000000..66044f369900efd6a34f39cf8d2485b69c8755f9 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc65fcaef06a81951ad9380c50171259e034696f3734c4f095be6c0c862cd84 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.pt new file mode 100644 index 0000000000000000000000000000000000000000..327cca0675c3be2aa3e881f16ce2389c1577bce7 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-25001480.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43dfedaedb5e6bc06a3e46d92d4be533bc13721974a0d3bfa80c40e0db4e509e +size 15534410 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..13437a049ff216c138af2a3eef5ef4209ae290cb --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22592d82c01d3e85eea892bcb9cd5d08a09700c55b646acc6069b19b5317d824 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..2adb681ee17b2e8a7063aa2ee36ae12c15a28388 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf84030225f3a076179a0b57051f8494a952d58cfee64cbdf5d6cfefdc1a995 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..23cc0526277333566ccbecbc0614b32a413f0975 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455a12cc6e1a54df43a9dc0318ddd917d866a918cdb9020f4002062fc7bd3014 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..6322b42c0153ec4ab0494b3dda82794c1cbd1a9f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-2999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00cb21769911daa8707072b5ea1f49bcaa1edcea91e31ad9e99cf9468c70f614 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..6a233f2dd2b36627c75a0e0f06ce02423768e55c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35c39d46e2916f437f3dc3679ff12ecd3570a84a1ced8df024b9d4b0b2f0fb55 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..c62270de8f0bdb119a1ae03b750ec22dbe535b3f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66ea7df759f8d54d8602d73181bb7cefca5b3468e108ad707fdd1508943f9744 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..2cb48070788a6ccbc452da2543b522603a3895b2 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f96f518026f289eee2c84cd53b31e85a5c2b1c4e151cc2a2672488e15e01ad63 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..816567223b36d5ecefa6767edd71fa780ec674fa --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdf82729ce0d0bc1e56ba15f69a310c80dcd70f5a3678db1daf91a957b019306 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f83eb3b8a08fd01c021c89a8c7bd05ecd4cd72a0 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bba1c680a1ad91de07ceb6b5f70cb3c087a2d9d66a645ebc5f17e889254046 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fe0f1eab3337c615b92f94e8fc43b353ee6604e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e6f714fe854328dd08b3fb95be693003524dc8633d2512d5688dc92f0c63c93 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..96451a235bffc45aecf73bf177f9cca7da167fdd --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4958fbb7c985206f91a00de32f1b96ec1abd1e8e452d0f729207505b44733794 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d8a1f117ced9207137862478e488cff8b537766 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-3999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6892a10cc33e45aa3a0c473e47cc68835e837c62f7e3277b3ae1668cd75e623 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..39ae2eb16110ac17c70bb0d10c2d3c1bfc1d846b --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef81ae7ab9c9a0b774fc81db045ad3260261a3a5181658cc6ee0ffc7e81c4fa +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..aaa9c2a58b8d478aa6d3b9a338468ba83bb766f9 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccec992eb12e8243c4cc8fe8787c18fa7338db41fc4c269646a60ef1797da70f +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ec7e1f1ee8138f57bec667f3e08059f2ece4fc7c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e011445edcd0061e1498bb5bd566ffbc9cd24439d7936e5da6156a90968775a +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..513211b3b057d3fa5425db055aa835d8dd285004 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a712b795fe705216d8729ae48a8f933dd009c9093bbacae9ceca5e82c9f6f9ac +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..be87ef13df4e44a6af4896835eee0c63a3ea497a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78059a3319c7a0bc6e8768f97d1597c7234d310852d4e402364c9ac2b9d7c96f +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..c94cf9223e1ba9ca416d2feef42d90c604c130cb --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3221436a87b2335da4788e025122ac9290022c1da0720673b1bebc05228a27d5 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fc51b23fc79e6c219bbe63f699f7cf295cb801fd --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d31d4dc3c9f2839390af2697cb9580865452ddd8fb9656baeb82c18c83ada13 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.pt new file mode 100644 index 0000000000000000000000000000000000000000..b97f8ed706912acee156720b8fc608aeddcbaf8e --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-499924.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42fc5bf6af7d76f7064f81b1ebb17f21e2173e4f00eca7535ef6e6887cce2145 +size 15534102 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..36cf40cf5454c7c4d65b769155e41a31934a3a6d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a4220d556253bd674ac8e03741af6d2f78a940a42be23e01729e1e00324640 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..56c2bc5b720c0fd247813111f4d9b46a44d25791 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-4999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf38f68a6721ff9a47ef04876b90a58bf1a8e0e7da39d39b33c1d87a3e06524 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..95f5910695260d55bbd7a492720dc44d95be4e93 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe20a3a1d6be1bd08c5d3c468fe7f66d027e1f822a481e31749d84fe9a2efc64 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..35526b433cde33b2b38e22b2486a3880f9d0b582 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4882d34c95373b36e165c327895e4e75fa1a260022c971011fa4666a6f065ad0 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..976ad4c675ce5f0d1b2223b6bba76505177dce8f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8ed6eb87e35fef50e8f9ea6d1151d334f32cf8557cbbfd1f75722558ec805a +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..adb95ac2978e23881ac4bd07b5b688d6e1236344 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47092fab69434f44b78cd77c842852f938f2c96a8fe4c0d5cd64a47ff8359b93 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..06b4a6856f1b767d3707e64357bed1595e57cfd1 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22402ac723d2af7b23ec1450b354d65927ff614f78c4fe3f9889f80319fc03f +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fe3fc3987ee92ccbd94857ce45a0b8fb9acb3d5 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be32764c754685fb82501ef670a471d4acdb37e23b0366ef0e5219aa3113fd4e +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..43f3a401fe2817ba0a8d2bda4fcf7010e7f0651d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33dd11d039112d25aa83f2b3790ef4449a2e53a83f1b64d7c8dc06321ffc7c11 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b5d222be4e3425f38bbbe7d1b30323d9ce84785 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-5999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48f9c89300e20dd117861a5f478cb3ab351f982ca8050ee939564f27458994a +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..83c3629fd7b56717e557bfef721a1cc829fdcfca --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c1e5fac6ad74667c9bb43d919d3092694c44a7cc118861c7463e86966f2939c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..d505e48e9c1fa057b914362a036c525d87c70132 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ab1bd0e2c55cd1d1ea1b239a725e960b9110bc612640fc5f14a3b76ea6077ff +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..755399141f5fccd6ddd9cdfafa478f1b53e8e815 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241f0c5c39a6c9975450c6b3102bca703a00871cdadf19ad3342da23a55a9509 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..bedeba1f65bdfce3e9a3050685d61d83f35a9e50 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7aecd34f41955479676ba8d86d7e92d4fd8e77f2a6195242f3ff9246b0822c +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d292a61f329c5f4e2dd330aab3adeba984ac3cae --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c11f2971705804ab991b37188ceb6424d505337b80d429467f8675b1643019c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..0efb1d67161a0013a62f35f45d278b00585cade2 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61400468a39c49eca9db8184d44a979cab3083f566242e03ca26ca604d4d5c42 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..75828d1221107039ded4c7269c64dca7010ae0ee --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ef782fc1ffd5364cd91ceb1c92edc108032c2a214a49c383aef0f8b82fb93cb +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a4d232a216fa6f5156f17b80d83232232af9fb6 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-6999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc317f4793cb944a0766ab8cccb8fda387a9cb020ca90591be6780dd790d27d +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c740c714112595adec1343c2b0d4e93c1700c33f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726fd307aeacba7d3064c682acf75ee5e44f6b452b8a87e4e10df4407c6cb818 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.pt new file mode 100644 index 0000000000000000000000000000000000000000..59aab70b599e6fcfb578bb28029a48994a44ccef --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7249896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5fa2bfbbaffd048705a1f2bd4e5b0a7a46791fa72b14af061c4b9cbe03808dc +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.onnx new file mode 100644 index 0000000000000000000000000000000000000000..1800e3f0d5a57ab8cd9f84f8d0a5fa8bd11775ae --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3f9c7d3662be8bacd1161c929db65b5a07203ab63cf987e219e15866f3895d +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.pt new file mode 100644 index 0000000000000000000000000000000000000000..ede8f5756b6e9b83c0094ccf624e153e3c3ec835 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-749804.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7415f1b30b2c658413bc420a1a2da88c2a7c3d53369f440e70f183ce471b1a4 +size 15534102 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a58f48e042c810d5e76ac87007009ea1a4e66b5f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62432ee9361f89555d9558e715b5a16d668e98724b2b67e1c9dbcd30a7ba41c +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad2babc3978b55d218e3a5bc1ee8c25cad9b418a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7499964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95d456148e4af66891cf539d5ed11b1c4ba021915bdf5bab80f6b3ad6c646ed8 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..06aa15070a5a08a542e4afe71faf274b408149ba --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea202352bd3b544419a801de084273d0b7d7e90a4b68908b7d6a7f9a885f82a +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2f5b9c9ed3350c09db1df8793f1394d380d6d12 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7749944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75bd7ad4dac25f2da9cc83217d11d9fe6c104692c7b99e9e2a23c774ee76aff4 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e00cce8a3327073135df815f3ac04218a49c23fc --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b73c864110c623dfb90dbad4d7bcc4be45077894aaffecc8314dbe6f41295d +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.pt new file mode 100644 index 0000000000000000000000000000000000000000..5854e79e286048efa55e27f43d7e7253cb3e0e30 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-7999896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ba810eb7327f47a3092d88bb03ae4cef9c8eb2e1964310e4cd9fc5abc7a053 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c097bc92a9a663bcb4da0087b6b95f6e4f6a60ee --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74de02cd40bffff95f82bbbe42e2ca422bf77c0ed6bef667a3ead8cc7d83564 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdd2bc41a23e7dccef63f228ee225910947c2d5c --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8249964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e01a1fd6ef23934496e5344d280e9f61d34266990d9ba76a6200bd9c4c8d59dd +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3f18fd8c1acefc2b4b0ff146fdf2fb0e5607107a --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab78e7563697bd7a60b4be52fc13a9e03e6441902ccf4e0c78651d74c20dcf8 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.pt new file mode 100644 index 0000000000000000000000000000000000000000..01c65ade1e901c0b0580c981d39694b1b2c8fb1d --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8499944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5b5ad3ac55797a7c323d7098529fcf0e8d5dd06587e40ff627900ad4071d87 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..70d743139bd8ca9cb1e58198b63d75d6924003fc --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf05999c55a47bee8121dd2b95636bb531b05d428523078673cb82d271ce560 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.pt new file mode 100644 index 0000000000000000000000000000000000000000..213ec34ba6c0a7d79b84a77d3c998dd55df91fbe --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8749896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb2f339a5aa29c3762c189eba29ecfe8a070dc0c9dd921361f78c9755d7bfc4 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..56d9adce17920739dd5b5a5042b437b96724ccad --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6ec5615a49266d715b414d5786a214c61b4b02e9629225466bcafb38363207 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.pt new file mode 100644 index 0000000000000000000000000000000000000000..383f7602cce6094874a95845251b4e942b09c242 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-8999964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddebef8502764acc738c1ee94c322aa99116cb387dcff21df4264cb298ed820d +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..449b016776c366bc86589e001721963619e76d12 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0f558322635aa70f3399858b5d6bf5b6556a03046c5174260ff8cfd4514d27 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.pt new file mode 100644 index 0000000000000000000000000000000000000000..fec9681f00c57a4182c160f7beb2587d94bb0c23 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9249944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47cde79f1b4a7ff30240a57160608a0fc114adf5550d7a40a01597cc85f19593 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.onnx new file mode 100644 index 0000000000000000000000000000000000000000..8dec1b68d5b8f3cfed715122aff5985c4501ad3f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5168427e9878b064599a7c5873d3cd9a9ad5e749fa0ce22288eefa921e14a1 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.pt new file mode 100644 index 0000000000000000000000000000000000000000..3011b03472ddc9ccf8e5586782842775fadf6181 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9499896.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63d873c1825759fec3e0ef366295692bcf912d13068342522f7a965e0262a4d0 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.onnx new file mode 100644 index 0000000000000000000000000000000000000000..da68349002c1e8ddfabcbcf155f6cea565cc1fff --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5093ff9967cf981430a502d8f4bf1a4b52b64912665a2cb5582022072d2b9227 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.pt new file mode 100644 index 0000000000000000000000000000000000000000..989d5eec51d6ef5757546feb6d605d1fc1d45a08 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9749964.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0052b44c1e85227235549cd4c15e51acdc0f78510805c1cf429457d40ac052a1 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f412fdb49f6fb27bda4e486c31151999dfb06e6f --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e8c43d71c25bf1d0167fbbbb2908e9d9129dcaae78eeae68477ca7cb8f8182 +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e925256cf0eef415cb6b0a5eb52bb05a3292732 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-999940.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c270f8bec818fc41e234ab631d5bc4db7f5f2b21884f2b3585bd61b4f074e03a +size 15534102 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.onnx b/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.onnx new file mode 100644 index 0000000000000000000000000000000000000000..903c929460961a3460bf0e4001ed9fe2644834e1 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d506762d333466924064da8055183c0a2e38664cef071505e2f680391aed74a +size 2186395 diff --git a/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.pt b/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6d049110b061086c7722a827b999ebe0ee7d123 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/TauAgent-9999944.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802a5a261b50093b31c30dbf4b2c1526c48afaacba32d2bdec7efd56b3db0702 +size 15534256 diff --git a/results/tau_agent_C1_25M/TauAgent/checkpoint.pt b/results/tau_agent_C1_25M/TauAgent/checkpoint.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fbd6bee4d31840c2e4aa8b6e6acd7e01ea9b8e3 --- /dev/null +++ b/results/tau_agent_C1_25M/TauAgent/checkpoint.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac55df54c124e990b36398e4b5344abcb032fcbcfccd9abfa3a421b752c1fe0e +size 15533332 diff --git a/results/tau_agent_C1_25M/configuration.yaml b/results/tau_agent_C1_25M/configuration.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3cd3522dbe08cb7dd7cf155310dca3e0f6959a7c --- /dev/null +++ b/results/tau_agent_C1_25M/configuration.yaml @@ -0,0 +1,93 @@ +default_settings: null +behaviors: + TauAgent: + trainer_type: ppo + hyperparameters: + batch_size: 256 + buffer_size: 4096 + learning_rate: 3.0e-05 + beta: 0.005 + epsilon: 0.2 + lambd: 0.95 + num_epoch: 12 + shared_critic: false + learning_rate_schedule: linear + beta_schedule: linear + epsilon_schedule: linear + checkpoint_interval: 250000 + network_settings: + normalize: true + hidden_units: 256 + num_layers: 4 + vis_encode_type: simple + memory: + sequence_length: 256 + memory_size: 256 + goal_conditioning_type: hyper + deterministic: false + reward_signals: + extrinsic: + gamma: 0.99 + strength: 1.0 + network_settings: + normalize: false + hidden_units: 128 + num_layers: 2 + vis_encode_type: simple + memory: null + goal_conditioning_type: hyper + deterministic: false + curiosity: + gamma: 0.995 + strength: 0.1 + network_settings: + normalize: true + hidden_units: 256 + num_layers: 4 + vis_encode_type: simple + memory: null + goal_conditioning_type: hyper + deterministic: false + learning_rate: 0.0003 + encoding_size: null + init_path: null + keep_checkpoints: 100 + even_checkpoints: false + max_steps: 25000000 + time_horizon: 256 + summary_freq: 10000 + threaded: true + self_play: null + behavioral_cloning: null +env_settings: + env_path: .\Build + env_args: null + base_port: 5005 + num_envs: 1 + num_areas: 1 + timeout_wait: 300 + seed: -1 + max_lifetime_restarts: 10 + restarts_rate_limit_n: 1 + restarts_rate_limit_period_s: 60 +engine_settings: + width: 84 + height: 84 + quality_level: 5 + time_scale: 20 + target_frame_rate: -1 + capture_frame_rate: 60 + no_graphics: false +environment_parameters: null +checkpoint_settings: + run_id: tau_agent_C1 + initialize_from: null + load_model: false + resume: false + force: true + train_model: false + inference: false + results_dir: results +torch_settings: + device: cuda +debug: false